{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.021921608328616865, "eval_steps": 1000, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.5677195921819358e-05, "grad_norm": 4.09375, "learning_rate": 0.00019981493881728382, "loss": 3.8085, "step": 1 }, { "epoch": 5.1354391843638716e-05, "grad_norm": 3.796875, "learning_rate": 0.00019981466725181818, "loss": 3.4522, "step": 2 }, { "epoch": 7.703158776545808e-05, "grad_norm": 2.765625, "learning_rate": 0.0001998143954874309, "loss": 3.3251, "step": 3 }, { "epoch": 0.00010270878368727743, "grad_norm": 2.296875, "learning_rate": 0.00019981412352412255, "loss": 3.3513, "step": 4 }, { "epoch": 0.0001283859796090968, "grad_norm": 2.59375, "learning_rate": 0.00019981385136189368, "loss": 3.4416, "step": 5 }, { "epoch": 0.00015406317553091616, "grad_norm": 2.125, "learning_rate": 0.00019981357900074486, "loss": 3.2113, "step": 6 }, { "epoch": 0.0001797403714527355, "grad_norm": 2.28125, "learning_rate": 0.00019981330644067654, "loss": 2.8963, "step": 7 }, { "epoch": 0.00020541756737455487, "grad_norm": 2.078125, "learning_rate": 0.00019981303368168932, "loss": 3.1162, "step": 8 }, { "epoch": 0.0002310947632963742, "grad_norm": 2.171875, "learning_rate": 0.00019981276072378375, "loss": 3.271, "step": 9 }, { "epoch": 0.0002567719592181936, "grad_norm": 1.890625, "learning_rate": 0.00019981248756696033, "loss": 2.723, "step": 10 }, { "epoch": 0.0002824491551400129, "grad_norm": 2.171875, "learning_rate": 0.0001998122142112197, "loss": 2.9188, "step": 11 }, { "epoch": 0.0003081263510618323, "grad_norm": 2.1875, "learning_rate": 0.00019981194065656232, "loss": 2.8855, "step": 12 }, { "epoch": 0.00033380354698365165, "grad_norm": 1.921875, "learning_rate": 0.00019981166690298877, "loss": 2.9555, "step": 13 }, { "epoch": 0.000359480742905471, "grad_norm": 2.15625, "learning_rate": 0.00019981139295049952, "loss": 2.779, "step": 14 }, { "epoch": 0.0003851579388272904, "grad_norm": 2.078125, "learning_rate": 0.00019981111879909525, "loss": 2.8852, "step": 15 }, { "epoch": 0.00041083513474910973, "grad_norm": 2.046875, "learning_rate": 0.00019981084444877642, "loss": 2.8717, "step": 16 }, { "epoch": 0.00043651233067092907, "grad_norm": 2.203125, "learning_rate": 0.00019981056989954357, "loss": 3.0705, "step": 17 }, { "epoch": 0.0004621895265927484, "grad_norm": 1.90625, "learning_rate": 0.0001998102951513973, "loss": 2.8481, "step": 18 }, { "epoch": 0.0004878667225145678, "grad_norm": 1.96875, "learning_rate": 0.0001998100202043381, "loss": 2.8879, "step": 19 }, { "epoch": 0.0005135439184363871, "grad_norm": 1.7109375, "learning_rate": 0.00019980974505836655, "loss": 2.616, "step": 20 }, { "epoch": 0.0005392211143582065, "grad_norm": 2.265625, "learning_rate": 0.00019980946971348323, "loss": 2.8716, "step": 21 }, { "epoch": 0.0005648983102800258, "grad_norm": 1.875, "learning_rate": 0.00019980919416968859, "loss": 2.4833, "step": 22 }, { "epoch": 0.0005905755062018452, "grad_norm": 6.25, "learning_rate": 0.0001998089184269833, "loss": 2.8268, "step": 23 }, { "epoch": 0.0006162527021236646, "grad_norm": 1.8359375, "learning_rate": 0.00019980864248536783, "loss": 2.7042, "step": 24 }, { "epoch": 0.000641929898045484, "grad_norm": 9.5625, "learning_rate": 0.00019980836634484275, "loss": 2.6445, "step": 25 }, { "epoch": 0.0006676070939673033, "grad_norm": 1.90625, "learning_rate": 0.00019980809000540858, "loss": 2.6366, "step": 26 }, { "epoch": 0.0006932842898891226, "grad_norm": 1.7578125, "learning_rate": 0.00019980781346706596, "loss": 2.9186, "step": 27 }, { "epoch": 0.000718961485810942, "grad_norm": 2.125, "learning_rate": 0.00019980753672981533, "loss": 2.5831, "step": 28 }, { "epoch": 0.0007446386817327613, "grad_norm": 1.8984375, "learning_rate": 0.00019980725979365732, "loss": 2.5846, "step": 29 }, { "epoch": 0.0007703158776545808, "grad_norm": 1.7265625, "learning_rate": 0.00019980698265859248, "loss": 2.5265, "step": 30 }, { "epoch": 0.0007959930735764001, "grad_norm": 1.625, "learning_rate": 0.00019980670532462132, "loss": 2.612, "step": 31 }, { "epoch": 0.0008216702694982195, "grad_norm": 1.78125, "learning_rate": 0.0001998064277917444, "loss": 2.4593, "step": 32 }, { "epoch": 0.0008473474654200388, "grad_norm": 1.890625, "learning_rate": 0.0001998061500599623, "loss": 2.5988, "step": 33 }, { "epoch": 0.0008730246613418581, "grad_norm": 1.8828125, "learning_rate": 0.00019980587212927554, "loss": 2.737, "step": 34 }, { "epoch": 0.0008987018572636775, "grad_norm": 1.6875, "learning_rate": 0.0001998055939996847, "loss": 2.6832, "step": 35 }, { "epoch": 0.0009243790531854968, "grad_norm": 1.6796875, "learning_rate": 0.00019980531567119033, "loss": 2.3475, "step": 36 }, { "epoch": 0.0009500562491073163, "grad_norm": 1.78125, "learning_rate": 0.000199805037143793, "loss": 2.4184, "step": 37 }, { "epoch": 0.0009757334450291356, "grad_norm": 2.203125, "learning_rate": 0.0001998047584174932, "loss": 2.5031, "step": 38 }, { "epoch": 0.001001410640950955, "grad_norm": 1.75, "learning_rate": 0.00019980447949229156, "loss": 2.6544, "step": 39 }, { "epoch": 0.0010270878368727743, "grad_norm": 1.9375, "learning_rate": 0.00019980420036818863, "loss": 2.534, "step": 40 }, { "epoch": 0.0010527650327945936, "grad_norm": 1.578125, "learning_rate": 0.00019980392104518492, "loss": 2.3989, "step": 41 }, { "epoch": 0.001078442228716413, "grad_norm": 1.5390625, "learning_rate": 0.00019980364152328103, "loss": 2.4021, "step": 42 }, { "epoch": 0.0011041194246382323, "grad_norm": 3.265625, "learning_rate": 0.00019980336180247746, "loss": 2.6585, "step": 43 }, { "epoch": 0.0011297966205600517, "grad_norm": 1.7578125, "learning_rate": 0.00019980308188277484, "loss": 2.6566, "step": 44 }, { "epoch": 0.001155473816481871, "grad_norm": 1.4921875, "learning_rate": 0.00019980280176417367, "loss": 2.3926, "step": 45 }, { "epoch": 0.0011811510124036903, "grad_norm": 1.53125, "learning_rate": 0.00019980252144667456, "loss": 2.5069, "step": 46 }, { "epoch": 0.00120682820832551, "grad_norm": 1.6640625, "learning_rate": 0.000199802240930278, "loss": 2.5065, "step": 47 }, { "epoch": 0.0012325054042473292, "grad_norm": 1.6640625, "learning_rate": 0.0001998019602149846, "loss": 2.3828, "step": 48 }, { "epoch": 0.0012581826001691486, "grad_norm": 1.5, "learning_rate": 0.00019980167930079494, "loss": 2.4286, "step": 49 }, { "epoch": 0.001283859796090968, "grad_norm": 1.65625, "learning_rate": 0.0001998013981877095, "loss": 2.5382, "step": 50 }, { "epoch": 0.0013095369920127873, "grad_norm": 1.5234375, "learning_rate": 0.00019980111687572892, "loss": 2.4564, "step": 51 }, { "epoch": 0.0013352141879346066, "grad_norm": 1.640625, "learning_rate": 0.0001998008353648537, "loss": 2.5861, "step": 52 }, { "epoch": 0.001360891383856426, "grad_norm": 1.5234375, "learning_rate": 0.00019980055365508448, "loss": 2.4832, "step": 53 }, { "epoch": 0.0013865685797782453, "grad_norm": 1.46875, "learning_rate": 0.00019980027174642176, "loss": 2.1409, "step": 54 }, { "epoch": 0.0014122457757000646, "grad_norm": 1.6015625, "learning_rate": 0.00019979998963886607, "loss": 2.4651, "step": 55 }, { "epoch": 0.001437922971621884, "grad_norm": 1.5234375, "learning_rate": 0.00019979970733241805, "loss": 2.6257, "step": 56 }, { "epoch": 0.0014636001675437033, "grad_norm": 1.484375, "learning_rate": 0.0001997994248270782, "loss": 2.4016, "step": 57 }, { "epoch": 0.0014892773634655227, "grad_norm": 1.546875, "learning_rate": 0.00019979914212284717, "loss": 2.4857, "step": 58 }, { "epoch": 0.001514954559387342, "grad_norm": 1.8515625, "learning_rate": 0.0001997988592197254, "loss": 2.5518, "step": 59 }, { "epoch": 0.0015406317553091616, "grad_norm": 1.671875, "learning_rate": 0.00019979857611771357, "loss": 2.4377, "step": 60 }, { "epoch": 0.001566308951230981, "grad_norm": 1.515625, "learning_rate": 0.00019979829281681214, "loss": 2.3236, "step": 61 }, { "epoch": 0.0015919861471528002, "grad_norm": 1.40625, "learning_rate": 0.00019979800931702176, "loss": 2.4122, "step": 62 }, { "epoch": 0.0016176633430746196, "grad_norm": 1.4921875, "learning_rate": 0.00019979772561834298, "loss": 2.4593, "step": 63 }, { "epoch": 0.001643340538996439, "grad_norm": 1.6796875, "learning_rate": 0.00019979744172077631, "loss": 2.4078, "step": 64 }, { "epoch": 0.0016690177349182583, "grad_norm": 1.59375, "learning_rate": 0.00019979715762432235, "loss": 2.2705, "step": 65 }, { "epoch": 0.0016946949308400776, "grad_norm": 1.5390625, "learning_rate": 0.00019979687332898172, "loss": 2.2347, "step": 66 }, { "epoch": 0.001720372126761897, "grad_norm": 1.4453125, "learning_rate": 0.00019979658883475488, "loss": 2.254, "step": 67 }, { "epoch": 0.0017460493226837163, "grad_norm": 1.4296875, "learning_rate": 0.0001997963041416425, "loss": 2.1997, "step": 68 }, { "epoch": 0.0017717265186055356, "grad_norm": 1.375, "learning_rate": 0.00019979601924964506, "loss": 2.1757, "step": 69 }, { "epoch": 0.001797403714527355, "grad_norm": 1.3671875, "learning_rate": 0.00019979573415876317, "loss": 2.3028, "step": 70 }, { "epoch": 0.0018230809104491743, "grad_norm": 1.390625, "learning_rate": 0.00019979544886899743, "loss": 2.3168, "step": 71 }, { "epoch": 0.0018487581063709937, "grad_norm": 1.4609375, "learning_rate": 0.00019979516338034835, "loss": 2.2942, "step": 72 }, { "epoch": 0.0018744353022928132, "grad_norm": 1.46875, "learning_rate": 0.00019979487769281653, "loss": 2.3045, "step": 73 }, { "epoch": 0.0019001124982146326, "grad_norm": 1.4921875, "learning_rate": 0.00019979459180640254, "loss": 2.2598, "step": 74 }, { "epoch": 0.001925789694136452, "grad_norm": 1.5859375, "learning_rate": 0.00019979430572110692, "loss": 2.2965, "step": 75 }, { "epoch": 0.0019514668900582712, "grad_norm": 1.4609375, "learning_rate": 0.00019979401943693027, "loss": 2.1196, "step": 76 }, { "epoch": 0.0019771440859800904, "grad_norm": 1.53125, "learning_rate": 0.0001997937329538732, "loss": 2.166, "step": 77 }, { "epoch": 0.00200282128190191, "grad_norm": 1.65625, "learning_rate": 0.00019979344627193618, "loss": 2.4396, "step": 78 }, { "epoch": 0.0020284984778237295, "grad_norm": 1.4140625, "learning_rate": 0.00019979315939111985, "loss": 2.2294, "step": 79 }, { "epoch": 0.0020541756737455486, "grad_norm": 1.4453125, "learning_rate": 0.00019979287231142477, "loss": 2.1996, "step": 80 }, { "epoch": 0.002079852869667368, "grad_norm": 1.359375, "learning_rate": 0.00019979258503285152, "loss": 2.0635, "step": 81 }, { "epoch": 0.0021055300655891873, "grad_norm": 1.3984375, "learning_rate": 0.00019979229755540066, "loss": 2.2769, "step": 82 }, { "epoch": 0.002131207261511007, "grad_norm": 1.546875, "learning_rate": 0.00019979200987907273, "loss": 2.3905, "step": 83 }, { "epoch": 0.002156884457432826, "grad_norm": 1.4140625, "learning_rate": 0.0001997917220038684, "loss": 2.2679, "step": 84 }, { "epoch": 0.0021825616533546455, "grad_norm": 1.3828125, "learning_rate": 0.00019979143392978812, "loss": 2.1311, "step": 85 }, { "epoch": 0.0022082388492764646, "grad_norm": 1.4765625, "learning_rate": 0.00019979114565683257, "loss": 2.4163, "step": 86 }, { "epoch": 0.002233916045198284, "grad_norm": 1.4609375, "learning_rate": 0.00019979085718500227, "loss": 2.3178, "step": 87 }, { "epoch": 0.0022595932411201033, "grad_norm": 1.5546875, "learning_rate": 0.0001997905685142978, "loss": 2.2761, "step": 88 }, { "epoch": 0.002285270437041923, "grad_norm": 1.484375, "learning_rate": 0.00019979027964471974, "loss": 2.268, "step": 89 }, { "epoch": 0.002310947632963742, "grad_norm": 1.3671875, "learning_rate": 0.00019978999057626866, "loss": 2.3155, "step": 90 }, { "epoch": 0.0023366248288855616, "grad_norm": 1.390625, "learning_rate": 0.00019978970130894516, "loss": 2.1695, "step": 91 }, { "epoch": 0.0023623020248073807, "grad_norm": 1.5625, "learning_rate": 0.00019978941184274978, "loss": 2.0385, "step": 92 }, { "epoch": 0.0023879792207292003, "grad_norm": 1.453125, "learning_rate": 0.00019978912217768314, "loss": 2.1725, "step": 93 }, { "epoch": 0.00241365641665102, "grad_norm": 1.453125, "learning_rate": 0.0001997888323137458, "loss": 2.1515, "step": 94 }, { "epoch": 0.002439333612572839, "grad_norm": 1.4375, "learning_rate": 0.00019978854225093828, "loss": 2.2315, "step": 95 }, { "epoch": 0.0024650108084946585, "grad_norm": 1.625, "learning_rate": 0.00019978825198926125, "loss": 2.113, "step": 96 }, { "epoch": 0.0024906880044164776, "grad_norm": 1.515625, "learning_rate": 0.00019978796152871523, "loss": 2.3379, "step": 97 }, { "epoch": 0.002516365200338297, "grad_norm": 1.5390625, "learning_rate": 0.00019978767086930083, "loss": 2.0741, "step": 98 }, { "epoch": 0.0025420423962601163, "grad_norm": 1.3984375, "learning_rate": 0.0001997873800110186, "loss": 2.2404, "step": 99 }, { "epoch": 0.002567719592181936, "grad_norm": 1.5, "learning_rate": 0.00019978708895386916, "loss": 2.1393, "step": 100 }, { "epoch": 0.002593396788103755, "grad_norm": 1.375, "learning_rate": 0.00019978679769785306, "loss": 2.1815, "step": 101 }, { "epoch": 0.0026190739840255745, "grad_norm": 1.3828125, "learning_rate": 0.00019978650624297086, "loss": 2.1449, "step": 102 }, { "epoch": 0.0026447511799473937, "grad_norm": 1.4921875, "learning_rate": 0.0001997862145892232, "loss": 2.2345, "step": 103 }, { "epoch": 0.0026704283758692132, "grad_norm": 1.40625, "learning_rate": 0.0001997859227366106, "loss": 2.119, "step": 104 }, { "epoch": 0.0026961055717910323, "grad_norm": 1.4609375, "learning_rate": 0.0001997856306851337, "loss": 2.2419, "step": 105 }, { "epoch": 0.002721782767712852, "grad_norm": 1.3515625, "learning_rate": 0.000199785338434793, "loss": 2.0232, "step": 106 }, { "epoch": 0.0027474599636346715, "grad_norm": 1.390625, "learning_rate": 0.00019978504598558918, "loss": 2.214, "step": 107 }, { "epoch": 0.0027731371595564906, "grad_norm": 1.3125, "learning_rate": 0.00019978475333752279, "loss": 2.0411, "step": 108 }, { "epoch": 0.00279881435547831, "grad_norm": 1.3828125, "learning_rate": 0.00019978446049059436, "loss": 2.0352, "step": 109 }, { "epoch": 0.0028244915514001293, "grad_norm": 1.4375, "learning_rate": 0.00019978416744480452, "loss": 2.2534, "step": 110 }, { "epoch": 0.002850168747321949, "grad_norm": 1.4765625, "learning_rate": 0.00019978387420015387, "loss": 2.3404, "step": 111 }, { "epoch": 0.002875845943243768, "grad_norm": 1.3984375, "learning_rate": 0.00019978358075664295, "loss": 2.3274, "step": 112 }, { "epoch": 0.0029015231391655875, "grad_norm": 1.2890625, "learning_rate": 0.00019978328711427236, "loss": 2.1794, "step": 113 }, { "epoch": 0.0029272003350874066, "grad_norm": 1.5, "learning_rate": 0.00019978299327304272, "loss": 2.3423, "step": 114 }, { "epoch": 0.002952877531009226, "grad_norm": 1.4921875, "learning_rate": 0.00019978269923295457, "loss": 2.0839, "step": 115 }, { "epoch": 0.0029785547269310453, "grad_norm": 1.21875, "learning_rate": 0.00019978240499400854, "loss": 1.8905, "step": 116 }, { "epoch": 0.003004231922852865, "grad_norm": 1.3515625, "learning_rate": 0.00019978211055620516, "loss": 2.1838, "step": 117 }, { "epoch": 0.003029909118774684, "grad_norm": 1.40625, "learning_rate": 0.00019978181591954507, "loss": 2.1985, "step": 118 }, { "epoch": 0.0030555863146965036, "grad_norm": 1.328125, "learning_rate": 0.00019978152108402882, "loss": 2.0684, "step": 119 }, { "epoch": 0.003081263510618323, "grad_norm": 1.4765625, "learning_rate": 0.000199781226049657, "loss": 2.0323, "step": 120 }, { "epoch": 0.0031069407065401422, "grad_norm": 1.3671875, "learning_rate": 0.00019978093081643021, "loss": 1.9744, "step": 121 }, { "epoch": 0.003132617902461962, "grad_norm": 1.3203125, "learning_rate": 0.00019978063538434905, "loss": 2.0766, "step": 122 }, { "epoch": 0.003158295098383781, "grad_norm": 1.3203125, "learning_rate": 0.0001997803397534141, "loss": 2.1999, "step": 123 }, { "epoch": 0.0031839722943056005, "grad_norm": 1.328125, "learning_rate": 0.00019978004392362595, "loss": 1.9197, "step": 124 }, { "epoch": 0.0032096494902274196, "grad_norm": 1.3828125, "learning_rate": 0.00019977974789498517, "loss": 2.1476, "step": 125 }, { "epoch": 0.003235326686149239, "grad_norm": 1.3203125, "learning_rate": 0.00019977945166749236, "loss": 2.1268, "step": 126 }, { "epoch": 0.0032610038820710583, "grad_norm": 1.28125, "learning_rate": 0.00019977915524114814, "loss": 2.0392, "step": 127 }, { "epoch": 0.003286681077992878, "grad_norm": 1.359375, "learning_rate": 0.00019977885861595306, "loss": 2.0557, "step": 128 }, { "epoch": 0.003312358273914697, "grad_norm": 1.4765625, "learning_rate": 0.00019977856179190774, "loss": 2.2573, "step": 129 }, { "epoch": 0.0033380354698365165, "grad_norm": 1.4375, "learning_rate": 0.00019977826476901277, "loss": 2.1071, "step": 130 }, { "epoch": 0.0033637126657583357, "grad_norm": 1.2734375, "learning_rate": 0.0001997779675472687, "loss": 1.8169, "step": 131 }, { "epoch": 0.003389389861680155, "grad_norm": 1.3046875, "learning_rate": 0.00019977767012667618, "loss": 2.4099, "step": 132 }, { "epoch": 0.0034150670576019748, "grad_norm": 1.34375, "learning_rate": 0.00019977737250723575, "loss": 1.929, "step": 133 }, { "epoch": 0.003440744253523794, "grad_norm": 1.2734375, "learning_rate": 0.00019977707468894806, "loss": 1.9606, "step": 134 }, { "epoch": 0.0034664214494456135, "grad_norm": 1.3828125, "learning_rate": 0.00019977677667181365, "loss": 2.0147, "step": 135 }, { "epoch": 0.0034920986453674326, "grad_norm": 1.34375, "learning_rate": 0.00019977647845583315, "loss": 2.0981, "step": 136 }, { "epoch": 0.003517775841289252, "grad_norm": 1.40625, "learning_rate": 0.00019977618004100712, "loss": 2.3351, "step": 137 }, { "epoch": 0.0035434530372110713, "grad_norm": 1.2890625, "learning_rate": 0.0001997758814273362, "loss": 1.9093, "step": 138 }, { "epoch": 0.003569130233132891, "grad_norm": 1.484375, "learning_rate": 0.00019977558261482093, "loss": 1.9715, "step": 139 }, { "epoch": 0.00359480742905471, "grad_norm": 1.296875, "learning_rate": 0.00019977528360346197, "loss": 2.0464, "step": 140 }, { "epoch": 0.0036204846249765295, "grad_norm": 1.3125, "learning_rate": 0.0001997749843932599, "loss": 2.1074, "step": 141 }, { "epoch": 0.0036461618208983486, "grad_norm": 1.3359375, "learning_rate": 0.00019977468498421527, "loss": 2.0343, "step": 142 }, { "epoch": 0.003671839016820168, "grad_norm": 1.203125, "learning_rate": 0.00019977438537632867, "loss": 2.0005, "step": 143 }, { "epoch": 0.0036975162127419873, "grad_norm": 1.3828125, "learning_rate": 0.00019977408556960078, "loss": 2.0801, "step": 144 }, { "epoch": 0.003723193408663807, "grad_norm": 1.2421875, "learning_rate": 0.00019977378556403213, "loss": 2.2685, "step": 145 }, { "epoch": 0.0037488706045856264, "grad_norm": 1.328125, "learning_rate": 0.00019977348535962336, "loss": 1.9362, "step": 146 }, { "epoch": 0.0037745478005074455, "grad_norm": 1.21875, "learning_rate": 0.000199773184956375, "loss": 1.9542, "step": 147 }, { "epoch": 0.003800224996429265, "grad_norm": 1.3359375, "learning_rate": 0.00019977288435428774, "loss": 2.0299, "step": 148 }, { "epoch": 0.0038259021923510842, "grad_norm": 1.328125, "learning_rate": 0.00019977258355336212, "loss": 2.0426, "step": 149 }, { "epoch": 0.003851579388272904, "grad_norm": 1.453125, "learning_rate": 0.00019977228255359876, "loss": 2.096, "step": 150 }, { "epoch": 0.003877256584194723, "grad_norm": 1.59375, "learning_rate": 0.00019977198135499825, "loss": 2.1197, "step": 151 }, { "epoch": 0.0039029337801165425, "grad_norm": 1.296875, "learning_rate": 0.00019977167995756119, "loss": 2.2025, "step": 152 }, { "epoch": 0.003928610976038362, "grad_norm": 1.234375, "learning_rate": 0.0001997713783612882, "loss": 1.8936, "step": 153 }, { "epoch": 0.003954288171960181, "grad_norm": 1.3359375, "learning_rate": 0.0001997710765661798, "loss": 2.1434, "step": 154 }, { "epoch": 0.003979965367882, "grad_norm": 1.21875, "learning_rate": 0.00019977077457223672, "loss": 2.0932, "step": 155 }, { "epoch": 0.00400564256380382, "grad_norm": 1.2421875, "learning_rate": 0.0001997704723794595, "loss": 1.9591, "step": 156 }, { "epoch": 0.004031319759725639, "grad_norm": 1.3125, "learning_rate": 0.00019977016998784871, "loss": 2.0054, "step": 157 }, { "epoch": 0.004056996955647459, "grad_norm": 1.2109375, "learning_rate": 0.00019976986739740502, "loss": 2.1367, "step": 158 }, { "epoch": 0.004082674151569278, "grad_norm": 1.25, "learning_rate": 0.00019976956460812897, "loss": 1.909, "step": 159 }, { "epoch": 0.004108351347491097, "grad_norm": 1.3125, "learning_rate": 0.0001997692616200212, "loss": 2.1406, "step": 160 }, { "epoch": 0.004134028543412917, "grad_norm": 1.2421875, "learning_rate": 0.0001997689584330823, "loss": 1.8451, "step": 161 }, { "epoch": 0.004159705739334736, "grad_norm": 1.7109375, "learning_rate": 0.00019976865504731288, "loss": 2.2586, "step": 162 }, { "epoch": 0.004185382935256555, "grad_norm": 1.359375, "learning_rate": 0.00019976835146271352, "loss": 1.8745, "step": 163 }, { "epoch": 0.0042110601311783746, "grad_norm": 1.3984375, "learning_rate": 0.00019976804767928488, "loss": 1.9142, "step": 164 }, { "epoch": 0.004236737327100194, "grad_norm": 1.5234375, "learning_rate": 0.00019976774369702752, "loss": 2.0619, "step": 165 }, { "epoch": 0.004262414523022014, "grad_norm": 1.3515625, "learning_rate": 0.00019976743951594206, "loss": 1.9781, "step": 166 }, { "epoch": 0.004288091718943832, "grad_norm": 1.2421875, "learning_rate": 0.0001997671351360291, "loss": 1.8087, "step": 167 }, { "epoch": 0.004313768914865652, "grad_norm": 1.3203125, "learning_rate": 0.00019976683055728924, "loss": 2.0817, "step": 168 }, { "epoch": 0.0043394461107874715, "grad_norm": 1.390625, "learning_rate": 0.00019976652577972313, "loss": 2.1426, "step": 169 }, { "epoch": 0.004365123306709291, "grad_norm": 1.28125, "learning_rate": 0.00019976622080333133, "loss": 1.9369, "step": 170 }, { "epoch": 0.004390800502631111, "grad_norm": 1.2734375, "learning_rate": 0.00019976591562811448, "loss": 2.0434, "step": 171 }, { "epoch": 0.004416477698552929, "grad_norm": 1.2578125, "learning_rate": 0.00019976561025407315, "loss": 2.2161, "step": 172 }, { "epoch": 0.004442154894474749, "grad_norm": 1.390625, "learning_rate": 0.00019976530468120797, "loss": 1.977, "step": 173 }, { "epoch": 0.004467832090396568, "grad_norm": 1.3125, "learning_rate": 0.00019976499890951956, "loss": 1.9103, "step": 174 }, { "epoch": 0.004493509286318388, "grad_norm": 1.3046875, "learning_rate": 0.0001997646929390085, "loss": 1.8608, "step": 175 }, { "epoch": 0.004519186482240207, "grad_norm": 1.1953125, "learning_rate": 0.00019976438676967543, "loss": 1.8129, "step": 176 }, { "epoch": 0.004544863678162026, "grad_norm": 1.3828125, "learning_rate": 0.00019976408040152098, "loss": 1.9564, "step": 177 }, { "epoch": 0.004570540874083846, "grad_norm": 1.2890625, "learning_rate": 0.0001997637738345457, "loss": 2.0602, "step": 178 }, { "epoch": 0.004596218070005665, "grad_norm": 1.421875, "learning_rate": 0.00019976346706875023, "loss": 2.0533, "step": 179 }, { "epoch": 0.004621895265927484, "grad_norm": 1.2890625, "learning_rate": 0.00019976316010413515, "loss": 2.0526, "step": 180 }, { "epoch": 0.004647572461849304, "grad_norm": 1.203125, "learning_rate": 0.00019976285294070117, "loss": 1.7198, "step": 181 }, { "epoch": 0.004673249657771123, "grad_norm": 2.078125, "learning_rate": 0.00019976254557844877, "loss": 2.0407, "step": 182 }, { "epoch": 0.004698926853692943, "grad_norm": 1.2421875, "learning_rate": 0.00019976223801737868, "loss": 1.9106, "step": 183 }, { "epoch": 0.004724604049614761, "grad_norm": 1.3203125, "learning_rate": 0.00019976193025749144, "loss": 1.9606, "step": 184 }, { "epoch": 0.004750281245536581, "grad_norm": 1.3828125, "learning_rate": 0.0001997616222987877, "loss": 2.0454, "step": 185 }, { "epoch": 0.0047759584414584005, "grad_norm": 1.3125, "learning_rate": 0.00019976131414126805, "loss": 1.7913, "step": 186 }, { "epoch": 0.00480163563738022, "grad_norm": 1.3984375, "learning_rate": 0.00019976100578493312, "loss": 1.7453, "step": 187 }, { "epoch": 0.00482731283330204, "grad_norm": 1.2890625, "learning_rate": 0.00019976069722978348, "loss": 1.9642, "step": 188 }, { "epoch": 0.004852990029223858, "grad_norm": 1.1953125, "learning_rate": 0.00019976038847581982, "loss": 1.9041, "step": 189 }, { "epoch": 0.004878667225145678, "grad_norm": 1.328125, "learning_rate": 0.00019976007952304266, "loss": 2.0635, "step": 190 }, { "epoch": 0.004904344421067497, "grad_norm": 1.4453125, "learning_rate": 0.00019975977037145276, "loss": 1.7887, "step": 191 }, { "epoch": 0.004930021616989317, "grad_norm": 1.2578125, "learning_rate": 0.00019975946102105058, "loss": 1.9701, "step": 192 }, { "epoch": 0.004955698812911136, "grad_norm": 1.1953125, "learning_rate": 0.00019975915147183685, "loss": 1.8339, "step": 193 }, { "epoch": 0.004981376008832955, "grad_norm": 1.21875, "learning_rate": 0.0001997588417238121, "loss": 2.1332, "step": 194 }, { "epoch": 0.005007053204754775, "grad_norm": 1.2421875, "learning_rate": 0.00019975853177697702, "loss": 2.1316, "step": 195 }, { "epoch": 0.005032730400676594, "grad_norm": 1.296875, "learning_rate": 0.0001997582216313322, "loss": 1.8423, "step": 196 }, { "epoch": 0.005058407596598413, "grad_norm": 1.234375, "learning_rate": 0.00019975791128687824, "loss": 1.775, "step": 197 }, { "epoch": 0.005084084792520233, "grad_norm": 1.1640625, "learning_rate": 0.00019975760074361577, "loss": 1.8544, "step": 198 }, { "epoch": 0.005109761988442052, "grad_norm": 1.3203125, "learning_rate": 0.0001997572900015454, "loss": 2.0779, "step": 199 }, { "epoch": 0.005135439184363872, "grad_norm": 1.2890625, "learning_rate": 0.00019975697906066781, "loss": 1.9575, "step": 200 }, { "epoch": 0.005161116380285691, "grad_norm": 1.2265625, "learning_rate": 0.00019975666792098357, "loss": 1.9392, "step": 201 }, { "epoch": 0.00518679357620751, "grad_norm": 1.3203125, "learning_rate": 0.00019975635658249326, "loss": 2.1059, "step": 202 }, { "epoch": 0.0052124707721293295, "grad_norm": 1.234375, "learning_rate": 0.00019975604504519754, "loss": 1.9351, "step": 203 }, { "epoch": 0.005238147968051149, "grad_norm": 1.2421875, "learning_rate": 0.00019975573330909704, "loss": 1.7031, "step": 204 }, { "epoch": 0.005263825163972969, "grad_norm": 1.265625, "learning_rate": 0.0001997554213741924, "loss": 1.8233, "step": 205 }, { "epoch": 0.005289502359894787, "grad_norm": 1.1796875, "learning_rate": 0.00019975510924048423, "loss": 1.828, "step": 206 }, { "epoch": 0.005315179555816607, "grad_norm": 1.265625, "learning_rate": 0.0001997547969079731, "loss": 1.8282, "step": 207 }, { "epoch": 0.0053408567517384264, "grad_norm": 1.25, "learning_rate": 0.00019975448437665967, "loss": 1.8393, "step": 208 }, { "epoch": 0.005366533947660246, "grad_norm": 1.28125, "learning_rate": 0.00019975417164654456, "loss": 1.9413, "step": 209 }, { "epoch": 0.005392211143582065, "grad_norm": 1.28125, "learning_rate": 0.00019975385871762844, "loss": 1.9984, "step": 210 }, { "epoch": 0.005417888339503884, "grad_norm": 1.3046875, "learning_rate": 0.00019975354558991187, "loss": 2.0349, "step": 211 }, { "epoch": 0.005443565535425704, "grad_norm": 1.15625, "learning_rate": 0.0001997532322633955, "loss": 1.9123, "step": 212 }, { "epoch": 0.005469242731347523, "grad_norm": 1.2109375, "learning_rate": 0.0001997529187380799, "loss": 2.1437, "step": 213 }, { "epoch": 0.005494919927269343, "grad_norm": 1.265625, "learning_rate": 0.0001997526050139658, "loss": 1.9852, "step": 214 }, { "epoch": 0.005520597123191162, "grad_norm": 1.2109375, "learning_rate": 0.00019975229109105372, "loss": 2.0391, "step": 215 }, { "epoch": 0.005546274319112981, "grad_norm": 1.1015625, "learning_rate": 0.0001997519769693444, "loss": 1.8174, "step": 216 }, { "epoch": 0.005571951515034801, "grad_norm": 1.203125, "learning_rate": 0.00019975166264883833, "loss": 1.8886, "step": 217 }, { "epoch": 0.00559762871095662, "grad_norm": 1.3828125, "learning_rate": 0.00019975134812953623, "loss": 2.1096, "step": 218 }, { "epoch": 0.005623305906878439, "grad_norm": 1.21875, "learning_rate": 0.00019975103341143873, "loss": 2.1911, "step": 219 }, { "epoch": 0.0056489831028002585, "grad_norm": 1.25, "learning_rate": 0.00019975071849454641, "loss": 1.9302, "step": 220 }, { "epoch": 0.005674660298722078, "grad_norm": 1.3125, "learning_rate": 0.00019975040337885994, "loss": 2.026, "step": 221 }, { "epoch": 0.005700337494643898, "grad_norm": 1.265625, "learning_rate": 0.0001997500880643799, "loss": 1.8796, "step": 222 }, { "epoch": 0.005726014690565716, "grad_norm": 1.234375, "learning_rate": 0.00019974977255110695, "loss": 1.7137, "step": 223 }, { "epoch": 0.005751691886487536, "grad_norm": 1.28125, "learning_rate": 0.0001997494568390417, "loss": 1.8575, "step": 224 }, { "epoch": 0.0057773690824093555, "grad_norm": 1.3203125, "learning_rate": 0.00019974914092818483, "loss": 2.0677, "step": 225 }, { "epoch": 0.005803046278331175, "grad_norm": 1.265625, "learning_rate": 0.0001997488248185369, "loss": 1.7926, "step": 226 }, { "epoch": 0.005828723474252995, "grad_norm": 1.390625, "learning_rate": 0.0001997485085100986, "loss": 1.9124, "step": 227 }, { "epoch": 0.005854400670174813, "grad_norm": 1.3671875, "learning_rate": 0.00019974819200287052, "loss": 1.905, "step": 228 }, { "epoch": 0.005880077866096633, "grad_norm": 1.4140625, "learning_rate": 0.0001997478752968533, "loss": 2.0241, "step": 229 }, { "epoch": 0.005905755062018452, "grad_norm": 1.359375, "learning_rate": 0.00019974755839204756, "loss": 2.0559, "step": 230 }, { "epoch": 0.005931432257940272, "grad_norm": 1.3359375, "learning_rate": 0.00019974724128845396, "loss": 2.0247, "step": 231 }, { "epoch": 0.005957109453862091, "grad_norm": 1.25, "learning_rate": 0.00019974692398607314, "loss": 2.1181, "step": 232 }, { "epoch": 0.00598278664978391, "grad_norm": 1.328125, "learning_rate": 0.0001997466064849057, "loss": 2.0184, "step": 233 }, { "epoch": 0.00600846384570573, "grad_norm": 1.296875, "learning_rate": 0.00019974628878495225, "loss": 1.8364, "step": 234 }, { "epoch": 0.006034141041627549, "grad_norm": 1.34375, "learning_rate": 0.0001997459708862135, "loss": 1.6873, "step": 235 }, { "epoch": 0.006059818237549368, "grad_norm": 1.28125, "learning_rate": 0.00019974565278869003, "loss": 1.798, "step": 236 }, { "epoch": 0.0060854954334711876, "grad_norm": 1.234375, "learning_rate": 0.00019974533449238245, "loss": 1.842, "step": 237 }, { "epoch": 0.006111172629393007, "grad_norm": 1.296875, "learning_rate": 0.00019974501599729147, "loss": 1.7427, "step": 238 }, { "epoch": 0.006136849825314827, "grad_norm": 1.265625, "learning_rate": 0.00019974469730341766, "loss": 1.9307, "step": 239 }, { "epoch": 0.006162527021236646, "grad_norm": 1.203125, "learning_rate": 0.00019974437841076167, "loss": 1.8584, "step": 240 }, { "epoch": 0.006188204217158465, "grad_norm": 1.2421875, "learning_rate": 0.00019974405931932418, "loss": 1.8383, "step": 241 }, { "epoch": 0.0062138814130802845, "grad_norm": 1.2734375, "learning_rate": 0.00019974374002910573, "loss": 1.9619, "step": 242 }, { "epoch": 0.006239558609002104, "grad_norm": 1.3046875, "learning_rate": 0.00019974342054010708, "loss": 1.8814, "step": 243 }, { "epoch": 0.006265235804923924, "grad_norm": 1.171875, "learning_rate": 0.00019974310085232877, "loss": 1.9053, "step": 244 }, { "epoch": 0.006290913000845742, "grad_norm": 1.2578125, "learning_rate": 0.00019974278096577148, "loss": 1.9791, "step": 245 }, { "epoch": 0.006316590196767562, "grad_norm": 1.1796875, "learning_rate": 0.00019974246088043582, "loss": 1.8957, "step": 246 }, { "epoch": 0.006342267392689381, "grad_norm": 1.2421875, "learning_rate": 0.0001997421405963225, "loss": 2.16, "step": 247 }, { "epoch": 0.006367944588611201, "grad_norm": 1.3671875, "learning_rate": 0.00019974182011343205, "loss": 2.0619, "step": 248 }, { "epoch": 0.00639362178453302, "grad_norm": 1.1171875, "learning_rate": 0.00019974149943176517, "loss": 1.8667, "step": 249 }, { "epoch": 0.006419298980454839, "grad_norm": 1.1484375, "learning_rate": 0.0001997411785513225, "loss": 1.7649, "step": 250 }, { "epoch": 0.006444976176376659, "grad_norm": 1.171875, "learning_rate": 0.00019974085747210464, "loss": 1.8126, "step": 251 }, { "epoch": 0.006470653372298478, "grad_norm": 1.21875, "learning_rate": 0.0001997405361941123, "loss": 1.7649, "step": 252 }, { "epoch": 0.006496330568220298, "grad_norm": 1.21875, "learning_rate": 0.00019974021471734607, "loss": 1.9819, "step": 253 }, { "epoch": 0.006522007764142117, "grad_norm": 1.234375, "learning_rate": 0.0001997398930418066, "loss": 1.8951, "step": 254 }, { "epoch": 0.006547684960063936, "grad_norm": 1.1953125, "learning_rate": 0.00019973957116749453, "loss": 1.8966, "step": 255 }, { "epoch": 0.006573362155985756, "grad_norm": 1.40625, "learning_rate": 0.0001997392490944105, "loss": 1.6778, "step": 256 }, { "epoch": 0.006599039351907575, "grad_norm": 1.1484375, "learning_rate": 0.00019973892682255518, "loss": 1.8668, "step": 257 }, { "epoch": 0.006624716547829394, "grad_norm": 1.1796875, "learning_rate": 0.00019973860435192916, "loss": 1.8748, "step": 258 }, { "epoch": 0.0066503937437512135, "grad_norm": 1.234375, "learning_rate": 0.00019973828168253312, "loss": 1.857, "step": 259 }, { "epoch": 0.006676070939673033, "grad_norm": 1.4296875, "learning_rate": 0.0001997379588143677, "loss": 1.9194, "step": 260 }, { "epoch": 0.006701748135594853, "grad_norm": 1.140625, "learning_rate": 0.00019973763574743353, "loss": 1.7043, "step": 261 }, { "epoch": 0.006727425331516671, "grad_norm": 1.21875, "learning_rate": 0.00019973731248173126, "loss": 1.9553, "step": 262 }, { "epoch": 0.006753102527438491, "grad_norm": 1.2109375, "learning_rate": 0.00019973698901726153, "loss": 1.8621, "step": 263 }, { "epoch": 0.00677877972336031, "grad_norm": 1.1875, "learning_rate": 0.000199736665354025, "loss": 2.0353, "step": 264 }, { "epoch": 0.00680445691928213, "grad_norm": 1.2109375, "learning_rate": 0.00019973634149202232, "loss": 1.925, "step": 265 }, { "epoch": 0.0068301341152039495, "grad_norm": 1.2890625, "learning_rate": 0.00019973601743125409, "loss": 1.8143, "step": 266 }, { "epoch": 0.006855811311125768, "grad_norm": 1.234375, "learning_rate": 0.00019973569317172102, "loss": 2.0033, "step": 267 }, { "epoch": 0.006881488507047588, "grad_norm": 1.21875, "learning_rate": 0.0001997353687134237, "loss": 1.684, "step": 268 }, { "epoch": 0.006907165702969407, "grad_norm": 1.34375, "learning_rate": 0.0001997350440563628, "loss": 2.0488, "step": 269 }, { "epoch": 0.006932842898891227, "grad_norm": 1.1796875, "learning_rate": 0.00019973471920053896, "loss": 1.7987, "step": 270 }, { "epoch": 0.006958520094813046, "grad_norm": 1.1875, "learning_rate": 0.00019973439414595285, "loss": 1.9595, "step": 271 }, { "epoch": 0.006984197290734865, "grad_norm": 1.2109375, "learning_rate": 0.00019973406889260508, "loss": 1.8901, "step": 272 }, { "epoch": 0.007009874486656685, "grad_norm": 1.2734375, "learning_rate": 0.00019973374344049635, "loss": 1.9371, "step": 273 }, { "epoch": 0.007035551682578504, "grad_norm": 1.203125, "learning_rate": 0.00019973341778962726, "loss": 1.7949, "step": 274 }, { "epoch": 0.007061228878500323, "grad_norm": 1.2890625, "learning_rate": 0.0001997330919399985, "loss": 2.1239, "step": 275 }, { "epoch": 0.0070869060744221425, "grad_norm": 1.140625, "learning_rate": 0.00019973276589161065, "loss": 1.8516, "step": 276 }, { "epoch": 0.007112583270343962, "grad_norm": 1.1875, "learning_rate": 0.00019973243964446443, "loss": 1.7713, "step": 277 }, { "epoch": 0.007138260466265782, "grad_norm": 1.203125, "learning_rate": 0.00019973211319856046, "loss": 1.8969, "step": 278 }, { "epoch": 0.007163937662187601, "grad_norm": 1.171875, "learning_rate": 0.0001997317865538994, "loss": 1.8497, "step": 279 }, { "epoch": 0.00718961485810942, "grad_norm": 1.140625, "learning_rate": 0.00019973145971048192, "loss": 1.6959, "step": 280 }, { "epoch": 0.0072152920540312394, "grad_norm": 1.1484375, "learning_rate": 0.00019973113266830863, "loss": 1.8457, "step": 281 }, { "epoch": 0.007240969249953059, "grad_norm": 1.25, "learning_rate": 0.00019973080542738024, "loss": 1.7543, "step": 282 }, { "epoch": 0.0072666464458748786, "grad_norm": 1.203125, "learning_rate": 0.00019973047798769732, "loss": 1.8588, "step": 283 }, { "epoch": 0.007292323641796697, "grad_norm": 1.203125, "learning_rate": 0.00019973015034926056, "loss": 1.9748, "step": 284 }, { "epoch": 0.007318000837718517, "grad_norm": 1.1171875, "learning_rate": 0.00019972982251207063, "loss": 1.663, "step": 285 }, { "epoch": 0.007343678033640336, "grad_norm": 1.15625, "learning_rate": 0.0001997294944761282, "loss": 1.8601, "step": 286 }, { "epoch": 0.007369355229562156, "grad_norm": 1.1875, "learning_rate": 0.00019972916624143384, "loss": 1.9841, "step": 287 }, { "epoch": 0.007395032425483975, "grad_norm": 1.1015625, "learning_rate": 0.0001997288378079883, "loss": 1.7883, "step": 288 }, { "epoch": 0.007420709621405794, "grad_norm": 1.1953125, "learning_rate": 0.0001997285091757922, "loss": 1.997, "step": 289 }, { "epoch": 0.007446386817327614, "grad_norm": 1.1796875, "learning_rate": 0.00019972818034484616, "loss": 1.7257, "step": 290 }, { "epoch": 0.007472064013249433, "grad_norm": 1.234375, "learning_rate": 0.0001997278513151509, "loss": 1.9261, "step": 291 }, { "epoch": 0.007497741209171253, "grad_norm": 1.1796875, "learning_rate": 0.00019972752208670703, "loss": 1.9084, "step": 292 }, { "epoch": 0.0075234184050930715, "grad_norm": 1.1015625, "learning_rate": 0.00019972719265951518, "loss": 1.8128, "step": 293 }, { "epoch": 0.007549095601014891, "grad_norm": 1.28125, "learning_rate": 0.0001997268630335761, "loss": 1.9687, "step": 294 }, { "epoch": 0.007574772796936711, "grad_norm": 1.734375, "learning_rate": 0.00019972653320889035, "loss": 1.9329, "step": 295 }, { "epoch": 0.00760044999285853, "grad_norm": 1.3828125, "learning_rate": 0.00019972620318545862, "loss": 1.7815, "step": 296 }, { "epoch": 0.007626127188780349, "grad_norm": 1.4140625, "learning_rate": 0.0001997258729632816, "loss": 1.8117, "step": 297 }, { "epoch": 0.0076518043847021685, "grad_norm": 1.2265625, "learning_rate": 0.00019972554254235993, "loss": 1.7553, "step": 298 }, { "epoch": 0.007677481580623988, "grad_norm": 1.25, "learning_rate": 0.00019972521192269425, "loss": 1.9957, "step": 299 }, { "epoch": 0.007703158776545808, "grad_norm": 1.1875, "learning_rate": 0.00019972488110428524, "loss": 1.9145, "step": 300 }, { "epoch": 0.007728835972467626, "grad_norm": 1.2109375, "learning_rate": 0.00019972455008713353, "loss": 1.9651, "step": 301 }, { "epoch": 0.007754513168389446, "grad_norm": 1.2890625, "learning_rate": 0.00019972421887123982, "loss": 1.7477, "step": 302 }, { "epoch": 0.007780190364311265, "grad_norm": 1.203125, "learning_rate": 0.00019972388745660474, "loss": 1.8351, "step": 303 }, { "epoch": 0.007805867560233085, "grad_norm": 1.203125, "learning_rate": 0.00019972355584322896, "loss": 1.9175, "step": 304 }, { "epoch": 0.007831544756154904, "grad_norm": 1.1875, "learning_rate": 0.00019972322403111314, "loss": 1.8567, "step": 305 }, { "epoch": 0.007857221952076724, "grad_norm": 1.171875, "learning_rate": 0.00019972289202025795, "loss": 1.6569, "step": 306 }, { "epoch": 0.007882899147998543, "grad_norm": 1.28125, "learning_rate": 0.00019972255981066402, "loss": 1.6634, "step": 307 }, { "epoch": 0.007908576343920361, "grad_norm": 1.2578125, "learning_rate": 0.00019972222740233207, "loss": 1.7974, "step": 308 }, { "epoch": 0.007934253539842182, "grad_norm": 1.2421875, "learning_rate": 0.0001997218947952627, "loss": 1.6202, "step": 309 }, { "epoch": 0.007959930735764, "grad_norm": 1.2578125, "learning_rate": 0.0001997215619894566, "loss": 1.9131, "step": 310 }, { "epoch": 0.007985607931685821, "grad_norm": 1.109375, "learning_rate": 0.00019972122898491448, "loss": 1.7266, "step": 311 }, { "epoch": 0.00801128512760764, "grad_norm": 1.109375, "learning_rate": 0.00019972089578163693, "loss": 1.9187, "step": 312 }, { "epoch": 0.008036962323529458, "grad_norm": 1.1328125, "learning_rate": 0.00019972056237962463, "loss": 1.6832, "step": 313 }, { "epoch": 0.008062639519451279, "grad_norm": 1.2421875, "learning_rate": 0.0001997202287788783, "loss": 1.8736, "step": 314 }, { "epoch": 0.008088316715373097, "grad_norm": 1.234375, "learning_rate": 0.00019971989497939848, "loss": 1.9551, "step": 315 }, { "epoch": 0.008113993911294918, "grad_norm": 1.109375, "learning_rate": 0.000199719560981186, "loss": 1.7719, "step": 316 }, { "epoch": 0.008139671107216737, "grad_norm": 1.1640625, "learning_rate": 0.0001997192267842414, "loss": 1.9235, "step": 317 }, { "epoch": 0.008165348303138555, "grad_norm": 1.1171875, "learning_rate": 0.0001997188923885654, "loss": 1.9036, "step": 318 }, { "epoch": 0.008191025499060376, "grad_norm": 1.1796875, "learning_rate": 0.00019971855779415867, "loss": 1.8696, "step": 319 }, { "epoch": 0.008216702694982194, "grad_norm": 1.15625, "learning_rate": 0.00019971822300102182, "loss": 1.7377, "step": 320 }, { "epoch": 0.008242379890904013, "grad_norm": 1.0703125, "learning_rate": 0.0001997178880091556, "loss": 1.8273, "step": 321 }, { "epoch": 0.008268057086825834, "grad_norm": 1.15625, "learning_rate": 0.00019971755281856062, "loss": 1.7127, "step": 322 }, { "epoch": 0.008293734282747652, "grad_norm": 1.1796875, "learning_rate": 0.00019971721742923756, "loss": 1.8927, "step": 323 }, { "epoch": 0.008319411478669473, "grad_norm": 1.1875, "learning_rate": 0.0001997168818411871, "loss": 1.7798, "step": 324 }, { "epoch": 0.008345088674591291, "grad_norm": 1.15625, "learning_rate": 0.00019971654605440987, "loss": 1.7128, "step": 325 }, { "epoch": 0.00837076587051311, "grad_norm": 1.1484375, "learning_rate": 0.00019971621006890664, "loss": 1.5813, "step": 326 }, { "epoch": 0.00839644306643493, "grad_norm": 1.171875, "learning_rate": 0.00019971587388467797, "loss": 1.6393, "step": 327 }, { "epoch": 0.008422120262356749, "grad_norm": 1.1328125, "learning_rate": 0.00019971553750172455, "loss": 1.8425, "step": 328 }, { "epoch": 0.00844779745827857, "grad_norm": 1.1484375, "learning_rate": 0.0001997152009200471, "loss": 1.7632, "step": 329 }, { "epoch": 0.008473474654200388, "grad_norm": 1.171875, "learning_rate": 0.00019971486413964627, "loss": 1.8557, "step": 330 }, { "epoch": 0.008499151850122207, "grad_norm": 1.203125, "learning_rate": 0.0001997145271605227, "loss": 1.8226, "step": 331 }, { "epoch": 0.008524829046044027, "grad_norm": 1.15625, "learning_rate": 0.0001997141899826771, "loss": 1.6952, "step": 332 }, { "epoch": 0.008550506241965846, "grad_norm": 1.140625, "learning_rate": 0.0001997138526061101, "loss": 1.7493, "step": 333 }, { "epoch": 0.008576183437887665, "grad_norm": 1.265625, "learning_rate": 0.00019971351503082242, "loss": 1.7465, "step": 334 }, { "epoch": 0.008601860633809485, "grad_norm": 1.1875, "learning_rate": 0.00019971317725681474, "loss": 1.7524, "step": 335 }, { "epoch": 0.008627537829731304, "grad_norm": 1.296875, "learning_rate": 0.00019971283928408765, "loss": 1.817, "step": 336 }, { "epoch": 0.008653215025653124, "grad_norm": 1.203125, "learning_rate": 0.00019971250111264192, "loss": 1.8899, "step": 337 }, { "epoch": 0.008678892221574943, "grad_norm": 1.1015625, "learning_rate": 0.00019971216274247816, "loss": 1.6186, "step": 338 }, { "epoch": 0.008704569417496762, "grad_norm": 1.1875, "learning_rate": 0.00019971182417359706, "loss": 1.81, "step": 339 }, { "epoch": 0.008730246613418582, "grad_norm": 1.1484375, "learning_rate": 0.00019971148540599934, "loss": 1.8437, "step": 340 }, { "epoch": 0.0087559238093404, "grad_norm": 1.1328125, "learning_rate": 0.0001997111464396856, "loss": 1.7811, "step": 341 }, { "epoch": 0.008781601005262221, "grad_norm": 1.2421875, "learning_rate": 0.00019971080727465657, "loss": 1.9023, "step": 342 }, { "epoch": 0.00880727820118404, "grad_norm": 1.2109375, "learning_rate": 0.00019971046791091287, "loss": 1.8833, "step": 343 }, { "epoch": 0.008832955397105859, "grad_norm": 1.1015625, "learning_rate": 0.00019971012834845526, "loss": 1.7821, "step": 344 }, { "epoch": 0.008858632593027679, "grad_norm": 1.1328125, "learning_rate": 0.00019970978858728435, "loss": 1.6469, "step": 345 }, { "epoch": 0.008884309788949498, "grad_norm": 1.25, "learning_rate": 0.00019970944862740083, "loss": 1.9876, "step": 346 }, { "epoch": 0.008909986984871316, "grad_norm": 1.1328125, "learning_rate": 0.00019970910846880543, "loss": 1.6967, "step": 347 }, { "epoch": 0.008935664180793137, "grad_norm": 1.25, "learning_rate": 0.00019970876811149871, "loss": 1.8077, "step": 348 }, { "epoch": 0.008961341376714956, "grad_norm": 1.1953125, "learning_rate": 0.00019970842755548146, "loss": 1.7945, "step": 349 }, { "epoch": 0.008987018572636776, "grad_norm": 1.2734375, "learning_rate": 0.00019970808680075435, "loss": 1.7071, "step": 350 }, { "epoch": 0.009012695768558595, "grad_norm": 1.1015625, "learning_rate": 0.000199707745847318, "loss": 1.7332, "step": 351 }, { "epoch": 0.009038372964480413, "grad_norm": 1.1171875, "learning_rate": 0.00019970740469517313, "loss": 1.838, "step": 352 }, { "epoch": 0.009064050160402234, "grad_norm": 1.1484375, "learning_rate": 0.0001997070633443204, "loss": 1.6384, "step": 353 }, { "epoch": 0.009089727356324052, "grad_norm": 1.1953125, "learning_rate": 0.00019970672179476048, "loss": 1.8566, "step": 354 }, { "epoch": 0.009115404552245871, "grad_norm": 1.1953125, "learning_rate": 0.0001997063800464941, "loss": 1.7528, "step": 355 }, { "epoch": 0.009141081748167692, "grad_norm": 1.1171875, "learning_rate": 0.00019970603809952193, "loss": 1.7903, "step": 356 }, { "epoch": 0.00916675894408951, "grad_norm": 1.171875, "learning_rate": 0.00019970569595384458, "loss": 1.845, "step": 357 }, { "epoch": 0.00919243614001133, "grad_norm": 1.1875, "learning_rate": 0.0001997053536094628, "loss": 1.7661, "step": 358 }, { "epoch": 0.00921811333593315, "grad_norm": 1.1484375, "learning_rate": 0.00019970501106637728, "loss": 1.7161, "step": 359 }, { "epoch": 0.009243790531854968, "grad_norm": 1.125, "learning_rate": 0.00019970466832458866, "loss": 1.7202, "step": 360 }, { "epoch": 0.009269467727776788, "grad_norm": 1.25, "learning_rate": 0.00019970432538409763, "loss": 1.7726, "step": 361 }, { "epoch": 0.009295144923698607, "grad_norm": 1.203125, "learning_rate": 0.00019970398224490493, "loss": 1.7279, "step": 362 }, { "epoch": 0.009320822119620428, "grad_norm": 1.0859375, "learning_rate": 0.00019970363890701117, "loss": 1.6867, "step": 363 }, { "epoch": 0.009346499315542246, "grad_norm": 1.140625, "learning_rate": 0.00019970329537041709, "loss": 1.5235, "step": 364 }, { "epoch": 0.009372176511464065, "grad_norm": 1.03125, "learning_rate": 0.0001997029516351233, "loss": 1.6704, "step": 365 }, { "epoch": 0.009397853707385885, "grad_norm": 1.1640625, "learning_rate": 0.00019970260770113055, "loss": 1.8011, "step": 366 }, { "epoch": 0.009423530903307704, "grad_norm": 1.140625, "learning_rate": 0.00019970226356843954, "loss": 1.7362, "step": 367 }, { "epoch": 0.009449208099229523, "grad_norm": 1.1640625, "learning_rate": 0.0001997019192370509, "loss": 1.5848, "step": 368 }, { "epoch": 0.009474885295151343, "grad_norm": 1.1875, "learning_rate": 0.00019970157470696533, "loss": 1.8278, "step": 369 }, { "epoch": 0.009500562491073162, "grad_norm": 1.09375, "learning_rate": 0.00019970122997818354, "loss": 1.8764, "step": 370 }, { "epoch": 0.009526239686994982, "grad_norm": 1.1484375, "learning_rate": 0.0001997008850507062, "loss": 1.9372, "step": 371 }, { "epoch": 0.009551916882916801, "grad_norm": 1.1484375, "learning_rate": 0.000199700539924534, "loss": 1.8385, "step": 372 }, { "epoch": 0.00957759407883862, "grad_norm": 1.0859375, "learning_rate": 0.00019970019459966765, "loss": 1.8667, "step": 373 }, { "epoch": 0.00960327127476044, "grad_norm": 1.171875, "learning_rate": 0.0001996998490761078, "loss": 1.6162, "step": 374 }, { "epoch": 0.009628948470682259, "grad_norm": 1.0859375, "learning_rate": 0.00019969950335385517, "loss": 1.5868, "step": 375 }, { "epoch": 0.00965462566660408, "grad_norm": 1.84375, "learning_rate": 0.00019969915743291044, "loss": 1.8162, "step": 376 }, { "epoch": 0.009680302862525898, "grad_norm": 1.25, "learning_rate": 0.0001996988113132743, "loss": 2.1175, "step": 377 }, { "epoch": 0.009705980058447717, "grad_norm": 1.1953125, "learning_rate": 0.00019969846499494738, "loss": 1.793, "step": 378 }, { "epoch": 0.009731657254369537, "grad_norm": 1.171875, "learning_rate": 0.00019969811847793048, "loss": 1.6211, "step": 379 }, { "epoch": 0.009757334450291356, "grad_norm": 1.1953125, "learning_rate": 0.0001996977717622242, "loss": 1.7763, "step": 380 }, { "epoch": 0.009783011646213174, "grad_norm": 1.1953125, "learning_rate": 0.0001996974248478293, "loss": 1.6399, "step": 381 }, { "epoch": 0.009808688842134995, "grad_norm": 1.171875, "learning_rate": 0.0001996970777347464, "loss": 1.7743, "step": 382 }, { "epoch": 0.009834366038056814, "grad_norm": 1.078125, "learning_rate": 0.00019969673042297627, "loss": 1.6742, "step": 383 }, { "epoch": 0.009860043233978634, "grad_norm": 1.1484375, "learning_rate": 0.00019969638291251953, "loss": 1.7875, "step": 384 }, { "epoch": 0.009885720429900453, "grad_norm": 1.1796875, "learning_rate": 0.00019969603520337694, "loss": 1.7759, "step": 385 }, { "epoch": 0.009911397625822271, "grad_norm": 1.203125, "learning_rate": 0.00019969568729554912, "loss": 1.9311, "step": 386 }, { "epoch": 0.009937074821744092, "grad_norm": 1.1484375, "learning_rate": 0.00019969533918903683, "loss": 1.8094, "step": 387 }, { "epoch": 0.00996275201766591, "grad_norm": 1.046875, "learning_rate": 0.0001996949908838407, "loss": 1.7112, "step": 388 }, { "epoch": 0.009988429213587731, "grad_norm": 1.03125, "learning_rate": 0.0001996946423799615, "loss": 1.55, "step": 389 }, { "epoch": 0.01001410640950955, "grad_norm": 1.09375, "learning_rate": 0.00019969429367739988, "loss": 1.4763, "step": 390 }, { "epoch": 0.010039783605431368, "grad_norm": 1.15625, "learning_rate": 0.0001996939447761565, "loss": 1.6551, "step": 391 }, { "epoch": 0.010065460801353189, "grad_norm": 1.1640625, "learning_rate": 0.0001996935956762321, "loss": 1.9569, "step": 392 }, { "epoch": 0.010091137997275007, "grad_norm": 1.09375, "learning_rate": 0.00019969324637762742, "loss": 1.7528, "step": 393 }, { "epoch": 0.010116815193196826, "grad_norm": 1.046875, "learning_rate": 0.00019969289688034306, "loss": 1.6146, "step": 394 }, { "epoch": 0.010142492389118647, "grad_norm": 1.078125, "learning_rate": 0.00019969254718437974, "loss": 1.7426, "step": 395 }, { "epoch": 0.010168169585040465, "grad_norm": 1.15625, "learning_rate": 0.00019969219728973821, "loss": 1.7774, "step": 396 }, { "epoch": 0.010193846780962286, "grad_norm": 1.1640625, "learning_rate": 0.00019969184719641913, "loss": 1.7462, "step": 397 }, { "epoch": 0.010219523976884104, "grad_norm": 1.046875, "learning_rate": 0.00019969149690442323, "loss": 1.6467, "step": 398 }, { "epoch": 0.010245201172805923, "grad_norm": 1.046875, "learning_rate": 0.00019969114641375116, "loss": 1.7966, "step": 399 }, { "epoch": 0.010270878368727743, "grad_norm": 1.1953125, "learning_rate": 0.00019969079572440364, "loss": 1.7212, "step": 400 }, { "epoch": 0.010296555564649562, "grad_norm": 1.140625, "learning_rate": 0.00019969044483638136, "loss": 1.8428, "step": 401 }, { "epoch": 0.010322232760571383, "grad_norm": 1.1640625, "learning_rate": 0.00019969009374968505, "loss": 1.8797, "step": 402 }, { "epoch": 0.010347909956493201, "grad_norm": 1.0859375, "learning_rate": 0.0001996897424643154, "loss": 1.7703, "step": 403 }, { "epoch": 0.01037358715241502, "grad_norm": 1.1640625, "learning_rate": 0.00019968939098027305, "loss": 1.6691, "step": 404 }, { "epoch": 0.01039926434833684, "grad_norm": 1.1015625, "learning_rate": 0.00019968903929755877, "loss": 1.7057, "step": 405 }, { "epoch": 0.010424941544258659, "grad_norm": 1.1796875, "learning_rate": 0.00019968868741617325, "loss": 1.583, "step": 406 }, { "epoch": 0.010450618740180478, "grad_norm": 1.1484375, "learning_rate": 0.00019968833533611714, "loss": 1.6487, "step": 407 }, { "epoch": 0.010476295936102298, "grad_norm": 1.1015625, "learning_rate": 0.00019968798305739123, "loss": 1.6445, "step": 408 }, { "epoch": 0.010501973132024117, "grad_norm": 1.09375, "learning_rate": 0.00019968763057999617, "loss": 1.7771, "step": 409 }, { "epoch": 0.010527650327945937, "grad_norm": 1.125, "learning_rate": 0.00019968727790393266, "loss": 1.7675, "step": 410 }, { "epoch": 0.010553327523867756, "grad_norm": 1.0234375, "learning_rate": 0.0001996869250292014, "loss": 1.6663, "step": 411 }, { "epoch": 0.010579004719789575, "grad_norm": 1.1953125, "learning_rate": 0.0001996865719558031, "loss": 1.7841, "step": 412 }, { "epoch": 0.010604681915711395, "grad_norm": 1.15625, "learning_rate": 0.0001996862186837385, "loss": 1.8568, "step": 413 }, { "epoch": 0.010630359111633214, "grad_norm": 1.1875, "learning_rate": 0.00019968586521300824, "loss": 1.8833, "step": 414 }, { "epoch": 0.010656036307555034, "grad_norm": 1.15625, "learning_rate": 0.00019968551154361306, "loss": 1.8665, "step": 415 }, { "epoch": 0.010681713503476853, "grad_norm": 1.1171875, "learning_rate": 0.00019968515767555367, "loss": 1.688, "step": 416 }, { "epoch": 0.010707390699398672, "grad_norm": 1.1171875, "learning_rate": 0.00019968480360883076, "loss": 1.7266, "step": 417 }, { "epoch": 0.010733067895320492, "grad_norm": 1.15625, "learning_rate": 0.00019968444934344504, "loss": 1.6336, "step": 418 }, { "epoch": 0.01075874509124231, "grad_norm": 1.0546875, "learning_rate": 0.00019968409487939722, "loss": 1.5974, "step": 419 }, { "epoch": 0.01078442228716413, "grad_norm": 1.140625, "learning_rate": 0.000199683740216688, "loss": 1.6831, "step": 420 }, { "epoch": 0.01081009948308595, "grad_norm": 1.1015625, "learning_rate": 0.00019968338535531808, "loss": 1.5546, "step": 421 }, { "epoch": 0.010835776679007769, "grad_norm": 1.125, "learning_rate": 0.00019968303029528818, "loss": 1.8141, "step": 422 }, { "epoch": 0.010861453874929589, "grad_norm": 1.0703125, "learning_rate": 0.00019968267503659904, "loss": 1.7929, "step": 423 }, { "epoch": 0.010887131070851408, "grad_norm": 1.1015625, "learning_rate": 0.0001996823195792513, "loss": 1.8732, "step": 424 }, { "epoch": 0.010912808266773226, "grad_norm": 1.1875, "learning_rate": 0.00019968196392324567, "loss": 1.7545, "step": 425 }, { "epoch": 0.010938485462695047, "grad_norm": 1.1484375, "learning_rate": 0.00019968160806858295, "loss": 1.7932, "step": 426 }, { "epoch": 0.010964162658616865, "grad_norm": 1.171875, "learning_rate": 0.00019968125201526377, "loss": 1.7704, "step": 427 }, { "epoch": 0.010989839854538686, "grad_norm": 1.2109375, "learning_rate": 0.00019968089576328883, "loss": 1.7187, "step": 428 }, { "epoch": 0.011015517050460505, "grad_norm": 1.109375, "learning_rate": 0.00019968053931265892, "loss": 1.4602, "step": 429 }, { "epoch": 0.011041194246382323, "grad_norm": 1.2109375, "learning_rate": 0.00019968018266337465, "loss": 1.7117, "step": 430 }, { "epoch": 0.011066871442304144, "grad_norm": 1.1953125, "learning_rate": 0.00019967982581543684, "loss": 1.7176, "step": 431 }, { "epoch": 0.011092548638225962, "grad_norm": 1.171875, "learning_rate": 0.00019967946876884608, "loss": 1.7788, "step": 432 }, { "epoch": 0.011118225834147781, "grad_norm": 1.1015625, "learning_rate": 0.00019967911152360317, "loss": 1.7759, "step": 433 }, { "epoch": 0.011143903030069601, "grad_norm": 1.0546875, "learning_rate": 0.00019967875407970879, "loss": 1.5139, "step": 434 }, { "epoch": 0.01116958022599142, "grad_norm": 1.203125, "learning_rate": 0.00019967839643716365, "loss": 1.7944, "step": 435 }, { "epoch": 0.01119525742191324, "grad_norm": 1.1328125, "learning_rate": 0.0001996780385959685, "loss": 1.6012, "step": 436 }, { "epoch": 0.01122093461783506, "grad_norm": 1.1328125, "learning_rate": 0.00019967768055612398, "loss": 1.8449, "step": 437 }, { "epoch": 0.011246611813756878, "grad_norm": 1.140625, "learning_rate": 0.00019967732231763088, "loss": 1.5959, "step": 438 }, { "epoch": 0.011272289009678698, "grad_norm": 1.140625, "learning_rate": 0.00019967696388048986, "loss": 1.6453, "step": 439 }, { "epoch": 0.011297966205600517, "grad_norm": 1.1640625, "learning_rate": 0.00019967660524470166, "loss": 1.634, "step": 440 }, { "epoch": 0.011323643401522338, "grad_norm": 1.21875, "learning_rate": 0.000199676246410267, "loss": 1.671, "step": 441 }, { "epoch": 0.011349320597444156, "grad_norm": 1.1640625, "learning_rate": 0.00019967588737718658, "loss": 1.7569, "step": 442 }, { "epoch": 0.011374997793365975, "grad_norm": 1.0625, "learning_rate": 0.0001996755281454611, "loss": 1.7299, "step": 443 }, { "epoch": 0.011400674989287795, "grad_norm": 1.09375, "learning_rate": 0.00019967516871509134, "loss": 1.7621, "step": 444 }, { "epoch": 0.011426352185209614, "grad_norm": 1.0546875, "learning_rate": 0.00019967480908607792, "loss": 1.5903, "step": 445 }, { "epoch": 0.011452029381131433, "grad_norm": 1.0390625, "learning_rate": 0.00019967444925842164, "loss": 1.7768, "step": 446 }, { "epoch": 0.011477706577053253, "grad_norm": 1.2265625, "learning_rate": 0.00019967408923212318, "loss": 1.7113, "step": 447 }, { "epoch": 0.011503383772975072, "grad_norm": 1.1171875, "learning_rate": 0.00019967372900718327, "loss": 1.5659, "step": 448 }, { "epoch": 0.011529060968896892, "grad_norm": 1.140625, "learning_rate": 0.00019967336858360261, "loss": 1.7802, "step": 449 }, { "epoch": 0.011554738164818711, "grad_norm": 1.1328125, "learning_rate": 0.00019967300796138194, "loss": 1.7782, "step": 450 }, { "epoch": 0.01158041536074053, "grad_norm": 1.1484375, "learning_rate": 0.00019967264714052196, "loss": 1.7433, "step": 451 }, { "epoch": 0.01160609255666235, "grad_norm": 1.203125, "learning_rate": 0.0001996722861210234, "loss": 1.7191, "step": 452 }, { "epoch": 0.011631769752584169, "grad_norm": 1.203125, "learning_rate": 0.00019967192490288698, "loss": 1.7041, "step": 453 }, { "epoch": 0.01165744694850599, "grad_norm": 1.140625, "learning_rate": 0.00019967156348611343, "loss": 1.7039, "step": 454 }, { "epoch": 0.011683124144427808, "grad_norm": 1.0859375, "learning_rate": 0.00019967120187070343, "loss": 1.6203, "step": 455 }, { "epoch": 0.011708801340349627, "grad_norm": 1.0546875, "learning_rate": 0.00019967084005665774, "loss": 1.7507, "step": 456 }, { "epoch": 0.011734478536271447, "grad_norm": 1.1328125, "learning_rate": 0.0001996704780439771, "loss": 1.6025, "step": 457 }, { "epoch": 0.011760155732193266, "grad_norm": 1.1953125, "learning_rate": 0.00019967011583266216, "loss": 1.646, "step": 458 }, { "epoch": 0.011785832928115084, "grad_norm": 1.2578125, "learning_rate": 0.00019966975342271368, "loss": 1.6973, "step": 459 }, { "epoch": 0.011811510124036905, "grad_norm": 1.1640625, "learning_rate": 0.00019966939081413241, "loss": 1.6053, "step": 460 }, { "epoch": 0.011837187319958723, "grad_norm": 1.125, "learning_rate": 0.00019966902800691902, "loss": 1.5869, "step": 461 }, { "epoch": 0.011862864515880544, "grad_norm": 1.1484375, "learning_rate": 0.0001996686650010743, "loss": 1.7187, "step": 462 }, { "epoch": 0.011888541711802363, "grad_norm": 1.0546875, "learning_rate": 0.00019966830179659887, "loss": 1.839, "step": 463 }, { "epoch": 0.011914218907724181, "grad_norm": 1.1328125, "learning_rate": 0.00019966793839349358, "loss": 1.7399, "step": 464 }, { "epoch": 0.011939896103646002, "grad_norm": 1.0625, "learning_rate": 0.00019966757479175905, "loss": 1.5158, "step": 465 }, { "epoch": 0.01196557329956782, "grad_norm": 1.0703125, "learning_rate": 0.0001996672109913961, "loss": 1.6339, "step": 466 }, { "epoch": 0.01199125049548964, "grad_norm": 1.0859375, "learning_rate": 0.00019966684699240534, "loss": 1.7077, "step": 467 }, { "epoch": 0.01201692769141146, "grad_norm": 1.1015625, "learning_rate": 0.00019966648279478758, "loss": 1.7155, "step": 468 }, { "epoch": 0.012042604887333278, "grad_norm": 1.0625, "learning_rate": 0.00019966611839854356, "loss": 1.6848, "step": 469 }, { "epoch": 0.012068282083255099, "grad_norm": 1.203125, "learning_rate": 0.0001996657538036739, "loss": 1.6422, "step": 470 }, { "epoch": 0.012093959279176917, "grad_norm": 1.1015625, "learning_rate": 0.00019966538901017945, "loss": 1.8272, "step": 471 }, { "epoch": 0.012119636475098736, "grad_norm": 1.1015625, "learning_rate": 0.00019966502401806086, "loss": 1.5085, "step": 472 }, { "epoch": 0.012145313671020556, "grad_norm": 1.0625, "learning_rate": 0.00019966465882731885, "loss": 1.7875, "step": 473 }, { "epoch": 0.012170990866942375, "grad_norm": 1.234375, "learning_rate": 0.00019966429343795422, "loss": 1.8757, "step": 474 }, { "epoch": 0.012196668062864196, "grad_norm": 1.1484375, "learning_rate": 0.00019966392784996766, "loss": 1.7758, "step": 475 }, { "epoch": 0.012222345258786014, "grad_norm": 1.046875, "learning_rate": 0.00019966356206335987, "loss": 1.6779, "step": 476 }, { "epoch": 0.012248022454707833, "grad_norm": 1.0546875, "learning_rate": 0.0001996631960781316, "loss": 1.7749, "step": 477 }, { "epoch": 0.012273699650629653, "grad_norm": 1.140625, "learning_rate": 0.0001996628298942836, "loss": 1.8974, "step": 478 }, { "epoch": 0.012299376846551472, "grad_norm": 1.171875, "learning_rate": 0.00019966246351181658, "loss": 1.6213, "step": 479 }, { "epoch": 0.012325054042473292, "grad_norm": 1.109375, "learning_rate": 0.0001996620969307313, "loss": 1.6703, "step": 480 }, { "epoch": 0.012350731238395111, "grad_norm": 1.1484375, "learning_rate": 0.0001996617301510284, "loss": 1.7559, "step": 481 }, { "epoch": 0.01237640843431693, "grad_norm": 1.0, "learning_rate": 0.00019966136317270871, "loss": 1.6991, "step": 482 }, { "epoch": 0.01240208563023875, "grad_norm": 1.0625, "learning_rate": 0.0001996609959957729, "loss": 1.8081, "step": 483 }, { "epoch": 0.012427762826160569, "grad_norm": 1.203125, "learning_rate": 0.0001996606286202218, "loss": 1.85, "step": 484 }, { "epoch": 0.012453440022082388, "grad_norm": 1.0390625, "learning_rate": 0.00019966026104605602, "loss": 1.5634, "step": 485 }, { "epoch": 0.012479117218004208, "grad_norm": 1.140625, "learning_rate": 0.00019965989327327632, "loss": 1.7484, "step": 486 }, { "epoch": 0.012504794413926027, "grad_norm": 1.140625, "learning_rate": 0.0001996595253018835, "loss": 1.5248, "step": 487 }, { "epoch": 0.012530471609847847, "grad_norm": 1.09375, "learning_rate": 0.0001996591571318782, "loss": 1.6394, "step": 488 }, { "epoch": 0.012556148805769666, "grad_norm": 1.1328125, "learning_rate": 0.00019965878876326124, "loss": 1.6277, "step": 489 }, { "epoch": 0.012581826001691485, "grad_norm": 1.0859375, "learning_rate": 0.00019965842019603331, "loss": 1.6765, "step": 490 }, { "epoch": 0.012607503197613305, "grad_norm": 1.0859375, "learning_rate": 0.00019965805143019516, "loss": 1.5707, "step": 491 }, { "epoch": 0.012633180393535124, "grad_norm": 1.140625, "learning_rate": 0.0001996576824657475, "loss": 1.7858, "step": 492 }, { "epoch": 0.012658857589456944, "grad_norm": 1.125, "learning_rate": 0.00019965731330269106, "loss": 1.6784, "step": 493 }, { "epoch": 0.012684534785378763, "grad_norm": 1.03125, "learning_rate": 0.00019965694394102662, "loss": 1.7451, "step": 494 }, { "epoch": 0.012710211981300581, "grad_norm": 1.109375, "learning_rate": 0.0001996565743807549, "loss": 1.6593, "step": 495 }, { "epoch": 0.012735889177222402, "grad_norm": 1.0703125, "learning_rate": 0.0001996562046218766, "loss": 1.5761, "step": 496 }, { "epoch": 0.01276156637314422, "grad_norm": 1.15625, "learning_rate": 0.00019965583466439253, "loss": 1.5775, "step": 497 }, { "epoch": 0.01278724356906604, "grad_norm": 1.2265625, "learning_rate": 0.00019965546450830335, "loss": 1.782, "step": 498 }, { "epoch": 0.01281292076498786, "grad_norm": 1.0546875, "learning_rate": 0.00019965509415360985, "loss": 1.6366, "step": 499 }, { "epoch": 0.012838597960909678, "grad_norm": 1.125, "learning_rate": 0.00019965472360031274, "loss": 1.6308, "step": 500 }, { "epoch": 0.012864275156831499, "grad_norm": 1.2265625, "learning_rate": 0.00019965435284841277, "loss": 1.7294, "step": 501 }, { "epoch": 0.012889952352753318, "grad_norm": 1.1484375, "learning_rate": 0.00019965398189791066, "loss": 1.8646, "step": 502 }, { "epoch": 0.012915629548675136, "grad_norm": 1.1484375, "learning_rate": 0.00019965361074880718, "loss": 1.6965, "step": 503 }, { "epoch": 0.012941306744596957, "grad_norm": 1.1015625, "learning_rate": 0.00019965323940110308, "loss": 1.588, "step": 504 }, { "epoch": 0.012966983940518775, "grad_norm": 1.1484375, "learning_rate": 0.00019965286785479904, "loss": 1.6604, "step": 505 }, { "epoch": 0.012992661136440596, "grad_norm": 1.234375, "learning_rate": 0.00019965249610989582, "loss": 1.738, "step": 506 }, { "epoch": 0.013018338332362414, "grad_norm": 1.0546875, "learning_rate": 0.00019965212416639424, "loss": 1.5191, "step": 507 }, { "epoch": 0.013044015528284233, "grad_norm": 1.2421875, "learning_rate": 0.0001996517520242949, "loss": 1.7419, "step": 508 }, { "epoch": 0.013069692724206054, "grad_norm": 1.0859375, "learning_rate": 0.00019965137968359868, "loss": 1.5536, "step": 509 }, { "epoch": 0.013095369920127872, "grad_norm": 1.109375, "learning_rate": 0.00019965100714430624, "loss": 1.7777, "step": 510 }, { "epoch": 0.013121047116049691, "grad_norm": 1.1328125, "learning_rate": 0.00019965063440641835, "loss": 1.6255, "step": 511 }, { "epoch": 0.013146724311971511, "grad_norm": 1.1875, "learning_rate": 0.00019965026146993574, "loss": 1.8239, "step": 512 }, { "epoch": 0.01317240150789333, "grad_norm": 1.15625, "learning_rate": 0.00019964988833485916, "loss": 1.6318, "step": 513 }, { "epoch": 0.01319807870381515, "grad_norm": 1.1953125, "learning_rate": 0.00019964951500118936, "loss": 1.8425, "step": 514 }, { "epoch": 0.01322375589973697, "grad_norm": 1.1484375, "learning_rate": 0.00019964914146892708, "loss": 1.874, "step": 515 }, { "epoch": 0.013249433095658788, "grad_norm": 1.15625, "learning_rate": 0.00019964876773807306, "loss": 1.6595, "step": 516 }, { "epoch": 0.013275110291580608, "grad_norm": 1.078125, "learning_rate": 0.00019964839380862807, "loss": 1.826, "step": 517 }, { "epoch": 0.013300787487502427, "grad_norm": 1.03125, "learning_rate": 0.0001996480196805928, "loss": 1.6304, "step": 518 }, { "epoch": 0.013326464683424247, "grad_norm": 1.0, "learning_rate": 0.00019964764535396803, "loss": 1.4722, "step": 519 }, { "epoch": 0.013352141879346066, "grad_norm": 1.046875, "learning_rate": 0.0001996472708287545, "loss": 1.6412, "step": 520 }, { "epoch": 0.013377819075267885, "grad_norm": 1.046875, "learning_rate": 0.00019964689610495298, "loss": 1.6124, "step": 521 }, { "epoch": 0.013403496271189705, "grad_norm": 1.078125, "learning_rate": 0.00019964652118256417, "loss": 1.6357, "step": 522 }, { "epoch": 0.013429173467111524, "grad_norm": 1.1328125, "learning_rate": 0.00019964614606158887, "loss": 1.6674, "step": 523 }, { "epoch": 0.013454850663033343, "grad_norm": 1.125, "learning_rate": 0.0001996457707420278, "loss": 1.8987, "step": 524 }, { "epoch": 0.013480527858955163, "grad_norm": 1.0859375, "learning_rate": 0.0001996453952238817, "loss": 1.7633, "step": 525 }, { "epoch": 0.013506205054876982, "grad_norm": 1.1796875, "learning_rate": 0.00019964501950715132, "loss": 1.731, "step": 526 }, { "epoch": 0.013531882250798802, "grad_norm": 1.046875, "learning_rate": 0.00019964464359183745, "loss": 1.5419, "step": 527 }, { "epoch": 0.01355755944672062, "grad_norm": 1.1328125, "learning_rate": 0.00019964426747794077, "loss": 1.7823, "step": 528 }, { "epoch": 0.01358323664264244, "grad_norm": 1.0625, "learning_rate": 0.00019964389116546206, "loss": 1.6951, "step": 529 }, { "epoch": 0.01360891383856426, "grad_norm": 1.03125, "learning_rate": 0.00019964351465440213, "loss": 1.5224, "step": 530 }, { "epoch": 0.013634591034486079, "grad_norm": 1.046875, "learning_rate": 0.00019964313794476165, "loss": 1.677, "step": 531 }, { "epoch": 0.013660268230407899, "grad_norm": 1.1171875, "learning_rate": 0.00019964276103654136, "loss": 1.6735, "step": 532 }, { "epoch": 0.013685945426329718, "grad_norm": 1.109375, "learning_rate": 0.0001996423839297421, "loss": 1.9353, "step": 533 }, { "epoch": 0.013711622622251536, "grad_norm": 1.125, "learning_rate": 0.00019964200662436453, "loss": 1.4975, "step": 534 }, { "epoch": 0.013737299818173357, "grad_norm": 1.046875, "learning_rate": 0.00019964162912040946, "loss": 1.5632, "step": 535 }, { "epoch": 0.013762977014095176, "grad_norm": 1.09375, "learning_rate": 0.00019964125141787764, "loss": 1.719, "step": 536 }, { "epoch": 0.013788654210016994, "grad_norm": 1.0625, "learning_rate": 0.00019964087351676977, "loss": 1.705, "step": 537 }, { "epoch": 0.013814331405938815, "grad_norm": 1.109375, "learning_rate": 0.00019964049541708665, "loss": 1.7028, "step": 538 }, { "epoch": 0.013840008601860633, "grad_norm": 1.1015625, "learning_rate": 0.00019964011711882905, "loss": 1.5913, "step": 539 }, { "epoch": 0.013865685797782454, "grad_norm": 1.1171875, "learning_rate": 0.00019963973862199768, "loss": 1.6848, "step": 540 }, { "epoch": 0.013891362993704272, "grad_norm": 1.09375, "learning_rate": 0.0001996393599265933, "loss": 1.6935, "step": 541 }, { "epoch": 0.013917040189626091, "grad_norm": 1.09375, "learning_rate": 0.0001996389810326167, "loss": 1.5764, "step": 542 }, { "epoch": 0.013942717385547912, "grad_norm": 1.0703125, "learning_rate": 0.0001996386019400686, "loss": 1.7204, "step": 543 }, { "epoch": 0.01396839458146973, "grad_norm": 1.0390625, "learning_rate": 0.00019963822264894976, "loss": 1.6484, "step": 544 }, { "epoch": 0.01399407177739155, "grad_norm": 1.203125, "learning_rate": 0.00019963784315926093, "loss": 1.575, "step": 545 }, { "epoch": 0.01401974897331337, "grad_norm": 1.0703125, "learning_rate": 0.00019963746347100288, "loss": 1.5641, "step": 546 }, { "epoch": 0.014045426169235188, "grad_norm": 1.0390625, "learning_rate": 0.0001996370835841764, "loss": 1.5865, "step": 547 }, { "epoch": 0.014071103365157009, "grad_norm": 1.109375, "learning_rate": 0.00019963670349878218, "loss": 1.7775, "step": 548 }, { "epoch": 0.014096780561078827, "grad_norm": 1.046875, "learning_rate": 0.000199636323214821, "loss": 1.706, "step": 549 }, { "epoch": 0.014122457757000646, "grad_norm": 1.03125, "learning_rate": 0.00019963594273229366, "loss": 1.7055, "step": 550 }, { "epoch": 0.014148134952922466, "grad_norm": 1.0859375, "learning_rate": 0.0001996355620512009, "loss": 1.4927, "step": 551 }, { "epoch": 0.014173812148844285, "grad_norm": 1.0703125, "learning_rate": 0.00019963518117154343, "loss": 1.5922, "step": 552 }, { "epoch": 0.014199489344766105, "grad_norm": 1.15625, "learning_rate": 0.00019963480009332205, "loss": 1.5685, "step": 553 }, { "epoch": 0.014225166540687924, "grad_norm": 1.0625, "learning_rate": 0.00019963441881653754, "loss": 1.6281, "step": 554 }, { "epoch": 0.014250843736609743, "grad_norm": 1.171875, "learning_rate": 0.0001996340373411906, "loss": 1.6265, "step": 555 }, { "epoch": 0.014276520932531563, "grad_norm": 1.0625, "learning_rate": 0.00019963365566728203, "loss": 1.6162, "step": 556 }, { "epoch": 0.014302198128453382, "grad_norm": 1.1015625, "learning_rate": 0.0001996332737948126, "loss": 1.4966, "step": 557 }, { "epoch": 0.014327875324375202, "grad_norm": 1.0546875, "learning_rate": 0.00019963289172378305, "loss": 1.4199, "step": 558 }, { "epoch": 0.014353552520297021, "grad_norm": 1.1796875, "learning_rate": 0.00019963250945419415, "loss": 1.7109, "step": 559 }, { "epoch": 0.01437922971621884, "grad_norm": 1.2109375, "learning_rate": 0.00019963212698604664, "loss": 1.6364, "step": 560 }, { "epoch": 0.01440490691214066, "grad_norm": 1.1328125, "learning_rate": 0.0001996317443193413, "loss": 1.6938, "step": 561 }, { "epoch": 0.014430584108062479, "grad_norm": 1.109375, "learning_rate": 0.00019963136145407893, "loss": 1.7324, "step": 562 }, { "epoch": 0.014456261303984298, "grad_norm": 1.0390625, "learning_rate": 0.0001996309783902602, "loss": 1.534, "step": 563 }, { "epoch": 0.014481938499906118, "grad_norm": 1.0, "learning_rate": 0.00019963059512788598, "loss": 1.5947, "step": 564 }, { "epoch": 0.014507615695827937, "grad_norm": 1.09375, "learning_rate": 0.00019963021166695698, "loss": 1.7233, "step": 565 }, { "epoch": 0.014533292891749757, "grad_norm": 1.046875, "learning_rate": 0.00019962982800747394, "loss": 1.6019, "step": 566 }, { "epoch": 0.014558970087671576, "grad_norm": 1.0625, "learning_rate": 0.00019962944414943766, "loss": 1.759, "step": 567 }, { "epoch": 0.014584647283593394, "grad_norm": 1.0390625, "learning_rate": 0.0001996290600928489, "loss": 1.6707, "step": 568 }, { "epoch": 0.014610324479515215, "grad_norm": 1.046875, "learning_rate": 0.00019962867583770842, "loss": 1.6581, "step": 569 }, { "epoch": 0.014636001675437034, "grad_norm": 1.03125, "learning_rate": 0.00019962829138401697, "loss": 1.6053, "step": 570 }, { "epoch": 0.014661678871358854, "grad_norm": 1.078125, "learning_rate": 0.00019962790673177535, "loss": 1.6717, "step": 571 }, { "epoch": 0.014687356067280673, "grad_norm": 1.1171875, "learning_rate": 0.00019962752188098436, "loss": 1.6913, "step": 572 }, { "epoch": 0.014713033263202491, "grad_norm": 1.1015625, "learning_rate": 0.00019962713683164464, "loss": 1.534, "step": 573 }, { "epoch": 0.014738710459124312, "grad_norm": 1.1171875, "learning_rate": 0.0001996267515837571, "loss": 1.5554, "step": 574 }, { "epoch": 0.01476438765504613, "grad_norm": 1.0703125, "learning_rate": 0.00019962636613732242, "loss": 1.6612, "step": 575 }, { "epoch": 0.01479006485096795, "grad_norm": 1.265625, "learning_rate": 0.00019962598049234136, "loss": 1.5195, "step": 576 }, { "epoch": 0.01481574204688977, "grad_norm": 1.0625, "learning_rate": 0.00019962559464881477, "loss": 1.7079, "step": 577 }, { "epoch": 0.014841419242811588, "grad_norm": 1.1640625, "learning_rate": 0.00019962520860674333, "loss": 1.7328, "step": 578 }, { "epoch": 0.014867096438733409, "grad_norm": 1.0859375, "learning_rate": 0.00019962482236612788, "loss": 1.7201, "step": 579 }, { "epoch": 0.014892773634655227, "grad_norm": 1.015625, "learning_rate": 0.00019962443592696914, "loss": 1.6359, "step": 580 }, { "epoch": 0.014918450830577046, "grad_norm": 1.2421875, "learning_rate": 0.0001996240492892679, "loss": 1.496, "step": 581 }, { "epoch": 0.014944128026498867, "grad_norm": 1.078125, "learning_rate": 0.0001996236624530249, "loss": 1.7146, "step": 582 }, { "epoch": 0.014969805222420685, "grad_norm": 1.1171875, "learning_rate": 0.00019962327541824098, "loss": 1.7726, "step": 583 }, { "epoch": 0.014995482418342506, "grad_norm": 1.1328125, "learning_rate": 0.00019962288818491688, "loss": 1.5442, "step": 584 }, { "epoch": 0.015021159614264324, "grad_norm": 1.125, "learning_rate": 0.00019962250075305333, "loss": 1.4369, "step": 585 }, { "epoch": 0.015046836810186143, "grad_norm": 1.0859375, "learning_rate": 0.00019962211312265113, "loss": 1.7224, "step": 586 }, { "epoch": 0.015072514006107963, "grad_norm": 1.125, "learning_rate": 0.0001996217252937111, "loss": 1.5585, "step": 587 }, { "epoch": 0.015098191202029782, "grad_norm": 1.0546875, "learning_rate": 0.00019962133726623393, "loss": 1.8022, "step": 588 }, { "epoch": 0.015123868397951601, "grad_norm": 1.4296875, "learning_rate": 0.00019962094904022045, "loss": 1.4852, "step": 589 }, { "epoch": 0.015149545593873421, "grad_norm": 1.1328125, "learning_rate": 0.00019962056061567142, "loss": 1.597, "step": 590 }, { "epoch": 0.01517522278979524, "grad_norm": 1.078125, "learning_rate": 0.0001996201719925876, "loss": 1.6516, "step": 591 }, { "epoch": 0.01520089998571706, "grad_norm": 1.078125, "learning_rate": 0.00019961978317096978, "loss": 1.6635, "step": 592 }, { "epoch": 0.015226577181638879, "grad_norm": 1.1640625, "learning_rate": 0.00019961939415081873, "loss": 1.6924, "step": 593 }, { "epoch": 0.015252254377560698, "grad_norm": 1.0859375, "learning_rate": 0.00019961900493213524, "loss": 1.5009, "step": 594 }, { "epoch": 0.015277931573482518, "grad_norm": 1.0703125, "learning_rate": 0.00019961861551492008, "loss": 1.6952, "step": 595 }, { "epoch": 0.015303608769404337, "grad_norm": 1.09375, "learning_rate": 0.00019961822589917397, "loss": 1.4915, "step": 596 }, { "epoch": 0.015329285965326157, "grad_norm": 0.9609375, "learning_rate": 0.0001996178360848978, "loss": 1.505, "step": 597 }, { "epoch": 0.015354963161247976, "grad_norm": 1.0859375, "learning_rate": 0.00019961744607209224, "loss": 1.7452, "step": 598 }, { "epoch": 0.015380640357169795, "grad_norm": 1.1875, "learning_rate": 0.0001996170558607581, "loss": 1.5945, "step": 599 }, { "epoch": 0.015406317553091615, "grad_norm": 1.0390625, "learning_rate": 0.0001996166654508962, "loss": 1.625, "step": 600 }, { "epoch": 0.015431994749013434, "grad_norm": 1.2109375, "learning_rate": 0.00019961627484250724, "loss": 1.6757, "step": 601 }, { "epoch": 0.015457671944935253, "grad_norm": 1.109375, "learning_rate": 0.0001996158840355921, "loss": 1.6766, "step": 602 }, { "epoch": 0.015483349140857073, "grad_norm": 1.0625, "learning_rate": 0.00019961549303015145, "loss": 1.7553, "step": 603 }, { "epoch": 0.015509026336778892, "grad_norm": 1.0234375, "learning_rate": 0.00019961510182618612, "loss": 1.5654, "step": 604 }, { "epoch": 0.015534703532700712, "grad_norm": 1.0390625, "learning_rate": 0.0001996147104236969, "loss": 1.7169, "step": 605 }, { "epoch": 0.01556038072862253, "grad_norm": 1.15625, "learning_rate": 0.00019961431882268457, "loss": 1.6962, "step": 606 }, { "epoch": 0.01558605792454435, "grad_norm": 1.09375, "learning_rate": 0.00019961392702314988, "loss": 1.6041, "step": 607 }, { "epoch": 0.01561173512046617, "grad_norm": 1.125, "learning_rate": 0.0001996135350250937, "loss": 1.7155, "step": 608 }, { "epoch": 0.01563741231638799, "grad_norm": 0.984375, "learning_rate": 0.00019961314282851666, "loss": 1.6277, "step": 609 }, { "epoch": 0.015663089512309807, "grad_norm": 1.0703125, "learning_rate": 0.00019961275043341967, "loss": 1.7286, "step": 610 }, { "epoch": 0.015688766708231626, "grad_norm": 1.15625, "learning_rate": 0.00019961235783980344, "loss": 1.7794, "step": 611 }, { "epoch": 0.015714443904153448, "grad_norm": 1.0390625, "learning_rate": 0.0001996119650476688, "loss": 1.6337, "step": 612 }, { "epoch": 0.015740121100075267, "grad_norm": 1.1015625, "learning_rate": 0.0001996115720570165, "loss": 1.5625, "step": 613 }, { "epoch": 0.015765798295997085, "grad_norm": 1.1171875, "learning_rate": 0.00019961117886784736, "loss": 1.6635, "step": 614 }, { "epoch": 0.015791475491918904, "grad_norm": 1.1328125, "learning_rate": 0.0001996107854801621, "loss": 1.6771, "step": 615 }, { "epoch": 0.015817152687840723, "grad_norm": 1.046875, "learning_rate": 0.00019961039189396159, "loss": 1.5333, "step": 616 }, { "epoch": 0.015842829883762545, "grad_norm": 1.0546875, "learning_rate": 0.00019960999810924652, "loss": 1.4451, "step": 617 }, { "epoch": 0.015868507079684364, "grad_norm": 1.0859375, "learning_rate": 0.00019960960412601772, "loss": 1.5741, "step": 618 }, { "epoch": 0.015894184275606182, "grad_norm": 1.1171875, "learning_rate": 0.000199609209944276, "loss": 1.632, "step": 619 }, { "epoch": 0.015919861471528, "grad_norm": 1.109375, "learning_rate": 0.00019960881556402212, "loss": 1.6803, "step": 620 }, { "epoch": 0.01594553866744982, "grad_norm": 1.1015625, "learning_rate": 0.00019960842098525687, "loss": 1.5647, "step": 621 }, { "epoch": 0.015971215863371642, "grad_norm": 1.0859375, "learning_rate": 0.00019960802620798103, "loss": 1.7056, "step": 622 }, { "epoch": 0.01599689305929346, "grad_norm": 1.0390625, "learning_rate": 0.0001996076312321954, "loss": 1.4768, "step": 623 }, { "epoch": 0.01602257025521528, "grad_norm": 1.109375, "learning_rate": 0.00019960723605790074, "loss": 1.6978, "step": 624 }, { "epoch": 0.016048247451137098, "grad_norm": 1.1171875, "learning_rate": 0.00019960684068509786, "loss": 1.5838, "step": 625 }, { "epoch": 0.016073924647058917, "grad_norm": 1.1015625, "learning_rate": 0.00019960644511378756, "loss": 1.6863, "step": 626 }, { "epoch": 0.01609960184298074, "grad_norm": 1.0546875, "learning_rate": 0.0001996060493439706, "loss": 1.6294, "step": 627 }, { "epoch": 0.016125279038902558, "grad_norm": 1.046875, "learning_rate": 0.00019960565337564777, "loss": 1.6868, "step": 628 }, { "epoch": 0.016150956234824376, "grad_norm": 1.0859375, "learning_rate": 0.0001996052572088199, "loss": 1.5448, "step": 629 }, { "epoch": 0.016176633430746195, "grad_norm": 1.0703125, "learning_rate": 0.00019960486084348773, "loss": 1.6703, "step": 630 }, { "epoch": 0.016202310626668014, "grad_norm": 1.0859375, "learning_rate": 0.00019960446427965206, "loss": 1.6312, "step": 631 }, { "epoch": 0.016227987822589836, "grad_norm": 1.15625, "learning_rate": 0.00019960406751731372, "loss": 1.5482, "step": 632 }, { "epoch": 0.016253665018511654, "grad_norm": 1.09375, "learning_rate": 0.00019960367055647343, "loss": 1.5546, "step": 633 }, { "epoch": 0.016279342214433473, "grad_norm": 1.1875, "learning_rate": 0.00019960327339713205, "loss": 1.6941, "step": 634 }, { "epoch": 0.016305019410355292, "grad_norm": 1.0, "learning_rate": 0.00019960287603929032, "loss": 1.3535, "step": 635 }, { "epoch": 0.01633069660627711, "grad_norm": 1.0546875, "learning_rate": 0.0001996024784829491, "loss": 1.474, "step": 636 }, { "epoch": 0.01635637380219893, "grad_norm": 1.0390625, "learning_rate": 0.0001996020807281091, "loss": 1.3116, "step": 637 }, { "epoch": 0.01638205099812075, "grad_norm": 1.0859375, "learning_rate": 0.00019960168277477117, "loss": 1.3909, "step": 638 }, { "epoch": 0.01640772819404257, "grad_norm": 1.0859375, "learning_rate": 0.00019960128462293608, "loss": 1.5804, "step": 639 }, { "epoch": 0.01643340538996439, "grad_norm": 1.1171875, "learning_rate": 0.0001996008862726046, "loss": 1.6854, "step": 640 }, { "epoch": 0.016459082585886207, "grad_norm": 1.09375, "learning_rate": 0.0001996004877237776, "loss": 1.7849, "step": 641 }, { "epoch": 0.016484759781808026, "grad_norm": 1.0234375, "learning_rate": 0.00019960008897645576, "loss": 1.5514, "step": 642 }, { "epoch": 0.01651043697772985, "grad_norm": 1.1640625, "learning_rate": 0.00019959969003064, "loss": 1.4694, "step": 643 }, { "epoch": 0.016536114173651667, "grad_norm": 1.078125, "learning_rate": 0.00019959929088633104, "loss": 1.6664, "step": 644 }, { "epoch": 0.016561791369573486, "grad_norm": 1.03125, "learning_rate": 0.00019959889154352967, "loss": 1.5039, "step": 645 }, { "epoch": 0.016587468565495304, "grad_norm": 1.1015625, "learning_rate": 0.0001995984920022367, "loss": 1.6584, "step": 646 }, { "epoch": 0.016613145761417123, "grad_norm": 1.1171875, "learning_rate": 0.00019959809226245296, "loss": 1.3829, "step": 647 }, { "epoch": 0.016638822957338945, "grad_norm": 1.015625, "learning_rate": 0.00019959769232417922, "loss": 1.6191, "step": 648 }, { "epoch": 0.016664500153260764, "grad_norm": 0.96875, "learning_rate": 0.00019959729218741625, "loss": 1.5378, "step": 649 }, { "epoch": 0.016690177349182583, "grad_norm": 1.0625, "learning_rate": 0.0001995968918521649, "loss": 1.664, "step": 650 }, { "epoch": 0.0167158545451044, "grad_norm": 1.1171875, "learning_rate": 0.0001995964913184259, "loss": 1.4757, "step": 651 }, { "epoch": 0.01674153174102622, "grad_norm": 1.0625, "learning_rate": 0.00019959609058620013, "loss": 1.5318, "step": 652 }, { "epoch": 0.016767208936948042, "grad_norm": 0.9921875, "learning_rate": 0.00019959568965548835, "loss": 1.7507, "step": 653 }, { "epoch": 0.01679288613286986, "grad_norm": 1.2265625, "learning_rate": 0.00019959528852629132, "loss": 1.772, "step": 654 }, { "epoch": 0.01681856332879168, "grad_norm": 1.03125, "learning_rate": 0.0001995948871986099, "loss": 1.5272, "step": 655 }, { "epoch": 0.016844240524713498, "grad_norm": 1.09375, "learning_rate": 0.00019959448567244485, "loss": 1.705, "step": 656 }, { "epoch": 0.016869917720635317, "grad_norm": 1.0234375, "learning_rate": 0.000199594083947797, "loss": 1.7199, "step": 657 }, { "epoch": 0.01689559491655714, "grad_norm": 1.0546875, "learning_rate": 0.00019959368202466715, "loss": 1.5247, "step": 658 }, { "epoch": 0.016921272112478958, "grad_norm": 1.1640625, "learning_rate": 0.00019959327990305608, "loss": 1.7985, "step": 659 }, { "epoch": 0.016946949308400776, "grad_norm": 1.0234375, "learning_rate": 0.0001995928775829646, "loss": 1.5109, "step": 660 }, { "epoch": 0.016972626504322595, "grad_norm": 1.0, "learning_rate": 0.00019959247506439349, "loss": 1.4539, "step": 661 }, { "epoch": 0.016998303700244414, "grad_norm": 1.171875, "learning_rate": 0.0001995920723473436, "loss": 1.5993, "step": 662 }, { "epoch": 0.017023980896166233, "grad_norm": 1.0859375, "learning_rate": 0.0001995916694318157, "loss": 1.7412, "step": 663 }, { "epoch": 0.017049658092088055, "grad_norm": 1.078125, "learning_rate": 0.00019959126631781057, "loss": 1.4544, "step": 664 }, { "epoch": 0.017075335288009873, "grad_norm": 1.0703125, "learning_rate": 0.00019959086300532907, "loss": 1.5902, "step": 665 }, { "epoch": 0.017101012483931692, "grad_norm": 1.0390625, "learning_rate": 0.00019959045949437197, "loss": 1.5491, "step": 666 }, { "epoch": 0.01712668967985351, "grad_norm": 1.0390625, "learning_rate": 0.00019959005578494007, "loss": 1.7015, "step": 667 }, { "epoch": 0.01715236687577533, "grad_norm": 1.078125, "learning_rate": 0.00019958965187703422, "loss": 1.6744, "step": 668 }, { "epoch": 0.01717804407169715, "grad_norm": 1.0234375, "learning_rate": 0.00019958924777065516, "loss": 1.4934, "step": 669 }, { "epoch": 0.01720372126761897, "grad_norm": 0.99609375, "learning_rate": 0.00019958884346580374, "loss": 1.6639, "step": 670 }, { "epoch": 0.01722939846354079, "grad_norm": 1.2265625, "learning_rate": 0.00019958843896248073, "loss": 1.6003, "step": 671 }, { "epoch": 0.017255075659462608, "grad_norm": 1.109375, "learning_rate": 0.00019958803426068698, "loss": 1.5673, "step": 672 }, { "epoch": 0.017280752855384426, "grad_norm": 1.1015625, "learning_rate": 0.00019958762936042324, "loss": 1.7155, "step": 673 }, { "epoch": 0.01730643005130625, "grad_norm": 1.1484375, "learning_rate": 0.0001995872242616904, "loss": 1.8157, "step": 674 }, { "epoch": 0.017332107247228067, "grad_norm": 1.03125, "learning_rate": 0.00019958681896448918, "loss": 1.6662, "step": 675 }, { "epoch": 0.017357784443149886, "grad_norm": 1.1484375, "learning_rate": 0.00019958641346882043, "loss": 1.6613, "step": 676 }, { "epoch": 0.017383461639071705, "grad_norm": 1.1171875, "learning_rate": 0.00019958600777468497, "loss": 1.4046, "step": 677 }, { "epoch": 0.017409138834993523, "grad_norm": 1.0390625, "learning_rate": 0.0001995856018820836, "loss": 1.6197, "step": 678 }, { "epoch": 0.017434816030915345, "grad_norm": 1.0703125, "learning_rate": 0.0001995851957910171, "loss": 1.6473, "step": 679 }, { "epoch": 0.017460493226837164, "grad_norm": 1.2421875, "learning_rate": 0.0001995847895014863, "loss": 1.6546, "step": 680 }, { "epoch": 0.017486170422758983, "grad_norm": 1.0625, "learning_rate": 0.000199584383013492, "loss": 1.4897, "step": 681 }, { "epoch": 0.0175118476186808, "grad_norm": 1.0625, "learning_rate": 0.00019958397632703504, "loss": 1.4463, "step": 682 }, { "epoch": 0.01753752481460262, "grad_norm": 1.0859375, "learning_rate": 0.0001995835694421162, "loss": 1.6788, "step": 683 }, { "epoch": 0.017563202010524442, "grad_norm": 1.1875, "learning_rate": 0.00019958316235873632, "loss": 1.6394, "step": 684 }, { "epoch": 0.01758887920644626, "grad_norm": 1.0234375, "learning_rate": 0.00019958275507689618, "loss": 1.5224, "step": 685 }, { "epoch": 0.01761455640236808, "grad_norm": 1.0, "learning_rate": 0.0001995823475965966, "loss": 1.5324, "step": 686 }, { "epoch": 0.0176402335982899, "grad_norm": 1.1171875, "learning_rate": 0.00019958193991783838, "loss": 1.6197, "step": 687 }, { "epoch": 0.017665910794211717, "grad_norm": 1.109375, "learning_rate": 0.00019958153204062239, "loss": 1.5688, "step": 688 }, { "epoch": 0.017691587990133536, "grad_norm": 1.0390625, "learning_rate": 0.00019958112396494936, "loss": 1.4585, "step": 689 }, { "epoch": 0.017717265186055358, "grad_norm": 1.109375, "learning_rate": 0.00019958071569082018, "loss": 1.5427, "step": 690 }, { "epoch": 0.017742942381977177, "grad_norm": 1.0, "learning_rate": 0.00019958030721823562, "loss": 1.5665, "step": 691 }, { "epoch": 0.017768619577898995, "grad_norm": 0.91796875, "learning_rate": 0.00019957989854719647, "loss": 1.4858, "step": 692 }, { "epoch": 0.017794296773820814, "grad_norm": 1.0703125, "learning_rate": 0.0001995794896777036, "loss": 1.4827, "step": 693 }, { "epoch": 0.017819973969742633, "grad_norm": 1.078125, "learning_rate": 0.0001995790806097578, "loss": 1.626, "step": 694 }, { "epoch": 0.017845651165664455, "grad_norm": 1.0703125, "learning_rate": 0.00019957867134335992, "loss": 1.7552, "step": 695 }, { "epoch": 0.017871328361586274, "grad_norm": 1.1796875, "learning_rate": 0.0001995782618785107, "loss": 1.5762, "step": 696 }, { "epoch": 0.017897005557508092, "grad_norm": 1.140625, "learning_rate": 0.00019957785221521102, "loss": 1.7809, "step": 697 }, { "epoch": 0.01792268275342991, "grad_norm": 0.94140625, "learning_rate": 0.00019957744235346168, "loss": 1.4838, "step": 698 }, { "epoch": 0.01794835994935173, "grad_norm": 0.98046875, "learning_rate": 0.00019957703229326349, "loss": 1.429, "step": 699 }, { "epoch": 0.017974037145273552, "grad_norm": 1.0390625, "learning_rate": 0.00019957662203461726, "loss": 1.6377, "step": 700 }, { "epoch": 0.01799971434119537, "grad_norm": 1.0859375, "learning_rate": 0.00019957621157752379, "loss": 1.5479, "step": 701 }, { "epoch": 0.01802539153711719, "grad_norm": 1.0703125, "learning_rate": 0.00019957580092198397, "loss": 1.5915, "step": 702 }, { "epoch": 0.018051068733039008, "grad_norm": 1.0546875, "learning_rate": 0.00019957539006799856, "loss": 1.5117, "step": 703 }, { "epoch": 0.018076745928960827, "grad_norm": 1.1328125, "learning_rate": 0.0001995749790155684, "loss": 1.3898, "step": 704 }, { "epoch": 0.01810242312488265, "grad_norm": 1.09375, "learning_rate": 0.00019957456776469428, "loss": 1.6842, "step": 705 }, { "epoch": 0.018128100320804467, "grad_norm": 1.0859375, "learning_rate": 0.00019957415631537706, "loss": 1.6016, "step": 706 }, { "epoch": 0.018153777516726286, "grad_norm": 1.125, "learning_rate": 0.00019957374466761754, "loss": 1.4697, "step": 707 }, { "epoch": 0.018179454712648105, "grad_norm": 1.0546875, "learning_rate": 0.0001995733328214165, "loss": 1.7838, "step": 708 }, { "epoch": 0.018205131908569924, "grad_norm": 1.015625, "learning_rate": 0.00019957292077677486, "loss": 1.6722, "step": 709 }, { "epoch": 0.018230809104491742, "grad_norm": 1.0859375, "learning_rate": 0.00019957250853369334, "loss": 1.6596, "step": 710 }, { "epoch": 0.018256486300413564, "grad_norm": 1.2109375, "learning_rate": 0.00019957209609217284, "loss": 1.5763, "step": 711 }, { "epoch": 0.018282163496335383, "grad_norm": 0.98046875, "learning_rate": 0.0001995716834522141, "loss": 1.7074, "step": 712 }, { "epoch": 0.018307840692257202, "grad_norm": 1.0546875, "learning_rate": 0.00019957127061381803, "loss": 1.5918, "step": 713 }, { "epoch": 0.01833351788817902, "grad_norm": 0.9921875, "learning_rate": 0.0001995708575769854, "loss": 1.4897, "step": 714 }, { "epoch": 0.01835919508410084, "grad_norm": 1.09375, "learning_rate": 0.00019957044434171705, "loss": 1.6304, "step": 715 }, { "epoch": 0.01838487228002266, "grad_norm": 1.0, "learning_rate": 0.0001995700309080138, "loss": 1.5881, "step": 716 }, { "epoch": 0.01841054947594448, "grad_norm": 1.0859375, "learning_rate": 0.0001995696172758765, "loss": 1.7474, "step": 717 }, { "epoch": 0.0184362266718663, "grad_norm": 0.99609375, "learning_rate": 0.0001995692034453059, "loss": 1.4496, "step": 718 }, { "epoch": 0.018461903867788117, "grad_norm": 0.96484375, "learning_rate": 0.00019956878941630287, "loss": 1.4815, "step": 719 }, { "epoch": 0.018487581063709936, "grad_norm": 1.2109375, "learning_rate": 0.00019956837518886827, "loss": 1.5042, "step": 720 }, { "epoch": 0.018513258259631758, "grad_norm": 1.078125, "learning_rate": 0.00019956796076300287, "loss": 1.708, "step": 721 }, { "epoch": 0.018538935455553577, "grad_norm": 1.0546875, "learning_rate": 0.00019956754613870752, "loss": 1.5954, "step": 722 }, { "epoch": 0.018564612651475396, "grad_norm": 1.0625, "learning_rate": 0.00019956713131598304, "loss": 1.5881, "step": 723 }, { "epoch": 0.018590289847397214, "grad_norm": 1.03125, "learning_rate": 0.0001995667162948303, "loss": 1.6546, "step": 724 }, { "epoch": 0.018615967043319033, "grad_norm": 1.078125, "learning_rate": 0.00019956630107525002, "loss": 1.7816, "step": 725 }, { "epoch": 0.018641644239240855, "grad_norm": 1.0390625, "learning_rate": 0.00019956588565724315, "loss": 1.5616, "step": 726 }, { "epoch": 0.018667321435162674, "grad_norm": 1.015625, "learning_rate": 0.00019956547004081046, "loss": 1.5658, "step": 727 }, { "epoch": 0.018692998631084493, "grad_norm": 1.0546875, "learning_rate": 0.00019956505422595274, "loss": 1.5498, "step": 728 }, { "epoch": 0.01871867582700631, "grad_norm": 1.109375, "learning_rate": 0.00019956463821267092, "loss": 1.4889, "step": 729 }, { "epoch": 0.01874435302292813, "grad_norm": 1.046875, "learning_rate": 0.0001995642220009657, "loss": 1.4059, "step": 730 }, { "epoch": 0.018770030218849952, "grad_norm": 1.1171875, "learning_rate": 0.00019956380559083803, "loss": 1.665, "step": 731 }, { "epoch": 0.01879570741477177, "grad_norm": 1.078125, "learning_rate": 0.00019956338898228867, "loss": 1.6749, "step": 732 }, { "epoch": 0.01882138461069359, "grad_norm": 1.046875, "learning_rate": 0.0001995629721753185, "loss": 1.4719, "step": 733 }, { "epoch": 0.018847061806615408, "grad_norm": 1.03125, "learning_rate": 0.00019956255516992828, "loss": 1.7207, "step": 734 }, { "epoch": 0.018872739002537227, "grad_norm": 1.1171875, "learning_rate": 0.00019956213796611891, "loss": 1.5775, "step": 735 }, { "epoch": 0.018898416198459046, "grad_norm": 1.125, "learning_rate": 0.00019956172056389117, "loss": 1.6392, "step": 736 }, { "epoch": 0.018924093394380868, "grad_norm": 1.0078125, "learning_rate": 0.00019956130296324594, "loss": 1.6005, "step": 737 }, { "epoch": 0.018949770590302686, "grad_norm": 1.1015625, "learning_rate": 0.000199560885164184, "loss": 1.5195, "step": 738 }, { "epoch": 0.018975447786224505, "grad_norm": 1.1015625, "learning_rate": 0.00019956046716670622, "loss": 1.4799, "step": 739 }, { "epoch": 0.019001124982146324, "grad_norm": 1.09375, "learning_rate": 0.00019956004897081343, "loss": 1.6152, "step": 740 }, { "epoch": 0.019026802178068142, "grad_norm": 1.015625, "learning_rate": 0.00019955963057650643, "loss": 1.4489, "step": 741 }, { "epoch": 0.019052479373989965, "grad_norm": 1.0625, "learning_rate": 0.0001995592119837861, "loss": 1.7323, "step": 742 }, { "epoch": 0.019078156569911783, "grad_norm": 1.078125, "learning_rate": 0.00019955879319265326, "loss": 1.5552, "step": 743 }, { "epoch": 0.019103833765833602, "grad_norm": 1.09375, "learning_rate": 0.00019955837420310873, "loss": 1.5793, "step": 744 }, { "epoch": 0.01912951096175542, "grad_norm": 1.0703125, "learning_rate": 0.00019955795501515336, "loss": 1.5655, "step": 745 }, { "epoch": 0.01915518815767724, "grad_norm": 1.046875, "learning_rate": 0.00019955753562878796, "loss": 1.6547, "step": 746 }, { "epoch": 0.01918086535359906, "grad_norm": 1.0078125, "learning_rate": 0.0001995571160440134, "loss": 1.4486, "step": 747 }, { "epoch": 0.01920654254952088, "grad_norm": 1.03125, "learning_rate": 0.0001995566962608305, "loss": 1.764, "step": 748 }, { "epoch": 0.0192322197454427, "grad_norm": 1.0546875, "learning_rate": 0.00019955627627924008, "loss": 1.6718, "step": 749 }, { "epoch": 0.019257896941364518, "grad_norm": 0.9609375, "learning_rate": 0.000199555856099243, "loss": 1.4823, "step": 750 }, { "epoch": 0.019283574137286336, "grad_norm": 0.9609375, "learning_rate": 0.0001995554357208401, "loss": 1.6049, "step": 751 }, { "epoch": 0.01930925133320816, "grad_norm": 1.0546875, "learning_rate": 0.00019955501514403224, "loss": 1.6845, "step": 752 }, { "epoch": 0.019334928529129977, "grad_norm": 1.109375, "learning_rate": 0.00019955459436882017, "loss": 1.7104, "step": 753 }, { "epoch": 0.019360605725051796, "grad_norm": 1.0234375, "learning_rate": 0.00019955417339520483, "loss": 1.5796, "step": 754 }, { "epoch": 0.019386282920973615, "grad_norm": 1.0234375, "learning_rate": 0.000199553752223187, "loss": 1.6223, "step": 755 }, { "epoch": 0.019411960116895433, "grad_norm": 1.015625, "learning_rate": 0.0001995533308527675, "loss": 1.6281, "step": 756 }, { "epoch": 0.019437637312817255, "grad_norm": 1.0703125, "learning_rate": 0.00019955290928394727, "loss": 1.5164, "step": 757 }, { "epoch": 0.019463314508739074, "grad_norm": 1.0703125, "learning_rate": 0.00019955248751672705, "loss": 1.4484, "step": 758 }, { "epoch": 0.019488991704660893, "grad_norm": 1.09375, "learning_rate": 0.0001995520655511077, "loss": 1.7047, "step": 759 }, { "epoch": 0.01951466890058271, "grad_norm": 1.078125, "learning_rate": 0.0001995516433870901, "loss": 1.6287, "step": 760 }, { "epoch": 0.01954034609650453, "grad_norm": 0.9296875, "learning_rate": 0.00019955122102467506, "loss": 1.5167, "step": 761 }, { "epoch": 0.01956602329242635, "grad_norm": 1.078125, "learning_rate": 0.0001995507984638634, "loss": 1.6206, "step": 762 }, { "epoch": 0.01959170048834817, "grad_norm": 1.03125, "learning_rate": 0.00019955037570465605, "loss": 1.5878, "step": 763 }, { "epoch": 0.01961737768426999, "grad_norm": 1.1328125, "learning_rate": 0.00019954995274705376, "loss": 1.5512, "step": 764 }, { "epoch": 0.01964305488019181, "grad_norm": 1.0625, "learning_rate": 0.0001995495295910574, "loss": 1.5201, "step": 765 }, { "epoch": 0.019668732076113627, "grad_norm": 1.0625, "learning_rate": 0.00019954910623666783, "loss": 1.6177, "step": 766 }, { "epoch": 0.019694409272035446, "grad_norm": 1.2265625, "learning_rate": 0.0001995486826838859, "loss": 1.6516, "step": 767 }, { "epoch": 0.019720086467957268, "grad_norm": 1.03125, "learning_rate": 0.0001995482589327124, "loss": 1.4382, "step": 768 }, { "epoch": 0.019745763663879087, "grad_norm": 1.125, "learning_rate": 0.00019954783498314825, "loss": 1.5313, "step": 769 }, { "epoch": 0.019771440859800905, "grad_norm": 1.0234375, "learning_rate": 0.00019954741083519424, "loss": 1.5431, "step": 770 }, { "epoch": 0.019797118055722724, "grad_norm": 1.0703125, "learning_rate": 0.0001995469864888512, "loss": 1.5872, "step": 771 }, { "epoch": 0.019822795251644543, "grad_norm": 1.09375, "learning_rate": 0.00019954656194412005, "loss": 1.582, "step": 772 }, { "epoch": 0.019848472447566365, "grad_norm": 1.0390625, "learning_rate": 0.00019954613720100158, "loss": 1.5987, "step": 773 }, { "epoch": 0.019874149643488184, "grad_norm": 1.109375, "learning_rate": 0.00019954571225949666, "loss": 1.3921, "step": 774 }, { "epoch": 0.019899826839410002, "grad_norm": 0.9140625, "learning_rate": 0.00019954528711960611, "loss": 1.4783, "step": 775 }, { "epoch": 0.01992550403533182, "grad_norm": 1.046875, "learning_rate": 0.00019954486178133083, "loss": 1.4535, "step": 776 }, { "epoch": 0.01995118123125364, "grad_norm": 1.03125, "learning_rate": 0.00019954443624467158, "loss": 1.5987, "step": 777 }, { "epoch": 0.019976858427175462, "grad_norm": 1.078125, "learning_rate": 0.00019954401050962929, "loss": 1.6735, "step": 778 }, { "epoch": 0.02000253562309728, "grad_norm": 1.171875, "learning_rate": 0.0001995435845762048, "loss": 1.3754, "step": 779 }, { "epoch": 0.0200282128190191, "grad_norm": 0.98828125, "learning_rate": 0.0001995431584443989, "loss": 1.5412, "step": 780 }, { "epoch": 0.020053890014940918, "grad_norm": 1.0625, "learning_rate": 0.00019954273211421247, "loss": 1.4165, "step": 781 }, { "epoch": 0.020079567210862737, "grad_norm": 0.90234375, "learning_rate": 0.00019954230558564642, "loss": 1.4773, "step": 782 }, { "epoch": 0.02010524440678456, "grad_norm": 0.9765625, "learning_rate": 0.0001995418788587015, "loss": 1.5705, "step": 783 }, { "epoch": 0.020130921602706377, "grad_norm": 1.0546875, "learning_rate": 0.00019954145193337862, "loss": 1.6192, "step": 784 }, { "epoch": 0.020156598798628196, "grad_norm": 1.0625, "learning_rate": 0.0001995410248096786, "loss": 1.393, "step": 785 }, { "epoch": 0.020182275994550015, "grad_norm": 0.94921875, "learning_rate": 0.00019954059748760234, "loss": 1.3434, "step": 786 }, { "epoch": 0.020207953190471833, "grad_norm": 1.1015625, "learning_rate": 0.00019954016996715068, "loss": 1.4503, "step": 787 }, { "epoch": 0.020233630386393652, "grad_norm": 1.046875, "learning_rate": 0.00019953974224832442, "loss": 1.6668, "step": 788 }, { "epoch": 0.020259307582315474, "grad_norm": 1.0703125, "learning_rate": 0.00019953931433112443, "loss": 1.6038, "step": 789 }, { "epoch": 0.020284984778237293, "grad_norm": 1.015625, "learning_rate": 0.0001995388862155516, "loss": 1.4964, "step": 790 }, { "epoch": 0.02031066197415911, "grad_norm": 1.109375, "learning_rate": 0.00019953845790160677, "loss": 1.6327, "step": 791 }, { "epoch": 0.02033633917008093, "grad_norm": 1.015625, "learning_rate": 0.00019953802938929075, "loss": 1.6404, "step": 792 }, { "epoch": 0.02036201636600275, "grad_norm": 1.0078125, "learning_rate": 0.00019953760067860444, "loss": 1.5059, "step": 793 }, { "epoch": 0.02038769356192457, "grad_norm": 0.96875, "learning_rate": 0.0001995371717695487, "loss": 1.5551, "step": 794 }, { "epoch": 0.02041337075784639, "grad_norm": 1.0234375, "learning_rate": 0.00019953674266212437, "loss": 1.5828, "step": 795 }, { "epoch": 0.02043904795376821, "grad_norm": 1.0703125, "learning_rate": 0.0001995363133563323, "loss": 1.6665, "step": 796 }, { "epoch": 0.020464725149690027, "grad_norm": 1.0625, "learning_rate": 0.00019953588385217334, "loss": 1.4506, "step": 797 }, { "epoch": 0.020490402345611846, "grad_norm": 1.0390625, "learning_rate": 0.00019953545414964837, "loss": 1.5594, "step": 798 }, { "epoch": 0.020516079541533668, "grad_norm": 1.0703125, "learning_rate": 0.00019953502424875817, "loss": 1.5167, "step": 799 }, { "epoch": 0.020541756737455487, "grad_norm": 0.99609375, "learning_rate": 0.00019953459414950373, "loss": 1.3748, "step": 800 }, { "epoch": 0.020567433933377306, "grad_norm": 1.09375, "learning_rate": 0.00019953416385188583, "loss": 1.4606, "step": 801 }, { "epoch": 0.020593111129299124, "grad_norm": 1.0546875, "learning_rate": 0.0001995337333559053, "loss": 1.5209, "step": 802 }, { "epoch": 0.020618788325220943, "grad_norm": 1.0703125, "learning_rate": 0.00019953330266156302, "loss": 1.6689, "step": 803 }, { "epoch": 0.020644465521142765, "grad_norm": 0.9765625, "learning_rate": 0.0001995328717688599, "loss": 1.4536, "step": 804 }, { "epoch": 0.020670142717064584, "grad_norm": 0.96484375, "learning_rate": 0.00019953244067779673, "loss": 1.4721, "step": 805 }, { "epoch": 0.020695819912986402, "grad_norm": 1.078125, "learning_rate": 0.0001995320093883744, "loss": 1.7268, "step": 806 }, { "epoch": 0.02072149710890822, "grad_norm": 1.0, "learning_rate": 0.00019953157790059378, "loss": 1.3641, "step": 807 }, { "epoch": 0.02074717430483004, "grad_norm": 1.0859375, "learning_rate": 0.0001995311462144557, "loss": 1.5842, "step": 808 }, { "epoch": 0.020772851500751862, "grad_norm": 0.98828125, "learning_rate": 0.00019953071432996105, "loss": 1.4441, "step": 809 }, { "epoch": 0.02079852869667368, "grad_norm": 0.99609375, "learning_rate": 0.00019953028224711065, "loss": 1.5264, "step": 810 }, { "epoch": 0.0208242058925955, "grad_norm": 1.109375, "learning_rate": 0.00019952984996590543, "loss": 1.3372, "step": 811 }, { "epoch": 0.020849883088517318, "grad_norm": 1.0078125, "learning_rate": 0.0001995294174863462, "loss": 1.6151, "step": 812 }, { "epoch": 0.020875560284439137, "grad_norm": 1.0625, "learning_rate": 0.0001995289848084338, "loss": 1.4193, "step": 813 }, { "epoch": 0.020901237480360955, "grad_norm": 1.046875, "learning_rate": 0.00019952855193216916, "loss": 1.4436, "step": 814 }, { "epoch": 0.020926914676282778, "grad_norm": 0.9921875, "learning_rate": 0.00019952811885755306, "loss": 1.6566, "step": 815 }, { "epoch": 0.020952591872204596, "grad_norm": 1.0703125, "learning_rate": 0.00019952768558458646, "loss": 1.5506, "step": 816 }, { "epoch": 0.020978269068126415, "grad_norm": 0.99609375, "learning_rate": 0.00019952725211327014, "loss": 1.5133, "step": 817 }, { "epoch": 0.021003946264048234, "grad_norm": 0.9921875, "learning_rate": 0.000199526818443605, "loss": 1.4206, "step": 818 }, { "epoch": 0.021029623459970052, "grad_norm": 1.015625, "learning_rate": 0.0001995263845755919, "loss": 1.5456, "step": 819 }, { "epoch": 0.021055300655891875, "grad_norm": 0.9765625, "learning_rate": 0.00019952595050923172, "loss": 1.4414, "step": 820 }, { "epoch": 0.021080977851813693, "grad_norm": 1.0234375, "learning_rate": 0.0001995255162445253, "loss": 1.4631, "step": 821 }, { "epoch": 0.021106655047735512, "grad_norm": 0.9453125, "learning_rate": 0.00019952508178147353, "loss": 1.5066, "step": 822 }, { "epoch": 0.02113233224365733, "grad_norm": 1.0390625, "learning_rate": 0.00019952464712007725, "loss": 1.5715, "step": 823 }, { "epoch": 0.02115800943957915, "grad_norm": 1.0234375, "learning_rate": 0.00019952421226033734, "loss": 1.3491, "step": 824 }, { "epoch": 0.02118368663550097, "grad_norm": 0.9453125, "learning_rate": 0.00019952377720225468, "loss": 1.4982, "step": 825 }, { "epoch": 0.02120936383142279, "grad_norm": 1.109375, "learning_rate": 0.0001995233419458301, "loss": 1.6414, "step": 826 }, { "epoch": 0.02123504102734461, "grad_norm": 1.0, "learning_rate": 0.00019952290649106452, "loss": 1.5785, "step": 827 }, { "epoch": 0.021260718223266428, "grad_norm": 1.0859375, "learning_rate": 0.00019952247083795873, "loss": 1.4548, "step": 828 }, { "epoch": 0.021286395419188246, "grad_norm": 1.0390625, "learning_rate": 0.00019952203498651368, "loss": 1.4499, "step": 829 }, { "epoch": 0.02131207261511007, "grad_norm": 1.0546875, "learning_rate": 0.00019952159893673018, "loss": 1.4731, "step": 830 }, { "epoch": 0.021337749811031887, "grad_norm": 1.0078125, "learning_rate": 0.00019952116268860914, "loss": 1.377, "step": 831 }, { "epoch": 0.021363427006953706, "grad_norm": 0.95703125, "learning_rate": 0.00019952072624215142, "loss": 1.4716, "step": 832 }, { "epoch": 0.021389104202875524, "grad_norm": 0.94921875, "learning_rate": 0.00019952028959735788, "loss": 1.4359, "step": 833 }, { "epoch": 0.021414781398797343, "grad_norm": 1.0625, "learning_rate": 0.00019951985275422937, "loss": 1.6344, "step": 834 }, { "epoch": 0.021440458594719165, "grad_norm": 1.0703125, "learning_rate": 0.0001995194157127668, "loss": 1.5806, "step": 835 }, { "epoch": 0.021466135790640984, "grad_norm": 1.0625, "learning_rate": 0.00019951897847297103, "loss": 1.6625, "step": 836 }, { "epoch": 0.021491812986562803, "grad_norm": 1.0625, "learning_rate": 0.00019951854103484294, "loss": 1.3793, "step": 837 }, { "epoch": 0.02151749018248462, "grad_norm": 1.015625, "learning_rate": 0.00019951810339838334, "loss": 1.4103, "step": 838 }, { "epoch": 0.02154316737840644, "grad_norm": 1.078125, "learning_rate": 0.00019951766556359316, "loss": 1.5541, "step": 839 }, { "epoch": 0.02156884457432826, "grad_norm": 0.9765625, "learning_rate": 0.0001995172275304733, "loss": 1.5021, "step": 840 }, { "epoch": 0.02159452177025008, "grad_norm": 0.94140625, "learning_rate": 0.00019951678929902458, "loss": 1.6555, "step": 841 }, { "epoch": 0.0216201989661719, "grad_norm": 1.0546875, "learning_rate": 0.00019951635086924786, "loss": 1.5905, "step": 842 }, { "epoch": 0.02164587616209372, "grad_norm": 1.140625, "learning_rate": 0.00019951591224114408, "loss": 1.6506, "step": 843 }, { "epoch": 0.021671553358015537, "grad_norm": 1.0390625, "learning_rate": 0.00019951547341471405, "loss": 1.699, "step": 844 }, { "epoch": 0.021697230553937356, "grad_norm": 0.97265625, "learning_rate": 0.0001995150343899587, "loss": 1.5775, "step": 845 }, { "epoch": 0.021722907749859178, "grad_norm": 1.0, "learning_rate": 0.00019951459516687884, "loss": 1.4354, "step": 846 }, { "epoch": 0.021748584945780997, "grad_norm": 0.953125, "learning_rate": 0.00019951415574547538, "loss": 1.5518, "step": 847 }, { "epoch": 0.021774262141702815, "grad_norm": 1.5234375, "learning_rate": 0.0001995137161257492, "loss": 1.6481, "step": 848 }, { "epoch": 0.021799939337624634, "grad_norm": 1.0859375, "learning_rate": 0.0001995132763077012, "loss": 1.5445, "step": 849 }, { "epoch": 0.021825616533546453, "grad_norm": 1.0078125, "learning_rate": 0.00019951283629133222, "loss": 1.3666, "step": 850 }, { "epoch": 0.021851293729468275, "grad_norm": 1.6640625, "learning_rate": 0.00019951239607664313, "loss": 1.6768, "step": 851 }, { "epoch": 0.021876970925390093, "grad_norm": 0.96875, "learning_rate": 0.00019951195566363482, "loss": 1.4034, "step": 852 }, { "epoch": 0.021902648121311912, "grad_norm": 1.046875, "learning_rate": 0.00019951151505230818, "loss": 1.4673, "step": 853 }, { "epoch": 0.02192832531723373, "grad_norm": 1.046875, "learning_rate": 0.0001995110742426641, "loss": 1.566, "step": 854 }, { "epoch": 0.02195400251315555, "grad_norm": 0.96875, "learning_rate": 0.00019951063323470344, "loss": 1.3887, "step": 855 }, { "epoch": 0.02197967970907737, "grad_norm": 1.046875, "learning_rate": 0.00019951019202842703, "loss": 1.424, "step": 856 }, { "epoch": 0.02200535690499919, "grad_norm": 1.0234375, "learning_rate": 0.00019950975062383582, "loss": 1.533, "step": 857 }, { "epoch": 0.02203103410092101, "grad_norm": 0.9765625, "learning_rate": 0.0001995093090209307, "loss": 1.4614, "step": 858 }, { "epoch": 0.022056711296842828, "grad_norm": 1.0078125, "learning_rate": 0.00019950886721971248, "loss": 1.6161, "step": 859 }, { "epoch": 0.022082388492764646, "grad_norm": 1.0234375, "learning_rate": 0.00019950842522018208, "loss": 1.4764, "step": 860 }, { "epoch": 0.02210806568868647, "grad_norm": 0.95703125, "learning_rate": 0.00019950798302234037, "loss": 1.4672, "step": 861 }, { "epoch": 0.022133742884608287, "grad_norm": 1.0859375, "learning_rate": 0.00019950754062618824, "loss": 1.8103, "step": 862 }, { "epoch": 0.022159420080530106, "grad_norm": 1.046875, "learning_rate": 0.0001995070980317266, "loss": 1.5803, "step": 863 }, { "epoch": 0.022185097276451925, "grad_norm": 0.9375, "learning_rate": 0.00019950665523895626, "loss": 1.4217, "step": 864 }, { "epoch": 0.022210774472373743, "grad_norm": 1.078125, "learning_rate": 0.00019950621224787816, "loss": 1.5597, "step": 865 }, { "epoch": 0.022236451668295562, "grad_norm": 0.98046875, "learning_rate": 0.00019950576905849318, "loss": 1.4953, "step": 866 }, { "epoch": 0.022262128864217384, "grad_norm": 1.046875, "learning_rate": 0.0001995053256708022, "loss": 1.4599, "step": 867 }, { "epoch": 0.022287806060139203, "grad_norm": 1.0390625, "learning_rate": 0.00019950488208480606, "loss": 1.3956, "step": 868 }, { "epoch": 0.02231348325606102, "grad_norm": 0.99609375, "learning_rate": 0.0001995044383005057, "loss": 1.6099, "step": 869 }, { "epoch": 0.02233916045198284, "grad_norm": 1.046875, "learning_rate": 0.00019950399431790196, "loss": 1.6576, "step": 870 }, { "epoch": 0.02236483764790466, "grad_norm": 0.94921875, "learning_rate": 0.00019950355013699576, "loss": 1.2904, "step": 871 }, { "epoch": 0.02239051484382648, "grad_norm": 0.93359375, "learning_rate": 0.000199503105757788, "loss": 1.5904, "step": 872 }, { "epoch": 0.0224161920397483, "grad_norm": 1.0078125, "learning_rate": 0.00019950266118027953, "loss": 1.4324, "step": 873 }, { "epoch": 0.02244186923567012, "grad_norm": 1.03125, "learning_rate": 0.00019950221640447122, "loss": 1.458, "step": 874 }, { "epoch": 0.022467546431591937, "grad_norm": 1.046875, "learning_rate": 0.00019950177143036396, "loss": 1.6217, "step": 875 }, { "epoch": 0.022493223627513756, "grad_norm": 1.265625, "learning_rate": 0.00019950132625795873, "loss": 1.4882, "step": 876 }, { "epoch": 0.022518900823435578, "grad_norm": 1.03125, "learning_rate": 0.00019950088088725626, "loss": 1.6471, "step": 877 }, { "epoch": 0.022544578019357397, "grad_norm": 1.078125, "learning_rate": 0.0001995004353182576, "loss": 1.484, "step": 878 }, { "epoch": 0.022570255215279215, "grad_norm": 1.09375, "learning_rate": 0.0001994999895509635, "loss": 1.4079, "step": 879 }, { "epoch": 0.022595932411201034, "grad_norm": 1.0234375, "learning_rate": 0.00019949954358537494, "loss": 1.4707, "step": 880 }, { "epoch": 0.022621609607122853, "grad_norm": 1.015625, "learning_rate": 0.00019949909742149278, "loss": 1.6398, "step": 881 }, { "epoch": 0.022647286803044675, "grad_norm": 1.015625, "learning_rate": 0.0001994986510593179, "loss": 1.4026, "step": 882 }, { "epoch": 0.022672963998966494, "grad_norm": 0.9375, "learning_rate": 0.0001994982044988512, "loss": 1.4445, "step": 883 }, { "epoch": 0.022698641194888312, "grad_norm": 0.9296875, "learning_rate": 0.00019949775774009355, "loss": 1.3877, "step": 884 }, { "epoch": 0.02272431839081013, "grad_norm": 0.984375, "learning_rate": 0.00019949731078304587, "loss": 1.58, "step": 885 }, { "epoch": 0.02274999558673195, "grad_norm": 0.9921875, "learning_rate": 0.00019949686362770902, "loss": 1.3893, "step": 886 }, { "epoch": 0.022775672782653772, "grad_norm": 1.125, "learning_rate": 0.00019949641627408393, "loss": 1.5333, "step": 887 }, { "epoch": 0.02280134997857559, "grad_norm": 0.9453125, "learning_rate": 0.00019949596872217146, "loss": 1.3745, "step": 888 }, { "epoch": 0.02282702717449741, "grad_norm": 1.015625, "learning_rate": 0.00019949552097197254, "loss": 1.4622, "step": 889 }, { "epoch": 0.022852704370419228, "grad_norm": 1.1328125, "learning_rate": 0.00019949507302348797, "loss": 1.4962, "step": 890 }, { "epoch": 0.022878381566341047, "grad_norm": 1.0234375, "learning_rate": 0.00019949462487671874, "loss": 1.4499, "step": 891 }, { "epoch": 0.022904058762262865, "grad_norm": 1.03125, "learning_rate": 0.00019949417653166573, "loss": 1.5165, "step": 892 }, { "epoch": 0.022929735958184688, "grad_norm": 0.98046875, "learning_rate": 0.00019949372798832982, "loss": 1.4468, "step": 893 }, { "epoch": 0.022955413154106506, "grad_norm": 1.078125, "learning_rate": 0.00019949327924671185, "loss": 1.5104, "step": 894 }, { "epoch": 0.022981090350028325, "grad_norm": 1.0078125, "learning_rate": 0.00019949283030681279, "loss": 1.3732, "step": 895 }, { "epoch": 0.023006767545950144, "grad_norm": 1.0859375, "learning_rate": 0.0001994923811686335, "loss": 1.4353, "step": 896 }, { "epoch": 0.023032444741871962, "grad_norm": 1.0078125, "learning_rate": 0.0001994919318321749, "loss": 1.4338, "step": 897 }, { "epoch": 0.023058121937793784, "grad_norm": 1.015625, "learning_rate": 0.00019949148229743785, "loss": 1.4381, "step": 898 }, { "epoch": 0.023083799133715603, "grad_norm": 1.0859375, "learning_rate": 0.00019949103256442326, "loss": 1.6771, "step": 899 }, { "epoch": 0.023109476329637422, "grad_norm": 1.0703125, "learning_rate": 0.00019949058263313204, "loss": 1.8138, "step": 900 }, { "epoch": 0.02313515352555924, "grad_norm": 1.0390625, "learning_rate": 0.00019949013250356506, "loss": 1.5203, "step": 901 }, { "epoch": 0.02316083072148106, "grad_norm": 1.0625, "learning_rate": 0.00019948968217572323, "loss": 1.6572, "step": 902 }, { "epoch": 0.02318650791740288, "grad_norm": 0.87890625, "learning_rate": 0.00019948923164960747, "loss": 1.5407, "step": 903 }, { "epoch": 0.0232121851133247, "grad_norm": 0.921875, "learning_rate": 0.00019948878092521867, "loss": 1.5301, "step": 904 }, { "epoch": 0.02323786230924652, "grad_norm": 0.96875, "learning_rate": 0.00019948833000255767, "loss": 1.4248, "step": 905 }, { "epoch": 0.023263539505168337, "grad_norm": 0.93359375, "learning_rate": 0.00019948787888162545, "loss": 1.4084, "step": 906 }, { "epoch": 0.023289216701090156, "grad_norm": 0.98046875, "learning_rate": 0.00019948742756242287, "loss": 1.3673, "step": 907 }, { "epoch": 0.02331489389701198, "grad_norm": 1.0703125, "learning_rate": 0.00019948697604495083, "loss": 1.5057, "step": 908 }, { "epoch": 0.023340571092933797, "grad_norm": 0.9921875, "learning_rate": 0.00019948652432921022, "loss": 1.2903, "step": 909 }, { "epoch": 0.023366248288855616, "grad_norm": 1.09375, "learning_rate": 0.00019948607241520198, "loss": 1.5024, "step": 910 }, { "epoch": 0.023391925484777434, "grad_norm": 0.97265625, "learning_rate": 0.00019948562030292698, "loss": 1.6616, "step": 911 }, { "epoch": 0.023417602680699253, "grad_norm": 1.0078125, "learning_rate": 0.0001994851679923861, "loss": 1.4012, "step": 912 }, { "epoch": 0.023443279876621075, "grad_norm": 1.0234375, "learning_rate": 0.0001994847154835803, "loss": 1.5844, "step": 913 }, { "epoch": 0.023468957072542894, "grad_norm": 1.0, "learning_rate": 0.00019948426277651042, "loss": 1.3435, "step": 914 }, { "epoch": 0.023494634268464713, "grad_norm": 1.0546875, "learning_rate": 0.00019948380987117742, "loss": 1.5494, "step": 915 }, { "epoch": 0.02352031146438653, "grad_norm": 0.9765625, "learning_rate": 0.00019948335676758214, "loss": 1.5103, "step": 916 }, { "epoch": 0.02354598866030835, "grad_norm": 1.015625, "learning_rate": 0.0001994829034657255, "loss": 1.4475, "step": 917 }, { "epoch": 0.02357166585623017, "grad_norm": 1.046875, "learning_rate": 0.0001994824499656085, "loss": 1.6603, "step": 918 }, { "epoch": 0.02359734305215199, "grad_norm": 0.984375, "learning_rate": 0.0001994819962672319, "loss": 1.4902, "step": 919 }, { "epoch": 0.02362302024807381, "grad_norm": 0.96484375, "learning_rate": 0.00019948154237059667, "loss": 1.7293, "step": 920 }, { "epoch": 0.023648697443995628, "grad_norm": 0.98828125, "learning_rate": 0.00019948108827570372, "loss": 1.6226, "step": 921 }, { "epoch": 0.023674374639917447, "grad_norm": 1.0546875, "learning_rate": 0.00019948063398255394, "loss": 1.4017, "step": 922 }, { "epoch": 0.023700051835839266, "grad_norm": 0.96484375, "learning_rate": 0.00019948017949114827, "loss": 1.2714, "step": 923 }, { "epoch": 0.023725729031761088, "grad_norm": 1.0078125, "learning_rate": 0.00019947972480148756, "loss": 1.6549, "step": 924 }, { "epoch": 0.023751406227682906, "grad_norm": 0.9765625, "learning_rate": 0.00019947926991357274, "loss": 1.667, "step": 925 }, { "epoch": 0.023777083423604725, "grad_norm": 1.03125, "learning_rate": 0.00019947881482740477, "loss": 1.4544, "step": 926 }, { "epoch": 0.023802760619526544, "grad_norm": 0.94921875, "learning_rate": 0.00019947835954298447, "loss": 1.4719, "step": 927 }, { "epoch": 0.023828437815448363, "grad_norm": 1.0390625, "learning_rate": 0.0001994779040603128, "loss": 1.5881, "step": 928 }, { "epoch": 0.023854115011370185, "grad_norm": 0.9375, "learning_rate": 0.00019947744837939065, "loss": 1.4513, "step": 929 }, { "epoch": 0.023879792207292003, "grad_norm": 1.09375, "learning_rate": 0.0001994769925002189, "loss": 1.5569, "step": 930 }, { "epoch": 0.023905469403213822, "grad_norm": 0.97265625, "learning_rate": 0.00019947653642279854, "loss": 1.2876, "step": 931 }, { "epoch": 0.02393114659913564, "grad_norm": 1.0546875, "learning_rate": 0.0001994760801471304, "loss": 1.426, "step": 932 }, { "epoch": 0.02395682379505746, "grad_norm": 1.0703125, "learning_rate": 0.0001994756236732154, "loss": 1.4106, "step": 933 }, { "epoch": 0.02398250099097928, "grad_norm": 1.1015625, "learning_rate": 0.0001994751670010545, "loss": 1.6208, "step": 934 }, { "epoch": 0.0240081781869011, "grad_norm": 1.0625, "learning_rate": 0.00019947471013064856, "loss": 1.4117, "step": 935 }, { "epoch": 0.02403385538282292, "grad_norm": 1.0546875, "learning_rate": 0.00019947425306199852, "loss": 1.4862, "step": 936 }, { "epoch": 0.024059532578744738, "grad_norm": 0.90625, "learning_rate": 0.00019947379579510525, "loss": 1.3705, "step": 937 }, { "epoch": 0.024085209774666556, "grad_norm": 0.9765625, "learning_rate": 0.00019947333832996973, "loss": 1.4999, "step": 938 }, { "epoch": 0.02411088697058838, "grad_norm": 0.98046875, "learning_rate": 0.0001994728806665928, "loss": 1.4521, "step": 939 }, { "epoch": 0.024136564166510197, "grad_norm": 0.9453125, "learning_rate": 0.00019947242280497546, "loss": 1.4601, "step": 940 }, { "epoch": 0.024162241362432016, "grad_norm": 1.015625, "learning_rate": 0.0001994719647451185, "loss": 1.5144, "step": 941 }, { "epoch": 0.024187918558353835, "grad_norm": 1.0234375, "learning_rate": 0.00019947150648702292, "loss": 1.4534, "step": 942 }, { "epoch": 0.024213595754275653, "grad_norm": 1.015625, "learning_rate": 0.00019947104803068964, "loss": 1.4953, "step": 943 }, { "epoch": 0.024239272950197472, "grad_norm": 0.96484375, "learning_rate": 0.0001994705893761195, "loss": 1.5119, "step": 944 }, { "epoch": 0.024264950146119294, "grad_norm": 0.984375, "learning_rate": 0.0001994701305233135, "loss": 1.3314, "step": 945 }, { "epoch": 0.024290627342041113, "grad_norm": 1.0, "learning_rate": 0.0001994696714722725, "loss": 1.374, "step": 946 }, { "epoch": 0.02431630453796293, "grad_norm": 1.03125, "learning_rate": 0.00019946921222299745, "loss": 1.4049, "step": 947 }, { "epoch": 0.02434198173388475, "grad_norm": 0.96484375, "learning_rate": 0.00019946875277548922, "loss": 1.5508, "step": 948 }, { "epoch": 0.02436765892980657, "grad_norm": 1.046875, "learning_rate": 0.00019946829312974875, "loss": 1.531, "step": 949 }, { "epoch": 0.02439333612572839, "grad_norm": 0.984375, "learning_rate": 0.00019946783328577696, "loss": 1.592, "step": 950 }, { "epoch": 0.02441901332165021, "grad_norm": 1.046875, "learning_rate": 0.00019946737324357477, "loss": 1.2547, "step": 951 }, { "epoch": 0.02444469051757203, "grad_norm": 0.96484375, "learning_rate": 0.0001994669130031431, "loss": 1.5151, "step": 952 }, { "epoch": 0.024470367713493847, "grad_norm": 1.0703125, "learning_rate": 0.00019946645256448286, "loss": 1.53, "step": 953 }, { "epoch": 0.024496044909415666, "grad_norm": 1.0859375, "learning_rate": 0.00019946599192759493, "loss": 1.4622, "step": 954 }, { "epoch": 0.024521722105337488, "grad_norm": 0.9921875, "learning_rate": 0.00019946553109248028, "loss": 1.4093, "step": 955 }, { "epoch": 0.024547399301259307, "grad_norm": 0.96484375, "learning_rate": 0.0001994650700591398, "loss": 1.5277, "step": 956 }, { "epoch": 0.024573076497181125, "grad_norm": 0.9765625, "learning_rate": 0.00019946460882757443, "loss": 1.4143, "step": 957 }, { "epoch": 0.024598753693102944, "grad_norm": 0.953125, "learning_rate": 0.00019946414739778509, "loss": 1.5181, "step": 958 }, { "epoch": 0.024624430889024763, "grad_norm": 1.0078125, "learning_rate": 0.00019946368576977266, "loss": 1.6324, "step": 959 }, { "epoch": 0.024650108084946585, "grad_norm": 0.94921875, "learning_rate": 0.00019946322394353812, "loss": 1.4392, "step": 960 }, { "epoch": 0.024675785280868404, "grad_norm": 0.9765625, "learning_rate": 0.00019946276191908235, "loss": 1.5506, "step": 961 }, { "epoch": 0.024701462476790222, "grad_norm": 0.91015625, "learning_rate": 0.00019946229969640625, "loss": 1.5007, "step": 962 }, { "epoch": 0.02472713967271204, "grad_norm": 1.0546875, "learning_rate": 0.0001994618372755108, "loss": 1.3606, "step": 963 }, { "epoch": 0.02475281686863386, "grad_norm": 0.984375, "learning_rate": 0.00019946137465639692, "loss": 1.3803, "step": 964 }, { "epoch": 0.024778494064555682, "grad_norm": 0.99609375, "learning_rate": 0.00019946091183906548, "loss": 1.3568, "step": 965 }, { "epoch": 0.0248041712604775, "grad_norm": 1.0546875, "learning_rate": 0.00019946044882351743, "loss": 1.5297, "step": 966 }, { "epoch": 0.02482984845639932, "grad_norm": 0.9921875, "learning_rate": 0.00019945998560975367, "loss": 1.5713, "step": 967 }, { "epoch": 0.024855525652321138, "grad_norm": 1.0234375, "learning_rate": 0.00019945952219777518, "loss": 1.3684, "step": 968 }, { "epoch": 0.024881202848242957, "grad_norm": 1.015625, "learning_rate": 0.0001994590585875828, "loss": 1.5556, "step": 969 }, { "epoch": 0.024906880044164775, "grad_norm": 0.95703125, "learning_rate": 0.00019945859477917753, "loss": 1.5417, "step": 970 }, { "epoch": 0.024932557240086597, "grad_norm": 1.0234375, "learning_rate": 0.00019945813077256025, "loss": 1.6196, "step": 971 }, { "epoch": 0.024958234436008416, "grad_norm": 1.0390625, "learning_rate": 0.00019945766656773192, "loss": 1.608, "step": 972 }, { "epoch": 0.024983911631930235, "grad_norm": 1.0859375, "learning_rate": 0.00019945720216469343, "loss": 1.4669, "step": 973 }, { "epoch": 0.025009588827852054, "grad_norm": 1.0, "learning_rate": 0.0001994567375634457, "loss": 1.6096, "step": 974 }, { "epoch": 0.025035266023773872, "grad_norm": 1.0390625, "learning_rate": 0.0001994562727639897, "loss": 1.5775, "step": 975 }, { "epoch": 0.025060943219695694, "grad_norm": 0.9921875, "learning_rate": 0.00019945580776632634, "loss": 1.5278, "step": 976 }, { "epoch": 0.025086620415617513, "grad_norm": 1.03125, "learning_rate": 0.00019945534257045653, "loss": 1.4514, "step": 977 }, { "epoch": 0.025112297611539332, "grad_norm": 0.92578125, "learning_rate": 0.0001994548771763812, "loss": 1.4368, "step": 978 }, { "epoch": 0.02513797480746115, "grad_norm": 1.0, "learning_rate": 0.0001994544115841013, "loss": 1.6954, "step": 979 }, { "epoch": 0.02516365200338297, "grad_norm": 1.09375, "learning_rate": 0.0001994539457936177, "loss": 1.4419, "step": 980 }, { "epoch": 0.02518932919930479, "grad_norm": 0.9375, "learning_rate": 0.0001994534798049314, "loss": 1.4578, "step": 981 }, { "epoch": 0.02521500639522661, "grad_norm": 1.03125, "learning_rate": 0.00019945301361804331, "loss": 1.4524, "step": 982 }, { "epoch": 0.02524068359114843, "grad_norm": 0.99609375, "learning_rate": 0.00019945254723295435, "loss": 1.4384, "step": 983 }, { "epoch": 0.025266360787070247, "grad_norm": 0.9921875, "learning_rate": 0.00019945208064966544, "loss": 1.6247, "step": 984 }, { "epoch": 0.025292037982992066, "grad_norm": 0.90625, "learning_rate": 0.0001994516138681775, "loss": 1.391, "step": 985 }, { "epoch": 0.025317715178913888, "grad_norm": 1.0234375, "learning_rate": 0.00019945114688849148, "loss": 1.5169, "step": 986 }, { "epoch": 0.025343392374835707, "grad_norm": 0.953125, "learning_rate": 0.00019945067971060832, "loss": 1.2809, "step": 987 }, { "epoch": 0.025369069570757526, "grad_norm": 1.0859375, "learning_rate": 0.00019945021233452894, "loss": 1.7627, "step": 988 }, { "epoch": 0.025394746766679344, "grad_norm": 1.0703125, "learning_rate": 0.00019944974476025426, "loss": 1.6699, "step": 989 }, { "epoch": 0.025420423962601163, "grad_norm": 1.0234375, "learning_rate": 0.00019944927698778523, "loss": 1.4578, "step": 990 }, { "epoch": 0.025446101158522985, "grad_norm": 0.984375, "learning_rate": 0.00019944880901712276, "loss": 1.4202, "step": 991 }, { "epoch": 0.025471778354444804, "grad_norm": 1.03125, "learning_rate": 0.0001994483408482678, "loss": 1.5926, "step": 992 }, { "epoch": 0.025497455550366623, "grad_norm": 0.99609375, "learning_rate": 0.00019944787248122128, "loss": 1.4173, "step": 993 }, { "epoch": 0.02552313274628844, "grad_norm": 1.0234375, "learning_rate": 0.00019944740391598416, "loss": 1.5358, "step": 994 }, { "epoch": 0.02554880994221026, "grad_norm": 1.0703125, "learning_rate": 0.0001994469351525573, "loss": 1.4454, "step": 995 }, { "epoch": 0.02557448713813208, "grad_norm": 1.015625, "learning_rate": 0.00019944646619094169, "loss": 1.5302, "step": 996 }, { "epoch": 0.0256001643340539, "grad_norm": 1.1015625, "learning_rate": 0.00019944599703113829, "loss": 1.4566, "step": 997 }, { "epoch": 0.02562584152997572, "grad_norm": 1.015625, "learning_rate": 0.000199445527673148, "loss": 1.5232, "step": 998 }, { "epoch": 0.025651518725897538, "grad_norm": 1.0, "learning_rate": 0.00019944505811697174, "loss": 1.5513, "step": 999 }, { "epoch": 0.025677195921819357, "grad_norm": 1.078125, "learning_rate": 0.00019944458836261043, "loss": 1.5472, "step": 1000 }, { "epoch": 0.025677195921819357, "eval_loss": 1.4854185581207275, "eval_model_preparation_time": 0.0065, "eval_runtime": 406.1321, "eval_samples_per_second": 24.623, "eval_steps_per_second": 0.771, "step": 1000 }, { "epoch": 0.025702873117741176, "grad_norm": 0.98828125, "learning_rate": 0.0001994441184100651, "loss": 1.4708, "step": 1001 }, { "epoch": 0.025728550313662998, "grad_norm": 0.921875, "learning_rate": 0.00019944364825933656, "loss": 1.4844, "step": 1002 }, { "epoch": 0.025754227509584816, "grad_norm": 1.03125, "learning_rate": 0.00019944317791042586, "loss": 1.4611, "step": 1003 }, { "epoch": 0.025779904705506635, "grad_norm": 0.98046875, "learning_rate": 0.00019944270736333387, "loss": 1.5811, "step": 1004 }, { "epoch": 0.025805581901428454, "grad_norm": 0.9921875, "learning_rate": 0.00019944223661806155, "loss": 1.6801, "step": 1005 }, { "epoch": 0.025831259097350272, "grad_norm": 0.94921875, "learning_rate": 0.00019944176567460983, "loss": 1.5402, "step": 1006 }, { "epoch": 0.025856936293272095, "grad_norm": 1.0546875, "learning_rate": 0.00019944129453297967, "loss": 1.6332, "step": 1007 }, { "epoch": 0.025882613489193913, "grad_norm": 1.0078125, "learning_rate": 0.000199440823193172, "loss": 1.3894, "step": 1008 }, { "epoch": 0.025908290685115732, "grad_norm": 1.0625, "learning_rate": 0.00019944035165518772, "loss": 1.7039, "step": 1009 }, { "epoch": 0.02593396788103755, "grad_norm": 1.03125, "learning_rate": 0.00019943987991902784, "loss": 1.4799, "step": 1010 }, { "epoch": 0.02595964507695937, "grad_norm": 0.98046875, "learning_rate": 0.00019943940798469322, "loss": 1.5618, "step": 1011 }, { "epoch": 0.02598532227288119, "grad_norm": 0.9765625, "learning_rate": 0.00019943893585218486, "loss": 1.4429, "step": 1012 }, { "epoch": 0.02601099946880301, "grad_norm": 0.96484375, "learning_rate": 0.00019943846352150367, "loss": 1.5287, "step": 1013 }, { "epoch": 0.02603667666472483, "grad_norm": 1.0078125, "learning_rate": 0.00019943799099265063, "loss": 1.5508, "step": 1014 }, { "epoch": 0.026062353860646648, "grad_norm": 1.0234375, "learning_rate": 0.00019943751826562663, "loss": 1.572, "step": 1015 }, { "epoch": 0.026088031056568466, "grad_norm": 0.9765625, "learning_rate": 0.00019943704534043267, "loss": 1.4846, "step": 1016 }, { "epoch": 0.02611370825249029, "grad_norm": 1.0, "learning_rate": 0.00019943657221706965, "loss": 1.5034, "step": 1017 }, { "epoch": 0.026139385448412107, "grad_norm": 1.0234375, "learning_rate": 0.0001994360988955385, "loss": 1.4575, "step": 1018 }, { "epoch": 0.026165062644333926, "grad_norm": 0.97265625, "learning_rate": 0.00019943562537584023, "loss": 1.5424, "step": 1019 }, { "epoch": 0.026190739840255745, "grad_norm": 1.078125, "learning_rate": 0.00019943515165797568, "loss": 1.4437, "step": 1020 }, { "epoch": 0.026216417036177563, "grad_norm": 0.94921875, "learning_rate": 0.0001994346777419459, "loss": 1.5223, "step": 1021 }, { "epoch": 0.026242094232099382, "grad_norm": 1.0859375, "learning_rate": 0.00019943420362775177, "loss": 1.4361, "step": 1022 }, { "epoch": 0.026267771428021204, "grad_norm": 0.99609375, "learning_rate": 0.0001994337293153943, "loss": 1.3712, "step": 1023 }, { "epoch": 0.026293448623943023, "grad_norm": 0.953125, "learning_rate": 0.00019943325480487433, "loss": 1.3299, "step": 1024 }, { "epoch": 0.02631912581986484, "grad_norm": 1.0234375, "learning_rate": 0.0001994327800961929, "loss": 1.5511, "step": 1025 }, { "epoch": 0.02634480301578666, "grad_norm": 0.98046875, "learning_rate": 0.00019943230518935088, "loss": 1.412, "step": 1026 }, { "epoch": 0.02637048021170848, "grad_norm": 1.078125, "learning_rate": 0.0001994318300843493, "loss": 1.395, "step": 1027 }, { "epoch": 0.0263961574076303, "grad_norm": 0.984375, "learning_rate": 0.00019943135478118904, "loss": 1.4236, "step": 1028 }, { "epoch": 0.02642183460355212, "grad_norm": 0.94140625, "learning_rate": 0.0001994308792798711, "loss": 1.5184, "step": 1029 }, { "epoch": 0.02644751179947394, "grad_norm": 0.9453125, "learning_rate": 0.00019943040358039635, "loss": 1.3486, "step": 1030 }, { "epoch": 0.026473188995395757, "grad_norm": 1.0078125, "learning_rate": 0.00019942992768276583, "loss": 1.4837, "step": 1031 }, { "epoch": 0.026498866191317576, "grad_norm": 1.0234375, "learning_rate": 0.00019942945158698042, "loss": 1.6237, "step": 1032 }, { "epoch": 0.026524543387239398, "grad_norm": 1.1796875, "learning_rate": 0.00019942897529304113, "loss": 1.4504, "step": 1033 }, { "epoch": 0.026550220583161217, "grad_norm": 1.0625, "learning_rate": 0.00019942849880094884, "loss": 1.5517, "step": 1034 }, { "epoch": 0.026575897779083035, "grad_norm": 1.015625, "learning_rate": 0.00019942802211070455, "loss": 1.45, "step": 1035 }, { "epoch": 0.026601574975004854, "grad_norm": 1.0703125, "learning_rate": 0.00019942754522230918, "loss": 1.637, "step": 1036 }, { "epoch": 0.026627252170926673, "grad_norm": 1.125, "learning_rate": 0.00019942706813576367, "loss": 1.4628, "step": 1037 }, { "epoch": 0.026652929366848495, "grad_norm": 1.0390625, "learning_rate": 0.00019942659085106905, "loss": 1.3764, "step": 1038 }, { "epoch": 0.026678606562770314, "grad_norm": 0.98046875, "learning_rate": 0.0001994261133682262, "loss": 1.6435, "step": 1039 }, { "epoch": 0.026704283758692132, "grad_norm": 1.03125, "learning_rate": 0.00019942563568723607, "loss": 1.4869, "step": 1040 }, { "epoch": 0.02672996095461395, "grad_norm": 1.125, "learning_rate": 0.00019942515780809963, "loss": 1.4512, "step": 1041 }, { "epoch": 0.02675563815053577, "grad_norm": 1.046875, "learning_rate": 0.00019942467973081785, "loss": 1.4988, "step": 1042 }, { "epoch": 0.026781315346457592, "grad_norm": 0.9765625, "learning_rate": 0.00019942420145539164, "loss": 1.5283, "step": 1043 }, { "epoch": 0.02680699254237941, "grad_norm": 0.98828125, "learning_rate": 0.00019942372298182198, "loss": 1.4573, "step": 1044 }, { "epoch": 0.02683266973830123, "grad_norm": 1.0703125, "learning_rate": 0.00019942324431010983, "loss": 1.5492, "step": 1045 }, { "epoch": 0.026858346934223048, "grad_norm": 1.0625, "learning_rate": 0.00019942276544025614, "loss": 1.3878, "step": 1046 }, { "epoch": 0.026884024130144867, "grad_norm": 1.03125, "learning_rate": 0.00019942228637226186, "loss": 1.5424, "step": 1047 }, { "epoch": 0.026909701326066685, "grad_norm": 1.09375, "learning_rate": 0.00019942180710612794, "loss": 1.5653, "step": 1048 }, { "epoch": 0.026935378521988507, "grad_norm": 1.15625, "learning_rate": 0.00019942132764185535, "loss": 1.4343, "step": 1049 }, { "epoch": 0.026961055717910326, "grad_norm": 1.03125, "learning_rate": 0.00019942084797944503, "loss": 1.4759, "step": 1050 }, { "epoch": 0.026986732913832145, "grad_norm": 1.0703125, "learning_rate": 0.00019942036811889792, "loss": 1.5807, "step": 1051 }, { "epoch": 0.027012410109753963, "grad_norm": 0.9609375, "learning_rate": 0.00019941988806021502, "loss": 1.3789, "step": 1052 }, { "epoch": 0.027038087305675782, "grad_norm": 1.0, "learning_rate": 0.00019941940780339725, "loss": 1.3312, "step": 1053 }, { "epoch": 0.027063764501597604, "grad_norm": 1.0546875, "learning_rate": 0.00019941892734844557, "loss": 1.4798, "step": 1054 }, { "epoch": 0.027089441697519423, "grad_norm": 1.0625, "learning_rate": 0.000199418446695361, "loss": 1.5944, "step": 1055 }, { "epoch": 0.02711511889344124, "grad_norm": 0.9453125, "learning_rate": 0.00019941796584414437, "loss": 1.3328, "step": 1056 }, { "epoch": 0.02714079608936306, "grad_norm": 1.046875, "learning_rate": 0.00019941748479479677, "loss": 1.4425, "step": 1057 }, { "epoch": 0.02716647328528488, "grad_norm": 1.0546875, "learning_rate": 0.0001994170035473191, "loss": 1.533, "step": 1058 }, { "epoch": 0.0271921504812067, "grad_norm": 0.98828125, "learning_rate": 0.00019941652210171232, "loss": 1.404, "step": 1059 }, { "epoch": 0.02721782767712852, "grad_norm": 0.94921875, "learning_rate": 0.00019941604045797736, "loss": 1.4773, "step": 1060 }, { "epoch": 0.02724350487305034, "grad_norm": 1.046875, "learning_rate": 0.00019941555861611524, "loss": 1.667, "step": 1061 }, { "epoch": 0.027269182068972157, "grad_norm": 1.015625, "learning_rate": 0.00019941507657612688, "loss": 1.5115, "step": 1062 }, { "epoch": 0.027294859264893976, "grad_norm": 1.0, "learning_rate": 0.00019941459433801324, "loss": 1.6065, "step": 1063 }, { "epoch": 0.027320536460815798, "grad_norm": 1.0625, "learning_rate": 0.00019941411190177533, "loss": 1.4143, "step": 1064 }, { "epoch": 0.027346213656737617, "grad_norm": 1.0859375, "learning_rate": 0.00019941362926741404, "loss": 1.4215, "step": 1065 }, { "epoch": 0.027371890852659436, "grad_norm": 0.93359375, "learning_rate": 0.00019941314643493038, "loss": 1.3561, "step": 1066 }, { "epoch": 0.027397568048581254, "grad_norm": 1.1171875, "learning_rate": 0.0001994126634043253, "loss": 1.4143, "step": 1067 }, { "epoch": 0.027423245244503073, "grad_norm": 0.98046875, "learning_rate": 0.00019941218017559978, "loss": 1.4436, "step": 1068 }, { "epoch": 0.027448922440424895, "grad_norm": 1.0234375, "learning_rate": 0.00019941169674875475, "loss": 1.387, "step": 1069 }, { "epoch": 0.027474599636346714, "grad_norm": 0.984375, "learning_rate": 0.00019941121312379116, "loss": 1.5323, "step": 1070 }, { "epoch": 0.027500276832268532, "grad_norm": 0.96484375, "learning_rate": 0.00019941072930071006, "loss": 1.5316, "step": 1071 }, { "epoch": 0.02752595402819035, "grad_norm": 1.0, "learning_rate": 0.00019941024527951232, "loss": 1.4795, "step": 1072 }, { "epoch": 0.02755163122411217, "grad_norm": 1.078125, "learning_rate": 0.00019940976106019894, "loss": 1.5218, "step": 1073 }, { "epoch": 0.02757730842003399, "grad_norm": 1.734375, "learning_rate": 0.0001994092766427709, "loss": 1.279, "step": 1074 }, { "epoch": 0.02760298561595581, "grad_norm": 0.984375, "learning_rate": 0.00019940879202722913, "loss": 1.5853, "step": 1075 }, { "epoch": 0.02762866281187763, "grad_norm": 1.078125, "learning_rate": 0.0001994083072135746, "loss": 1.4069, "step": 1076 }, { "epoch": 0.027654340007799448, "grad_norm": 1.0625, "learning_rate": 0.00019940782220180837, "loss": 1.5102, "step": 1077 }, { "epoch": 0.027680017203721267, "grad_norm": 1.0, "learning_rate": 0.0001994073369919313, "loss": 1.4527, "step": 1078 }, { "epoch": 0.027705694399643085, "grad_norm": 1.015625, "learning_rate": 0.00019940685158394432, "loss": 1.5117, "step": 1079 }, { "epoch": 0.027731371595564908, "grad_norm": 1.03125, "learning_rate": 0.00019940636597784854, "loss": 1.396, "step": 1080 }, { "epoch": 0.027757048791486726, "grad_norm": 1.0546875, "learning_rate": 0.00019940588017364482, "loss": 1.5491, "step": 1081 }, { "epoch": 0.027782725987408545, "grad_norm": 1.0703125, "learning_rate": 0.00019940539417133418, "loss": 1.3757, "step": 1082 }, { "epoch": 0.027808403183330364, "grad_norm": 0.98046875, "learning_rate": 0.00019940490797091753, "loss": 1.3879, "step": 1083 }, { "epoch": 0.027834080379252182, "grad_norm": 0.99609375, "learning_rate": 0.0001994044215723959, "loss": 1.4296, "step": 1084 }, { "epoch": 0.027859757575174005, "grad_norm": 0.9453125, "learning_rate": 0.00019940393497577024, "loss": 1.352, "step": 1085 }, { "epoch": 0.027885434771095823, "grad_norm": 1.1015625, "learning_rate": 0.00019940344818104153, "loss": 1.391, "step": 1086 }, { "epoch": 0.027911111967017642, "grad_norm": 1.0546875, "learning_rate": 0.0001994029611882107, "loss": 1.4751, "step": 1087 }, { "epoch": 0.02793678916293946, "grad_norm": 0.953125, "learning_rate": 0.00019940247399727876, "loss": 1.465, "step": 1088 }, { "epoch": 0.02796246635886128, "grad_norm": 1.046875, "learning_rate": 0.00019940198660824666, "loss": 1.3967, "step": 1089 }, { "epoch": 0.0279881435547831, "grad_norm": 1.0546875, "learning_rate": 0.0001994014990211154, "loss": 1.4257, "step": 1090 }, { "epoch": 0.02801382075070492, "grad_norm": 0.96484375, "learning_rate": 0.0001994010112358859, "loss": 1.4247, "step": 1091 }, { "epoch": 0.02803949794662674, "grad_norm": 1.0078125, "learning_rate": 0.0001994005232525592, "loss": 1.5331, "step": 1092 }, { "epoch": 0.028065175142548558, "grad_norm": 0.9140625, "learning_rate": 0.0001994000350711362, "loss": 1.411, "step": 1093 }, { "epoch": 0.028090852338470376, "grad_norm": 1.0234375, "learning_rate": 0.00019939954669161795, "loss": 1.3371, "step": 1094 }, { "epoch": 0.0281165295343922, "grad_norm": 1.0625, "learning_rate": 0.00019939905811400537, "loss": 1.5818, "step": 1095 }, { "epoch": 0.028142206730314017, "grad_norm": 1.0390625, "learning_rate": 0.00019939856933829942, "loss": 1.3699, "step": 1096 }, { "epoch": 0.028167883926235836, "grad_norm": 1.0859375, "learning_rate": 0.00019939808036450112, "loss": 1.4681, "step": 1097 }, { "epoch": 0.028193561122157654, "grad_norm": 1.0234375, "learning_rate": 0.00019939759119261141, "loss": 1.4487, "step": 1098 }, { "epoch": 0.028219238318079473, "grad_norm": 1.1328125, "learning_rate": 0.0001993971018226313, "loss": 1.4987, "step": 1099 }, { "epoch": 0.028244915514001292, "grad_norm": 1.109375, "learning_rate": 0.00019939661225456173, "loss": 1.5479, "step": 1100 }, { "epoch": 0.028270592709923114, "grad_norm": 1.0390625, "learning_rate": 0.00019939612248840368, "loss": 1.3609, "step": 1101 }, { "epoch": 0.028296269905844933, "grad_norm": 1.046875, "learning_rate": 0.00019939563252415816, "loss": 1.4547, "step": 1102 }, { "epoch": 0.02832194710176675, "grad_norm": 0.94140625, "learning_rate": 0.0001993951423618261, "loss": 1.5048, "step": 1103 }, { "epoch": 0.02834762429768857, "grad_norm": 0.91015625, "learning_rate": 0.00019939465200140855, "loss": 1.4501, "step": 1104 }, { "epoch": 0.02837330149361039, "grad_norm": 1.0546875, "learning_rate": 0.00019939416144290636, "loss": 1.6549, "step": 1105 }, { "epoch": 0.02839897868953221, "grad_norm": 0.9921875, "learning_rate": 0.00019939367068632066, "loss": 1.3473, "step": 1106 }, { "epoch": 0.02842465588545403, "grad_norm": 1.0546875, "learning_rate": 0.0001993931797316523, "loss": 1.3497, "step": 1107 }, { "epoch": 0.02845033308137585, "grad_norm": 0.97265625, "learning_rate": 0.00019939268857890232, "loss": 1.5023, "step": 1108 }, { "epoch": 0.028476010277297667, "grad_norm": 0.9765625, "learning_rate": 0.0001993921972280717, "loss": 1.4863, "step": 1109 }, { "epoch": 0.028501687473219486, "grad_norm": 1.0234375, "learning_rate": 0.00019939170567916142, "loss": 1.4593, "step": 1110 }, { "epoch": 0.028527364669141308, "grad_norm": 1.0625, "learning_rate": 0.00019939121393217245, "loss": 1.5566, "step": 1111 }, { "epoch": 0.028553041865063127, "grad_norm": 1.28125, "learning_rate": 0.0001993907219871057, "loss": 1.3723, "step": 1112 }, { "epoch": 0.028578719060984945, "grad_norm": 1.0078125, "learning_rate": 0.0001993902298439623, "loss": 1.5394, "step": 1113 }, { "epoch": 0.028604396256906764, "grad_norm": 0.96484375, "learning_rate": 0.00019938973750274313, "loss": 1.3245, "step": 1114 }, { "epoch": 0.028630073452828583, "grad_norm": 0.9375, "learning_rate": 0.00019938924496344917, "loss": 1.5583, "step": 1115 }, { "epoch": 0.028655750648750405, "grad_norm": 1.0390625, "learning_rate": 0.00019938875222608144, "loss": 1.4821, "step": 1116 }, { "epoch": 0.028681427844672223, "grad_norm": 0.9921875, "learning_rate": 0.0001993882592906409, "loss": 1.406, "step": 1117 }, { "epoch": 0.028707105040594042, "grad_norm": 1.0078125, "learning_rate": 0.00019938776615712852, "loss": 1.4598, "step": 1118 }, { "epoch": 0.02873278223651586, "grad_norm": 0.9765625, "learning_rate": 0.00019938727282554534, "loss": 1.5509, "step": 1119 }, { "epoch": 0.02875845943243768, "grad_norm": 0.91015625, "learning_rate": 0.00019938677929589227, "loss": 1.4516, "step": 1120 }, { "epoch": 0.0287841366283595, "grad_norm": 0.98046875, "learning_rate": 0.00019938628556817034, "loss": 1.3546, "step": 1121 }, { "epoch": 0.02880981382428132, "grad_norm": 0.9296875, "learning_rate": 0.0001993857916423805, "loss": 1.3877, "step": 1122 }, { "epoch": 0.02883549102020314, "grad_norm": 0.98046875, "learning_rate": 0.0001993852975185238, "loss": 1.2831, "step": 1123 }, { "epoch": 0.028861168216124958, "grad_norm": 1.0546875, "learning_rate": 0.00019938480319660112, "loss": 1.5056, "step": 1124 }, { "epoch": 0.028886845412046776, "grad_norm": 0.99609375, "learning_rate": 0.00019938430867661355, "loss": 1.3605, "step": 1125 }, { "epoch": 0.028912522607968595, "grad_norm": 0.98828125, "learning_rate": 0.00019938381395856204, "loss": 1.4964, "step": 1126 }, { "epoch": 0.028938199803890417, "grad_norm": 0.9921875, "learning_rate": 0.00019938331904244754, "loss": 1.499, "step": 1127 }, { "epoch": 0.028963876999812236, "grad_norm": 0.99609375, "learning_rate": 0.00019938282392827102, "loss": 1.536, "step": 1128 }, { "epoch": 0.028989554195734055, "grad_norm": 0.91015625, "learning_rate": 0.0001993823286160336, "loss": 1.4503, "step": 1129 }, { "epoch": 0.029015231391655873, "grad_norm": 0.98046875, "learning_rate": 0.0001993818331057361, "loss": 1.5386, "step": 1130 }, { "epoch": 0.029040908587577692, "grad_norm": 1.0, "learning_rate": 0.00019938133739737964, "loss": 1.4607, "step": 1131 }, { "epoch": 0.029066585783499514, "grad_norm": 1.1171875, "learning_rate": 0.00019938084149096513, "loss": 1.2683, "step": 1132 }, { "epoch": 0.029092262979421333, "grad_norm": 0.92578125, "learning_rate": 0.00019938034538649355, "loss": 1.4044, "step": 1133 }, { "epoch": 0.02911794017534315, "grad_norm": 0.93359375, "learning_rate": 0.00019937984908396594, "loss": 1.5974, "step": 1134 }, { "epoch": 0.02914361737126497, "grad_norm": 0.97265625, "learning_rate": 0.00019937935258338327, "loss": 1.3665, "step": 1135 }, { "epoch": 0.02916929456718679, "grad_norm": 0.98828125, "learning_rate": 0.0001993788558847465, "loss": 1.4654, "step": 1136 }, { "epoch": 0.02919497176310861, "grad_norm": 1.0546875, "learning_rate": 0.0001993783589880567, "loss": 1.2685, "step": 1137 }, { "epoch": 0.02922064895903043, "grad_norm": 0.96484375, "learning_rate": 0.0001993778618933148, "loss": 1.401, "step": 1138 }, { "epoch": 0.02924632615495225, "grad_norm": 1.0234375, "learning_rate": 0.00019937736460052177, "loss": 1.3639, "step": 1139 }, { "epoch": 0.029272003350874067, "grad_norm": 1.046875, "learning_rate": 0.00019937686710967865, "loss": 1.498, "step": 1140 }, { "epoch": 0.029297680546795886, "grad_norm": 1.0625, "learning_rate": 0.00019937636942078638, "loss": 1.5416, "step": 1141 }, { "epoch": 0.029323357742717708, "grad_norm": 1.0625, "learning_rate": 0.00019937587153384602, "loss": 1.6077, "step": 1142 }, { "epoch": 0.029349034938639527, "grad_norm": 1.0390625, "learning_rate": 0.00019937537344885852, "loss": 1.4465, "step": 1143 }, { "epoch": 0.029374712134561345, "grad_norm": 1.1015625, "learning_rate": 0.00019937487516582484, "loss": 1.5702, "step": 1144 }, { "epoch": 0.029400389330483164, "grad_norm": 0.9765625, "learning_rate": 0.00019937437668474602, "loss": 1.2581, "step": 1145 }, { "epoch": 0.029426066526404983, "grad_norm": 1.0234375, "learning_rate": 0.00019937387800562307, "loss": 1.5457, "step": 1146 }, { "epoch": 0.029451743722326805, "grad_norm": 0.92578125, "learning_rate": 0.00019937337912845696, "loss": 1.5024, "step": 1147 }, { "epoch": 0.029477420918248624, "grad_norm": 0.9921875, "learning_rate": 0.00019937288005324866, "loss": 1.3323, "step": 1148 }, { "epoch": 0.029503098114170442, "grad_norm": 0.9765625, "learning_rate": 0.0001993723807799992, "loss": 1.5063, "step": 1149 }, { "epoch": 0.02952877531009226, "grad_norm": 0.93359375, "learning_rate": 0.00019937188130870955, "loss": 1.4807, "step": 1150 }, { "epoch": 0.02955445250601408, "grad_norm": 0.93359375, "learning_rate": 0.00019937138163938074, "loss": 1.4422, "step": 1151 }, { "epoch": 0.0295801297019359, "grad_norm": 0.98046875, "learning_rate": 0.0001993708817720137, "loss": 1.6101, "step": 1152 }, { "epoch": 0.02960580689785772, "grad_norm": 1.0, "learning_rate": 0.0001993703817066095, "loss": 1.2915, "step": 1153 }, { "epoch": 0.02963148409377954, "grad_norm": 0.9375, "learning_rate": 0.0001993698814431691, "loss": 1.4729, "step": 1154 }, { "epoch": 0.029657161289701358, "grad_norm": 1.1171875, "learning_rate": 0.00019936938098169348, "loss": 1.3154, "step": 1155 }, { "epoch": 0.029682838485623177, "grad_norm": 1.0078125, "learning_rate": 0.0001993688803221837, "loss": 1.1856, "step": 1156 }, { "epoch": 0.029708515681544995, "grad_norm": 1.0390625, "learning_rate": 0.00019936837946464068, "loss": 1.4448, "step": 1157 }, { "epoch": 0.029734192877466818, "grad_norm": 0.95703125, "learning_rate": 0.00019936787840906547, "loss": 1.5734, "step": 1158 }, { "epoch": 0.029759870073388636, "grad_norm": 1.5234375, "learning_rate": 0.0001993673771554591, "loss": 1.5457, "step": 1159 }, { "epoch": 0.029785547269310455, "grad_norm": 0.96484375, "learning_rate": 0.00019936687570382247, "loss": 1.5326, "step": 1160 }, { "epoch": 0.029811224465232274, "grad_norm": 0.96875, "learning_rate": 0.00019936637405415663, "loss": 1.4062, "step": 1161 }, { "epoch": 0.029836901661154092, "grad_norm": 1.015625, "learning_rate": 0.00019936587220646259, "loss": 1.3567, "step": 1162 }, { "epoch": 0.029862578857075914, "grad_norm": 0.9921875, "learning_rate": 0.00019936537016074137, "loss": 1.3598, "step": 1163 }, { "epoch": 0.029888256052997733, "grad_norm": 1.828125, "learning_rate": 0.00019936486791699391, "loss": 1.3307, "step": 1164 }, { "epoch": 0.029913933248919552, "grad_norm": 0.984375, "learning_rate": 0.00019936436547522126, "loss": 1.4853, "step": 1165 }, { "epoch": 0.02993961044484137, "grad_norm": 1.0, "learning_rate": 0.0001993638628354244, "loss": 1.3955, "step": 1166 }, { "epoch": 0.02996528764076319, "grad_norm": 0.91796875, "learning_rate": 0.00019936335999760433, "loss": 1.2615, "step": 1167 }, { "epoch": 0.02999096483668501, "grad_norm": 1.0390625, "learning_rate": 0.0001993628569617621, "loss": 1.4572, "step": 1168 }, { "epoch": 0.03001664203260683, "grad_norm": 0.9453125, "learning_rate": 0.0001993623537278986, "loss": 1.3467, "step": 1169 }, { "epoch": 0.03004231922852865, "grad_norm": 0.97265625, "learning_rate": 0.00019936185029601494, "loss": 1.551, "step": 1170 }, { "epoch": 0.030067996424450467, "grad_norm": 0.92578125, "learning_rate": 0.0001993613466661121, "loss": 1.5532, "step": 1171 }, { "epoch": 0.030093673620372286, "grad_norm": 0.92578125, "learning_rate": 0.00019936084283819105, "loss": 1.3497, "step": 1172 }, { "epoch": 0.03011935081629411, "grad_norm": 1.0390625, "learning_rate": 0.00019936033881225285, "loss": 1.5198, "step": 1173 }, { "epoch": 0.030145028012215927, "grad_norm": 1.0078125, "learning_rate": 0.00019935983458829843, "loss": 1.4712, "step": 1174 }, { "epoch": 0.030170705208137746, "grad_norm": 1.15625, "learning_rate": 0.00019935933016632887, "loss": 1.4757, "step": 1175 }, { "epoch": 0.030196382404059564, "grad_norm": 0.92578125, "learning_rate": 0.0001993588255463451, "loss": 1.3571, "step": 1176 }, { "epoch": 0.030222059599981383, "grad_norm": 1.0546875, "learning_rate": 0.00019935832072834822, "loss": 1.4206, "step": 1177 }, { "epoch": 0.030247736795903202, "grad_norm": 1.2890625, "learning_rate": 0.00019935781571233911, "loss": 1.4748, "step": 1178 }, { "epoch": 0.030273413991825024, "grad_norm": 0.97265625, "learning_rate": 0.0001993573104983189, "loss": 1.5958, "step": 1179 }, { "epoch": 0.030299091187746843, "grad_norm": 1.015625, "learning_rate": 0.00019935680508628852, "loss": 1.3775, "step": 1180 }, { "epoch": 0.03032476838366866, "grad_norm": 0.921875, "learning_rate": 0.00019935629947624904, "loss": 1.3504, "step": 1181 }, { "epoch": 0.03035044557959048, "grad_norm": 0.95703125, "learning_rate": 0.00019935579366820138, "loss": 1.6669, "step": 1182 }, { "epoch": 0.0303761227755123, "grad_norm": 0.96875, "learning_rate": 0.00019935528766214664, "loss": 1.347, "step": 1183 }, { "epoch": 0.03040179997143412, "grad_norm": 1.078125, "learning_rate": 0.00019935478145808577, "loss": 1.3578, "step": 1184 }, { "epoch": 0.03042747716735594, "grad_norm": 0.9609375, "learning_rate": 0.0001993542750560198, "loss": 1.2549, "step": 1185 }, { "epoch": 0.030453154363277758, "grad_norm": 0.95703125, "learning_rate": 0.0001993537684559497, "loss": 1.3755, "step": 1186 }, { "epoch": 0.030478831559199577, "grad_norm": 1.0234375, "learning_rate": 0.00019935326165787656, "loss": 1.3527, "step": 1187 }, { "epoch": 0.030504508755121396, "grad_norm": 0.984375, "learning_rate": 0.00019935275466180134, "loss": 1.3499, "step": 1188 }, { "epoch": 0.030530185951043218, "grad_norm": 0.9296875, "learning_rate": 0.00019935224746772502, "loss": 1.3158, "step": 1189 }, { "epoch": 0.030555863146965036, "grad_norm": 0.9921875, "learning_rate": 0.00019935174007564867, "loss": 1.3525, "step": 1190 }, { "epoch": 0.030581540342886855, "grad_norm": 1.0703125, "learning_rate": 0.00019935123248557328, "loss": 1.3885, "step": 1191 }, { "epoch": 0.030607217538808674, "grad_norm": 0.91796875, "learning_rate": 0.00019935072469749984, "loss": 1.3659, "step": 1192 }, { "epoch": 0.030632894734730493, "grad_norm": 1.0, "learning_rate": 0.0001993502167114294, "loss": 1.3813, "step": 1193 }, { "epoch": 0.030658571930652315, "grad_norm": 1.0703125, "learning_rate": 0.00019934970852736295, "loss": 1.3151, "step": 1194 }, { "epoch": 0.030684249126574133, "grad_norm": 1.015625, "learning_rate": 0.0001993492001453015, "loss": 1.526, "step": 1195 }, { "epoch": 0.030709926322495952, "grad_norm": 1.0625, "learning_rate": 0.00019934869156524606, "loss": 1.4802, "step": 1196 }, { "epoch": 0.03073560351841777, "grad_norm": 0.96875, "learning_rate": 0.00019934818278719764, "loss": 1.4433, "step": 1197 }, { "epoch": 0.03076128071433959, "grad_norm": 0.94921875, "learning_rate": 0.00019934767381115732, "loss": 1.3783, "step": 1198 }, { "epoch": 0.03078695791026141, "grad_norm": 1.1875, "learning_rate": 0.000199347164637126, "loss": 1.3718, "step": 1199 }, { "epoch": 0.03081263510618323, "grad_norm": 1.046875, "learning_rate": 0.00019934665526510478, "loss": 1.6825, "step": 1200 }, { "epoch": 0.03083831230210505, "grad_norm": 1.0234375, "learning_rate": 0.00019934614569509465, "loss": 1.3985, "step": 1201 }, { "epoch": 0.030863989498026868, "grad_norm": 0.9765625, "learning_rate": 0.00019934563592709662, "loss": 1.2761, "step": 1202 }, { "epoch": 0.030889666693948686, "grad_norm": 1.03125, "learning_rate": 0.00019934512596111174, "loss": 1.447, "step": 1203 }, { "epoch": 0.030915343889870505, "grad_norm": 0.91015625, "learning_rate": 0.00019934461579714093, "loss": 1.2719, "step": 1204 }, { "epoch": 0.030941021085792327, "grad_norm": 1.171875, "learning_rate": 0.00019934410543518536, "loss": 1.4181, "step": 1205 }, { "epoch": 0.030966698281714146, "grad_norm": 1.1875, "learning_rate": 0.00019934359487524592, "loss": 1.4599, "step": 1206 }, { "epoch": 0.030992375477635965, "grad_norm": 0.94140625, "learning_rate": 0.00019934308411732364, "loss": 1.4054, "step": 1207 }, { "epoch": 0.031018052673557783, "grad_norm": 1.0703125, "learning_rate": 0.00019934257316141961, "loss": 1.411, "step": 1208 }, { "epoch": 0.031043729869479602, "grad_norm": 1.0234375, "learning_rate": 0.0001993420620075348, "loss": 1.3816, "step": 1209 }, { "epoch": 0.031069407065401424, "grad_norm": 0.96484375, "learning_rate": 0.00019934155065567022, "loss": 1.6254, "step": 1210 }, { "epoch": 0.031095084261323243, "grad_norm": 1.1015625, "learning_rate": 0.0001993410391058269, "loss": 1.5864, "step": 1211 }, { "epoch": 0.03112076145724506, "grad_norm": 1.078125, "learning_rate": 0.00019934052735800587, "loss": 1.6024, "step": 1212 }, { "epoch": 0.03114643865316688, "grad_norm": 1.1484375, "learning_rate": 0.00019934001541220816, "loss": 1.614, "step": 1213 }, { "epoch": 0.0311721158490887, "grad_norm": 1.03125, "learning_rate": 0.00019933950326843472, "loss": 1.4872, "step": 1214 }, { "epoch": 0.03119779304501052, "grad_norm": 0.97265625, "learning_rate": 0.00019933899092668667, "loss": 1.4049, "step": 1215 }, { "epoch": 0.03122347024093234, "grad_norm": 0.97265625, "learning_rate": 0.000199338478386965, "loss": 1.3268, "step": 1216 }, { "epoch": 0.03124914743685416, "grad_norm": 0.9765625, "learning_rate": 0.00019933796564927068, "loss": 1.366, "step": 1217 }, { "epoch": 0.03127482463277598, "grad_norm": 0.984375, "learning_rate": 0.00019933745271360478, "loss": 1.3945, "step": 1218 }, { "epoch": 0.0313005018286978, "grad_norm": 0.94140625, "learning_rate": 0.00019933693957996832, "loss": 1.437, "step": 1219 }, { "epoch": 0.031326179024619615, "grad_norm": 0.91796875, "learning_rate": 0.00019933642624836228, "loss": 1.3241, "step": 1220 }, { "epoch": 0.03135185622054144, "grad_norm": 1.09375, "learning_rate": 0.00019933591271878776, "loss": 1.7029, "step": 1221 }, { "epoch": 0.03137753341646325, "grad_norm": 1.0, "learning_rate": 0.0001993353989912457, "loss": 1.3072, "step": 1222 }, { "epoch": 0.031403210612385074, "grad_norm": 1.015625, "learning_rate": 0.00019933488506573716, "loss": 1.4956, "step": 1223 }, { "epoch": 0.031428887808306896, "grad_norm": 0.94921875, "learning_rate": 0.0001993343709422632, "loss": 1.3802, "step": 1224 }, { "epoch": 0.03145456500422871, "grad_norm": 0.96484375, "learning_rate": 0.0001993338566208248, "loss": 1.4746, "step": 1225 }, { "epoch": 0.031480242200150534, "grad_norm": 0.984375, "learning_rate": 0.00019933334210142298, "loss": 1.34, "step": 1226 }, { "epoch": 0.03150591939607235, "grad_norm": 0.94140625, "learning_rate": 0.00019933282738405882, "loss": 1.4646, "step": 1227 }, { "epoch": 0.03153159659199417, "grad_norm": 1.1015625, "learning_rate": 0.00019933231246873324, "loss": 1.3813, "step": 1228 }, { "epoch": 0.03155727378791599, "grad_norm": 1.0390625, "learning_rate": 0.00019933179735544737, "loss": 1.5668, "step": 1229 }, { "epoch": 0.03158295098383781, "grad_norm": 0.96484375, "learning_rate": 0.00019933128204420222, "loss": 1.3167, "step": 1230 }, { "epoch": 0.03160862817975963, "grad_norm": 0.9765625, "learning_rate": 0.00019933076653499878, "loss": 1.5303, "step": 1231 }, { "epoch": 0.031634305375681446, "grad_norm": 0.96875, "learning_rate": 0.0001993302508278381, "loss": 1.4316, "step": 1232 }, { "epoch": 0.03165998257160327, "grad_norm": 0.91796875, "learning_rate": 0.0001993297349227212, "loss": 1.418, "step": 1233 }, { "epoch": 0.03168565976752509, "grad_norm": 1.0078125, "learning_rate": 0.00019932921881964913, "loss": 1.415, "step": 1234 }, { "epoch": 0.031711336963446905, "grad_norm": 0.91015625, "learning_rate": 0.0001993287025186229, "loss": 1.298, "step": 1235 }, { "epoch": 0.03173701415936873, "grad_norm": 1.0625, "learning_rate": 0.00019932818601964348, "loss": 1.6523, "step": 1236 }, { "epoch": 0.03176269135529054, "grad_norm": 1.09375, "learning_rate": 0.000199327669322712, "loss": 1.3831, "step": 1237 }, { "epoch": 0.031788368551212365, "grad_norm": 1.0390625, "learning_rate": 0.00019932715242782946, "loss": 1.3538, "step": 1238 }, { "epoch": 0.03181404574713419, "grad_norm": 1.0546875, "learning_rate": 0.00019932663533499685, "loss": 1.4682, "step": 1239 }, { "epoch": 0.031839722943056, "grad_norm": 1.140625, "learning_rate": 0.00019932611804421524, "loss": 1.486, "step": 1240 }, { "epoch": 0.031865400138977824, "grad_norm": 0.9765625, "learning_rate": 0.00019932560055548564, "loss": 1.5256, "step": 1241 }, { "epoch": 0.03189107733489964, "grad_norm": 0.98828125, "learning_rate": 0.0001993250828688091, "loss": 1.4685, "step": 1242 }, { "epoch": 0.03191675453082146, "grad_norm": 0.9140625, "learning_rate": 0.00019932456498418662, "loss": 1.4535, "step": 1243 }, { "epoch": 0.031942431726743284, "grad_norm": 1.0078125, "learning_rate": 0.00019932404690161927, "loss": 1.4444, "step": 1244 }, { "epoch": 0.0319681089226651, "grad_norm": 0.9609375, "learning_rate": 0.0001993235286211081, "loss": 1.3452, "step": 1245 }, { "epoch": 0.03199378611858692, "grad_norm": 0.9140625, "learning_rate": 0.00019932301014265405, "loss": 1.3219, "step": 1246 }, { "epoch": 0.032019463314508737, "grad_norm": 0.95703125, "learning_rate": 0.00019932249146625825, "loss": 1.527, "step": 1247 }, { "epoch": 0.03204514051043056, "grad_norm": 0.9375, "learning_rate": 0.00019932197259192168, "loss": 1.3103, "step": 1248 }, { "epoch": 0.03207081770635238, "grad_norm": 0.9765625, "learning_rate": 0.00019932145351964542, "loss": 1.5292, "step": 1249 }, { "epoch": 0.032096494902274196, "grad_norm": 1.0, "learning_rate": 0.00019932093424943043, "loss": 1.5054, "step": 1250 }, { "epoch": 0.03212217209819602, "grad_norm": 0.921875, "learning_rate": 0.00019932041478127783, "loss": 1.4028, "step": 1251 }, { "epoch": 0.03214784929411783, "grad_norm": 0.90234375, "learning_rate": 0.0001993198951151886, "loss": 1.395, "step": 1252 }, { "epoch": 0.032173526490039656, "grad_norm": 0.953125, "learning_rate": 0.00019931937525116377, "loss": 1.3956, "step": 1253 }, { "epoch": 0.03219920368596148, "grad_norm": 1.0, "learning_rate": 0.00019931885518920442, "loss": 1.461, "step": 1254 }, { "epoch": 0.03222488088188329, "grad_norm": 0.9921875, "learning_rate": 0.00019931833492931156, "loss": 1.5444, "step": 1255 }, { "epoch": 0.032250558077805115, "grad_norm": 1.046875, "learning_rate": 0.00019931781447148623, "loss": 1.519, "step": 1256 }, { "epoch": 0.03227623527372693, "grad_norm": 1.046875, "learning_rate": 0.00019931729381572947, "loss": 1.3063, "step": 1257 }, { "epoch": 0.03230191246964875, "grad_norm": 1.0703125, "learning_rate": 0.0001993167729620423, "loss": 1.4971, "step": 1258 }, { "epoch": 0.032327589665570575, "grad_norm": 0.91796875, "learning_rate": 0.0001993162519104258, "loss": 1.3789, "step": 1259 }, { "epoch": 0.03235326686149239, "grad_norm": 0.9375, "learning_rate": 0.00019931573066088096, "loss": 1.3854, "step": 1260 }, { "epoch": 0.03237894405741421, "grad_norm": 0.98828125, "learning_rate": 0.00019931520921340882, "loss": 1.2787, "step": 1261 }, { "epoch": 0.03240462125333603, "grad_norm": 0.9921875, "learning_rate": 0.00019931468756801047, "loss": 1.3239, "step": 1262 }, { "epoch": 0.03243029844925785, "grad_norm": 1.0234375, "learning_rate": 0.0001993141657246869, "loss": 1.3758, "step": 1263 }, { "epoch": 0.03245597564517967, "grad_norm": 1.9375, "learning_rate": 0.0001993136436834392, "loss": 1.5797, "step": 1264 }, { "epoch": 0.03248165284110149, "grad_norm": 1.0078125, "learning_rate": 0.00019931312144426836, "loss": 1.4412, "step": 1265 }, { "epoch": 0.03250733003702331, "grad_norm": 0.90625, "learning_rate": 0.00019931259900717545, "loss": 1.3212, "step": 1266 }, { "epoch": 0.032533007232945124, "grad_norm": 0.92578125, "learning_rate": 0.00019931207637216146, "loss": 1.4087, "step": 1267 }, { "epoch": 0.032558684428866946, "grad_norm": 0.91015625, "learning_rate": 0.00019931155353922753, "loss": 1.3294, "step": 1268 }, { "epoch": 0.03258436162478876, "grad_norm": 0.98046875, "learning_rate": 0.0001993110305083746, "loss": 1.5901, "step": 1269 }, { "epoch": 0.032610038820710584, "grad_norm": 0.98828125, "learning_rate": 0.00019931050727960378, "loss": 1.4147, "step": 1270 }, { "epoch": 0.032635716016632406, "grad_norm": 0.94921875, "learning_rate": 0.00019930998385291607, "loss": 1.4479, "step": 1271 }, { "epoch": 0.03266139321255422, "grad_norm": 0.984375, "learning_rate": 0.00019930946022831257, "loss": 1.4087, "step": 1272 }, { "epoch": 0.03268707040847604, "grad_norm": 0.9765625, "learning_rate": 0.00019930893640579425, "loss": 1.4766, "step": 1273 }, { "epoch": 0.03271274760439786, "grad_norm": 1.0390625, "learning_rate": 0.00019930841238536222, "loss": 1.6565, "step": 1274 }, { "epoch": 0.03273842480031968, "grad_norm": 0.9140625, "learning_rate": 0.00019930788816701747, "loss": 1.5196, "step": 1275 }, { "epoch": 0.0327641019962415, "grad_norm": 0.96875, "learning_rate": 0.00019930736375076106, "loss": 1.4316, "step": 1276 }, { "epoch": 0.03278977919216332, "grad_norm": 0.9375, "learning_rate": 0.00019930683913659407, "loss": 1.4696, "step": 1277 }, { "epoch": 0.03281545638808514, "grad_norm": 0.98046875, "learning_rate": 0.0001993063143245175, "loss": 1.3563, "step": 1278 }, { "epoch": 0.032841133584006955, "grad_norm": 0.97265625, "learning_rate": 0.00019930578931453243, "loss": 1.3368, "step": 1279 }, { "epoch": 0.03286681077992878, "grad_norm": 1.09375, "learning_rate": 0.0001993052641066399, "loss": 1.49, "step": 1280 }, { "epoch": 0.0328924879758506, "grad_norm": 1.0234375, "learning_rate": 0.00019930473870084095, "loss": 1.2795, "step": 1281 }, { "epoch": 0.032918165171772415, "grad_norm": 0.9140625, "learning_rate": 0.00019930421309713662, "loss": 1.4165, "step": 1282 }, { "epoch": 0.03294384236769424, "grad_norm": 0.921875, "learning_rate": 0.00019930368729552793, "loss": 1.46, "step": 1283 }, { "epoch": 0.03296951956361605, "grad_norm": 1.0234375, "learning_rate": 0.000199303161296016, "loss": 1.4022, "step": 1284 }, { "epoch": 0.032995196759537875, "grad_norm": 0.9765625, "learning_rate": 0.00019930263509860183, "loss": 1.3569, "step": 1285 }, { "epoch": 0.0330208739554597, "grad_norm": 0.96484375, "learning_rate": 0.00019930210870328647, "loss": 1.362, "step": 1286 }, { "epoch": 0.03304655115138151, "grad_norm": 0.99609375, "learning_rate": 0.00019930158211007098, "loss": 1.2735, "step": 1287 }, { "epoch": 0.033072228347303334, "grad_norm": 1.0703125, "learning_rate": 0.00019930105531895643, "loss": 1.4294, "step": 1288 }, { "epoch": 0.03309790554322515, "grad_norm": 0.91015625, "learning_rate": 0.00019930052832994383, "loss": 1.4341, "step": 1289 }, { "epoch": 0.03312358273914697, "grad_norm": 0.96875, "learning_rate": 0.00019930000114303425, "loss": 1.5677, "step": 1290 }, { "epoch": 0.033149259935068794, "grad_norm": 0.921875, "learning_rate": 0.00019929947375822872, "loss": 1.387, "step": 1291 }, { "epoch": 0.03317493713099061, "grad_norm": 0.9296875, "learning_rate": 0.00019929894617552832, "loss": 1.3053, "step": 1292 }, { "epoch": 0.03320061432691243, "grad_norm": 0.94140625, "learning_rate": 0.00019929841839493408, "loss": 1.5284, "step": 1293 }, { "epoch": 0.033226291522834246, "grad_norm": 1.015625, "learning_rate": 0.0001992978904164471, "loss": 1.2499, "step": 1294 }, { "epoch": 0.03325196871875607, "grad_norm": 1.1015625, "learning_rate": 0.00019929736224006834, "loss": 1.4412, "step": 1295 }, { "epoch": 0.03327764591467789, "grad_norm": 0.9140625, "learning_rate": 0.00019929683386579893, "loss": 1.3622, "step": 1296 }, { "epoch": 0.033303323110599706, "grad_norm": 0.94921875, "learning_rate": 0.0001992963052936399, "loss": 1.397, "step": 1297 }, { "epoch": 0.03332900030652153, "grad_norm": 1.0, "learning_rate": 0.00019929577652359233, "loss": 1.3953, "step": 1298 }, { "epoch": 0.03335467750244334, "grad_norm": 0.90625, "learning_rate": 0.0001992952475556572, "loss": 1.2782, "step": 1299 }, { "epoch": 0.033380354698365165, "grad_norm": 1.0, "learning_rate": 0.0001992947183898356, "loss": 1.4231, "step": 1300 }, { "epoch": 0.03340603189428699, "grad_norm": 0.9296875, "learning_rate": 0.00019929418902612866, "loss": 1.3707, "step": 1301 }, { "epoch": 0.0334317090902088, "grad_norm": 0.9609375, "learning_rate": 0.00019929365946453733, "loss": 1.3749, "step": 1302 }, { "epoch": 0.033457386286130625, "grad_norm": 0.98046875, "learning_rate": 0.00019929312970506273, "loss": 1.3626, "step": 1303 }, { "epoch": 0.03348306348205244, "grad_norm": 1.0078125, "learning_rate": 0.00019929259974770587, "loss": 1.3875, "step": 1304 }, { "epoch": 0.03350874067797426, "grad_norm": 0.9375, "learning_rate": 0.00019929206959246783, "loss": 1.5559, "step": 1305 }, { "epoch": 0.033534417873896084, "grad_norm": 1.0234375, "learning_rate": 0.00019929153923934967, "loss": 1.5799, "step": 1306 }, { "epoch": 0.0335600950698179, "grad_norm": 1.1484375, "learning_rate": 0.00019929100868835243, "loss": 1.5069, "step": 1307 }, { "epoch": 0.03358577226573972, "grad_norm": 1.0078125, "learning_rate": 0.00019929047793947722, "loss": 1.4231, "step": 1308 }, { "epoch": 0.03361144946166154, "grad_norm": 0.97265625, "learning_rate": 0.000199289946992725, "loss": 1.397, "step": 1309 }, { "epoch": 0.03363712665758336, "grad_norm": 0.9453125, "learning_rate": 0.0001992894158480969, "loss": 1.4448, "step": 1310 }, { "epoch": 0.03366280385350518, "grad_norm": 0.984375, "learning_rate": 0.00019928888450559398, "loss": 1.3748, "step": 1311 }, { "epoch": 0.033688481049426997, "grad_norm": 0.9609375, "learning_rate": 0.0001992883529652173, "loss": 1.2605, "step": 1312 }, { "epoch": 0.03371415824534882, "grad_norm": 0.9375, "learning_rate": 0.00019928782122696786, "loss": 1.3911, "step": 1313 }, { "epoch": 0.033739835441270634, "grad_norm": 0.87109375, "learning_rate": 0.00019928728929084676, "loss": 1.3722, "step": 1314 }, { "epoch": 0.033765512637192456, "grad_norm": 0.99609375, "learning_rate": 0.00019928675715685508, "loss": 1.5363, "step": 1315 }, { "epoch": 0.03379118983311428, "grad_norm": 0.9765625, "learning_rate": 0.00019928622482499387, "loss": 1.4025, "step": 1316 }, { "epoch": 0.03381686702903609, "grad_norm": 1.0078125, "learning_rate": 0.00019928569229526417, "loss": 1.5471, "step": 1317 }, { "epoch": 0.033842544224957916, "grad_norm": 0.9140625, "learning_rate": 0.00019928515956766705, "loss": 1.294, "step": 1318 }, { "epoch": 0.03386822142087973, "grad_norm": 0.9765625, "learning_rate": 0.00019928462664220358, "loss": 1.3571, "step": 1319 }, { "epoch": 0.03389389861680155, "grad_norm": 1.0234375, "learning_rate": 0.0001992840935188748, "loss": 1.4216, "step": 1320 }, { "epoch": 0.03391957581272337, "grad_norm": 1.0078125, "learning_rate": 0.00019928356019768183, "loss": 1.3485, "step": 1321 }, { "epoch": 0.03394525300864519, "grad_norm": 0.93359375, "learning_rate": 0.00019928302667862567, "loss": 1.4083, "step": 1322 }, { "epoch": 0.03397093020456701, "grad_norm": 1.0390625, "learning_rate": 0.00019928249296170738, "loss": 1.4517, "step": 1323 }, { "epoch": 0.03399660740048883, "grad_norm": 1.2890625, "learning_rate": 0.0001992819590469281, "loss": 1.4167, "step": 1324 }, { "epoch": 0.03402228459641065, "grad_norm": 1.03125, "learning_rate": 0.0001992814249342888, "loss": 1.3819, "step": 1325 }, { "epoch": 0.034047961792332465, "grad_norm": 0.98046875, "learning_rate": 0.00019928089062379062, "loss": 1.4575, "step": 1326 }, { "epoch": 0.03407363898825429, "grad_norm": 0.99609375, "learning_rate": 0.00019928035611543456, "loss": 1.4305, "step": 1327 }, { "epoch": 0.03409931618417611, "grad_norm": 0.95703125, "learning_rate": 0.00019927982140922174, "loss": 1.3553, "step": 1328 }, { "epoch": 0.034124993380097925, "grad_norm": 0.984375, "learning_rate": 0.00019927928650515322, "loss": 1.3282, "step": 1329 }, { "epoch": 0.03415067057601975, "grad_norm": 1.046875, "learning_rate": 0.00019927875140323003, "loss": 1.3842, "step": 1330 }, { "epoch": 0.03417634777194156, "grad_norm": 0.98046875, "learning_rate": 0.00019927821610345325, "loss": 1.3942, "step": 1331 }, { "epoch": 0.034202024967863384, "grad_norm": 0.91015625, "learning_rate": 0.00019927768060582396, "loss": 1.2644, "step": 1332 }, { "epoch": 0.034227702163785206, "grad_norm": 0.9609375, "learning_rate": 0.00019927714491034323, "loss": 1.3022, "step": 1333 }, { "epoch": 0.03425337935970702, "grad_norm": 0.91796875, "learning_rate": 0.0001992766090170121, "loss": 1.4074, "step": 1334 }, { "epoch": 0.034279056555628844, "grad_norm": 1.03125, "learning_rate": 0.00019927607292583168, "loss": 1.3799, "step": 1335 }, { "epoch": 0.03430473375155066, "grad_norm": 0.89453125, "learning_rate": 0.000199275536636803, "loss": 1.218, "step": 1336 }, { "epoch": 0.03433041094747248, "grad_norm": 1.125, "learning_rate": 0.00019927500014992714, "loss": 1.4376, "step": 1337 }, { "epoch": 0.0343560881433943, "grad_norm": 0.953125, "learning_rate": 0.00019927446346520517, "loss": 1.358, "step": 1338 }, { "epoch": 0.03438176533931612, "grad_norm": 0.95703125, "learning_rate": 0.00019927392658263817, "loss": 1.3068, "step": 1339 }, { "epoch": 0.03440744253523794, "grad_norm": 1.03125, "learning_rate": 0.00019927338950222718, "loss": 1.3437, "step": 1340 }, { "epoch": 0.034433119731159756, "grad_norm": 0.98046875, "learning_rate": 0.00019927285222397334, "loss": 1.5072, "step": 1341 }, { "epoch": 0.03445879692708158, "grad_norm": 1.0546875, "learning_rate": 0.00019927231474787762, "loss": 1.388, "step": 1342 }, { "epoch": 0.0344844741230034, "grad_norm": 1.015625, "learning_rate": 0.0001992717770739412, "loss": 1.3923, "step": 1343 }, { "epoch": 0.034510151318925215, "grad_norm": 1.0, "learning_rate": 0.00019927123920216504, "loss": 1.5415, "step": 1344 }, { "epoch": 0.03453582851484704, "grad_norm": 1.03125, "learning_rate": 0.00019927070113255027, "loss": 1.3741, "step": 1345 }, { "epoch": 0.03456150571076885, "grad_norm": 0.87109375, "learning_rate": 0.00019927016286509802, "loss": 1.2864, "step": 1346 }, { "epoch": 0.034587182906690675, "grad_norm": 1.1171875, "learning_rate": 0.00019926962439980925, "loss": 1.4656, "step": 1347 }, { "epoch": 0.0346128601026125, "grad_norm": 0.93359375, "learning_rate": 0.0001992690857366851, "loss": 1.1963, "step": 1348 }, { "epoch": 0.03463853729853431, "grad_norm": 0.89453125, "learning_rate": 0.00019926854687572662, "loss": 1.3047, "step": 1349 }, { "epoch": 0.034664214494456135, "grad_norm": 1.1484375, "learning_rate": 0.00019926800781693487, "loss": 1.4785, "step": 1350 }, { "epoch": 0.03468989169037795, "grad_norm": 0.8984375, "learning_rate": 0.000199267468560311, "loss": 1.2921, "step": 1351 }, { "epoch": 0.03471556888629977, "grad_norm": 0.8984375, "learning_rate": 0.00019926692910585603, "loss": 1.2401, "step": 1352 }, { "epoch": 0.034741246082221594, "grad_norm": 1.1484375, "learning_rate": 0.00019926638945357098, "loss": 1.5164, "step": 1353 }, { "epoch": 0.03476692327814341, "grad_norm": 1.0, "learning_rate": 0.00019926584960345704, "loss": 1.3102, "step": 1354 }, { "epoch": 0.03479260047406523, "grad_norm": 1.015625, "learning_rate": 0.0001992653095555152, "loss": 1.3392, "step": 1355 }, { "epoch": 0.03481827766998705, "grad_norm": 0.9453125, "learning_rate": 0.00019926476930974657, "loss": 1.4317, "step": 1356 }, { "epoch": 0.03484395486590887, "grad_norm": 0.9296875, "learning_rate": 0.00019926422886615223, "loss": 1.5288, "step": 1357 }, { "epoch": 0.03486963206183069, "grad_norm": 1.03125, "learning_rate": 0.00019926368822473323, "loss": 1.2037, "step": 1358 }, { "epoch": 0.034895309257752506, "grad_norm": 0.90625, "learning_rate": 0.00019926314738549067, "loss": 1.2477, "step": 1359 }, { "epoch": 0.03492098645367433, "grad_norm": 0.9453125, "learning_rate": 0.00019926260634842567, "loss": 1.2366, "step": 1360 }, { "epoch": 0.034946663649596144, "grad_norm": 1.0390625, "learning_rate": 0.0001992620651135392, "loss": 1.5529, "step": 1361 }, { "epoch": 0.034972340845517966, "grad_norm": 1.0625, "learning_rate": 0.00019926152368083242, "loss": 1.5609, "step": 1362 }, { "epoch": 0.03499801804143979, "grad_norm": 1.046875, "learning_rate": 0.0001992609820503064, "loss": 1.6152, "step": 1363 }, { "epoch": 0.0350236952373616, "grad_norm": 0.9453125, "learning_rate": 0.0001992604402219622, "loss": 1.3876, "step": 1364 }, { "epoch": 0.035049372433283425, "grad_norm": 0.90234375, "learning_rate": 0.0001992598981958009, "loss": 1.36, "step": 1365 }, { "epoch": 0.03507504962920524, "grad_norm": 1.078125, "learning_rate": 0.0001992593559718236, "loss": 1.3105, "step": 1366 }, { "epoch": 0.03510072682512706, "grad_norm": 0.98828125, "learning_rate": 0.0001992588135500314, "loss": 1.354, "step": 1367 }, { "epoch": 0.035126404021048885, "grad_norm": 0.90234375, "learning_rate": 0.0001992582709304253, "loss": 1.3792, "step": 1368 }, { "epoch": 0.0351520812169707, "grad_norm": 0.93359375, "learning_rate": 0.00019925772811300646, "loss": 1.3436, "step": 1369 }, { "epoch": 0.03517775841289252, "grad_norm": 0.97265625, "learning_rate": 0.0001992571850977759, "loss": 1.2874, "step": 1370 }, { "epoch": 0.03520343560881434, "grad_norm": 1.03125, "learning_rate": 0.00019925664188473477, "loss": 1.5504, "step": 1371 }, { "epoch": 0.03522911280473616, "grad_norm": 1.0390625, "learning_rate": 0.0001992560984738841, "loss": 1.6633, "step": 1372 }, { "epoch": 0.035254790000657975, "grad_norm": 1.0, "learning_rate": 0.00019925555486522502, "loss": 1.4889, "step": 1373 }, { "epoch": 0.0352804671965798, "grad_norm": 0.99609375, "learning_rate": 0.00019925501105875855, "loss": 1.4307, "step": 1374 }, { "epoch": 0.03530614439250162, "grad_norm": 0.94140625, "learning_rate": 0.00019925446705448585, "loss": 1.2615, "step": 1375 }, { "epoch": 0.035331821588423434, "grad_norm": 0.9453125, "learning_rate": 0.00019925392285240792, "loss": 1.3757, "step": 1376 }, { "epoch": 0.035357498784345257, "grad_norm": 0.96484375, "learning_rate": 0.0001992533784525259, "loss": 1.3837, "step": 1377 }, { "epoch": 0.03538317598026707, "grad_norm": 1.0625, "learning_rate": 0.00019925283385484086, "loss": 1.3426, "step": 1378 }, { "epoch": 0.035408853176188894, "grad_norm": 0.9296875, "learning_rate": 0.00019925228905935392, "loss": 1.3465, "step": 1379 }, { "epoch": 0.035434530372110716, "grad_norm": 0.9140625, "learning_rate": 0.0001992517440660661, "loss": 1.3799, "step": 1380 }, { "epoch": 0.03546020756803253, "grad_norm": 0.9765625, "learning_rate": 0.0001992511988749785, "loss": 1.3472, "step": 1381 }, { "epoch": 0.03548588476395435, "grad_norm": 0.93359375, "learning_rate": 0.0001992506534860923, "loss": 1.427, "step": 1382 }, { "epoch": 0.03551156195987617, "grad_norm": 0.97265625, "learning_rate": 0.00019925010789940845, "loss": 1.5358, "step": 1383 }, { "epoch": 0.03553723915579799, "grad_norm": 1.0234375, "learning_rate": 0.00019924956211492812, "loss": 1.5095, "step": 1384 }, { "epoch": 0.03556291635171981, "grad_norm": 0.9296875, "learning_rate": 0.00019924901613265237, "loss": 1.2929, "step": 1385 }, { "epoch": 0.03558859354764163, "grad_norm": 0.890625, "learning_rate": 0.0001992484699525823, "loss": 1.4175, "step": 1386 }, { "epoch": 0.03561427074356345, "grad_norm": 1.03125, "learning_rate": 0.00019924792357471898, "loss": 1.4011, "step": 1387 }, { "epoch": 0.035639947939485266, "grad_norm": 0.953125, "learning_rate": 0.00019924737699906353, "loss": 1.4942, "step": 1388 }, { "epoch": 0.03566562513540709, "grad_norm": 1.0, "learning_rate": 0.00019924683022561702, "loss": 1.5811, "step": 1389 }, { "epoch": 0.03569130233132891, "grad_norm": 0.984375, "learning_rate": 0.00019924628325438055, "loss": 1.5009, "step": 1390 }, { "epoch": 0.035716979527250725, "grad_norm": 0.98828125, "learning_rate": 0.0001992457360853552, "loss": 1.5456, "step": 1391 }, { "epoch": 0.03574265672317255, "grad_norm": 0.94140625, "learning_rate": 0.00019924518871854206, "loss": 1.4641, "step": 1392 }, { "epoch": 0.03576833391909436, "grad_norm": 0.93359375, "learning_rate": 0.00019924464115394223, "loss": 1.3502, "step": 1393 }, { "epoch": 0.035794011115016185, "grad_norm": 0.92578125, "learning_rate": 0.00019924409339155678, "loss": 1.4687, "step": 1394 }, { "epoch": 0.03581968831093801, "grad_norm": 0.92578125, "learning_rate": 0.00019924354543138684, "loss": 1.4876, "step": 1395 }, { "epoch": 0.03584536550685982, "grad_norm": 0.921875, "learning_rate": 0.00019924299727343346, "loss": 1.4132, "step": 1396 }, { "epoch": 0.035871042702781644, "grad_norm": 0.96484375, "learning_rate": 0.00019924244891769775, "loss": 1.3422, "step": 1397 }, { "epoch": 0.03589671989870346, "grad_norm": 0.98046875, "learning_rate": 0.00019924190036418077, "loss": 1.4029, "step": 1398 }, { "epoch": 0.03592239709462528, "grad_norm": 0.9921875, "learning_rate": 0.0001992413516128837, "loss": 1.4894, "step": 1399 }, { "epoch": 0.035948074290547104, "grad_norm": 0.94921875, "learning_rate": 0.00019924080266380757, "loss": 1.2839, "step": 1400 }, { "epoch": 0.03597375148646892, "grad_norm": 0.91796875, "learning_rate": 0.00019924025351695347, "loss": 1.3967, "step": 1401 }, { "epoch": 0.03599942868239074, "grad_norm": 0.9375, "learning_rate": 0.00019923970417232254, "loss": 1.3999, "step": 1402 }, { "epoch": 0.036025105878312556, "grad_norm": 0.890625, "learning_rate": 0.0001992391546299158, "loss": 1.332, "step": 1403 }, { "epoch": 0.03605078307423438, "grad_norm": 0.89453125, "learning_rate": 0.00019923860488973443, "loss": 1.2533, "step": 1404 }, { "epoch": 0.0360764602701562, "grad_norm": 0.94921875, "learning_rate": 0.00019923805495177947, "loss": 1.58, "step": 1405 }, { "epoch": 0.036102137466078016, "grad_norm": 0.98046875, "learning_rate": 0.000199237504816052, "loss": 1.2631, "step": 1406 }, { "epoch": 0.03612781466199984, "grad_norm": 0.9609375, "learning_rate": 0.0001992369544825532, "loss": 1.389, "step": 1407 }, { "epoch": 0.03615349185792165, "grad_norm": 0.9765625, "learning_rate": 0.00019923640395128409, "loss": 1.5878, "step": 1408 }, { "epoch": 0.036179169053843475, "grad_norm": 0.97265625, "learning_rate": 0.00019923585322224576, "loss": 1.3869, "step": 1409 }, { "epoch": 0.0362048462497653, "grad_norm": 0.96875, "learning_rate": 0.00019923530229543938, "loss": 1.3539, "step": 1410 }, { "epoch": 0.03623052344568711, "grad_norm": 0.8515625, "learning_rate": 0.000199234751170866, "loss": 1.4121, "step": 1411 }, { "epoch": 0.036256200641608935, "grad_norm": 1.171875, "learning_rate": 0.0001992341998485267, "loss": 1.3528, "step": 1412 }, { "epoch": 0.03628187783753075, "grad_norm": 0.88671875, "learning_rate": 0.00019923364832842263, "loss": 1.3281, "step": 1413 }, { "epoch": 0.03630755503345257, "grad_norm": 0.94921875, "learning_rate": 0.00019923309661055484, "loss": 1.3571, "step": 1414 }, { "epoch": 0.036333232229374395, "grad_norm": 0.9296875, "learning_rate": 0.0001992325446949245, "loss": 1.5198, "step": 1415 }, { "epoch": 0.03635890942529621, "grad_norm": 0.91015625, "learning_rate": 0.0001992319925815326, "loss": 1.4966, "step": 1416 }, { "epoch": 0.03638458662121803, "grad_norm": 1.0234375, "learning_rate": 0.00019923144027038034, "loss": 1.4983, "step": 1417 }, { "epoch": 0.03641026381713985, "grad_norm": 1.0546875, "learning_rate": 0.00019923088776146878, "loss": 1.3515, "step": 1418 }, { "epoch": 0.03643594101306167, "grad_norm": 0.97265625, "learning_rate": 0.000199230335054799, "loss": 1.3502, "step": 1419 }, { "epoch": 0.036461618208983484, "grad_norm": 0.9921875, "learning_rate": 0.00019922978215037215, "loss": 1.4101, "step": 1420 }, { "epoch": 0.03648729540490531, "grad_norm": 0.96875, "learning_rate": 0.0001992292290481893, "loss": 1.2212, "step": 1421 }, { "epoch": 0.03651297260082713, "grad_norm": 0.8671875, "learning_rate": 0.00019922867574825157, "loss": 1.2241, "step": 1422 }, { "epoch": 0.036538649796748944, "grad_norm": 1.0234375, "learning_rate": 0.00019922812225056004, "loss": 1.456, "step": 1423 }, { "epoch": 0.036564326992670766, "grad_norm": 0.953125, "learning_rate": 0.00019922756855511584, "loss": 1.4287, "step": 1424 }, { "epoch": 0.03659000418859258, "grad_norm": 0.98046875, "learning_rate": 0.00019922701466192004, "loss": 1.3166, "step": 1425 }, { "epoch": 0.036615681384514404, "grad_norm": 0.984375, "learning_rate": 0.00019922646057097377, "loss": 1.5949, "step": 1426 }, { "epoch": 0.036641358580436226, "grad_norm": 0.9765625, "learning_rate": 0.00019922590628227812, "loss": 1.3839, "step": 1427 }, { "epoch": 0.03666703577635804, "grad_norm": 0.875, "learning_rate": 0.00019922535179583422, "loss": 1.4304, "step": 1428 }, { "epoch": 0.03669271297227986, "grad_norm": 1.0546875, "learning_rate": 0.00019922479711164315, "loss": 1.333, "step": 1429 }, { "epoch": 0.03671839016820168, "grad_norm": 0.99609375, "learning_rate": 0.000199224242229706, "loss": 1.3465, "step": 1430 }, { "epoch": 0.0367440673641235, "grad_norm": 0.8828125, "learning_rate": 0.00019922368715002392, "loss": 1.4738, "step": 1431 }, { "epoch": 0.03676974456004532, "grad_norm": 1.015625, "learning_rate": 0.000199223131872598, "loss": 1.3818, "step": 1432 }, { "epoch": 0.03679542175596714, "grad_norm": 0.953125, "learning_rate": 0.00019922257639742934, "loss": 1.3826, "step": 1433 }, { "epoch": 0.03682109895188896, "grad_norm": 0.9765625, "learning_rate": 0.00019922202072451904, "loss": 1.3152, "step": 1434 }, { "epoch": 0.036846776147810775, "grad_norm": 1.0546875, "learning_rate": 0.0001992214648538682, "loss": 1.3862, "step": 1435 }, { "epoch": 0.0368724533437326, "grad_norm": 0.95703125, "learning_rate": 0.00019922090878547798, "loss": 1.3916, "step": 1436 }, { "epoch": 0.03689813053965442, "grad_norm": 1.0703125, "learning_rate": 0.0001992203525193494, "loss": 1.379, "step": 1437 }, { "epoch": 0.036923807735576235, "grad_norm": 1.046875, "learning_rate": 0.00019921979605548368, "loss": 1.3768, "step": 1438 }, { "epoch": 0.03694948493149806, "grad_norm": 0.9921875, "learning_rate": 0.00019921923939388182, "loss": 1.4391, "step": 1439 }, { "epoch": 0.03697516212741987, "grad_norm": 0.9765625, "learning_rate": 0.000199218682534545, "loss": 1.4632, "step": 1440 }, { "epoch": 0.037000839323341694, "grad_norm": 0.94921875, "learning_rate": 0.0001992181254774743, "loss": 1.4478, "step": 1441 }, { "epoch": 0.037026516519263517, "grad_norm": 0.98046875, "learning_rate": 0.00019921756822267086, "loss": 1.3356, "step": 1442 }, { "epoch": 0.03705219371518533, "grad_norm": 0.9296875, "learning_rate": 0.00019921701077013575, "loss": 1.5418, "step": 1443 }, { "epoch": 0.037077870911107154, "grad_norm": 0.9609375, "learning_rate": 0.0001992164531198701, "loss": 1.4791, "step": 1444 }, { "epoch": 0.03710354810702897, "grad_norm": 1.0078125, "learning_rate": 0.00019921589527187504, "loss": 1.5326, "step": 1445 }, { "epoch": 0.03712922530295079, "grad_norm": 1.015625, "learning_rate": 0.00019921533722615164, "loss": 1.551, "step": 1446 }, { "epoch": 0.03715490249887261, "grad_norm": 1.0546875, "learning_rate": 0.000199214778982701, "loss": 1.4105, "step": 1447 }, { "epoch": 0.03718057969479443, "grad_norm": 1.28125, "learning_rate": 0.00019921422054152435, "loss": 1.328, "step": 1448 }, { "epoch": 0.03720625689071625, "grad_norm": 0.94921875, "learning_rate": 0.00019921366190262267, "loss": 1.3933, "step": 1449 }, { "epoch": 0.037231934086638066, "grad_norm": 0.97265625, "learning_rate": 0.00019921310306599712, "loss": 1.36, "step": 1450 }, { "epoch": 0.03725761128255989, "grad_norm": 1.0546875, "learning_rate": 0.00019921254403164885, "loss": 1.5009, "step": 1451 }, { "epoch": 0.03728328847848171, "grad_norm": 1.0625, "learning_rate": 0.0001992119847995789, "loss": 1.521, "step": 1452 }, { "epoch": 0.037308965674403526, "grad_norm": 1.015625, "learning_rate": 0.00019921142536978844, "loss": 1.5055, "step": 1453 }, { "epoch": 0.03733464287032535, "grad_norm": 1.015625, "learning_rate": 0.0001992108657422786, "loss": 1.5727, "step": 1454 }, { "epoch": 0.03736032006624716, "grad_norm": 1.0546875, "learning_rate": 0.00019921030591705047, "loss": 1.4005, "step": 1455 }, { "epoch": 0.037385997262168985, "grad_norm": 0.92578125, "learning_rate": 0.00019920974589410513, "loss": 1.4962, "step": 1456 }, { "epoch": 0.03741167445809081, "grad_norm": 0.9921875, "learning_rate": 0.0001992091856734437, "loss": 1.4774, "step": 1457 }, { "epoch": 0.03743735165401262, "grad_norm": 1.0859375, "learning_rate": 0.00019920862525506737, "loss": 1.4959, "step": 1458 }, { "epoch": 0.037463028849934445, "grad_norm": 0.94140625, "learning_rate": 0.0001992080646389772, "loss": 1.3618, "step": 1459 }, { "epoch": 0.03748870604585626, "grad_norm": 1.015625, "learning_rate": 0.00019920750382517434, "loss": 1.3206, "step": 1460 }, { "epoch": 0.03751438324177808, "grad_norm": 0.9921875, "learning_rate": 0.00019920694281365986, "loss": 1.1625, "step": 1461 }, { "epoch": 0.037540060437699904, "grad_norm": 0.97265625, "learning_rate": 0.0001992063816044349, "loss": 1.3956, "step": 1462 }, { "epoch": 0.03756573763362172, "grad_norm": 0.95703125, "learning_rate": 0.00019920582019750062, "loss": 1.3738, "step": 1463 }, { "epoch": 0.03759141482954354, "grad_norm": 0.96484375, "learning_rate": 0.00019920525859285805, "loss": 1.2921, "step": 1464 }, { "epoch": 0.03761709202546536, "grad_norm": 0.9453125, "learning_rate": 0.0001992046967905084, "loss": 1.3562, "step": 1465 }, { "epoch": 0.03764276922138718, "grad_norm": 0.96875, "learning_rate": 0.00019920413479045275, "loss": 1.3119, "step": 1466 }, { "epoch": 0.037668446417309, "grad_norm": 0.89453125, "learning_rate": 0.00019920357259269218, "loss": 1.3312, "step": 1467 }, { "epoch": 0.037694123613230816, "grad_norm": 0.92578125, "learning_rate": 0.0001992030101972279, "loss": 1.3349, "step": 1468 }, { "epoch": 0.03771980080915264, "grad_norm": 0.97265625, "learning_rate": 0.00019920244760406096, "loss": 1.4584, "step": 1469 }, { "epoch": 0.037745478005074454, "grad_norm": 0.8984375, "learning_rate": 0.00019920188481319247, "loss": 1.2528, "step": 1470 }, { "epoch": 0.037771155200996276, "grad_norm": 0.921875, "learning_rate": 0.00019920132182462362, "loss": 1.4069, "step": 1471 }, { "epoch": 0.03779683239691809, "grad_norm": 0.9453125, "learning_rate": 0.00019920075863835552, "loss": 1.1422, "step": 1472 }, { "epoch": 0.03782250959283991, "grad_norm": 0.94921875, "learning_rate": 0.00019920019525438925, "loss": 1.495, "step": 1473 }, { "epoch": 0.037848186788761735, "grad_norm": 1.0390625, "learning_rate": 0.0001991996316727259, "loss": 1.5764, "step": 1474 }, { "epoch": 0.03787386398468355, "grad_norm": 0.91796875, "learning_rate": 0.0001991990678933667, "loss": 1.4822, "step": 1475 }, { "epoch": 0.03789954118060537, "grad_norm": 0.953125, "learning_rate": 0.00019919850391631272, "loss": 1.1688, "step": 1476 }, { "epoch": 0.03792521837652719, "grad_norm": 0.9296875, "learning_rate": 0.00019919793974156505, "loss": 1.3047, "step": 1477 }, { "epoch": 0.03795089557244901, "grad_norm": 0.9609375, "learning_rate": 0.00019919737536912489, "loss": 1.4364, "step": 1478 }, { "epoch": 0.03797657276837083, "grad_norm": 1.015625, "learning_rate": 0.00019919681079899327, "loss": 1.3653, "step": 1479 }, { "epoch": 0.03800224996429265, "grad_norm": 0.890625, "learning_rate": 0.0001991962460311714, "loss": 1.3193, "step": 1480 }, { "epoch": 0.03802792716021447, "grad_norm": 0.984375, "learning_rate": 0.00019919568106566038, "loss": 1.3126, "step": 1481 }, { "epoch": 0.038053604356136285, "grad_norm": 0.94921875, "learning_rate": 0.0001991951159024613, "loss": 1.4965, "step": 1482 }, { "epoch": 0.03807928155205811, "grad_norm": 1.7109375, "learning_rate": 0.00019919455054157533, "loss": 1.4155, "step": 1483 }, { "epoch": 0.03810495874797993, "grad_norm": 0.98046875, "learning_rate": 0.00019919398498300357, "loss": 1.2981, "step": 1484 }, { "epoch": 0.038130635943901744, "grad_norm": 0.96875, "learning_rate": 0.0001991934192267472, "loss": 1.4848, "step": 1485 }, { "epoch": 0.03815631313982357, "grad_norm": 0.94140625, "learning_rate": 0.00019919285327280726, "loss": 1.3882, "step": 1486 }, { "epoch": 0.03818199033574538, "grad_norm": 0.9609375, "learning_rate": 0.00019919228712118493, "loss": 1.3575, "step": 1487 }, { "epoch": 0.038207667531667204, "grad_norm": 1.03125, "learning_rate": 0.00019919172077188132, "loss": 1.3743, "step": 1488 }, { "epoch": 0.038233344727589026, "grad_norm": 1.0390625, "learning_rate": 0.0001991911542248976, "loss": 1.3553, "step": 1489 }, { "epoch": 0.03825902192351084, "grad_norm": 1.0625, "learning_rate": 0.00019919058748023484, "loss": 1.3861, "step": 1490 }, { "epoch": 0.038284699119432664, "grad_norm": 1.0, "learning_rate": 0.00019919002053789422, "loss": 1.4908, "step": 1491 }, { "epoch": 0.03831037631535448, "grad_norm": 1.0, "learning_rate": 0.00019918945339787687, "loss": 1.4496, "step": 1492 }, { "epoch": 0.0383360535112763, "grad_norm": 0.921875, "learning_rate": 0.00019918888606018387, "loss": 1.3719, "step": 1493 }, { "epoch": 0.03836173070719812, "grad_norm": 0.859375, "learning_rate": 0.00019918831852481638, "loss": 1.2127, "step": 1494 }, { "epoch": 0.03838740790311994, "grad_norm": 0.96875, "learning_rate": 0.0001991877507917755, "loss": 1.4935, "step": 1495 }, { "epoch": 0.03841308509904176, "grad_norm": 0.91796875, "learning_rate": 0.00019918718286106245, "loss": 1.3855, "step": 1496 }, { "epoch": 0.038438762294963576, "grad_norm": 1.0234375, "learning_rate": 0.00019918661473267827, "loss": 1.376, "step": 1497 }, { "epoch": 0.0384644394908854, "grad_norm": 0.90234375, "learning_rate": 0.00019918604640662416, "loss": 1.4583, "step": 1498 }, { "epoch": 0.03849011668680722, "grad_norm": 0.8984375, "learning_rate": 0.0001991854778829012, "loss": 1.42, "step": 1499 }, { "epoch": 0.038515793882729035, "grad_norm": 0.921875, "learning_rate": 0.00019918490916151052, "loss": 1.52, "step": 1500 }, { "epoch": 0.03854147107865086, "grad_norm": 0.92578125, "learning_rate": 0.00019918434024245328, "loss": 1.401, "step": 1501 }, { "epoch": 0.03856714827457267, "grad_norm": 0.9296875, "learning_rate": 0.00019918377112573065, "loss": 1.5658, "step": 1502 }, { "epoch": 0.038592825470494495, "grad_norm": 0.9140625, "learning_rate": 0.0001991832018113437, "loss": 1.3486, "step": 1503 }, { "epoch": 0.03861850266641632, "grad_norm": 0.92578125, "learning_rate": 0.00019918263229929358, "loss": 1.3445, "step": 1504 }, { "epoch": 0.03864417986233813, "grad_norm": 0.9375, "learning_rate": 0.00019918206258958142, "loss": 1.2173, "step": 1505 }, { "epoch": 0.038669857058259954, "grad_norm": 0.91796875, "learning_rate": 0.0001991814926822084, "loss": 1.5226, "step": 1506 }, { "epoch": 0.03869553425418177, "grad_norm": 0.92578125, "learning_rate": 0.0001991809225771756, "loss": 1.3405, "step": 1507 }, { "epoch": 0.03872121145010359, "grad_norm": 1.0546875, "learning_rate": 0.0001991803522744842, "loss": 1.317, "step": 1508 }, { "epoch": 0.038746888646025414, "grad_norm": 0.8984375, "learning_rate": 0.0001991797817741353, "loss": 1.1821, "step": 1509 }, { "epoch": 0.03877256584194723, "grad_norm": 1.09375, "learning_rate": 0.00019917921107613006, "loss": 1.6162, "step": 1510 }, { "epoch": 0.03879824303786905, "grad_norm": 0.921875, "learning_rate": 0.0001991786401804696, "loss": 1.4272, "step": 1511 }, { "epoch": 0.038823920233790866, "grad_norm": 0.9375, "learning_rate": 0.0001991780690871551, "loss": 1.3422, "step": 1512 }, { "epoch": 0.03884959742971269, "grad_norm": 0.92578125, "learning_rate": 0.00019917749779618763, "loss": 1.4964, "step": 1513 }, { "epoch": 0.03887527462563451, "grad_norm": 0.84765625, "learning_rate": 0.0001991769263075684, "loss": 1.2891, "step": 1514 }, { "epoch": 0.038900951821556326, "grad_norm": 0.9453125, "learning_rate": 0.0001991763546212985, "loss": 1.2648, "step": 1515 }, { "epoch": 0.03892662901747815, "grad_norm": 1.0625, "learning_rate": 0.00019917578273737907, "loss": 1.3042, "step": 1516 }, { "epoch": 0.03895230621339996, "grad_norm": 0.8671875, "learning_rate": 0.00019917521065581127, "loss": 1.2898, "step": 1517 }, { "epoch": 0.038977983409321786, "grad_norm": 0.921875, "learning_rate": 0.00019917463837659626, "loss": 1.3854, "step": 1518 }, { "epoch": 0.03900366060524361, "grad_norm": 0.984375, "learning_rate": 0.00019917406589973512, "loss": 1.3566, "step": 1519 }, { "epoch": 0.03902933780116542, "grad_norm": 0.96875, "learning_rate": 0.00019917349322522902, "loss": 1.2936, "step": 1520 }, { "epoch": 0.039055014997087245, "grad_norm": 0.9609375, "learning_rate": 0.00019917292035307913, "loss": 1.3708, "step": 1521 }, { "epoch": 0.03908069219300906, "grad_norm": 0.9296875, "learning_rate": 0.00019917234728328658, "loss": 1.5504, "step": 1522 }, { "epoch": 0.03910636938893088, "grad_norm": 1.0078125, "learning_rate": 0.00019917177401585246, "loss": 1.3721, "step": 1523 }, { "epoch": 0.0391320465848527, "grad_norm": 0.90625, "learning_rate": 0.000199171200550778, "loss": 1.3206, "step": 1524 }, { "epoch": 0.03915772378077452, "grad_norm": 0.92578125, "learning_rate": 0.00019917062688806425, "loss": 1.3414, "step": 1525 }, { "epoch": 0.03918340097669634, "grad_norm": 1.0078125, "learning_rate": 0.00019917005302771244, "loss": 1.4972, "step": 1526 }, { "epoch": 0.03920907817261816, "grad_norm": 1.015625, "learning_rate": 0.00019916947896972365, "loss": 1.4944, "step": 1527 }, { "epoch": 0.03923475536853998, "grad_norm": 0.875, "learning_rate": 0.00019916890471409905, "loss": 1.4231, "step": 1528 }, { "epoch": 0.039260432564461795, "grad_norm": 0.9296875, "learning_rate": 0.00019916833026083975, "loss": 1.4347, "step": 1529 }, { "epoch": 0.03928610976038362, "grad_norm": 1.0078125, "learning_rate": 0.00019916775560994697, "loss": 1.3661, "step": 1530 }, { "epoch": 0.03931178695630544, "grad_norm": 1.0078125, "learning_rate": 0.0001991671807614218, "loss": 1.5283, "step": 1531 }, { "epoch": 0.039337464152227254, "grad_norm": 0.94140625, "learning_rate": 0.00019916660571526538, "loss": 1.4445, "step": 1532 }, { "epoch": 0.039363141348149076, "grad_norm": 1.5390625, "learning_rate": 0.00019916603047147888, "loss": 1.6612, "step": 1533 }, { "epoch": 0.03938881854407089, "grad_norm": 0.9765625, "learning_rate": 0.00019916545503006344, "loss": 1.3085, "step": 1534 }, { "epoch": 0.039414495739992714, "grad_norm": 0.9296875, "learning_rate": 0.00019916487939102023, "loss": 1.3679, "step": 1535 }, { "epoch": 0.039440172935914536, "grad_norm": 0.95703125, "learning_rate": 0.00019916430355435032, "loss": 1.2591, "step": 1536 }, { "epoch": 0.03946585013183635, "grad_norm": 1.0078125, "learning_rate": 0.00019916372752005495, "loss": 1.3744, "step": 1537 }, { "epoch": 0.03949152732775817, "grad_norm": 0.95703125, "learning_rate": 0.0001991631512881352, "loss": 1.3794, "step": 1538 }, { "epoch": 0.03951720452367999, "grad_norm": 0.8671875, "learning_rate": 0.00019916257485859228, "loss": 1.3393, "step": 1539 }, { "epoch": 0.03954288171960181, "grad_norm": 0.9140625, "learning_rate": 0.0001991619982314273, "loss": 1.3299, "step": 1540 }, { "epoch": 0.03956855891552363, "grad_norm": 0.9609375, "learning_rate": 0.0001991614214066414, "loss": 1.51, "step": 1541 }, { "epoch": 0.03959423611144545, "grad_norm": 0.93359375, "learning_rate": 0.00019916084438423575, "loss": 1.314, "step": 1542 }, { "epoch": 0.03961991330736727, "grad_norm": 0.92578125, "learning_rate": 0.0001991602671642115, "loss": 1.3667, "step": 1543 }, { "epoch": 0.039645590503289085, "grad_norm": 0.9921875, "learning_rate": 0.00019915968974656978, "loss": 1.3948, "step": 1544 }, { "epoch": 0.03967126769921091, "grad_norm": 1.0, "learning_rate": 0.00019915911213131177, "loss": 1.3902, "step": 1545 }, { "epoch": 0.03969694489513273, "grad_norm": 0.98046875, "learning_rate": 0.0001991585343184386, "loss": 1.4541, "step": 1546 }, { "epoch": 0.039722622091054545, "grad_norm": 0.96484375, "learning_rate": 0.00019915795630795144, "loss": 1.3656, "step": 1547 }, { "epoch": 0.03974829928697637, "grad_norm": 0.94921875, "learning_rate": 0.0001991573780998514, "loss": 1.3135, "step": 1548 }, { "epoch": 0.03977397648289818, "grad_norm": 1.015625, "learning_rate": 0.00019915679969413969, "loss": 1.2685, "step": 1549 }, { "epoch": 0.039799653678820004, "grad_norm": 0.98046875, "learning_rate": 0.00019915622109081745, "loss": 1.4244, "step": 1550 }, { "epoch": 0.03982533087474183, "grad_norm": 0.984375, "learning_rate": 0.0001991556422898858, "loss": 1.4732, "step": 1551 }, { "epoch": 0.03985100807066364, "grad_norm": 0.9765625, "learning_rate": 0.00019915506329134586, "loss": 1.3535, "step": 1552 }, { "epoch": 0.039876685266585464, "grad_norm": 0.96484375, "learning_rate": 0.00019915448409519893, "loss": 1.2659, "step": 1553 }, { "epoch": 0.03990236246250728, "grad_norm": 0.94140625, "learning_rate": 0.000199153904701446, "loss": 1.3599, "step": 1554 }, { "epoch": 0.0399280396584291, "grad_norm": 0.953125, "learning_rate": 0.00019915332511008833, "loss": 1.3674, "step": 1555 }, { "epoch": 0.039953716854350924, "grad_norm": 0.98828125, "learning_rate": 0.00019915274532112702, "loss": 1.5626, "step": 1556 }, { "epoch": 0.03997939405027274, "grad_norm": 0.9375, "learning_rate": 0.00019915216533456325, "loss": 1.4518, "step": 1557 }, { "epoch": 0.04000507124619456, "grad_norm": 1.28125, "learning_rate": 0.00019915158515039817, "loss": 1.4206, "step": 1558 }, { "epoch": 0.040030748442116376, "grad_norm": 0.9296875, "learning_rate": 0.00019915100476863295, "loss": 1.2435, "step": 1559 }, { "epoch": 0.0400564256380382, "grad_norm": 0.94140625, "learning_rate": 0.00019915042418926871, "loss": 1.3289, "step": 1560 }, { "epoch": 0.04008210283396002, "grad_norm": 1.0625, "learning_rate": 0.00019914984341230666, "loss": 1.4664, "step": 1561 }, { "epoch": 0.040107780029881836, "grad_norm": 0.875, "learning_rate": 0.0001991492624377479, "loss": 1.3392, "step": 1562 }, { "epoch": 0.04013345722580366, "grad_norm": 0.95703125, "learning_rate": 0.00019914868126559365, "loss": 1.4754, "step": 1563 }, { "epoch": 0.04015913442172547, "grad_norm": 0.96875, "learning_rate": 0.000199148099895845, "loss": 1.1943, "step": 1564 }, { "epoch": 0.040184811617647295, "grad_norm": 0.953125, "learning_rate": 0.00019914751832850316, "loss": 1.3792, "step": 1565 }, { "epoch": 0.04021048881356912, "grad_norm": 0.98046875, "learning_rate": 0.00019914693656356927, "loss": 1.4542, "step": 1566 }, { "epoch": 0.04023616600949093, "grad_norm": 1.0234375, "learning_rate": 0.0001991463546010445, "loss": 1.3897, "step": 1567 }, { "epoch": 0.040261843205412755, "grad_norm": 0.98046875, "learning_rate": 0.00019914577244092998, "loss": 1.3639, "step": 1568 }, { "epoch": 0.04028752040133457, "grad_norm": 0.93359375, "learning_rate": 0.0001991451900832269, "loss": 1.4334, "step": 1569 }, { "epoch": 0.04031319759725639, "grad_norm": 0.94140625, "learning_rate": 0.0001991446075279364, "loss": 1.3039, "step": 1570 }, { "epoch": 0.040338874793178214, "grad_norm": 0.98828125, "learning_rate": 0.00019914402477505967, "loss": 1.3307, "step": 1571 }, { "epoch": 0.04036455198910003, "grad_norm": 0.98828125, "learning_rate": 0.00019914344182459786, "loss": 1.3686, "step": 1572 }, { "epoch": 0.04039022918502185, "grad_norm": 0.8359375, "learning_rate": 0.00019914285867655212, "loss": 1.245, "step": 1573 }, { "epoch": 0.04041590638094367, "grad_norm": 1.015625, "learning_rate": 0.0001991422753309236, "loss": 1.388, "step": 1574 }, { "epoch": 0.04044158357686549, "grad_norm": 0.96484375, "learning_rate": 0.0001991416917877135, "loss": 1.365, "step": 1575 }, { "epoch": 0.040467260772787304, "grad_norm": 0.96484375, "learning_rate": 0.00019914110804692295, "loss": 1.3769, "step": 1576 }, { "epoch": 0.040492937968709126, "grad_norm": 0.84375, "learning_rate": 0.00019914052410855315, "loss": 1.1926, "step": 1577 }, { "epoch": 0.04051861516463095, "grad_norm": 0.9453125, "learning_rate": 0.00019913993997260524, "loss": 1.1941, "step": 1578 }, { "epoch": 0.040544292360552764, "grad_norm": 0.99609375, "learning_rate": 0.00019913935563908034, "loss": 1.3306, "step": 1579 }, { "epoch": 0.040569969556474586, "grad_norm": 0.95703125, "learning_rate": 0.0001991387711079797, "loss": 1.3418, "step": 1580 }, { "epoch": 0.0405956467523964, "grad_norm": 1.0, "learning_rate": 0.00019913818637930445, "loss": 1.3973, "step": 1581 }, { "epoch": 0.04062132394831822, "grad_norm": 1.0, "learning_rate": 0.0001991376014530557, "loss": 1.426, "step": 1582 }, { "epoch": 0.040647001144240046, "grad_norm": 1.046875, "learning_rate": 0.0001991370163292347, "loss": 1.5343, "step": 1583 }, { "epoch": 0.04067267834016186, "grad_norm": 1.2109375, "learning_rate": 0.0001991364310078426, "loss": 1.4846, "step": 1584 }, { "epoch": 0.04069835553608368, "grad_norm": 0.9140625, "learning_rate": 0.00019913584548888054, "loss": 1.4194, "step": 1585 }, { "epoch": 0.0407240327320055, "grad_norm": 0.8984375, "learning_rate": 0.00019913525977234968, "loss": 1.1677, "step": 1586 }, { "epoch": 0.04074970992792732, "grad_norm": 0.94921875, "learning_rate": 0.00019913467385825122, "loss": 1.4102, "step": 1587 }, { "epoch": 0.04077538712384914, "grad_norm": 0.921875, "learning_rate": 0.0001991340877465863, "loss": 1.276, "step": 1588 }, { "epoch": 0.04080106431977096, "grad_norm": 1.0234375, "learning_rate": 0.00019913350143735607, "loss": 1.3191, "step": 1589 }, { "epoch": 0.04082674151569278, "grad_norm": 0.91015625, "learning_rate": 0.00019913291493056176, "loss": 1.3794, "step": 1590 }, { "epoch": 0.040852418711614595, "grad_norm": 0.8203125, "learning_rate": 0.0001991323282262045, "loss": 1.3338, "step": 1591 }, { "epoch": 0.04087809590753642, "grad_norm": 1.015625, "learning_rate": 0.00019913174132428547, "loss": 1.4016, "step": 1592 }, { "epoch": 0.04090377310345824, "grad_norm": 0.96484375, "learning_rate": 0.00019913115422480582, "loss": 1.3537, "step": 1593 }, { "epoch": 0.040929450299380055, "grad_norm": 1.1015625, "learning_rate": 0.00019913056692776672, "loss": 1.4297, "step": 1594 }, { "epoch": 0.04095512749530188, "grad_norm": 0.98828125, "learning_rate": 0.00019912997943316937, "loss": 1.3011, "step": 1595 }, { "epoch": 0.04098080469122369, "grad_norm": 0.9296875, "learning_rate": 0.00019912939174101494, "loss": 1.3994, "step": 1596 }, { "epoch": 0.041006481887145514, "grad_norm": 0.9609375, "learning_rate": 0.00019912880385130458, "loss": 1.4406, "step": 1597 }, { "epoch": 0.041032159083067336, "grad_norm": 0.9765625, "learning_rate": 0.00019912821576403947, "loss": 1.4418, "step": 1598 }, { "epoch": 0.04105783627898915, "grad_norm": 0.98828125, "learning_rate": 0.00019912762747922077, "loss": 1.595, "step": 1599 }, { "epoch": 0.041083513474910974, "grad_norm": 0.97265625, "learning_rate": 0.00019912703899684965, "loss": 1.4109, "step": 1600 }, { "epoch": 0.04110919067083279, "grad_norm": 0.9375, "learning_rate": 0.00019912645031692732, "loss": 1.4194, "step": 1601 }, { "epoch": 0.04113486786675461, "grad_norm": 1.0234375, "learning_rate": 0.00019912586143945493, "loss": 1.4314, "step": 1602 }, { "epoch": 0.04116054506267643, "grad_norm": 1.015625, "learning_rate": 0.00019912527236443363, "loss": 1.3965, "step": 1603 }, { "epoch": 0.04118622225859825, "grad_norm": 0.8828125, "learning_rate": 0.00019912468309186463, "loss": 1.205, "step": 1604 }, { "epoch": 0.04121189945452007, "grad_norm": 0.8984375, "learning_rate": 0.0001991240936217491, "loss": 1.3464, "step": 1605 }, { "epoch": 0.041237576650441886, "grad_norm": 0.91015625, "learning_rate": 0.00019912350395408817, "loss": 1.3733, "step": 1606 }, { "epoch": 0.04126325384636371, "grad_norm": 0.9609375, "learning_rate": 0.0001991229140888831, "loss": 1.3432, "step": 1607 }, { "epoch": 0.04128893104228553, "grad_norm": 0.8828125, "learning_rate": 0.00019912232402613498, "loss": 1.4234, "step": 1608 }, { "epoch": 0.041314608238207345, "grad_norm": 0.87890625, "learning_rate": 0.00019912173376584503, "loss": 1.3509, "step": 1609 }, { "epoch": 0.04134028543412917, "grad_norm": 0.99609375, "learning_rate": 0.0001991211433080144, "loss": 1.266, "step": 1610 }, { "epoch": 0.04136596263005098, "grad_norm": 0.8828125, "learning_rate": 0.00019912055265264433, "loss": 1.3016, "step": 1611 }, { "epoch": 0.041391639825972805, "grad_norm": 1.015625, "learning_rate": 0.00019911996179973593, "loss": 1.4575, "step": 1612 }, { "epoch": 0.04141731702189463, "grad_norm": 0.96875, "learning_rate": 0.0001991193707492904, "loss": 1.5211, "step": 1613 }, { "epoch": 0.04144299421781644, "grad_norm": 0.94140625, "learning_rate": 0.0001991187795013089, "loss": 1.3238, "step": 1614 }, { "epoch": 0.041468671413738264, "grad_norm": 0.91796875, "learning_rate": 0.00019911818805579265, "loss": 1.4201, "step": 1615 }, { "epoch": 0.04149434860966008, "grad_norm": 0.90234375, "learning_rate": 0.0001991175964127428, "loss": 1.3077, "step": 1616 }, { "epoch": 0.0415200258055819, "grad_norm": 0.9375, "learning_rate": 0.0001991170045721605, "loss": 1.4672, "step": 1617 }, { "epoch": 0.041545703001503724, "grad_norm": 0.98828125, "learning_rate": 0.000199116412534047, "loss": 1.4139, "step": 1618 }, { "epoch": 0.04157138019742554, "grad_norm": 0.87109375, "learning_rate": 0.00019911582029840346, "loss": 1.4184, "step": 1619 }, { "epoch": 0.04159705739334736, "grad_norm": 0.89453125, "learning_rate": 0.00019911522786523103, "loss": 1.4186, "step": 1620 }, { "epoch": 0.04162273458926918, "grad_norm": 0.9375, "learning_rate": 0.00019911463523453088, "loss": 1.4161, "step": 1621 }, { "epoch": 0.041648411785191, "grad_norm": 0.83984375, "learning_rate": 0.00019911404240630424, "loss": 1.2604, "step": 1622 }, { "epoch": 0.04167408898111282, "grad_norm": 0.93359375, "learning_rate": 0.00019911344938055222, "loss": 1.3052, "step": 1623 }, { "epoch": 0.041699766177034636, "grad_norm": 1.03125, "learning_rate": 0.0001991128561572761, "loss": 1.5536, "step": 1624 }, { "epoch": 0.04172544337295646, "grad_norm": 0.94140625, "learning_rate": 0.00019911226273647698, "loss": 1.1958, "step": 1625 }, { "epoch": 0.041751120568878274, "grad_norm": 0.96484375, "learning_rate": 0.0001991116691181561, "loss": 1.3935, "step": 1626 }, { "epoch": 0.041776797764800096, "grad_norm": 0.97265625, "learning_rate": 0.0001991110753023146, "loss": 1.296, "step": 1627 }, { "epoch": 0.04180247496072191, "grad_norm": 0.98046875, "learning_rate": 0.00019911048128895366, "loss": 1.3806, "step": 1628 }, { "epoch": 0.04182815215664373, "grad_norm": 0.91796875, "learning_rate": 0.0001991098870780745, "loss": 1.2721, "step": 1629 }, { "epoch": 0.041853829352565555, "grad_norm": 0.98046875, "learning_rate": 0.0001991092926696783, "loss": 1.7051, "step": 1630 }, { "epoch": 0.04187950654848737, "grad_norm": 0.99609375, "learning_rate": 0.00019910869806376625, "loss": 1.5318, "step": 1631 }, { "epoch": 0.04190518374440919, "grad_norm": 0.97265625, "learning_rate": 0.00019910810326033947, "loss": 1.2405, "step": 1632 }, { "epoch": 0.04193086094033101, "grad_norm": 0.875, "learning_rate": 0.0001991075082593992, "loss": 1.2528, "step": 1633 }, { "epoch": 0.04195653813625283, "grad_norm": 0.90625, "learning_rate": 0.00019910691306094665, "loss": 1.4062, "step": 1634 }, { "epoch": 0.04198221533217465, "grad_norm": 0.93359375, "learning_rate": 0.00019910631766498294, "loss": 1.3453, "step": 1635 }, { "epoch": 0.04200789252809647, "grad_norm": 0.89453125, "learning_rate": 0.00019910572207150931, "loss": 1.3607, "step": 1636 }, { "epoch": 0.04203356972401829, "grad_norm": 0.90234375, "learning_rate": 0.00019910512628052693, "loss": 1.2142, "step": 1637 }, { "epoch": 0.042059246919940105, "grad_norm": 1.0, "learning_rate": 0.000199104530292037, "loss": 1.2022, "step": 1638 }, { "epoch": 0.04208492411586193, "grad_norm": 0.96875, "learning_rate": 0.00019910393410604068, "loss": 1.5272, "step": 1639 }, { "epoch": 0.04211060131178375, "grad_norm": 0.93359375, "learning_rate": 0.00019910333772253915, "loss": 1.3595, "step": 1640 }, { "epoch": 0.042136278507705564, "grad_norm": 1.0078125, "learning_rate": 0.00019910274114153363, "loss": 1.4432, "step": 1641 }, { "epoch": 0.042161955703627386, "grad_norm": 0.9453125, "learning_rate": 0.0001991021443630253, "loss": 1.225, "step": 1642 }, { "epoch": 0.0421876328995492, "grad_norm": 0.9296875, "learning_rate": 0.00019910154738701536, "loss": 1.6048, "step": 1643 }, { "epoch": 0.042213310095471024, "grad_norm": 0.94140625, "learning_rate": 0.00019910095021350498, "loss": 1.3443, "step": 1644 }, { "epoch": 0.042238987291392846, "grad_norm": 0.91015625, "learning_rate": 0.00019910035284249536, "loss": 1.4495, "step": 1645 }, { "epoch": 0.04226466448731466, "grad_norm": 0.9140625, "learning_rate": 0.0001990997552739877, "loss": 1.3786, "step": 1646 }, { "epoch": 0.04229034168323648, "grad_norm": 0.93359375, "learning_rate": 0.00019909915750798316, "loss": 1.3464, "step": 1647 }, { "epoch": 0.0423160188791583, "grad_norm": 0.91015625, "learning_rate": 0.00019909855954448297, "loss": 1.5096, "step": 1648 }, { "epoch": 0.04234169607508012, "grad_norm": 0.95703125, "learning_rate": 0.00019909796138348828, "loss": 1.461, "step": 1649 }, { "epoch": 0.04236737327100194, "grad_norm": 0.94140625, "learning_rate": 0.00019909736302500033, "loss": 1.3549, "step": 1650 }, { "epoch": 0.04239305046692376, "grad_norm": 0.96875, "learning_rate": 0.00019909676446902027, "loss": 1.4585, "step": 1651 }, { "epoch": 0.04241872766284558, "grad_norm": 0.90234375, "learning_rate": 0.0001990961657155493, "loss": 1.4361, "step": 1652 }, { "epoch": 0.042444404858767396, "grad_norm": 0.91015625, "learning_rate": 0.00019909556676458862, "loss": 1.3659, "step": 1653 }, { "epoch": 0.04247008205468922, "grad_norm": 1.046875, "learning_rate": 0.00019909496761613945, "loss": 1.5284, "step": 1654 }, { "epoch": 0.04249575925061104, "grad_norm": 0.91796875, "learning_rate": 0.00019909436827020294, "loss": 1.3675, "step": 1655 }, { "epoch": 0.042521436446532855, "grad_norm": 0.8984375, "learning_rate": 0.00019909376872678035, "loss": 1.5367, "step": 1656 }, { "epoch": 0.04254711364245468, "grad_norm": 0.9921875, "learning_rate": 0.00019909316898587278, "loss": 1.3617, "step": 1657 }, { "epoch": 0.04257279083837649, "grad_norm": 0.94921875, "learning_rate": 0.0001990925690474815, "loss": 1.3759, "step": 1658 }, { "epoch": 0.042598468034298315, "grad_norm": 1.03125, "learning_rate": 0.00019909196891160767, "loss": 1.3926, "step": 1659 }, { "epoch": 0.04262414523022014, "grad_norm": 1.0234375, "learning_rate": 0.0001990913685782525, "loss": 1.4042, "step": 1660 }, { "epoch": 0.04264982242614195, "grad_norm": 1.0, "learning_rate": 0.00019909076804741716, "loss": 1.5368, "step": 1661 }, { "epoch": 0.042675499622063774, "grad_norm": 0.8359375, "learning_rate": 0.0001990901673191029, "loss": 1.2533, "step": 1662 }, { "epoch": 0.04270117681798559, "grad_norm": 0.87109375, "learning_rate": 0.0001990895663933109, "loss": 1.2974, "step": 1663 }, { "epoch": 0.04272685401390741, "grad_norm": 0.91015625, "learning_rate": 0.00019908896527004232, "loss": 1.2865, "step": 1664 }, { "epoch": 0.042752531209829234, "grad_norm": 0.921875, "learning_rate": 0.00019908836394929837, "loss": 1.2297, "step": 1665 }, { "epoch": 0.04277820840575105, "grad_norm": 0.9453125, "learning_rate": 0.0001990877624310803, "loss": 1.3115, "step": 1666 }, { "epoch": 0.04280388560167287, "grad_norm": 0.8984375, "learning_rate": 0.00019908716071538924, "loss": 1.3543, "step": 1667 }, { "epoch": 0.042829562797594686, "grad_norm": 0.98828125, "learning_rate": 0.00019908655880222643, "loss": 1.4775, "step": 1668 }, { "epoch": 0.04285523999351651, "grad_norm": 0.87890625, "learning_rate": 0.00019908595669159307, "loss": 1.2797, "step": 1669 }, { "epoch": 0.04288091718943833, "grad_norm": 0.93359375, "learning_rate": 0.00019908535438349032, "loss": 1.4395, "step": 1670 }, { "epoch": 0.042906594385360146, "grad_norm": 0.86328125, "learning_rate": 0.00019908475187791944, "loss": 1.3158, "step": 1671 }, { "epoch": 0.04293227158128197, "grad_norm": 0.91015625, "learning_rate": 0.00019908414917488157, "loss": 1.4196, "step": 1672 }, { "epoch": 0.04295794877720378, "grad_norm": 0.96875, "learning_rate": 0.00019908354627437796, "loss": 1.3912, "step": 1673 }, { "epoch": 0.042983625973125605, "grad_norm": 0.9609375, "learning_rate": 0.00019908294317640978, "loss": 1.3021, "step": 1674 }, { "epoch": 0.04300930316904743, "grad_norm": 0.94921875, "learning_rate": 0.00019908233988097824, "loss": 1.2208, "step": 1675 }, { "epoch": 0.04303498036496924, "grad_norm": 0.9453125, "learning_rate": 0.00019908173638808458, "loss": 1.3677, "step": 1676 }, { "epoch": 0.043060657560891065, "grad_norm": 0.91796875, "learning_rate": 0.00019908113269772993, "loss": 1.3349, "step": 1677 }, { "epoch": 0.04308633475681288, "grad_norm": 0.93359375, "learning_rate": 0.00019908052880991556, "loss": 1.4401, "step": 1678 }, { "epoch": 0.0431120119527347, "grad_norm": 0.94921875, "learning_rate": 0.00019907992472464265, "loss": 1.2382, "step": 1679 }, { "epoch": 0.04313768914865652, "grad_norm": 0.875, "learning_rate": 0.0001990793204419124, "loss": 1.3762, "step": 1680 }, { "epoch": 0.04316336634457834, "grad_norm": 0.984375, "learning_rate": 0.000199078715961726, "loss": 1.2757, "step": 1681 }, { "epoch": 0.04318904354050016, "grad_norm": 0.9140625, "learning_rate": 0.00019907811128408467, "loss": 1.4355, "step": 1682 }, { "epoch": 0.04321472073642198, "grad_norm": 0.8984375, "learning_rate": 0.0001990775064089896, "loss": 1.4188, "step": 1683 }, { "epoch": 0.0432403979323438, "grad_norm": 0.91796875, "learning_rate": 0.00019907690133644202, "loss": 1.2681, "step": 1684 }, { "epoch": 0.043266075128265614, "grad_norm": 0.859375, "learning_rate": 0.00019907629606644313, "loss": 1.2036, "step": 1685 }, { "epoch": 0.04329175232418744, "grad_norm": 0.85546875, "learning_rate": 0.00019907569059899412, "loss": 1.2254, "step": 1686 }, { "epoch": 0.04331742952010926, "grad_norm": 1.03125, "learning_rate": 0.00019907508493409624, "loss": 1.6581, "step": 1687 }, { "epoch": 0.043343106716031074, "grad_norm": 0.828125, "learning_rate": 0.00019907447907175062, "loss": 1.3669, "step": 1688 }, { "epoch": 0.043368783911952896, "grad_norm": 0.9296875, "learning_rate": 0.00019907387301195854, "loss": 1.2608, "step": 1689 }, { "epoch": 0.04339446110787471, "grad_norm": 0.93359375, "learning_rate": 0.00019907326675472117, "loss": 1.4503, "step": 1690 }, { "epoch": 0.043420138303796534, "grad_norm": 0.94921875, "learning_rate": 0.00019907266030003975, "loss": 1.3387, "step": 1691 }, { "epoch": 0.043445815499718356, "grad_norm": 0.9375, "learning_rate": 0.00019907205364791546, "loss": 1.3771, "step": 1692 }, { "epoch": 0.04347149269564017, "grad_norm": 0.86328125, "learning_rate": 0.0001990714467983495, "loss": 1.4836, "step": 1693 }, { "epoch": 0.04349716989156199, "grad_norm": 0.98046875, "learning_rate": 0.00019907083975134312, "loss": 1.2994, "step": 1694 }, { "epoch": 0.04352284708748381, "grad_norm": 0.87109375, "learning_rate": 0.00019907023250689747, "loss": 1.2572, "step": 1695 }, { "epoch": 0.04354852428340563, "grad_norm": 0.8671875, "learning_rate": 0.00019906962506501384, "loss": 1.3051, "step": 1696 }, { "epoch": 0.04357420147932745, "grad_norm": 0.94921875, "learning_rate": 0.00019906901742569336, "loss": 1.1798, "step": 1697 }, { "epoch": 0.04359987867524927, "grad_norm": 0.92578125, "learning_rate": 0.00019906840958893728, "loss": 1.3665, "step": 1698 }, { "epoch": 0.04362555587117109, "grad_norm": 0.890625, "learning_rate": 0.00019906780155474682, "loss": 1.273, "step": 1699 }, { "epoch": 0.043651233067092905, "grad_norm": 0.953125, "learning_rate": 0.00019906719332312317, "loss": 1.4202, "step": 1700 }, { "epoch": 0.04367691026301473, "grad_norm": 1.0, "learning_rate": 0.00019906658489406755, "loss": 1.3708, "step": 1701 }, { "epoch": 0.04370258745893655, "grad_norm": 0.87890625, "learning_rate": 0.0001990659762675812, "loss": 1.3034, "step": 1702 }, { "epoch": 0.043728264654858365, "grad_norm": 0.8828125, "learning_rate": 0.0001990653674436653, "loss": 1.28, "step": 1703 }, { "epoch": 0.04375394185078019, "grad_norm": 0.84375, "learning_rate": 0.00019906475842232103, "loss": 1.3439, "step": 1704 }, { "epoch": 0.043779619046702, "grad_norm": 1.0078125, "learning_rate": 0.0001990641492035497, "loss": 1.3427, "step": 1705 }, { "epoch": 0.043805296242623824, "grad_norm": 0.875, "learning_rate": 0.00019906353978735246, "loss": 1.5143, "step": 1706 }, { "epoch": 0.043830973438545646, "grad_norm": 0.93359375, "learning_rate": 0.00019906293017373053, "loss": 1.4082, "step": 1707 }, { "epoch": 0.04385665063446746, "grad_norm": 0.8828125, "learning_rate": 0.0001990623203626851, "loss": 1.1209, "step": 1708 }, { "epoch": 0.043882327830389284, "grad_norm": 0.91015625, "learning_rate": 0.00019906171035421742, "loss": 1.4365, "step": 1709 }, { "epoch": 0.0439080050263111, "grad_norm": 0.9375, "learning_rate": 0.00019906110014832873, "loss": 1.379, "step": 1710 }, { "epoch": 0.04393368222223292, "grad_norm": 0.953125, "learning_rate": 0.00019906048974502018, "loss": 1.3833, "step": 1711 }, { "epoch": 0.04395935941815474, "grad_norm": 0.94140625, "learning_rate": 0.00019905987914429306, "loss": 1.235, "step": 1712 }, { "epoch": 0.04398503661407656, "grad_norm": 0.98046875, "learning_rate": 0.00019905926834614852, "loss": 1.4068, "step": 1713 }, { "epoch": 0.04401071380999838, "grad_norm": 0.87890625, "learning_rate": 0.0001990586573505878, "loss": 1.4553, "step": 1714 }, { "epoch": 0.044036391005920196, "grad_norm": 0.9765625, "learning_rate": 0.00019905804615761213, "loss": 1.5087, "step": 1715 }, { "epoch": 0.04406206820184202, "grad_norm": 1.0, "learning_rate": 0.00019905743476722275, "loss": 1.3817, "step": 1716 }, { "epoch": 0.04408774539776384, "grad_norm": 0.86328125, "learning_rate": 0.00019905682317942084, "loss": 1.3351, "step": 1717 }, { "epoch": 0.044113422593685656, "grad_norm": 0.9140625, "learning_rate": 0.0001990562113942076, "loss": 1.345, "step": 1718 }, { "epoch": 0.04413909978960748, "grad_norm": 0.87890625, "learning_rate": 0.00019905559941158432, "loss": 1.3873, "step": 1719 }, { "epoch": 0.04416477698552929, "grad_norm": 0.91015625, "learning_rate": 0.00019905498723155215, "loss": 1.2417, "step": 1720 }, { "epoch": 0.044190454181451115, "grad_norm": 0.9375, "learning_rate": 0.00019905437485411235, "loss": 1.5311, "step": 1721 }, { "epoch": 0.04421613137737294, "grad_norm": 1.0078125, "learning_rate": 0.00019905376227926614, "loss": 1.2231, "step": 1722 }, { "epoch": 0.04424180857329475, "grad_norm": 0.90234375, "learning_rate": 0.0001990531495070147, "loss": 1.3523, "step": 1723 }, { "epoch": 0.044267485769216575, "grad_norm": 0.91015625, "learning_rate": 0.0001990525365373593, "loss": 1.2911, "step": 1724 }, { "epoch": 0.04429316296513839, "grad_norm": 0.93359375, "learning_rate": 0.00019905192337030112, "loss": 1.3291, "step": 1725 }, { "epoch": 0.04431884016106021, "grad_norm": 0.8828125, "learning_rate": 0.00019905131000584142, "loss": 1.3328, "step": 1726 }, { "epoch": 0.044344517356982034, "grad_norm": 0.9765625, "learning_rate": 0.0001990506964439814, "loss": 1.4742, "step": 1727 }, { "epoch": 0.04437019455290385, "grad_norm": 0.921875, "learning_rate": 0.0001990500826847223, "loss": 1.5323, "step": 1728 }, { "epoch": 0.04439587174882567, "grad_norm": 0.8828125, "learning_rate": 0.00019904946872806534, "loss": 1.3256, "step": 1729 }, { "epoch": 0.04442154894474749, "grad_norm": 0.95703125, "learning_rate": 0.00019904885457401172, "loss": 1.4141, "step": 1730 }, { "epoch": 0.04444722614066931, "grad_norm": 0.91015625, "learning_rate": 0.0001990482402225627, "loss": 1.4072, "step": 1731 }, { "epoch": 0.044472903336591124, "grad_norm": 1.015625, "learning_rate": 0.00019904762567371945, "loss": 1.396, "step": 1732 }, { "epoch": 0.044498580532512946, "grad_norm": 0.94921875, "learning_rate": 0.00019904701092748325, "loss": 1.2789, "step": 1733 }, { "epoch": 0.04452425772843477, "grad_norm": 0.88671875, "learning_rate": 0.00019904639598385528, "loss": 1.2924, "step": 1734 }, { "epoch": 0.044549934924356584, "grad_norm": 0.8671875, "learning_rate": 0.00019904578084283683, "loss": 1.2515, "step": 1735 }, { "epoch": 0.044575612120278406, "grad_norm": 0.890625, "learning_rate": 0.00019904516550442905, "loss": 1.3907, "step": 1736 }, { "epoch": 0.04460128931620022, "grad_norm": 0.87109375, "learning_rate": 0.00019904454996863322, "loss": 1.3173, "step": 1737 }, { "epoch": 0.04462696651212204, "grad_norm": 0.90234375, "learning_rate": 0.00019904393423545057, "loss": 1.4315, "step": 1738 }, { "epoch": 0.044652643708043865, "grad_norm": 0.88671875, "learning_rate": 0.00019904331830488227, "loss": 1.3145, "step": 1739 }, { "epoch": 0.04467832090396568, "grad_norm": 0.90625, "learning_rate": 0.00019904270217692958, "loss": 1.3054, "step": 1740 }, { "epoch": 0.0447039980998875, "grad_norm": 0.89453125, "learning_rate": 0.00019904208585159373, "loss": 1.3834, "step": 1741 }, { "epoch": 0.04472967529580932, "grad_norm": 0.9453125, "learning_rate": 0.00019904146932887598, "loss": 1.2632, "step": 1742 }, { "epoch": 0.04475535249173114, "grad_norm": 0.8828125, "learning_rate": 0.00019904085260877752, "loss": 1.3033, "step": 1743 }, { "epoch": 0.04478102968765296, "grad_norm": 0.87109375, "learning_rate": 0.00019904023569129956, "loss": 1.343, "step": 1744 }, { "epoch": 0.04480670688357478, "grad_norm": 0.8828125, "learning_rate": 0.0001990396185764434, "loss": 1.2395, "step": 1745 }, { "epoch": 0.0448323840794966, "grad_norm": 1.0078125, "learning_rate": 0.0001990390012642102, "loss": 1.34, "step": 1746 }, { "epoch": 0.044858061275418415, "grad_norm": 0.984375, "learning_rate": 0.00019903838375460122, "loss": 1.3444, "step": 1747 }, { "epoch": 0.04488373847134024, "grad_norm": 0.97265625, "learning_rate": 0.0001990377660476177, "loss": 1.3753, "step": 1748 }, { "epoch": 0.04490941566726206, "grad_norm": 0.94921875, "learning_rate": 0.00019903714814326081, "loss": 1.2913, "step": 1749 }, { "epoch": 0.044935092863183874, "grad_norm": 0.9921875, "learning_rate": 0.0001990365300415319, "loss": 1.452, "step": 1750 }, { "epoch": 0.0449607700591057, "grad_norm": 0.91015625, "learning_rate": 0.0001990359117424321, "loss": 1.381, "step": 1751 }, { "epoch": 0.04498644725502751, "grad_norm": 1.046875, "learning_rate": 0.00019903529324596266, "loss": 1.3718, "step": 1752 }, { "epoch": 0.045012124450949334, "grad_norm": 0.953125, "learning_rate": 0.00019903467455212483, "loss": 1.3817, "step": 1753 }, { "epoch": 0.045037801646871156, "grad_norm": 1.015625, "learning_rate": 0.00019903405566091986, "loss": 1.384, "step": 1754 }, { "epoch": 0.04506347884279297, "grad_norm": 1.0546875, "learning_rate": 0.00019903343657234895, "loss": 1.364, "step": 1755 }, { "epoch": 0.045089156038714794, "grad_norm": 2.953125, "learning_rate": 0.00019903281728641332, "loss": 1.138, "step": 1756 }, { "epoch": 0.04511483323463661, "grad_norm": 0.97265625, "learning_rate": 0.00019903219780311428, "loss": 1.3119, "step": 1757 }, { "epoch": 0.04514051043055843, "grad_norm": 0.9140625, "learning_rate": 0.000199031578122453, "loss": 1.3028, "step": 1758 }, { "epoch": 0.04516618762648025, "grad_norm": 0.8828125, "learning_rate": 0.0001990309582444307, "loss": 1.3991, "step": 1759 }, { "epoch": 0.04519186482240207, "grad_norm": 0.9296875, "learning_rate": 0.00019903033816904867, "loss": 1.196, "step": 1760 }, { "epoch": 0.04521754201832389, "grad_norm": 0.890625, "learning_rate": 0.00019902971789630814, "loss": 1.3355, "step": 1761 }, { "epoch": 0.045243219214245706, "grad_norm": 0.96484375, "learning_rate": 0.0001990290974262103, "loss": 1.2933, "step": 1762 }, { "epoch": 0.04526889641016753, "grad_norm": 0.890625, "learning_rate": 0.0001990284767587564, "loss": 1.3024, "step": 1763 }, { "epoch": 0.04529457360608935, "grad_norm": 0.98828125, "learning_rate": 0.00019902785589394773, "loss": 1.382, "step": 1764 }, { "epoch": 0.045320250802011165, "grad_norm": 0.90625, "learning_rate": 0.00019902723483178548, "loss": 1.2414, "step": 1765 }, { "epoch": 0.04534592799793299, "grad_norm": 0.9453125, "learning_rate": 0.0001990266135722709, "loss": 1.1616, "step": 1766 }, { "epoch": 0.0453716051938548, "grad_norm": 0.875, "learning_rate": 0.00019902599211540518, "loss": 1.2809, "step": 1767 }, { "epoch": 0.045397282389776625, "grad_norm": 0.94140625, "learning_rate": 0.00019902537046118964, "loss": 1.4541, "step": 1768 }, { "epoch": 0.04542295958569845, "grad_norm": 0.9609375, "learning_rate": 0.00019902474860962544, "loss": 1.3831, "step": 1769 }, { "epoch": 0.04544863678162026, "grad_norm": 0.96484375, "learning_rate": 0.00019902412656071393, "loss": 1.3633, "step": 1770 }, { "epoch": 0.045474313977542084, "grad_norm": 0.90234375, "learning_rate": 0.00019902350431445624, "loss": 1.2061, "step": 1771 }, { "epoch": 0.0454999911734639, "grad_norm": 0.9140625, "learning_rate": 0.00019902288187085364, "loss": 1.3202, "step": 1772 }, { "epoch": 0.04552566836938572, "grad_norm": 0.91015625, "learning_rate": 0.0001990222592299074, "loss": 1.4322, "step": 1773 }, { "epoch": 0.045551345565307544, "grad_norm": 0.875, "learning_rate": 0.00019902163639161876, "loss": 1.2942, "step": 1774 }, { "epoch": 0.04557702276122936, "grad_norm": 0.953125, "learning_rate": 0.00019902101335598894, "loss": 1.5953, "step": 1775 }, { "epoch": 0.04560269995715118, "grad_norm": 0.94921875, "learning_rate": 0.00019902039012301915, "loss": 1.2818, "step": 1776 }, { "epoch": 0.045628377153072996, "grad_norm": 0.94921875, "learning_rate": 0.00019901976669271066, "loss": 1.4459, "step": 1777 }, { "epoch": 0.04565405434899482, "grad_norm": 0.94140625, "learning_rate": 0.00019901914306506475, "loss": 1.4129, "step": 1778 }, { "epoch": 0.04567973154491664, "grad_norm": 2.640625, "learning_rate": 0.00019901851924008262, "loss": 1.2153, "step": 1779 }, { "epoch": 0.045705408740838456, "grad_norm": 0.9375, "learning_rate": 0.00019901789521776554, "loss": 1.3318, "step": 1780 }, { "epoch": 0.04573108593676028, "grad_norm": 0.85546875, "learning_rate": 0.00019901727099811475, "loss": 1.3562, "step": 1781 }, { "epoch": 0.04575676313268209, "grad_norm": 0.8359375, "learning_rate": 0.00019901664658113146, "loss": 1.2037, "step": 1782 }, { "epoch": 0.045782440328603916, "grad_norm": 0.9296875, "learning_rate": 0.00019901602196681695, "loss": 1.2838, "step": 1783 }, { "epoch": 0.04580811752452573, "grad_norm": 0.99609375, "learning_rate": 0.00019901539715517244, "loss": 1.3768, "step": 1784 }, { "epoch": 0.04583379472044755, "grad_norm": 0.97265625, "learning_rate": 0.00019901477214619918, "loss": 1.5139, "step": 1785 }, { "epoch": 0.045859471916369375, "grad_norm": 0.8125, "learning_rate": 0.00019901414693989844, "loss": 1.325, "step": 1786 }, { "epoch": 0.04588514911229119, "grad_norm": 0.91796875, "learning_rate": 0.00019901352153627145, "loss": 1.3502, "step": 1787 }, { "epoch": 0.04591082630821301, "grad_norm": 0.83203125, "learning_rate": 0.00019901289593531943, "loss": 1.3468, "step": 1788 }, { "epoch": 0.04593650350413483, "grad_norm": 0.8671875, "learning_rate": 0.0001990122701370437, "loss": 1.3555, "step": 1789 }, { "epoch": 0.04596218070005665, "grad_norm": 0.91015625, "learning_rate": 0.00019901164414144542, "loss": 1.4775, "step": 1790 }, { "epoch": 0.04598785789597847, "grad_norm": 0.99609375, "learning_rate": 0.00019901101794852588, "loss": 1.4308, "step": 1791 }, { "epoch": 0.04601353509190029, "grad_norm": 0.94921875, "learning_rate": 0.00019901039155828634, "loss": 1.0866, "step": 1792 }, { "epoch": 0.04603921228782211, "grad_norm": 1.0078125, "learning_rate": 0.000199009764970728, "loss": 1.4743, "step": 1793 }, { "epoch": 0.046064889483743925, "grad_norm": 0.9765625, "learning_rate": 0.00019900913818585218, "loss": 1.2047, "step": 1794 }, { "epoch": 0.04609056667966575, "grad_norm": 0.96875, "learning_rate": 0.00019900851120366008, "loss": 1.4747, "step": 1795 }, { "epoch": 0.04611624387558757, "grad_norm": 0.87109375, "learning_rate": 0.00019900788402415297, "loss": 1.235, "step": 1796 }, { "epoch": 0.046141921071509384, "grad_norm": 0.98828125, "learning_rate": 0.0001990072566473321, "loss": 1.4638, "step": 1797 }, { "epoch": 0.046167598267431206, "grad_norm": 1.0078125, "learning_rate": 0.00019900662907319867, "loss": 1.3177, "step": 1798 }, { "epoch": 0.04619327546335302, "grad_norm": 0.99609375, "learning_rate": 0.00019900600130175398, "loss": 1.5346, "step": 1799 }, { "epoch": 0.046218952659274844, "grad_norm": 0.84765625, "learning_rate": 0.0001990053733329993, "loss": 1.3965, "step": 1800 }, { "epoch": 0.046244629855196666, "grad_norm": 1.0390625, "learning_rate": 0.00019900474516693583, "loss": 1.4277, "step": 1801 }, { "epoch": 0.04627030705111848, "grad_norm": 0.87890625, "learning_rate": 0.00019900411680356486, "loss": 1.3192, "step": 1802 }, { "epoch": 0.0462959842470403, "grad_norm": 0.89453125, "learning_rate": 0.0001990034882428876, "loss": 1.4739, "step": 1803 }, { "epoch": 0.04632166144296212, "grad_norm": 1.0625, "learning_rate": 0.00019900285948490538, "loss": 1.4226, "step": 1804 }, { "epoch": 0.04634733863888394, "grad_norm": 1.0078125, "learning_rate": 0.00019900223052961936, "loss": 1.4224, "step": 1805 }, { "epoch": 0.04637301583480576, "grad_norm": 0.9296875, "learning_rate": 0.00019900160137703088, "loss": 1.2938, "step": 1806 }, { "epoch": 0.04639869303072758, "grad_norm": 0.8984375, "learning_rate": 0.0001990009720271411, "loss": 1.2583, "step": 1807 }, { "epoch": 0.0464243702266494, "grad_norm": 0.93359375, "learning_rate": 0.00019900034247995137, "loss": 1.258, "step": 1808 }, { "epoch": 0.046450047422571215, "grad_norm": 0.8828125, "learning_rate": 0.00019899971273546288, "loss": 1.2772, "step": 1809 }, { "epoch": 0.04647572461849304, "grad_norm": 1.0234375, "learning_rate": 0.00019899908279367689, "loss": 1.3402, "step": 1810 }, { "epoch": 0.04650140181441486, "grad_norm": 1.015625, "learning_rate": 0.0001989984526545947, "loss": 1.4393, "step": 1811 }, { "epoch": 0.046527079010336675, "grad_norm": 0.875, "learning_rate": 0.00019899782231821753, "loss": 1.2326, "step": 1812 }, { "epoch": 0.0465527562062585, "grad_norm": 0.94140625, "learning_rate": 0.00019899719178454662, "loss": 1.2685, "step": 1813 }, { "epoch": 0.04657843340218031, "grad_norm": 1.015625, "learning_rate": 0.0001989965610535833, "loss": 1.4393, "step": 1814 }, { "epoch": 0.046604110598102134, "grad_norm": 0.9296875, "learning_rate": 0.00019899593012532871, "loss": 1.2085, "step": 1815 }, { "epoch": 0.04662978779402396, "grad_norm": 0.9296875, "learning_rate": 0.00019899529899978423, "loss": 1.2921, "step": 1816 }, { "epoch": 0.04665546498994577, "grad_norm": 1.484375, "learning_rate": 0.000198994667676951, "loss": 1.4882, "step": 1817 }, { "epoch": 0.046681142185867594, "grad_norm": 1.0703125, "learning_rate": 0.00019899403615683038, "loss": 1.463, "step": 1818 }, { "epoch": 0.04670681938178941, "grad_norm": 0.89453125, "learning_rate": 0.00019899340443942359, "loss": 1.3184, "step": 1819 }, { "epoch": 0.04673249657771123, "grad_norm": 0.9453125, "learning_rate": 0.0001989927725247319, "loss": 1.3515, "step": 1820 }, { "epoch": 0.046758173773633054, "grad_norm": 0.96875, "learning_rate": 0.0001989921404127565, "loss": 1.2751, "step": 1821 }, { "epoch": 0.04678385096955487, "grad_norm": 0.94140625, "learning_rate": 0.00019899150810349876, "loss": 1.361, "step": 1822 }, { "epoch": 0.04680952816547669, "grad_norm": 0.9296875, "learning_rate": 0.0001989908755969599, "loss": 1.3211, "step": 1823 }, { "epoch": 0.046835205361398506, "grad_norm": 0.9375, "learning_rate": 0.0001989902428931411, "loss": 1.3466, "step": 1824 }, { "epoch": 0.04686088255732033, "grad_norm": 1.03125, "learning_rate": 0.00019898960999204372, "loss": 1.2442, "step": 1825 }, { "epoch": 0.04688655975324215, "grad_norm": 1.015625, "learning_rate": 0.00019898897689366903, "loss": 1.433, "step": 1826 }, { "epoch": 0.046912236949163966, "grad_norm": 0.953125, "learning_rate": 0.0001989883435980182, "loss": 1.4258, "step": 1827 }, { "epoch": 0.04693791414508579, "grad_norm": 1.0078125, "learning_rate": 0.00019898771010509254, "loss": 1.2149, "step": 1828 }, { "epoch": 0.0469635913410076, "grad_norm": 0.93359375, "learning_rate": 0.00019898707641489334, "loss": 1.3916, "step": 1829 }, { "epoch": 0.046989268536929425, "grad_norm": 1.0078125, "learning_rate": 0.00019898644252742183, "loss": 1.2549, "step": 1830 }, { "epoch": 0.04701494573285125, "grad_norm": 0.92578125, "learning_rate": 0.00019898580844267928, "loss": 1.1896, "step": 1831 }, { "epoch": 0.04704062292877306, "grad_norm": 0.96875, "learning_rate": 0.00019898517416066695, "loss": 1.4658, "step": 1832 }, { "epoch": 0.047066300124694885, "grad_norm": 0.92578125, "learning_rate": 0.00019898453968138612, "loss": 1.2441, "step": 1833 }, { "epoch": 0.0470919773206167, "grad_norm": 0.97265625, "learning_rate": 0.00019898390500483807, "loss": 1.3928, "step": 1834 }, { "epoch": 0.04711765451653852, "grad_norm": 0.92578125, "learning_rate": 0.00019898327013102398, "loss": 1.3401, "step": 1835 }, { "epoch": 0.04714333171246034, "grad_norm": 1.0078125, "learning_rate": 0.00019898263505994522, "loss": 1.4061, "step": 1836 }, { "epoch": 0.04716900890838216, "grad_norm": 0.94140625, "learning_rate": 0.00019898199979160298, "loss": 1.5672, "step": 1837 }, { "epoch": 0.04719468610430398, "grad_norm": 0.83203125, "learning_rate": 0.00019898136432599855, "loss": 1.1383, "step": 1838 }, { "epoch": 0.0472203633002258, "grad_norm": 0.91796875, "learning_rate": 0.00019898072866313325, "loss": 1.2283, "step": 1839 }, { "epoch": 0.04724604049614762, "grad_norm": 0.8671875, "learning_rate": 0.00019898009280300825, "loss": 1.221, "step": 1840 }, { "epoch": 0.047271717692069434, "grad_norm": 0.97265625, "learning_rate": 0.0001989794567456249, "loss": 1.3311, "step": 1841 }, { "epoch": 0.047297394887991256, "grad_norm": 0.9296875, "learning_rate": 0.0001989788204909844, "loss": 1.4348, "step": 1842 }, { "epoch": 0.04732307208391308, "grad_norm": 0.87890625, "learning_rate": 0.00019897818403908805, "loss": 1.407, "step": 1843 }, { "epoch": 0.047348749279834894, "grad_norm": 0.87890625, "learning_rate": 0.00019897754738993715, "loss": 1.3012, "step": 1844 }, { "epoch": 0.047374426475756716, "grad_norm": 0.9453125, "learning_rate": 0.00019897691054353295, "loss": 1.1661, "step": 1845 }, { "epoch": 0.04740010367167853, "grad_norm": 0.9296875, "learning_rate": 0.00019897627349987668, "loss": 1.5119, "step": 1846 }, { "epoch": 0.04742578086760035, "grad_norm": 0.8359375, "learning_rate": 0.00019897563625896964, "loss": 1.2502, "step": 1847 }, { "epoch": 0.047451458063522176, "grad_norm": 0.87109375, "learning_rate": 0.00019897499882081307, "loss": 1.3346, "step": 1848 }, { "epoch": 0.04747713525944399, "grad_norm": 0.99609375, "learning_rate": 0.0001989743611854083, "loss": 1.3553, "step": 1849 }, { "epoch": 0.04750281245536581, "grad_norm": 0.90625, "learning_rate": 0.00019897372335275657, "loss": 1.4586, "step": 1850 }, { "epoch": 0.04752848965128763, "grad_norm": 0.91796875, "learning_rate": 0.00019897308532285915, "loss": 1.3451, "step": 1851 }, { "epoch": 0.04755416684720945, "grad_norm": 0.9140625, "learning_rate": 0.0001989724470957173, "loss": 1.4275, "step": 1852 }, { "epoch": 0.04757984404313127, "grad_norm": 0.9453125, "learning_rate": 0.0001989718086713323, "loss": 1.3249, "step": 1853 }, { "epoch": 0.04760552123905309, "grad_norm": 0.8828125, "learning_rate": 0.00019897117004970545, "loss": 1.446, "step": 1854 }, { "epoch": 0.04763119843497491, "grad_norm": 0.9453125, "learning_rate": 0.00019897053123083797, "loss": 1.2704, "step": 1855 }, { "epoch": 0.047656875630896725, "grad_norm": 0.9453125, "learning_rate": 0.00019896989221473117, "loss": 1.328, "step": 1856 }, { "epoch": 0.04768255282681855, "grad_norm": 1.0, "learning_rate": 0.00019896925300138635, "loss": 1.3396, "step": 1857 }, { "epoch": 0.04770823002274037, "grad_norm": 0.91015625, "learning_rate": 0.0001989686135908047, "loss": 1.2413, "step": 1858 }, { "epoch": 0.047733907218662185, "grad_norm": 0.8828125, "learning_rate": 0.00019896797398298753, "loss": 1.3542, "step": 1859 }, { "epoch": 0.04775958441458401, "grad_norm": 0.87890625, "learning_rate": 0.00019896733417793617, "loss": 1.2678, "step": 1860 }, { "epoch": 0.04778526161050582, "grad_norm": 0.92578125, "learning_rate": 0.00019896669417565185, "loss": 1.4454, "step": 1861 }, { "epoch": 0.047810938806427644, "grad_norm": 0.84375, "learning_rate": 0.00019896605397613584, "loss": 1.159, "step": 1862 }, { "epoch": 0.047836616002349466, "grad_norm": 0.95703125, "learning_rate": 0.00019896541357938943, "loss": 1.3332, "step": 1863 }, { "epoch": 0.04786229319827128, "grad_norm": 0.87890625, "learning_rate": 0.00019896477298541386, "loss": 1.3755, "step": 1864 }, { "epoch": 0.047887970394193104, "grad_norm": 0.93359375, "learning_rate": 0.00019896413219421048, "loss": 1.3697, "step": 1865 }, { "epoch": 0.04791364759011492, "grad_norm": 0.9375, "learning_rate": 0.0001989634912057805, "loss": 1.2226, "step": 1866 }, { "epoch": 0.04793932478603674, "grad_norm": 0.8828125, "learning_rate": 0.00019896285002012523, "loss": 1.2025, "step": 1867 }, { "epoch": 0.04796500198195856, "grad_norm": 0.83203125, "learning_rate": 0.00019896220863724592, "loss": 1.2845, "step": 1868 }, { "epoch": 0.04799067917788038, "grad_norm": 0.97265625, "learning_rate": 0.00019896156705714388, "loss": 1.1383, "step": 1869 }, { "epoch": 0.0480163563738022, "grad_norm": 0.90234375, "learning_rate": 0.0001989609252798204, "loss": 1.1869, "step": 1870 }, { "epoch": 0.048042033569724016, "grad_norm": 0.96484375, "learning_rate": 0.0001989602833052767, "loss": 1.3871, "step": 1871 }, { "epoch": 0.04806771076564584, "grad_norm": 1.1328125, "learning_rate": 0.0001989596411335141, "loss": 1.2186, "step": 1872 }, { "epoch": 0.04809338796156766, "grad_norm": 0.8515625, "learning_rate": 0.0001989589987645339, "loss": 1.1982, "step": 1873 }, { "epoch": 0.048119065157489475, "grad_norm": 0.90625, "learning_rate": 0.00019895835619833732, "loss": 1.3458, "step": 1874 }, { "epoch": 0.0481447423534113, "grad_norm": 0.94140625, "learning_rate": 0.00019895771343492568, "loss": 1.3958, "step": 1875 }, { "epoch": 0.04817041954933311, "grad_norm": 0.8046875, "learning_rate": 0.0001989570704743003, "loss": 1.1922, "step": 1876 }, { "epoch": 0.048196096745254935, "grad_norm": 0.87109375, "learning_rate": 0.00019895642731646235, "loss": 1.3391, "step": 1877 }, { "epoch": 0.04822177394117676, "grad_norm": 0.89453125, "learning_rate": 0.0001989557839614132, "loss": 1.3069, "step": 1878 }, { "epoch": 0.04824745113709857, "grad_norm": 0.94140625, "learning_rate": 0.00019895514040915412, "loss": 1.388, "step": 1879 }, { "epoch": 0.048273128333020394, "grad_norm": 0.96484375, "learning_rate": 0.0001989544966596864, "loss": 1.3724, "step": 1880 }, { "epoch": 0.04829880552894221, "grad_norm": 0.98828125, "learning_rate": 0.0001989538527130113, "loss": 1.4909, "step": 1881 }, { "epoch": 0.04832448272486403, "grad_norm": 0.953125, "learning_rate": 0.00019895320856913012, "loss": 1.2209, "step": 1882 }, { "epoch": 0.048350159920785854, "grad_norm": 0.9765625, "learning_rate": 0.0001989525642280441, "loss": 1.4584, "step": 1883 }, { "epoch": 0.04837583711670767, "grad_norm": 0.9140625, "learning_rate": 0.00019895191968975458, "loss": 1.2348, "step": 1884 }, { "epoch": 0.04840151431262949, "grad_norm": 0.91796875, "learning_rate": 0.00019895127495426284, "loss": 1.3479, "step": 1885 }, { "epoch": 0.04842719150855131, "grad_norm": 0.94140625, "learning_rate": 0.0001989506300215701, "loss": 1.2382, "step": 1886 }, { "epoch": 0.04845286870447313, "grad_norm": 0.87890625, "learning_rate": 0.00019894998489167773, "loss": 1.232, "step": 1887 }, { "epoch": 0.048478545900394944, "grad_norm": 0.84765625, "learning_rate": 0.00019894933956458695, "loss": 1.3108, "step": 1888 }, { "epoch": 0.048504223096316766, "grad_norm": 0.93359375, "learning_rate": 0.0001989486940402991, "loss": 1.3861, "step": 1889 }, { "epoch": 0.04852990029223859, "grad_norm": 0.92578125, "learning_rate": 0.00019894804831881543, "loss": 1.3553, "step": 1890 }, { "epoch": 0.048555577488160404, "grad_norm": 0.91796875, "learning_rate": 0.00019894740240013726, "loss": 1.2326, "step": 1891 }, { "epoch": 0.048581254684082226, "grad_norm": 0.9921875, "learning_rate": 0.00019894675628426582, "loss": 1.4462, "step": 1892 }, { "epoch": 0.04860693188000404, "grad_norm": 0.88671875, "learning_rate": 0.00019894610997120245, "loss": 1.2508, "step": 1893 }, { "epoch": 0.04863260907592586, "grad_norm": 0.890625, "learning_rate": 0.0001989454634609484, "loss": 1.3946, "step": 1894 }, { "epoch": 0.048658286271847685, "grad_norm": 0.90234375, "learning_rate": 0.000198944816753505, "loss": 1.5199, "step": 1895 }, { "epoch": 0.0486839634677695, "grad_norm": 0.9375, "learning_rate": 0.00019894416984887352, "loss": 1.3076, "step": 1896 }, { "epoch": 0.04870964066369132, "grad_norm": 0.92578125, "learning_rate": 0.00019894352274705523, "loss": 1.2816, "step": 1897 }, { "epoch": 0.04873531785961314, "grad_norm": 0.9453125, "learning_rate": 0.00019894287544805145, "loss": 1.2654, "step": 1898 }, { "epoch": 0.04876099505553496, "grad_norm": 1.03125, "learning_rate": 0.00019894222795186346, "loss": 1.4038, "step": 1899 }, { "epoch": 0.04878667225145678, "grad_norm": 0.92578125, "learning_rate": 0.00019894158025849255, "loss": 1.3736, "step": 1900 }, { "epoch": 0.0488123494473786, "grad_norm": 0.97265625, "learning_rate": 0.00019894093236794, "loss": 1.4918, "step": 1901 }, { "epoch": 0.04883802664330042, "grad_norm": 0.875, "learning_rate": 0.0001989402842802071, "loss": 1.2634, "step": 1902 }, { "epoch": 0.048863703839222235, "grad_norm": 1.1171875, "learning_rate": 0.00019893963599529517, "loss": 1.4167, "step": 1903 }, { "epoch": 0.04888938103514406, "grad_norm": 0.89453125, "learning_rate": 0.00019893898751320544, "loss": 1.2536, "step": 1904 }, { "epoch": 0.04891505823106588, "grad_norm": 0.8984375, "learning_rate": 0.0001989383388339393, "loss": 1.2672, "step": 1905 }, { "epoch": 0.048940735426987694, "grad_norm": 0.9140625, "learning_rate": 0.00019893768995749795, "loss": 1.3471, "step": 1906 }, { "epoch": 0.048966412622909516, "grad_norm": 0.8671875, "learning_rate": 0.00019893704088388273, "loss": 1.4547, "step": 1907 }, { "epoch": 0.04899208981883133, "grad_norm": 0.93359375, "learning_rate": 0.00019893639161309493, "loss": 1.3532, "step": 1908 }, { "epoch": 0.049017767014753154, "grad_norm": 0.90234375, "learning_rate": 0.00019893574214513584, "loss": 1.3043, "step": 1909 }, { "epoch": 0.049043444210674976, "grad_norm": 0.84375, "learning_rate": 0.00019893509248000677, "loss": 1.0792, "step": 1910 }, { "epoch": 0.04906912140659679, "grad_norm": 0.9296875, "learning_rate": 0.00019893444261770898, "loss": 1.0859, "step": 1911 }, { "epoch": 0.04909479860251861, "grad_norm": 0.9765625, "learning_rate": 0.0001989337925582438, "loss": 1.4225, "step": 1912 }, { "epoch": 0.04912047579844043, "grad_norm": 1.0, "learning_rate": 0.00019893314230161246, "loss": 1.2433, "step": 1913 }, { "epoch": 0.04914615299436225, "grad_norm": 0.94921875, "learning_rate": 0.00019893249184781633, "loss": 1.3691, "step": 1914 }, { "epoch": 0.04917183019028407, "grad_norm": 0.94921875, "learning_rate": 0.00019893184119685667, "loss": 1.3235, "step": 1915 }, { "epoch": 0.04919750738620589, "grad_norm": 0.85546875, "learning_rate": 0.00019893119034873483, "loss": 1.1618, "step": 1916 }, { "epoch": 0.04922318458212771, "grad_norm": 0.88671875, "learning_rate": 0.00019893053930345202, "loss": 1.2079, "step": 1917 }, { "epoch": 0.049248861778049526, "grad_norm": 0.97265625, "learning_rate": 0.00019892988806100958, "loss": 1.2573, "step": 1918 }, { "epoch": 0.04927453897397135, "grad_norm": 0.96484375, "learning_rate": 0.00019892923662140883, "loss": 1.2765, "step": 1919 }, { "epoch": 0.04930021616989317, "grad_norm": 0.9453125, "learning_rate": 0.00019892858498465107, "loss": 1.3863, "step": 1920 }, { "epoch": 0.049325893365814985, "grad_norm": 1.0625, "learning_rate": 0.0001989279331507375, "loss": 1.3919, "step": 1921 }, { "epoch": 0.04935157056173681, "grad_norm": 0.85546875, "learning_rate": 0.0001989272811196696, "loss": 1.3087, "step": 1922 }, { "epoch": 0.04937724775765862, "grad_norm": 0.98046875, "learning_rate": 0.00019892662889144848, "loss": 1.2897, "step": 1923 }, { "epoch": 0.049402924953580445, "grad_norm": 0.86328125, "learning_rate": 0.00019892597646607556, "loss": 1.2133, "step": 1924 }, { "epoch": 0.04942860214950227, "grad_norm": 0.85546875, "learning_rate": 0.0001989253238435521, "loss": 1.3948, "step": 1925 }, { "epoch": 0.04945427934542408, "grad_norm": 0.90625, "learning_rate": 0.00019892467102387943, "loss": 1.4706, "step": 1926 }, { "epoch": 0.049479956541345904, "grad_norm": 0.89453125, "learning_rate": 0.00019892401800705877, "loss": 1.2848, "step": 1927 }, { "epoch": 0.04950563373726772, "grad_norm": 0.92578125, "learning_rate": 0.00019892336479309153, "loss": 1.2711, "step": 1928 }, { "epoch": 0.04953131093318954, "grad_norm": 0.8671875, "learning_rate": 0.00019892271138197894, "loss": 1.1948, "step": 1929 }, { "epoch": 0.049556988129111364, "grad_norm": 0.8671875, "learning_rate": 0.0001989220577737223, "loss": 1.2033, "step": 1930 }, { "epoch": 0.04958266532503318, "grad_norm": 0.8984375, "learning_rate": 0.00019892140396832297, "loss": 1.4853, "step": 1931 }, { "epoch": 0.049608342520955, "grad_norm": 0.87890625, "learning_rate": 0.00019892074996578218, "loss": 1.4383, "step": 1932 }, { "epoch": 0.049634019716876816, "grad_norm": 0.83203125, "learning_rate": 0.00019892009576610132, "loss": 1.1994, "step": 1933 }, { "epoch": 0.04965969691279864, "grad_norm": 0.87890625, "learning_rate": 0.0001989194413692816, "loss": 1.3445, "step": 1934 }, { "epoch": 0.04968537410872046, "grad_norm": 0.93359375, "learning_rate": 0.00019891878677532438, "loss": 1.3417, "step": 1935 }, { "epoch": 0.049711051304642276, "grad_norm": 0.90234375, "learning_rate": 0.00019891813198423094, "loss": 1.2185, "step": 1936 }, { "epoch": 0.0497367285005641, "grad_norm": 0.95703125, "learning_rate": 0.00019891747699600263, "loss": 1.3291, "step": 1937 }, { "epoch": 0.04976240569648591, "grad_norm": 0.90234375, "learning_rate": 0.00019891682181064072, "loss": 1.2572, "step": 1938 }, { "epoch": 0.049788082892407735, "grad_norm": 0.97265625, "learning_rate": 0.0001989161664281465, "loss": 1.2999, "step": 1939 }, { "epoch": 0.04981376008832955, "grad_norm": 0.90234375, "learning_rate": 0.0001989155108485213, "loss": 1.1443, "step": 1940 }, { "epoch": 0.04983943728425137, "grad_norm": 0.87890625, "learning_rate": 0.00019891485507176647, "loss": 1.3962, "step": 1941 }, { "epoch": 0.049865114480173195, "grad_norm": 0.9296875, "learning_rate": 0.0001989141990978832, "loss": 1.2528, "step": 1942 }, { "epoch": 0.04989079167609501, "grad_norm": 0.9453125, "learning_rate": 0.0001989135429268729, "loss": 1.3721, "step": 1943 }, { "epoch": 0.04991646887201683, "grad_norm": 0.98828125, "learning_rate": 0.00019891288655873683, "loss": 1.3572, "step": 1944 }, { "epoch": 0.04994214606793865, "grad_norm": 0.890625, "learning_rate": 0.0001989122299934763, "loss": 1.2663, "step": 1945 }, { "epoch": 0.04996782326386047, "grad_norm": 0.96484375, "learning_rate": 0.00019891157323109266, "loss": 1.3917, "step": 1946 }, { "epoch": 0.04999350045978229, "grad_norm": 0.86328125, "learning_rate": 0.0001989109162715872, "loss": 1.311, "step": 1947 }, { "epoch": 0.05001917765570411, "grad_norm": 0.921875, "learning_rate": 0.00019891025911496118, "loss": 1.2017, "step": 1948 }, { "epoch": 0.05004485485162593, "grad_norm": 0.90625, "learning_rate": 0.000198909601761216, "loss": 1.2428, "step": 1949 }, { "epoch": 0.050070532047547744, "grad_norm": 0.89453125, "learning_rate": 0.00019890894421035284, "loss": 1.3438, "step": 1950 }, { "epoch": 0.05009620924346957, "grad_norm": 0.98046875, "learning_rate": 0.00019890828646237314, "loss": 1.3809, "step": 1951 }, { "epoch": 0.05012188643939139, "grad_norm": 1.0390625, "learning_rate": 0.00019890762851727814, "loss": 1.3385, "step": 1952 }, { "epoch": 0.050147563635313204, "grad_norm": 0.91015625, "learning_rate": 0.00019890697037506917, "loss": 1.4331, "step": 1953 }, { "epoch": 0.050173240831235026, "grad_norm": 0.8828125, "learning_rate": 0.00019890631203574758, "loss": 1.2484, "step": 1954 }, { "epoch": 0.05019891802715684, "grad_norm": 0.92578125, "learning_rate": 0.0001989056534993146, "loss": 1.3241, "step": 1955 }, { "epoch": 0.050224595223078664, "grad_norm": 0.97265625, "learning_rate": 0.0001989049947657716, "loss": 1.3935, "step": 1956 }, { "epoch": 0.050250272419000486, "grad_norm": 0.9296875, "learning_rate": 0.0001989043358351199, "loss": 1.3358, "step": 1957 }, { "epoch": 0.0502759496149223, "grad_norm": 0.90625, "learning_rate": 0.00019890367670736078, "loss": 1.2177, "step": 1958 }, { "epoch": 0.05030162681084412, "grad_norm": 0.84375, "learning_rate": 0.00019890301738249554, "loss": 1.2815, "step": 1959 }, { "epoch": 0.05032730400676594, "grad_norm": 0.91796875, "learning_rate": 0.00019890235786052557, "loss": 1.2568, "step": 1960 }, { "epoch": 0.05035298120268776, "grad_norm": 0.8515625, "learning_rate": 0.0001989016981414521, "loss": 1.2639, "step": 1961 }, { "epoch": 0.05037865839860958, "grad_norm": 0.93359375, "learning_rate": 0.00019890103822527649, "loss": 1.2524, "step": 1962 }, { "epoch": 0.0504043355945314, "grad_norm": 0.95703125, "learning_rate": 0.00019890037811200003, "loss": 1.3949, "step": 1963 }, { "epoch": 0.05043001279045322, "grad_norm": 0.91015625, "learning_rate": 0.00019889971780162407, "loss": 1.3615, "step": 1964 }, { "epoch": 0.050455689986375035, "grad_norm": 0.94140625, "learning_rate": 0.00019889905729414991, "loss": 1.516, "step": 1965 }, { "epoch": 0.05048136718229686, "grad_norm": 0.9765625, "learning_rate": 0.00019889839658957884, "loss": 1.3248, "step": 1966 }, { "epoch": 0.05050704437821868, "grad_norm": 0.91015625, "learning_rate": 0.00019889773568791222, "loss": 1.4225, "step": 1967 }, { "epoch": 0.050532721574140495, "grad_norm": 0.9609375, "learning_rate": 0.00019889707458915133, "loss": 1.5194, "step": 1968 }, { "epoch": 0.05055839877006232, "grad_norm": 0.95703125, "learning_rate": 0.00019889641329329748, "loss": 1.4552, "step": 1969 }, { "epoch": 0.05058407596598413, "grad_norm": 0.92578125, "learning_rate": 0.00019889575180035205, "loss": 1.4266, "step": 1970 }, { "epoch": 0.050609753161905954, "grad_norm": 0.96875, "learning_rate": 0.0001988950901103163, "loss": 1.3355, "step": 1971 }, { "epoch": 0.050635430357827776, "grad_norm": 0.9140625, "learning_rate": 0.00019889442822319158, "loss": 1.4212, "step": 1972 }, { "epoch": 0.05066110755374959, "grad_norm": 0.90625, "learning_rate": 0.0001988937661389792, "loss": 1.369, "step": 1973 }, { "epoch": 0.050686784749671414, "grad_norm": 0.90234375, "learning_rate": 0.00019889310385768047, "loss": 1.3675, "step": 1974 }, { "epoch": 0.05071246194559323, "grad_norm": 0.8984375, "learning_rate": 0.00019889244137929674, "loss": 1.3959, "step": 1975 }, { "epoch": 0.05073813914151505, "grad_norm": 0.9609375, "learning_rate": 0.00019889177870382926, "loss": 1.2925, "step": 1976 }, { "epoch": 0.05076381633743687, "grad_norm": 0.87109375, "learning_rate": 0.00019889111583127944, "loss": 1.2357, "step": 1977 }, { "epoch": 0.05078949353335869, "grad_norm": 1.046875, "learning_rate": 0.00019889045276164855, "loss": 1.164, "step": 1978 }, { "epoch": 0.05081517072928051, "grad_norm": 0.9765625, "learning_rate": 0.0001988897894949379, "loss": 1.2161, "step": 1979 }, { "epoch": 0.050840847925202326, "grad_norm": 0.95703125, "learning_rate": 0.00019888912603114887, "loss": 1.4026, "step": 1980 }, { "epoch": 0.05086652512112415, "grad_norm": 0.87890625, "learning_rate": 0.00019888846237028272, "loss": 1.31, "step": 1981 }, { "epoch": 0.05089220231704597, "grad_norm": 0.9453125, "learning_rate": 0.00019888779851234077, "loss": 1.3797, "step": 1982 }, { "epoch": 0.050917879512967786, "grad_norm": 0.9296875, "learning_rate": 0.00019888713445732442, "loss": 1.3592, "step": 1983 }, { "epoch": 0.05094355670888961, "grad_norm": 0.9140625, "learning_rate": 0.00019888647020523492, "loss": 1.34, "step": 1984 }, { "epoch": 0.05096923390481142, "grad_norm": 0.89453125, "learning_rate": 0.00019888580575607362, "loss": 1.2588, "step": 1985 }, { "epoch": 0.050994911100733245, "grad_norm": 0.9765625, "learning_rate": 0.00019888514110984185, "loss": 1.2873, "step": 1986 }, { "epoch": 0.05102058829665507, "grad_norm": 0.88671875, "learning_rate": 0.0001988844762665409, "loss": 1.2102, "step": 1987 }, { "epoch": 0.05104626549257688, "grad_norm": 0.91015625, "learning_rate": 0.00019888381122617213, "loss": 1.402, "step": 1988 }, { "epoch": 0.051071942688498705, "grad_norm": 0.84765625, "learning_rate": 0.00019888314598873687, "loss": 1.2647, "step": 1989 }, { "epoch": 0.05109761988442052, "grad_norm": 0.98046875, "learning_rate": 0.00019888248055423643, "loss": 1.4236, "step": 1990 }, { "epoch": 0.05112329708034234, "grad_norm": 0.9375, "learning_rate": 0.00019888181492267216, "loss": 1.3762, "step": 1991 }, { "epoch": 0.05114897427626416, "grad_norm": 0.91796875, "learning_rate": 0.0001988811490940453, "loss": 1.431, "step": 1992 }, { "epoch": 0.05117465147218598, "grad_norm": 0.921875, "learning_rate": 0.00019888048306835728, "loss": 1.1737, "step": 1993 }, { "epoch": 0.0512003286681078, "grad_norm": 0.87890625, "learning_rate": 0.0001988798168456094, "loss": 1.2038, "step": 1994 }, { "epoch": 0.05122600586402962, "grad_norm": 0.95703125, "learning_rate": 0.00019887915042580295, "loss": 1.368, "step": 1995 }, { "epoch": 0.05125168305995144, "grad_norm": 0.88671875, "learning_rate": 0.00019887848380893935, "loss": 1.4637, "step": 1996 }, { "epoch": 0.051277360255873254, "grad_norm": 0.96484375, "learning_rate": 0.0001988778169950198, "loss": 1.3273, "step": 1997 }, { "epoch": 0.051303037451795076, "grad_norm": 0.94140625, "learning_rate": 0.0001988771499840457, "loss": 1.3009, "step": 1998 }, { "epoch": 0.0513287146477169, "grad_norm": 0.953125, "learning_rate": 0.0001988764827760184, "loss": 1.4057, "step": 1999 }, { "epoch": 0.051354391843638714, "grad_norm": 0.875, "learning_rate": 0.00019887581537093917, "loss": 1.35, "step": 2000 }, { "epoch": 0.051354391843638714, "eval_loss": 1.3078194856643677, "eval_model_preparation_time": 0.0065, "eval_runtime": 408.4975, "eval_samples_per_second": 24.48, "eval_steps_per_second": 0.766, "step": 2000 }, { "epoch": 0.051380069039560536, "grad_norm": 0.875, "learning_rate": 0.00019887514776880936, "loss": 1.0943, "step": 2001 }, { "epoch": 0.05140574623548235, "grad_norm": 0.9375, "learning_rate": 0.00019887447996963035, "loss": 1.3075, "step": 2002 }, { "epoch": 0.05143142343140417, "grad_norm": 0.8984375, "learning_rate": 0.0001988738119734034, "loss": 1.4501, "step": 2003 }, { "epoch": 0.051457100627325995, "grad_norm": 0.94140625, "learning_rate": 0.00019887314378012988, "loss": 1.3165, "step": 2004 }, { "epoch": 0.05148277782324781, "grad_norm": 0.98828125, "learning_rate": 0.00019887247538981115, "loss": 1.38, "step": 2005 }, { "epoch": 0.05150845501916963, "grad_norm": 0.91796875, "learning_rate": 0.00019887180680244849, "loss": 1.3203, "step": 2006 }, { "epoch": 0.05153413221509145, "grad_norm": 0.87109375, "learning_rate": 0.00019887113801804324, "loss": 1.2588, "step": 2007 }, { "epoch": 0.05155980941101327, "grad_norm": 0.91015625, "learning_rate": 0.00019887046903659675, "loss": 1.4877, "step": 2008 }, { "epoch": 0.05158548660693509, "grad_norm": 1.0078125, "learning_rate": 0.00019886979985811034, "loss": 1.4762, "step": 2009 }, { "epoch": 0.05161116380285691, "grad_norm": 0.9375, "learning_rate": 0.00019886913048258533, "loss": 1.3109, "step": 2010 }, { "epoch": 0.05163684099877873, "grad_norm": 0.8984375, "learning_rate": 0.00019886846091002311, "loss": 1.0714, "step": 2011 }, { "epoch": 0.051662518194700545, "grad_norm": 0.890625, "learning_rate": 0.00019886779114042496, "loss": 1.2042, "step": 2012 }, { "epoch": 0.05168819539062237, "grad_norm": 0.9296875, "learning_rate": 0.00019886712117379225, "loss": 1.4327, "step": 2013 }, { "epoch": 0.05171387258654419, "grad_norm": 0.890625, "learning_rate": 0.00019886645101012626, "loss": 1.2262, "step": 2014 }, { "epoch": 0.051739549782466004, "grad_norm": 0.953125, "learning_rate": 0.00019886578064942843, "loss": 1.3222, "step": 2015 }, { "epoch": 0.05176522697838783, "grad_norm": 0.8984375, "learning_rate": 0.00019886511009169997, "loss": 1.2282, "step": 2016 }, { "epoch": 0.05179090417430964, "grad_norm": 0.9296875, "learning_rate": 0.00019886443933694227, "loss": 1.1666, "step": 2017 }, { "epoch": 0.051816581370231464, "grad_norm": 0.921875, "learning_rate": 0.00019886376838515671, "loss": 1.4997, "step": 2018 }, { "epoch": 0.051842258566153286, "grad_norm": 0.8984375, "learning_rate": 0.0001988630972363446, "loss": 1.3359, "step": 2019 }, { "epoch": 0.0518679357620751, "grad_norm": 0.8984375, "learning_rate": 0.00019886242589050722, "loss": 1.2791, "step": 2020 }, { "epoch": 0.051893612957996924, "grad_norm": 0.89453125, "learning_rate": 0.000198861754347646, "loss": 1.2563, "step": 2021 }, { "epoch": 0.05191929015391874, "grad_norm": 0.91015625, "learning_rate": 0.0001988610826077622, "loss": 1.3144, "step": 2022 }, { "epoch": 0.05194496734984056, "grad_norm": 0.90625, "learning_rate": 0.00019886041067085721, "loss": 1.2859, "step": 2023 }, { "epoch": 0.05197064454576238, "grad_norm": 0.8671875, "learning_rate": 0.00019885973853693236, "loss": 1.1412, "step": 2024 }, { "epoch": 0.0519963217416842, "grad_norm": 1.0078125, "learning_rate": 0.00019885906620598895, "loss": 1.0788, "step": 2025 }, { "epoch": 0.05202199893760602, "grad_norm": 1.03125, "learning_rate": 0.00019885839367802838, "loss": 1.3189, "step": 2026 }, { "epoch": 0.052047676133527836, "grad_norm": 0.94921875, "learning_rate": 0.00019885772095305196, "loss": 1.3785, "step": 2027 }, { "epoch": 0.05207335332944966, "grad_norm": 0.875, "learning_rate": 0.00019885704803106102, "loss": 1.301, "step": 2028 }, { "epoch": 0.05209903052537148, "grad_norm": 0.92578125, "learning_rate": 0.0001988563749120569, "loss": 1.3255, "step": 2029 }, { "epoch": 0.052124707721293295, "grad_norm": 0.93359375, "learning_rate": 0.00019885570159604095, "loss": 1.2634, "step": 2030 }, { "epoch": 0.05215038491721512, "grad_norm": 0.94140625, "learning_rate": 0.00019885502808301457, "loss": 1.3346, "step": 2031 }, { "epoch": 0.05217606211313693, "grad_norm": 0.85546875, "learning_rate": 0.000198854354372979, "loss": 1.2621, "step": 2032 }, { "epoch": 0.052201739309058755, "grad_norm": 0.87890625, "learning_rate": 0.00019885368046593566, "loss": 1.2375, "step": 2033 }, { "epoch": 0.05222741650498058, "grad_norm": 0.91796875, "learning_rate": 0.00019885300636188587, "loss": 1.2985, "step": 2034 }, { "epoch": 0.05225309370090239, "grad_norm": 0.90234375, "learning_rate": 0.00019885233206083094, "loss": 1.3735, "step": 2035 }, { "epoch": 0.052278770896824214, "grad_norm": 0.96875, "learning_rate": 0.00019885165756277224, "loss": 1.3123, "step": 2036 }, { "epoch": 0.05230444809274603, "grad_norm": 0.984375, "learning_rate": 0.00019885098286771114, "loss": 1.2665, "step": 2037 }, { "epoch": 0.05233012528866785, "grad_norm": 0.9140625, "learning_rate": 0.00019885030797564893, "loss": 1.1628, "step": 2038 }, { "epoch": 0.052355802484589674, "grad_norm": 0.9609375, "learning_rate": 0.00019884963288658702, "loss": 1.2593, "step": 2039 }, { "epoch": 0.05238147968051149, "grad_norm": 0.98046875, "learning_rate": 0.00019884895760052668, "loss": 1.306, "step": 2040 }, { "epoch": 0.05240715687643331, "grad_norm": 0.95703125, "learning_rate": 0.00019884828211746934, "loss": 1.5319, "step": 2041 }, { "epoch": 0.052432834072355126, "grad_norm": 0.9609375, "learning_rate": 0.00019884760643741628, "loss": 1.4409, "step": 2042 }, { "epoch": 0.05245851126827695, "grad_norm": 0.9375, "learning_rate": 0.00019884693056036886, "loss": 1.1899, "step": 2043 }, { "epoch": 0.052484188464198764, "grad_norm": 1.015625, "learning_rate": 0.00019884625448632844, "loss": 1.1932, "step": 2044 }, { "epoch": 0.052509865660120586, "grad_norm": 0.921875, "learning_rate": 0.0001988455782152964, "loss": 1.0321, "step": 2045 }, { "epoch": 0.05253554285604241, "grad_norm": 0.87109375, "learning_rate": 0.000198844901747274, "loss": 1.2805, "step": 2046 }, { "epoch": 0.05256122005196422, "grad_norm": 1.0078125, "learning_rate": 0.00019884422508226267, "loss": 1.1858, "step": 2047 }, { "epoch": 0.052586897247886046, "grad_norm": 0.921875, "learning_rate": 0.0001988435482202637, "loss": 1.3124, "step": 2048 }, { "epoch": 0.05261257444380786, "grad_norm": 1.0625, "learning_rate": 0.00019884287116127852, "loss": 1.3196, "step": 2049 }, { "epoch": 0.05263825163972968, "grad_norm": 0.90234375, "learning_rate": 0.00019884219390530836, "loss": 1.3815, "step": 2050 }, { "epoch": 0.052663928835651505, "grad_norm": 0.98046875, "learning_rate": 0.00019884151645235468, "loss": 1.2959, "step": 2051 }, { "epoch": 0.05268960603157332, "grad_norm": 0.875, "learning_rate": 0.00019884083880241876, "loss": 1.3616, "step": 2052 }, { "epoch": 0.05271528322749514, "grad_norm": 0.9453125, "learning_rate": 0.000198840160955502, "loss": 1.3333, "step": 2053 }, { "epoch": 0.05274096042341696, "grad_norm": 0.8828125, "learning_rate": 0.00019883948291160572, "loss": 1.3365, "step": 2054 }, { "epoch": 0.05276663761933878, "grad_norm": 0.953125, "learning_rate": 0.0001988388046707313, "loss": 1.4535, "step": 2055 }, { "epoch": 0.0527923148152606, "grad_norm": 0.91796875, "learning_rate": 0.00019883812623288003, "loss": 1.3846, "step": 2056 }, { "epoch": 0.05281799201118242, "grad_norm": 0.91796875, "learning_rate": 0.00019883744759805331, "loss": 1.2507, "step": 2057 }, { "epoch": 0.05284366920710424, "grad_norm": 0.9375, "learning_rate": 0.0001988367687662525, "loss": 1.4706, "step": 2058 }, { "epoch": 0.052869346403026055, "grad_norm": 0.98828125, "learning_rate": 0.00019883608973747893, "loss": 1.4118, "step": 2059 }, { "epoch": 0.05289502359894788, "grad_norm": 0.94921875, "learning_rate": 0.00019883541051173394, "loss": 1.3776, "step": 2060 }, { "epoch": 0.0529207007948697, "grad_norm": 0.9453125, "learning_rate": 0.00019883473108901897, "loss": 1.2741, "step": 2061 }, { "epoch": 0.052946377990791514, "grad_norm": 0.9375, "learning_rate": 0.00019883405146933525, "loss": 1.3325, "step": 2062 }, { "epoch": 0.052972055186713336, "grad_norm": 0.90625, "learning_rate": 0.0001988333716526842, "loss": 1.3921, "step": 2063 }, { "epoch": 0.05299773238263515, "grad_norm": 0.89453125, "learning_rate": 0.00019883269163906717, "loss": 1.2357, "step": 2064 }, { "epoch": 0.053023409578556974, "grad_norm": 0.79296875, "learning_rate": 0.00019883201142848554, "loss": 1.2261, "step": 2065 }, { "epoch": 0.053049086774478796, "grad_norm": 1.0, "learning_rate": 0.0001988313310209406, "loss": 1.2858, "step": 2066 }, { "epoch": 0.05307476397040061, "grad_norm": 0.89453125, "learning_rate": 0.00019883065041643378, "loss": 1.37, "step": 2067 }, { "epoch": 0.05310044116632243, "grad_norm": 0.88671875, "learning_rate": 0.0001988299696149664, "loss": 1.2699, "step": 2068 }, { "epoch": 0.05312611836224425, "grad_norm": 0.90625, "learning_rate": 0.0001988292886165398, "loss": 1.332, "step": 2069 }, { "epoch": 0.05315179555816607, "grad_norm": 0.91796875, "learning_rate": 0.00019882860742115535, "loss": 1.2524, "step": 2070 }, { "epoch": 0.05317747275408789, "grad_norm": 0.8671875, "learning_rate": 0.00019882792602881442, "loss": 1.1156, "step": 2071 }, { "epoch": 0.05320314995000971, "grad_norm": 0.94921875, "learning_rate": 0.00019882724443951838, "loss": 1.5621, "step": 2072 }, { "epoch": 0.05322882714593153, "grad_norm": 1.0, "learning_rate": 0.00019882656265326852, "loss": 1.3806, "step": 2073 }, { "epoch": 0.053254504341853345, "grad_norm": 0.94140625, "learning_rate": 0.0001988258806700663, "loss": 1.2464, "step": 2074 }, { "epoch": 0.05328018153777517, "grad_norm": 0.85546875, "learning_rate": 0.00019882519848991298, "loss": 1.2882, "step": 2075 }, { "epoch": 0.05330585873369699, "grad_norm": 0.89453125, "learning_rate": 0.00019882451611281, "loss": 1.2245, "step": 2076 }, { "epoch": 0.053331535929618805, "grad_norm": 0.90625, "learning_rate": 0.00019882383353875868, "loss": 1.235, "step": 2077 }, { "epoch": 0.05335721312554063, "grad_norm": 0.9375, "learning_rate": 0.00019882315076776038, "loss": 1.3184, "step": 2078 }, { "epoch": 0.05338289032146244, "grad_norm": 0.91796875, "learning_rate": 0.00019882246779981646, "loss": 1.4397, "step": 2079 }, { "epoch": 0.053408567517384264, "grad_norm": 0.921875, "learning_rate": 0.0001988217846349283, "loss": 1.1651, "step": 2080 }, { "epoch": 0.05343424471330609, "grad_norm": 0.875, "learning_rate": 0.00019882110127309726, "loss": 1.235, "step": 2081 }, { "epoch": 0.0534599219092279, "grad_norm": 0.91015625, "learning_rate": 0.00019882041771432466, "loss": 1.1791, "step": 2082 }, { "epoch": 0.053485599105149724, "grad_norm": 0.96484375, "learning_rate": 0.00019881973395861194, "loss": 1.3552, "step": 2083 }, { "epoch": 0.05351127630107154, "grad_norm": 0.96875, "learning_rate": 0.0001988190500059604, "loss": 1.3097, "step": 2084 }, { "epoch": 0.05353695349699336, "grad_norm": 0.9453125, "learning_rate": 0.0001988183658563714, "loss": 1.2065, "step": 2085 }, { "epoch": 0.053562630692915184, "grad_norm": 0.91015625, "learning_rate": 0.0001988176815098463, "loss": 1.3288, "step": 2086 }, { "epoch": 0.053588307888837, "grad_norm": 0.875, "learning_rate": 0.00019881699696638655, "loss": 1.3044, "step": 2087 }, { "epoch": 0.05361398508475882, "grad_norm": 0.89453125, "learning_rate": 0.0001988163122259934, "loss": 1.1959, "step": 2088 }, { "epoch": 0.053639662280680636, "grad_norm": 0.91796875, "learning_rate": 0.0001988156272886683, "loss": 1.2977, "step": 2089 }, { "epoch": 0.05366533947660246, "grad_norm": 0.890625, "learning_rate": 0.00019881494215441258, "loss": 1.3833, "step": 2090 }, { "epoch": 0.05369101667252428, "grad_norm": 0.859375, "learning_rate": 0.00019881425682322757, "loss": 1.2247, "step": 2091 }, { "epoch": 0.053716693868446096, "grad_norm": 0.92578125, "learning_rate": 0.0001988135712951147, "loss": 1.319, "step": 2092 }, { "epoch": 0.05374237106436792, "grad_norm": 0.9140625, "learning_rate": 0.0001988128855700753, "loss": 1.2458, "step": 2093 }, { "epoch": 0.05376804826028973, "grad_norm": 0.84375, "learning_rate": 0.00019881219964811074, "loss": 1.1517, "step": 2094 }, { "epoch": 0.053793725456211555, "grad_norm": 0.91796875, "learning_rate": 0.00019881151352922242, "loss": 1.1995, "step": 2095 }, { "epoch": 0.05381940265213337, "grad_norm": 0.94921875, "learning_rate": 0.00019881082721341168, "loss": 1.2963, "step": 2096 }, { "epoch": 0.05384507984805519, "grad_norm": 0.984375, "learning_rate": 0.00019881014070067984, "loss": 1.4053, "step": 2097 }, { "epoch": 0.053870757043977015, "grad_norm": 0.9453125, "learning_rate": 0.00019880945399102835, "loss": 1.1891, "step": 2098 }, { "epoch": 0.05389643423989883, "grad_norm": 0.87890625, "learning_rate": 0.00019880876708445856, "loss": 1.3646, "step": 2099 }, { "epoch": 0.05392211143582065, "grad_norm": 0.87109375, "learning_rate": 0.00019880807998097177, "loss": 1.1036, "step": 2100 }, { "epoch": 0.05394778863174247, "grad_norm": 1.0, "learning_rate": 0.00019880739268056944, "loss": 1.3872, "step": 2101 }, { "epoch": 0.05397346582766429, "grad_norm": 0.9375, "learning_rate": 0.00019880670518325288, "loss": 1.2082, "step": 2102 }, { "epoch": 0.05399914302358611, "grad_norm": 0.8828125, "learning_rate": 0.0001988060174890235, "loss": 1.2335, "step": 2103 }, { "epoch": 0.05402482021950793, "grad_norm": 0.92578125, "learning_rate": 0.00019880532959788263, "loss": 1.46, "step": 2104 }, { "epoch": 0.05405049741542975, "grad_norm": 0.96875, "learning_rate": 0.00019880464150983168, "loss": 1.3003, "step": 2105 }, { "epoch": 0.054076174611351564, "grad_norm": 0.875, "learning_rate": 0.000198803953224872, "loss": 1.4065, "step": 2106 }, { "epoch": 0.054101851807273386, "grad_norm": 0.93359375, "learning_rate": 0.00019880326474300497, "loss": 1.3131, "step": 2107 }, { "epoch": 0.05412752900319521, "grad_norm": 0.89453125, "learning_rate": 0.00019880257606423197, "loss": 1.3857, "step": 2108 }, { "epoch": 0.054153206199117024, "grad_norm": 0.89453125, "learning_rate": 0.00019880188718855433, "loss": 1.2329, "step": 2109 }, { "epoch": 0.054178883395038846, "grad_norm": 0.87890625, "learning_rate": 0.0001988011981159735, "loss": 1.1935, "step": 2110 }, { "epoch": 0.05420456059096066, "grad_norm": 0.89453125, "learning_rate": 0.00019880050884649078, "loss": 1.383, "step": 2111 }, { "epoch": 0.05423023778688248, "grad_norm": 0.89453125, "learning_rate": 0.00019879981938010754, "loss": 1.2846, "step": 2112 }, { "epoch": 0.054255914982804306, "grad_norm": 0.875, "learning_rate": 0.00019879912971682524, "loss": 1.5588, "step": 2113 }, { "epoch": 0.05428159217872612, "grad_norm": 0.91015625, "learning_rate": 0.00019879843985664514, "loss": 1.3921, "step": 2114 }, { "epoch": 0.05430726937464794, "grad_norm": 0.93359375, "learning_rate": 0.00019879774979956872, "loss": 1.2027, "step": 2115 }, { "epoch": 0.05433294657056976, "grad_norm": 0.95703125, "learning_rate": 0.00019879705954559728, "loss": 1.3014, "step": 2116 }, { "epoch": 0.05435862376649158, "grad_norm": 0.9921875, "learning_rate": 0.00019879636909473226, "loss": 1.3817, "step": 2117 }, { "epoch": 0.0543843009624134, "grad_norm": 0.859375, "learning_rate": 0.000198795678446975, "loss": 1.3864, "step": 2118 }, { "epoch": 0.05440997815833522, "grad_norm": 0.94140625, "learning_rate": 0.00019879498760232684, "loss": 1.3812, "step": 2119 }, { "epoch": 0.05443565535425704, "grad_norm": 0.9140625, "learning_rate": 0.0001987942965607892, "loss": 1.3025, "step": 2120 }, { "epoch": 0.054461332550178855, "grad_norm": 0.83984375, "learning_rate": 0.00019879360532236347, "loss": 1.1075, "step": 2121 }, { "epoch": 0.05448700974610068, "grad_norm": 0.9375, "learning_rate": 0.000198792913887051, "loss": 1.1759, "step": 2122 }, { "epoch": 0.0545126869420225, "grad_norm": 0.796875, "learning_rate": 0.00019879222225485318, "loss": 1.2904, "step": 2123 }, { "epoch": 0.054538364137944315, "grad_norm": 0.921875, "learning_rate": 0.0001987915304257714, "loss": 1.1638, "step": 2124 }, { "epoch": 0.05456404133386614, "grad_norm": 0.94921875, "learning_rate": 0.00019879083839980698, "loss": 1.3074, "step": 2125 }, { "epoch": 0.05458971852978795, "grad_norm": 0.9609375, "learning_rate": 0.00019879014617696136, "loss": 1.4983, "step": 2126 }, { "epoch": 0.054615395725709774, "grad_norm": 0.9140625, "learning_rate": 0.00019878945375723594, "loss": 1.2858, "step": 2127 }, { "epoch": 0.054641072921631596, "grad_norm": 0.8828125, "learning_rate": 0.000198788761140632, "loss": 1.2752, "step": 2128 }, { "epoch": 0.05466675011755341, "grad_norm": 0.91796875, "learning_rate": 0.00019878806832715102, "loss": 1.2038, "step": 2129 }, { "epoch": 0.054692427313475234, "grad_norm": 0.90625, "learning_rate": 0.00019878737531679437, "loss": 1.4201, "step": 2130 }, { "epoch": 0.05471810450939705, "grad_norm": 0.85546875, "learning_rate": 0.00019878668210956336, "loss": 1.2794, "step": 2131 }, { "epoch": 0.05474378170531887, "grad_norm": 0.98046875, "learning_rate": 0.00019878598870545942, "loss": 1.3725, "step": 2132 }, { "epoch": 0.05476945890124069, "grad_norm": 0.87890625, "learning_rate": 0.00019878529510448397, "loss": 1.3015, "step": 2133 }, { "epoch": 0.05479513609716251, "grad_norm": 1.046875, "learning_rate": 0.0001987846013066383, "loss": 1.52, "step": 2134 }, { "epoch": 0.05482081329308433, "grad_norm": 0.83984375, "learning_rate": 0.00019878390731192385, "loss": 1.3116, "step": 2135 }, { "epoch": 0.054846490489006146, "grad_norm": 0.9375, "learning_rate": 0.00019878321312034202, "loss": 1.4708, "step": 2136 }, { "epoch": 0.05487216768492797, "grad_norm": 0.95703125, "learning_rate": 0.00019878251873189416, "loss": 1.313, "step": 2137 }, { "epoch": 0.05489784488084979, "grad_norm": 0.875, "learning_rate": 0.00019878182414658166, "loss": 1.2716, "step": 2138 }, { "epoch": 0.054923522076771605, "grad_norm": 0.859375, "learning_rate": 0.00019878112936440589, "loss": 1.2439, "step": 2139 }, { "epoch": 0.05494919927269343, "grad_norm": 1.03125, "learning_rate": 0.00019878043438536827, "loss": 1.4015, "step": 2140 }, { "epoch": 0.05497487646861524, "grad_norm": 0.859375, "learning_rate": 0.00019877973920947017, "loss": 1.2873, "step": 2141 }, { "epoch": 0.055000553664537065, "grad_norm": 0.85546875, "learning_rate": 0.00019877904383671296, "loss": 1.318, "step": 2142 }, { "epoch": 0.05502623086045888, "grad_norm": 0.91015625, "learning_rate": 0.00019877834826709804, "loss": 1.4446, "step": 2143 }, { "epoch": 0.0550519080563807, "grad_norm": 0.9140625, "learning_rate": 0.0001987776525006268, "loss": 1.1837, "step": 2144 }, { "epoch": 0.055077585252302524, "grad_norm": 0.90625, "learning_rate": 0.00019877695653730063, "loss": 1.3495, "step": 2145 }, { "epoch": 0.05510326244822434, "grad_norm": 0.92578125, "learning_rate": 0.00019877626037712091, "loss": 1.3372, "step": 2146 }, { "epoch": 0.05512893964414616, "grad_norm": 0.90625, "learning_rate": 0.000198775564020089, "loss": 1.067, "step": 2147 }, { "epoch": 0.05515461684006798, "grad_norm": 0.890625, "learning_rate": 0.00019877486746620635, "loss": 1.0175, "step": 2148 }, { "epoch": 0.0551802940359898, "grad_norm": 0.9609375, "learning_rate": 0.00019877417071547426, "loss": 1.3964, "step": 2149 }, { "epoch": 0.05520597123191162, "grad_norm": 0.94140625, "learning_rate": 0.0001987734737678942, "loss": 1.1235, "step": 2150 }, { "epoch": 0.05523164842783344, "grad_norm": 0.92578125, "learning_rate": 0.00019877277662346755, "loss": 1.2778, "step": 2151 }, { "epoch": 0.05525732562375526, "grad_norm": 0.9296875, "learning_rate": 0.00019877207928219566, "loss": 1.3542, "step": 2152 }, { "epoch": 0.055283002819677074, "grad_norm": 0.87890625, "learning_rate": 0.0001987713817440799, "loss": 1.304, "step": 2153 }, { "epoch": 0.055308680015598896, "grad_norm": 0.89453125, "learning_rate": 0.00019877068400912175, "loss": 1.3343, "step": 2154 }, { "epoch": 0.05533435721152072, "grad_norm": 0.95703125, "learning_rate": 0.00019876998607732253, "loss": 1.3362, "step": 2155 }, { "epoch": 0.055360034407442534, "grad_norm": 0.91015625, "learning_rate": 0.00019876928794868365, "loss": 1.34, "step": 2156 }, { "epoch": 0.055385711603364356, "grad_norm": 0.8671875, "learning_rate": 0.0001987685896232065, "loss": 1.1004, "step": 2157 }, { "epoch": 0.05541138879928617, "grad_norm": 0.97265625, "learning_rate": 0.0001987678911008925, "loss": 1.289, "step": 2158 }, { "epoch": 0.05543706599520799, "grad_norm": 0.85546875, "learning_rate": 0.000198767192381743, "loss": 1.265, "step": 2159 }, { "epoch": 0.055462743191129815, "grad_norm": 0.859375, "learning_rate": 0.00019876649346575937, "loss": 1.2621, "step": 2160 }, { "epoch": 0.05548842038705163, "grad_norm": 0.90625, "learning_rate": 0.00019876579435294307, "loss": 1.1374, "step": 2161 }, { "epoch": 0.05551409758297345, "grad_norm": 0.94921875, "learning_rate": 0.00019876509504329544, "loss": 1.2725, "step": 2162 }, { "epoch": 0.05553977477889527, "grad_norm": 0.8671875, "learning_rate": 0.00019876439553681794, "loss": 1.2743, "step": 2163 }, { "epoch": 0.05556545197481709, "grad_norm": 0.9296875, "learning_rate": 0.0001987636958335119, "loss": 1.2597, "step": 2164 }, { "epoch": 0.05559112917073891, "grad_norm": 0.93359375, "learning_rate": 0.00019876299593337871, "loss": 1.4844, "step": 2165 }, { "epoch": 0.05561680636666073, "grad_norm": 0.921875, "learning_rate": 0.00019876229583641982, "loss": 1.2919, "step": 2166 }, { "epoch": 0.05564248356258255, "grad_norm": 0.90234375, "learning_rate": 0.0001987615955426366, "loss": 1.2401, "step": 2167 }, { "epoch": 0.055668160758504365, "grad_norm": 0.875, "learning_rate": 0.00019876089505203043, "loss": 1.346, "step": 2168 }, { "epoch": 0.05569383795442619, "grad_norm": 0.91796875, "learning_rate": 0.00019876019436460273, "loss": 1.3235, "step": 2169 }, { "epoch": 0.05571951515034801, "grad_norm": 1.0390625, "learning_rate": 0.00019875949348035485, "loss": 1.3286, "step": 2170 }, { "epoch": 0.055745192346269824, "grad_norm": 0.90234375, "learning_rate": 0.00019875879239928827, "loss": 1.3121, "step": 2171 }, { "epoch": 0.055770869542191646, "grad_norm": 0.85546875, "learning_rate": 0.00019875809112140428, "loss": 1.3647, "step": 2172 }, { "epoch": 0.05579654673811346, "grad_norm": 1.015625, "learning_rate": 0.0001987573896467044, "loss": 1.4047, "step": 2173 }, { "epoch": 0.055822223934035284, "grad_norm": 0.90234375, "learning_rate": 0.0001987566879751899, "loss": 1.2958, "step": 2174 }, { "epoch": 0.055847901129957106, "grad_norm": 0.93359375, "learning_rate": 0.00019875598610686227, "loss": 1.1011, "step": 2175 }, { "epoch": 0.05587357832587892, "grad_norm": 1.0078125, "learning_rate": 0.00019875528404172287, "loss": 1.3185, "step": 2176 }, { "epoch": 0.05589925552180074, "grad_norm": 0.828125, "learning_rate": 0.00019875458177977313, "loss": 1.1743, "step": 2177 }, { "epoch": 0.05592493271772256, "grad_norm": 0.88671875, "learning_rate": 0.0001987538793210144, "loss": 1.305, "step": 2178 }, { "epoch": 0.05595060991364438, "grad_norm": 0.94140625, "learning_rate": 0.00019875317666544814, "loss": 1.4606, "step": 2179 }, { "epoch": 0.0559762871095662, "grad_norm": 0.89453125, "learning_rate": 0.0001987524738130757, "loss": 1.2737, "step": 2180 }, { "epoch": 0.05600196430548802, "grad_norm": 0.90234375, "learning_rate": 0.00019875177076389853, "loss": 1.32, "step": 2181 }, { "epoch": 0.05602764150140984, "grad_norm": 0.8828125, "learning_rate": 0.00019875106751791796, "loss": 1.3141, "step": 2182 }, { "epoch": 0.056053318697331656, "grad_norm": 0.83203125, "learning_rate": 0.00019875036407513544, "loss": 1.2447, "step": 2183 }, { "epoch": 0.05607899589325348, "grad_norm": 0.87109375, "learning_rate": 0.0001987496604355524, "loss": 1.2184, "step": 2184 }, { "epoch": 0.0561046730891753, "grad_norm": 0.90625, "learning_rate": 0.00019874895659917019, "loss": 1.3967, "step": 2185 }, { "epoch": 0.056130350285097115, "grad_norm": 0.95703125, "learning_rate": 0.0001987482525659902, "loss": 1.2631, "step": 2186 }, { "epoch": 0.05615602748101894, "grad_norm": 0.9765625, "learning_rate": 0.0001987475483360139, "loss": 1.4103, "step": 2187 }, { "epoch": 0.05618170467694075, "grad_norm": 0.89453125, "learning_rate": 0.00019874684390924264, "loss": 1.2202, "step": 2188 }, { "epoch": 0.056207381872862575, "grad_norm": 0.94140625, "learning_rate": 0.00019874613928567785, "loss": 1.2875, "step": 2189 }, { "epoch": 0.0562330590687844, "grad_norm": 0.90625, "learning_rate": 0.0001987454344653209, "loss": 1.0618, "step": 2190 }, { "epoch": 0.05625873626470621, "grad_norm": 0.83203125, "learning_rate": 0.00019874472944817324, "loss": 1.3171, "step": 2191 }, { "epoch": 0.056284413460628034, "grad_norm": 0.9296875, "learning_rate": 0.00019874402423423625, "loss": 1.2348, "step": 2192 }, { "epoch": 0.05631009065654985, "grad_norm": 0.9140625, "learning_rate": 0.00019874331882351132, "loss": 1.2365, "step": 2193 }, { "epoch": 0.05633576785247167, "grad_norm": 0.9296875, "learning_rate": 0.00019874261321599989, "loss": 1.4614, "step": 2194 }, { "epoch": 0.05636144504839349, "grad_norm": 1.0234375, "learning_rate": 0.00019874190741170337, "loss": 1.3108, "step": 2195 }, { "epoch": 0.05638712224431531, "grad_norm": 1.1640625, "learning_rate": 0.00019874120141062312, "loss": 1.1305, "step": 2196 }, { "epoch": 0.05641279944023713, "grad_norm": 0.96484375, "learning_rate": 0.0001987404952127606, "loss": 1.3475, "step": 2197 }, { "epoch": 0.056438476636158946, "grad_norm": 1.0546875, "learning_rate": 0.00019873978881811716, "loss": 1.1946, "step": 2198 }, { "epoch": 0.05646415383208077, "grad_norm": 0.9453125, "learning_rate": 0.00019873908222669425, "loss": 1.6327, "step": 2199 }, { "epoch": 0.056489831028002584, "grad_norm": 0.94921875, "learning_rate": 0.00019873837543849326, "loss": 1.2018, "step": 2200 }, { "epoch": 0.056515508223924406, "grad_norm": 0.84765625, "learning_rate": 0.00019873766845351565, "loss": 1.1654, "step": 2201 }, { "epoch": 0.05654118541984623, "grad_norm": 0.8515625, "learning_rate": 0.00019873696127176277, "loss": 1.3326, "step": 2202 }, { "epoch": 0.05656686261576804, "grad_norm": 0.90625, "learning_rate": 0.000198736253893236, "loss": 1.2287, "step": 2203 }, { "epoch": 0.056592539811689865, "grad_norm": 0.984375, "learning_rate": 0.00019873554631793684, "loss": 1.2828, "step": 2204 }, { "epoch": 0.05661821700761168, "grad_norm": 0.953125, "learning_rate": 0.00019873483854586664, "loss": 1.2957, "step": 2205 }, { "epoch": 0.0566438942035335, "grad_norm": 0.89453125, "learning_rate": 0.0001987341305770268, "loss": 1.2704, "step": 2206 }, { "epoch": 0.056669571399455325, "grad_norm": 0.9296875, "learning_rate": 0.0001987334224114188, "loss": 1.4488, "step": 2207 }, { "epoch": 0.05669524859537714, "grad_norm": 0.92578125, "learning_rate": 0.00019873271404904398, "loss": 1.285, "step": 2208 }, { "epoch": 0.05672092579129896, "grad_norm": 0.88671875, "learning_rate": 0.0001987320054899038, "loss": 1.367, "step": 2209 }, { "epoch": 0.05674660298722078, "grad_norm": 0.8828125, "learning_rate": 0.00019873129673399963, "loss": 1.2773, "step": 2210 }, { "epoch": 0.0567722801831426, "grad_norm": 0.80078125, "learning_rate": 0.00019873058778133293, "loss": 1.1573, "step": 2211 }, { "epoch": 0.05679795737906442, "grad_norm": 0.87109375, "learning_rate": 0.00019872987863190508, "loss": 1.2695, "step": 2212 }, { "epoch": 0.05682363457498624, "grad_norm": 0.93359375, "learning_rate": 0.00019872916928571747, "loss": 1.2685, "step": 2213 }, { "epoch": 0.05684931177090806, "grad_norm": 0.8828125, "learning_rate": 0.00019872845974277156, "loss": 1.23, "step": 2214 }, { "epoch": 0.056874988966829874, "grad_norm": 0.82421875, "learning_rate": 0.0001987277500030688, "loss": 1.3185, "step": 2215 }, { "epoch": 0.0569006661627517, "grad_norm": 0.87109375, "learning_rate": 0.00019872704006661048, "loss": 1.2539, "step": 2216 }, { "epoch": 0.05692634335867352, "grad_norm": 0.89453125, "learning_rate": 0.00019872632993339813, "loss": 1.2184, "step": 2217 }, { "epoch": 0.056952020554595334, "grad_norm": 0.91015625, "learning_rate": 0.0001987256196034331, "loss": 1.2051, "step": 2218 }, { "epoch": 0.056977697750517156, "grad_norm": 0.91796875, "learning_rate": 0.00019872490907671685, "loss": 1.3704, "step": 2219 }, { "epoch": 0.05700337494643897, "grad_norm": 0.84765625, "learning_rate": 0.00019872419835325074, "loss": 1.2852, "step": 2220 }, { "epoch": 0.057029052142360794, "grad_norm": 0.9140625, "learning_rate": 0.00019872348743303625, "loss": 1.3746, "step": 2221 }, { "epoch": 0.057054729338282616, "grad_norm": 0.953125, "learning_rate": 0.00019872277631607474, "loss": 1.2533, "step": 2222 }, { "epoch": 0.05708040653420443, "grad_norm": 0.87890625, "learning_rate": 0.00019872206500236766, "loss": 1.221, "step": 2223 }, { "epoch": 0.05710608373012625, "grad_norm": 0.9921875, "learning_rate": 0.00019872135349191644, "loss": 1.2732, "step": 2224 }, { "epoch": 0.05713176092604807, "grad_norm": 0.90625, "learning_rate": 0.00019872064178472247, "loss": 1.281, "step": 2225 }, { "epoch": 0.05715743812196989, "grad_norm": 0.95703125, "learning_rate": 0.00019871992988078718, "loss": 1.2581, "step": 2226 }, { "epoch": 0.05718311531789171, "grad_norm": 0.8984375, "learning_rate": 0.000198719217780112, "loss": 1.4676, "step": 2227 }, { "epoch": 0.05720879251381353, "grad_norm": 0.92578125, "learning_rate": 0.00019871850548269833, "loss": 1.1525, "step": 2228 }, { "epoch": 0.05723446970973535, "grad_norm": 0.91796875, "learning_rate": 0.00019871779298854758, "loss": 1.2833, "step": 2229 }, { "epoch": 0.057260146905657165, "grad_norm": 1.0234375, "learning_rate": 0.00019871708029766118, "loss": 1.4007, "step": 2230 }, { "epoch": 0.05728582410157899, "grad_norm": 0.98828125, "learning_rate": 0.00019871636741004058, "loss": 1.2364, "step": 2231 }, { "epoch": 0.05731150129750081, "grad_norm": 0.8828125, "learning_rate": 0.00019871565432568716, "loss": 1.2648, "step": 2232 }, { "epoch": 0.057337178493422625, "grad_norm": 0.87890625, "learning_rate": 0.00019871494104460239, "loss": 1.2885, "step": 2233 }, { "epoch": 0.05736285568934445, "grad_norm": 0.89453125, "learning_rate": 0.0001987142275667876, "loss": 1.4951, "step": 2234 }, { "epoch": 0.05738853288526626, "grad_norm": 0.88671875, "learning_rate": 0.00019871351389224432, "loss": 1.4172, "step": 2235 }, { "epoch": 0.057414210081188084, "grad_norm": 0.859375, "learning_rate": 0.00019871280002097392, "loss": 1.2632, "step": 2236 }, { "epoch": 0.057439887277109906, "grad_norm": 0.91015625, "learning_rate": 0.0001987120859529778, "loss": 1.2752, "step": 2237 }, { "epoch": 0.05746556447303172, "grad_norm": 0.8671875, "learning_rate": 0.00019871137168825744, "loss": 1.2693, "step": 2238 }, { "epoch": 0.057491241668953544, "grad_norm": 0.90234375, "learning_rate": 0.0001987106572268142, "loss": 1.3259, "step": 2239 }, { "epoch": 0.05751691886487536, "grad_norm": 0.9609375, "learning_rate": 0.00019870994256864955, "loss": 1.3788, "step": 2240 }, { "epoch": 0.05754259606079718, "grad_norm": 0.90234375, "learning_rate": 0.00019870922771376496, "loss": 1.2492, "step": 2241 }, { "epoch": 0.057568273256719, "grad_norm": 0.91015625, "learning_rate": 0.00019870851266216171, "loss": 1.364, "step": 2242 }, { "epoch": 0.05759395045264082, "grad_norm": 0.9296875, "learning_rate": 0.00019870779741384135, "loss": 1.4743, "step": 2243 }, { "epoch": 0.05761962764856264, "grad_norm": 0.99609375, "learning_rate": 0.00019870708196880526, "loss": 1.239, "step": 2244 }, { "epoch": 0.057645304844484456, "grad_norm": 0.9375, "learning_rate": 0.00019870636632705486, "loss": 1.2229, "step": 2245 }, { "epoch": 0.05767098204040628, "grad_norm": 0.9140625, "learning_rate": 0.0001987056504885916, "loss": 1.3021, "step": 2246 }, { "epoch": 0.05769665923632809, "grad_norm": 1.0078125, "learning_rate": 0.0001987049344534169, "loss": 1.3069, "step": 2247 }, { "epoch": 0.057722336432249916, "grad_norm": 0.8984375, "learning_rate": 0.00019870421822153217, "loss": 1.3793, "step": 2248 }, { "epoch": 0.05774801362817174, "grad_norm": 0.8515625, "learning_rate": 0.00019870350179293885, "loss": 1.3197, "step": 2249 }, { "epoch": 0.05777369082409355, "grad_norm": 0.9375, "learning_rate": 0.00019870278516763835, "loss": 1.3622, "step": 2250 }, { "epoch": 0.057799368020015375, "grad_norm": 0.98046875, "learning_rate": 0.00019870206834563217, "loss": 1.2994, "step": 2251 }, { "epoch": 0.05782504521593719, "grad_norm": 0.85546875, "learning_rate": 0.00019870135132692162, "loss": 1.3788, "step": 2252 }, { "epoch": 0.05785072241185901, "grad_norm": 0.875, "learning_rate": 0.00019870063411150823, "loss": 1.3912, "step": 2253 }, { "epoch": 0.057876399607780835, "grad_norm": 0.95703125, "learning_rate": 0.00019869991669939339, "loss": 1.4139, "step": 2254 }, { "epoch": 0.05790207680370265, "grad_norm": 0.80859375, "learning_rate": 0.0001986991990905785, "loss": 1.0621, "step": 2255 }, { "epoch": 0.05792775399962447, "grad_norm": 0.91796875, "learning_rate": 0.00019869848128506505, "loss": 1.1829, "step": 2256 }, { "epoch": 0.05795343119554629, "grad_norm": 0.9296875, "learning_rate": 0.00019869776328285444, "loss": 1.2203, "step": 2257 }, { "epoch": 0.05797910839146811, "grad_norm": 0.9296875, "learning_rate": 0.00019869704508394814, "loss": 1.2501, "step": 2258 }, { "epoch": 0.05800478558738993, "grad_norm": 0.796875, "learning_rate": 0.00019869632668834749, "loss": 0.9786, "step": 2259 }, { "epoch": 0.05803046278331175, "grad_norm": 0.9375, "learning_rate": 0.000198695608096054, "loss": 1.2736, "step": 2260 }, { "epoch": 0.05805613997923357, "grad_norm": 0.94140625, "learning_rate": 0.00019869488930706906, "loss": 1.2117, "step": 2261 }, { "epoch": 0.058081817175155384, "grad_norm": 0.90234375, "learning_rate": 0.00019869417032139413, "loss": 1.2015, "step": 2262 }, { "epoch": 0.058107494371077206, "grad_norm": 0.98046875, "learning_rate": 0.00019869345113903065, "loss": 1.2175, "step": 2263 }, { "epoch": 0.05813317156699903, "grad_norm": 0.9375, "learning_rate": 0.00019869273175998, "loss": 1.3594, "step": 2264 }, { "epoch": 0.058158848762920844, "grad_norm": 0.92578125, "learning_rate": 0.00019869201218424367, "loss": 1.1208, "step": 2265 }, { "epoch": 0.058184525958842666, "grad_norm": 0.9296875, "learning_rate": 0.00019869129241182308, "loss": 1.2713, "step": 2266 }, { "epoch": 0.05821020315476448, "grad_norm": 0.921875, "learning_rate": 0.00019869057244271967, "loss": 1.1953, "step": 2267 }, { "epoch": 0.0582358803506863, "grad_norm": 0.94140625, "learning_rate": 0.00019868985227693484, "loss": 1.4039, "step": 2268 }, { "epoch": 0.058261557546608125, "grad_norm": 0.82421875, "learning_rate": 0.00019868913191447004, "loss": 1.2397, "step": 2269 }, { "epoch": 0.05828723474252994, "grad_norm": 1.0234375, "learning_rate": 0.00019868841135532673, "loss": 1.3064, "step": 2270 }, { "epoch": 0.05831291193845176, "grad_norm": 0.83984375, "learning_rate": 0.00019868769059950636, "loss": 1.1607, "step": 2271 }, { "epoch": 0.05833858913437358, "grad_norm": 0.890625, "learning_rate": 0.0001986869696470103, "loss": 1.1993, "step": 2272 }, { "epoch": 0.0583642663302954, "grad_norm": 0.9296875, "learning_rate": 0.00019868624849784004, "loss": 1.4712, "step": 2273 }, { "epoch": 0.05838994352621722, "grad_norm": 1.0546875, "learning_rate": 0.000198685527151997, "loss": 1.2852, "step": 2274 }, { "epoch": 0.05841562072213904, "grad_norm": 0.94921875, "learning_rate": 0.0001986848056094826, "loss": 1.4401, "step": 2275 }, { "epoch": 0.05844129791806086, "grad_norm": 0.8359375, "learning_rate": 0.00019868408387029832, "loss": 1.2137, "step": 2276 }, { "epoch": 0.058466975113982675, "grad_norm": 0.91015625, "learning_rate": 0.00019868336193444556, "loss": 1.3219, "step": 2277 }, { "epoch": 0.0584926523099045, "grad_norm": 0.85546875, "learning_rate": 0.0001986826398019258, "loss": 1.2851, "step": 2278 }, { "epoch": 0.05851832950582632, "grad_norm": 0.9140625, "learning_rate": 0.00019868191747274042, "loss": 1.3395, "step": 2279 }, { "epoch": 0.058544006701748134, "grad_norm": 0.921875, "learning_rate": 0.00019868119494689088, "loss": 1.3317, "step": 2280 }, { "epoch": 0.05856968389766996, "grad_norm": 0.91796875, "learning_rate": 0.00019868047222437868, "loss": 1.3788, "step": 2281 }, { "epoch": 0.05859536109359177, "grad_norm": 0.90234375, "learning_rate": 0.00019867974930520518, "loss": 1.2817, "step": 2282 }, { "epoch": 0.058621038289513594, "grad_norm": 0.9453125, "learning_rate": 0.00019867902618937185, "loss": 1.3305, "step": 2283 }, { "epoch": 0.058646715485435416, "grad_norm": 0.8359375, "learning_rate": 0.00019867830287688016, "loss": 1.2652, "step": 2284 }, { "epoch": 0.05867239268135723, "grad_norm": 0.86328125, "learning_rate": 0.0001986775793677315, "loss": 1.2706, "step": 2285 }, { "epoch": 0.058698069877279054, "grad_norm": 0.98828125, "learning_rate": 0.00019867685566192736, "loss": 1.5038, "step": 2286 }, { "epoch": 0.05872374707320087, "grad_norm": 0.93359375, "learning_rate": 0.00019867613175946915, "loss": 1.2135, "step": 2287 }, { "epoch": 0.05874942426912269, "grad_norm": 0.87109375, "learning_rate": 0.00019867540766035831, "loss": 1.1866, "step": 2288 }, { "epoch": 0.05877510146504451, "grad_norm": 0.84375, "learning_rate": 0.0001986746833645963, "loss": 1.2882, "step": 2289 }, { "epoch": 0.05880077866096633, "grad_norm": 0.96875, "learning_rate": 0.00019867395887218457, "loss": 1.4522, "step": 2290 }, { "epoch": 0.05882645585688815, "grad_norm": 1.0625, "learning_rate": 0.00019867323418312457, "loss": 1.3224, "step": 2291 }, { "epoch": 0.058852133052809966, "grad_norm": 0.92578125, "learning_rate": 0.0001986725092974177, "loss": 1.1956, "step": 2292 }, { "epoch": 0.05887781024873179, "grad_norm": 0.9375, "learning_rate": 0.00019867178421506545, "loss": 1.4472, "step": 2293 }, { "epoch": 0.05890348744465361, "grad_norm": 0.91796875, "learning_rate": 0.00019867105893606922, "loss": 1.4312, "step": 2294 }, { "epoch": 0.058929164640575425, "grad_norm": 0.91015625, "learning_rate": 0.0001986703334604305, "loss": 1.2157, "step": 2295 }, { "epoch": 0.05895484183649725, "grad_norm": 0.859375, "learning_rate": 0.00019866960778815073, "loss": 1.268, "step": 2296 }, { "epoch": 0.05898051903241906, "grad_norm": 0.84765625, "learning_rate": 0.00019866888191923134, "loss": 1.0882, "step": 2297 }, { "epoch": 0.059006196228340885, "grad_norm": 0.890625, "learning_rate": 0.0001986681558536738, "loss": 1.2015, "step": 2298 }, { "epoch": 0.0590318734242627, "grad_norm": 0.890625, "learning_rate": 0.00019866742959147947, "loss": 1.2758, "step": 2299 }, { "epoch": 0.05905755062018452, "grad_norm": 0.89453125, "learning_rate": 0.00019866670313264993, "loss": 1.4245, "step": 2300 }, { "epoch": 0.059083227816106344, "grad_norm": 0.890625, "learning_rate": 0.00019866597647718654, "loss": 1.1496, "step": 2301 }, { "epoch": 0.05910890501202816, "grad_norm": 0.91796875, "learning_rate": 0.00019866524962509078, "loss": 1.3092, "step": 2302 }, { "epoch": 0.05913458220794998, "grad_norm": 0.9765625, "learning_rate": 0.00019866452257636408, "loss": 1.3171, "step": 2303 }, { "epoch": 0.0591602594038718, "grad_norm": 0.88671875, "learning_rate": 0.00019866379533100789, "loss": 1.0103, "step": 2304 }, { "epoch": 0.05918593659979362, "grad_norm": 0.9375, "learning_rate": 0.00019866306788902369, "loss": 1.3435, "step": 2305 }, { "epoch": 0.05921161379571544, "grad_norm": 0.859375, "learning_rate": 0.00019866234025041292, "loss": 1.218, "step": 2306 }, { "epoch": 0.059237290991637256, "grad_norm": 0.86328125, "learning_rate": 0.00019866161241517696, "loss": 1.1686, "step": 2307 }, { "epoch": 0.05926296818755908, "grad_norm": 0.91015625, "learning_rate": 0.00019866088438331736, "loss": 1.309, "step": 2308 }, { "epoch": 0.059288645383480894, "grad_norm": 0.8203125, "learning_rate": 0.00019866015615483553, "loss": 0.9594, "step": 2309 }, { "epoch": 0.059314322579402716, "grad_norm": 0.8515625, "learning_rate": 0.00019865942772973288, "loss": 1.2569, "step": 2310 }, { "epoch": 0.05933999977532454, "grad_norm": 0.9140625, "learning_rate": 0.00019865869910801095, "loss": 1.2087, "step": 2311 }, { "epoch": 0.05936567697124635, "grad_norm": 0.91796875, "learning_rate": 0.00019865797028967115, "loss": 1.2365, "step": 2312 }, { "epoch": 0.059391354167168176, "grad_norm": 0.88671875, "learning_rate": 0.0001986572412747149, "loss": 1.2693, "step": 2313 }, { "epoch": 0.05941703136308999, "grad_norm": 0.8828125, "learning_rate": 0.0001986565120631437, "loss": 1.226, "step": 2314 }, { "epoch": 0.05944270855901181, "grad_norm": 0.91015625, "learning_rate": 0.00019865578265495896, "loss": 1.2406, "step": 2315 }, { "epoch": 0.059468385754933635, "grad_norm": 0.83984375, "learning_rate": 0.00019865505305016216, "loss": 1.2012, "step": 2316 }, { "epoch": 0.05949406295085545, "grad_norm": 0.953125, "learning_rate": 0.00019865432324875476, "loss": 1.3467, "step": 2317 }, { "epoch": 0.05951974014677727, "grad_norm": 0.98046875, "learning_rate": 0.0001986535932507382, "loss": 1.2173, "step": 2318 }, { "epoch": 0.05954541734269909, "grad_norm": 0.88671875, "learning_rate": 0.00019865286305611394, "loss": 1.3724, "step": 2319 }, { "epoch": 0.05957109453862091, "grad_norm": 0.91015625, "learning_rate": 0.00019865213266488345, "loss": 1.3275, "step": 2320 }, { "epoch": 0.05959677173454273, "grad_norm": 0.8203125, "learning_rate": 0.00019865140207704816, "loss": 1.2891, "step": 2321 }, { "epoch": 0.05962244893046455, "grad_norm": 0.90625, "learning_rate": 0.00019865067129260956, "loss": 1.192, "step": 2322 }, { "epoch": 0.05964812612638637, "grad_norm": 0.94921875, "learning_rate": 0.00019864994031156902, "loss": 1.2708, "step": 2323 }, { "epoch": 0.059673803322308185, "grad_norm": 0.8984375, "learning_rate": 0.00019864920913392813, "loss": 1.2473, "step": 2324 }, { "epoch": 0.05969948051823001, "grad_norm": 0.984375, "learning_rate": 0.00019864847775968823, "loss": 1.116, "step": 2325 }, { "epoch": 0.05972515771415183, "grad_norm": 0.8984375, "learning_rate": 0.00019864774618885082, "loss": 1.1229, "step": 2326 }, { "epoch": 0.059750834910073644, "grad_norm": 0.859375, "learning_rate": 0.00019864701442141739, "loss": 1.1474, "step": 2327 }, { "epoch": 0.059776512105995466, "grad_norm": 0.859375, "learning_rate": 0.00019864628245738937, "loss": 1.3466, "step": 2328 }, { "epoch": 0.05980218930191728, "grad_norm": 0.89453125, "learning_rate": 0.0001986455502967682, "loss": 1.2491, "step": 2329 }, { "epoch": 0.059827866497839104, "grad_norm": 0.8984375, "learning_rate": 0.00019864481793955538, "loss": 1.2859, "step": 2330 }, { "epoch": 0.059853543693760926, "grad_norm": 0.9296875, "learning_rate": 0.00019864408538575233, "loss": 1.2313, "step": 2331 }, { "epoch": 0.05987922088968274, "grad_norm": 0.8515625, "learning_rate": 0.00019864335263536056, "loss": 1.3255, "step": 2332 }, { "epoch": 0.05990489808560456, "grad_norm": 0.9296875, "learning_rate": 0.0001986426196883815, "loss": 1.3362, "step": 2333 }, { "epoch": 0.05993057528152638, "grad_norm": 0.90234375, "learning_rate": 0.00019864188654481656, "loss": 1.2798, "step": 2334 }, { "epoch": 0.0599562524774482, "grad_norm": 0.91796875, "learning_rate": 0.0001986411532046673, "loss": 1.4122, "step": 2335 }, { "epoch": 0.05998192967337002, "grad_norm": 0.8828125, "learning_rate": 0.0001986404196679351, "loss": 1.3648, "step": 2336 }, { "epoch": 0.06000760686929184, "grad_norm": 0.91796875, "learning_rate": 0.0001986396859346215, "loss": 1.3549, "step": 2337 }, { "epoch": 0.06003328406521366, "grad_norm": 0.90234375, "learning_rate": 0.00019863895200472785, "loss": 1.2714, "step": 2338 }, { "epoch": 0.060058961261135475, "grad_norm": 0.87109375, "learning_rate": 0.00019863821787825573, "loss": 1.2863, "step": 2339 }, { "epoch": 0.0600846384570573, "grad_norm": 0.890625, "learning_rate": 0.0001986374835552065, "loss": 1.3616, "step": 2340 }, { "epoch": 0.06011031565297912, "grad_norm": 0.8828125, "learning_rate": 0.0001986367490355817, "loss": 1.2421, "step": 2341 }, { "epoch": 0.060135992848900935, "grad_norm": 1.0390625, "learning_rate": 0.00019863601431938278, "loss": 1.354, "step": 2342 }, { "epoch": 0.06016167004482276, "grad_norm": 0.94140625, "learning_rate": 0.00019863527940661117, "loss": 1.3127, "step": 2343 }, { "epoch": 0.06018734724074457, "grad_norm": 0.94140625, "learning_rate": 0.0001986345442972684, "loss": 1.1301, "step": 2344 }, { "epoch": 0.060213024436666394, "grad_norm": 1.03125, "learning_rate": 0.00019863380899135587, "loss": 1.2882, "step": 2345 }, { "epoch": 0.06023870163258822, "grad_norm": 0.9296875, "learning_rate": 0.00019863307348887507, "loss": 1.3148, "step": 2346 }, { "epoch": 0.06026437882851003, "grad_norm": 0.98046875, "learning_rate": 0.00019863233778982746, "loss": 1.2027, "step": 2347 }, { "epoch": 0.060290056024431854, "grad_norm": 0.9375, "learning_rate": 0.0001986316018942145, "loss": 1.1576, "step": 2348 }, { "epoch": 0.06031573322035367, "grad_norm": 0.9609375, "learning_rate": 0.00019863086580203766, "loss": 1.2426, "step": 2349 }, { "epoch": 0.06034141041627549, "grad_norm": 0.93359375, "learning_rate": 0.00019863012951329846, "loss": 1.2944, "step": 2350 }, { "epoch": 0.06036708761219731, "grad_norm": 0.85546875, "learning_rate": 0.00019862939302799827, "loss": 1.2725, "step": 2351 }, { "epoch": 0.06039276480811913, "grad_norm": 0.88671875, "learning_rate": 0.00019862865634613864, "loss": 1.3806, "step": 2352 }, { "epoch": 0.06041844200404095, "grad_norm": 0.87890625, "learning_rate": 0.000198627919467721, "loss": 1.2009, "step": 2353 }, { "epoch": 0.060444119199962766, "grad_norm": 0.95703125, "learning_rate": 0.00019862718239274681, "loss": 1.3075, "step": 2354 }, { "epoch": 0.06046979639588459, "grad_norm": 0.8359375, "learning_rate": 0.0001986264451212176, "loss": 1.292, "step": 2355 }, { "epoch": 0.060495473591806403, "grad_norm": 0.8203125, "learning_rate": 0.00019862570765313476, "loss": 1.0848, "step": 2356 }, { "epoch": 0.060521150787728226, "grad_norm": 0.87890625, "learning_rate": 0.0001986249699884998, "loss": 1.2704, "step": 2357 }, { "epoch": 0.06054682798365005, "grad_norm": 0.94140625, "learning_rate": 0.00019862423212731417, "loss": 1.282, "step": 2358 }, { "epoch": 0.06057250517957186, "grad_norm": 0.83203125, "learning_rate": 0.00019862349406957934, "loss": 1.1477, "step": 2359 }, { "epoch": 0.060598182375493685, "grad_norm": 1.046875, "learning_rate": 0.00019862275581529685, "loss": 1.3214, "step": 2360 }, { "epoch": 0.0606238595714155, "grad_norm": 0.87890625, "learning_rate": 0.0001986220173644681, "loss": 1.1193, "step": 2361 }, { "epoch": 0.06064953676733732, "grad_norm": 0.8828125, "learning_rate": 0.00019862127871709452, "loss": 1.4294, "step": 2362 }, { "epoch": 0.060675213963259145, "grad_norm": 1.25, "learning_rate": 0.0001986205398731777, "loss": 1.2488, "step": 2363 }, { "epoch": 0.06070089115918096, "grad_norm": 0.87890625, "learning_rate": 0.00019861980083271902, "loss": 1.2793, "step": 2364 }, { "epoch": 0.06072656835510278, "grad_norm": 0.98046875, "learning_rate": 0.00019861906159572, "loss": 1.0481, "step": 2365 }, { "epoch": 0.0607522455510246, "grad_norm": 0.97265625, "learning_rate": 0.0001986183221621821, "loss": 1.3362, "step": 2366 }, { "epoch": 0.06077792274694642, "grad_norm": 1.078125, "learning_rate": 0.00019861758253210678, "loss": 1.3308, "step": 2367 }, { "epoch": 0.06080359994286824, "grad_norm": 0.90234375, "learning_rate": 0.00019861684270549557, "loss": 1.3751, "step": 2368 }, { "epoch": 0.06082927713879006, "grad_norm": 0.80859375, "learning_rate": 0.00019861610268234986, "loss": 1.0264, "step": 2369 }, { "epoch": 0.06085495433471188, "grad_norm": 0.94921875, "learning_rate": 0.00019861536246267117, "loss": 1.2502, "step": 2370 }, { "epoch": 0.060880631530633694, "grad_norm": 0.78515625, "learning_rate": 0.00019861462204646095, "loss": 1.0756, "step": 2371 }, { "epoch": 0.060906308726555516, "grad_norm": 0.90234375, "learning_rate": 0.00019861388143372073, "loss": 1.2922, "step": 2372 }, { "epoch": 0.06093198592247734, "grad_norm": 0.97265625, "learning_rate": 0.00019861314062445193, "loss": 1.2408, "step": 2373 }, { "epoch": 0.060957663118399154, "grad_norm": 0.98828125, "learning_rate": 0.00019861239961865607, "loss": 1.1707, "step": 2374 }, { "epoch": 0.060983340314320976, "grad_norm": 0.95703125, "learning_rate": 0.0001986116584163346, "loss": 1.0758, "step": 2375 }, { "epoch": 0.06100901751024279, "grad_norm": 0.91015625, "learning_rate": 0.000198610917017489, "loss": 1.3627, "step": 2376 }, { "epoch": 0.06103469470616461, "grad_norm": 1.0, "learning_rate": 0.00019861017542212076, "loss": 1.1715, "step": 2377 }, { "epoch": 0.061060371902086436, "grad_norm": 1.0234375, "learning_rate": 0.0001986094336302313, "loss": 1.2798, "step": 2378 }, { "epoch": 0.06108604909800825, "grad_norm": 0.921875, "learning_rate": 0.0001986086916418222, "loss": 1.2541, "step": 2379 }, { "epoch": 0.06111172629393007, "grad_norm": 0.9140625, "learning_rate": 0.00019860794945689486, "loss": 1.3726, "step": 2380 }, { "epoch": 0.06113740348985189, "grad_norm": 0.92578125, "learning_rate": 0.00019860720707545079, "loss": 1.3819, "step": 2381 }, { "epoch": 0.06116308068577371, "grad_norm": 0.91015625, "learning_rate": 0.00019860646449749148, "loss": 1.1901, "step": 2382 }, { "epoch": 0.06118875788169553, "grad_norm": 0.91015625, "learning_rate": 0.00019860572172301834, "loss": 1.3441, "step": 2383 }, { "epoch": 0.06121443507761735, "grad_norm": 0.92578125, "learning_rate": 0.00019860497875203294, "loss": 1.2996, "step": 2384 }, { "epoch": 0.06124011227353917, "grad_norm": 0.97265625, "learning_rate": 0.00019860423558453673, "loss": 1.2187, "step": 2385 }, { "epoch": 0.061265789469460985, "grad_norm": 0.9921875, "learning_rate": 0.00019860349222053118, "loss": 1.5184, "step": 2386 }, { "epoch": 0.06129146666538281, "grad_norm": 0.90625, "learning_rate": 0.0001986027486600178, "loss": 1.3676, "step": 2387 }, { "epoch": 0.06131714386130463, "grad_norm": 0.9453125, "learning_rate": 0.00019860200490299802, "loss": 1.3625, "step": 2388 }, { "epoch": 0.061342821057226445, "grad_norm": 0.8125, "learning_rate": 0.00019860126094947337, "loss": 1.0733, "step": 2389 }, { "epoch": 0.06136849825314827, "grad_norm": 0.88671875, "learning_rate": 0.00019860051679944528, "loss": 1.1093, "step": 2390 }, { "epoch": 0.06139417544907008, "grad_norm": 0.96484375, "learning_rate": 0.00019859977245291529, "loss": 1.3244, "step": 2391 }, { "epoch": 0.061419852644991904, "grad_norm": 0.86328125, "learning_rate": 0.00019859902790988484, "loss": 1.2411, "step": 2392 }, { "epoch": 0.061445529840913726, "grad_norm": 0.8828125, "learning_rate": 0.00019859828317035543, "loss": 1.302, "step": 2393 }, { "epoch": 0.06147120703683554, "grad_norm": 0.8984375, "learning_rate": 0.0001985975382343286, "loss": 1.2498, "step": 2394 }, { "epoch": 0.061496884232757364, "grad_norm": 1.40625, "learning_rate": 0.00019859679310180574, "loss": 1.1964, "step": 2395 }, { "epoch": 0.06152256142867918, "grad_norm": 0.87109375, "learning_rate": 0.0001985960477727884, "loss": 1.3167, "step": 2396 }, { "epoch": 0.061548238624601, "grad_norm": 0.921875, "learning_rate": 0.00019859530224727802, "loss": 1.337, "step": 2397 }, { "epoch": 0.06157391582052282, "grad_norm": 0.80859375, "learning_rate": 0.00019859455652527613, "loss": 1.2062, "step": 2398 }, { "epoch": 0.06159959301644464, "grad_norm": 0.88671875, "learning_rate": 0.00019859381060678417, "loss": 1.2106, "step": 2399 }, { "epoch": 0.06162527021236646, "grad_norm": 0.9765625, "learning_rate": 0.00019859306449180366, "loss": 1.2547, "step": 2400 }, { "epoch": 0.061650947408288276, "grad_norm": 0.89453125, "learning_rate": 0.0001985923181803361, "loss": 1.2912, "step": 2401 }, { "epoch": 0.0616766246042101, "grad_norm": 1.0, "learning_rate": 0.00019859157167238293, "loss": 1.298, "step": 2402 }, { "epoch": 0.06170230180013191, "grad_norm": 1.171875, "learning_rate": 0.0001985908249679457, "loss": 1.5087, "step": 2403 }, { "epoch": 0.061727978996053735, "grad_norm": 1.0, "learning_rate": 0.0001985900780670258, "loss": 1.2972, "step": 2404 }, { "epoch": 0.06175365619197556, "grad_norm": 0.9375, "learning_rate": 0.00019858933096962483, "loss": 1.435, "step": 2405 }, { "epoch": 0.06177933338789737, "grad_norm": 0.8671875, "learning_rate": 0.00019858858367574421, "loss": 1.3473, "step": 2406 }, { "epoch": 0.061805010583819195, "grad_norm": 1.0078125, "learning_rate": 0.00019858783618538546, "loss": 1.3174, "step": 2407 }, { "epoch": 0.06183068777974101, "grad_norm": 0.9296875, "learning_rate": 0.00019858708849855003, "loss": 1.2668, "step": 2408 }, { "epoch": 0.06185636497566283, "grad_norm": 0.89453125, "learning_rate": 0.00019858634061523946, "loss": 1.3767, "step": 2409 }, { "epoch": 0.061882042171584654, "grad_norm": 0.96484375, "learning_rate": 0.0001985855925354552, "loss": 1.3085, "step": 2410 }, { "epoch": 0.06190771936750647, "grad_norm": 0.9375, "learning_rate": 0.0001985848442591988, "loss": 1.2159, "step": 2411 }, { "epoch": 0.06193339656342829, "grad_norm": 0.94921875, "learning_rate": 0.0001985840957864717, "loss": 1.2202, "step": 2412 }, { "epoch": 0.06195907375935011, "grad_norm": 0.98828125, "learning_rate": 0.00019858334711727539, "loss": 1.2913, "step": 2413 }, { "epoch": 0.06198475095527193, "grad_norm": 0.953125, "learning_rate": 0.00019858259825161135, "loss": 1.4344, "step": 2414 }, { "epoch": 0.06201042815119375, "grad_norm": 0.890625, "learning_rate": 0.00019858184918948113, "loss": 1.2394, "step": 2415 }, { "epoch": 0.06203610534711557, "grad_norm": 0.8515625, "learning_rate": 0.00019858109993088619, "loss": 1.3483, "step": 2416 }, { "epoch": 0.06206178254303739, "grad_norm": 0.96484375, "learning_rate": 0.00019858035047582798, "loss": 1.1542, "step": 2417 }, { "epoch": 0.062087459738959204, "grad_norm": 0.83984375, "learning_rate": 0.00019857960082430808, "loss": 1.1704, "step": 2418 }, { "epoch": 0.062113136934881026, "grad_norm": 0.8125, "learning_rate": 0.00019857885097632793, "loss": 1.1617, "step": 2419 }, { "epoch": 0.06213881413080285, "grad_norm": 0.8828125, "learning_rate": 0.00019857810093188905, "loss": 1.1978, "step": 2420 }, { "epoch": 0.062164491326724663, "grad_norm": 0.93359375, "learning_rate": 0.0001985773506909929, "loss": 1.3687, "step": 2421 }, { "epoch": 0.062190168522646486, "grad_norm": 0.9140625, "learning_rate": 0.00019857660025364099, "loss": 1.3078, "step": 2422 }, { "epoch": 0.0622158457185683, "grad_norm": 0.96484375, "learning_rate": 0.0001985758496198348, "loss": 1.1902, "step": 2423 }, { "epoch": 0.06224152291449012, "grad_norm": 0.83984375, "learning_rate": 0.0001985750987895759, "loss": 1.2965, "step": 2424 }, { "epoch": 0.062267200110411945, "grad_norm": 0.890625, "learning_rate": 0.0001985743477628657, "loss": 1.288, "step": 2425 }, { "epoch": 0.06229287730633376, "grad_norm": 0.87109375, "learning_rate": 0.00019857359653970572, "loss": 1.271, "step": 2426 }, { "epoch": 0.06231855450225558, "grad_norm": 0.92578125, "learning_rate": 0.0001985728451200975, "loss": 1.259, "step": 2427 }, { "epoch": 0.0623442316981774, "grad_norm": 0.921875, "learning_rate": 0.00019857209350404248, "loss": 1.254, "step": 2428 }, { "epoch": 0.06236990889409922, "grad_norm": 0.85546875, "learning_rate": 0.00019857134169154217, "loss": 1.1353, "step": 2429 }, { "epoch": 0.06239558609002104, "grad_norm": 0.94140625, "learning_rate": 0.0001985705896825981, "loss": 1.2271, "step": 2430 }, { "epoch": 0.06242126328594286, "grad_norm": 0.93359375, "learning_rate": 0.00019856983747721174, "loss": 1.3734, "step": 2431 }, { "epoch": 0.06244694048186468, "grad_norm": 0.8359375, "learning_rate": 0.00019856908507538462, "loss": 1.072, "step": 2432 }, { "epoch": 0.062472617677786495, "grad_norm": 0.84765625, "learning_rate": 0.0001985683324771182, "loss": 1.1394, "step": 2433 }, { "epoch": 0.06249829487370832, "grad_norm": 0.91796875, "learning_rate": 0.00019856757968241397, "loss": 1.2484, "step": 2434 }, { "epoch": 0.06252397206963013, "grad_norm": 0.95703125, "learning_rate": 0.0001985668266912735, "loss": 1.3193, "step": 2435 }, { "epoch": 0.06254964926555195, "grad_norm": 0.92578125, "learning_rate": 0.00019856607350369823, "loss": 1.35, "step": 2436 }, { "epoch": 0.06257532646147378, "grad_norm": 0.875, "learning_rate": 0.00019856532011968967, "loss": 1.2797, "step": 2437 }, { "epoch": 0.0626010036573956, "grad_norm": 0.91796875, "learning_rate": 0.00019856456653924933, "loss": 1.1851, "step": 2438 }, { "epoch": 0.0626266808533174, "grad_norm": 0.83984375, "learning_rate": 0.00019856381276237874, "loss": 1.2445, "step": 2439 }, { "epoch": 0.06265235804923923, "grad_norm": 1.0, "learning_rate": 0.00019856305878907936, "loss": 1.4098, "step": 2440 }, { "epoch": 0.06267803524516105, "grad_norm": 0.83984375, "learning_rate": 0.0001985623046193527, "loss": 1.0661, "step": 2441 }, { "epoch": 0.06270371244108287, "grad_norm": 0.85546875, "learning_rate": 0.00019856155025320025, "loss": 1.2973, "step": 2442 }, { "epoch": 0.0627293896370047, "grad_norm": 0.9453125, "learning_rate": 0.00019856079569062355, "loss": 1.329, "step": 2443 }, { "epoch": 0.0627550668329265, "grad_norm": 0.875, "learning_rate": 0.00019856004093162413, "loss": 1.3051, "step": 2444 }, { "epoch": 0.06278074402884833, "grad_norm": 0.83984375, "learning_rate": 0.0001985592859762034, "loss": 1.3607, "step": 2445 }, { "epoch": 0.06280642122477015, "grad_norm": 0.8984375, "learning_rate": 0.00019855853082436292, "loss": 1.3604, "step": 2446 }, { "epoch": 0.06283209842069197, "grad_norm": 0.83203125, "learning_rate": 0.00019855777547610418, "loss": 1.3384, "step": 2447 }, { "epoch": 0.06285777561661379, "grad_norm": 0.96875, "learning_rate": 0.00019855701993142874, "loss": 1.2217, "step": 2448 }, { "epoch": 0.0628834528125356, "grad_norm": 0.875, "learning_rate": 0.00019855626419033805, "loss": 1.1331, "step": 2449 }, { "epoch": 0.06290913000845742, "grad_norm": 0.90234375, "learning_rate": 0.0001985555082528336, "loss": 1.4944, "step": 2450 }, { "epoch": 0.06293480720437925, "grad_norm": 0.90234375, "learning_rate": 0.00019855475211891696, "loss": 1.1773, "step": 2451 }, { "epoch": 0.06296048440030107, "grad_norm": 0.87109375, "learning_rate": 0.00019855399578858957, "loss": 1.1546, "step": 2452 }, { "epoch": 0.06298616159622289, "grad_norm": 0.9140625, "learning_rate": 0.00019855323926185297, "loss": 1.3887, "step": 2453 }, { "epoch": 0.0630118387921447, "grad_norm": 0.875, "learning_rate": 0.0001985524825387087, "loss": 1.2761, "step": 2454 }, { "epoch": 0.06303751598806652, "grad_norm": 0.87109375, "learning_rate": 0.0001985517256191582, "loss": 1.3046, "step": 2455 }, { "epoch": 0.06306319318398834, "grad_norm": 0.9453125, "learning_rate": 0.00019855096850320304, "loss": 1.1865, "step": 2456 }, { "epoch": 0.06308887037991016, "grad_norm": 0.9609375, "learning_rate": 0.00019855021119084468, "loss": 1.4052, "step": 2457 }, { "epoch": 0.06311454757583199, "grad_norm": 0.98828125, "learning_rate": 0.00019854945368208467, "loss": 1.2281, "step": 2458 }, { "epoch": 0.0631402247717538, "grad_norm": 0.91796875, "learning_rate": 0.0001985486959769245, "loss": 1.3421, "step": 2459 }, { "epoch": 0.06316590196767562, "grad_norm": 0.8359375, "learning_rate": 0.00019854793807536566, "loss": 1.2832, "step": 2460 }, { "epoch": 0.06319157916359744, "grad_norm": 0.87890625, "learning_rate": 0.0001985471799774097, "loss": 1.3739, "step": 2461 }, { "epoch": 0.06321725635951926, "grad_norm": 0.921875, "learning_rate": 0.0001985464216830581, "loss": 1.3954, "step": 2462 }, { "epoch": 0.06324293355544108, "grad_norm": 0.8828125, "learning_rate": 0.00019854566319231238, "loss": 1.4157, "step": 2463 }, { "epoch": 0.06326861075136289, "grad_norm": 0.90625, "learning_rate": 0.00019854490450517406, "loss": 1.2791, "step": 2464 }, { "epoch": 0.06329428794728471, "grad_norm": 0.86328125, "learning_rate": 0.00019854414562164464, "loss": 1.3336, "step": 2465 }, { "epoch": 0.06331996514320654, "grad_norm": 0.8203125, "learning_rate": 0.00019854338654172566, "loss": 1.0102, "step": 2466 }, { "epoch": 0.06334564233912836, "grad_norm": 0.89453125, "learning_rate": 0.0001985426272654186, "loss": 1.2214, "step": 2467 }, { "epoch": 0.06337131953505018, "grad_norm": 0.97265625, "learning_rate": 0.000198541867792725, "loss": 1.2247, "step": 2468 }, { "epoch": 0.06339699673097199, "grad_norm": 1.015625, "learning_rate": 0.00019854110812364634, "loss": 1.3206, "step": 2469 }, { "epoch": 0.06342267392689381, "grad_norm": 0.90234375, "learning_rate": 0.00019854034825818415, "loss": 1.3076, "step": 2470 }, { "epoch": 0.06344835112281563, "grad_norm": 0.875, "learning_rate": 0.00019853958819633996, "loss": 1.2237, "step": 2471 }, { "epoch": 0.06347402831873745, "grad_norm": 0.890625, "learning_rate": 0.00019853882793811526, "loss": 1.2567, "step": 2472 }, { "epoch": 0.06349970551465928, "grad_norm": 0.8203125, "learning_rate": 0.0001985380674835116, "loss": 1.102, "step": 2473 }, { "epoch": 0.06352538271058109, "grad_norm": 0.8515625, "learning_rate": 0.00019853730683253043, "loss": 1.2833, "step": 2474 }, { "epoch": 0.06355105990650291, "grad_norm": 0.9375, "learning_rate": 0.00019853654598517336, "loss": 1.2836, "step": 2475 }, { "epoch": 0.06357673710242473, "grad_norm": 0.8359375, "learning_rate": 0.0001985357849414418, "loss": 1.1835, "step": 2476 }, { "epoch": 0.06360241429834655, "grad_norm": 0.7890625, "learning_rate": 0.00019853502370133734, "loss": 1.1182, "step": 2477 }, { "epoch": 0.06362809149426837, "grad_norm": 0.92578125, "learning_rate": 0.00019853426226486151, "loss": 1.3237, "step": 2478 }, { "epoch": 0.06365376869019018, "grad_norm": 0.875, "learning_rate": 0.00019853350063201577, "loss": 1.159, "step": 2479 }, { "epoch": 0.063679445886112, "grad_norm": 0.84375, "learning_rate": 0.00019853273880280167, "loss": 1.1632, "step": 2480 }, { "epoch": 0.06370512308203383, "grad_norm": 0.96484375, "learning_rate": 0.00019853197677722074, "loss": 1.2326, "step": 2481 }, { "epoch": 0.06373080027795565, "grad_norm": 0.9765625, "learning_rate": 0.00019853121455527443, "loss": 1.3331, "step": 2482 }, { "epoch": 0.06375647747387747, "grad_norm": 0.8984375, "learning_rate": 0.00019853045213696433, "loss": 1.3287, "step": 2483 }, { "epoch": 0.06378215466979928, "grad_norm": 0.81640625, "learning_rate": 0.00019852968952229196, "loss": 1.1219, "step": 2484 }, { "epoch": 0.0638078318657211, "grad_norm": 0.796875, "learning_rate": 0.0001985289267112588, "loss": 1.0858, "step": 2485 }, { "epoch": 0.06383350906164292, "grad_norm": 0.94140625, "learning_rate": 0.00019852816370386642, "loss": 1.1566, "step": 2486 }, { "epoch": 0.06385918625756475, "grad_norm": 0.92578125, "learning_rate": 0.00019852740050011627, "loss": 1.3007, "step": 2487 }, { "epoch": 0.06388486345348657, "grad_norm": 0.8828125, "learning_rate": 0.00019852663710000993, "loss": 1.3479, "step": 2488 }, { "epoch": 0.06391054064940838, "grad_norm": 0.93359375, "learning_rate": 0.00019852587350354889, "loss": 1.1577, "step": 2489 }, { "epoch": 0.0639362178453302, "grad_norm": 0.80078125, "learning_rate": 0.0001985251097107347, "loss": 1.1898, "step": 2490 }, { "epoch": 0.06396189504125202, "grad_norm": 0.875, "learning_rate": 0.00019852434572156886, "loss": 1.2055, "step": 2491 }, { "epoch": 0.06398757223717384, "grad_norm": 0.85546875, "learning_rate": 0.0001985235815360529, "loss": 1.1815, "step": 2492 }, { "epoch": 0.06401324943309566, "grad_norm": 0.84765625, "learning_rate": 0.00019852281715418833, "loss": 1.2676, "step": 2493 }, { "epoch": 0.06403892662901747, "grad_norm": 0.84375, "learning_rate": 0.0001985220525759767, "loss": 1.1579, "step": 2494 }, { "epoch": 0.0640646038249393, "grad_norm": 0.92578125, "learning_rate": 0.00019852128780141953, "loss": 1.2977, "step": 2495 }, { "epoch": 0.06409028102086112, "grad_norm": 0.84765625, "learning_rate": 0.0001985205228305183, "loss": 1.2611, "step": 2496 }, { "epoch": 0.06411595821678294, "grad_norm": 0.953125, "learning_rate": 0.0001985197576632746, "loss": 1.2658, "step": 2497 }, { "epoch": 0.06414163541270476, "grad_norm": 0.87109375, "learning_rate": 0.00019851899229968989, "loss": 1.4224, "step": 2498 }, { "epoch": 0.06416731260862657, "grad_norm": 0.859375, "learning_rate": 0.00019851822673976576, "loss": 1.1335, "step": 2499 }, { "epoch": 0.06419298980454839, "grad_norm": 0.8671875, "learning_rate": 0.00019851746098350365, "loss": 1.293, "step": 2500 }, { "epoch": 0.06421866700047021, "grad_norm": 0.9609375, "learning_rate": 0.0001985166950309052, "loss": 1.1102, "step": 2501 }, { "epoch": 0.06424434419639204, "grad_norm": 0.86328125, "learning_rate": 0.00019851592888197185, "loss": 1.2894, "step": 2502 }, { "epoch": 0.06427002139231386, "grad_norm": 0.98046875, "learning_rate": 0.00019851516253670517, "loss": 1.3982, "step": 2503 }, { "epoch": 0.06429569858823567, "grad_norm": 0.9140625, "learning_rate": 0.00019851439599510664, "loss": 1.4808, "step": 2504 }, { "epoch": 0.06432137578415749, "grad_norm": 0.8359375, "learning_rate": 0.00019851362925717782, "loss": 1.0581, "step": 2505 }, { "epoch": 0.06434705298007931, "grad_norm": 0.9375, "learning_rate": 0.00019851286232292026, "loss": 1.3049, "step": 2506 }, { "epoch": 0.06437273017600113, "grad_norm": 0.91015625, "learning_rate": 0.00019851209519233546, "loss": 1.295, "step": 2507 }, { "epoch": 0.06439840737192296, "grad_norm": 1.4140625, "learning_rate": 0.00019851132786542492, "loss": 1.1319, "step": 2508 }, { "epoch": 0.06442408456784476, "grad_norm": 0.8203125, "learning_rate": 0.00019851056034219023, "loss": 1.1906, "step": 2509 }, { "epoch": 0.06444976176376659, "grad_norm": 0.859375, "learning_rate": 0.0001985097926226329, "loss": 1.0929, "step": 2510 }, { "epoch": 0.06447543895968841, "grad_norm": 0.87890625, "learning_rate": 0.00019850902470675444, "loss": 1.112, "step": 2511 }, { "epoch": 0.06450111615561023, "grad_norm": 1.0078125, "learning_rate": 0.00019850825659455638, "loss": 1.332, "step": 2512 }, { "epoch": 0.06452679335153205, "grad_norm": 1.0390625, "learning_rate": 0.00019850748828604027, "loss": 1.3611, "step": 2513 }, { "epoch": 0.06455247054745386, "grad_norm": 0.8359375, "learning_rate": 0.00019850671978120766, "loss": 1.1321, "step": 2514 }, { "epoch": 0.06457814774337568, "grad_norm": 0.87890625, "learning_rate": 0.00019850595108006003, "loss": 1.1118, "step": 2515 }, { "epoch": 0.0646038249392975, "grad_norm": 1.1015625, "learning_rate": 0.00019850518218259895, "loss": 1.4464, "step": 2516 }, { "epoch": 0.06462950213521933, "grad_norm": 0.890625, "learning_rate": 0.00019850441308882593, "loss": 1.0672, "step": 2517 }, { "epoch": 0.06465517933114115, "grad_norm": 0.87109375, "learning_rate": 0.0001985036437987425, "loss": 1.3362, "step": 2518 }, { "epoch": 0.06468085652706296, "grad_norm": 0.8984375, "learning_rate": 0.00019850287431235023, "loss": 1.2466, "step": 2519 }, { "epoch": 0.06470653372298478, "grad_norm": 0.93359375, "learning_rate": 0.0001985021046296506, "loss": 1.138, "step": 2520 }, { "epoch": 0.0647322109189066, "grad_norm": 0.86328125, "learning_rate": 0.00019850133475064522, "loss": 1.1753, "step": 2521 }, { "epoch": 0.06475788811482842, "grad_norm": 0.91796875, "learning_rate": 0.00019850056467533552, "loss": 1.2398, "step": 2522 }, { "epoch": 0.06478356531075025, "grad_norm": 1.015625, "learning_rate": 0.00019849979440372313, "loss": 1.3125, "step": 2523 }, { "epoch": 0.06480924250667205, "grad_norm": 0.83984375, "learning_rate": 0.00019849902393580954, "loss": 1.2109, "step": 2524 }, { "epoch": 0.06483491970259388, "grad_norm": 0.87109375, "learning_rate": 0.0001984982532715963, "loss": 1.2724, "step": 2525 }, { "epoch": 0.0648605968985157, "grad_norm": 0.84765625, "learning_rate": 0.00019849748241108494, "loss": 1.2012, "step": 2526 }, { "epoch": 0.06488627409443752, "grad_norm": 0.8828125, "learning_rate": 0.00019849671135427696, "loss": 1.3287, "step": 2527 }, { "epoch": 0.06491195129035934, "grad_norm": 0.9140625, "learning_rate": 0.00019849594010117394, "loss": 1.1965, "step": 2528 }, { "epoch": 0.06493762848628115, "grad_norm": 0.85546875, "learning_rate": 0.00019849516865177744, "loss": 1.2634, "step": 2529 }, { "epoch": 0.06496330568220297, "grad_norm": 0.87890625, "learning_rate": 0.00019849439700608893, "loss": 1.1146, "step": 2530 }, { "epoch": 0.0649889828781248, "grad_norm": 1.171875, "learning_rate": 0.00019849362516411004, "loss": 1.2529, "step": 2531 }, { "epoch": 0.06501466007404662, "grad_norm": 0.95703125, "learning_rate": 0.0001984928531258422, "loss": 1.0763, "step": 2532 }, { "epoch": 0.06504033726996843, "grad_norm": 0.91796875, "learning_rate": 0.000198492080891287, "loss": 1.314, "step": 2533 }, { "epoch": 0.06506601446589025, "grad_norm": 0.94921875, "learning_rate": 0.000198491308460446, "loss": 1.2941, "step": 2534 }, { "epoch": 0.06509169166181207, "grad_norm": 0.96875, "learning_rate": 0.0001984905358333207, "loss": 1.2532, "step": 2535 }, { "epoch": 0.06511736885773389, "grad_norm": 0.82421875, "learning_rate": 0.0001984897630099127, "loss": 1.1975, "step": 2536 }, { "epoch": 0.06514304605365571, "grad_norm": 0.8828125, "learning_rate": 0.00019848898999022348, "loss": 1.0975, "step": 2537 }, { "epoch": 0.06516872324957752, "grad_norm": 0.890625, "learning_rate": 0.0001984882167742546, "loss": 1.3534, "step": 2538 }, { "epoch": 0.06519440044549935, "grad_norm": 0.8828125, "learning_rate": 0.00019848744336200757, "loss": 1.2956, "step": 2539 }, { "epoch": 0.06522007764142117, "grad_norm": 0.9375, "learning_rate": 0.000198486669753484, "loss": 1.2228, "step": 2540 }, { "epoch": 0.06524575483734299, "grad_norm": 0.828125, "learning_rate": 0.00019848589594868537, "loss": 1.2763, "step": 2541 }, { "epoch": 0.06527143203326481, "grad_norm": 0.9296875, "learning_rate": 0.00019848512194761323, "loss": 1.3866, "step": 2542 }, { "epoch": 0.06529710922918662, "grad_norm": 0.953125, "learning_rate": 0.0001984843477502692, "loss": 1.2577, "step": 2543 }, { "epoch": 0.06532278642510844, "grad_norm": 0.890625, "learning_rate": 0.00019848357335665472, "loss": 1.2339, "step": 2544 }, { "epoch": 0.06534846362103026, "grad_norm": 0.90234375, "learning_rate": 0.00019848279876677136, "loss": 1.2251, "step": 2545 }, { "epoch": 0.06537414081695209, "grad_norm": 0.97265625, "learning_rate": 0.0001984820239806207, "loss": 1.2382, "step": 2546 }, { "epoch": 0.06539981801287391, "grad_norm": 0.94140625, "learning_rate": 0.00019848124899820424, "loss": 1.4308, "step": 2547 }, { "epoch": 0.06542549520879572, "grad_norm": 0.9453125, "learning_rate": 0.00019848047381952358, "loss": 1.3339, "step": 2548 }, { "epoch": 0.06545117240471754, "grad_norm": 0.8828125, "learning_rate": 0.00019847969844458022, "loss": 1.3099, "step": 2549 }, { "epoch": 0.06547684960063936, "grad_norm": 0.8359375, "learning_rate": 0.0001984789228733757, "loss": 1.1981, "step": 2550 }, { "epoch": 0.06550252679656118, "grad_norm": 0.84765625, "learning_rate": 0.0001984781471059116, "loss": 1.169, "step": 2551 }, { "epoch": 0.065528203992483, "grad_norm": 0.85546875, "learning_rate": 0.00019847737114218945, "loss": 1.3198, "step": 2552 }, { "epoch": 0.06555388118840481, "grad_norm": 0.859375, "learning_rate": 0.00019847659498221077, "loss": 1.108, "step": 2553 }, { "epoch": 0.06557955838432664, "grad_norm": 0.8984375, "learning_rate": 0.00019847581862597716, "loss": 1.1996, "step": 2554 }, { "epoch": 0.06560523558024846, "grad_norm": 0.9453125, "learning_rate": 0.00019847504207349013, "loss": 1.2712, "step": 2555 }, { "epoch": 0.06563091277617028, "grad_norm": 0.8046875, "learning_rate": 0.0001984742653247512, "loss": 1.2102, "step": 2556 }, { "epoch": 0.0656565899720921, "grad_norm": 0.9453125, "learning_rate": 0.00019847348837976201, "loss": 1.1677, "step": 2557 }, { "epoch": 0.06568226716801391, "grad_norm": 0.88671875, "learning_rate": 0.00019847271123852402, "loss": 1.2714, "step": 2558 }, { "epoch": 0.06570794436393573, "grad_norm": 0.90234375, "learning_rate": 0.0001984719339010388, "loss": 1.1635, "step": 2559 }, { "epoch": 0.06573362155985756, "grad_norm": 0.890625, "learning_rate": 0.00019847115636730794, "loss": 1.4198, "step": 2560 }, { "epoch": 0.06575929875577938, "grad_norm": 0.85546875, "learning_rate": 0.00019847037863733298, "loss": 1.2765, "step": 2561 }, { "epoch": 0.0657849759517012, "grad_norm": 0.9140625, "learning_rate": 0.0001984696007111154, "loss": 1.2182, "step": 2562 }, { "epoch": 0.06581065314762301, "grad_norm": 0.8515625, "learning_rate": 0.00019846882258865684, "loss": 1.3611, "step": 2563 }, { "epoch": 0.06583633034354483, "grad_norm": 0.921875, "learning_rate": 0.00019846804426995878, "loss": 1.1917, "step": 2564 }, { "epoch": 0.06586200753946665, "grad_norm": 0.9296875, "learning_rate": 0.00019846726575502283, "loss": 1.1543, "step": 2565 }, { "epoch": 0.06588768473538847, "grad_norm": 0.90625, "learning_rate": 0.0001984664870438505, "loss": 1.3033, "step": 2566 }, { "epoch": 0.0659133619313103, "grad_norm": 0.89453125, "learning_rate": 0.00019846570813644333, "loss": 1.2841, "step": 2567 }, { "epoch": 0.0659390391272321, "grad_norm": 1.046875, "learning_rate": 0.00019846492903280295, "loss": 1.2552, "step": 2568 }, { "epoch": 0.06596471632315393, "grad_norm": 0.9921875, "learning_rate": 0.00019846414973293084, "loss": 1.2325, "step": 2569 }, { "epoch": 0.06599039351907575, "grad_norm": 0.8515625, "learning_rate": 0.00019846337023682854, "loss": 1.2113, "step": 2570 }, { "epoch": 0.06601607071499757, "grad_norm": 0.8828125, "learning_rate": 0.00019846259054449766, "loss": 1.3021, "step": 2571 }, { "epoch": 0.0660417479109194, "grad_norm": 0.90625, "learning_rate": 0.00019846181065593976, "loss": 1.2946, "step": 2572 }, { "epoch": 0.0660674251068412, "grad_norm": 0.98828125, "learning_rate": 0.00019846103057115635, "loss": 1.134, "step": 2573 }, { "epoch": 0.06609310230276302, "grad_norm": 0.921875, "learning_rate": 0.00019846025029014898, "loss": 1.2628, "step": 2574 }, { "epoch": 0.06611877949868485, "grad_norm": 0.84765625, "learning_rate": 0.00019845946981291926, "loss": 1.1249, "step": 2575 }, { "epoch": 0.06614445669460667, "grad_norm": 0.8515625, "learning_rate": 0.00019845868913946868, "loss": 1.2511, "step": 2576 }, { "epoch": 0.06617013389052849, "grad_norm": 0.8359375, "learning_rate": 0.0001984579082697988, "loss": 1.2161, "step": 2577 }, { "epoch": 0.0661958110864503, "grad_norm": 0.89453125, "learning_rate": 0.00019845712720391125, "loss": 1.3211, "step": 2578 }, { "epoch": 0.06622148828237212, "grad_norm": 0.8671875, "learning_rate": 0.00019845634594180752, "loss": 1.2536, "step": 2579 }, { "epoch": 0.06624716547829394, "grad_norm": 0.86328125, "learning_rate": 0.00019845556448348922, "loss": 1.3096, "step": 2580 }, { "epoch": 0.06627284267421577, "grad_norm": 0.94921875, "learning_rate": 0.00019845478282895783, "loss": 1.1871, "step": 2581 }, { "epoch": 0.06629851987013759, "grad_norm": 0.89453125, "learning_rate": 0.000198454000978215, "loss": 1.3047, "step": 2582 }, { "epoch": 0.0663241970660594, "grad_norm": 0.86328125, "learning_rate": 0.0001984532189312622, "loss": 1.156, "step": 2583 }, { "epoch": 0.06634987426198122, "grad_norm": 0.9140625, "learning_rate": 0.000198452436688101, "loss": 1.3385, "step": 2584 }, { "epoch": 0.06637555145790304, "grad_norm": 0.9140625, "learning_rate": 0.00019845165424873303, "loss": 1.195, "step": 2585 }, { "epoch": 0.06640122865382486, "grad_norm": 0.90234375, "learning_rate": 0.0001984508716131598, "loss": 1.1762, "step": 2586 }, { "epoch": 0.06642690584974668, "grad_norm": 0.9140625, "learning_rate": 0.00019845008878138288, "loss": 1.1865, "step": 2587 }, { "epoch": 0.06645258304566849, "grad_norm": 0.83984375, "learning_rate": 0.0001984493057534038, "loss": 1.2385, "step": 2588 }, { "epoch": 0.06647826024159031, "grad_norm": 0.93359375, "learning_rate": 0.00019844852252922418, "loss": 1.206, "step": 2589 }, { "epoch": 0.06650393743751214, "grad_norm": 0.92578125, "learning_rate": 0.00019844773910884553, "loss": 1.1809, "step": 2590 }, { "epoch": 0.06652961463343396, "grad_norm": 0.87890625, "learning_rate": 0.00019844695549226944, "loss": 1.2973, "step": 2591 }, { "epoch": 0.06655529182935578, "grad_norm": 0.89453125, "learning_rate": 0.00019844617167949746, "loss": 1.1044, "step": 2592 }, { "epoch": 0.06658096902527759, "grad_norm": 0.88671875, "learning_rate": 0.00019844538767053112, "loss": 1.309, "step": 2593 }, { "epoch": 0.06660664622119941, "grad_norm": 0.8828125, "learning_rate": 0.00019844460346537206, "loss": 1.3412, "step": 2594 }, { "epoch": 0.06663232341712123, "grad_norm": 0.85546875, "learning_rate": 0.00019844381906402176, "loss": 1.2417, "step": 2595 }, { "epoch": 0.06665800061304306, "grad_norm": 0.9453125, "learning_rate": 0.00019844303446648186, "loss": 1.2381, "step": 2596 }, { "epoch": 0.06668367780896488, "grad_norm": 0.85546875, "learning_rate": 0.00019844224967275383, "loss": 1.2439, "step": 2597 }, { "epoch": 0.06670935500488669, "grad_norm": 0.890625, "learning_rate": 0.00019844146468283932, "loss": 1.3912, "step": 2598 }, { "epoch": 0.06673503220080851, "grad_norm": 0.98828125, "learning_rate": 0.0001984406794967399, "loss": 1.3404, "step": 2599 }, { "epoch": 0.06676070939673033, "grad_norm": 0.88671875, "learning_rate": 0.00019843989411445705, "loss": 1.4781, "step": 2600 }, { "epoch": 0.06678638659265215, "grad_norm": 0.96875, "learning_rate": 0.00019843910853599238, "loss": 1.2234, "step": 2601 }, { "epoch": 0.06681206378857397, "grad_norm": 0.7734375, "learning_rate": 0.0001984383227613475, "loss": 1.2055, "step": 2602 }, { "epoch": 0.06683774098449578, "grad_norm": 0.87890625, "learning_rate": 0.00019843753679052388, "loss": 1.3055, "step": 2603 }, { "epoch": 0.0668634181804176, "grad_norm": 0.96484375, "learning_rate": 0.00019843675062352319, "loss": 1.2958, "step": 2604 }, { "epoch": 0.06688909537633943, "grad_norm": 0.87890625, "learning_rate": 0.00019843596426034692, "loss": 1.19, "step": 2605 }, { "epoch": 0.06691477257226125, "grad_norm": 0.90234375, "learning_rate": 0.0001984351777009967, "loss": 1.387, "step": 2606 }, { "epoch": 0.06694044976818307, "grad_norm": 0.87890625, "learning_rate": 0.000198434390945474, "loss": 1.253, "step": 2607 }, { "epoch": 0.06696612696410488, "grad_norm": 1.015625, "learning_rate": 0.0001984336039937805, "loss": 1.1235, "step": 2608 }, { "epoch": 0.0669918041600267, "grad_norm": 0.87109375, "learning_rate": 0.0001984328168459177, "loss": 1.2749, "step": 2609 }, { "epoch": 0.06701748135594852, "grad_norm": 0.8515625, "learning_rate": 0.0001984320295018872, "loss": 1.2765, "step": 2610 }, { "epoch": 0.06704315855187035, "grad_norm": 0.84375, "learning_rate": 0.00019843124196169054, "loss": 1.2251, "step": 2611 }, { "epoch": 0.06706883574779217, "grad_norm": 0.82421875, "learning_rate": 0.00019843045422532932, "loss": 1.1463, "step": 2612 }, { "epoch": 0.06709451294371398, "grad_norm": 0.86328125, "learning_rate": 0.00019842966629280508, "loss": 1.0884, "step": 2613 }, { "epoch": 0.0671201901396358, "grad_norm": 0.91015625, "learning_rate": 0.00019842887816411942, "loss": 1.2868, "step": 2614 }, { "epoch": 0.06714586733555762, "grad_norm": 0.90625, "learning_rate": 0.0001984280898392739, "loss": 1.1814, "step": 2615 }, { "epoch": 0.06717154453147944, "grad_norm": 0.88671875, "learning_rate": 0.00019842730131827007, "loss": 1.2702, "step": 2616 }, { "epoch": 0.06719722172740127, "grad_norm": 0.89453125, "learning_rate": 0.0001984265126011095, "loss": 1.2337, "step": 2617 }, { "epoch": 0.06722289892332307, "grad_norm": 0.84765625, "learning_rate": 0.00019842572368779384, "loss": 1.0591, "step": 2618 }, { "epoch": 0.0672485761192449, "grad_norm": 0.90234375, "learning_rate": 0.00019842493457832458, "loss": 1.1172, "step": 2619 }, { "epoch": 0.06727425331516672, "grad_norm": 0.95703125, "learning_rate": 0.00019842414527270331, "loss": 1.2634, "step": 2620 }, { "epoch": 0.06729993051108854, "grad_norm": 0.86328125, "learning_rate": 0.0001984233557709316, "loss": 1.154, "step": 2621 }, { "epoch": 0.06732560770701036, "grad_norm": 0.90234375, "learning_rate": 0.00019842256607301108, "loss": 1.2741, "step": 2622 }, { "epoch": 0.06735128490293217, "grad_norm": 0.9296875, "learning_rate": 0.00019842177617894322, "loss": 1.2866, "step": 2623 }, { "epoch": 0.06737696209885399, "grad_norm": 0.8984375, "learning_rate": 0.00019842098608872965, "loss": 1.2716, "step": 2624 }, { "epoch": 0.06740263929477582, "grad_norm": 0.890625, "learning_rate": 0.00019842019580237197, "loss": 1.2493, "step": 2625 }, { "epoch": 0.06742831649069764, "grad_norm": 0.92578125, "learning_rate": 0.0001984194053198717, "loss": 1.3161, "step": 2626 }, { "epoch": 0.06745399368661946, "grad_norm": 0.9375, "learning_rate": 0.00019841861464123047, "loss": 1.2649, "step": 2627 }, { "epoch": 0.06747967088254127, "grad_norm": 0.89453125, "learning_rate": 0.00019841782376644985, "loss": 1.2744, "step": 2628 }, { "epoch": 0.06750534807846309, "grad_norm": 0.91015625, "learning_rate": 0.0001984170326955314, "loss": 1.1168, "step": 2629 }, { "epoch": 0.06753102527438491, "grad_norm": 0.890625, "learning_rate": 0.00019841624142847666, "loss": 1.0659, "step": 2630 }, { "epoch": 0.06755670247030673, "grad_norm": 0.8828125, "learning_rate": 0.00019841544996528724, "loss": 1.3606, "step": 2631 }, { "epoch": 0.06758237966622856, "grad_norm": 0.83984375, "learning_rate": 0.00019841465830596473, "loss": 1.0744, "step": 2632 }, { "epoch": 0.06760805686215036, "grad_norm": 0.92578125, "learning_rate": 0.0001984138664505107, "loss": 1.1769, "step": 2633 }, { "epoch": 0.06763373405807219, "grad_norm": 0.84765625, "learning_rate": 0.0001984130743989267, "loss": 1.4478, "step": 2634 }, { "epoch": 0.06765941125399401, "grad_norm": 0.97265625, "learning_rate": 0.0001984122821512144, "loss": 1.2657, "step": 2635 }, { "epoch": 0.06768508844991583, "grad_norm": 0.85546875, "learning_rate": 0.00019841148970737525, "loss": 1.2108, "step": 2636 }, { "epoch": 0.06771076564583764, "grad_norm": 0.84765625, "learning_rate": 0.0001984106970674109, "loss": 1.1042, "step": 2637 }, { "epoch": 0.06773644284175946, "grad_norm": 0.87109375, "learning_rate": 0.0001984099042313229, "loss": 1.238, "step": 2638 }, { "epoch": 0.06776212003768128, "grad_norm": 0.984375, "learning_rate": 0.00019840911119911288, "loss": 1.2059, "step": 2639 }, { "epoch": 0.0677877972336031, "grad_norm": 0.91015625, "learning_rate": 0.0001984083179707824, "loss": 1.2614, "step": 2640 }, { "epoch": 0.06781347442952493, "grad_norm": 0.91796875, "learning_rate": 0.000198407524546333, "loss": 1.2741, "step": 2641 }, { "epoch": 0.06783915162544674, "grad_norm": 0.9140625, "learning_rate": 0.00019840673092576632, "loss": 1.1073, "step": 2642 }, { "epoch": 0.06786482882136856, "grad_norm": 0.9765625, "learning_rate": 0.0001984059371090839, "loss": 1.3456, "step": 2643 }, { "epoch": 0.06789050601729038, "grad_norm": 0.97265625, "learning_rate": 0.00019840514309628734, "loss": 1.2727, "step": 2644 }, { "epoch": 0.0679161832132122, "grad_norm": 1.0234375, "learning_rate": 0.00019840434888737822, "loss": 1.2734, "step": 2645 }, { "epoch": 0.06794186040913403, "grad_norm": 0.92578125, "learning_rate": 0.00019840355448235813, "loss": 1.1624, "step": 2646 }, { "epoch": 0.06796753760505583, "grad_norm": 0.94921875, "learning_rate": 0.00019840275988122863, "loss": 1.5552, "step": 2647 }, { "epoch": 0.06799321480097766, "grad_norm": 0.953125, "learning_rate": 0.0001984019650839913, "loss": 1.2936, "step": 2648 }, { "epoch": 0.06801889199689948, "grad_norm": 0.8671875, "learning_rate": 0.0001984011700906478, "loss": 1.2829, "step": 2649 }, { "epoch": 0.0680445691928213, "grad_norm": 0.9296875, "learning_rate": 0.0001984003749011996, "loss": 1.3205, "step": 2650 }, { "epoch": 0.06807024638874312, "grad_norm": 0.94140625, "learning_rate": 0.00019839957951564837, "loss": 1.3008, "step": 2651 }, { "epoch": 0.06809592358466493, "grad_norm": 0.91796875, "learning_rate": 0.00019839878393399567, "loss": 1.4079, "step": 2652 }, { "epoch": 0.06812160078058675, "grad_norm": 0.88671875, "learning_rate": 0.00019839798815624306, "loss": 1.1295, "step": 2653 }, { "epoch": 0.06814727797650857, "grad_norm": 0.90625, "learning_rate": 0.00019839719218239216, "loss": 1.4476, "step": 2654 }, { "epoch": 0.0681729551724304, "grad_norm": 0.8984375, "learning_rate": 0.00019839639601244456, "loss": 1.3034, "step": 2655 }, { "epoch": 0.06819863236835222, "grad_norm": 0.9140625, "learning_rate": 0.0001983955996464018, "loss": 1.1643, "step": 2656 }, { "epoch": 0.06822430956427403, "grad_norm": 0.8203125, "learning_rate": 0.0001983948030842655, "loss": 1.0973, "step": 2657 }, { "epoch": 0.06824998676019585, "grad_norm": 0.94140625, "learning_rate": 0.00019839400632603725, "loss": 1.2163, "step": 2658 }, { "epoch": 0.06827566395611767, "grad_norm": 0.875, "learning_rate": 0.00019839320937171866, "loss": 1.1652, "step": 2659 }, { "epoch": 0.0683013411520395, "grad_norm": 0.91015625, "learning_rate": 0.00019839241222131127, "loss": 1.2527, "step": 2660 }, { "epoch": 0.06832701834796132, "grad_norm": 0.87109375, "learning_rate": 0.0001983916148748167, "loss": 1.1421, "step": 2661 }, { "epoch": 0.06835269554388312, "grad_norm": 0.90625, "learning_rate": 0.00019839081733223654, "loss": 1.2499, "step": 2662 }, { "epoch": 0.06837837273980495, "grad_norm": 0.890625, "learning_rate": 0.00019839001959357232, "loss": 1.2608, "step": 2663 }, { "epoch": 0.06840404993572677, "grad_norm": 0.91796875, "learning_rate": 0.00019838922165882572, "loss": 1.2241, "step": 2664 }, { "epoch": 0.06842972713164859, "grad_norm": 0.8125, "learning_rate": 0.0001983884235279983, "loss": 1.1213, "step": 2665 }, { "epoch": 0.06845540432757041, "grad_norm": 0.80078125, "learning_rate": 0.0001983876252010916, "loss": 1.0645, "step": 2666 }, { "epoch": 0.06848108152349222, "grad_norm": 0.97265625, "learning_rate": 0.00019838682667810726, "loss": 1.2441, "step": 2667 }, { "epoch": 0.06850675871941404, "grad_norm": 0.87109375, "learning_rate": 0.00019838602795904686, "loss": 1.2493, "step": 2668 }, { "epoch": 0.06853243591533587, "grad_norm": 0.91015625, "learning_rate": 0.00019838522904391202, "loss": 1.2599, "step": 2669 }, { "epoch": 0.06855811311125769, "grad_norm": 0.9375, "learning_rate": 0.00019838442993270428, "loss": 1.1604, "step": 2670 }, { "epoch": 0.06858379030717951, "grad_norm": 1.0703125, "learning_rate": 0.00019838363062542525, "loss": 1.3253, "step": 2671 }, { "epoch": 0.06860946750310132, "grad_norm": 0.828125, "learning_rate": 0.00019838283112207656, "loss": 1.0599, "step": 2672 }, { "epoch": 0.06863514469902314, "grad_norm": 0.96875, "learning_rate": 0.00019838203142265976, "loss": 1.3177, "step": 2673 }, { "epoch": 0.06866082189494496, "grad_norm": 0.87109375, "learning_rate": 0.00019838123152717647, "loss": 1.1481, "step": 2674 }, { "epoch": 0.06868649909086678, "grad_norm": 0.8984375, "learning_rate": 0.00019838043143562825, "loss": 1.286, "step": 2675 }, { "epoch": 0.0687121762867886, "grad_norm": 0.98046875, "learning_rate": 0.00019837963114801674, "loss": 1.3614, "step": 2676 }, { "epoch": 0.06873785348271041, "grad_norm": 0.93359375, "learning_rate": 0.0001983788306643435, "loss": 1.2611, "step": 2677 }, { "epoch": 0.06876353067863224, "grad_norm": 0.80859375, "learning_rate": 0.00019837802998461012, "loss": 1.2055, "step": 2678 }, { "epoch": 0.06878920787455406, "grad_norm": 0.91015625, "learning_rate": 0.00019837722910881828, "loss": 1.3022, "step": 2679 }, { "epoch": 0.06881488507047588, "grad_norm": 0.90625, "learning_rate": 0.00019837642803696942, "loss": 1.448, "step": 2680 }, { "epoch": 0.0688405622663977, "grad_norm": 0.9609375, "learning_rate": 0.00019837562676906526, "loss": 1.2462, "step": 2681 }, { "epoch": 0.06886623946231951, "grad_norm": 0.83984375, "learning_rate": 0.00019837482530510737, "loss": 1.1572, "step": 2682 }, { "epoch": 0.06889191665824133, "grad_norm": 0.93359375, "learning_rate": 0.00019837402364509735, "loss": 1.3512, "step": 2683 }, { "epoch": 0.06891759385416316, "grad_norm": 0.95703125, "learning_rate": 0.00019837322178903677, "loss": 1.2779, "step": 2684 }, { "epoch": 0.06894327105008498, "grad_norm": 0.91796875, "learning_rate": 0.00019837241973692723, "loss": 1.1687, "step": 2685 }, { "epoch": 0.0689689482460068, "grad_norm": 0.84765625, "learning_rate": 0.00019837161748877038, "loss": 1.1639, "step": 2686 }, { "epoch": 0.06899462544192861, "grad_norm": 0.88671875, "learning_rate": 0.00019837081504456777, "loss": 1.1801, "step": 2687 }, { "epoch": 0.06902030263785043, "grad_norm": 0.8828125, "learning_rate": 0.000198370012404321, "loss": 1.3209, "step": 2688 }, { "epoch": 0.06904597983377225, "grad_norm": 0.8359375, "learning_rate": 0.00019836920956803165, "loss": 1.276, "step": 2689 }, { "epoch": 0.06907165702969408, "grad_norm": 0.82421875, "learning_rate": 0.0001983684065357014, "loss": 1.1796, "step": 2690 }, { "epoch": 0.0690973342256159, "grad_norm": 0.91015625, "learning_rate": 0.0001983676033073318, "loss": 1.3116, "step": 2691 }, { "epoch": 0.0691230114215377, "grad_norm": 0.87109375, "learning_rate": 0.00019836679988292444, "loss": 1.3379, "step": 2692 }, { "epoch": 0.06914868861745953, "grad_norm": 0.890625, "learning_rate": 0.00019836599626248095, "loss": 1.134, "step": 2693 }, { "epoch": 0.06917436581338135, "grad_norm": 0.875, "learning_rate": 0.0001983651924460029, "loss": 1.2175, "step": 2694 }, { "epoch": 0.06920004300930317, "grad_norm": 0.88671875, "learning_rate": 0.0001983643884334919, "loss": 1.0849, "step": 2695 }, { "epoch": 0.069225720205225, "grad_norm": 0.90625, "learning_rate": 0.00019836358422494957, "loss": 1.2731, "step": 2696 }, { "epoch": 0.0692513974011468, "grad_norm": 0.91015625, "learning_rate": 0.00019836277982037748, "loss": 1.1828, "step": 2697 }, { "epoch": 0.06927707459706862, "grad_norm": 0.84765625, "learning_rate": 0.00019836197521977725, "loss": 1.1325, "step": 2698 }, { "epoch": 0.06930275179299045, "grad_norm": 0.94921875, "learning_rate": 0.00019836117042315053, "loss": 1.2362, "step": 2699 }, { "epoch": 0.06932842898891227, "grad_norm": 0.9140625, "learning_rate": 0.00019836036543049884, "loss": 1.377, "step": 2700 }, { "epoch": 0.06935410618483409, "grad_norm": 0.953125, "learning_rate": 0.00019835956024182388, "loss": 1.0676, "step": 2701 }, { "epoch": 0.0693797833807559, "grad_norm": 0.8828125, "learning_rate": 0.00019835875485712715, "loss": 1.0473, "step": 2702 }, { "epoch": 0.06940546057667772, "grad_norm": 0.984375, "learning_rate": 0.00019835794927641032, "loss": 1.3782, "step": 2703 }, { "epoch": 0.06943113777259954, "grad_norm": 0.88671875, "learning_rate": 0.000198357143499675, "loss": 1.3416, "step": 2704 }, { "epoch": 0.06945681496852137, "grad_norm": 0.92578125, "learning_rate": 0.00019835633752692276, "loss": 1.3109, "step": 2705 }, { "epoch": 0.06948249216444319, "grad_norm": 0.875, "learning_rate": 0.00019835553135815522, "loss": 1.2434, "step": 2706 }, { "epoch": 0.069508169360365, "grad_norm": 0.9296875, "learning_rate": 0.00019835472499337402, "loss": 1.1841, "step": 2707 }, { "epoch": 0.06953384655628682, "grad_norm": 0.81640625, "learning_rate": 0.0001983539184325807, "loss": 1.217, "step": 2708 }, { "epoch": 0.06955952375220864, "grad_norm": 0.81640625, "learning_rate": 0.00019835311167577696, "loss": 1.3279, "step": 2709 }, { "epoch": 0.06958520094813046, "grad_norm": 0.875, "learning_rate": 0.00019835230472296428, "loss": 1.1702, "step": 2710 }, { "epoch": 0.06961087814405229, "grad_norm": 0.828125, "learning_rate": 0.00019835149757414436, "loss": 1.1429, "step": 2711 }, { "epoch": 0.0696365553399741, "grad_norm": 0.91015625, "learning_rate": 0.00019835069022931882, "loss": 1.3895, "step": 2712 }, { "epoch": 0.06966223253589592, "grad_norm": 0.8515625, "learning_rate": 0.00019834988268848923, "loss": 1.2791, "step": 2713 }, { "epoch": 0.06968790973181774, "grad_norm": 0.92578125, "learning_rate": 0.0001983490749516572, "loss": 1.1623, "step": 2714 }, { "epoch": 0.06971358692773956, "grad_norm": 0.89453125, "learning_rate": 0.00019834826701882436, "loss": 1.1019, "step": 2715 }, { "epoch": 0.06973926412366138, "grad_norm": 0.90625, "learning_rate": 0.00019834745888999228, "loss": 1.2098, "step": 2716 }, { "epoch": 0.06976494131958319, "grad_norm": 0.8828125, "learning_rate": 0.00019834665056516263, "loss": 1.3257, "step": 2717 }, { "epoch": 0.06979061851550501, "grad_norm": 0.90234375, "learning_rate": 0.00019834584204433698, "loss": 1.1319, "step": 2718 }, { "epoch": 0.06981629571142683, "grad_norm": 0.8828125, "learning_rate": 0.00019834503332751693, "loss": 1.1939, "step": 2719 }, { "epoch": 0.06984197290734866, "grad_norm": 0.94921875, "learning_rate": 0.00019834422441470412, "loss": 1.2615, "step": 2720 }, { "epoch": 0.06986765010327048, "grad_norm": 0.84765625, "learning_rate": 0.00019834341530590015, "loss": 1.1781, "step": 2721 }, { "epoch": 0.06989332729919229, "grad_norm": 1.015625, "learning_rate": 0.00019834260600110667, "loss": 1.2718, "step": 2722 }, { "epoch": 0.06991900449511411, "grad_norm": 0.8984375, "learning_rate": 0.00019834179650032524, "loss": 1.2442, "step": 2723 }, { "epoch": 0.06994468169103593, "grad_norm": 0.85546875, "learning_rate": 0.00019834098680355746, "loss": 1.1262, "step": 2724 }, { "epoch": 0.06997035888695775, "grad_norm": 0.8984375, "learning_rate": 0.000198340176910805, "loss": 1.1685, "step": 2725 }, { "epoch": 0.06999603608287958, "grad_norm": 0.9921875, "learning_rate": 0.00019833936682206948, "loss": 1.2928, "step": 2726 }, { "epoch": 0.07002171327880138, "grad_norm": 0.94921875, "learning_rate": 0.00019833855653735244, "loss": 1.2885, "step": 2727 }, { "epoch": 0.0700473904747232, "grad_norm": 0.84765625, "learning_rate": 0.00019833774605665556, "loss": 1.0235, "step": 2728 }, { "epoch": 0.07007306767064503, "grad_norm": 0.8359375, "learning_rate": 0.00019833693537998044, "loss": 1.3848, "step": 2729 }, { "epoch": 0.07009874486656685, "grad_norm": 0.94140625, "learning_rate": 0.00019833612450732868, "loss": 1.2387, "step": 2730 }, { "epoch": 0.07012442206248867, "grad_norm": 0.9140625, "learning_rate": 0.00019833531343870192, "loss": 1.3113, "step": 2731 }, { "epoch": 0.07015009925841048, "grad_norm": 0.859375, "learning_rate": 0.00019833450217410174, "loss": 1.261, "step": 2732 }, { "epoch": 0.0701757764543323, "grad_norm": 0.79296875, "learning_rate": 0.0001983336907135298, "loss": 1.2007, "step": 2733 }, { "epoch": 0.07020145365025413, "grad_norm": 0.83984375, "learning_rate": 0.0001983328790569877, "loss": 1.3723, "step": 2734 }, { "epoch": 0.07022713084617595, "grad_norm": 0.81640625, "learning_rate": 0.00019833206720447705, "loss": 1.2698, "step": 2735 }, { "epoch": 0.07025280804209777, "grad_norm": 1.0703125, "learning_rate": 0.00019833125515599946, "loss": 1.1595, "step": 2736 }, { "epoch": 0.07027848523801958, "grad_norm": 0.9453125, "learning_rate": 0.00019833044291155659, "loss": 1.2174, "step": 2737 }, { "epoch": 0.0703041624339414, "grad_norm": 0.96875, "learning_rate": 0.00019832963047115, "loss": 1.3904, "step": 2738 }, { "epoch": 0.07032983962986322, "grad_norm": 0.88671875, "learning_rate": 0.00019832881783478134, "loss": 1.3716, "step": 2739 }, { "epoch": 0.07035551682578504, "grad_norm": 0.92578125, "learning_rate": 0.0001983280050024522, "loss": 1.2501, "step": 2740 }, { "epoch": 0.07038119402170685, "grad_norm": 0.91796875, "learning_rate": 0.00019832719197416428, "loss": 1.1694, "step": 2741 }, { "epoch": 0.07040687121762867, "grad_norm": 0.79296875, "learning_rate": 0.00019832637874991912, "loss": 1.2991, "step": 2742 }, { "epoch": 0.0704325484135505, "grad_norm": 0.84375, "learning_rate": 0.0001983255653297184, "loss": 1.1516, "step": 2743 }, { "epoch": 0.07045822560947232, "grad_norm": 0.84375, "learning_rate": 0.00019832475171356368, "loss": 1.3292, "step": 2744 }, { "epoch": 0.07048390280539414, "grad_norm": 0.86328125, "learning_rate": 0.00019832393790145662, "loss": 1.2701, "step": 2745 }, { "epoch": 0.07050958000131595, "grad_norm": 1.015625, "learning_rate": 0.00019832312389339885, "loss": 1.1789, "step": 2746 }, { "epoch": 0.07053525719723777, "grad_norm": 0.859375, "learning_rate": 0.00019832230968939193, "loss": 1.2139, "step": 2747 }, { "epoch": 0.0705609343931596, "grad_norm": 1.0078125, "learning_rate": 0.00019832149528943758, "loss": 1.3393, "step": 2748 }, { "epoch": 0.07058661158908142, "grad_norm": 0.921875, "learning_rate": 0.00019832068069353736, "loss": 1.272, "step": 2749 }, { "epoch": 0.07061228878500324, "grad_norm": 1.0, "learning_rate": 0.0001983198659016929, "loss": 1.2665, "step": 2750 }, { "epoch": 0.07063796598092505, "grad_norm": 0.98828125, "learning_rate": 0.00019831905091390582, "loss": 1.1569, "step": 2751 }, { "epoch": 0.07066364317684687, "grad_norm": 0.875, "learning_rate": 0.00019831823573017774, "loss": 1.3752, "step": 2752 }, { "epoch": 0.07068932037276869, "grad_norm": 0.8671875, "learning_rate": 0.0001983174203505103, "loss": 1.3193, "step": 2753 }, { "epoch": 0.07071499756869051, "grad_norm": 0.91015625, "learning_rate": 0.00019831660477490515, "loss": 1.2465, "step": 2754 }, { "epoch": 0.07074067476461234, "grad_norm": 0.953125, "learning_rate": 0.00019831578900336388, "loss": 1.2383, "step": 2755 }, { "epoch": 0.07076635196053414, "grad_norm": 0.890625, "learning_rate": 0.0001983149730358881, "loss": 1.2919, "step": 2756 }, { "epoch": 0.07079202915645597, "grad_norm": 0.8828125, "learning_rate": 0.00019831415687247947, "loss": 1.4165, "step": 2757 }, { "epoch": 0.07081770635237779, "grad_norm": 0.859375, "learning_rate": 0.00019831334051313961, "loss": 1.2657, "step": 2758 }, { "epoch": 0.07084338354829961, "grad_norm": 0.86328125, "learning_rate": 0.00019831252395787017, "loss": 1.1052, "step": 2759 }, { "epoch": 0.07086906074422143, "grad_norm": 0.97265625, "learning_rate": 0.0001983117072066727, "loss": 1.3421, "step": 2760 }, { "epoch": 0.07089473794014324, "grad_norm": 0.9921875, "learning_rate": 0.00019831089025954893, "loss": 1.4382, "step": 2761 }, { "epoch": 0.07092041513606506, "grad_norm": 0.953125, "learning_rate": 0.0001983100731165004, "loss": 1.2161, "step": 2762 }, { "epoch": 0.07094609233198688, "grad_norm": 0.87890625, "learning_rate": 0.00019830925577752877, "loss": 1.4057, "step": 2763 }, { "epoch": 0.0709717695279087, "grad_norm": 0.7890625, "learning_rate": 0.00019830843824263566, "loss": 1.1795, "step": 2764 }, { "epoch": 0.07099744672383053, "grad_norm": 0.90625, "learning_rate": 0.00019830762051182278, "loss": 1.0756, "step": 2765 }, { "epoch": 0.07102312391975234, "grad_norm": 0.953125, "learning_rate": 0.00019830680258509166, "loss": 1.0941, "step": 2766 }, { "epoch": 0.07104880111567416, "grad_norm": 0.96484375, "learning_rate": 0.00019830598446244393, "loss": 1.2399, "step": 2767 }, { "epoch": 0.07107447831159598, "grad_norm": 0.98046875, "learning_rate": 0.00019830516614388128, "loss": 1.1577, "step": 2768 }, { "epoch": 0.0711001555075178, "grad_norm": 0.84765625, "learning_rate": 0.0001983043476294053, "loss": 1.1403, "step": 2769 }, { "epoch": 0.07112583270343963, "grad_norm": 0.890625, "learning_rate": 0.00019830352891901765, "loss": 1.0915, "step": 2770 }, { "epoch": 0.07115150989936143, "grad_norm": 0.8828125, "learning_rate": 0.00019830271001271995, "loss": 1.2511, "step": 2771 }, { "epoch": 0.07117718709528326, "grad_norm": 0.89453125, "learning_rate": 0.0001983018909105138, "loss": 1.1923, "step": 2772 }, { "epoch": 0.07120286429120508, "grad_norm": 0.8984375, "learning_rate": 0.0001983010716124009, "loss": 1.2925, "step": 2773 }, { "epoch": 0.0712285414871269, "grad_norm": 0.84375, "learning_rate": 0.0001983002521183828, "loss": 1.1815, "step": 2774 }, { "epoch": 0.07125421868304872, "grad_norm": 0.83984375, "learning_rate": 0.00019829943242846124, "loss": 1.0761, "step": 2775 }, { "epoch": 0.07127989587897053, "grad_norm": 0.93359375, "learning_rate": 0.00019829861254263772, "loss": 1.2138, "step": 2776 }, { "epoch": 0.07130557307489235, "grad_norm": 0.92578125, "learning_rate": 0.000198297792460914, "loss": 1.1049, "step": 2777 }, { "epoch": 0.07133125027081418, "grad_norm": 0.84765625, "learning_rate": 0.00019829697218329165, "loss": 1.1479, "step": 2778 }, { "epoch": 0.071356927466736, "grad_norm": 1.09375, "learning_rate": 0.00019829615170977228, "loss": 1.3467, "step": 2779 }, { "epoch": 0.07138260466265782, "grad_norm": 1.09375, "learning_rate": 0.00019829533104035759, "loss": 1.2629, "step": 2780 }, { "epoch": 0.07140828185857963, "grad_norm": 0.9140625, "learning_rate": 0.00019829451017504916, "loss": 1.3317, "step": 2781 }, { "epoch": 0.07143395905450145, "grad_norm": 0.85546875, "learning_rate": 0.00019829368911384866, "loss": 1.2623, "step": 2782 }, { "epoch": 0.07145963625042327, "grad_norm": 1.21875, "learning_rate": 0.0001982928678567577, "loss": 1.271, "step": 2783 }, { "epoch": 0.0714853134463451, "grad_norm": 0.89453125, "learning_rate": 0.000198292046403778, "loss": 1.3831, "step": 2784 }, { "epoch": 0.07151099064226692, "grad_norm": 0.84765625, "learning_rate": 0.00019829122475491104, "loss": 1.2126, "step": 2785 }, { "epoch": 0.07153666783818872, "grad_norm": 0.96484375, "learning_rate": 0.0001982904029101586, "loss": 1.3015, "step": 2786 }, { "epoch": 0.07156234503411055, "grad_norm": 0.81640625, "learning_rate": 0.00019828958086952225, "loss": 1.1366, "step": 2787 }, { "epoch": 0.07158802223003237, "grad_norm": 0.97265625, "learning_rate": 0.00019828875863300366, "loss": 1.1911, "step": 2788 }, { "epoch": 0.07161369942595419, "grad_norm": 1.578125, "learning_rate": 0.00019828793620060443, "loss": 1.1233, "step": 2789 }, { "epoch": 0.07163937662187601, "grad_norm": 0.921875, "learning_rate": 0.00019828711357232625, "loss": 1.1768, "step": 2790 }, { "epoch": 0.07166505381779782, "grad_norm": 1.109375, "learning_rate": 0.0001982862907481707, "loss": 1.2874, "step": 2791 }, { "epoch": 0.07169073101371964, "grad_norm": 0.8828125, "learning_rate": 0.00019828546772813948, "loss": 1.1802, "step": 2792 }, { "epoch": 0.07171640820964147, "grad_norm": 1.0546875, "learning_rate": 0.00019828464451223418, "loss": 1.079, "step": 2793 }, { "epoch": 0.07174208540556329, "grad_norm": 1.015625, "learning_rate": 0.00019828382110045648, "loss": 1.447, "step": 2794 }, { "epoch": 0.07176776260148511, "grad_norm": 1.09375, "learning_rate": 0.00019828299749280798, "loss": 1.1174, "step": 2795 }, { "epoch": 0.07179343979740692, "grad_norm": 1.0859375, "learning_rate": 0.00019828217368929036, "loss": 1.4195, "step": 2796 }, { "epoch": 0.07181911699332874, "grad_norm": 0.95703125, "learning_rate": 0.00019828134968990523, "loss": 1.2582, "step": 2797 }, { "epoch": 0.07184479418925056, "grad_norm": 0.90625, "learning_rate": 0.00019828052549465425, "loss": 1.311, "step": 2798 }, { "epoch": 0.07187047138517239, "grad_norm": 0.90234375, "learning_rate": 0.0001982797011035391, "loss": 1.2783, "step": 2799 }, { "epoch": 0.07189614858109421, "grad_norm": 0.8671875, "learning_rate": 0.00019827887651656135, "loss": 1.197, "step": 2800 }, { "epoch": 0.07192182577701602, "grad_norm": 0.9921875, "learning_rate": 0.00019827805173372264, "loss": 1.2337, "step": 2801 }, { "epoch": 0.07194750297293784, "grad_norm": 0.953125, "learning_rate": 0.0001982772267550247, "loss": 1.3409, "step": 2802 }, { "epoch": 0.07197318016885966, "grad_norm": 0.9609375, "learning_rate": 0.00019827640158046912, "loss": 1.36, "step": 2803 }, { "epoch": 0.07199885736478148, "grad_norm": 0.96484375, "learning_rate": 0.00019827557621005751, "loss": 1.3119, "step": 2804 }, { "epoch": 0.0720245345607033, "grad_norm": 0.9375, "learning_rate": 0.0001982747506437916, "loss": 1.2796, "step": 2805 }, { "epoch": 0.07205021175662511, "grad_norm": 0.84375, "learning_rate": 0.000198273924881673, "loss": 1.1467, "step": 2806 }, { "epoch": 0.07207588895254693, "grad_norm": 0.8984375, "learning_rate": 0.0001982730989237033, "loss": 1.2538, "step": 2807 }, { "epoch": 0.07210156614846876, "grad_norm": 0.921875, "learning_rate": 0.0001982722727698842, "loss": 1.1173, "step": 2808 }, { "epoch": 0.07212724334439058, "grad_norm": 0.91015625, "learning_rate": 0.0001982714464202174, "loss": 1.3651, "step": 2809 }, { "epoch": 0.0721529205403124, "grad_norm": 0.86328125, "learning_rate": 0.0001982706198747044, "loss": 1.237, "step": 2810 }, { "epoch": 0.07217859773623421, "grad_norm": 0.90625, "learning_rate": 0.00019826979313334694, "loss": 1.1621, "step": 2811 }, { "epoch": 0.07220427493215603, "grad_norm": 0.95703125, "learning_rate": 0.0001982689661961467, "loss": 1.3059, "step": 2812 }, { "epoch": 0.07222995212807785, "grad_norm": 0.90625, "learning_rate": 0.00019826813906310528, "loss": 1.2354, "step": 2813 }, { "epoch": 0.07225562932399968, "grad_norm": 0.94921875, "learning_rate": 0.0001982673117342243, "loss": 1.3089, "step": 2814 }, { "epoch": 0.0722813065199215, "grad_norm": 0.91796875, "learning_rate": 0.00019826648420950547, "loss": 1.2351, "step": 2815 }, { "epoch": 0.0723069837158433, "grad_norm": 0.90625, "learning_rate": 0.0001982656564889504, "loss": 1.1956, "step": 2816 }, { "epoch": 0.07233266091176513, "grad_norm": 0.94921875, "learning_rate": 0.0001982648285725608, "loss": 1.2215, "step": 2817 }, { "epoch": 0.07235833810768695, "grad_norm": 0.94921875, "learning_rate": 0.00019826400046033823, "loss": 1.3396, "step": 2818 }, { "epoch": 0.07238401530360877, "grad_norm": 0.88671875, "learning_rate": 0.00019826317215228438, "loss": 1.1436, "step": 2819 }, { "epoch": 0.0724096924995306, "grad_norm": 0.90234375, "learning_rate": 0.00019826234364840093, "loss": 1.22, "step": 2820 }, { "epoch": 0.0724353696954524, "grad_norm": 0.91015625, "learning_rate": 0.0001982615149486895, "loss": 1.2626, "step": 2821 }, { "epoch": 0.07246104689137423, "grad_norm": 0.94921875, "learning_rate": 0.00019826068605315172, "loss": 1.1073, "step": 2822 }, { "epoch": 0.07248672408729605, "grad_norm": 0.88671875, "learning_rate": 0.0001982598569617893, "loss": 1.0957, "step": 2823 }, { "epoch": 0.07251240128321787, "grad_norm": 0.96484375, "learning_rate": 0.00019825902767460382, "loss": 1.1902, "step": 2824 }, { "epoch": 0.07253807847913969, "grad_norm": 0.84765625, "learning_rate": 0.000198258198191597, "loss": 1.2485, "step": 2825 }, { "epoch": 0.0725637556750615, "grad_norm": 0.92578125, "learning_rate": 0.00019825736851277045, "loss": 1.2015, "step": 2826 }, { "epoch": 0.07258943287098332, "grad_norm": 1.109375, "learning_rate": 0.0001982565386381259, "loss": 1.302, "step": 2827 }, { "epoch": 0.07261511006690514, "grad_norm": 0.91796875, "learning_rate": 0.00019825570856766487, "loss": 1.3781, "step": 2828 }, { "epoch": 0.07264078726282697, "grad_norm": 0.98046875, "learning_rate": 0.0001982548783013891, "loss": 1.3897, "step": 2829 }, { "epoch": 0.07266646445874879, "grad_norm": 1.21875, "learning_rate": 0.00019825404783930026, "loss": 1.1313, "step": 2830 }, { "epoch": 0.0726921416546706, "grad_norm": 0.875, "learning_rate": 0.00019825321718139997, "loss": 1.2727, "step": 2831 }, { "epoch": 0.07271781885059242, "grad_norm": 0.91796875, "learning_rate": 0.0001982523863276899, "loss": 1.2827, "step": 2832 }, { "epoch": 0.07274349604651424, "grad_norm": 0.88671875, "learning_rate": 0.00019825155527817168, "loss": 1.2233, "step": 2833 }, { "epoch": 0.07276917324243606, "grad_norm": 0.94921875, "learning_rate": 0.00019825072403284702, "loss": 1.313, "step": 2834 }, { "epoch": 0.07279485043835789, "grad_norm": 0.8515625, "learning_rate": 0.0001982498925917175, "loss": 1.2505, "step": 2835 }, { "epoch": 0.0728205276342797, "grad_norm": 0.91015625, "learning_rate": 0.00019824906095478484, "loss": 1.2942, "step": 2836 }, { "epoch": 0.07284620483020152, "grad_norm": 0.90625, "learning_rate": 0.00019824822912205067, "loss": 1.195, "step": 2837 }, { "epoch": 0.07287188202612334, "grad_norm": 0.87109375, "learning_rate": 0.00019824739709351665, "loss": 1.3502, "step": 2838 }, { "epoch": 0.07289755922204516, "grad_norm": 0.90625, "learning_rate": 0.00019824656486918448, "loss": 1.2646, "step": 2839 }, { "epoch": 0.07292323641796697, "grad_norm": 0.859375, "learning_rate": 0.00019824573244905572, "loss": 1.3529, "step": 2840 }, { "epoch": 0.07294891361388879, "grad_norm": 0.8671875, "learning_rate": 0.00019824489983313211, "loss": 1.2687, "step": 2841 }, { "epoch": 0.07297459080981061, "grad_norm": 0.77734375, "learning_rate": 0.0001982440670214153, "loss": 1.1307, "step": 2842 }, { "epoch": 0.07300026800573244, "grad_norm": 0.875, "learning_rate": 0.00019824323401390696, "loss": 1.1441, "step": 2843 }, { "epoch": 0.07302594520165426, "grad_norm": 0.85546875, "learning_rate": 0.0001982424008106087, "loss": 1.2633, "step": 2844 }, { "epoch": 0.07305162239757607, "grad_norm": 0.8671875, "learning_rate": 0.00019824156741152222, "loss": 1.1579, "step": 2845 }, { "epoch": 0.07307729959349789, "grad_norm": 0.86328125, "learning_rate": 0.00019824073381664915, "loss": 1.0947, "step": 2846 }, { "epoch": 0.07310297678941971, "grad_norm": 0.875, "learning_rate": 0.0001982399000259912, "loss": 1.3037, "step": 2847 }, { "epoch": 0.07312865398534153, "grad_norm": 0.8515625, "learning_rate": 0.00019823906603954998, "loss": 1.2909, "step": 2848 }, { "epoch": 0.07315433118126335, "grad_norm": 0.82421875, "learning_rate": 0.0001982382318573272, "loss": 1.3853, "step": 2849 }, { "epoch": 0.07318000837718516, "grad_norm": 0.91015625, "learning_rate": 0.00019823739747932447, "loss": 1.2858, "step": 2850 }, { "epoch": 0.07320568557310698, "grad_norm": 0.98828125, "learning_rate": 0.0001982365629055435, "loss": 1.3219, "step": 2851 }, { "epoch": 0.07323136276902881, "grad_norm": 0.87890625, "learning_rate": 0.00019823572813598594, "loss": 1.1728, "step": 2852 }, { "epoch": 0.07325703996495063, "grad_norm": 0.88671875, "learning_rate": 0.00019823489317065342, "loss": 1.2258, "step": 2853 }, { "epoch": 0.07328271716087245, "grad_norm": 0.8515625, "learning_rate": 0.00019823405800954767, "loss": 1.1069, "step": 2854 }, { "epoch": 0.07330839435679426, "grad_norm": 0.88671875, "learning_rate": 0.00019823322265267028, "loss": 1.1905, "step": 2855 }, { "epoch": 0.07333407155271608, "grad_norm": 0.81640625, "learning_rate": 0.00019823238710002297, "loss": 1.2566, "step": 2856 }, { "epoch": 0.0733597487486379, "grad_norm": 0.87109375, "learning_rate": 0.00019823155135160737, "loss": 1.0591, "step": 2857 }, { "epoch": 0.07338542594455973, "grad_norm": 0.859375, "learning_rate": 0.00019823071540742517, "loss": 1.1, "step": 2858 }, { "epoch": 0.07341110314048155, "grad_norm": 0.9140625, "learning_rate": 0.00019822987926747802, "loss": 1.1854, "step": 2859 }, { "epoch": 0.07343678033640336, "grad_norm": 0.92578125, "learning_rate": 0.0001982290429317676, "loss": 1.0551, "step": 2860 }, { "epoch": 0.07346245753232518, "grad_norm": 0.80859375, "learning_rate": 0.0001982282064002956, "loss": 1.0582, "step": 2861 }, { "epoch": 0.073488134728247, "grad_norm": 0.875, "learning_rate": 0.0001982273696730636, "loss": 1.1562, "step": 2862 }, { "epoch": 0.07351381192416882, "grad_norm": 0.859375, "learning_rate": 0.00019822653275007338, "loss": 1.3228, "step": 2863 }, { "epoch": 0.07353948912009065, "grad_norm": 0.8359375, "learning_rate": 0.0001982256956313265, "loss": 1.2372, "step": 2864 }, { "epoch": 0.07356516631601245, "grad_norm": 0.87890625, "learning_rate": 0.00019822485831682472, "loss": 1.1474, "step": 2865 }, { "epoch": 0.07359084351193428, "grad_norm": 0.8515625, "learning_rate": 0.00019822402080656967, "loss": 1.3195, "step": 2866 }, { "epoch": 0.0736165207078561, "grad_norm": 0.8828125, "learning_rate": 0.00019822318310056303, "loss": 1.1602, "step": 2867 }, { "epoch": 0.07364219790377792, "grad_norm": 0.83984375, "learning_rate": 0.00019822234519880643, "loss": 1.1606, "step": 2868 }, { "epoch": 0.07366787509969974, "grad_norm": 0.921875, "learning_rate": 0.00019822150710130157, "loss": 1.3146, "step": 2869 }, { "epoch": 0.07369355229562155, "grad_norm": 0.88671875, "learning_rate": 0.00019822066880805013, "loss": 1.2999, "step": 2870 }, { "epoch": 0.07371922949154337, "grad_norm": 0.90625, "learning_rate": 0.00019821983031905377, "loss": 1.1915, "step": 2871 }, { "epoch": 0.0737449066874652, "grad_norm": 0.87890625, "learning_rate": 0.00019821899163431414, "loss": 1.2481, "step": 2872 }, { "epoch": 0.07377058388338702, "grad_norm": 0.88671875, "learning_rate": 0.00019821815275383294, "loss": 1.3105, "step": 2873 }, { "epoch": 0.07379626107930884, "grad_norm": 0.859375, "learning_rate": 0.00019821731367761184, "loss": 1.0851, "step": 2874 }, { "epoch": 0.07382193827523065, "grad_norm": 0.84375, "learning_rate": 0.00019821647440565249, "loss": 1.2549, "step": 2875 }, { "epoch": 0.07384761547115247, "grad_norm": 0.88671875, "learning_rate": 0.0001982156349379566, "loss": 1.301, "step": 2876 }, { "epoch": 0.07387329266707429, "grad_norm": 0.84765625, "learning_rate": 0.0001982147952745258, "loss": 1.3145, "step": 2877 }, { "epoch": 0.07389896986299611, "grad_norm": 0.890625, "learning_rate": 0.00019821395541536182, "loss": 1.2252, "step": 2878 }, { "epoch": 0.07392464705891794, "grad_norm": 0.9140625, "learning_rate": 0.00019821311536046628, "loss": 1.273, "step": 2879 }, { "epoch": 0.07395032425483974, "grad_norm": 0.85546875, "learning_rate": 0.00019821227510984088, "loss": 1.2575, "step": 2880 }, { "epoch": 0.07397600145076157, "grad_norm": 0.875, "learning_rate": 0.00019821143466348725, "loss": 1.1918, "step": 2881 }, { "epoch": 0.07400167864668339, "grad_norm": 0.8984375, "learning_rate": 0.00019821059402140716, "loss": 1.2132, "step": 2882 }, { "epoch": 0.07402735584260521, "grad_norm": 0.9453125, "learning_rate": 0.0001982097531836022, "loss": 1.3432, "step": 2883 }, { "epoch": 0.07405303303852703, "grad_norm": 0.91015625, "learning_rate": 0.00019820891215007407, "loss": 1.3749, "step": 2884 }, { "epoch": 0.07407871023444884, "grad_norm": 0.86328125, "learning_rate": 0.00019820807092082445, "loss": 1.1522, "step": 2885 }, { "epoch": 0.07410438743037066, "grad_norm": 0.98828125, "learning_rate": 0.000198207229495855, "loss": 1.3482, "step": 2886 }, { "epoch": 0.07413006462629249, "grad_norm": 0.84765625, "learning_rate": 0.00019820638787516743, "loss": 1.2224, "step": 2887 }, { "epoch": 0.07415574182221431, "grad_norm": 0.8984375, "learning_rate": 0.0001982055460587634, "loss": 1.168, "step": 2888 }, { "epoch": 0.07418141901813613, "grad_norm": 0.90625, "learning_rate": 0.0001982047040466446, "loss": 1.2771, "step": 2889 }, { "epoch": 0.07420709621405794, "grad_norm": 0.875, "learning_rate": 0.00019820386183881268, "loss": 1.243, "step": 2890 }, { "epoch": 0.07423277340997976, "grad_norm": 0.9140625, "learning_rate": 0.0001982030194352693, "loss": 1.3289, "step": 2891 }, { "epoch": 0.07425845060590158, "grad_norm": 0.890625, "learning_rate": 0.00019820217683601623, "loss": 1.0944, "step": 2892 }, { "epoch": 0.0742841278018234, "grad_norm": 0.8515625, "learning_rate": 0.00019820133404105508, "loss": 1.1939, "step": 2893 }, { "epoch": 0.07430980499774523, "grad_norm": 0.9765625, "learning_rate": 0.00019820049105038755, "loss": 1.2049, "step": 2894 }, { "epoch": 0.07433548219366704, "grad_norm": 0.95703125, "learning_rate": 0.00019819964786401526, "loss": 1.1444, "step": 2895 }, { "epoch": 0.07436115938958886, "grad_norm": 0.80859375, "learning_rate": 0.00019819880448193998, "loss": 1.0059, "step": 2896 }, { "epoch": 0.07438683658551068, "grad_norm": 0.84375, "learning_rate": 0.00019819796090416337, "loss": 1.2008, "step": 2897 }, { "epoch": 0.0744125137814325, "grad_norm": 0.83203125, "learning_rate": 0.00019819711713068707, "loss": 1.0023, "step": 2898 }, { "epoch": 0.07443819097735432, "grad_norm": 0.91015625, "learning_rate": 0.0001981962731615128, "loss": 1.1831, "step": 2899 }, { "epoch": 0.07446386817327613, "grad_norm": 0.875, "learning_rate": 0.00019819542899664221, "loss": 1.1112, "step": 2900 }, { "epoch": 0.07448954536919795, "grad_norm": 0.8203125, "learning_rate": 0.000198194584636077, "loss": 1.2917, "step": 2901 }, { "epoch": 0.07451522256511978, "grad_norm": 0.8671875, "learning_rate": 0.00019819374007981884, "loss": 1.2404, "step": 2902 }, { "epoch": 0.0745408997610416, "grad_norm": 0.8203125, "learning_rate": 0.00019819289532786946, "loss": 1.1728, "step": 2903 }, { "epoch": 0.07456657695696342, "grad_norm": 0.91015625, "learning_rate": 0.0001981920503802305, "loss": 1.3043, "step": 2904 }, { "epoch": 0.07459225415288523, "grad_norm": 0.84765625, "learning_rate": 0.00019819120523690363, "loss": 1.2243, "step": 2905 }, { "epoch": 0.07461793134880705, "grad_norm": 1.0078125, "learning_rate": 0.00019819035989789057, "loss": 1.2427, "step": 2906 }, { "epoch": 0.07464360854472887, "grad_norm": 0.87109375, "learning_rate": 0.00019818951436319303, "loss": 1.0878, "step": 2907 }, { "epoch": 0.0746692857406507, "grad_norm": 0.8359375, "learning_rate": 0.0001981886686328126, "loss": 1.1846, "step": 2908 }, { "epoch": 0.07469496293657252, "grad_norm": 0.84375, "learning_rate": 0.00019818782270675104, "loss": 1.2901, "step": 2909 }, { "epoch": 0.07472064013249433, "grad_norm": 0.8984375, "learning_rate": 0.00019818697658501003, "loss": 1.2502, "step": 2910 }, { "epoch": 0.07474631732841615, "grad_norm": 0.90234375, "learning_rate": 0.00019818613026759122, "loss": 1.2001, "step": 2911 }, { "epoch": 0.07477199452433797, "grad_norm": 0.921875, "learning_rate": 0.00019818528375449635, "loss": 1.2826, "step": 2912 }, { "epoch": 0.07479767172025979, "grad_norm": 0.8671875, "learning_rate": 0.0001981844370457271, "loss": 1.3017, "step": 2913 }, { "epoch": 0.07482334891618161, "grad_norm": 0.828125, "learning_rate": 0.0001981835901412851, "loss": 1.1525, "step": 2914 }, { "epoch": 0.07484902611210342, "grad_norm": 0.9453125, "learning_rate": 0.00019818274304117206, "loss": 1.3293, "step": 2915 }, { "epoch": 0.07487470330802524, "grad_norm": 0.9140625, "learning_rate": 0.00019818189574538972, "loss": 1.2715, "step": 2916 }, { "epoch": 0.07490038050394707, "grad_norm": 1.03125, "learning_rate": 0.0001981810482539397, "loss": 1.1311, "step": 2917 }, { "epoch": 0.07492605769986889, "grad_norm": 0.9609375, "learning_rate": 0.00019818020056682373, "loss": 1.1759, "step": 2918 }, { "epoch": 0.07495173489579071, "grad_norm": 0.93359375, "learning_rate": 0.0001981793526840435, "loss": 1.1866, "step": 2919 }, { "epoch": 0.07497741209171252, "grad_norm": 0.97265625, "learning_rate": 0.00019817850460560068, "loss": 1.0909, "step": 2920 }, { "epoch": 0.07500308928763434, "grad_norm": 0.91796875, "learning_rate": 0.00019817765633149697, "loss": 1.1822, "step": 2921 }, { "epoch": 0.07502876648355616, "grad_norm": 1.0, "learning_rate": 0.00019817680786173405, "loss": 1.1825, "step": 2922 }, { "epoch": 0.07505444367947799, "grad_norm": 0.89453125, "learning_rate": 0.0001981759591963136, "loss": 1.2731, "step": 2923 }, { "epoch": 0.07508012087539981, "grad_norm": 0.92578125, "learning_rate": 0.0001981751103352374, "loss": 1.2962, "step": 2924 }, { "epoch": 0.07510579807132162, "grad_norm": 0.890625, "learning_rate": 0.000198174261278507, "loss": 1.0693, "step": 2925 }, { "epoch": 0.07513147526724344, "grad_norm": 0.8203125, "learning_rate": 0.00019817341202612424, "loss": 1.1638, "step": 2926 }, { "epoch": 0.07515715246316526, "grad_norm": 0.8984375, "learning_rate": 0.00019817256257809068, "loss": 1.1487, "step": 2927 }, { "epoch": 0.07518282965908708, "grad_norm": 0.828125, "learning_rate": 0.0001981717129344081, "loss": 1.1848, "step": 2928 }, { "epoch": 0.0752085068550089, "grad_norm": 0.921875, "learning_rate": 0.00019817086309507814, "loss": 1.3097, "step": 2929 }, { "epoch": 0.07523418405093071, "grad_norm": 0.91796875, "learning_rate": 0.00019817001306010251, "loss": 1.2523, "step": 2930 }, { "epoch": 0.07525986124685254, "grad_norm": 0.90234375, "learning_rate": 0.00019816916282948294, "loss": 1.3066, "step": 2931 }, { "epoch": 0.07528553844277436, "grad_norm": 0.86328125, "learning_rate": 0.00019816831240322108, "loss": 1.2895, "step": 2932 }, { "epoch": 0.07531121563869618, "grad_norm": 1.921875, "learning_rate": 0.00019816746178131863, "loss": 1.3618, "step": 2933 }, { "epoch": 0.075336892834618, "grad_norm": 0.86328125, "learning_rate": 0.00019816661096377735, "loss": 1.2693, "step": 2934 }, { "epoch": 0.07536257003053981, "grad_norm": 0.9609375, "learning_rate": 0.00019816575995059883, "loss": 1.1944, "step": 2935 }, { "epoch": 0.07538824722646163, "grad_norm": 0.94140625, "learning_rate": 0.00019816490874178483, "loss": 1.194, "step": 2936 }, { "epoch": 0.07541392442238345, "grad_norm": 0.83203125, "learning_rate": 0.00019816405733733702, "loss": 1.1929, "step": 2937 }, { "epoch": 0.07543960161830528, "grad_norm": 0.94921875, "learning_rate": 0.00019816320573725715, "loss": 1.1457, "step": 2938 }, { "epoch": 0.0754652788142271, "grad_norm": 0.88671875, "learning_rate": 0.00019816235394154685, "loss": 1.1833, "step": 2939 }, { "epoch": 0.07549095601014891, "grad_norm": 0.85546875, "learning_rate": 0.00019816150195020783, "loss": 1.236, "step": 2940 }, { "epoch": 0.07551663320607073, "grad_norm": 0.8515625, "learning_rate": 0.00019816064976324184, "loss": 1.2829, "step": 2941 }, { "epoch": 0.07554231040199255, "grad_norm": 0.91796875, "learning_rate": 0.00019815979738065055, "loss": 1.1427, "step": 2942 }, { "epoch": 0.07556798759791437, "grad_norm": 0.875, "learning_rate": 0.0001981589448024356, "loss": 1.0539, "step": 2943 }, { "epoch": 0.07559366479383618, "grad_norm": 0.859375, "learning_rate": 0.00019815809202859875, "loss": 1.095, "step": 2944 }, { "epoch": 0.075619341989758, "grad_norm": 0.83984375, "learning_rate": 0.00019815723905914175, "loss": 1.3226, "step": 2945 }, { "epoch": 0.07564501918567983, "grad_norm": 0.96875, "learning_rate": 0.00019815638589406616, "loss": 1.1504, "step": 2946 }, { "epoch": 0.07567069638160165, "grad_norm": 0.83203125, "learning_rate": 0.0001981555325333738, "loss": 1.1752, "step": 2947 }, { "epoch": 0.07569637357752347, "grad_norm": 0.9375, "learning_rate": 0.00019815467897706636, "loss": 1.1789, "step": 2948 }, { "epoch": 0.07572205077344528, "grad_norm": 0.90625, "learning_rate": 0.00019815382522514545, "loss": 1.3232, "step": 2949 }, { "epoch": 0.0757477279693671, "grad_norm": 0.8828125, "learning_rate": 0.00019815297127761286, "loss": 1.2176, "step": 2950 }, { "epoch": 0.07577340516528892, "grad_norm": 0.96875, "learning_rate": 0.00019815211713447025, "loss": 1.1768, "step": 2951 }, { "epoch": 0.07579908236121075, "grad_norm": 0.8515625, "learning_rate": 0.00019815126279571933, "loss": 1.1881, "step": 2952 }, { "epoch": 0.07582475955713257, "grad_norm": 0.8828125, "learning_rate": 0.00019815040826136183, "loss": 1.2897, "step": 2953 }, { "epoch": 0.07585043675305438, "grad_norm": 0.96875, "learning_rate": 0.00019814955353139943, "loss": 1.2902, "step": 2954 }, { "epoch": 0.0758761139489762, "grad_norm": 0.95703125, "learning_rate": 0.00019814869860583384, "loss": 1.2628, "step": 2955 }, { "epoch": 0.07590179114489802, "grad_norm": 0.859375, "learning_rate": 0.00019814784348466677, "loss": 1.2888, "step": 2956 }, { "epoch": 0.07592746834081984, "grad_norm": 0.9375, "learning_rate": 0.0001981469881678999, "loss": 1.2296, "step": 2957 }, { "epoch": 0.07595314553674166, "grad_norm": 0.91796875, "learning_rate": 0.00019814613265553495, "loss": 1.3699, "step": 2958 }, { "epoch": 0.07597882273266347, "grad_norm": 0.93359375, "learning_rate": 0.00019814527694757362, "loss": 1.2137, "step": 2959 }, { "epoch": 0.0760044999285853, "grad_norm": 0.921875, "learning_rate": 0.00019814442104401764, "loss": 1.2538, "step": 2960 }, { "epoch": 0.07603017712450712, "grad_norm": 0.9453125, "learning_rate": 0.00019814356494486863, "loss": 1.1658, "step": 2961 }, { "epoch": 0.07605585432042894, "grad_norm": 0.89453125, "learning_rate": 0.0001981427086501284, "loss": 1.1207, "step": 2962 }, { "epoch": 0.07608153151635076, "grad_norm": 0.85546875, "learning_rate": 0.00019814185215979863, "loss": 1.1761, "step": 2963 }, { "epoch": 0.07610720871227257, "grad_norm": 1.0390625, "learning_rate": 0.000198140995473881, "loss": 1.1451, "step": 2964 }, { "epoch": 0.07613288590819439, "grad_norm": 0.9609375, "learning_rate": 0.00019814013859237726, "loss": 1.1935, "step": 2965 }, { "epoch": 0.07615856310411621, "grad_norm": 0.87109375, "learning_rate": 0.00019813928151528903, "loss": 1.1859, "step": 2966 }, { "epoch": 0.07618424030003804, "grad_norm": 0.8359375, "learning_rate": 0.00019813842424261813, "loss": 1.1416, "step": 2967 }, { "epoch": 0.07620991749595986, "grad_norm": 0.99609375, "learning_rate": 0.00019813756677436619, "loss": 1.1775, "step": 2968 }, { "epoch": 0.07623559469188167, "grad_norm": 0.8984375, "learning_rate": 0.00019813670911053495, "loss": 1.1036, "step": 2969 }, { "epoch": 0.07626127188780349, "grad_norm": 0.83984375, "learning_rate": 0.00019813585125112612, "loss": 1.2163, "step": 2970 }, { "epoch": 0.07628694908372531, "grad_norm": 0.859375, "learning_rate": 0.00019813499319614136, "loss": 1.2171, "step": 2971 }, { "epoch": 0.07631262627964713, "grad_norm": 0.86328125, "learning_rate": 0.0001981341349455825, "loss": 1.1192, "step": 2972 }, { "epoch": 0.07633830347556896, "grad_norm": 1.015625, "learning_rate": 0.0001981332764994511, "loss": 1.4285, "step": 2973 }, { "epoch": 0.07636398067149076, "grad_norm": 0.8984375, "learning_rate": 0.000198132417857749, "loss": 1.2152, "step": 2974 }, { "epoch": 0.07638965786741259, "grad_norm": 0.9375, "learning_rate": 0.00019813155902047785, "loss": 1.1429, "step": 2975 }, { "epoch": 0.07641533506333441, "grad_norm": 0.82421875, "learning_rate": 0.00019813069998763933, "loss": 1.0398, "step": 2976 }, { "epoch": 0.07644101225925623, "grad_norm": 2.078125, "learning_rate": 0.00019812984075923522, "loss": 1.4037, "step": 2977 }, { "epoch": 0.07646668945517805, "grad_norm": 0.91796875, "learning_rate": 0.0001981289813352672, "loss": 1.215, "step": 2978 }, { "epoch": 0.07649236665109986, "grad_norm": 1.0703125, "learning_rate": 0.00019812812171573696, "loss": 1.2431, "step": 2979 }, { "epoch": 0.07651804384702168, "grad_norm": 1.1015625, "learning_rate": 0.00019812726190064623, "loss": 1.1523, "step": 2980 }, { "epoch": 0.0765437210429435, "grad_norm": 0.83203125, "learning_rate": 0.00019812640188999675, "loss": 1.1995, "step": 2981 }, { "epoch": 0.07656939823886533, "grad_norm": 0.9140625, "learning_rate": 0.00019812554168379022, "loss": 1.118, "step": 2982 }, { "epoch": 0.07659507543478715, "grad_norm": 0.90625, "learning_rate": 0.00019812468128202837, "loss": 1.0956, "step": 2983 }, { "epoch": 0.07662075263070896, "grad_norm": 0.875, "learning_rate": 0.00019812382068471284, "loss": 1.1611, "step": 2984 }, { "epoch": 0.07664642982663078, "grad_norm": 0.921875, "learning_rate": 0.00019812295989184544, "loss": 1.3343, "step": 2985 }, { "epoch": 0.0766721070225526, "grad_norm": 0.9375, "learning_rate": 0.00019812209890342783, "loss": 1.3106, "step": 2986 }, { "epoch": 0.07669778421847442, "grad_norm": 0.8515625, "learning_rate": 0.00019812123771946173, "loss": 1.2097, "step": 2987 }, { "epoch": 0.07672346141439625, "grad_norm": 0.87890625, "learning_rate": 0.00019812037633994888, "loss": 1.1538, "step": 2988 }, { "epoch": 0.07674913861031805, "grad_norm": 0.83203125, "learning_rate": 0.000198119514764891, "loss": 1.1401, "step": 2989 }, { "epoch": 0.07677481580623988, "grad_norm": 0.96875, "learning_rate": 0.00019811865299428977, "loss": 1.2764, "step": 2990 }, { "epoch": 0.0768004930021617, "grad_norm": 0.8515625, "learning_rate": 0.00019811779102814692, "loss": 1.1098, "step": 2991 }, { "epoch": 0.07682617019808352, "grad_norm": 0.90234375, "learning_rate": 0.0001981169288664642, "loss": 1.2977, "step": 2992 }, { "epoch": 0.07685184739400534, "grad_norm": 0.91015625, "learning_rate": 0.0001981160665092433, "loss": 1.0798, "step": 2993 }, { "epoch": 0.07687752458992715, "grad_norm": 0.94921875, "learning_rate": 0.00019811520395648593, "loss": 1.3404, "step": 2994 }, { "epoch": 0.07690320178584897, "grad_norm": 0.94140625, "learning_rate": 0.0001981143412081938, "loss": 1.3571, "step": 2995 }, { "epoch": 0.0769288789817708, "grad_norm": 1.015625, "learning_rate": 0.00019811347826436868, "loss": 1.3444, "step": 2996 }, { "epoch": 0.07695455617769262, "grad_norm": 1.359375, "learning_rate": 0.00019811261512501225, "loss": 1.2398, "step": 2997 }, { "epoch": 0.07698023337361444, "grad_norm": 0.80078125, "learning_rate": 0.00019811175179012625, "loss": 1.1922, "step": 2998 }, { "epoch": 0.07700591056953625, "grad_norm": 0.875, "learning_rate": 0.00019811088825971238, "loss": 1.2819, "step": 2999 }, { "epoch": 0.07703158776545807, "grad_norm": 0.875, "learning_rate": 0.00019811002453377235, "loss": 1.1615, "step": 3000 }, { "epoch": 0.07703158776545807, "eval_loss": 1.2214481830596924, "eval_model_preparation_time": 0.0065, "eval_runtime": 401.4544, "eval_samples_per_second": 24.909, "eval_steps_per_second": 0.78, "step": 3000 }, { "epoch": 0.07705726496137989, "grad_norm": 0.84765625, "learning_rate": 0.00019810916061230796, "loss": 1.1882, "step": 3001 }, { "epoch": 0.07708294215730171, "grad_norm": 0.921875, "learning_rate": 0.00019810829649532084, "loss": 1.1589, "step": 3002 }, { "epoch": 0.07710861935322354, "grad_norm": 0.8984375, "learning_rate": 0.00019810743218281275, "loss": 1.2782, "step": 3003 }, { "epoch": 0.07713429654914535, "grad_norm": 0.8515625, "learning_rate": 0.0001981065676747854, "loss": 1.1591, "step": 3004 }, { "epoch": 0.07715997374506717, "grad_norm": 0.828125, "learning_rate": 0.00019810570297124056, "loss": 1.2506, "step": 3005 }, { "epoch": 0.07718565094098899, "grad_norm": 0.96484375, "learning_rate": 0.0001981048380721799, "loss": 1.324, "step": 3006 }, { "epoch": 0.07721132813691081, "grad_norm": 0.9453125, "learning_rate": 0.00019810397297760513, "loss": 1.1495, "step": 3007 }, { "epoch": 0.07723700533283263, "grad_norm": 0.8515625, "learning_rate": 0.00019810310768751804, "loss": 1.1966, "step": 3008 }, { "epoch": 0.07726268252875444, "grad_norm": 0.82421875, "learning_rate": 0.0001981022422019203, "loss": 1.198, "step": 3009 }, { "epoch": 0.07728835972467626, "grad_norm": 0.890625, "learning_rate": 0.00019810137652081364, "loss": 1.3172, "step": 3010 }, { "epoch": 0.07731403692059809, "grad_norm": 0.921875, "learning_rate": 0.00019810051064419982, "loss": 1.0432, "step": 3011 }, { "epoch": 0.07733971411651991, "grad_norm": 0.73828125, "learning_rate": 0.00019809964457208055, "loss": 1.1597, "step": 3012 }, { "epoch": 0.07736539131244173, "grad_norm": 0.87109375, "learning_rate": 0.00019809877830445754, "loss": 0.8613, "step": 3013 }, { "epoch": 0.07739106850836354, "grad_norm": 0.8125, "learning_rate": 0.0001980979118413325, "loss": 1.1473, "step": 3014 }, { "epoch": 0.07741674570428536, "grad_norm": 0.91015625, "learning_rate": 0.0001980970451827072, "loss": 1.2458, "step": 3015 }, { "epoch": 0.07744242290020718, "grad_norm": 0.8828125, "learning_rate": 0.00019809617832858335, "loss": 1.2877, "step": 3016 }, { "epoch": 0.077468100096129, "grad_norm": 0.87890625, "learning_rate": 0.0001980953112789627, "loss": 1.2058, "step": 3017 }, { "epoch": 0.07749377729205083, "grad_norm": 0.85546875, "learning_rate": 0.00019809444403384694, "loss": 1.1571, "step": 3018 }, { "epoch": 0.07751945448797264, "grad_norm": 0.91796875, "learning_rate": 0.00019809357659323783, "loss": 1.1565, "step": 3019 }, { "epoch": 0.07754513168389446, "grad_norm": 0.765625, "learning_rate": 0.00019809270895713703, "loss": 1.1104, "step": 3020 }, { "epoch": 0.07757080887981628, "grad_norm": 0.85546875, "learning_rate": 0.00019809184112554634, "loss": 1.2429, "step": 3021 }, { "epoch": 0.0775964860757381, "grad_norm": 1.0, "learning_rate": 0.00019809097309846752, "loss": 1.1766, "step": 3022 }, { "epoch": 0.07762216327165992, "grad_norm": 0.83203125, "learning_rate": 0.0001980901048759022, "loss": 1.2028, "step": 3023 }, { "epoch": 0.07764784046758173, "grad_norm": 0.90625, "learning_rate": 0.0001980892364578522, "loss": 1.2844, "step": 3024 }, { "epoch": 0.07767351766350356, "grad_norm": 1.0, "learning_rate": 0.00019808836784431918, "loss": 1.1129, "step": 3025 }, { "epoch": 0.07769919485942538, "grad_norm": 0.8984375, "learning_rate": 0.00019808749903530492, "loss": 1.0673, "step": 3026 }, { "epoch": 0.0777248720553472, "grad_norm": 0.91015625, "learning_rate": 0.00019808663003081112, "loss": 1.0591, "step": 3027 }, { "epoch": 0.07775054925126902, "grad_norm": 0.8828125, "learning_rate": 0.00019808576083083957, "loss": 1.2624, "step": 3028 }, { "epoch": 0.07777622644719083, "grad_norm": 0.96484375, "learning_rate": 0.0001980848914353919, "loss": 1.3165, "step": 3029 }, { "epoch": 0.07780190364311265, "grad_norm": 0.9921875, "learning_rate": 0.00019808402184446993, "loss": 1.2443, "step": 3030 }, { "epoch": 0.07782758083903447, "grad_norm": 0.78125, "learning_rate": 0.00019808315205807535, "loss": 1.2104, "step": 3031 }, { "epoch": 0.0778532580349563, "grad_norm": 0.9375, "learning_rate": 0.00019808228207620992, "loss": 1.2484, "step": 3032 }, { "epoch": 0.07787893523087812, "grad_norm": 0.8671875, "learning_rate": 0.00019808141189887538, "loss": 1.1419, "step": 3033 }, { "epoch": 0.07790461242679993, "grad_norm": 0.86328125, "learning_rate": 0.00019808054152607341, "loss": 1.221, "step": 3034 }, { "epoch": 0.07793028962272175, "grad_norm": 0.953125, "learning_rate": 0.0001980796709578058, "loss": 1.4304, "step": 3035 }, { "epoch": 0.07795596681864357, "grad_norm": 0.87109375, "learning_rate": 0.00019807880019407427, "loss": 1.1546, "step": 3036 }, { "epoch": 0.0779816440145654, "grad_norm": 0.9609375, "learning_rate": 0.00019807792923488054, "loss": 1.1852, "step": 3037 }, { "epoch": 0.07800732121048722, "grad_norm": 0.9921875, "learning_rate": 0.00019807705808022635, "loss": 1.1732, "step": 3038 }, { "epoch": 0.07803299840640902, "grad_norm": 0.859375, "learning_rate": 0.00019807618673011346, "loss": 1.1273, "step": 3039 }, { "epoch": 0.07805867560233085, "grad_norm": 1.3125, "learning_rate": 0.0001980753151845436, "loss": 1.3679, "step": 3040 }, { "epoch": 0.07808435279825267, "grad_norm": 0.8359375, "learning_rate": 0.00019807444344351844, "loss": 1.1571, "step": 3041 }, { "epoch": 0.07811002999417449, "grad_norm": 0.90234375, "learning_rate": 0.00019807357150703983, "loss": 1.2467, "step": 3042 }, { "epoch": 0.07813570719009631, "grad_norm": 1.0, "learning_rate": 0.0001980726993751094, "loss": 1.2703, "step": 3043 }, { "epoch": 0.07816138438601812, "grad_norm": 0.9296875, "learning_rate": 0.000198071827047729, "loss": 1.3975, "step": 3044 }, { "epoch": 0.07818706158193994, "grad_norm": 0.8984375, "learning_rate": 0.00019807095452490026, "loss": 1.0815, "step": 3045 }, { "epoch": 0.07821273877786176, "grad_norm": 0.9296875, "learning_rate": 0.00019807008180662498, "loss": 1.1755, "step": 3046 }, { "epoch": 0.07823841597378359, "grad_norm": 0.91015625, "learning_rate": 0.0001980692088929049, "loss": 1.0729, "step": 3047 }, { "epoch": 0.0782640931697054, "grad_norm": 0.89453125, "learning_rate": 0.00019806833578374174, "loss": 1.3352, "step": 3048 }, { "epoch": 0.07828977036562722, "grad_norm": 0.953125, "learning_rate": 0.00019806746247913722, "loss": 1.4354, "step": 3049 }, { "epoch": 0.07831544756154904, "grad_norm": 0.828125, "learning_rate": 0.00019806658897909314, "loss": 1.2832, "step": 3050 }, { "epoch": 0.07834112475747086, "grad_norm": 1.25, "learning_rate": 0.00019806571528361119, "loss": 1.2254, "step": 3051 }, { "epoch": 0.07836680195339268, "grad_norm": 0.86328125, "learning_rate": 0.00019806484139269311, "loss": 1.1258, "step": 3052 }, { "epoch": 0.07839247914931449, "grad_norm": 0.9609375, "learning_rate": 0.0001980639673063407, "loss": 1.3175, "step": 3053 }, { "epoch": 0.07841815634523631, "grad_norm": 0.8359375, "learning_rate": 0.00019806309302455565, "loss": 1.1119, "step": 3054 }, { "epoch": 0.07844383354115814, "grad_norm": 0.87109375, "learning_rate": 0.0001980622185473397, "loss": 1.2497, "step": 3055 }, { "epoch": 0.07846951073707996, "grad_norm": 0.98046875, "learning_rate": 0.0001980613438746946, "loss": 1.2851, "step": 3056 }, { "epoch": 0.07849518793300178, "grad_norm": 0.95703125, "learning_rate": 0.00019806046900662212, "loss": 1.3508, "step": 3057 }, { "epoch": 0.07852086512892359, "grad_norm": 0.94140625, "learning_rate": 0.00019805959394312395, "loss": 1.182, "step": 3058 }, { "epoch": 0.07854654232484541, "grad_norm": 0.87109375, "learning_rate": 0.0001980587186842019, "loss": 1.1706, "step": 3059 }, { "epoch": 0.07857221952076723, "grad_norm": 0.92578125, "learning_rate": 0.00019805784322985765, "loss": 1.3305, "step": 3060 }, { "epoch": 0.07859789671668906, "grad_norm": 0.87109375, "learning_rate": 0.000198056967580093, "loss": 1.0204, "step": 3061 }, { "epoch": 0.07862357391261088, "grad_norm": 0.875, "learning_rate": 0.00019805609173490968, "loss": 1.096, "step": 3062 }, { "epoch": 0.07864925110853269, "grad_norm": 0.8671875, "learning_rate": 0.0001980552156943094, "loss": 1.0732, "step": 3063 }, { "epoch": 0.07867492830445451, "grad_norm": 0.82421875, "learning_rate": 0.00019805433945829396, "loss": 1.2371, "step": 3064 }, { "epoch": 0.07870060550037633, "grad_norm": 0.85546875, "learning_rate": 0.00019805346302686507, "loss": 1.0855, "step": 3065 }, { "epoch": 0.07872628269629815, "grad_norm": 0.85546875, "learning_rate": 0.00019805258640002445, "loss": 1.1087, "step": 3066 }, { "epoch": 0.07875195989221997, "grad_norm": 0.8671875, "learning_rate": 0.00019805170957777396, "loss": 1.2245, "step": 3067 }, { "epoch": 0.07877763708814178, "grad_norm": 0.8984375, "learning_rate": 0.00019805083256011519, "loss": 1.1586, "step": 3068 }, { "epoch": 0.0788033142840636, "grad_norm": 0.98828125, "learning_rate": 0.00019804995534705003, "loss": 1.2733, "step": 3069 }, { "epoch": 0.07882899147998543, "grad_norm": 0.85546875, "learning_rate": 0.0001980490779385801, "loss": 1.1908, "step": 3070 }, { "epoch": 0.07885466867590725, "grad_norm": 0.80859375, "learning_rate": 0.00019804820033470725, "loss": 1.2225, "step": 3071 }, { "epoch": 0.07888034587182907, "grad_norm": 0.88671875, "learning_rate": 0.0001980473225354332, "loss": 1.2142, "step": 3072 }, { "epoch": 0.07890602306775088, "grad_norm": 0.88671875, "learning_rate": 0.00019804644454075968, "loss": 1.1509, "step": 3073 }, { "epoch": 0.0789317002636727, "grad_norm": 0.90234375, "learning_rate": 0.00019804556635068848, "loss": 1.2113, "step": 3074 }, { "epoch": 0.07895737745959452, "grad_norm": 0.88671875, "learning_rate": 0.00019804468796522128, "loss": 1.1386, "step": 3075 }, { "epoch": 0.07898305465551635, "grad_norm": 0.9140625, "learning_rate": 0.00019804380938435992, "loss": 1.2329, "step": 3076 }, { "epoch": 0.07900873185143817, "grad_norm": 0.8828125, "learning_rate": 0.00019804293060810606, "loss": 1.381, "step": 3077 }, { "epoch": 0.07903440904735998, "grad_norm": 0.8359375, "learning_rate": 0.00019804205163646152, "loss": 1.247, "step": 3078 }, { "epoch": 0.0790600862432818, "grad_norm": 0.91015625, "learning_rate": 0.00019804117246942802, "loss": 1.3649, "step": 3079 }, { "epoch": 0.07908576343920362, "grad_norm": 0.8671875, "learning_rate": 0.00019804029310700733, "loss": 1.2954, "step": 3080 }, { "epoch": 0.07911144063512544, "grad_norm": 0.796875, "learning_rate": 0.00019803941354920116, "loss": 1.1696, "step": 3081 }, { "epoch": 0.07913711783104727, "grad_norm": 0.82421875, "learning_rate": 0.00019803853379601134, "loss": 1.1372, "step": 3082 }, { "epoch": 0.07916279502696907, "grad_norm": 0.94921875, "learning_rate": 0.00019803765384743954, "loss": 1.3965, "step": 3083 }, { "epoch": 0.0791884722228909, "grad_norm": 0.8984375, "learning_rate": 0.00019803677370348758, "loss": 1.2579, "step": 3084 }, { "epoch": 0.07921414941881272, "grad_norm": 0.92578125, "learning_rate": 0.00019803589336415714, "loss": 1.2296, "step": 3085 }, { "epoch": 0.07923982661473454, "grad_norm": 0.87890625, "learning_rate": 0.00019803501282945006, "loss": 1.1712, "step": 3086 }, { "epoch": 0.07926550381065636, "grad_norm": 0.875, "learning_rate": 0.00019803413209936807, "loss": 1.3424, "step": 3087 }, { "epoch": 0.07929118100657817, "grad_norm": 0.94140625, "learning_rate": 0.00019803325117391288, "loss": 1.1522, "step": 3088 }, { "epoch": 0.07931685820249999, "grad_norm": 0.84375, "learning_rate": 0.00019803237005308627, "loss": 1.1266, "step": 3089 }, { "epoch": 0.07934253539842182, "grad_norm": 0.8984375, "learning_rate": 0.00019803148873689, "loss": 1.394, "step": 3090 }, { "epoch": 0.07936821259434364, "grad_norm": 0.94140625, "learning_rate": 0.00019803060722532588, "loss": 1.2796, "step": 3091 }, { "epoch": 0.07939388979026546, "grad_norm": 0.88671875, "learning_rate": 0.00019802972551839556, "loss": 1.2322, "step": 3092 }, { "epoch": 0.07941956698618727, "grad_norm": 0.921875, "learning_rate": 0.00019802884361610088, "loss": 1.2058, "step": 3093 }, { "epoch": 0.07944524418210909, "grad_norm": 0.82421875, "learning_rate": 0.00019802796151844357, "loss": 1.214, "step": 3094 }, { "epoch": 0.07947092137803091, "grad_norm": 0.8671875, "learning_rate": 0.00019802707922542535, "loss": 1.1646, "step": 3095 }, { "epoch": 0.07949659857395273, "grad_norm": 0.94140625, "learning_rate": 0.00019802619673704806, "loss": 1.2468, "step": 3096 }, { "epoch": 0.07952227576987456, "grad_norm": 0.84765625, "learning_rate": 0.0001980253140533134, "loss": 1.1527, "step": 3097 }, { "epoch": 0.07954795296579636, "grad_norm": 0.92578125, "learning_rate": 0.00019802443117422313, "loss": 1.2369, "step": 3098 }, { "epoch": 0.07957363016171819, "grad_norm": 0.87890625, "learning_rate": 0.00019802354809977905, "loss": 1.2662, "step": 3099 }, { "epoch": 0.07959930735764001, "grad_norm": 1.03125, "learning_rate": 0.00019802266482998285, "loss": 1.1835, "step": 3100 }, { "epoch": 0.07962498455356183, "grad_norm": 0.90625, "learning_rate": 0.00019802178136483638, "loss": 1.2441, "step": 3101 }, { "epoch": 0.07965066174948365, "grad_norm": 0.859375, "learning_rate": 0.00019802089770434134, "loss": 1.2645, "step": 3102 }, { "epoch": 0.07967633894540546, "grad_norm": 0.83203125, "learning_rate": 0.0001980200138484995, "loss": 1.103, "step": 3103 }, { "epoch": 0.07970201614132728, "grad_norm": 0.86328125, "learning_rate": 0.00019801912979731262, "loss": 1.2253, "step": 3104 }, { "epoch": 0.0797276933372491, "grad_norm": 0.8984375, "learning_rate": 0.00019801824555078248, "loss": 1.1634, "step": 3105 }, { "epoch": 0.07975337053317093, "grad_norm": 0.83984375, "learning_rate": 0.0001980173611089108, "loss": 1.31, "step": 3106 }, { "epoch": 0.07977904772909275, "grad_norm": 0.8515625, "learning_rate": 0.0001980164764716994, "loss": 1.213, "step": 3107 }, { "epoch": 0.07980472492501456, "grad_norm": 0.90625, "learning_rate": 0.00019801559163915003, "loss": 1.207, "step": 3108 }, { "epoch": 0.07983040212093638, "grad_norm": 0.87890625, "learning_rate": 0.00019801470661126442, "loss": 1.2007, "step": 3109 }, { "epoch": 0.0798560793168582, "grad_norm": 0.98046875, "learning_rate": 0.00019801382138804436, "loss": 1.2503, "step": 3110 }, { "epoch": 0.07988175651278002, "grad_norm": 0.90234375, "learning_rate": 0.0001980129359694916, "loss": 1.1615, "step": 3111 }, { "epoch": 0.07990743370870185, "grad_norm": 0.85546875, "learning_rate": 0.00019801205035560794, "loss": 1.1175, "step": 3112 }, { "epoch": 0.07993311090462366, "grad_norm": 0.79296875, "learning_rate": 0.0001980111645463951, "loss": 1.1527, "step": 3113 }, { "epoch": 0.07995878810054548, "grad_norm": 0.8359375, "learning_rate": 0.00019801027854185482, "loss": 1.0649, "step": 3114 }, { "epoch": 0.0799844652964673, "grad_norm": 0.8203125, "learning_rate": 0.00019800939234198897, "loss": 1.3412, "step": 3115 }, { "epoch": 0.08001014249238912, "grad_norm": 0.84375, "learning_rate": 0.00019800850594679922, "loss": 1.2746, "step": 3116 }, { "epoch": 0.08003581968831094, "grad_norm": 0.98046875, "learning_rate": 0.00019800761935628738, "loss": 1.2061, "step": 3117 }, { "epoch": 0.08006149688423275, "grad_norm": 0.83203125, "learning_rate": 0.0001980067325704552, "loss": 1.2752, "step": 3118 }, { "epoch": 0.08008717408015457, "grad_norm": 0.90625, "learning_rate": 0.00019800584558930445, "loss": 1.3231, "step": 3119 }, { "epoch": 0.0801128512760764, "grad_norm": 0.83984375, "learning_rate": 0.00019800495841283694, "loss": 1.1057, "step": 3120 }, { "epoch": 0.08013852847199822, "grad_norm": 0.8671875, "learning_rate": 0.00019800407104105436, "loss": 1.1631, "step": 3121 }, { "epoch": 0.08016420566792004, "grad_norm": 0.94140625, "learning_rate": 0.00019800318347395855, "loss": 1.1801, "step": 3122 }, { "epoch": 0.08018988286384185, "grad_norm": 1.0, "learning_rate": 0.0001980022957115512, "loss": 1.2896, "step": 3123 }, { "epoch": 0.08021556005976367, "grad_norm": 0.90625, "learning_rate": 0.00019800140775383417, "loss": 1.2108, "step": 3124 }, { "epoch": 0.0802412372556855, "grad_norm": 0.83984375, "learning_rate": 0.0001980005196008092, "loss": 1.2214, "step": 3125 }, { "epoch": 0.08026691445160732, "grad_norm": 1.0234375, "learning_rate": 0.00019799963125247802, "loss": 1.1521, "step": 3126 }, { "epoch": 0.08029259164752914, "grad_norm": 0.88671875, "learning_rate": 0.0001979987427088424, "loss": 1.1673, "step": 3127 }, { "epoch": 0.08031826884345095, "grad_norm": 0.9140625, "learning_rate": 0.0001979978539699042, "loss": 1.2036, "step": 3128 }, { "epoch": 0.08034394603937277, "grad_norm": 0.83984375, "learning_rate": 0.0001979969650356651, "loss": 1.1122, "step": 3129 }, { "epoch": 0.08036962323529459, "grad_norm": 0.80859375, "learning_rate": 0.00019799607590612688, "loss": 1.0159, "step": 3130 }, { "epoch": 0.08039530043121641, "grad_norm": 0.83203125, "learning_rate": 0.00019799518658129137, "loss": 1.1261, "step": 3131 }, { "epoch": 0.08042097762713823, "grad_norm": 0.8828125, "learning_rate": 0.00019799429706116026, "loss": 1.2971, "step": 3132 }, { "epoch": 0.08044665482306004, "grad_norm": 0.79296875, "learning_rate": 0.00019799340734573542, "loss": 1.2817, "step": 3133 }, { "epoch": 0.08047233201898187, "grad_norm": 0.8828125, "learning_rate": 0.00019799251743501852, "loss": 1.3342, "step": 3134 }, { "epoch": 0.08049800921490369, "grad_norm": 0.83203125, "learning_rate": 0.00019799162732901142, "loss": 1.2005, "step": 3135 }, { "epoch": 0.08052368641082551, "grad_norm": 0.953125, "learning_rate": 0.00019799073702771584, "loss": 1.2515, "step": 3136 }, { "epoch": 0.08054936360674733, "grad_norm": 0.8828125, "learning_rate": 0.00019798984653113358, "loss": 1.1952, "step": 3137 }, { "epoch": 0.08057504080266914, "grad_norm": 0.9140625, "learning_rate": 0.00019798895583926642, "loss": 1.0856, "step": 3138 }, { "epoch": 0.08060071799859096, "grad_norm": 0.84375, "learning_rate": 0.0001979880649521161, "loss": 1.2187, "step": 3139 }, { "epoch": 0.08062639519451278, "grad_norm": 0.91796875, "learning_rate": 0.00019798717386968444, "loss": 1.1139, "step": 3140 }, { "epoch": 0.0806520723904346, "grad_norm": 0.8359375, "learning_rate": 0.00019798628259197316, "loss": 1.1245, "step": 3141 }, { "epoch": 0.08067774958635643, "grad_norm": 0.83203125, "learning_rate": 0.0001979853911189841, "loss": 1.1896, "step": 3142 }, { "epoch": 0.08070342678227824, "grad_norm": 0.8203125, "learning_rate": 0.000197984499450719, "loss": 1.233, "step": 3143 }, { "epoch": 0.08072910397820006, "grad_norm": 0.8828125, "learning_rate": 0.00019798360758717962, "loss": 1.1989, "step": 3144 }, { "epoch": 0.08075478117412188, "grad_norm": 0.8203125, "learning_rate": 0.0001979827155283678, "loss": 1.1448, "step": 3145 }, { "epoch": 0.0807804583700437, "grad_norm": 0.92578125, "learning_rate": 0.00019798182327428524, "loss": 1.166, "step": 3146 }, { "epoch": 0.08080613556596553, "grad_norm": 0.85546875, "learning_rate": 0.00019798093082493379, "loss": 1.224, "step": 3147 }, { "epoch": 0.08083181276188733, "grad_norm": 0.85546875, "learning_rate": 0.0001979800381803152, "loss": 1.3312, "step": 3148 }, { "epoch": 0.08085748995780916, "grad_norm": 0.8828125, "learning_rate": 0.00019797914534043121, "loss": 1.147, "step": 3149 }, { "epoch": 0.08088316715373098, "grad_norm": 0.875, "learning_rate": 0.00019797825230528365, "loss": 1.2704, "step": 3150 }, { "epoch": 0.0809088443496528, "grad_norm": 0.7734375, "learning_rate": 0.00019797735907487428, "loss": 1.0012, "step": 3151 }, { "epoch": 0.08093452154557461, "grad_norm": 0.9609375, "learning_rate": 0.0001979764656492049, "loss": 1.1913, "step": 3152 }, { "epoch": 0.08096019874149643, "grad_norm": 0.890625, "learning_rate": 0.00019797557202827726, "loss": 1.2702, "step": 3153 }, { "epoch": 0.08098587593741825, "grad_norm": 0.8515625, "learning_rate": 0.00019797467821209313, "loss": 1.0569, "step": 3154 }, { "epoch": 0.08101155313334008, "grad_norm": 0.9921875, "learning_rate": 0.00019797378420065436, "loss": 1.1963, "step": 3155 }, { "epoch": 0.0810372303292619, "grad_norm": 0.84375, "learning_rate": 0.00019797288999396265, "loss": 1.1176, "step": 3156 }, { "epoch": 0.0810629075251837, "grad_norm": 0.85546875, "learning_rate": 0.00019797199559201984, "loss": 1.1668, "step": 3157 }, { "epoch": 0.08108858472110553, "grad_norm": 0.90625, "learning_rate": 0.00019797110099482773, "loss": 1.272, "step": 3158 }, { "epoch": 0.08111426191702735, "grad_norm": 0.91015625, "learning_rate": 0.00019797020620238799, "loss": 1.0964, "step": 3159 }, { "epoch": 0.08113993911294917, "grad_norm": 0.87109375, "learning_rate": 0.00019796931121470252, "loss": 1.1456, "step": 3160 }, { "epoch": 0.081165616308871, "grad_norm": 0.97265625, "learning_rate": 0.00019796841603177308, "loss": 1.3371, "step": 3161 }, { "epoch": 0.0811912935047928, "grad_norm": 0.8203125, "learning_rate": 0.0001979675206536014, "loss": 1.121, "step": 3162 }, { "epoch": 0.08121697070071462, "grad_norm": 0.9453125, "learning_rate": 0.00019796662508018932, "loss": 1.32, "step": 3163 }, { "epoch": 0.08124264789663645, "grad_norm": 0.8515625, "learning_rate": 0.00019796572931153863, "loss": 1.2187, "step": 3164 }, { "epoch": 0.08126832509255827, "grad_norm": 0.85546875, "learning_rate": 0.00019796483334765105, "loss": 1.2247, "step": 3165 }, { "epoch": 0.08129400228848009, "grad_norm": 0.88671875, "learning_rate": 0.00019796393718852842, "loss": 1.0933, "step": 3166 }, { "epoch": 0.0813196794844019, "grad_norm": 0.859375, "learning_rate": 0.00019796304083417253, "loss": 1.4081, "step": 3167 }, { "epoch": 0.08134535668032372, "grad_norm": 0.9453125, "learning_rate": 0.0001979621442845851, "loss": 1.183, "step": 3168 }, { "epoch": 0.08137103387624554, "grad_norm": 0.9296875, "learning_rate": 0.00019796124753976798, "loss": 1.2247, "step": 3169 }, { "epoch": 0.08139671107216737, "grad_norm": 1.15625, "learning_rate": 0.00019796035059972298, "loss": 1.1926, "step": 3170 }, { "epoch": 0.08142238826808919, "grad_norm": 0.90625, "learning_rate": 0.00019795945346445183, "loss": 1.2139, "step": 3171 }, { "epoch": 0.081448065464011, "grad_norm": 0.8125, "learning_rate": 0.00019795855613395633, "loss": 1.2054, "step": 3172 }, { "epoch": 0.08147374265993282, "grad_norm": 0.83984375, "learning_rate": 0.0001979576586082383, "loss": 1.076, "step": 3173 }, { "epoch": 0.08149941985585464, "grad_norm": 0.84765625, "learning_rate": 0.00019795676088729946, "loss": 1.2338, "step": 3174 }, { "epoch": 0.08152509705177646, "grad_norm": 0.9140625, "learning_rate": 0.0001979558629711417, "loss": 1.1816, "step": 3175 }, { "epoch": 0.08155077424769828, "grad_norm": 0.87890625, "learning_rate": 0.00019795496485976673, "loss": 1.2586, "step": 3176 }, { "epoch": 0.0815764514436201, "grad_norm": 0.90625, "learning_rate": 0.00019795406655317632, "loss": 1.1768, "step": 3177 }, { "epoch": 0.08160212863954192, "grad_norm": 0.91015625, "learning_rate": 0.00019795316805137236, "loss": 1.2037, "step": 3178 }, { "epoch": 0.08162780583546374, "grad_norm": 0.91015625, "learning_rate": 0.00019795226935435655, "loss": 1.0858, "step": 3179 }, { "epoch": 0.08165348303138556, "grad_norm": 0.875, "learning_rate": 0.00019795137046213076, "loss": 1.1768, "step": 3180 }, { "epoch": 0.08167916022730738, "grad_norm": 0.83203125, "learning_rate": 0.0001979504713746967, "loss": 1.3245, "step": 3181 }, { "epoch": 0.08170483742322919, "grad_norm": 0.98046875, "learning_rate": 0.0001979495720920562, "loss": 1.2267, "step": 3182 }, { "epoch": 0.08173051461915101, "grad_norm": 0.8359375, "learning_rate": 0.00019794867261421106, "loss": 1.1539, "step": 3183 }, { "epoch": 0.08175619181507283, "grad_norm": 0.88671875, "learning_rate": 0.00019794777294116305, "loss": 1.2724, "step": 3184 }, { "epoch": 0.08178186901099466, "grad_norm": 0.859375, "learning_rate": 0.00019794687307291396, "loss": 1.2629, "step": 3185 }, { "epoch": 0.08180754620691648, "grad_norm": 0.86328125, "learning_rate": 0.00019794597300946566, "loss": 1.1838, "step": 3186 }, { "epoch": 0.08183322340283829, "grad_norm": 0.8671875, "learning_rate": 0.00019794507275081984, "loss": 1.1786, "step": 3187 }, { "epoch": 0.08185890059876011, "grad_norm": 0.859375, "learning_rate": 0.00019794417229697833, "loss": 1.2303, "step": 3188 }, { "epoch": 0.08188457779468193, "grad_norm": 0.87890625, "learning_rate": 0.00019794327164794298, "loss": 1.1794, "step": 3189 }, { "epoch": 0.08191025499060375, "grad_norm": 0.84375, "learning_rate": 0.0001979423708037155, "loss": 1.0404, "step": 3190 }, { "epoch": 0.08193593218652558, "grad_norm": 0.86328125, "learning_rate": 0.0001979414697642977, "loss": 1.1206, "step": 3191 }, { "epoch": 0.08196160938244738, "grad_norm": 0.8125, "learning_rate": 0.00019794056852969142, "loss": 1.1658, "step": 3192 }, { "epoch": 0.0819872865783692, "grad_norm": 0.8125, "learning_rate": 0.00019793966709989844, "loss": 1.2676, "step": 3193 }, { "epoch": 0.08201296377429103, "grad_norm": 0.81640625, "learning_rate": 0.00019793876547492055, "loss": 1.1234, "step": 3194 }, { "epoch": 0.08203864097021285, "grad_norm": 0.8515625, "learning_rate": 0.00019793786365475955, "loss": 1.2049, "step": 3195 }, { "epoch": 0.08206431816613467, "grad_norm": 0.875, "learning_rate": 0.00019793696163941725, "loss": 1.1663, "step": 3196 }, { "epoch": 0.08208999536205648, "grad_norm": 0.86328125, "learning_rate": 0.00019793605942889538, "loss": 1.236, "step": 3197 }, { "epoch": 0.0821156725579783, "grad_norm": 0.890625, "learning_rate": 0.00019793515702319585, "loss": 1.1726, "step": 3198 }, { "epoch": 0.08214134975390013, "grad_norm": 0.94921875, "learning_rate": 0.00019793425442232035, "loss": 1.3263, "step": 3199 }, { "epoch": 0.08216702694982195, "grad_norm": 0.87890625, "learning_rate": 0.00019793335162627073, "loss": 1.3517, "step": 3200 }, { "epoch": 0.08219270414574377, "grad_norm": 0.8203125, "learning_rate": 0.00019793244863504882, "loss": 1.2659, "step": 3201 }, { "epoch": 0.08221838134166558, "grad_norm": 0.8828125, "learning_rate": 0.0001979315454486564, "loss": 1.3489, "step": 3202 }, { "epoch": 0.0822440585375874, "grad_norm": 0.9296875, "learning_rate": 0.0001979306420670952, "loss": 1.158, "step": 3203 }, { "epoch": 0.08226973573350922, "grad_norm": 0.87890625, "learning_rate": 0.00019792973849036712, "loss": 1.1653, "step": 3204 }, { "epoch": 0.08229541292943104, "grad_norm": 0.9921875, "learning_rate": 0.00019792883471847389, "loss": 1.2814, "step": 3205 }, { "epoch": 0.08232109012535287, "grad_norm": 0.859375, "learning_rate": 0.00019792793075141732, "loss": 1.2513, "step": 3206 }, { "epoch": 0.08234676732127467, "grad_norm": 0.86328125, "learning_rate": 0.00019792702658919927, "loss": 1.0929, "step": 3207 }, { "epoch": 0.0823724445171965, "grad_norm": 0.89453125, "learning_rate": 0.0001979261222318215, "loss": 1.2315, "step": 3208 }, { "epoch": 0.08239812171311832, "grad_norm": 0.83984375, "learning_rate": 0.0001979252176792858, "loss": 1.1424, "step": 3209 }, { "epoch": 0.08242379890904014, "grad_norm": 0.87109375, "learning_rate": 0.00019792431293159398, "loss": 1.2429, "step": 3210 }, { "epoch": 0.08244947610496196, "grad_norm": 0.8125, "learning_rate": 0.00019792340798874787, "loss": 1.2138, "step": 3211 }, { "epoch": 0.08247515330088377, "grad_norm": 0.87109375, "learning_rate": 0.00019792250285074925, "loss": 1.162, "step": 3212 }, { "epoch": 0.0825008304968056, "grad_norm": 1.015625, "learning_rate": 0.00019792159751759992, "loss": 1.1434, "step": 3213 }, { "epoch": 0.08252650769272742, "grad_norm": 0.9296875, "learning_rate": 0.0001979206919893017, "loss": 1.2324, "step": 3214 }, { "epoch": 0.08255218488864924, "grad_norm": 0.82421875, "learning_rate": 0.00019791978626585636, "loss": 1.1201, "step": 3215 }, { "epoch": 0.08257786208457106, "grad_norm": 0.91796875, "learning_rate": 0.00019791888034726576, "loss": 1.2766, "step": 3216 }, { "epoch": 0.08260353928049287, "grad_norm": 0.890625, "learning_rate": 0.00019791797423353167, "loss": 1.1285, "step": 3217 }, { "epoch": 0.08262921647641469, "grad_norm": 0.875, "learning_rate": 0.0001979170679246559, "loss": 0.9866, "step": 3218 }, { "epoch": 0.08265489367233651, "grad_norm": 0.95703125, "learning_rate": 0.00019791616142064022, "loss": 1.2461, "step": 3219 }, { "epoch": 0.08268057086825834, "grad_norm": 1.1796875, "learning_rate": 0.00019791525472148653, "loss": 1.1953, "step": 3220 }, { "epoch": 0.08270624806418016, "grad_norm": 0.890625, "learning_rate": 0.00019791434782719659, "loss": 1.3099, "step": 3221 }, { "epoch": 0.08273192526010197, "grad_norm": 0.8671875, "learning_rate": 0.00019791344073777213, "loss": 1.1243, "step": 3222 }, { "epoch": 0.08275760245602379, "grad_norm": 0.85546875, "learning_rate": 0.00019791253345321507, "loss": 1.318, "step": 3223 }, { "epoch": 0.08278327965194561, "grad_norm": 0.83203125, "learning_rate": 0.0001979116259735272, "loss": 1.0865, "step": 3224 }, { "epoch": 0.08280895684786743, "grad_norm": 0.78125, "learning_rate": 0.00019791071829871026, "loss": 1.1332, "step": 3225 }, { "epoch": 0.08283463404378925, "grad_norm": 0.91796875, "learning_rate": 0.00019790981042876613, "loss": 1.2503, "step": 3226 }, { "epoch": 0.08286031123971106, "grad_norm": 1.0390625, "learning_rate": 0.0001979089023636966, "loss": 1.3376, "step": 3227 }, { "epoch": 0.08288598843563288, "grad_norm": 0.9296875, "learning_rate": 0.00019790799410350344, "loss": 1.2412, "step": 3228 }, { "epoch": 0.0829116656315547, "grad_norm": 0.921875, "learning_rate": 0.0001979070856481885, "loss": 1.2392, "step": 3229 }, { "epoch": 0.08293734282747653, "grad_norm": 0.84765625, "learning_rate": 0.00019790617699775358, "loss": 1.0747, "step": 3230 }, { "epoch": 0.08296302002339835, "grad_norm": 0.91796875, "learning_rate": 0.0001979052681522005, "loss": 1.1245, "step": 3231 }, { "epoch": 0.08298869721932016, "grad_norm": 0.984375, "learning_rate": 0.00019790435911153106, "loss": 1.2486, "step": 3232 }, { "epoch": 0.08301437441524198, "grad_norm": 0.8515625, "learning_rate": 0.0001979034498757471, "loss": 1.0596, "step": 3233 }, { "epoch": 0.0830400516111638, "grad_norm": 0.80078125, "learning_rate": 0.00019790254044485037, "loss": 1.1315, "step": 3234 }, { "epoch": 0.08306572880708563, "grad_norm": 0.90625, "learning_rate": 0.00019790163081884275, "loss": 1.2317, "step": 3235 }, { "epoch": 0.08309140600300745, "grad_norm": 0.8359375, "learning_rate": 0.00019790072099772603, "loss": 1.2138, "step": 3236 }, { "epoch": 0.08311708319892926, "grad_norm": 0.94140625, "learning_rate": 0.00019789981098150197, "loss": 1.264, "step": 3237 }, { "epoch": 0.08314276039485108, "grad_norm": 0.890625, "learning_rate": 0.0001978989007701725, "loss": 1.124, "step": 3238 }, { "epoch": 0.0831684375907729, "grad_norm": 0.91796875, "learning_rate": 0.0001978979903637393, "loss": 1.1994, "step": 3239 }, { "epoch": 0.08319411478669472, "grad_norm": 1.4140625, "learning_rate": 0.00019789707976220427, "loss": 1.3064, "step": 3240 }, { "epoch": 0.08321979198261654, "grad_norm": 0.96875, "learning_rate": 0.00019789616896556921, "loss": 1.2342, "step": 3241 }, { "epoch": 0.08324546917853835, "grad_norm": 0.80078125, "learning_rate": 0.0001978952579738359, "loss": 1.0331, "step": 3242 }, { "epoch": 0.08327114637446018, "grad_norm": 1.078125, "learning_rate": 0.00019789434678700623, "loss": 1.4082, "step": 3243 }, { "epoch": 0.083296823570382, "grad_norm": 1.8125, "learning_rate": 0.00019789343540508193, "loss": 1.31, "step": 3244 }, { "epoch": 0.08332250076630382, "grad_norm": 0.83984375, "learning_rate": 0.0001978925238280649, "loss": 1.1408, "step": 3245 }, { "epoch": 0.08334817796222564, "grad_norm": 0.8125, "learning_rate": 0.00019789161205595687, "loss": 1.2393, "step": 3246 }, { "epoch": 0.08337385515814745, "grad_norm": 0.86328125, "learning_rate": 0.00019789070008875971, "loss": 1.181, "step": 3247 }, { "epoch": 0.08339953235406927, "grad_norm": 0.9609375, "learning_rate": 0.00019788978792647523, "loss": 1.2992, "step": 3248 }, { "epoch": 0.0834252095499911, "grad_norm": 0.95703125, "learning_rate": 0.00019788887556910524, "loss": 1.356, "step": 3249 }, { "epoch": 0.08345088674591292, "grad_norm": 1.0625, "learning_rate": 0.00019788796301665158, "loss": 1.1172, "step": 3250 }, { "epoch": 0.08347656394183474, "grad_norm": 2.1875, "learning_rate": 0.00019788705026911604, "loss": 1.448, "step": 3251 }, { "epoch": 0.08350224113775655, "grad_norm": 0.96484375, "learning_rate": 0.00019788613732650044, "loss": 1.1733, "step": 3252 }, { "epoch": 0.08352791833367837, "grad_norm": 0.921875, "learning_rate": 0.00019788522418880665, "loss": 1.1427, "step": 3253 }, { "epoch": 0.08355359552960019, "grad_norm": 0.8671875, "learning_rate": 0.00019788431085603642, "loss": 1.2393, "step": 3254 }, { "epoch": 0.08357927272552201, "grad_norm": 0.9375, "learning_rate": 0.00019788339732819158, "loss": 1.1973, "step": 3255 }, { "epoch": 0.08360494992144382, "grad_norm": 1.5390625, "learning_rate": 0.00019788248360527402, "loss": 1.2135, "step": 3256 }, { "epoch": 0.08363062711736564, "grad_norm": 1.21875, "learning_rate": 0.00019788156968728545, "loss": 1.3322, "step": 3257 }, { "epoch": 0.08365630431328747, "grad_norm": 1.1484375, "learning_rate": 0.00019788065557422782, "loss": 1.3762, "step": 3258 }, { "epoch": 0.08368198150920929, "grad_norm": 0.95703125, "learning_rate": 0.00019787974126610284, "loss": 1.1702, "step": 3259 }, { "epoch": 0.08370765870513111, "grad_norm": 0.83203125, "learning_rate": 0.0001978788267629124, "loss": 1.1563, "step": 3260 }, { "epoch": 0.08373333590105292, "grad_norm": 0.8984375, "learning_rate": 0.00019787791206465828, "loss": 1.2337, "step": 3261 }, { "epoch": 0.08375901309697474, "grad_norm": 0.98046875, "learning_rate": 0.00019787699717134234, "loss": 1.2594, "step": 3262 }, { "epoch": 0.08378469029289656, "grad_norm": 0.7734375, "learning_rate": 0.0001978760820829664, "loss": 1.1183, "step": 3263 }, { "epoch": 0.08381036748881839, "grad_norm": 0.9140625, "learning_rate": 0.00019787516679953225, "loss": 1.1413, "step": 3264 }, { "epoch": 0.08383604468474021, "grad_norm": 0.83203125, "learning_rate": 0.0001978742513210417, "loss": 0.9059, "step": 3265 }, { "epoch": 0.08386172188066202, "grad_norm": 0.828125, "learning_rate": 0.00019787333564749666, "loss": 1.1896, "step": 3266 }, { "epoch": 0.08388739907658384, "grad_norm": 1.0234375, "learning_rate": 0.0001978724197788989, "loss": 1.1307, "step": 3267 }, { "epoch": 0.08391307627250566, "grad_norm": 1.0, "learning_rate": 0.00019787150371525022, "loss": 1.2027, "step": 3268 }, { "epoch": 0.08393875346842748, "grad_norm": 0.921875, "learning_rate": 0.00019787058745655248, "loss": 1.2061, "step": 3269 }, { "epoch": 0.0839644306643493, "grad_norm": 0.95703125, "learning_rate": 0.0001978696710028075, "loss": 1.1829, "step": 3270 }, { "epoch": 0.08399010786027111, "grad_norm": 0.9375, "learning_rate": 0.0001978687543540171, "loss": 1.425, "step": 3271 }, { "epoch": 0.08401578505619293, "grad_norm": 0.87109375, "learning_rate": 0.00019786783751018315, "loss": 1.1366, "step": 3272 }, { "epoch": 0.08404146225211476, "grad_norm": 0.8984375, "learning_rate": 0.0001978669204713074, "loss": 1.1734, "step": 3273 }, { "epoch": 0.08406713944803658, "grad_norm": 1.046875, "learning_rate": 0.00019786600323739174, "loss": 1.2034, "step": 3274 }, { "epoch": 0.0840928166439584, "grad_norm": 0.87109375, "learning_rate": 0.00019786508580843797, "loss": 1.1262, "step": 3275 }, { "epoch": 0.08411849383988021, "grad_norm": 0.9296875, "learning_rate": 0.00019786416818444794, "loss": 1.1436, "step": 3276 }, { "epoch": 0.08414417103580203, "grad_norm": 0.9375, "learning_rate": 0.0001978632503654234, "loss": 1.1173, "step": 3277 }, { "epoch": 0.08416984823172385, "grad_norm": 0.9453125, "learning_rate": 0.0001978623323513663, "loss": 1.3724, "step": 3278 }, { "epoch": 0.08419552542764568, "grad_norm": 0.859375, "learning_rate": 0.0001978614141422784, "loss": 1.3188, "step": 3279 }, { "epoch": 0.0842212026235675, "grad_norm": 0.87109375, "learning_rate": 0.00019786049573816157, "loss": 1.2234, "step": 3280 }, { "epoch": 0.0842468798194893, "grad_norm": 0.8203125, "learning_rate": 0.00019785957713901758, "loss": 1.1338, "step": 3281 }, { "epoch": 0.08427255701541113, "grad_norm": 0.9140625, "learning_rate": 0.00019785865834484828, "loss": 1.2793, "step": 3282 }, { "epoch": 0.08429823421133295, "grad_norm": 0.859375, "learning_rate": 0.00019785773935565552, "loss": 1.3279, "step": 3283 }, { "epoch": 0.08432391140725477, "grad_norm": 0.921875, "learning_rate": 0.00019785682017144115, "loss": 1.0464, "step": 3284 }, { "epoch": 0.0843495886031766, "grad_norm": 0.98828125, "learning_rate": 0.00019785590079220695, "loss": 1.0603, "step": 3285 }, { "epoch": 0.0843752657990984, "grad_norm": 0.88671875, "learning_rate": 0.00019785498121795478, "loss": 1.2792, "step": 3286 }, { "epoch": 0.08440094299502023, "grad_norm": 0.8984375, "learning_rate": 0.0001978540614486865, "loss": 1.1293, "step": 3287 }, { "epoch": 0.08442662019094205, "grad_norm": 0.890625, "learning_rate": 0.00019785314148440387, "loss": 1.144, "step": 3288 }, { "epoch": 0.08445229738686387, "grad_norm": 1.3125, "learning_rate": 0.00019785222132510878, "loss": 1.1997, "step": 3289 }, { "epoch": 0.08447797458278569, "grad_norm": 0.921875, "learning_rate": 0.00019785130097080308, "loss": 1.4469, "step": 3290 }, { "epoch": 0.0845036517787075, "grad_norm": 0.9296875, "learning_rate": 0.00019785038042148857, "loss": 1.1522, "step": 3291 }, { "epoch": 0.08452932897462932, "grad_norm": 0.94921875, "learning_rate": 0.0001978494596771671, "loss": 1.3213, "step": 3292 }, { "epoch": 0.08455500617055114, "grad_norm": 0.89453125, "learning_rate": 0.00019784853873784045, "loss": 1.2236, "step": 3293 }, { "epoch": 0.08458068336647297, "grad_norm": 0.91015625, "learning_rate": 0.00019784761760351053, "loss": 1.0752, "step": 3294 }, { "epoch": 0.08460636056239479, "grad_norm": 0.84375, "learning_rate": 0.00019784669627417913, "loss": 1.1213, "step": 3295 }, { "epoch": 0.0846320377583166, "grad_norm": 1.09375, "learning_rate": 0.00019784577474984814, "loss": 1.1066, "step": 3296 }, { "epoch": 0.08465771495423842, "grad_norm": 0.88671875, "learning_rate": 0.00019784485303051935, "loss": 1.3931, "step": 3297 }, { "epoch": 0.08468339215016024, "grad_norm": 0.86328125, "learning_rate": 0.00019784393111619457, "loss": 1.2324, "step": 3298 }, { "epoch": 0.08470906934608206, "grad_norm": 0.83203125, "learning_rate": 0.00019784300900687569, "loss": 1.0495, "step": 3299 }, { "epoch": 0.08473474654200389, "grad_norm": 0.95703125, "learning_rate": 0.00019784208670256456, "loss": 1.185, "step": 3300 }, { "epoch": 0.0847604237379257, "grad_norm": 0.875, "learning_rate": 0.00019784116420326294, "loss": 1.209, "step": 3301 }, { "epoch": 0.08478610093384752, "grad_norm": 0.921875, "learning_rate": 0.00019784024150897275, "loss": 1.1104, "step": 3302 }, { "epoch": 0.08481177812976934, "grad_norm": 0.86328125, "learning_rate": 0.00019783931861969581, "loss": 1.2195, "step": 3303 }, { "epoch": 0.08483745532569116, "grad_norm": 0.83203125, "learning_rate": 0.00019783839553543393, "loss": 1.1154, "step": 3304 }, { "epoch": 0.08486313252161298, "grad_norm": 0.89453125, "learning_rate": 0.00019783747225618896, "loss": 1.1747, "step": 3305 }, { "epoch": 0.08488880971753479, "grad_norm": 0.875, "learning_rate": 0.00019783654878196275, "loss": 1.2178, "step": 3306 }, { "epoch": 0.08491448691345661, "grad_norm": 0.9296875, "learning_rate": 0.0001978356251127571, "loss": 1.2756, "step": 3307 }, { "epoch": 0.08494016410937844, "grad_norm": 0.890625, "learning_rate": 0.00019783470124857394, "loss": 1.0409, "step": 3308 }, { "epoch": 0.08496584130530026, "grad_norm": 0.890625, "learning_rate": 0.00019783377718941503, "loss": 1.2614, "step": 3309 }, { "epoch": 0.08499151850122208, "grad_norm": 0.9609375, "learning_rate": 0.00019783285293528224, "loss": 1.3144, "step": 3310 }, { "epoch": 0.08501719569714389, "grad_norm": 0.8984375, "learning_rate": 0.0001978319284861774, "loss": 1.2056, "step": 3311 }, { "epoch": 0.08504287289306571, "grad_norm": 0.90625, "learning_rate": 0.00019783100384210243, "loss": 1.3258, "step": 3312 }, { "epoch": 0.08506855008898753, "grad_norm": 0.8671875, "learning_rate": 0.00019783007900305903, "loss": 1.215, "step": 3313 }, { "epoch": 0.08509422728490935, "grad_norm": 0.86328125, "learning_rate": 0.00019782915396904916, "loss": 1.1779, "step": 3314 }, { "epoch": 0.08511990448083118, "grad_norm": 0.92578125, "learning_rate": 0.00019782822874007461, "loss": 1.099, "step": 3315 }, { "epoch": 0.08514558167675298, "grad_norm": 0.8828125, "learning_rate": 0.00019782730331613724, "loss": 1.1973, "step": 3316 }, { "epoch": 0.08517125887267481, "grad_norm": 0.88671875, "learning_rate": 0.0001978263776972389, "loss": 1.1025, "step": 3317 }, { "epoch": 0.08519693606859663, "grad_norm": 0.828125, "learning_rate": 0.0001978254518833814, "loss": 1.24, "step": 3318 }, { "epoch": 0.08522261326451845, "grad_norm": 0.87890625, "learning_rate": 0.0001978245258745666, "loss": 1.2816, "step": 3319 }, { "epoch": 0.08524829046044027, "grad_norm": 0.953125, "learning_rate": 0.0001978235996707964, "loss": 1.1593, "step": 3320 }, { "epoch": 0.08527396765636208, "grad_norm": 0.828125, "learning_rate": 0.00019782267327207256, "loss": 1.2219, "step": 3321 }, { "epoch": 0.0852996448522839, "grad_norm": 0.96484375, "learning_rate": 0.00019782174667839703, "loss": 1.1216, "step": 3322 }, { "epoch": 0.08532532204820573, "grad_norm": 0.95703125, "learning_rate": 0.00019782081988977155, "loss": 1.4018, "step": 3323 }, { "epoch": 0.08535099924412755, "grad_norm": 0.84375, "learning_rate": 0.00019781989290619802, "loss": 1.2271, "step": 3324 }, { "epoch": 0.08537667644004937, "grad_norm": 1.1328125, "learning_rate": 0.0001978189657276783, "loss": 1.2062, "step": 3325 }, { "epoch": 0.08540235363597118, "grad_norm": 0.9375, "learning_rate": 0.00019781803835421417, "loss": 1.4166, "step": 3326 }, { "epoch": 0.085428030831893, "grad_norm": 0.8671875, "learning_rate": 0.00019781711078580756, "loss": 1.1514, "step": 3327 }, { "epoch": 0.08545370802781482, "grad_norm": 0.90234375, "learning_rate": 0.00019781618302246027, "loss": 1.2469, "step": 3328 }, { "epoch": 0.08547938522373665, "grad_norm": 0.96875, "learning_rate": 0.00019781525506417418, "loss": 1.3499, "step": 3329 }, { "epoch": 0.08550506241965847, "grad_norm": 0.91015625, "learning_rate": 0.0001978143269109511, "loss": 1.3263, "step": 3330 }, { "epoch": 0.08553073961558028, "grad_norm": 0.828125, "learning_rate": 0.00019781339856279293, "loss": 1.0786, "step": 3331 }, { "epoch": 0.0855564168115021, "grad_norm": 0.9296875, "learning_rate": 0.00019781247001970145, "loss": 1.2106, "step": 3332 }, { "epoch": 0.08558209400742392, "grad_norm": 0.859375, "learning_rate": 0.0001978115412816786, "loss": 1.32, "step": 3333 }, { "epoch": 0.08560777120334574, "grad_norm": 0.94921875, "learning_rate": 0.00019781061234872615, "loss": 1.2388, "step": 3334 }, { "epoch": 0.08563344839926756, "grad_norm": 0.87890625, "learning_rate": 0.000197809683220846, "loss": 1.1623, "step": 3335 }, { "epoch": 0.08565912559518937, "grad_norm": 0.890625, "learning_rate": 0.00019780875389804, "loss": 1.2454, "step": 3336 }, { "epoch": 0.0856848027911112, "grad_norm": 0.8515625, "learning_rate": 0.00019780782438030996, "loss": 1.2003, "step": 3337 }, { "epoch": 0.08571047998703302, "grad_norm": 0.83203125, "learning_rate": 0.00019780689466765777, "loss": 1.2733, "step": 3338 }, { "epoch": 0.08573615718295484, "grad_norm": 0.88671875, "learning_rate": 0.00019780596476008525, "loss": 1.4233, "step": 3339 }, { "epoch": 0.08576183437887666, "grad_norm": 0.78125, "learning_rate": 0.00019780503465759432, "loss": 1.2411, "step": 3340 }, { "epoch": 0.08578751157479847, "grad_norm": 0.84765625, "learning_rate": 0.00019780410436018677, "loss": 1.2151, "step": 3341 }, { "epoch": 0.08581318877072029, "grad_norm": 0.8671875, "learning_rate": 0.00019780317386786447, "loss": 1.1143, "step": 3342 }, { "epoch": 0.08583886596664211, "grad_norm": 0.9375, "learning_rate": 0.0001978022431806293, "loss": 1.1736, "step": 3343 }, { "epoch": 0.08586454316256394, "grad_norm": 0.91015625, "learning_rate": 0.00019780131229848305, "loss": 1.1963, "step": 3344 }, { "epoch": 0.08589022035848576, "grad_norm": 0.9140625, "learning_rate": 0.00019780038122142767, "loss": 1.188, "step": 3345 }, { "epoch": 0.08591589755440757, "grad_norm": 0.91015625, "learning_rate": 0.00019779944994946494, "loss": 1.3588, "step": 3346 }, { "epoch": 0.08594157475032939, "grad_norm": 0.8984375, "learning_rate": 0.00019779851848259676, "loss": 1.1109, "step": 3347 }, { "epoch": 0.08596725194625121, "grad_norm": 0.83203125, "learning_rate": 0.00019779758682082497, "loss": 1.2063, "step": 3348 }, { "epoch": 0.08599292914217303, "grad_norm": 0.97265625, "learning_rate": 0.00019779665496415138, "loss": 1.2861, "step": 3349 }, { "epoch": 0.08601860633809486, "grad_norm": 0.84375, "learning_rate": 0.00019779572291257793, "loss": 1.235, "step": 3350 }, { "epoch": 0.08604428353401666, "grad_norm": 0.85546875, "learning_rate": 0.0001977947906661064, "loss": 1.1216, "step": 3351 }, { "epoch": 0.08606996072993849, "grad_norm": 0.82421875, "learning_rate": 0.00019779385822473873, "loss": 1.2477, "step": 3352 }, { "epoch": 0.08609563792586031, "grad_norm": 0.87890625, "learning_rate": 0.0001977929255884767, "loss": 1.2243, "step": 3353 }, { "epoch": 0.08612131512178213, "grad_norm": 0.87109375, "learning_rate": 0.00019779199275732224, "loss": 1.2065, "step": 3354 }, { "epoch": 0.08614699231770395, "grad_norm": 0.88671875, "learning_rate": 0.00019779105973127714, "loss": 1.1895, "step": 3355 }, { "epoch": 0.08617266951362576, "grad_norm": 0.8671875, "learning_rate": 0.0001977901265103433, "loss": 1.2017, "step": 3356 }, { "epoch": 0.08619834670954758, "grad_norm": 0.8125, "learning_rate": 0.00019778919309452255, "loss": 1.0589, "step": 3357 }, { "epoch": 0.0862240239054694, "grad_norm": 0.87890625, "learning_rate": 0.0001977882594838168, "loss": 1.1282, "step": 3358 }, { "epoch": 0.08624970110139123, "grad_norm": 0.875, "learning_rate": 0.0001977873256782279, "loss": 1.3407, "step": 3359 }, { "epoch": 0.08627537829731304, "grad_norm": 0.8046875, "learning_rate": 0.00019778639167775764, "loss": 1.1199, "step": 3360 }, { "epoch": 0.08630105549323486, "grad_norm": 0.8984375, "learning_rate": 0.00019778545748240799, "loss": 1.1068, "step": 3361 }, { "epoch": 0.08632673268915668, "grad_norm": 0.84375, "learning_rate": 0.00019778452309218068, "loss": 1.1324, "step": 3362 }, { "epoch": 0.0863524098850785, "grad_norm": 0.8828125, "learning_rate": 0.00019778358850707772, "loss": 1.3116, "step": 3363 }, { "epoch": 0.08637808708100032, "grad_norm": 0.87890625, "learning_rate": 0.0001977826537271009, "loss": 1.1414, "step": 3364 }, { "epoch": 0.08640376427692213, "grad_norm": 0.796875, "learning_rate": 0.00019778171875225203, "loss": 1.0908, "step": 3365 }, { "epoch": 0.08642944147284395, "grad_norm": 0.92578125, "learning_rate": 0.00019778078358253308, "loss": 1.1971, "step": 3366 }, { "epoch": 0.08645511866876578, "grad_norm": 0.9765625, "learning_rate": 0.0001977798482179458, "loss": 1.2653, "step": 3367 }, { "epoch": 0.0864807958646876, "grad_norm": 0.8984375, "learning_rate": 0.00019777891265849217, "loss": 1.1879, "step": 3368 }, { "epoch": 0.08650647306060942, "grad_norm": 0.921875, "learning_rate": 0.000197777976904174, "loss": 1.1848, "step": 3369 }, { "epoch": 0.08653215025653123, "grad_norm": 0.859375, "learning_rate": 0.00019777704095499314, "loss": 1.2273, "step": 3370 }, { "epoch": 0.08655782745245305, "grad_norm": 0.8984375, "learning_rate": 0.0001977761048109515, "loss": 1.2121, "step": 3371 }, { "epoch": 0.08658350464837487, "grad_norm": 0.90234375, "learning_rate": 0.00019777516847205084, "loss": 1.1381, "step": 3372 }, { "epoch": 0.0866091818442967, "grad_norm": 0.8359375, "learning_rate": 0.00019777423193829315, "loss": 1.1687, "step": 3373 }, { "epoch": 0.08663485904021852, "grad_norm": 0.8671875, "learning_rate": 0.00019777329520968023, "loss": 1.1278, "step": 3374 }, { "epoch": 0.08666053623614033, "grad_norm": 0.91015625, "learning_rate": 0.000197772358286214, "loss": 1.1706, "step": 3375 }, { "epoch": 0.08668621343206215, "grad_norm": 0.90625, "learning_rate": 0.00019777142116789625, "loss": 1.2967, "step": 3376 }, { "epoch": 0.08671189062798397, "grad_norm": 0.921875, "learning_rate": 0.0001977704838547289, "loss": 1.068, "step": 3377 }, { "epoch": 0.08673756782390579, "grad_norm": 0.875, "learning_rate": 0.00019776954634671384, "loss": 1.2258, "step": 3378 }, { "epoch": 0.08676324501982761, "grad_norm": 1.0, "learning_rate": 0.00019776860864385287, "loss": 1.2474, "step": 3379 }, { "epoch": 0.08678892221574942, "grad_norm": 0.86328125, "learning_rate": 0.0001977676707461479, "loss": 1.0664, "step": 3380 }, { "epoch": 0.08681459941167124, "grad_norm": 0.84765625, "learning_rate": 0.00019776673265360078, "loss": 1.1986, "step": 3381 }, { "epoch": 0.08684027660759307, "grad_norm": 0.84375, "learning_rate": 0.00019776579436621343, "loss": 1.1121, "step": 3382 }, { "epoch": 0.08686595380351489, "grad_norm": 0.83203125, "learning_rate": 0.00019776485588398766, "loss": 1.081, "step": 3383 }, { "epoch": 0.08689163099943671, "grad_norm": 0.8125, "learning_rate": 0.00019776391720692534, "loss": 1.1419, "step": 3384 }, { "epoch": 0.08691730819535852, "grad_norm": 0.90625, "learning_rate": 0.00019776297833502842, "loss": 1.2476, "step": 3385 }, { "epoch": 0.08694298539128034, "grad_norm": 0.90234375, "learning_rate": 0.0001977620392682987, "loss": 1.2424, "step": 3386 }, { "epoch": 0.08696866258720216, "grad_norm": 0.7890625, "learning_rate": 0.00019776110000673805, "loss": 1.1877, "step": 3387 }, { "epoch": 0.08699433978312399, "grad_norm": 0.90234375, "learning_rate": 0.00019776016055034833, "loss": 1.2428, "step": 3388 }, { "epoch": 0.08702001697904581, "grad_norm": 0.890625, "learning_rate": 0.0001977592208991315, "loss": 1.103, "step": 3389 }, { "epoch": 0.08704569417496762, "grad_norm": 0.828125, "learning_rate": 0.00019775828105308933, "loss": 1.1544, "step": 3390 }, { "epoch": 0.08707137137088944, "grad_norm": 0.97265625, "learning_rate": 0.00019775734101222376, "loss": 1.3028, "step": 3391 }, { "epoch": 0.08709704856681126, "grad_norm": 0.88671875, "learning_rate": 0.00019775640077653663, "loss": 1.4075, "step": 3392 }, { "epoch": 0.08712272576273308, "grad_norm": 0.93359375, "learning_rate": 0.00019775546034602983, "loss": 1.1422, "step": 3393 }, { "epoch": 0.0871484029586549, "grad_norm": 0.8046875, "learning_rate": 0.0001977545197207052, "loss": 1.0718, "step": 3394 }, { "epoch": 0.08717408015457671, "grad_norm": 0.83203125, "learning_rate": 0.00019775357890056467, "loss": 1.2613, "step": 3395 }, { "epoch": 0.08719975735049854, "grad_norm": 0.82421875, "learning_rate": 0.0001977526378856101, "loss": 1.099, "step": 3396 }, { "epoch": 0.08722543454642036, "grad_norm": 0.92578125, "learning_rate": 0.00019775169667584331, "loss": 1.2679, "step": 3397 }, { "epoch": 0.08725111174234218, "grad_norm": 0.87890625, "learning_rate": 0.00019775075527126625, "loss": 1.2637, "step": 3398 }, { "epoch": 0.087276788938264, "grad_norm": 0.84375, "learning_rate": 0.00019774981367188076, "loss": 1.3208, "step": 3399 }, { "epoch": 0.08730246613418581, "grad_norm": 0.8203125, "learning_rate": 0.0001977488718776887, "loss": 1.073, "step": 3400 }, { "epoch": 0.08732814333010763, "grad_norm": 0.91796875, "learning_rate": 0.000197747929888692, "loss": 1.294, "step": 3401 }, { "epoch": 0.08735382052602945, "grad_norm": 0.94921875, "learning_rate": 0.0001977469877048925, "loss": 1.2477, "step": 3402 }, { "epoch": 0.08737949772195128, "grad_norm": 0.87109375, "learning_rate": 0.0001977460453262921, "loss": 1.109, "step": 3403 }, { "epoch": 0.0874051749178731, "grad_norm": 0.89453125, "learning_rate": 0.00019774510275289263, "loss": 1.1977, "step": 3404 }, { "epoch": 0.08743085211379491, "grad_norm": 0.80859375, "learning_rate": 0.000197744159984696, "loss": 1.2558, "step": 3405 }, { "epoch": 0.08745652930971673, "grad_norm": 0.8828125, "learning_rate": 0.00019774321702170408, "loss": 1.3438, "step": 3406 }, { "epoch": 0.08748220650563855, "grad_norm": 0.8359375, "learning_rate": 0.00019774227386391876, "loss": 0.9025, "step": 3407 }, { "epoch": 0.08750788370156037, "grad_norm": 0.86328125, "learning_rate": 0.00019774133051134193, "loss": 1.1197, "step": 3408 }, { "epoch": 0.0875335608974822, "grad_norm": 0.9375, "learning_rate": 0.00019774038696397547, "loss": 1.3373, "step": 3409 }, { "epoch": 0.087559238093404, "grad_norm": 0.88671875, "learning_rate": 0.00019773944322182122, "loss": 1.2295, "step": 3410 }, { "epoch": 0.08758491528932583, "grad_norm": 1.1328125, "learning_rate": 0.0001977384992848811, "loss": 1.236, "step": 3411 }, { "epoch": 0.08761059248524765, "grad_norm": 0.83203125, "learning_rate": 0.000197737555153157, "loss": 1.0346, "step": 3412 }, { "epoch": 0.08763626968116947, "grad_norm": 0.91015625, "learning_rate": 0.00019773661082665077, "loss": 1.377, "step": 3413 }, { "epoch": 0.08766194687709129, "grad_norm": 0.8359375, "learning_rate": 0.00019773566630536425, "loss": 1.1271, "step": 3414 }, { "epoch": 0.0876876240730131, "grad_norm": 0.89453125, "learning_rate": 0.00019773472158929946, "loss": 1.1339, "step": 3415 }, { "epoch": 0.08771330126893492, "grad_norm": 0.77734375, "learning_rate": 0.00019773377667845816, "loss": 1.0705, "step": 3416 }, { "epoch": 0.08773897846485675, "grad_norm": 0.77734375, "learning_rate": 0.00019773283157284227, "loss": 1.0183, "step": 3417 }, { "epoch": 0.08776465566077857, "grad_norm": 0.84765625, "learning_rate": 0.00019773188627245368, "loss": 1.1152, "step": 3418 }, { "epoch": 0.08779033285670039, "grad_norm": 0.90234375, "learning_rate": 0.00019773094077729425, "loss": 1.116, "step": 3419 }, { "epoch": 0.0878160100526222, "grad_norm": 0.875, "learning_rate": 0.0001977299950873659, "loss": 1.2162, "step": 3420 }, { "epoch": 0.08784168724854402, "grad_norm": 0.84765625, "learning_rate": 0.00019772904920267048, "loss": 1.1196, "step": 3421 }, { "epoch": 0.08786736444446584, "grad_norm": 0.88671875, "learning_rate": 0.00019772810312320994, "loss": 1.2709, "step": 3422 }, { "epoch": 0.08789304164038766, "grad_norm": 0.96875, "learning_rate": 0.00019772715684898606, "loss": 1.3344, "step": 3423 }, { "epoch": 0.08791871883630949, "grad_norm": 0.83984375, "learning_rate": 0.0001977262103800008, "loss": 1.1656, "step": 3424 }, { "epoch": 0.0879443960322313, "grad_norm": 0.84375, "learning_rate": 0.00019772526371625605, "loss": 1.075, "step": 3425 }, { "epoch": 0.08797007322815312, "grad_norm": 0.8515625, "learning_rate": 0.00019772431685775366, "loss": 1.1003, "step": 3426 }, { "epoch": 0.08799575042407494, "grad_norm": 0.87890625, "learning_rate": 0.00019772336980449556, "loss": 1.2277, "step": 3427 }, { "epoch": 0.08802142761999676, "grad_norm": 0.859375, "learning_rate": 0.0001977224225564836, "loss": 1.2231, "step": 3428 }, { "epoch": 0.08804710481591858, "grad_norm": 0.859375, "learning_rate": 0.00019772147511371965, "loss": 1.2706, "step": 3429 }, { "epoch": 0.08807278201184039, "grad_norm": 0.94921875, "learning_rate": 0.00019772052747620564, "loss": 1.3283, "step": 3430 }, { "epoch": 0.08809845920776221, "grad_norm": 1.0390625, "learning_rate": 0.00019771957964394345, "loss": 1.1445, "step": 3431 }, { "epoch": 0.08812413640368404, "grad_norm": 0.8984375, "learning_rate": 0.000197718631616935, "loss": 1.3331, "step": 3432 }, { "epoch": 0.08814981359960586, "grad_norm": 0.796875, "learning_rate": 0.0001977176833951821, "loss": 1.2168, "step": 3433 }, { "epoch": 0.08817549079552768, "grad_norm": 0.9140625, "learning_rate": 0.0001977167349786867, "loss": 1.0645, "step": 3434 }, { "epoch": 0.08820116799144949, "grad_norm": 0.88671875, "learning_rate": 0.00019771578636745068, "loss": 1.1214, "step": 3435 }, { "epoch": 0.08822684518737131, "grad_norm": 0.94140625, "learning_rate": 0.00019771483756147592, "loss": 1.1401, "step": 3436 }, { "epoch": 0.08825252238329313, "grad_norm": 0.875, "learning_rate": 0.0001977138885607643, "loss": 1.0407, "step": 3437 }, { "epoch": 0.08827819957921496, "grad_norm": 0.9296875, "learning_rate": 0.00019771293936531774, "loss": 1.278, "step": 3438 }, { "epoch": 0.08830387677513678, "grad_norm": 0.87890625, "learning_rate": 0.0001977119899751381, "loss": 1.2765, "step": 3439 }, { "epoch": 0.08832955397105859, "grad_norm": 0.90234375, "learning_rate": 0.00019771104039022731, "loss": 1.2111, "step": 3440 }, { "epoch": 0.08835523116698041, "grad_norm": 0.75390625, "learning_rate": 0.00019771009061058723, "loss": 1.0282, "step": 3441 }, { "epoch": 0.08838090836290223, "grad_norm": 0.8359375, "learning_rate": 0.00019770914063621978, "loss": 1.3079, "step": 3442 }, { "epoch": 0.08840658555882405, "grad_norm": 0.85546875, "learning_rate": 0.00019770819046712682, "loss": 1.2296, "step": 3443 }, { "epoch": 0.08843226275474587, "grad_norm": 0.89453125, "learning_rate": 0.0001977072401033103, "loss": 1.3098, "step": 3444 }, { "epoch": 0.08845793995066768, "grad_norm": 0.90234375, "learning_rate": 0.00019770628954477205, "loss": 1.4024, "step": 3445 }, { "epoch": 0.0884836171465895, "grad_norm": 0.8984375, "learning_rate": 0.00019770533879151397, "loss": 1.0906, "step": 3446 }, { "epoch": 0.08850929434251133, "grad_norm": 0.90234375, "learning_rate": 0.000197704387843538, "loss": 1.1841, "step": 3447 }, { "epoch": 0.08853497153843315, "grad_norm": 0.92578125, "learning_rate": 0.00019770343670084603, "loss": 1.1031, "step": 3448 }, { "epoch": 0.08856064873435497, "grad_norm": 0.80078125, "learning_rate": 0.00019770248536343988, "loss": 1.2232, "step": 3449 }, { "epoch": 0.08858632593027678, "grad_norm": 0.85546875, "learning_rate": 0.00019770153383132154, "loss": 1.2045, "step": 3450 }, { "epoch": 0.0886120031261986, "grad_norm": 0.859375, "learning_rate": 0.00019770058210449287, "loss": 1.1571, "step": 3451 }, { "epoch": 0.08863768032212042, "grad_norm": 0.921875, "learning_rate": 0.00019769963018295575, "loss": 1.2859, "step": 3452 }, { "epoch": 0.08866335751804225, "grad_norm": 0.890625, "learning_rate": 0.00019769867806671213, "loss": 1.095, "step": 3453 }, { "epoch": 0.08868903471396407, "grad_norm": 0.890625, "learning_rate": 0.00019769772575576385, "loss": 1.2418, "step": 3454 }, { "epoch": 0.08871471190988588, "grad_norm": 0.8828125, "learning_rate": 0.0001976967732501128, "loss": 1.2457, "step": 3455 }, { "epoch": 0.0887403891058077, "grad_norm": 0.9296875, "learning_rate": 0.00019769582054976093, "loss": 1.2137, "step": 3456 }, { "epoch": 0.08876606630172952, "grad_norm": 0.94140625, "learning_rate": 0.00019769486765471012, "loss": 1.204, "step": 3457 }, { "epoch": 0.08879174349765134, "grad_norm": 0.91015625, "learning_rate": 0.00019769391456496223, "loss": 1.1165, "step": 3458 }, { "epoch": 0.08881742069357317, "grad_norm": 0.82421875, "learning_rate": 0.0001976929612805192, "loss": 1.1577, "step": 3459 }, { "epoch": 0.08884309788949497, "grad_norm": 0.84375, "learning_rate": 0.00019769200780138298, "loss": 1.2467, "step": 3460 }, { "epoch": 0.0888687750854168, "grad_norm": 0.7890625, "learning_rate": 0.00019769105412755536, "loss": 1.2336, "step": 3461 }, { "epoch": 0.08889445228133862, "grad_norm": 0.80859375, "learning_rate": 0.00019769010025903828, "loss": 1.1852, "step": 3462 }, { "epoch": 0.08892012947726044, "grad_norm": 0.8203125, "learning_rate": 0.00019768914619583373, "loss": 1.0817, "step": 3463 }, { "epoch": 0.08894580667318225, "grad_norm": 0.87890625, "learning_rate": 0.00019768819193794347, "loss": 1.0664, "step": 3464 }, { "epoch": 0.08897148386910407, "grad_norm": 0.83984375, "learning_rate": 0.00019768723748536947, "loss": 1.1639, "step": 3465 }, { "epoch": 0.08899716106502589, "grad_norm": 0.875, "learning_rate": 0.00019768628283811364, "loss": 1.1187, "step": 3466 }, { "epoch": 0.08902283826094771, "grad_norm": 0.9609375, "learning_rate": 0.0001976853279961779, "loss": 1.2422, "step": 3467 }, { "epoch": 0.08904851545686954, "grad_norm": 0.875, "learning_rate": 0.00019768437295956408, "loss": 1.2136, "step": 3468 }, { "epoch": 0.08907419265279135, "grad_norm": 0.83203125, "learning_rate": 0.00019768341772827416, "loss": 1.0745, "step": 3469 }, { "epoch": 0.08909986984871317, "grad_norm": 0.875, "learning_rate": 0.00019768246230231, "loss": 1.183, "step": 3470 }, { "epoch": 0.08912554704463499, "grad_norm": 0.84765625, "learning_rate": 0.0001976815066816735, "loss": 1.3272, "step": 3471 }, { "epoch": 0.08915122424055681, "grad_norm": 0.859375, "learning_rate": 0.00019768055086636662, "loss": 1.1612, "step": 3472 }, { "epoch": 0.08917690143647863, "grad_norm": 0.8203125, "learning_rate": 0.0001976795948563912, "loss": 1.2123, "step": 3473 }, { "epoch": 0.08920257863240044, "grad_norm": 0.90234375, "learning_rate": 0.00019767863865174915, "loss": 1.3453, "step": 3474 }, { "epoch": 0.08922825582832226, "grad_norm": 0.90234375, "learning_rate": 0.00019767768225244242, "loss": 1.2323, "step": 3475 }, { "epoch": 0.08925393302424409, "grad_norm": 0.875, "learning_rate": 0.00019767672565847294, "loss": 1.2274, "step": 3476 }, { "epoch": 0.08927961022016591, "grad_norm": 0.91796875, "learning_rate": 0.0001976757688698425, "loss": 1.2835, "step": 3477 }, { "epoch": 0.08930528741608773, "grad_norm": 0.859375, "learning_rate": 0.0001976748118865531, "loss": 1.167, "step": 3478 }, { "epoch": 0.08933096461200954, "grad_norm": 0.84765625, "learning_rate": 0.0001976738547086066, "loss": 1.3696, "step": 3479 }, { "epoch": 0.08935664180793136, "grad_norm": 0.88671875, "learning_rate": 0.00019767289733600495, "loss": 1.2541, "step": 3480 }, { "epoch": 0.08938231900385318, "grad_norm": 0.8515625, "learning_rate": 0.00019767193976875005, "loss": 1.1841, "step": 3481 }, { "epoch": 0.089407996199775, "grad_norm": 0.8671875, "learning_rate": 0.00019767098200684378, "loss": 1.3088, "step": 3482 }, { "epoch": 0.08943367339569683, "grad_norm": 0.81640625, "learning_rate": 0.0001976700240502881, "loss": 1.1758, "step": 3483 }, { "epoch": 0.08945935059161864, "grad_norm": 0.91015625, "learning_rate": 0.00019766906589908483, "loss": 1.2384, "step": 3484 }, { "epoch": 0.08948502778754046, "grad_norm": 0.86328125, "learning_rate": 0.00019766810755323598, "loss": 1.2955, "step": 3485 }, { "epoch": 0.08951070498346228, "grad_norm": 0.87890625, "learning_rate": 0.0001976671490127434, "loss": 1.2288, "step": 3486 }, { "epoch": 0.0895363821793841, "grad_norm": 1.09375, "learning_rate": 0.000197666190277609, "loss": 1.0241, "step": 3487 }, { "epoch": 0.08956205937530592, "grad_norm": 0.8671875, "learning_rate": 0.0001976652313478347, "loss": 1.3169, "step": 3488 }, { "epoch": 0.08958773657122773, "grad_norm": 0.87890625, "learning_rate": 0.00019766427222342244, "loss": 1.2537, "step": 3489 }, { "epoch": 0.08961341376714956, "grad_norm": 0.79296875, "learning_rate": 0.00019766331290437406, "loss": 1.2139, "step": 3490 }, { "epoch": 0.08963909096307138, "grad_norm": 0.8671875, "learning_rate": 0.00019766235339069158, "loss": 1.1276, "step": 3491 }, { "epoch": 0.0896647681589932, "grad_norm": 0.81640625, "learning_rate": 0.00019766139368237684, "loss": 1.1526, "step": 3492 }, { "epoch": 0.08969044535491502, "grad_norm": 0.86328125, "learning_rate": 0.00019766043377943176, "loss": 1.1978, "step": 3493 }, { "epoch": 0.08971612255083683, "grad_norm": 0.93359375, "learning_rate": 0.0001976594736818582, "loss": 1.2248, "step": 3494 }, { "epoch": 0.08974179974675865, "grad_norm": 0.81640625, "learning_rate": 0.0001976585133896582, "loss": 1.1323, "step": 3495 }, { "epoch": 0.08976747694268047, "grad_norm": 0.828125, "learning_rate": 0.00019765755290283358, "loss": 1.137, "step": 3496 }, { "epoch": 0.0897931541386023, "grad_norm": 0.85546875, "learning_rate": 0.00019765659222138632, "loss": 1.2348, "step": 3497 }, { "epoch": 0.08981883133452412, "grad_norm": 0.859375, "learning_rate": 0.0001976556313453182, "loss": 1.1223, "step": 3498 }, { "epoch": 0.08984450853044593, "grad_norm": 0.87890625, "learning_rate": 0.0001976546702746313, "loss": 1.252, "step": 3499 }, { "epoch": 0.08987018572636775, "grad_norm": 0.90234375, "learning_rate": 0.00019765370900932746, "loss": 1.2062, "step": 3500 }, { "epoch": 0.08989586292228957, "grad_norm": 0.80078125, "learning_rate": 0.0001976527475494086, "loss": 1.1739, "step": 3501 }, { "epoch": 0.0899215401182114, "grad_norm": 0.9296875, "learning_rate": 0.0001976517858948766, "loss": 1.244, "step": 3502 }, { "epoch": 0.08994721731413322, "grad_norm": 0.9453125, "learning_rate": 0.00019765082404573346, "loss": 1.1425, "step": 3503 }, { "epoch": 0.08997289451005502, "grad_norm": 0.8359375, "learning_rate": 0.00019764986200198102, "loss": 1.1702, "step": 3504 }, { "epoch": 0.08999857170597685, "grad_norm": 0.875, "learning_rate": 0.0001976488997636212, "loss": 1.2471, "step": 3505 }, { "epoch": 0.09002424890189867, "grad_norm": 1.1796875, "learning_rate": 0.00019764793733065598, "loss": 1.2444, "step": 3506 }, { "epoch": 0.09004992609782049, "grad_norm": 0.9140625, "learning_rate": 0.00019764697470308723, "loss": 1.2147, "step": 3507 }, { "epoch": 0.09007560329374231, "grad_norm": 0.90234375, "learning_rate": 0.00019764601188091688, "loss": 1.2127, "step": 3508 }, { "epoch": 0.09010128048966412, "grad_norm": 0.85546875, "learning_rate": 0.00019764504886414684, "loss": 1.1834, "step": 3509 }, { "epoch": 0.09012695768558594, "grad_norm": 0.90625, "learning_rate": 0.00019764408565277903, "loss": 1.2221, "step": 3510 }, { "epoch": 0.09015263488150776, "grad_norm": 0.890625, "learning_rate": 0.0001976431222468154, "loss": 1.1683, "step": 3511 }, { "epoch": 0.09017831207742959, "grad_norm": 0.91015625, "learning_rate": 0.00019764215864625785, "loss": 1.0982, "step": 3512 }, { "epoch": 0.09020398927335141, "grad_norm": 0.890625, "learning_rate": 0.00019764119485110828, "loss": 1.2546, "step": 3513 }, { "epoch": 0.09022966646927322, "grad_norm": 0.87890625, "learning_rate": 0.00019764023086136864, "loss": 1.1547, "step": 3514 }, { "epoch": 0.09025534366519504, "grad_norm": 0.86328125, "learning_rate": 0.00019763926667704085, "loss": 1.2131, "step": 3515 }, { "epoch": 0.09028102086111686, "grad_norm": 0.8828125, "learning_rate": 0.0001976383022981268, "loss": 1.2215, "step": 3516 }, { "epoch": 0.09030669805703868, "grad_norm": 0.83984375, "learning_rate": 0.00019763733772462845, "loss": 1.1932, "step": 3517 }, { "epoch": 0.0903323752529605, "grad_norm": 0.87890625, "learning_rate": 0.0001976363729565477, "loss": 1.3536, "step": 3518 }, { "epoch": 0.09035805244888231, "grad_norm": 0.88671875, "learning_rate": 0.00019763540799388647, "loss": 1.1482, "step": 3519 }, { "epoch": 0.09038372964480414, "grad_norm": 0.89453125, "learning_rate": 0.0001976344428366467, "loss": 1.3066, "step": 3520 }, { "epoch": 0.09040940684072596, "grad_norm": 0.828125, "learning_rate": 0.0001976334774848303, "loss": 1.0399, "step": 3521 }, { "epoch": 0.09043508403664778, "grad_norm": 0.9296875, "learning_rate": 0.0001976325119384392, "loss": 1.193, "step": 3522 }, { "epoch": 0.0904607612325696, "grad_norm": 0.90625, "learning_rate": 0.00019763154619747533, "loss": 1.1619, "step": 3523 }, { "epoch": 0.09048643842849141, "grad_norm": 1.0546875, "learning_rate": 0.0001976305802619406, "loss": 1.178, "step": 3524 }, { "epoch": 0.09051211562441323, "grad_norm": 0.8359375, "learning_rate": 0.00019762961413183694, "loss": 1.0072, "step": 3525 }, { "epoch": 0.09053779282033506, "grad_norm": 0.87890625, "learning_rate": 0.00019762864780716627, "loss": 1.1181, "step": 3526 }, { "epoch": 0.09056347001625688, "grad_norm": 0.86328125, "learning_rate": 0.00019762768128793055, "loss": 1.2408, "step": 3527 }, { "epoch": 0.0905891472121787, "grad_norm": 0.87890625, "learning_rate": 0.00019762671457413165, "loss": 1.2244, "step": 3528 }, { "epoch": 0.09061482440810051, "grad_norm": 0.9609375, "learning_rate": 0.00019762574766577154, "loss": 1.2748, "step": 3529 }, { "epoch": 0.09064050160402233, "grad_norm": 0.9296875, "learning_rate": 0.00019762478056285215, "loss": 1.2724, "step": 3530 }, { "epoch": 0.09066617879994415, "grad_norm": 0.87890625, "learning_rate": 0.0001976238132653754, "loss": 1.095, "step": 3531 }, { "epoch": 0.09069185599586597, "grad_norm": 1.015625, "learning_rate": 0.00019762284577334315, "loss": 1.1536, "step": 3532 }, { "epoch": 0.0907175331917878, "grad_norm": 7.0625, "learning_rate": 0.0001976218780867574, "loss": 1.2239, "step": 3533 }, { "epoch": 0.0907432103877096, "grad_norm": 0.93359375, "learning_rate": 0.0001976209102056201, "loss": 1.2654, "step": 3534 }, { "epoch": 0.09076888758363143, "grad_norm": 0.84375, "learning_rate": 0.00019761994212993313, "loss": 1.0803, "step": 3535 }, { "epoch": 0.09079456477955325, "grad_norm": 0.8359375, "learning_rate": 0.00019761897385969844, "loss": 1.2197, "step": 3536 }, { "epoch": 0.09082024197547507, "grad_norm": 0.83984375, "learning_rate": 0.00019761800539491796, "loss": 1.051, "step": 3537 }, { "epoch": 0.0908459191713969, "grad_norm": 1.3203125, "learning_rate": 0.00019761703673559357, "loss": 1.0659, "step": 3538 }, { "epoch": 0.0908715963673187, "grad_norm": 0.91015625, "learning_rate": 0.0001976160678817273, "loss": 1.1237, "step": 3539 }, { "epoch": 0.09089727356324052, "grad_norm": 0.91796875, "learning_rate": 0.00019761509883332097, "loss": 1.342, "step": 3540 }, { "epoch": 0.09092295075916235, "grad_norm": 0.95703125, "learning_rate": 0.0001976141295903766, "loss": 1.2189, "step": 3541 }, { "epoch": 0.09094862795508417, "grad_norm": 0.99609375, "learning_rate": 0.0001976131601528961, "loss": 1.244, "step": 3542 }, { "epoch": 0.09097430515100599, "grad_norm": 0.98046875, "learning_rate": 0.00019761219052088133, "loss": 1.2537, "step": 3543 }, { "epoch": 0.0909999823469278, "grad_norm": 0.97265625, "learning_rate": 0.00019761122069433435, "loss": 1.2231, "step": 3544 }, { "epoch": 0.09102565954284962, "grad_norm": 0.91796875, "learning_rate": 0.00019761025067325697, "loss": 1.2404, "step": 3545 }, { "epoch": 0.09105133673877144, "grad_norm": 0.8203125, "learning_rate": 0.0001976092804576512, "loss": 1.3409, "step": 3546 }, { "epoch": 0.09107701393469327, "grad_norm": 0.94921875, "learning_rate": 0.00019760831004751896, "loss": 1.2185, "step": 3547 }, { "epoch": 0.09110269113061509, "grad_norm": 0.90625, "learning_rate": 0.00019760733944286217, "loss": 1.3951, "step": 3548 }, { "epoch": 0.0911283683265369, "grad_norm": 0.9296875, "learning_rate": 0.0001976063686436828, "loss": 1.2014, "step": 3549 }, { "epoch": 0.09115404552245872, "grad_norm": 0.8828125, "learning_rate": 0.0001976053976499827, "loss": 1.2398, "step": 3550 }, { "epoch": 0.09117972271838054, "grad_norm": 0.921875, "learning_rate": 0.00019760442646176387, "loss": 1.2559, "step": 3551 }, { "epoch": 0.09120539991430236, "grad_norm": 0.84375, "learning_rate": 0.00019760345507902828, "loss": 1.2502, "step": 3552 }, { "epoch": 0.09123107711022418, "grad_norm": 0.90234375, "learning_rate": 0.00019760248350177774, "loss": 1.2942, "step": 3553 }, { "epoch": 0.09125675430614599, "grad_norm": 0.88671875, "learning_rate": 0.00019760151173001434, "loss": 1.2311, "step": 3554 }, { "epoch": 0.09128243150206782, "grad_norm": 0.84765625, "learning_rate": 0.0001976005397637399, "loss": 1.1235, "step": 3555 }, { "epoch": 0.09130810869798964, "grad_norm": 0.92578125, "learning_rate": 0.0001975995676029564, "loss": 1.2001, "step": 3556 }, { "epoch": 0.09133378589391146, "grad_norm": 0.85546875, "learning_rate": 0.00019759859524766583, "loss": 1.2287, "step": 3557 }, { "epoch": 0.09135946308983328, "grad_norm": 0.84375, "learning_rate": 0.00019759762269787, "loss": 1.268, "step": 3558 }, { "epoch": 0.09138514028575509, "grad_norm": 0.89453125, "learning_rate": 0.000197596649953571, "loss": 1.2827, "step": 3559 }, { "epoch": 0.09141081748167691, "grad_norm": 0.94140625, "learning_rate": 0.00019759567701477064, "loss": 1.3658, "step": 3560 }, { "epoch": 0.09143649467759873, "grad_norm": 1.125, "learning_rate": 0.00019759470388147095, "loss": 1.241, "step": 3561 }, { "epoch": 0.09146217187352056, "grad_norm": 0.8671875, "learning_rate": 0.0001975937305536738, "loss": 1.0729, "step": 3562 }, { "epoch": 0.09148784906944236, "grad_norm": 0.7890625, "learning_rate": 0.00019759275703138115, "loss": 1.1098, "step": 3563 }, { "epoch": 0.09151352626536419, "grad_norm": 0.875, "learning_rate": 0.000197591783314595, "loss": 1.2655, "step": 3564 }, { "epoch": 0.09153920346128601, "grad_norm": 0.8046875, "learning_rate": 0.0001975908094033172, "loss": 1.2426, "step": 3565 }, { "epoch": 0.09156488065720783, "grad_norm": 0.8203125, "learning_rate": 0.00019758983529754974, "loss": 1.0285, "step": 3566 }, { "epoch": 0.09159055785312965, "grad_norm": 0.87890625, "learning_rate": 0.00019758886099729454, "loss": 1.1256, "step": 3567 }, { "epoch": 0.09161623504905146, "grad_norm": 0.96875, "learning_rate": 0.00019758788650255357, "loss": 1.0601, "step": 3568 }, { "epoch": 0.09164191224497328, "grad_norm": 2.15625, "learning_rate": 0.00019758691181332875, "loss": 1.244, "step": 3569 }, { "epoch": 0.0916675894408951, "grad_norm": 0.9921875, "learning_rate": 0.00019758593692962206, "loss": 1.2478, "step": 3570 }, { "epoch": 0.09169326663681693, "grad_norm": 0.8203125, "learning_rate": 0.00019758496185143538, "loss": 1.1113, "step": 3571 }, { "epoch": 0.09171894383273875, "grad_norm": 0.87109375, "learning_rate": 0.0001975839865787707, "loss": 1.1232, "step": 3572 }, { "epoch": 0.09174462102866056, "grad_norm": 0.96484375, "learning_rate": 0.00019758301111162996, "loss": 1.2238, "step": 3573 }, { "epoch": 0.09177029822458238, "grad_norm": 0.88671875, "learning_rate": 0.00019758203545001507, "loss": 1.2019, "step": 3574 }, { "epoch": 0.0917959754205042, "grad_norm": 0.9296875, "learning_rate": 0.000197581059593928, "loss": 1.3095, "step": 3575 }, { "epoch": 0.09182165261642602, "grad_norm": 0.828125, "learning_rate": 0.00019758008354337072, "loss": 1.0971, "step": 3576 }, { "epoch": 0.09184732981234785, "grad_norm": 0.859375, "learning_rate": 0.00019757910729834512, "loss": 1.2007, "step": 3577 }, { "epoch": 0.09187300700826966, "grad_norm": 0.8515625, "learning_rate": 0.00019757813085885317, "loss": 1.3019, "step": 3578 }, { "epoch": 0.09189868420419148, "grad_norm": 1.2421875, "learning_rate": 0.00019757715422489685, "loss": 1.1733, "step": 3579 }, { "epoch": 0.0919243614001133, "grad_norm": 0.90234375, "learning_rate": 0.00019757617739647808, "loss": 1.1918, "step": 3580 }, { "epoch": 0.09195003859603512, "grad_norm": 0.8984375, "learning_rate": 0.00019757520037359875, "loss": 1.3057, "step": 3581 }, { "epoch": 0.09197571579195694, "grad_norm": 0.859375, "learning_rate": 0.00019757422315626092, "loss": 1.2066, "step": 3582 }, { "epoch": 0.09200139298787875, "grad_norm": 0.8671875, "learning_rate": 0.00019757324574446643, "loss": 1.2865, "step": 3583 }, { "epoch": 0.09202707018380057, "grad_norm": 0.83984375, "learning_rate": 0.00019757226813821728, "loss": 1.2038, "step": 3584 }, { "epoch": 0.0920527473797224, "grad_norm": 0.93359375, "learning_rate": 0.00019757129033751543, "loss": 1.1713, "step": 3585 }, { "epoch": 0.09207842457564422, "grad_norm": 0.8359375, "learning_rate": 0.0001975703123423628, "loss": 1.2662, "step": 3586 }, { "epoch": 0.09210410177156604, "grad_norm": 0.9609375, "learning_rate": 0.00019756933415276135, "loss": 1.0773, "step": 3587 }, { "epoch": 0.09212977896748785, "grad_norm": 0.96484375, "learning_rate": 0.00019756835576871304, "loss": 1.309, "step": 3588 }, { "epoch": 0.09215545616340967, "grad_norm": 0.84375, "learning_rate": 0.00019756737719021981, "loss": 1.1691, "step": 3589 }, { "epoch": 0.0921811333593315, "grad_norm": 0.85546875, "learning_rate": 0.0001975663984172836, "loss": 1.1459, "step": 3590 }, { "epoch": 0.09220681055525332, "grad_norm": 0.90625, "learning_rate": 0.00019756541944990642, "loss": 1.1238, "step": 3591 }, { "epoch": 0.09223248775117514, "grad_norm": 0.87109375, "learning_rate": 0.00019756444028809012, "loss": 1.2424, "step": 3592 }, { "epoch": 0.09225816494709695, "grad_norm": 0.86328125, "learning_rate": 0.00019756346093183672, "loss": 1.1603, "step": 3593 }, { "epoch": 0.09228384214301877, "grad_norm": 0.84375, "learning_rate": 0.00019756248138114814, "loss": 0.9949, "step": 3594 }, { "epoch": 0.09230951933894059, "grad_norm": 0.8359375, "learning_rate": 0.00019756150163602635, "loss": 1.1268, "step": 3595 }, { "epoch": 0.09233519653486241, "grad_norm": 1.078125, "learning_rate": 0.00019756052169647332, "loss": 1.2449, "step": 3596 }, { "epoch": 0.09236087373078423, "grad_norm": 0.9296875, "learning_rate": 0.00019755954156249093, "loss": 1.1496, "step": 3597 }, { "epoch": 0.09238655092670604, "grad_norm": 0.77734375, "learning_rate": 0.00019755856123408126, "loss": 1.2095, "step": 3598 }, { "epoch": 0.09241222812262787, "grad_norm": 1.09375, "learning_rate": 0.00019755758071124616, "loss": 1.169, "step": 3599 }, { "epoch": 0.09243790531854969, "grad_norm": 0.9375, "learning_rate": 0.00019755659999398762, "loss": 1.4576, "step": 3600 }, { "epoch": 0.09246358251447151, "grad_norm": 0.921875, "learning_rate": 0.00019755561908230758, "loss": 1.1245, "step": 3601 }, { "epoch": 0.09248925971039333, "grad_norm": 0.83984375, "learning_rate": 0.00019755463797620798, "loss": 1.2389, "step": 3602 }, { "epoch": 0.09251493690631514, "grad_norm": 0.90625, "learning_rate": 0.00019755365667569085, "loss": 1.2031, "step": 3603 }, { "epoch": 0.09254061410223696, "grad_norm": 0.94921875, "learning_rate": 0.00019755267518075807, "loss": 1.1247, "step": 3604 }, { "epoch": 0.09256629129815878, "grad_norm": 0.84375, "learning_rate": 0.0001975516934914116, "loss": 1.1493, "step": 3605 }, { "epoch": 0.0925919684940806, "grad_norm": 1.1640625, "learning_rate": 0.00019755071160765342, "loss": 1.1034, "step": 3606 }, { "epoch": 0.09261764569000243, "grad_norm": 0.80078125, "learning_rate": 0.00019754972952948553, "loss": 1.19, "step": 3607 }, { "epoch": 0.09264332288592424, "grad_norm": 0.93359375, "learning_rate": 0.0001975487472569098, "loss": 1.1307, "step": 3608 }, { "epoch": 0.09266900008184606, "grad_norm": 0.9140625, "learning_rate": 0.0001975477647899282, "loss": 1.1305, "step": 3609 }, { "epoch": 0.09269467727776788, "grad_norm": 0.8125, "learning_rate": 0.00019754678212854275, "loss": 1.283, "step": 3610 }, { "epoch": 0.0927203544736897, "grad_norm": 0.953125, "learning_rate": 0.0001975457992727554, "loss": 1.1449, "step": 3611 }, { "epoch": 0.09274603166961153, "grad_norm": 0.89453125, "learning_rate": 0.00019754481622256807, "loss": 1.1921, "step": 3612 }, { "epoch": 0.09277170886553333, "grad_norm": 0.90625, "learning_rate": 0.0001975438329779827, "loss": 1.3327, "step": 3613 }, { "epoch": 0.09279738606145516, "grad_norm": 0.859375, "learning_rate": 0.0001975428495390013, "loss": 1.08, "step": 3614 }, { "epoch": 0.09282306325737698, "grad_norm": 0.77734375, "learning_rate": 0.00019754186590562582, "loss": 1.114, "step": 3615 }, { "epoch": 0.0928487404532988, "grad_norm": 0.875, "learning_rate": 0.0001975408820778582, "loss": 1.3341, "step": 3616 }, { "epoch": 0.09287441764922062, "grad_norm": 1.046875, "learning_rate": 0.00019753989805570044, "loss": 1.3621, "step": 3617 }, { "epoch": 0.09290009484514243, "grad_norm": 1.0546875, "learning_rate": 0.00019753891383915445, "loss": 1.1447, "step": 3618 }, { "epoch": 0.09292577204106425, "grad_norm": 0.82421875, "learning_rate": 0.0001975379294282222, "loss": 1.0748, "step": 3619 }, { "epoch": 0.09295144923698608, "grad_norm": 0.86328125, "learning_rate": 0.0001975369448229057, "loss": 1.1988, "step": 3620 }, { "epoch": 0.0929771264329079, "grad_norm": 0.91796875, "learning_rate": 0.00019753596002320686, "loss": 1.289, "step": 3621 }, { "epoch": 0.09300280362882972, "grad_norm": 0.83203125, "learning_rate": 0.0001975349750291277, "loss": 1.2003, "step": 3622 }, { "epoch": 0.09302848082475153, "grad_norm": 0.85546875, "learning_rate": 0.00019753398984067008, "loss": 1.266, "step": 3623 }, { "epoch": 0.09305415802067335, "grad_norm": 0.93359375, "learning_rate": 0.00019753300445783607, "loss": 1.1414, "step": 3624 }, { "epoch": 0.09307983521659517, "grad_norm": 0.98828125, "learning_rate": 0.00019753201888062758, "loss": 1.3385, "step": 3625 }, { "epoch": 0.093105512412517, "grad_norm": 0.87109375, "learning_rate": 0.0001975310331090466, "loss": 1.139, "step": 3626 }, { "epoch": 0.09313118960843882, "grad_norm": 0.91015625, "learning_rate": 0.00019753004714309505, "loss": 1.1768, "step": 3627 }, { "epoch": 0.09315686680436062, "grad_norm": 0.86328125, "learning_rate": 0.00019752906098277495, "loss": 1.1279, "step": 3628 }, { "epoch": 0.09318254400028245, "grad_norm": 0.859375, "learning_rate": 0.00019752807462808825, "loss": 1.1893, "step": 3629 }, { "epoch": 0.09320822119620427, "grad_norm": 0.859375, "learning_rate": 0.00019752708807903687, "loss": 1.1989, "step": 3630 }, { "epoch": 0.09323389839212609, "grad_norm": 0.87109375, "learning_rate": 0.00019752610133562287, "loss": 1.2225, "step": 3631 }, { "epoch": 0.09325957558804791, "grad_norm": 0.875, "learning_rate": 0.00019752511439784815, "loss": 1.223, "step": 3632 }, { "epoch": 0.09328525278396972, "grad_norm": 0.89453125, "learning_rate": 0.00019752412726571462, "loss": 1.1011, "step": 3633 }, { "epoch": 0.09331092997989154, "grad_norm": 0.82421875, "learning_rate": 0.00019752313993922437, "loss": 1.1251, "step": 3634 }, { "epoch": 0.09333660717581337, "grad_norm": 0.8203125, "learning_rate": 0.0001975221524183793, "loss": 1.1548, "step": 3635 }, { "epoch": 0.09336228437173519, "grad_norm": 0.8203125, "learning_rate": 0.00019752116470318139, "loss": 1.2051, "step": 3636 }, { "epoch": 0.09338796156765701, "grad_norm": 0.8828125, "learning_rate": 0.0001975201767936326, "loss": 1.2406, "step": 3637 }, { "epoch": 0.09341363876357882, "grad_norm": 0.8671875, "learning_rate": 0.0001975191886897349, "loss": 1.1784, "step": 3638 }, { "epoch": 0.09343931595950064, "grad_norm": 0.90234375, "learning_rate": 0.00019751820039149028, "loss": 1.066, "step": 3639 }, { "epoch": 0.09346499315542246, "grad_norm": 0.9921875, "learning_rate": 0.00019751721189890072, "loss": 1.1528, "step": 3640 }, { "epoch": 0.09349067035134428, "grad_norm": 0.94921875, "learning_rate": 0.00019751622321196815, "loss": 1.2577, "step": 3641 }, { "epoch": 0.09351634754726611, "grad_norm": 0.83203125, "learning_rate": 0.00019751523433069452, "loss": 1.0231, "step": 3642 }, { "epoch": 0.09354202474318792, "grad_norm": 0.94921875, "learning_rate": 0.00019751424525508187, "loss": 1.0922, "step": 3643 }, { "epoch": 0.09356770193910974, "grad_norm": 0.828125, "learning_rate": 0.00019751325598513212, "loss": 1.1894, "step": 3644 }, { "epoch": 0.09359337913503156, "grad_norm": 0.91796875, "learning_rate": 0.00019751226652084728, "loss": 1.2717, "step": 3645 }, { "epoch": 0.09361905633095338, "grad_norm": 0.90625, "learning_rate": 0.00019751127686222928, "loss": 1.2493, "step": 3646 }, { "epoch": 0.0936447335268752, "grad_norm": 0.9375, "learning_rate": 0.00019751028700928014, "loss": 1.2309, "step": 3647 }, { "epoch": 0.09367041072279701, "grad_norm": 0.8828125, "learning_rate": 0.00019750929696200176, "loss": 1.104, "step": 3648 }, { "epoch": 0.09369608791871883, "grad_norm": 0.90625, "learning_rate": 0.00019750830672039623, "loss": 1.1416, "step": 3649 }, { "epoch": 0.09372176511464066, "grad_norm": 0.84765625, "learning_rate": 0.0001975073162844654, "loss": 1.0922, "step": 3650 }, { "epoch": 0.09374744231056248, "grad_norm": 0.97265625, "learning_rate": 0.0001975063256542113, "loss": 1.2211, "step": 3651 }, { "epoch": 0.0937731195064843, "grad_norm": 0.90234375, "learning_rate": 0.00019750533482963591, "loss": 1.2065, "step": 3652 }, { "epoch": 0.09379879670240611, "grad_norm": 0.86328125, "learning_rate": 0.0001975043438107412, "loss": 1.109, "step": 3653 }, { "epoch": 0.09382447389832793, "grad_norm": 0.90234375, "learning_rate": 0.00019750335259752913, "loss": 1.2152, "step": 3654 }, { "epoch": 0.09385015109424975, "grad_norm": 0.91015625, "learning_rate": 0.0001975023611900017, "loss": 1.159, "step": 3655 }, { "epoch": 0.09387582829017158, "grad_norm": 0.9296875, "learning_rate": 0.00019750136958816085, "loss": 1.2625, "step": 3656 }, { "epoch": 0.0939015054860934, "grad_norm": 0.91796875, "learning_rate": 0.0001975003777920086, "loss": 1.2882, "step": 3657 }, { "epoch": 0.0939271826820152, "grad_norm": 0.80859375, "learning_rate": 0.00019749938580154687, "loss": 1.0964, "step": 3658 }, { "epoch": 0.09395285987793703, "grad_norm": 0.875, "learning_rate": 0.0001974983936167777, "loss": 1.3513, "step": 3659 }, { "epoch": 0.09397853707385885, "grad_norm": 0.89453125, "learning_rate": 0.00019749740123770304, "loss": 1.2628, "step": 3660 }, { "epoch": 0.09400421426978067, "grad_norm": 0.93359375, "learning_rate": 0.00019749640866432487, "loss": 1.1465, "step": 3661 }, { "epoch": 0.0940298914657025, "grad_norm": 0.85546875, "learning_rate": 0.00019749541589664514, "loss": 1.2172, "step": 3662 }, { "epoch": 0.0940555686616243, "grad_norm": 0.81640625, "learning_rate": 0.00019749442293466588, "loss": 1.1048, "step": 3663 }, { "epoch": 0.09408124585754613, "grad_norm": 0.89453125, "learning_rate": 0.00019749342977838904, "loss": 1.1641, "step": 3664 }, { "epoch": 0.09410692305346795, "grad_norm": 0.94140625, "learning_rate": 0.00019749243642781656, "loss": 1.1637, "step": 3665 }, { "epoch": 0.09413260024938977, "grad_norm": 0.89453125, "learning_rate": 0.0001974914428829505, "loss": 1.2744, "step": 3666 }, { "epoch": 0.09415827744531158, "grad_norm": 0.87109375, "learning_rate": 0.00019749044914379278, "loss": 1.1789, "step": 3667 }, { "epoch": 0.0941839546412334, "grad_norm": 0.859375, "learning_rate": 0.0001974894552103454, "loss": 1.18, "step": 3668 }, { "epoch": 0.09420963183715522, "grad_norm": 0.9921875, "learning_rate": 0.00019748846108261036, "loss": 1.139, "step": 3669 }, { "epoch": 0.09423530903307704, "grad_norm": 0.95703125, "learning_rate": 0.0001974874667605896, "loss": 1.1923, "step": 3670 }, { "epoch": 0.09426098622899887, "grad_norm": 0.89453125, "learning_rate": 0.00019748647224428515, "loss": 1.2413, "step": 3671 }, { "epoch": 0.09428666342492067, "grad_norm": 0.859375, "learning_rate": 0.00019748547753369898, "loss": 1.1761, "step": 3672 }, { "epoch": 0.0943123406208425, "grad_norm": 0.77734375, "learning_rate": 0.00019748448262883303, "loss": 1.1459, "step": 3673 }, { "epoch": 0.09433801781676432, "grad_norm": 0.90625, "learning_rate": 0.00019748348752968931, "loss": 1.3845, "step": 3674 }, { "epoch": 0.09436369501268614, "grad_norm": 0.890625, "learning_rate": 0.00019748249223626983, "loss": 1.2405, "step": 3675 }, { "epoch": 0.09438937220860796, "grad_norm": 0.88671875, "learning_rate": 0.00019748149674857652, "loss": 1.2064, "step": 3676 }, { "epoch": 0.09441504940452977, "grad_norm": 0.86328125, "learning_rate": 0.00019748050106661142, "loss": 1.1008, "step": 3677 }, { "epoch": 0.0944407266004516, "grad_norm": 0.87109375, "learning_rate": 0.00019747950519037645, "loss": 1.2904, "step": 3678 }, { "epoch": 0.09446640379637342, "grad_norm": 0.9140625, "learning_rate": 0.00019747850911987366, "loss": 1.174, "step": 3679 }, { "epoch": 0.09449208099229524, "grad_norm": 0.86328125, "learning_rate": 0.000197477512855105, "loss": 1.1252, "step": 3680 }, { "epoch": 0.09451775818821706, "grad_norm": 0.8671875, "learning_rate": 0.00019747651639607247, "loss": 1.2278, "step": 3681 }, { "epoch": 0.09454343538413887, "grad_norm": 0.8671875, "learning_rate": 0.00019747551974277805, "loss": 1.2764, "step": 3682 }, { "epoch": 0.09456911258006069, "grad_norm": 0.9375, "learning_rate": 0.0001974745228952237, "loss": 1.0539, "step": 3683 }, { "epoch": 0.09459478977598251, "grad_norm": 0.93359375, "learning_rate": 0.00019747352585341145, "loss": 1.2078, "step": 3684 }, { "epoch": 0.09462046697190434, "grad_norm": 0.88671875, "learning_rate": 0.00019747252861734325, "loss": 1.1393, "step": 3685 }, { "epoch": 0.09464614416782616, "grad_norm": 0.8828125, "learning_rate": 0.00019747153118702113, "loss": 1.2904, "step": 3686 }, { "epoch": 0.09467182136374797, "grad_norm": 0.88671875, "learning_rate": 0.00019747053356244704, "loss": 1.1368, "step": 3687 }, { "epoch": 0.09469749855966979, "grad_norm": 0.84375, "learning_rate": 0.00019746953574362297, "loss": 1.1926, "step": 3688 }, { "epoch": 0.09472317575559161, "grad_norm": 0.83203125, "learning_rate": 0.0001974685377305509, "loss": 1.0769, "step": 3689 }, { "epoch": 0.09474885295151343, "grad_norm": 0.8515625, "learning_rate": 0.00019746753952323292, "loss": 1.1879, "step": 3690 }, { "epoch": 0.09477453014743525, "grad_norm": 0.9140625, "learning_rate": 0.00019746654112167086, "loss": 1.3894, "step": 3691 }, { "epoch": 0.09480020734335706, "grad_norm": 0.92578125, "learning_rate": 0.0001974655425258668, "loss": 1.1965, "step": 3692 }, { "epoch": 0.09482588453927888, "grad_norm": 0.89453125, "learning_rate": 0.00019746454373582278, "loss": 1.2488, "step": 3693 }, { "epoch": 0.0948515617352007, "grad_norm": 0.84765625, "learning_rate": 0.00019746354475154066, "loss": 1.2538, "step": 3694 }, { "epoch": 0.09487723893112253, "grad_norm": 0.82421875, "learning_rate": 0.00019746254557302252, "loss": 1.0912, "step": 3695 }, { "epoch": 0.09490291612704435, "grad_norm": 0.95703125, "learning_rate": 0.00019746154620027033, "loss": 1.1559, "step": 3696 }, { "epoch": 0.09492859332296616, "grad_norm": 0.91015625, "learning_rate": 0.00019746054663328608, "loss": 1.116, "step": 3697 }, { "epoch": 0.09495427051888798, "grad_norm": 0.83203125, "learning_rate": 0.00019745954687207178, "loss": 1.2935, "step": 3698 }, { "epoch": 0.0949799477148098, "grad_norm": 0.94140625, "learning_rate": 0.00019745854691662938, "loss": 1.3225, "step": 3699 }, { "epoch": 0.09500562491073163, "grad_norm": 0.84375, "learning_rate": 0.00019745754676696092, "loss": 1.2331, "step": 3700 }, { "epoch": 0.09503130210665345, "grad_norm": 0.82421875, "learning_rate": 0.00019745654642306835, "loss": 1.1397, "step": 3701 }, { "epoch": 0.09505697930257526, "grad_norm": 1.0859375, "learning_rate": 0.00019745554588495372, "loss": 1.1509, "step": 3702 }, { "epoch": 0.09508265649849708, "grad_norm": 0.86328125, "learning_rate": 0.00019745454515261897, "loss": 1.3661, "step": 3703 }, { "epoch": 0.0951083336944189, "grad_norm": 0.83984375, "learning_rate": 0.0001974535442260661, "loss": 1.2357, "step": 3704 }, { "epoch": 0.09513401089034072, "grad_norm": 0.875, "learning_rate": 0.00019745254310529715, "loss": 1.2762, "step": 3705 }, { "epoch": 0.09515968808626254, "grad_norm": 0.88671875, "learning_rate": 0.00019745154179031407, "loss": 1.2749, "step": 3706 }, { "epoch": 0.09518536528218435, "grad_norm": 0.85546875, "learning_rate": 0.00019745054028111886, "loss": 1.1469, "step": 3707 }, { "epoch": 0.09521104247810618, "grad_norm": 0.87890625, "learning_rate": 0.00019744953857771353, "loss": 1.2373, "step": 3708 }, { "epoch": 0.095236719674028, "grad_norm": 0.8828125, "learning_rate": 0.00019744853668010007, "loss": 1.3669, "step": 3709 }, { "epoch": 0.09526239686994982, "grad_norm": 0.83984375, "learning_rate": 0.00019744753458828053, "loss": 1.0996, "step": 3710 }, { "epoch": 0.09528807406587164, "grad_norm": 0.82421875, "learning_rate": 0.0001974465323022568, "loss": 1.1206, "step": 3711 }, { "epoch": 0.09531375126179345, "grad_norm": 0.859375, "learning_rate": 0.00019744552982203092, "loss": 1.3149, "step": 3712 }, { "epoch": 0.09533942845771527, "grad_norm": 0.88671875, "learning_rate": 0.00019744452714760496, "loss": 1.3466, "step": 3713 }, { "epoch": 0.0953651056536371, "grad_norm": 0.828125, "learning_rate": 0.0001974435242789808, "loss": 1.1655, "step": 3714 }, { "epoch": 0.09539078284955892, "grad_norm": 0.7265625, "learning_rate": 0.00019744252121616055, "loss": 1.1534, "step": 3715 }, { "epoch": 0.09541646004548074, "grad_norm": 0.875, "learning_rate": 0.0001974415179591461, "loss": 1.157, "step": 3716 }, { "epoch": 0.09544213724140255, "grad_norm": 0.984375, "learning_rate": 0.00019744051450793957, "loss": 1.1664, "step": 3717 }, { "epoch": 0.09546781443732437, "grad_norm": 0.80859375, "learning_rate": 0.00019743951086254285, "loss": 0.9518, "step": 3718 }, { "epoch": 0.09549349163324619, "grad_norm": 0.92578125, "learning_rate": 0.000197438507022958, "loss": 1.3556, "step": 3719 }, { "epoch": 0.09551916882916801, "grad_norm": 0.8515625, "learning_rate": 0.00019743750298918702, "loss": 1.1636, "step": 3720 }, { "epoch": 0.09554484602508984, "grad_norm": 0.859375, "learning_rate": 0.00019743649876123186, "loss": 1.3551, "step": 3721 }, { "epoch": 0.09557052322101164, "grad_norm": 0.94140625, "learning_rate": 0.0001974354943390946, "loss": 1.3426, "step": 3722 }, { "epoch": 0.09559620041693347, "grad_norm": 0.9375, "learning_rate": 0.0001974344897227772, "loss": 1.1872, "step": 3723 }, { "epoch": 0.09562187761285529, "grad_norm": 0.7734375, "learning_rate": 0.00019743348491228164, "loss": 1.1276, "step": 3724 }, { "epoch": 0.09564755480877711, "grad_norm": 0.890625, "learning_rate": 0.00019743247990760995, "loss": 1.0758, "step": 3725 }, { "epoch": 0.09567323200469893, "grad_norm": 0.92578125, "learning_rate": 0.00019743147470876413, "loss": 1.0971, "step": 3726 }, { "epoch": 0.09569890920062074, "grad_norm": 0.8515625, "learning_rate": 0.0001974304693157462, "loss": 1.2424, "step": 3727 }, { "epoch": 0.09572458639654256, "grad_norm": 0.92578125, "learning_rate": 0.00019742946372855813, "loss": 1.1445, "step": 3728 }, { "epoch": 0.09575026359246439, "grad_norm": 0.84375, "learning_rate": 0.00019742845794720192, "loss": 1.2338, "step": 3729 }, { "epoch": 0.09577594078838621, "grad_norm": 0.890625, "learning_rate": 0.0001974274519716796, "loss": 1.2138, "step": 3730 }, { "epoch": 0.09580161798430803, "grad_norm": 0.875, "learning_rate": 0.0001974264458019932, "loss": 1.1351, "step": 3731 }, { "epoch": 0.09582729518022984, "grad_norm": 0.91015625, "learning_rate": 0.0001974254394381447, "loss": 1.0728, "step": 3732 }, { "epoch": 0.09585297237615166, "grad_norm": 0.87109375, "learning_rate": 0.0001974244328801361, "loss": 1.357, "step": 3733 }, { "epoch": 0.09587864957207348, "grad_norm": 0.90234375, "learning_rate": 0.00019742342612796935, "loss": 1.2212, "step": 3734 }, { "epoch": 0.0959043267679953, "grad_norm": 0.79296875, "learning_rate": 0.00019742241918164656, "loss": 1.0806, "step": 3735 }, { "epoch": 0.09593000396391713, "grad_norm": 0.90625, "learning_rate": 0.00019742141204116966, "loss": 1.2332, "step": 3736 }, { "epoch": 0.09595568115983893, "grad_norm": 0.90234375, "learning_rate": 0.0001974204047065407, "loss": 1.1527, "step": 3737 }, { "epoch": 0.09598135835576076, "grad_norm": 0.875, "learning_rate": 0.00019741939717776168, "loss": 1.1615, "step": 3738 }, { "epoch": 0.09600703555168258, "grad_norm": 0.84765625, "learning_rate": 0.0001974183894548346, "loss": 1.3667, "step": 3739 }, { "epoch": 0.0960327127476044, "grad_norm": 0.91015625, "learning_rate": 0.00019741738153776145, "loss": 1.2184, "step": 3740 }, { "epoch": 0.09605838994352622, "grad_norm": 0.89453125, "learning_rate": 0.00019741637342654428, "loss": 1.1657, "step": 3741 }, { "epoch": 0.09608406713944803, "grad_norm": 0.828125, "learning_rate": 0.00019741536512118507, "loss": 1.0794, "step": 3742 }, { "epoch": 0.09610974433536985, "grad_norm": 0.7734375, "learning_rate": 0.00019741435662168585, "loss": 1.1298, "step": 3743 }, { "epoch": 0.09613542153129168, "grad_norm": 0.9765625, "learning_rate": 0.0001974133479280486, "loss": 1.2634, "step": 3744 }, { "epoch": 0.0961610987272135, "grad_norm": 0.8125, "learning_rate": 0.00019741233904027535, "loss": 1.187, "step": 3745 }, { "epoch": 0.09618677592313532, "grad_norm": 0.8125, "learning_rate": 0.00019741132995836812, "loss": 1.1484, "step": 3746 }, { "epoch": 0.09621245311905713, "grad_norm": 0.828125, "learning_rate": 0.00019741032068232889, "loss": 1.1486, "step": 3747 }, { "epoch": 0.09623813031497895, "grad_norm": 0.86328125, "learning_rate": 0.00019740931121215967, "loss": 1.2594, "step": 3748 }, { "epoch": 0.09626380751090077, "grad_norm": 0.86328125, "learning_rate": 0.00019740830154786254, "loss": 1.1114, "step": 3749 }, { "epoch": 0.0962894847068226, "grad_norm": 0.92578125, "learning_rate": 0.00019740729168943942, "loss": 0.9505, "step": 3750 }, { "epoch": 0.09631516190274442, "grad_norm": 0.859375, "learning_rate": 0.0001974062816368924, "loss": 1.04, "step": 3751 }, { "epoch": 0.09634083909866623, "grad_norm": 0.84765625, "learning_rate": 0.00019740527139022347, "loss": 1.2937, "step": 3752 }, { "epoch": 0.09636651629458805, "grad_norm": 0.87890625, "learning_rate": 0.0001974042609494346, "loss": 1.1996, "step": 3753 }, { "epoch": 0.09639219349050987, "grad_norm": 0.7890625, "learning_rate": 0.00019740325031452783, "loss": 1.0522, "step": 3754 }, { "epoch": 0.09641787068643169, "grad_norm": 0.89453125, "learning_rate": 0.0001974022394855052, "loss": 1.2138, "step": 3755 }, { "epoch": 0.09644354788235351, "grad_norm": 0.84375, "learning_rate": 0.0001974012284623687, "loss": 1.0155, "step": 3756 }, { "epoch": 0.09646922507827532, "grad_norm": 0.8984375, "learning_rate": 0.00019740021724512036, "loss": 1.1274, "step": 3757 }, { "epoch": 0.09649490227419714, "grad_norm": 0.90234375, "learning_rate": 0.00019739920583376218, "loss": 1.2105, "step": 3758 }, { "epoch": 0.09652057947011897, "grad_norm": 0.859375, "learning_rate": 0.00019739819422829618, "loss": 1.2578, "step": 3759 }, { "epoch": 0.09654625666604079, "grad_norm": 0.84375, "learning_rate": 0.00019739718242872437, "loss": 1.2383, "step": 3760 }, { "epoch": 0.09657193386196261, "grad_norm": 0.984375, "learning_rate": 0.0001973961704350488, "loss": 1.5368, "step": 3761 }, { "epoch": 0.09659761105788442, "grad_norm": 0.87109375, "learning_rate": 0.0001973951582472714, "loss": 1.1066, "step": 3762 }, { "epoch": 0.09662328825380624, "grad_norm": 0.9375, "learning_rate": 0.00019739414586539432, "loss": 1.1013, "step": 3763 }, { "epoch": 0.09664896544972806, "grad_norm": 0.84765625, "learning_rate": 0.00019739313328941943, "loss": 1.0953, "step": 3764 }, { "epoch": 0.09667464264564989, "grad_norm": 0.85546875, "learning_rate": 0.00019739212051934888, "loss": 1.1213, "step": 3765 }, { "epoch": 0.09670031984157171, "grad_norm": 0.94140625, "learning_rate": 0.00019739110755518462, "loss": 1.1256, "step": 3766 }, { "epoch": 0.09672599703749352, "grad_norm": 0.90625, "learning_rate": 0.00019739009439692867, "loss": 1.1779, "step": 3767 }, { "epoch": 0.09675167423341534, "grad_norm": 0.83203125, "learning_rate": 0.00019738908104458306, "loss": 1.0783, "step": 3768 }, { "epoch": 0.09677735142933716, "grad_norm": 0.80078125, "learning_rate": 0.0001973880674981498, "loss": 1.1011, "step": 3769 }, { "epoch": 0.09680302862525898, "grad_norm": 0.94921875, "learning_rate": 0.00019738705375763095, "loss": 1.2036, "step": 3770 }, { "epoch": 0.09682870582118079, "grad_norm": 0.9296875, "learning_rate": 0.00019738603982302847, "loss": 1.2927, "step": 3771 }, { "epoch": 0.09685438301710261, "grad_norm": 1.0234375, "learning_rate": 0.00019738502569434442, "loss": 1.4425, "step": 3772 }, { "epoch": 0.09688006021302444, "grad_norm": 0.87890625, "learning_rate": 0.00019738401137158083, "loss": 1.2649, "step": 3773 }, { "epoch": 0.09690573740894626, "grad_norm": 0.9453125, "learning_rate": 0.0001973829968547397, "loss": 1.1621, "step": 3774 }, { "epoch": 0.09693141460486808, "grad_norm": 0.85546875, "learning_rate": 0.00019738198214382303, "loss": 0.9427, "step": 3775 }, { "epoch": 0.09695709180078989, "grad_norm": 0.89453125, "learning_rate": 0.00019738096723883285, "loss": 1.1759, "step": 3776 }, { "epoch": 0.09698276899671171, "grad_norm": 0.8828125, "learning_rate": 0.00019737995213977124, "loss": 1.1048, "step": 3777 }, { "epoch": 0.09700844619263353, "grad_norm": 0.890625, "learning_rate": 0.00019737893684664015, "loss": 1.1817, "step": 3778 }, { "epoch": 0.09703412338855535, "grad_norm": 0.9375, "learning_rate": 0.00019737792135944164, "loss": 1.2181, "step": 3779 }, { "epoch": 0.09705980058447718, "grad_norm": 0.8515625, "learning_rate": 0.00019737690567817775, "loss": 1.1999, "step": 3780 }, { "epoch": 0.09708547778039898, "grad_norm": 0.8671875, "learning_rate": 0.00019737588980285047, "loss": 1.1426, "step": 3781 }, { "epoch": 0.09711115497632081, "grad_norm": 0.890625, "learning_rate": 0.00019737487373346183, "loss": 1.2667, "step": 3782 }, { "epoch": 0.09713683217224263, "grad_norm": 1.609375, "learning_rate": 0.00019737385747001387, "loss": 1.0507, "step": 3783 }, { "epoch": 0.09716250936816445, "grad_norm": 0.93359375, "learning_rate": 0.0001973728410125086, "loss": 1.2371, "step": 3784 }, { "epoch": 0.09718818656408627, "grad_norm": 0.8359375, "learning_rate": 0.00019737182436094805, "loss": 1.1599, "step": 3785 }, { "epoch": 0.09721386376000808, "grad_norm": 0.8515625, "learning_rate": 0.00019737080751533427, "loss": 1.3813, "step": 3786 }, { "epoch": 0.0972395409559299, "grad_norm": 0.8671875, "learning_rate": 0.00019736979047566924, "loss": 1.3358, "step": 3787 }, { "epoch": 0.09726521815185173, "grad_norm": 0.80859375, "learning_rate": 0.00019736877324195504, "loss": 1.264, "step": 3788 }, { "epoch": 0.09729089534777355, "grad_norm": 0.82421875, "learning_rate": 0.00019736775581419367, "loss": 0.9728, "step": 3789 }, { "epoch": 0.09731657254369537, "grad_norm": 0.80078125, "learning_rate": 0.00019736673819238713, "loss": 1.2481, "step": 3790 }, { "epoch": 0.09734224973961718, "grad_norm": 0.9453125, "learning_rate": 0.0001973657203765375, "loss": 1.1691, "step": 3791 }, { "epoch": 0.097367926935539, "grad_norm": 0.8203125, "learning_rate": 0.00019736470236664678, "loss": 1.3541, "step": 3792 }, { "epoch": 0.09739360413146082, "grad_norm": 0.921875, "learning_rate": 0.00019736368416271697, "loss": 1.2615, "step": 3793 }, { "epoch": 0.09741928132738265, "grad_norm": 0.80859375, "learning_rate": 0.00019736266576475019, "loss": 1.1612, "step": 3794 }, { "epoch": 0.09744495852330447, "grad_norm": 0.8828125, "learning_rate": 0.00019736164717274837, "loss": 1.1731, "step": 3795 }, { "epoch": 0.09747063571922628, "grad_norm": 0.8828125, "learning_rate": 0.0001973606283867136, "loss": 1.2255, "step": 3796 }, { "epoch": 0.0974963129151481, "grad_norm": 0.8671875, "learning_rate": 0.00019735960940664787, "loss": 1.2039, "step": 3797 }, { "epoch": 0.09752199011106992, "grad_norm": 0.859375, "learning_rate": 0.00019735859023255328, "loss": 1.0716, "step": 3798 }, { "epoch": 0.09754766730699174, "grad_norm": 0.8203125, "learning_rate": 0.0001973575708644318, "loss": 1.2124, "step": 3799 }, { "epoch": 0.09757334450291356, "grad_norm": 0.90625, "learning_rate": 0.00019735655130228544, "loss": 1.1593, "step": 3800 }, { "epoch": 0.09759902169883537, "grad_norm": 0.86328125, "learning_rate": 0.00019735553154611628, "loss": 1.2432, "step": 3801 }, { "epoch": 0.0976246988947572, "grad_norm": 0.84765625, "learning_rate": 0.00019735451159592636, "loss": 1.2401, "step": 3802 }, { "epoch": 0.09765037609067902, "grad_norm": 0.8203125, "learning_rate": 0.00019735349145171767, "loss": 1.1835, "step": 3803 }, { "epoch": 0.09767605328660084, "grad_norm": 0.9140625, "learning_rate": 0.0001973524711134923, "loss": 1.2016, "step": 3804 }, { "epoch": 0.09770173048252266, "grad_norm": 0.94921875, "learning_rate": 0.0001973514505812522, "loss": 1.0517, "step": 3805 }, { "epoch": 0.09772740767844447, "grad_norm": 0.921875, "learning_rate": 0.00019735042985499947, "loss": 1.2813, "step": 3806 }, { "epoch": 0.09775308487436629, "grad_norm": 0.83203125, "learning_rate": 0.00019734940893473615, "loss": 1.055, "step": 3807 }, { "epoch": 0.09777876207028811, "grad_norm": 0.8671875, "learning_rate": 0.00019734838782046424, "loss": 1.1992, "step": 3808 }, { "epoch": 0.09780443926620994, "grad_norm": 0.91015625, "learning_rate": 0.00019734736651218576, "loss": 1.1972, "step": 3809 }, { "epoch": 0.09783011646213176, "grad_norm": 0.87890625, "learning_rate": 0.0001973463450099028, "loss": 1.2179, "step": 3810 }, { "epoch": 0.09785579365805357, "grad_norm": 0.828125, "learning_rate": 0.00019734532331361736, "loss": 0.8147, "step": 3811 }, { "epoch": 0.09788147085397539, "grad_norm": 0.8828125, "learning_rate": 0.00019734430142333148, "loss": 1.1601, "step": 3812 }, { "epoch": 0.09790714804989721, "grad_norm": 1.0625, "learning_rate": 0.00019734327933904723, "loss": 1.0455, "step": 3813 }, { "epoch": 0.09793282524581903, "grad_norm": 1.3203125, "learning_rate": 0.0001973422570607666, "loss": 1.0758, "step": 3814 }, { "epoch": 0.09795850244174086, "grad_norm": 0.94140625, "learning_rate": 0.0001973412345884916, "loss": 1.2291, "step": 3815 }, { "epoch": 0.09798417963766266, "grad_norm": 0.87890625, "learning_rate": 0.00019734021192222438, "loss": 1.2439, "step": 3816 }, { "epoch": 0.09800985683358449, "grad_norm": 0.84375, "learning_rate": 0.00019733918906196686, "loss": 1.2264, "step": 3817 }, { "epoch": 0.09803553402950631, "grad_norm": 0.84375, "learning_rate": 0.00019733816600772116, "loss": 1.1595, "step": 3818 }, { "epoch": 0.09806121122542813, "grad_norm": 0.7734375, "learning_rate": 0.00019733714275948927, "loss": 1.0938, "step": 3819 }, { "epoch": 0.09808688842134995, "grad_norm": 0.859375, "learning_rate": 0.00019733611931727326, "loss": 1.1814, "step": 3820 }, { "epoch": 0.09811256561727176, "grad_norm": 0.8515625, "learning_rate": 0.00019733509568107516, "loss": 1.2993, "step": 3821 }, { "epoch": 0.09813824281319358, "grad_norm": 0.80078125, "learning_rate": 0.000197334071850897, "loss": 1.2435, "step": 3822 }, { "epoch": 0.0981639200091154, "grad_norm": 0.90234375, "learning_rate": 0.00019733304782674083, "loss": 1.3087, "step": 3823 }, { "epoch": 0.09818959720503723, "grad_norm": 0.859375, "learning_rate": 0.00019733202360860865, "loss": 1.1817, "step": 3824 }, { "epoch": 0.09821527440095905, "grad_norm": 0.91015625, "learning_rate": 0.00019733099919650257, "loss": 1.2764, "step": 3825 }, { "epoch": 0.09824095159688086, "grad_norm": 0.8125, "learning_rate": 0.0001973299745904246, "loss": 1.1193, "step": 3826 }, { "epoch": 0.09826662879280268, "grad_norm": 0.8671875, "learning_rate": 0.0001973289497903768, "loss": 1.27, "step": 3827 }, { "epoch": 0.0982923059887245, "grad_norm": 0.83984375, "learning_rate": 0.00019732792479636117, "loss": 1.2051, "step": 3828 }, { "epoch": 0.09831798318464632, "grad_norm": 0.7890625, "learning_rate": 0.0001973268996083798, "loss": 1.2228, "step": 3829 }, { "epoch": 0.09834366038056815, "grad_norm": 0.8828125, "learning_rate": 0.0001973258742264347, "loss": 1.1197, "step": 3830 }, { "epoch": 0.09836933757648995, "grad_norm": 0.83984375, "learning_rate": 0.0001973248486505279, "loss": 1.0543, "step": 3831 }, { "epoch": 0.09839501477241178, "grad_norm": 0.8515625, "learning_rate": 0.0001973238228806615, "loss": 1.2091, "step": 3832 }, { "epoch": 0.0984206919683336, "grad_norm": 0.8671875, "learning_rate": 0.0001973227969168375, "loss": 1.1645, "step": 3833 }, { "epoch": 0.09844636916425542, "grad_norm": 0.89453125, "learning_rate": 0.00019732177075905795, "loss": 1.2682, "step": 3834 }, { "epoch": 0.09847204636017724, "grad_norm": 0.90234375, "learning_rate": 0.00019732074440732491, "loss": 1.1757, "step": 3835 }, { "epoch": 0.09849772355609905, "grad_norm": 0.86328125, "learning_rate": 0.00019731971786164044, "loss": 1.0185, "step": 3836 }, { "epoch": 0.09852340075202087, "grad_norm": 0.875, "learning_rate": 0.0001973186911220065, "loss": 1.2896, "step": 3837 }, { "epoch": 0.0985490779479427, "grad_norm": 0.8671875, "learning_rate": 0.00019731766418842526, "loss": 1.03, "step": 3838 }, { "epoch": 0.09857475514386452, "grad_norm": 0.82421875, "learning_rate": 0.00019731663706089867, "loss": 1.1162, "step": 3839 }, { "epoch": 0.09860043233978634, "grad_norm": 0.97265625, "learning_rate": 0.00019731560973942883, "loss": 1.0856, "step": 3840 }, { "epoch": 0.09862610953570815, "grad_norm": 0.87890625, "learning_rate": 0.00019731458222401778, "loss": 1.0444, "step": 3841 }, { "epoch": 0.09865178673162997, "grad_norm": 0.89453125, "learning_rate": 0.00019731355451466754, "loss": 1.2106, "step": 3842 }, { "epoch": 0.09867746392755179, "grad_norm": 0.84375, "learning_rate": 0.00019731252661138018, "loss": 1.0946, "step": 3843 }, { "epoch": 0.09870314112347361, "grad_norm": 0.8515625, "learning_rate": 0.00019731149851415775, "loss": 1.0713, "step": 3844 }, { "epoch": 0.09872881831939544, "grad_norm": 0.81640625, "learning_rate": 0.0001973104702230023, "loss": 1.0242, "step": 3845 }, { "epoch": 0.09875449551531724, "grad_norm": 0.83203125, "learning_rate": 0.00019730944173791586, "loss": 1.2818, "step": 3846 }, { "epoch": 0.09878017271123907, "grad_norm": 0.9375, "learning_rate": 0.0001973084130589005, "loss": 1.3578, "step": 3847 }, { "epoch": 0.09880584990716089, "grad_norm": 0.92578125, "learning_rate": 0.00019730738418595825, "loss": 1.2066, "step": 3848 }, { "epoch": 0.09883152710308271, "grad_norm": 0.91796875, "learning_rate": 0.00019730635511909118, "loss": 1.2333, "step": 3849 }, { "epoch": 0.09885720429900453, "grad_norm": 0.88671875, "learning_rate": 0.00019730532585830136, "loss": 1.1222, "step": 3850 }, { "epoch": 0.09888288149492634, "grad_norm": 0.9296875, "learning_rate": 0.00019730429640359075, "loss": 1.1469, "step": 3851 }, { "epoch": 0.09890855869084816, "grad_norm": 0.8515625, "learning_rate": 0.0001973032667549615, "loss": 1.2401, "step": 3852 }, { "epoch": 0.09893423588676999, "grad_norm": 0.8046875, "learning_rate": 0.00019730223691241566, "loss": 1.0836, "step": 3853 }, { "epoch": 0.09895991308269181, "grad_norm": 0.83984375, "learning_rate": 0.0001973012068759552, "loss": 1.2489, "step": 3854 }, { "epoch": 0.09898559027861363, "grad_norm": 0.8515625, "learning_rate": 0.00019730017664558227, "loss": 1.1381, "step": 3855 }, { "epoch": 0.09901126747453544, "grad_norm": 0.8828125, "learning_rate": 0.00019729914622129886, "loss": 1.316, "step": 3856 }, { "epoch": 0.09903694467045726, "grad_norm": 0.94921875, "learning_rate": 0.00019729811560310702, "loss": 1.2471, "step": 3857 }, { "epoch": 0.09906262186637908, "grad_norm": 0.859375, "learning_rate": 0.00019729708479100885, "loss": 1.128, "step": 3858 }, { "epoch": 0.0990882990623009, "grad_norm": 0.7890625, "learning_rate": 0.00019729605378500639, "loss": 1.1239, "step": 3859 }, { "epoch": 0.09911397625822273, "grad_norm": 0.7734375, "learning_rate": 0.00019729502258510165, "loss": 1.1208, "step": 3860 }, { "epoch": 0.09913965345414454, "grad_norm": 0.890625, "learning_rate": 0.00019729399119129673, "loss": 1.2815, "step": 3861 }, { "epoch": 0.09916533065006636, "grad_norm": 0.77734375, "learning_rate": 0.0001972929596035937, "loss": 1.1725, "step": 3862 }, { "epoch": 0.09919100784598818, "grad_norm": 0.875, "learning_rate": 0.00019729192782199455, "loss": 1.2149, "step": 3863 }, { "epoch": 0.09921668504191, "grad_norm": 0.8203125, "learning_rate": 0.0001972908958465014, "loss": 1.1932, "step": 3864 }, { "epoch": 0.09924236223783182, "grad_norm": 0.91015625, "learning_rate": 0.00019728986367711628, "loss": 1.2499, "step": 3865 }, { "epoch": 0.09926803943375363, "grad_norm": 0.8359375, "learning_rate": 0.00019728883131384125, "loss": 1.1591, "step": 3866 }, { "epoch": 0.09929371662967545, "grad_norm": 0.9140625, "learning_rate": 0.00019728779875667835, "loss": 1.1903, "step": 3867 }, { "epoch": 0.09931939382559728, "grad_norm": 0.87109375, "learning_rate": 0.00019728676600562966, "loss": 1.1919, "step": 3868 }, { "epoch": 0.0993450710215191, "grad_norm": 0.875, "learning_rate": 0.00019728573306069727, "loss": 1.0361, "step": 3869 }, { "epoch": 0.09937074821744092, "grad_norm": 0.91015625, "learning_rate": 0.00019728469992188316, "loss": 1.2384, "step": 3870 }, { "epoch": 0.09939642541336273, "grad_norm": 0.8828125, "learning_rate": 0.00019728366658918947, "loss": 1.0797, "step": 3871 }, { "epoch": 0.09942210260928455, "grad_norm": 0.9140625, "learning_rate": 0.00019728263306261818, "loss": 1.1279, "step": 3872 }, { "epoch": 0.09944777980520637, "grad_norm": 0.76953125, "learning_rate": 0.0001972815993421714, "loss": 1.1114, "step": 3873 }, { "epoch": 0.0994734570011282, "grad_norm": 0.82421875, "learning_rate": 0.0001972805654278512, "loss": 1.1236, "step": 3874 }, { "epoch": 0.09949913419705, "grad_norm": 0.92578125, "learning_rate": 0.0001972795313196596, "loss": 1.1101, "step": 3875 }, { "epoch": 0.09952481139297183, "grad_norm": 0.828125, "learning_rate": 0.00019727849701759866, "loss": 1.1454, "step": 3876 }, { "epoch": 0.09955048858889365, "grad_norm": 0.83984375, "learning_rate": 0.0001972774625216705, "loss": 1.1309, "step": 3877 }, { "epoch": 0.09957616578481547, "grad_norm": 0.87890625, "learning_rate": 0.00019727642783187713, "loss": 1.2586, "step": 3878 }, { "epoch": 0.09960184298073729, "grad_norm": 0.8125, "learning_rate": 0.00019727539294822063, "loss": 0.9967, "step": 3879 }, { "epoch": 0.0996275201766591, "grad_norm": 0.8203125, "learning_rate": 0.00019727435787070306, "loss": 1.0989, "step": 3880 }, { "epoch": 0.09965319737258092, "grad_norm": 0.9765625, "learning_rate": 0.0001972733225993265, "loss": 1.2495, "step": 3881 }, { "epoch": 0.09967887456850275, "grad_norm": 0.89453125, "learning_rate": 0.00019727228713409297, "loss": 1.1493, "step": 3882 }, { "epoch": 0.09970455176442457, "grad_norm": 0.921875, "learning_rate": 0.00019727125147500456, "loss": 1.0373, "step": 3883 }, { "epoch": 0.09973022896034639, "grad_norm": 0.78125, "learning_rate": 0.00019727021562206335, "loss": 1.128, "step": 3884 }, { "epoch": 0.0997559061562682, "grad_norm": 0.83203125, "learning_rate": 0.00019726917957527134, "loss": 1.1978, "step": 3885 }, { "epoch": 0.09978158335219002, "grad_norm": 0.7734375, "learning_rate": 0.00019726814333463069, "loss": 1.2215, "step": 3886 }, { "epoch": 0.09980726054811184, "grad_norm": 0.87109375, "learning_rate": 0.0001972671069001434, "loss": 1.1919, "step": 3887 }, { "epoch": 0.09983293774403366, "grad_norm": 0.90625, "learning_rate": 0.00019726607027181154, "loss": 1.1808, "step": 3888 }, { "epoch": 0.09985861493995549, "grad_norm": 0.8515625, "learning_rate": 0.0001972650334496372, "loss": 1.1526, "step": 3889 }, { "epoch": 0.0998842921358773, "grad_norm": 0.84765625, "learning_rate": 0.00019726399643362242, "loss": 1.2486, "step": 3890 }, { "epoch": 0.09990996933179912, "grad_norm": 0.8671875, "learning_rate": 0.0001972629592237693, "loss": 1.226, "step": 3891 }, { "epoch": 0.09993564652772094, "grad_norm": 0.89453125, "learning_rate": 0.00019726192182007988, "loss": 1.1797, "step": 3892 }, { "epoch": 0.09996132372364276, "grad_norm": 0.87890625, "learning_rate": 0.00019726088422255625, "loss": 1.195, "step": 3893 }, { "epoch": 0.09998700091956458, "grad_norm": 0.86328125, "learning_rate": 0.00019725984643120047, "loss": 1.1244, "step": 3894 }, { "epoch": 0.10001267811548639, "grad_norm": 0.859375, "learning_rate": 0.00019725880844601457, "loss": 1.2434, "step": 3895 }, { "epoch": 0.10003835531140821, "grad_norm": 0.9140625, "learning_rate": 0.00019725777026700065, "loss": 1.2431, "step": 3896 }, { "epoch": 0.10006403250733004, "grad_norm": 0.78515625, "learning_rate": 0.00019725673189416082, "loss": 1.1388, "step": 3897 }, { "epoch": 0.10008970970325186, "grad_norm": 0.875, "learning_rate": 0.00019725569332749708, "loss": 1.1536, "step": 3898 }, { "epoch": 0.10011538689917368, "grad_norm": 0.90234375, "learning_rate": 0.00019725465456701152, "loss": 1.1473, "step": 3899 }, { "epoch": 0.10014106409509549, "grad_norm": 0.8671875, "learning_rate": 0.00019725361561270623, "loss": 1.2019, "step": 3900 }, { "epoch": 0.10016674129101731, "grad_norm": 0.86328125, "learning_rate": 0.00019725257646458326, "loss": 1.1907, "step": 3901 }, { "epoch": 0.10019241848693913, "grad_norm": 0.83203125, "learning_rate": 0.00019725153712264471, "loss": 1.2144, "step": 3902 }, { "epoch": 0.10021809568286096, "grad_norm": 0.81640625, "learning_rate": 0.0001972504975868926, "loss": 1.0477, "step": 3903 }, { "epoch": 0.10024377287878278, "grad_norm": 0.80859375, "learning_rate": 0.00019724945785732904, "loss": 1.1883, "step": 3904 }, { "epoch": 0.10026945007470459, "grad_norm": 0.875, "learning_rate": 0.00019724841793395613, "loss": 1.1531, "step": 3905 }, { "epoch": 0.10029512727062641, "grad_norm": 0.8515625, "learning_rate": 0.00019724737781677586, "loss": 1.2203, "step": 3906 }, { "epoch": 0.10032080446654823, "grad_norm": 0.890625, "learning_rate": 0.00019724633750579037, "loss": 1.375, "step": 3907 }, { "epoch": 0.10034648166247005, "grad_norm": 0.87109375, "learning_rate": 0.00019724529700100173, "loss": 1.2131, "step": 3908 }, { "epoch": 0.10037215885839187, "grad_norm": 0.87890625, "learning_rate": 0.00019724425630241198, "loss": 1.4079, "step": 3909 }, { "epoch": 0.10039783605431368, "grad_norm": 0.84375, "learning_rate": 0.00019724321541002323, "loss": 1.0882, "step": 3910 }, { "epoch": 0.1004235132502355, "grad_norm": 0.81640625, "learning_rate": 0.00019724217432383752, "loss": 1.276, "step": 3911 }, { "epoch": 0.10044919044615733, "grad_norm": 0.83984375, "learning_rate": 0.00019724113304385693, "loss": 0.9549, "step": 3912 }, { "epoch": 0.10047486764207915, "grad_norm": 0.9765625, "learning_rate": 0.00019724009157008358, "loss": 1.3537, "step": 3913 }, { "epoch": 0.10050054483800097, "grad_norm": 0.84375, "learning_rate": 0.00019723904990251947, "loss": 1.189, "step": 3914 }, { "epoch": 0.10052622203392278, "grad_norm": 0.84765625, "learning_rate": 0.00019723800804116674, "loss": 1.2783, "step": 3915 }, { "epoch": 0.1005518992298446, "grad_norm": 0.796875, "learning_rate": 0.00019723696598602744, "loss": 1.055, "step": 3916 }, { "epoch": 0.10057757642576642, "grad_norm": 0.85546875, "learning_rate": 0.00019723592373710363, "loss": 1.1478, "step": 3917 }, { "epoch": 0.10060325362168825, "grad_norm": 0.9296875, "learning_rate": 0.00019723488129439742, "loss": 1.2657, "step": 3918 }, { "epoch": 0.10062893081761007, "grad_norm": 1.0390625, "learning_rate": 0.00019723383865791089, "loss": 1.0859, "step": 3919 }, { "epoch": 0.10065460801353188, "grad_norm": 0.8515625, "learning_rate": 0.0001972327958276461, "loss": 1.0215, "step": 3920 }, { "epoch": 0.1006802852094537, "grad_norm": 0.890625, "learning_rate": 0.00019723175280360512, "loss": 1.227, "step": 3921 }, { "epoch": 0.10070596240537552, "grad_norm": 0.87890625, "learning_rate": 0.00019723070958579006, "loss": 1.3137, "step": 3922 }, { "epoch": 0.10073163960129734, "grad_norm": 0.91015625, "learning_rate": 0.00019722966617420295, "loss": 1.2887, "step": 3923 }, { "epoch": 0.10075731679721917, "grad_norm": 0.83984375, "learning_rate": 0.0001972286225688459, "loss": 1.159, "step": 3924 }, { "epoch": 0.10078299399314097, "grad_norm": 0.82421875, "learning_rate": 0.000197227578769721, "loss": 1.181, "step": 3925 }, { "epoch": 0.1008086711890628, "grad_norm": 0.87109375, "learning_rate": 0.00019722653477683032, "loss": 1.1311, "step": 3926 }, { "epoch": 0.10083434838498462, "grad_norm": 0.87890625, "learning_rate": 0.00019722549059017594, "loss": 1.2102, "step": 3927 }, { "epoch": 0.10086002558090644, "grad_norm": 0.85546875, "learning_rate": 0.00019722444620975992, "loss": 1.2588, "step": 3928 }, { "epoch": 0.10088570277682826, "grad_norm": 0.875, "learning_rate": 0.0001972234016355844, "loss": 1.1197, "step": 3929 }, { "epoch": 0.10091137997275007, "grad_norm": 0.8046875, "learning_rate": 0.0001972223568676514, "loss": 1.1064, "step": 3930 }, { "epoch": 0.10093705716867189, "grad_norm": 0.93359375, "learning_rate": 0.00019722131190596303, "loss": 1.204, "step": 3931 }, { "epoch": 0.10096273436459371, "grad_norm": 0.9375, "learning_rate": 0.00019722026675052134, "loss": 1.1626, "step": 3932 }, { "epoch": 0.10098841156051554, "grad_norm": 0.8203125, "learning_rate": 0.00019721922140132847, "loss": 1.2264, "step": 3933 }, { "epoch": 0.10101408875643736, "grad_norm": 0.8671875, "learning_rate": 0.00019721817585838645, "loss": 1.1217, "step": 3934 }, { "epoch": 0.10103976595235917, "grad_norm": 0.9375, "learning_rate": 0.0001972171301216974, "loss": 1.1828, "step": 3935 }, { "epoch": 0.10106544314828099, "grad_norm": 0.90625, "learning_rate": 0.0001972160841912634, "loss": 1.1752, "step": 3936 }, { "epoch": 0.10109112034420281, "grad_norm": 0.9453125, "learning_rate": 0.00019721503806708653, "loss": 1.1304, "step": 3937 }, { "epoch": 0.10111679754012463, "grad_norm": 0.921875, "learning_rate": 0.00019721399174916883, "loss": 1.1356, "step": 3938 }, { "epoch": 0.10114247473604646, "grad_norm": 0.89453125, "learning_rate": 0.00019721294523751247, "loss": 1.1137, "step": 3939 }, { "epoch": 0.10116815193196826, "grad_norm": 0.91796875, "learning_rate": 0.00019721189853211948, "loss": 1.1894, "step": 3940 }, { "epoch": 0.10119382912789009, "grad_norm": 0.8671875, "learning_rate": 0.00019721085163299196, "loss": 1.0769, "step": 3941 }, { "epoch": 0.10121950632381191, "grad_norm": 0.84375, "learning_rate": 0.00019720980454013197, "loss": 1.2087, "step": 3942 }, { "epoch": 0.10124518351973373, "grad_norm": 0.82421875, "learning_rate": 0.00019720875725354165, "loss": 1.1389, "step": 3943 }, { "epoch": 0.10127086071565555, "grad_norm": 0.7890625, "learning_rate": 0.00019720770977322305, "loss": 1.2012, "step": 3944 }, { "epoch": 0.10129653791157736, "grad_norm": 0.92578125, "learning_rate": 0.00019720666209917825, "loss": 1.21, "step": 3945 }, { "epoch": 0.10132221510749918, "grad_norm": 0.96875, "learning_rate": 0.00019720561423140938, "loss": 1.2259, "step": 3946 }, { "epoch": 0.101347892303421, "grad_norm": 0.83984375, "learning_rate": 0.00019720456616991846, "loss": 1.1361, "step": 3947 }, { "epoch": 0.10137356949934283, "grad_norm": 0.91796875, "learning_rate": 0.00019720351791470766, "loss": 1.2027, "step": 3948 }, { "epoch": 0.10139924669526465, "grad_norm": 0.8203125, "learning_rate": 0.000197202469465779, "loss": 1.1332, "step": 3949 }, { "epoch": 0.10142492389118646, "grad_norm": 2.078125, "learning_rate": 0.00019720142082313462, "loss": 1.1719, "step": 3950 }, { "epoch": 0.10145060108710828, "grad_norm": 0.8828125, "learning_rate": 0.00019720037198677658, "loss": 1.063, "step": 3951 }, { "epoch": 0.1014762782830301, "grad_norm": 0.9296875, "learning_rate": 0.00019719932295670698, "loss": 1.2618, "step": 3952 }, { "epoch": 0.10150195547895192, "grad_norm": 0.84765625, "learning_rate": 0.00019719827373292792, "loss": 1.1946, "step": 3953 }, { "epoch": 0.10152763267487375, "grad_norm": 0.8828125, "learning_rate": 0.00019719722431544144, "loss": 1.2925, "step": 3954 }, { "epoch": 0.10155330987079556, "grad_norm": 0.90234375, "learning_rate": 0.00019719617470424972, "loss": 1.2858, "step": 3955 }, { "epoch": 0.10157898706671738, "grad_norm": 0.9453125, "learning_rate": 0.0001971951248993548, "loss": 1.1032, "step": 3956 }, { "epoch": 0.1016046642626392, "grad_norm": 0.90625, "learning_rate": 0.00019719407490075873, "loss": 1.1132, "step": 3957 }, { "epoch": 0.10163034145856102, "grad_norm": 0.890625, "learning_rate": 0.00019719302470846368, "loss": 1.3238, "step": 3958 }, { "epoch": 0.10165601865448284, "grad_norm": 0.94921875, "learning_rate": 0.0001971919743224717, "loss": 1.1035, "step": 3959 }, { "epoch": 0.10168169585040465, "grad_norm": 0.8203125, "learning_rate": 0.00019719092374278493, "loss": 0.961, "step": 3960 }, { "epoch": 0.10170737304632647, "grad_norm": 0.87109375, "learning_rate": 0.0001971898729694054, "loss": 1.142, "step": 3961 }, { "epoch": 0.1017330502422483, "grad_norm": 0.8984375, "learning_rate": 0.00019718882200233523, "loss": 1.2882, "step": 3962 }, { "epoch": 0.10175872743817012, "grad_norm": 0.9296875, "learning_rate": 0.00019718777084157653, "loss": 1.2722, "step": 3963 }, { "epoch": 0.10178440463409194, "grad_norm": 0.87890625, "learning_rate": 0.00019718671948713138, "loss": 1.2335, "step": 3964 }, { "epoch": 0.10181008183001375, "grad_norm": 0.8046875, "learning_rate": 0.00019718566793900185, "loss": 1.0988, "step": 3965 }, { "epoch": 0.10183575902593557, "grad_norm": 0.8671875, "learning_rate": 0.0001971846161971901, "loss": 1.2057, "step": 3966 }, { "epoch": 0.1018614362218574, "grad_norm": 0.8671875, "learning_rate": 0.00019718356426169816, "loss": 1.0664, "step": 3967 }, { "epoch": 0.10188711341777922, "grad_norm": 0.80078125, "learning_rate": 0.00019718251213252815, "loss": 1.1221, "step": 3968 }, { "epoch": 0.10191279061370104, "grad_norm": 0.84765625, "learning_rate": 0.0001971814598096822, "loss": 1.1503, "step": 3969 }, { "epoch": 0.10193846780962285, "grad_norm": 0.81640625, "learning_rate": 0.00019718040729316236, "loss": 1.1884, "step": 3970 }, { "epoch": 0.10196414500554467, "grad_norm": 0.859375, "learning_rate": 0.00019717935458297078, "loss": 1.1146, "step": 3971 }, { "epoch": 0.10198982220146649, "grad_norm": 1.0234375, "learning_rate": 0.00019717830167910946, "loss": 1.0595, "step": 3972 }, { "epoch": 0.10201549939738831, "grad_norm": 0.859375, "learning_rate": 0.00019717724858158063, "loss": 1.0063, "step": 3973 }, { "epoch": 0.10204117659331013, "grad_norm": 0.921875, "learning_rate": 0.0001971761952903863, "loss": 1.1256, "step": 3974 }, { "epoch": 0.10206685378923194, "grad_norm": 1.0, "learning_rate": 0.0001971751418055286, "loss": 1.1509, "step": 3975 }, { "epoch": 0.10209253098515376, "grad_norm": 0.85546875, "learning_rate": 0.00019717408812700956, "loss": 1.2308, "step": 3976 }, { "epoch": 0.10211820818107559, "grad_norm": 0.8359375, "learning_rate": 0.00019717303425483137, "loss": 0.9604, "step": 3977 }, { "epoch": 0.10214388537699741, "grad_norm": 0.89453125, "learning_rate": 0.00019717198018899613, "loss": 1.25, "step": 3978 }, { "epoch": 0.10216956257291922, "grad_norm": 0.92578125, "learning_rate": 0.0001971709259295059, "loss": 1.1132, "step": 3979 }, { "epoch": 0.10219523976884104, "grad_norm": 0.92578125, "learning_rate": 0.00019716987147636278, "loss": 1.3292, "step": 3980 }, { "epoch": 0.10222091696476286, "grad_norm": 0.828125, "learning_rate": 0.0001971688168295689, "loss": 1.0939, "step": 3981 }, { "epoch": 0.10224659416068468, "grad_norm": 0.83984375, "learning_rate": 0.00019716776198912636, "loss": 1.1368, "step": 3982 }, { "epoch": 0.1022722713566065, "grad_norm": 0.91015625, "learning_rate": 0.00019716670695503718, "loss": 1.2929, "step": 3983 }, { "epoch": 0.10229794855252831, "grad_norm": 0.8359375, "learning_rate": 0.0001971656517273036, "loss": 1.242, "step": 3984 }, { "epoch": 0.10232362574845014, "grad_norm": 0.91015625, "learning_rate": 0.0001971645963059276, "loss": 1.2308, "step": 3985 }, { "epoch": 0.10234930294437196, "grad_norm": 0.8046875, "learning_rate": 0.00019716354069091135, "loss": 1.1404, "step": 3986 }, { "epoch": 0.10237498014029378, "grad_norm": 0.9375, "learning_rate": 0.00019716248488225698, "loss": 1.2387, "step": 3987 }, { "epoch": 0.1024006573362156, "grad_norm": 0.890625, "learning_rate": 0.00019716142887996654, "loss": 1.315, "step": 3988 }, { "epoch": 0.10242633453213741, "grad_norm": 0.91015625, "learning_rate": 0.00019716037268404212, "loss": 1.2723, "step": 3989 }, { "epoch": 0.10245201172805923, "grad_norm": 0.96875, "learning_rate": 0.00019715931629448588, "loss": 1.217, "step": 3990 }, { "epoch": 0.10247768892398106, "grad_norm": 0.86328125, "learning_rate": 0.0001971582597112999, "loss": 1.2611, "step": 3991 }, { "epoch": 0.10250336611990288, "grad_norm": 0.91796875, "learning_rate": 0.00019715720293448624, "loss": 1.1691, "step": 3992 }, { "epoch": 0.1025290433158247, "grad_norm": 0.84375, "learning_rate": 0.0001971561459640471, "loss": 1.2786, "step": 3993 }, { "epoch": 0.10255472051174651, "grad_norm": 1.0078125, "learning_rate": 0.00019715508879998453, "loss": 1.0315, "step": 3994 }, { "epoch": 0.10258039770766833, "grad_norm": 0.83984375, "learning_rate": 0.00019715403144230062, "loss": 1.2126, "step": 3995 }, { "epoch": 0.10260607490359015, "grad_norm": 0.89453125, "learning_rate": 0.00019715297389099752, "loss": 1.2427, "step": 3996 }, { "epoch": 0.10263175209951197, "grad_norm": 0.91015625, "learning_rate": 0.0001971519161460773, "loss": 1.2623, "step": 3997 }, { "epoch": 0.1026574292954338, "grad_norm": 0.7890625, "learning_rate": 0.0001971508582075421, "loss": 1.1187, "step": 3998 }, { "epoch": 0.1026831064913556, "grad_norm": 0.78125, "learning_rate": 0.00019714980007539404, "loss": 1.0484, "step": 3999 }, { "epoch": 0.10270878368727743, "grad_norm": 0.80078125, "learning_rate": 0.00019714874174963515, "loss": 1.2014, "step": 4000 }, { "epoch": 0.10270878368727743, "eval_loss": 1.1637061834335327, "eval_model_preparation_time": 0.0065, "eval_runtime": 533.0197, "eval_samples_per_second": 18.761, "eval_steps_per_second": 0.587, "step": 4000 }, { "epoch": 0.10273446088319925, "grad_norm": 0.83984375, "learning_rate": 0.00019714768323026764, "loss": 1.2368, "step": 4001 }, { "epoch": 0.10276013807912107, "grad_norm": 0.859375, "learning_rate": 0.00019714662451729358, "loss": 1.221, "step": 4002 }, { "epoch": 0.1027858152750429, "grad_norm": 0.85546875, "learning_rate": 0.00019714556561071503, "loss": 1.1553, "step": 4003 }, { "epoch": 0.1028114924709647, "grad_norm": 0.86328125, "learning_rate": 0.00019714450651053418, "loss": 1.1304, "step": 4004 }, { "epoch": 0.10283716966688652, "grad_norm": 0.8125, "learning_rate": 0.00019714344721675307, "loss": 1.1614, "step": 4005 }, { "epoch": 0.10286284686280835, "grad_norm": 0.87890625, "learning_rate": 0.00019714238772937387, "loss": 1.2005, "step": 4006 }, { "epoch": 0.10288852405873017, "grad_norm": 0.8203125, "learning_rate": 0.00019714132804839866, "loss": 1.1286, "step": 4007 }, { "epoch": 0.10291420125465199, "grad_norm": 0.859375, "learning_rate": 0.00019714026817382952, "loss": 0.9923, "step": 4008 }, { "epoch": 0.1029398784505738, "grad_norm": 0.84375, "learning_rate": 0.00019713920810566867, "loss": 1.2218, "step": 4009 }, { "epoch": 0.10296555564649562, "grad_norm": 0.859375, "learning_rate": 0.0001971381478439181, "loss": 1.2054, "step": 4010 }, { "epoch": 0.10299123284241744, "grad_norm": 0.80859375, "learning_rate": 0.00019713708738858, "loss": 1.2934, "step": 4011 }, { "epoch": 0.10301691003833927, "grad_norm": 0.8203125, "learning_rate": 0.00019713602673965643, "loss": 1.2388, "step": 4012 }, { "epoch": 0.10304258723426109, "grad_norm": 0.82421875, "learning_rate": 0.00019713496589714958, "loss": 1.3784, "step": 4013 }, { "epoch": 0.1030682644301829, "grad_norm": 0.83984375, "learning_rate": 0.00019713390486106146, "loss": 1.1266, "step": 4014 }, { "epoch": 0.10309394162610472, "grad_norm": 0.86328125, "learning_rate": 0.0001971328436313943, "loss": 1.1238, "step": 4015 }, { "epoch": 0.10311961882202654, "grad_norm": 0.85546875, "learning_rate": 0.0001971317822081501, "loss": 1.21, "step": 4016 }, { "epoch": 0.10314529601794836, "grad_norm": 0.82421875, "learning_rate": 0.00019713072059133108, "loss": 1.2284, "step": 4017 }, { "epoch": 0.10317097321387018, "grad_norm": 0.890625, "learning_rate": 0.0001971296587809393, "loss": 1.151, "step": 4018 }, { "epoch": 0.10319665040979199, "grad_norm": 0.8203125, "learning_rate": 0.00019712859677697688, "loss": 1.1792, "step": 4019 }, { "epoch": 0.10322232760571382, "grad_norm": 0.90234375, "learning_rate": 0.00019712753457944593, "loss": 1.0885, "step": 4020 }, { "epoch": 0.10324800480163564, "grad_norm": 0.86328125, "learning_rate": 0.00019712647218834856, "loss": 1.3272, "step": 4021 }, { "epoch": 0.10327368199755746, "grad_norm": 0.8984375, "learning_rate": 0.00019712540960368694, "loss": 1.2536, "step": 4022 }, { "epoch": 0.10329935919347928, "grad_norm": 0.89453125, "learning_rate": 0.00019712434682546314, "loss": 1.1381, "step": 4023 }, { "epoch": 0.10332503638940109, "grad_norm": 0.95703125, "learning_rate": 0.00019712328385367928, "loss": 1.2882, "step": 4024 }, { "epoch": 0.10335071358532291, "grad_norm": 0.828125, "learning_rate": 0.0001971222206883375, "loss": 1.0902, "step": 4025 }, { "epoch": 0.10337639078124473, "grad_norm": 0.90625, "learning_rate": 0.0001971211573294399, "loss": 1.1666, "step": 4026 }, { "epoch": 0.10340206797716656, "grad_norm": 0.83984375, "learning_rate": 0.00019712009377698863, "loss": 1.081, "step": 4027 }, { "epoch": 0.10342774517308838, "grad_norm": 0.87109375, "learning_rate": 0.00019711903003098575, "loss": 1.1606, "step": 4028 }, { "epoch": 0.10345342236901019, "grad_norm": 0.90234375, "learning_rate": 0.00019711796609143343, "loss": 1.3279, "step": 4029 }, { "epoch": 0.10347909956493201, "grad_norm": 0.828125, "learning_rate": 0.0001971169019583338, "loss": 1.0596, "step": 4030 }, { "epoch": 0.10350477676085383, "grad_norm": 0.82421875, "learning_rate": 0.0001971158376316889, "loss": 1.0855, "step": 4031 }, { "epoch": 0.10353045395677565, "grad_norm": 0.8984375, "learning_rate": 0.00019711477311150095, "loss": 1.0916, "step": 4032 }, { "epoch": 0.10355613115269748, "grad_norm": 0.828125, "learning_rate": 0.000197113708397772, "loss": 1.1296, "step": 4033 }, { "epoch": 0.10358180834861928, "grad_norm": 0.8359375, "learning_rate": 0.00019711264349050426, "loss": 1.4485, "step": 4034 }, { "epoch": 0.1036074855445411, "grad_norm": 0.796875, "learning_rate": 0.00019711157838969975, "loss": 1.0813, "step": 4035 }, { "epoch": 0.10363316274046293, "grad_norm": 0.83984375, "learning_rate": 0.00019711051309536065, "loss": 1.1685, "step": 4036 }, { "epoch": 0.10365883993638475, "grad_norm": 0.875, "learning_rate": 0.00019710944760748905, "loss": 1.074, "step": 4037 }, { "epoch": 0.10368451713230657, "grad_norm": 0.8515625, "learning_rate": 0.0001971083819260871, "loss": 0.9926, "step": 4038 }, { "epoch": 0.10371019432822838, "grad_norm": 0.87890625, "learning_rate": 0.00019710731605115692, "loss": 1.0473, "step": 4039 }, { "epoch": 0.1037358715241502, "grad_norm": 0.921875, "learning_rate": 0.00019710624998270064, "loss": 1.2334, "step": 4040 }, { "epoch": 0.10376154872007202, "grad_norm": 0.89453125, "learning_rate": 0.00019710518372072038, "loss": 1.1836, "step": 4041 }, { "epoch": 0.10378722591599385, "grad_norm": 0.87890625, "learning_rate": 0.00019710411726521824, "loss": 1.1147, "step": 4042 }, { "epoch": 0.10381290311191567, "grad_norm": 0.88671875, "learning_rate": 0.0001971030506161964, "loss": 1.251, "step": 4043 }, { "epoch": 0.10383858030783748, "grad_norm": 0.83984375, "learning_rate": 0.0001971019837736569, "loss": 1.1352, "step": 4044 }, { "epoch": 0.1038642575037593, "grad_norm": 0.8359375, "learning_rate": 0.00019710091673760195, "loss": 0.9884, "step": 4045 }, { "epoch": 0.10388993469968112, "grad_norm": 0.9140625, "learning_rate": 0.0001970998495080336, "loss": 1.237, "step": 4046 }, { "epoch": 0.10391561189560294, "grad_norm": 0.91796875, "learning_rate": 0.00019709878208495408, "loss": 1.1057, "step": 4047 }, { "epoch": 0.10394128909152477, "grad_norm": 0.77734375, "learning_rate": 0.00019709771446836544, "loss": 1.1478, "step": 4048 }, { "epoch": 0.10396696628744657, "grad_norm": 0.9609375, "learning_rate": 0.0001970966466582698, "loss": 1.1186, "step": 4049 }, { "epoch": 0.1039926434833684, "grad_norm": 0.91015625, "learning_rate": 0.00019709557865466937, "loss": 1.1813, "step": 4050 }, { "epoch": 0.10401832067929022, "grad_norm": 0.94140625, "learning_rate": 0.00019709451045756617, "loss": 1.2502, "step": 4051 }, { "epoch": 0.10404399787521204, "grad_norm": 0.84375, "learning_rate": 0.00019709344206696237, "loss": 1.1374, "step": 4052 }, { "epoch": 0.10406967507113386, "grad_norm": 0.828125, "learning_rate": 0.00019709237348286015, "loss": 1.1375, "step": 4053 }, { "epoch": 0.10409535226705567, "grad_norm": 0.859375, "learning_rate": 0.0001970913047052616, "loss": 1.0803, "step": 4054 }, { "epoch": 0.1041210294629775, "grad_norm": 0.828125, "learning_rate": 0.00019709023573416884, "loss": 1.1532, "step": 4055 }, { "epoch": 0.10414670665889932, "grad_norm": 0.8671875, "learning_rate": 0.00019708916656958404, "loss": 1.1514, "step": 4056 }, { "epoch": 0.10417238385482114, "grad_norm": 0.8984375, "learning_rate": 0.00019708809721150924, "loss": 1.1007, "step": 4057 }, { "epoch": 0.10419806105074296, "grad_norm": 0.75, "learning_rate": 0.00019708702765994666, "loss": 1.0736, "step": 4058 }, { "epoch": 0.10422373824666477, "grad_norm": 0.89453125, "learning_rate": 0.00019708595791489845, "loss": 1.086, "step": 4059 }, { "epoch": 0.10424941544258659, "grad_norm": 0.86328125, "learning_rate": 0.00019708488797636663, "loss": 1.0189, "step": 4060 }, { "epoch": 0.10427509263850841, "grad_norm": 0.8828125, "learning_rate": 0.0001970838178443534, "loss": 1.0995, "step": 4061 }, { "epoch": 0.10430076983443023, "grad_norm": 0.87109375, "learning_rate": 0.00019708274751886095, "loss": 1.0462, "step": 4062 }, { "epoch": 0.10432644703035206, "grad_norm": 0.84765625, "learning_rate": 0.00019708167699989128, "loss": 1.0996, "step": 4063 }, { "epoch": 0.10435212422627387, "grad_norm": 0.94140625, "learning_rate": 0.00019708060628744663, "loss": 1.1328, "step": 4064 }, { "epoch": 0.10437780142219569, "grad_norm": 0.890625, "learning_rate": 0.00019707953538152914, "loss": 1.2203, "step": 4065 }, { "epoch": 0.10440347861811751, "grad_norm": 0.90625, "learning_rate": 0.00019707846428214084, "loss": 1.2283, "step": 4066 }, { "epoch": 0.10442915581403933, "grad_norm": 0.82421875, "learning_rate": 0.00019707739298928395, "loss": 1.0266, "step": 4067 }, { "epoch": 0.10445483300996115, "grad_norm": 0.84375, "learning_rate": 0.00019707632150296062, "loss": 1.2558, "step": 4068 }, { "epoch": 0.10448051020588296, "grad_norm": 0.890625, "learning_rate": 0.0001970752498231729, "loss": 1.1707, "step": 4069 }, { "epoch": 0.10450618740180478, "grad_norm": 0.9765625, "learning_rate": 0.000197074177949923, "loss": 1.1693, "step": 4070 }, { "epoch": 0.1045318645977266, "grad_norm": 0.875, "learning_rate": 0.000197073105883213, "loss": 1.2569, "step": 4071 }, { "epoch": 0.10455754179364843, "grad_norm": 0.87890625, "learning_rate": 0.00019707203362304512, "loss": 1.2623, "step": 4072 }, { "epoch": 0.10458321898957025, "grad_norm": 0.86328125, "learning_rate": 0.00019707096116942143, "loss": 1.0573, "step": 4073 }, { "epoch": 0.10460889618549206, "grad_norm": 0.87890625, "learning_rate": 0.0001970698885223441, "loss": 1.1099, "step": 4074 }, { "epoch": 0.10463457338141388, "grad_norm": 0.921875, "learning_rate": 0.0001970688156818152, "loss": 1.1789, "step": 4075 }, { "epoch": 0.1046602505773357, "grad_norm": 1.484375, "learning_rate": 0.00019706774264783693, "loss": 1.1941, "step": 4076 }, { "epoch": 0.10468592777325753, "grad_norm": 0.953125, "learning_rate": 0.00019706666942041146, "loss": 1.2064, "step": 4077 }, { "epoch": 0.10471160496917935, "grad_norm": 0.80859375, "learning_rate": 0.00019706559599954083, "loss": 1.1178, "step": 4078 }, { "epoch": 0.10473728216510116, "grad_norm": 0.90234375, "learning_rate": 0.0001970645223852273, "loss": 1.2218, "step": 4079 }, { "epoch": 0.10476295936102298, "grad_norm": 0.828125, "learning_rate": 0.0001970634485774729, "loss": 1.1767, "step": 4080 }, { "epoch": 0.1047886365569448, "grad_norm": 0.8828125, "learning_rate": 0.00019706237457627982, "loss": 1.2904, "step": 4081 }, { "epoch": 0.10481431375286662, "grad_norm": 0.90625, "learning_rate": 0.0001970613003816502, "loss": 1.1515, "step": 4082 }, { "epoch": 0.10483999094878843, "grad_norm": 0.94921875, "learning_rate": 0.0001970602259935862, "loss": 1.339, "step": 4083 }, { "epoch": 0.10486566814471025, "grad_norm": 0.84375, "learning_rate": 0.00019705915141208987, "loss": 1.1567, "step": 4084 }, { "epoch": 0.10489134534063208, "grad_norm": 0.9921875, "learning_rate": 0.00019705807663716348, "loss": 1.2901, "step": 4085 }, { "epoch": 0.1049170225365539, "grad_norm": 0.90625, "learning_rate": 0.0001970570016688091, "loss": 1.0884, "step": 4086 }, { "epoch": 0.10494269973247572, "grad_norm": 1.1796875, "learning_rate": 0.00019705592650702886, "loss": 1.1537, "step": 4087 }, { "epoch": 0.10496837692839753, "grad_norm": 0.92578125, "learning_rate": 0.00019705485115182494, "loss": 1.0711, "step": 4088 }, { "epoch": 0.10499405412431935, "grad_norm": 0.96875, "learning_rate": 0.00019705377560319946, "loss": 1.0863, "step": 4089 }, { "epoch": 0.10501973132024117, "grad_norm": 0.84765625, "learning_rate": 0.00019705269986115457, "loss": 1.1477, "step": 4090 }, { "epoch": 0.105045408516163, "grad_norm": 0.8828125, "learning_rate": 0.00019705162392569245, "loss": 1.1294, "step": 4091 }, { "epoch": 0.10507108571208482, "grad_norm": 0.79296875, "learning_rate": 0.00019705054779681514, "loss": 1.2142, "step": 4092 }, { "epoch": 0.10509676290800662, "grad_norm": 0.84765625, "learning_rate": 0.00019704947147452492, "loss": 1.0024, "step": 4093 }, { "epoch": 0.10512244010392845, "grad_norm": 0.8046875, "learning_rate": 0.00019704839495882385, "loss": 1.0419, "step": 4094 }, { "epoch": 0.10514811729985027, "grad_norm": 0.7890625, "learning_rate": 0.00019704731824971412, "loss": 0.9872, "step": 4095 }, { "epoch": 0.10517379449577209, "grad_norm": 1.3828125, "learning_rate": 0.0001970462413471978, "loss": 1.2478, "step": 4096 }, { "epoch": 0.10519947169169391, "grad_norm": 0.85546875, "learning_rate": 0.00019704516425127712, "loss": 1.2509, "step": 4097 }, { "epoch": 0.10522514888761572, "grad_norm": 0.83984375, "learning_rate": 0.00019704408696195416, "loss": 1.0603, "step": 4098 }, { "epoch": 0.10525082608353754, "grad_norm": 0.82421875, "learning_rate": 0.00019704300947923117, "loss": 1.1827, "step": 4099 }, { "epoch": 0.10527650327945937, "grad_norm": 0.828125, "learning_rate": 0.00019704193180311016, "loss": 1.2632, "step": 4100 }, { "epoch": 0.10530218047538119, "grad_norm": 0.86328125, "learning_rate": 0.00019704085393359338, "loss": 1.0198, "step": 4101 }, { "epoch": 0.10532785767130301, "grad_norm": 0.859375, "learning_rate": 0.00019703977587068295, "loss": 1.1305, "step": 4102 }, { "epoch": 0.10535353486722482, "grad_norm": 0.8671875, "learning_rate": 0.000197038697614381, "loss": 1.1249, "step": 4103 }, { "epoch": 0.10537921206314664, "grad_norm": 0.8984375, "learning_rate": 0.00019703761916468968, "loss": 1.1659, "step": 4104 }, { "epoch": 0.10540488925906846, "grad_norm": 0.875, "learning_rate": 0.00019703654052161116, "loss": 1.2034, "step": 4105 }, { "epoch": 0.10543056645499028, "grad_norm": 0.8671875, "learning_rate": 0.00019703546168514758, "loss": 1.0881, "step": 4106 }, { "epoch": 0.10545624365091211, "grad_norm": 0.9453125, "learning_rate": 0.00019703438265530107, "loss": 1.2385, "step": 4107 }, { "epoch": 0.10548192084683392, "grad_norm": 0.8125, "learning_rate": 0.00019703330343207381, "loss": 1.1872, "step": 4108 }, { "epoch": 0.10550759804275574, "grad_norm": 0.89453125, "learning_rate": 0.00019703222401546797, "loss": 1.1455, "step": 4109 }, { "epoch": 0.10553327523867756, "grad_norm": 0.859375, "learning_rate": 0.00019703114440548566, "loss": 1.0228, "step": 4110 }, { "epoch": 0.10555895243459938, "grad_norm": 0.90234375, "learning_rate": 0.00019703006460212901, "loss": 1.1986, "step": 4111 }, { "epoch": 0.1055846296305212, "grad_norm": 0.84375, "learning_rate": 0.00019702898460540025, "loss": 1.0507, "step": 4112 }, { "epoch": 0.10561030682644301, "grad_norm": 0.84765625, "learning_rate": 0.00019702790441530148, "loss": 1.1151, "step": 4113 }, { "epoch": 0.10563598402236483, "grad_norm": 0.8046875, "learning_rate": 0.00019702682403183484, "loss": 1.0182, "step": 4114 }, { "epoch": 0.10566166121828666, "grad_norm": 0.84765625, "learning_rate": 0.0001970257434550025, "loss": 1.0623, "step": 4115 }, { "epoch": 0.10568733841420848, "grad_norm": 0.83984375, "learning_rate": 0.00019702466268480665, "loss": 1.0832, "step": 4116 }, { "epoch": 0.1057130156101303, "grad_norm": 0.8515625, "learning_rate": 0.0001970235817212494, "loss": 1.0784, "step": 4117 }, { "epoch": 0.10573869280605211, "grad_norm": 0.953125, "learning_rate": 0.00019702250056433292, "loss": 1.1267, "step": 4118 }, { "epoch": 0.10576437000197393, "grad_norm": 0.8359375, "learning_rate": 0.00019702141921405935, "loss": 1.1872, "step": 4119 }, { "epoch": 0.10579004719789575, "grad_norm": 0.82421875, "learning_rate": 0.00019702033767043085, "loss": 1.184, "step": 4120 }, { "epoch": 0.10581572439381758, "grad_norm": 0.90234375, "learning_rate": 0.00019701925593344958, "loss": 1.2718, "step": 4121 }, { "epoch": 0.1058414015897394, "grad_norm": 0.8359375, "learning_rate": 0.00019701817400311772, "loss": 1.1725, "step": 4122 }, { "epoch": 0.1058670787856612, "grad_norm": 0.85546875, "learning_rate": 0.00019701709187943737, "loss": 1.2423, "step": 4123 }, { "epoch": 0.10589275598158303, "grad_norm": 0.8046875, "learning_rate": 0.00019701600956241073, "loss": 1.201, "step": 4124 }, { "epoch": 0.10591843317750485, "grad_norm": 0.8359375, "learning_rate": 0.00019701492705203997, "loss": 1.1324, "step": 4125 }, { "epoch": 0.10594411037342667, "grad_norm": 0.8984375, "learning_rate": 0.00019701384434832718, "loss": 1.1606, "step": 4126 }, { "epoch": 0.1059697875693485, "grad_norm": 0.84765625, "learning_rate": 0.0001970127614512746, "loss": 1.1497, "step": 4127 }, { "epoch": 0.1059954647652703, "grad_norm": 0.83984375, "learning_rate": 0.00019701167836088434, "loss": 1.118, "step": 4128 }, { "epoch": 0.10602114196119213, "grad_norm": 0.89453125, "learning_rate": 0.00019701059507715857, "loss": 1.2946, "step": 4129 }, { "epoch": 0.10604681915711395, "grad_norm": 0.828125, "learning_rate": 0.00019700951160009944, "loss": 1.0507, "step": 4130 }, { "epoch": 0.10607249635303577, "grad_norm": 0.875, "learning_rate": 0.00019700842792970912, "loss": 1.0937, "step": 4131 }, { "epoch": 0.10609817354895759, "grad_norm": 0.81640625, "learning_rate": 0.00019700734406598978, "loss": 1.1826, "step": 4132 }, { "epoch": 0.1061238507448794, "grad_norm": 0.86328125, "learning_rate": 0.00019700626000894352, "loss": 1.0784, "step": 4133 }, { "epoch": 0.10614952794080122, "grad_norm": 0.8125, "learning_rate": 0.00019700517575857261, "loss": 1.0642, "step": 4134 }, { "epoch": 0.10617520513672304, "grad_norm": 0.81640625, "learning_rate": 0.00019700409131487908, "loss": 1.0279, "step": 4135 }, { "epoch": 0.10620088233264487, "grad_norm": 0.8828125, "learning_rate": 0.0001970030066778652, "loss": 1.2014, "step": 4136 }, { "epoch": 0.10622655952856669, "grad_norm": 0.83203125, "learning_rate": 0.0001970019218475331, "loss": 1.1502, "step": 4137 }, { "epoch": 0.1062522367244885, "grad_norm": 0.84375, "learning_rate": 0.00019700083682388492, "loss": 1.1667, "step": 4138 }, { "epoch": 0.10627791392041032, "grad_norm": 0.953125, "learning_rate": 0.0001969997516069228, "loss": 1.07, "step": 4139 }, { "epoch": 0.10630359111633214, "grad_norm": 0.8515625, "learning_rate": 0.00019699866619664897, "loss": 1.1264, "step": 4140 }, { "epoch": 0.10632926831225396, "grad_norm": 0.86328125, "learning_rate": 0.00019699758059306557, "loss": 1.106, "step": 4141 }, { "epoch": 0.10635494550817579, "grad_norm": 0.8828125, "learning_rate": 0.00019699649479617474, "loss": 1.2117, "step": 4142 }, { "epoch": 0.1063806227040976, "grad_norm": 0.91796875, "learning_rate": 0.00019699540880597865, "loss": 1.115, "step": 4143 }, { "epoch": 0.10640629990001942, "grad_norm": 1.1171875, "learning_rate": 0.00019699432262247946, "loss": 1.231, "step": 4144 }, { "epoch": 0.10643197709594124, "grad_norm": 0.91796875, "learning_rate": 0.00019699323624567937, "loss": 1.2128, "step": 4145 }, { "epoch": 0.10645765429186306, "grad_norm": 0.8828125, "learning_rate": 0.00019699214967558052, "loss": 1.2207, "step": 4146 }, { "epoch": 0.10648333148778488, "grad_norm": 0.859375, "learning_rate": 0.00019699106291218508, "loss": 1.1579, "step": 4147 }, { "epoch": 0.10650900868370669, "grad_norm": 0.90625, "learning_rate": 0.00019698997595549522, "loss": 1.304, "step": 4148 }, { "epoch": 0.10653468587962851, "grad_norm": 0.9375, "learning_rate": 0.00019698888880551307, "loss": 1.1799, "step": 4149 }, { "epoch": 0.10656036307555034, "grad_norm": 0.86328125, "learning_rate": 0.00019698780146224084, "loss": 1.2193, "step": 4150 }, { "epoch": 0.10658604027147216, "grad_norm": 0.9765625, "learning_rate": 0.00019698671392568069, "loss": 1.2396, "step": 4151 }, { "epoch": 0.10661171746739398, "grad_norm": 0.84375, "learning_rate": 0.00019698562619583477, "loss": 1.1806, "step": 4152 }, { "epoch": 0.10663739466331579, "grad_norm": 0.89453125, "learning_rate": 0.00019698453827270525, "loss": 1.1655, "step": 4153 }, { "epoch": 0.10666307185923761, "grad_norm": 0.84765625, "learning_rate": 0.00019698345015629434, "loss": 1.0916, "step": 4154 }, { "epoch": 0.10668874905515943, "grad_norm": 0.87890625, "learning_rate": 0.00019698236184660414, "loss": 1.2267, "step": 4155 }, { "epoch": 0.10671442625108125, "grad_norm": 0.91015625, "learning_rate": 0.00019698127334363689, "loss": 1.1578, "step": 4156 }, { "epoch": 0.10674010344700308, "grad_norm": 0.82421875, "learning_rate": 0.00019698018464739468, "loss": 1.3012, "step": 4157 }, { "epoch": 0.10676578064292488, "grad_norm": 0.85546875, "learning_rate": 0.00019697909575787975, "loss": 1.0101, "step": 4158 }, { "epoch": 0.1067914578388467, "grad_norm": 0.80078125, "learning_rate": 0.0001969780066750942, "loss": 1.2097, "step": 4159 }, { "epoch": 0.10681713503476853, "grad_norm": 0.89453125, "learning_rate": 0.0001969769173990403, "loss": 1.0567, "step": 4160 }, { "epoch": 0.10684281223069035, "grad_norm": 0.93359375, "learning_rate": 0.00019697582792972012, "loss": 1.2141, "step": 4161 }, { "epoch": 0.10686848942661217, "grad_norm": 0.83203125, "learning_rate": 0.00019697473826713592, "loss": 1.0135, "step": 4162 }, { "epoch": 0.10689416662253398, "grad_norm": 0.80859375, "learning_rate": 0.00019697364841128975, "loss": 1.0774, "step": 4163 }, { "epoch": 0.1069198438184558, "grad_norm": 0.84375, "learning_rate": 0.00019697255836218395, "loss": 1.2163, "step": 4164 }, { "epoch": 0.10694552101437763, "grad_norm": 0.8203125, "learning_rate": 0.00019697146811982052, "loss": 1.0807, "step": 4165 }, { "epoch": 0.10697119821029945, "grad_norm": 0.83203125, "learning_rate": 0.00019697037768420174, "loss": 1.0981, "step": 4166 }, { "epoch": 0.10699687540622127, "grad_norm": 0.83203125, "learning_rate": 0.00019696928705532977, "loss": 1.0605, "step": 4167 }, { "epoch": 0.10702255260214308, "grad_norm": 0.85546875, "learning_rate": 0.0001969681962332068, "loss": 1.3017, "step": 4168 }, { "epoch": 0.1070482297980649, "grad_norm": 0.83984375, "learning_rate": 0.00019696710521783492, "loss": 1.0304, "step": 4169 }, { "epoch": 0.10707390699398672, "grad_norm": 0.81640625, "learning_rate": 0.0001969660140092164, "loss": 1.2588, "step": 4170 }, { "epoch": 0.10709958418990854, "grad_norm": 0.85546875, "learning_rate": 0.00019696492260735333, "loss": 1.1059, "step": 4171 }, { "epoch": 0.10712526138583037, "grad_norm": 0.8515625, "learning_rate": 0.00019696383101224795, "loss": 1.1526, "step": 4172 }, { "epoch": 0.10715093858175218, "grad_norm": 0.8046875, "learning_rate": 0.00019696273922390243, "loss": 1.0475, "step": 4173 }, { "epoch": 0.107176615777674, "grad_norm": 0.8125, "learning_rate": 0.0001969616472423189, "loss": 1.1704, "step": 4174 }, { "epoch": 0.10720229297359582, "grad_norm": 0.92578125, "learning_rate": 0.0001969605550674996, "loss": 1.1565, "step": 4175 }, { "epoch": 0.10722797016951764, "grad_norm": 0.8125, "learning_rate": 0.00019695946269944665, "loss": 1.2409, "step": 4176 }, { "epoch": 0.10725364736543946, "grad_norm": 0.78515625, "learning_rate": 0.00019695837013816223, "loss": 1.0847, "step": 4177 }, { "epoch": 0.10727932456136127, "grad_norm": 0.7578125, "learning_rate": 0.00019695727738364854, "loss": 1.0485, "step": 4178 }, { "epoch": 0.1073050017572831, "grad_norm": 0.86328125, "learning_rate": 0.0001969561844359078, "loss": 1.2524, "step": 4179 }, { "epoch": 0.10733067895320492, "grad_norm": 0.8515625, "learning_rate": 0.0001969550912949421, "loss": 1.2708, "step": 4180 }, { "epoch": 0.10735635614912674, "grad_norm": 0.9140625, "learning_rate": 0.00019695399796075368, "loss": 1.2876, "step": 4181 }, { "epoch": 0.10738203334504856, "grad_norm": 0.84375, "learning_rate": 0.0001969529044333447, "loss": 1.1211, "step": 4182 }, { "epoch": 0.10740771054097037, "grad_norm": 0.80859375, "learning_rate": 0.00019695181071271734, "loss": 1.098, "step": 4183 }, { "epoch": 0.10743338773689219, "grad_norm": 0.9296875, "learning_rate": 0.00019695071679887377, "loss": 1.2863, "step": 4184 }, { "epoch": 0.10745906493281401, "grad_norm": 0.8984375, "learning_rate": 0.00019694962269181616, "loss": 1.114, "step": 4185 }, { "epoch": 0.10748474212873584, "grad_norm": 0.890625, "learning_rate": 0.00019694852839154673, "loss": 1.2584, "step": 4186 }, { "epoch": 0.10751041932465764, "grad_norm": 0.85546875, "learning_rate": 0.00019694743389806765, "loss": 1.1944, "step": 4187 }, { "epoch": 0.10753609652057947, "grad_norm": 0.8359375, "learning_rate": 0.0001969463392113811, "loss": 1.2462, "step": 4188 }, { "epoch": 0.10756177371650129, "grad_norm": 0.890625, "learning_rate": 0.00019694524433148922, "loss": 1.0818, "step": 4189 }, { "epoch": 0.10758745091242311, "grad_norm": 0.859375, "learning_rate": 0.00019694414925839425, "loss": 1.2357, "step": 4190 }, { "epoch": 0.10761312810834493, "grad_norm": 0.90234375, "learning_rate": 0.00019694305399209832, "loss": 1.0332, "step": 4191 }, { "epoch": 0.10763880530426674, "grad_norm": 0.91015625, "learning_rate": 0.0001969419585326037, "loss": 1.4245, "step": 4192 }, { "epoch": 0.10766448250018856, "grad_norm": 0.8359375, "learning_rate": 0.00019694086287991246, "loss": 1.0368, "step": 4193 }, { "epoch": 0.10769015969611039, "grad_norm": 0.890625, "learning_rate": 0.00019693976703402682, "loss": 1.3702, "step": 4194 }, { "epoch": 0.10771583689203221, "grad_norm": 0.82421875, "learning_rate": 0.00019693867099494902, "loss": 1.1786, "step": 4195 }, { "epoch": 0.10774151408795403, "grad_norm": 0.88671875, "learning_rate": 0.0001969375747626812, "loss": 1.1637, "step": 4196 }, { "epoch": 0.10776719128387584, "grad_norm": 0.828125, "learning_rate": 0.00019693647833722553, "loss": 1.1198, "step": 4197 }, { "epoch": 0.10779286847979766, "grad_norm": 0.890625, "learning_rate": 0.00019693538171858426, "loss": 1.3122, "step": 4198 }, { "epoch": 0.10781854567571948, "grad_norm": 0.77734375, "learning_rate": 0.00019693428490675946, "loss": 1.1678, "step": 4199 }, { "epoch": 0.1078442228716413, "grad_norm": 0.8203125, "learning_rate": 0.00019693318790175345, "loss": 1.0395, "step": 4200 }, { "epoch": 0.10786990006756313, "grad_norm": 0.80078125, "learning_rate": 0.00019693209070356833, "loss": 1.1122, "step": 4201 }, { "epoch": 0.10789557726348493, "grad_norm": 0.8515625, "learning_rate": 0.0001969309933122063, "loss": 1.2243, "step": 4202 }, { "epoch": 0.10792125445940676, "grad_norm": 0.9765625, "learning_rate": 0.00019692989572766957, "loss": 1.1006, "step": 4203 }, { "epoch": 0.10794693165532858, "grad_norm": 0.87890625, "learning_rate": 0.00019692879794996028, "loss": 1.0665, "step": 4204 }, { "epoch": 0.1079726088512504, "grad_norm": 0.91796875, "learning_rate": 0.0001969276999790807, "loss": 1.1095, "step": 4205 }, { "epoch": 0.10799828604717222, "grad_norm": 0.83984375, "learning_rate": 0.00019692660181503294, "loss": 1.2285, "step": 4206 }, { "epoch": 0.10802396324309403, "grad_norm": 0.84375, "learning_rate": 0.00019692550345781925, "loss": 1.1914, "step": 4207 }, { "epoch": 0.10804964043901585, "grad_norm": 0.83203125, "learning_rate": 0.00019692440490744176, "loss": 1.3717, "step": 4208 }, { "epoch": 0.10807531763493768, "grad_norm": 0.859375, "learning_rate": 0.0001969233061639027, "loss": 1.2085, "step": 4209 }, { "epoch": 0.1081009948308595, "grad_norm": 0.86328125, "learning_rate": 0.0001969222072272042, "loss": 1.0979, "step": 4210 }, { "epoch": 0.10812667202678132, "grad_norm": 0.89453125, "learning_rate": 0.00019692110809734853, "loss": 1.0729, "step": 4211 }, { "epoch": 0.10815234922270313, "grad_norm": 0.87109375, "learning_rate": 0.00019692000877433787, "loss": 1.1807, "step": 4212 }, { "epoch": 0.10817802641862495, "grad_norm": 0.87890625, "learning_rate": 0.00019691890925817437, "loss": 1.1357, "step": 4213 }, { "epoch": 0.10820370361454677, "grad_norm": 0.921875, "learning_rate": 0.0001969178095488602, "loss": 1.141, "step": 4214 }, { "epoch": 0.1082293808104686, "grad_norm": 0.8671875, "learning_rate": 0.00019691670964639763, "loss": 1.2523, "step": 4215 }, { "epoch": 0.10825505800639042, "grad_norm": 0.8046875, "learning_rate": 0.00019691560955078884, "loss": 1.0799, "step": 4216 }, { "epoch": 0.10828073520231223, "grad_norm": 0.765625, "learning_rate": 0.0001969145092620359, "loss": 1.0311, "step": 4217 }, { "epoch": 0.10830641239823405, "grad_norm": 0.8203125, "learning_rate": 0.0001969134087801412, "loss": 1.1026, "step": 4218 }, { "epoch": 0.10833208959415587, "grad_norm": 0.8671875, "learning_rate": 0.00019691230810510676, "loss": 1.1307, "step": 4219 }, { "epoch": 0.10835776679007769, "grad_norm": 0.85546875, "learning_rate": 0.0001969112072369349, "loss": 1.0967, "step": 4220 }, { "epoch": 0.10838344398599951, "grad_norm": 0.890625, "learning_rate": 0.00019691010617562773, "loss": 0.9239, "step": 4221 }, { "epoch": 0.10840912118192132, "grad_norm": 0.83984375, "learning_rate": 0.00019690900492118748, "loss": 1.2922, "step": 4222 }, { "epoch": 0.10843479837784314, "grad_norm": 0.88671875, "learning_rate": 0.0001969079034736163, "loss": 1.1514, "step": 4223 }, { "epoch": 0.10846047557376497, "grad_norm": 0.94140625, "learning_rate": 0.00019690680183291644, "loss": 1.1005, "step": 4224 }, { "epoch": 0.10848615276968679, "grad_norm": 0.80078125, "learning_rate": 0.0001969056999990901, "loss": 1.0267, "step": 4225 }, { "epoch": 0.10851182996560861, "grad_norm": 0.890625, "learning_rate": 0.00019690459797213944, "loss": 1.0677, "step": 4226 }, { "epoch": 0.10853750716153042, "grad_norm": 0.87109375, "learning_rate": 0.00019690349575206666, "loss": 1.2553, "step": 4227 }, { "epoch": 0.10856318435745224, "grad_norm": 0.90234375, "learning_rate": 0.000196902393338874, "loss": 1.3177, "step": 4228 }, { "epoch": 0.10858886155337406, "grad_norm": 0.8515625, "learning_rate": 0.00019690129073256357, "loss": 1.2617, "step": 4229 }, { "epoch": 0.10861453874929589, "grad_norm": 0.8828125, "learning_rate": 0.00019690018793313768, "loss": 1.1725, "step": 4230 }, { "epoch": 0.10864021594521771, "grad_norm": 0.859375, "learning_rate": 0.00019689908494059842, "loss": 1.093, "step": 4231 }, { "epoch": 0.10866589314113952, "grad_norm": 0.859375, "learning_rate": 0.00019689798175494806, "loss": 1.1172, "step": 4232 }, { "epoch": 0.10869157033706134, "grad_norm": 0.8203125, "learning_rate": 0.00019689687837618877, "loss": 1.2404, "step": 4233 }, { "epoch": 0.10871724753298316, "grad_norm": 0.90234375, "learning_rate": 0.00019689577480432274, "loss": 1.1613, "step": 4234 }, { "epoch": 0.10874292472890498, "grad_norm": 0.81640625, "learning_rate": 0.0001968946710393522, "loss": 1.0342, "step": 4235 }, { "epoch": 0.1087686019248268, "grad_norm": 0.91796875, "learning_rate": 0.00019689356708127936, "loss": 1.2458, "step": 4236 }, { "epoch": 0.10879427912074861, "grad_norm": 0.8515625, "learning_rate": 0.00019689246293010636, "loss": 1.1499, "step": 4237 }, { "epoch": 0.10881995631667044, "grad_norm": 0.87890625, "learning_rate": 0.00019689135858583543, "loss": 1.4176, "step": 4238 }, { "epoch": 0.10884563351259226, "grad_norm": 0.79296875, "learning_rate": 0.00019689025404846878, "loss": 1.113, "step": 4239 }, { "epoch": 0.10887131070851408, "grad_norm": 0.85546875, "learning_rate": 0.00019688914931800862, "loss": 1.079, "step": 4240 }, { "epoch": 0.1088969879044359, "grad_norm": 0.78515625, "learning_rate": 0.00019688804439445712, "loss": 1.2135, "step": 4241 }, { "epoch": 0.10892266510035771, "grad_norm": 0.87109375, "learning_rate": 0.0001968869392778165, "loss": 1.1051, "step": 4242 }, { "epoch": 0.10894834229627953, "grad_norm": 0.828125, "learning_rate": 0.00019688583396808897, "loss": 1.2087, "step": 4243 }, { "epoch": 0.10897401949220135, "grad_norm": 0.83984375, "learning_rate": 0.00019688472846527671, "loss": 1.069, "step": 4244 }, { "epoch": 0.10899969668812318, "grad_norm": 0.87890625, "learning_rate": 0.00019688362276938195, "loss": 1.2823, "step": 4245 }, { "epoch": 0.109025373884045, "grad_norm": 0.85546875, "learning_rate": 0.0001968825168804069, "loss": 1.07, "step": 4246 }, { "epoch": 0.10905105107996681, "grad_norm": 0.8671875, "learning_rate": 0.0001968814107983537, "loss": 1.2791, "step": 4247 }, { "epoch": 0.10907672827588863, "grad_norm": 0.90625, "learning_rate": 0.00019688030452322464, "loss": 1.0723, "step": 4248 }, { "epoch": 0.10910240547181045, "grad_norm": 0.87109375, "learning_rate": 0.00019687919805502188, "loss": 1.1862, "step": 4249 }, { "epoch": 0.10912808266773227, "grad_norm": 0.890625, "learning_rate": 0.00019687809139374763, "loss": 1.1178, "step": 4250 }, { "epoch": 0.1091537598636541, "grad_norm": 0.87109375, "learning_rate": 0.00019687698453940407, "loss": 1.0189, "step": 4251 }, { "epoch": 0.1091794370595759, "grad_norm": 0.8671875, "learning_rate": 0.00019687587749199343, "loss": 1.094, "step": 4252 }, { "epoch": 0.10920511425549773, "grad_norm": 0.8671875, "learning_rate": 0.00019687477025151794, "loss": 0.9701, "step": 4253 }, { "epoch": 0.10923079145141955, "grad_norm": 0.90625, "learning_rate": 0.00019687366281797978, "loss": 1.05, "step": 4254 }, { "epoch": 0.10925646864734137, "grad_norm": 0.94921875, "learning_rate": 0.00019687255519138116, "loss": 1.2778, "step": 4255 }, { "epoch": 0.10928214584326319, "grad_norm": 0.94140625, "learning_rate": 0.00019687144737172427, "loss": 1.1626, "step": 4256 }, { "epoch": 0.109307823039185, "grad_norm": 0.8984375, "learning_rate": 0.00019687033935901136, "loss": 1.1104, "step": 4257 }, { "epoch": 0.10933350023510682, "grad_norm": 0.828125, "learning_rate": 0.0001968692311532446, "loss": 1.0555, "step": 4258 }, { "epoch": 0.10935917743102865, "grad_norm": 0.8359375, "learning_rate": 0.0001968681227544262, "loss": 1.1678, "step": 4259 }, { "epoch": 0.10938485462695047, "grad_norm": 0.91796875, "learning_rate": 0.00019686701416255843, "loss": 1.1462, "step": 4260 }, { "epoch": 0.10941053182287229, "grad_norm": 0.8671875, "learning_rate": 0.0001968659053776434, "loss": 1.1267, "step": 4261 }, { "epoch": 0.1094362090187941, "grad_norm": 0.82421875, "learning_rate": 0.0001968647963996834, "loss": 1.166, "step": 4262 }, { "epoch": 0.10946188621471592, "grad_norm": 0.87890625, "learning_rate": 0.00019686368722868057, "loss": 1.1063, "step": 4263 }, { "epoch": 0.10948756341063774, "grad_norm": 0.7578125, "learning_rate": 0.0001968625778646372, "loss": 1.0315, "step": 4264 }, { "epoch": 0.10951324060655956, "grad_norm": 0.828125, "learning_rate": 0.00019686146830755547, "loss": 1.1777, "step": 4265 }, { "epoch": 0.10953891780248139, "grad_norm": 0.76953125, "learning_rate": 0.00019686035855743755, "loss": 1.0883, "step": 4266 }, { "epoch": 0.1095645949984032, "grad_norm": 0.875, "learning_rate": 0.00019685924861428567, "loss": 1.2199, "step": 4267 }, { "epoch": 0.10959027219432502, "grad_norm": 0.8671875, "learning_rate": 0.00019685813847810212, "loss": 1.1438, "step": 4268 }, { "epoch": 0.10961594939024684, "grad_norm": 0.87109375, "learning_rate": 0.000196857028148889, "loss": 1.2208, "step": 4269 }, { "epoch": 0.10964162658616866, "grad_norm": 0.8359375, "learning_rate": 0.0001968559176266486, "loss": 1.1701, "step": 4270 }, { "epoch": 0.10966730378209048, "grad_norm": 0.84375, "learning_rate": 0.00019685480691138308, "loss": 1.1392, "step": 4271 }, { "epoch": 0.10969298097801229, "grad_norm": 0.83203125, "learning_rate": 0.00019685369600309468, "loss": 0.9918, "step": 4272 }, { "epoch": 0.10971865817393411, "grad_norm": 0.8359375, "learning_rate": 0.0001968525849017856, "loss": 1.1647, "step": 4273 }, { "epoch": 0.10974433536985594, "grad_norm": 0.85546875, "learning_rate": 0.00019685147360745813, "loss": 1.0358, "step": 4274 }, { "epoch": 0.10977001256577776, "grad_norm": 0.8359375, "learning_rate": 0.00019685036212011437, "loss": 1.0561, "step": 4275 }, { "epoch": 0.10979568976169958, "grad_norm": 0.85546875, "learning_rate": 0.0001968492504397566, "loss": 1.1177, "step": 4276 }, { "epoch": 0.10982136695762139, "grad_norm": 0.8671875, "learning_rate": 0.00019684813856638705, "loss": 1.0948, "step": 4277 }, { "epoch": 0.10984704415354321, "grad_norm": 0.84765625, "learning_rate": 0.00019684702650000786, "loss": 1.1372, "step": 4278 }, { "epoch": 0.10987272134946503, "grad_norm": 0.83984375, "learning_rate": 0.00019684591424062132, "loss": 1.0938, "step": 4279 }, { "epoch": 0.10989839854538686, "grad_norm": 0.796875, "learning_rate": 0.00019684480178822962, "loss": 1.1892, "step": 4280 }, { "epoch": 0.10992407574130868, "grad_norm": 0.8046875, "learning_rate": 0.00019684368914283496, "loss": 1.0607, "step": 4281 }, { "epoch": 0.10994975293723049, "grad_norm": 0.87109375, "learning_rate": 0.00019684257630443958, "loss": 1.1411, "step": 4282 }, { "epoch": 0.10997543013315231, "grad_norm": 0.80859375, "learning_rate": 0.00019684146327304572, "loss": 1.1025, "step": 4283 }, { "epoch": 0.11000110732907413, "grad_norm": 0.8203125, "learning_rate": 0.00019684035004865553, "loss": 1.0764, "step": 4284 }, { "epoch": 0.11002678452499595, "grad_norm": 0.82421875, "learning_rate": 0.0001968392366312713, "loss": 1.2267, "step": 4285 }, { "epoch": 0.11005246172091776, "grad_norm": 0.7890625, "learning_rate": 0.00019683812302089522, "loss": 1.0203, "step": 4286 }, { "epoch": 0.11007813891683958, "grad_norm": 0.88671875, "learning_rate": 0.0001968370092175295, "loss": 1.2115, "step": 4287 }, { "epoch": 0.1101038161127614, "grad_norm": 0.96875, "learning_rate": 0.00019683589522117635, "loss": 1.1495, "step": 4288 }, { "epoch": 0.11012949330868323, "grad_norm": 0.9375, "learning_rate": 0.00019683478103183803, "loss": 1.2718, "step": 4289 }, { "epoch": 0.11015517050460505, "grad_norm": 0.84375, "learning_rate": 0.00019683366664951675, "loss": 1.2033, "step": 4290 }, { "epoch": 0.11018084770052686, "grad_norm": 0.890625, "learning_rate": 0.0001968325520742147, "loss": 1.1354, "step": 4291 }, { "epoch": 0.11020652489644868, "grad_norm": 0.8984375, "learning_rate": 0.00019683143730593413, "loss": 1.1274, "step": 4292 }, { "epoch": 0.1102322020923705, "grad_norm": 1.015625, "learning_rate": 0.00019683032234467723, "loss": 1.0337, "step": 4293 }, { "epoch": 0.11025787928829232, "grad_norm": 0.77734375, "learning_rate": 0.00019682920719044626, "loss": 1.0921, "step": 4294 }, { "epoch": 0.11028355648421415, "grad_norm": 0.90234375, "learning_rate": 0.00019682809184324343, "loss": 1.0738, "step": 4295 }, { "epoch": 0.11030923368013595, "grad_norm": 0.890625, "learning_rate": 0.00019682697630307096, "loss": 1.192, "step": 4296 }, { "epoch": 0.11033491087605778, "grad_norm": 0.8671875, "learning_rate": 0.00019682586056993107, "loss": 1.096, "step": 4297 }, { "epoch": 0.1103605880719796, "grad_norm": 0.90234375, "learning_rate": 0.00019682474464382597, "loss": 1.0653, "step": 4298 }, { "epoch": 0.11038626526790142, "grad_norm": 0.89453125, "learning_rate": 0.0001968236285247579, "loss": 1.2672, "step": 4299 }, { "epoch": 0.11041194246382324, "grad_norm": 0.90234375, "learning_rate": 0.0001968225122127291, "loss": 1.2925, "step": 4300 }, { "epoch": 0.11043761965974505, "grad_norm": 0.80078125, "learning_rate": 0.0001968213957077418, "loss": 0.9333, "step": 4301 }, { "epoch": 0.11046329685566687, "grad_norm": 0.75390625, "learning_rate": 0.00019682027900979818, "loss": 1.155, "step": 4302 }, { "epoch": 0.1104889740515887, "grad_norm": 0.8046875, "learning_rate": 0.00019681916211890049, "loss": 1.029, "step": 4303 }, { "epoch": 0.11051465124751052, "grad_norm": 0.87890625, "learning_rate": 0.00019681804503505096, "loss": 1.2651, "step": 4304 }, { "epoch": 0.11054032844343234, "grad_norm": 0.83984375, "learning_rate": 0.0001968169277582518, "loss": 0.922, "step": 4305 }, { "epoch": 0.11056600563935415, "grad_norm": 0.90234375, "learning_rate": 0.00019681581028850527, "loss": 1.2378, "step": 4306 }, { "epoch": 0.11059168283527597, "grad_norm": 0.89453125, "learning_rate": 0.00019681469262581354, "loss": 0.8911, "step": 4307 }, { "epoch": 0.11061736003119779, "grad_norm": 0.8984375, "learning_rate": 0.0001968135747701789, "loss": 1.2841, "step": 4308 }, { "epoch": 0.11064303722711961, "grad_norm": 0.890625, "learning_rate": 0.00019681245672160356, "loss": 0.9897, "step": 4309 }, { "epoch": 0.11066871442304144, "grad_norm": 0.93359375, "learning_rate": 0.00019681133848008974, "loss": 1.1518, "step": 4310 }, { "epoch": 0.11069439161896324, "grad_norm": 0.8359375, "learning_rate": 0.00019681022004563965, "loss": 1.1558, "step": 4311 }, { "epoch": 0.11072006881488507, "grad_norm": 0.859375, "learning_rate": 0.0001968091014182555, "loss": 1.1975, "step": 4312 }, { "epoch": 0.11074574601080689, "grad_norm": 0.87890625, "learning_rate": 0.00019680798259793963, "loss": 1.1078, "step": 4313 }, { "epoch": 0.11077142320672871, "grad_norm": 0.89453125, "learning_rate": 0.00019680686358469415, "loss": 0.9968, "step": 4314 }, { "epoch": 0.11079710040265053, "grad_norm": 0.8671875, "learning_rate": 0.00019680574437852134, "loss": 1.0437, "step": 4315 }, { "epoch": 0.11082277759857234, "grad_norm": 0.92578125, "learning_rate": 0.00019680462497942343, "loss": 1.2728, "step": 4316 }, { "epoch": 0.11084845479449416, "grad_norm": 0.8359375, "learning_rate": 0.00019680350538740267, "loss": 1.1297, "step": 4317 }, { "epoch": 0.11087413199041599, "grad_norm": 1.0390625, "learning_rate": 0.00019680238560246122, "loss": 1.1562, "step": 4318 }, { "epoch": 0.11089980918633781, "grad_norm": 0.88671875, "learning_rate": 0.0001968012656246014, "loss": 1.0835, "step": 4319 }, { "epoch": 0.11092548638225963, "grad_norm": 0.83984375, "learning_rate": 0.0001968001454538254, "loss": 1.2329, "step": 4320 }, { "epoch": 0.11095116357818144, "grad_norm": 1.1015625, "learning_rate": 0.00019679902509013542, "loss": 1.0764, "step": 4321 }, { "epoch": 0.11097684077410326, "grad_norm": 0.84375, "learning_rate": 0.00019679790453353376, "loss": 1.2063, "step": 4322 }, { "epoch": 0.11100251797002508, "grad_norm": 0.89453125, "learning_rate": 0.0001967967837840226, "loss": 1.0785, "step": 4323 }, { "epoch": 0.1110281951659469, "grad_norm": 0.8671875, "learning_rate": 0.0001967956628416042, "loss": 1.2346, "step": 4324 }, { "epoch": 0.11105387236186873, "grad_norm": 0.84765625, "learning_rate": 0.0001967945417062808, "loss": 1.1748, "step": 4325 }, { "epoch": 0.11107954955779054, "grad_norm": 0.84765625, "learning_rate": 0.0001967934203780546, "loss": 1.1521, "step": 4326 }, { "epoch": 0.11110522675371236, "grad_norm": 0.75390625, "learning_rate": 0.00019679229885692786, "loss": 1.0934, "step": 4327 }, { "epoch": 0.11113090394963418, "grad_norm": 0.85546875, "learning_rate": 0.00019679117714290284, "loss": 1.0805, "step": 4328 }, { "epoch": 0.111156581145556, "grad_norm": 0.85546875, "learning_rate": 0.00019679005523598174, "loss": 1.116, "step": 4329 }, { "epoch": 0.11118225834147782, "grad_norm": 0.9140625, "learning_rate": 0.00019678893313616677, "loss": 1.0481, "step": 4330 }, { "epoch": 0.11120793553739963, "grad_norm": 0.8671875, "learning_rate": 0.00019678781084346022, "loss": 1.1977, "step": 4331 }, { "epoch": 0.11123361273332145, "grad_norm": 0.8203125, "learning_rate": 0.00019678668835786432, "loss": 1.1401, "step": 4332 }, { "epoch": 0.11125928992924328, "grad_norm": 0.85546875, "learning_rate": 0.00019678556567938123, "loss": 1.2352, "step": 4333 }, { "epoch": 0.1112849671251651, "grad_norm": 1.0, "learning_rate": 0.00019678444280801333, "loss": 1.0979, "step": 4334 }, { "epoch": 0.11131064432108692, "grad_norm": 0.83203125, "learning_rate": 0.00019678331974376273, "loss": 1.0854, "step": 4335 }, { "epoch": 0.11133632151700873, "grad_norm": 0.921875, "learning_rate": 0.00019678219648663173, "loss": 1.0301, "step": 4336 }, { "epoch": 0.11136199871293055, "grad_norm": 0.86328125, "learning_rate": 0.00019678107303662255, "loss": 1.1317, "step": 4337 }, { "epoch": 0.11138767590885237, "grad_norm": 0.88671875, "learning_rate": 0.00019677994939373746, "loss": 1.2101, "step": 4338 }, { "epoch": 0.1114133531047742, "grad_norm": 0.859375, "learning_rate": 0.00019677882555797863, "loss": 1.0731, "step": 4339 }, { "epoch": 0.11143903030069602, "grad_norm": 0.81640625, "learning_rate": 0.00019677770152934837, "loss": 1.1332, "step": 4340 }, { "epoch": 0.11146470749661783, "grad_norm": 0.89453125, "learning_rate": 0.0001967765773078489, "loss": 1.1881, "step": 4341 }, { "epoch": 0.11149038469253965, "grad_norm": 1.0234375, "learning_rate": 0.00019677545289348243, "loss": 1.2111, "step": 4342 }, { "epoch": 0.11151606188846147, "grad_norm": 0.984375, "learning_rate": 0.00019677432828625125, "loss": 1.1788, "step": 4343 }, { "epoch": 0.11154173908438329, "grad_norm": 0.90234375, "learning_rate": 0.00019677320348615755, "loss": 1.1187, "step": 4344 }, { "epoch": 0.11156741628030512, "grad_norm": 0.83984375, "learning_rate": 0.00019677207849320361, "loss": 1.0968, "step": 4345 }, { "epoch": 0.11159309347622692, "grad_norm": 0.89453125, "learning_rate": 0.00019677095330739166, "loss": 0.9367, "step": 4346 }, { "epoch": 0.11161877067214875, "grad_norm": 0.78515625, "learning_rate": 0.00019676982792872394, "loss": 1.1507, "step": 4347 }, { "epoch": 0.11164444786807057, "grad_norm": 0.83984375, "learning_rate": 0.0001967687023572027, "loss": 1.0224, "step": 4348 }, { "epoch": 0.11167012506399239, "grad_norm": 0.87890625, "learning_rate": 0.00019676757659283016, "loss": 1.2165, "step": 4349 }, { "epoch": 0.11169580225991421, "grad_norm": 0.86328125, "learning_rate": 0.00019676645063560857, "loss": 1.1883, "step": 4350 }, { "epoch": 0.11172147945583602, "grad_norm": 0.84765625, "learning_rate": 0.00019676532448554022, "loss": 1.1294, "step": 4351 }, { "epoch": 0.11174715665175784, "grad_norm": 0.84765625, "learning_rate": 0.0001967641981426273, "loss": 1.1722, "step": 4352 }, { "epoch": 0.11177283384767966, "grad_norm": 0.93359375, "learning_rate": 0.0001967630716068721, "loss": 1.1263, "step": 4353 }, { "epoch": 0.11179851104360149, "grad_norm": 1.0390625, "learning_rate": 0.0001967619448782768, "loss": 1.066, "step": 4354 }, { "epoch": 0.11182418823952331, "grad_norm": 1.265625, "learning_rate": 0.0001967608179568437, "loss": 1.1096, "step": 4355 }, { "epoch": 0.11184986543544512, "grad_norm": 0.890625, "learning_rate": 0.000196759690842575, "loss": 1.3427, "step": 4356 }, { "epoch": 0.11187554263136694, "grad_norm": 0.85546875, "learning_rate": 0.00019675856353547304, "loss": 1.1269, "step": 4357 }, { "epoch": 0.11190121982728876, "grad_norm": 0.859375, "learning_rate": 0.00019675743603553996, "loss": 1.0286, "step": 4358 }, { "epoch": 0.11192689702321058, "grad_norm": 0.8515625, "learning_rate": 0.00019675630834277804, "loss": 1.085, "step": 4359 }, { "epoch": 0.1119525742191324, "grad_norm": 1.0, "learning_rate": 0.00019675518045718957, "loss": 1.2142, "step": 4360 }, { "epoch": 0.11197825141505421, "grad_norm": 0.87890625, "learning_rate": 0.00019675405237877677, "loss": 0.9917, "step": 4361 }, { "epoch": 0.11200392861097604, "grad_norm": 0.78125, "learning_rate": 0.00019675292410754186, "loss": 1.1529, "step": 4362 }, { "epoch": 0.11202960580689786, "grad_norm": 0.875, "learning_rate": 0.00019675179564348713, "loss": 1.2478, "step": 4363 }, { "epoch": 0.11205528300281968, "grad_norm": 0.83203125, "learning_rate": 0.00019675066698661477, "loss": 1.0844, "step": 4364 }, { "epoch": 0.1120809601987415, "grad_norm": 0.828125, "learning_rate": 0.0001967495381369271, "loss": 1.1216, "step": 4365 }, { "epoch": 0.11210663739466331, "grad_norm": 0.80078125, "learning_rate": 0.00019674840909442637, "loss": 1.0905, "step": 4366 }, { "epoch": 0.11213231459058513, "grad_norm": 0.88671875, "learning_rate": 0.00019674727985911474, "loss": 1.2591, "step": 4367 }, { "epoch": 0.11215799178650696, "grad_norm": 0.91796875, "learning_rate": 0.00019674615043099457, "loss": 1.1337, "step": 4368 }, { "epoch": 0.11218366898242878, "grad_norm": 0.98828125, "learning_rate": 0.00019674502081006804, "loss": 1.3088, "step": 4369 }, { "epoch": 0.1122093461783506, "grad_norm": 0.953125, "learning_rate": 0.0001967438909963374, "loss": 1.2213, "step": 4370 }, { "epoch": 0.11223502337427241, "grad_norm": 0.890625, "learning_rate": 0.00019674276098980497, "loss": 1.1936, "step": 4371 }, { "epoch": 0.11226070057019423, "grad_norm": 0.93359375, "learning_rate": 0.00019674163079047295, "loss": 1.0259, "step": 4372 }, { "epoch": 0.11228637776611605, "grad_norm": 0.8125, "learning_rate": 0.00019674050039834357, "loss": 1.1096, "step": 4373 }, { "epoch": 0.11231205496203787, "grad_norm": 0.92578125, "learning_rate": 0.00019673936981341912, "loss": 1.1379, "step": 4374 }, { "epoch": 0.1123377321579597, "grad_norm": 0.93359375, "learning_rate": 0.00019673823903570186, "loss": 1.0037, "step": 4375 }, { "epoch": 0.1123634093538815, "grad_norm": 0.86328125, "learning_rate": 0.000196737108065194, "loss": 1.1866, "step": 4376 }, { "epoch": 0.11238908654980333, "grad_norm": 0.79296875, "learning_rate": 0.00019673597690189786, "loss": 1.1614, "step": 4377 }, { "epoch": 0.11241476374572515, "grad_norm": 0.8828125, "learning_rate": 0.0001967348455458156, "loss": 1.0223, "step": 4378 }, { "epoch": 0.11244044094164697, "grad_norm": 0.875, "learning_rate": 0.00019673371399694958, "loss": 1.1668, "step": 4379 }, { "epoch": 0.1124661181375688, "grad_norm": 0.91015625, "learning_rate": 0.00019673258225530197, "loss": 1.0138, "step": 4380 }, { "epoch": 0.1124917953334906, "grad_norm": 0.890625, "learning_rate": 0.00019673145032087508, "loss": 1.101, "step": 4381 }, { "epoch": 0.11251747252941242, "grad_norm": 0.9453125, "learning_rate": 0.00019673031819367117, "loss": 1.1531, "step": 4382 }, { "epoch": 0.11254314972533425, "grad_norm": 0.828125, "learning_rate": 0.00019672918587369242, "loss": 1.1534, "step": 4383 }, { "epoch": 0.11256882692125607, "grad_norm": 0.8046875, "learning_rate": 0.0001967280533609412, "loss": 1.1434, "step": 4384 }, { "epoch": 0.11259450411717789, "grad_norm": 0.86328125, "learning_rate": 0.00019672692065541967, "loss": 1.1098, "step": 4385 }, { "epoch": 0.1126201813130997, "grad_norm": 0.8046875, "learning_rate": 0.00019672578775713014, "loss": 1.1106, "step": 4386 }, { "epoch": 0.11264585850902152, "grad_norm": 0.86328125, "learning_rate": 0.00019672465466607483, "loss": 1.3226, "step": 4387 }, { "epoch": 0.11267153570494334, "grad_norm": 0.87109375, "learning_rate": 0.00019672352138225605, "loss": 1.2213, "step": 4388 }, { "epoch": 0.11269721290086517, "grad_norm": 0.91796875, "learning_rate": 0.000196722387905676, "loss": 1.1093, "step": 4389 }, { "epoch": 0.11272289009678697, "grad_norm": 1.0234375, "learning_rate": 0.000196721254236337, "loss": 1.1128, "step": 4390 }, { "epoch": 0.1127485672927088, "grad_norm": 0.83984375, "learning_rate": 0.00019672012037424126, "loss": 1.1407, "step": 4391 }, { "epoch": 0.11277424448863062, "grad_norm": 0.859375, "learning_rate": 0.00019671898631939104, "loss": 1.0139, "step": 4392 }, { "epoch": 0.11279992168455244, "grad_norm": 0.859375, "learning_rate": 0.00019671785207178862, "loss": 1.1739, "step": 4393 }, { "epoch": 0.11282559888047426, "grad_norm": 0.7578125, "learning_rate": 0.0001967167176314363, "loss": 0.9689, "step": 4394 }, { "epoch": 0.11285127607639607, "grad_norm": 0.859375, "learning_rate": 0.00019671558299833627, "loss": 1.2206, "step": 4395 }, { "epoch": 0.11287695327231789, "grad_norm": 0.8046875, "learning_rate": 0.0001967144481724908, "loss": 1.1817, "step": 4396 }, { "epoch": 0.11290263046823971, "grad_norm": 0.83203125, "learning_rate": 0.0001967133131539022, "loss": 1.0623, "step": 4397 }, { "epoch": 0.11292830766416154, "grad_norm": 0.7734375, "learning_rate": 0.0001967121779425727, "loss": 0.9711, "step": 4398 }, { "epoch": 0.11295398486008336, "grad_norm": 0.77734375, "learning_rate": 0.00019671104253850453, "loss": 1.1326, "step": 4399 }, { "epoch": 0.11297966205600517, "grad_norm": 0.86328125, "learning_rate": 0.00019670990694170003, "loss": 1.26, "step": 4400 }, { "epoch": 0.11300533925192699, "grad_norm": 0.8671875, "learning_rate": 0.0001967087711521614, "loss": 1.101, "step": 4401 }, { "epoch": 0.11303101644784881, "grad_norm": 0.8671875, "learning_rate": 0.00019670763516989095, "loss": 1.1738, "step": 4402 }, { "epoch": 0.11305669364377063, "grad_norm": 0.90625, "learning_rate": 0.0001967064989948909, "loss": 1.0968, "step": 4403 }, { "epoch": 0.11308237083969246, "grad_norm": 0.8671875, "learning_rate": 0.00019670536262716352, "loss": 1.2035, "step": 4404 }, { "epoch": 0.11310804803561426, "grad_norm": 0.8515625, "learning_rate": 0.0001967042260667111, "loss": 1.2, "step": 4405 }, { "epoch": 0.11313372523153609, "grad_norm": 1.2578125, "learning_rate": 0.0001967030893135359, "loss": 1.1326, "step": 4406 }, { "epoch": 0.11315940242745791, "grad_norm": 0.875, "learning_rate": 0.00019670195236764018, "loss": 1.0324, "step": 4407 }, { "epoch": 0.11318507962337973, "grad_norm": 0.88671875, "learning_rate": 0.0001967008152290262, "loss": 1.1243, "step": 4408 }, { "epoch": 0.11321075681930155, "grad_norm": 0.92578125, "learning_rate": 0.0001966996778976962, "loss": 1.2886, "step": 4409 }, { "epoch": 0.11323643401522336, "grad_norm": 0.9140625, "learning_rate": 0.00019669854037365251, "loss": 1.3565, "step": 4410 }, { "epoch": 0.11326211121114518, "grad_norm": 0.8515625, "learning_rate": 0.00019669740265689737, "loss": 1.0922, "step": 4411 }, { "epoch": 0.113287788407067, "grad_norm": 0.8828125, "learning_rate": 0.00019669626474743304, "loss": 1.1928, "step": 4412 }, { "epoch": 0.11331346560298883, "grad_norm": 0.91796875, "learning_rate": 0.00019669512664526178, "loss": 1.0842, "step": 4413 }, { "epoch": 0.11333914279891065, "grad_norm": 0.80859375, "learning_rate": 0.00019669398835038584, "loss": 1.1473, "step": 4414 }, { "epoch": 0.11336481999483246, "grad_norm": 0.890625, "learning_rate": 0.00019669284986280754, "loss": 1.072, "step": 4415 }, { "epoch": 0.11339049719075428, "grad_norm": 0.90234375, "learning_rate": 0.00019669171118252913, "loss": 1.0855, "step": 4416 }, { "epoch": 0.1134161743866761, "grad_norm": 0.84765625, "learning_rate": 0.00019669057230955285, "loss": 1.1206, "step": 4417 }, { "epoch": 0.11344185158259792, "grad_norm": 0.90234375, "learning_rate": 0.00019668943324388104, "loss": 1.1287, "step": 4418 }, { "epoch": 0.11346752877851975, "grad_norm": 0.96484375, "learning_rate": 0.00019668829398551587, "loss": 0.9827, "step": 4419 }, { "epoch": 0.11349320597444156, "grad_norm": 0.84375, "learning_rate": 0.0001966871545344597, "loss": 1.1079, "step": 4420 }, { "epoch": 0.11351888317036338, "grad_norm": 1.1328125, "learning_rate": 0.00019668601489071475, "loss": 1.1244, "step": 4421 }, { "epoch": 0.1135445603662852, "grad_norm": 0.80078125, "learning_rate": 0.0001966848750542833, "loss": 1.0181, "step": 4422 }, { "epoch": 0.11357023756220702, "grad_norm": 0.8125, "learning_rate": 0.00019668373502516767, "loss": 1.0085, "step": 4423 }, { "epoch": 0.11359591475812884, "grad_norm": 0.8203125, "learning_rate": 0.00019668259480337002, "loss": 1.2269, "step": 4424 }, { "epoch": 0.11362159195405065, "grad_norm": 0.98828125, "learning_rate": 0.00019668145438889273, "loss": 1.4377, "step": 4425 }, { "epoch": 0.11364726914997247, "grad_norm": 0.90234375, "learning_rate": 0.00019668031378173803, "loss": 1.0735, "step": 4426 }, { "epoch": 0.1136729463458943, "grad_norm": 0.9921875, "learning_rate": 0.0001966791729819082, "loss": 1.3712, "step": 4427 }, { "epoch": 0.11369862354181612, "grad_norm": 0.859375, "learning_rate": 0.00019667803198940553, "loss": 0.9928, "step": 4428 }, { "epoch": 0.11372430073773794, "grad_norm": 0.87109375, "learning_rate": 0.00019667689080423223, "loss": 1.2855, "step": 4429 }, { "epoch": 0.11374997793365975, "grad_norm": 0.9609375, "learning_rate": 0.00019667574942639065, "loss": 1.0825, "step": 4430 }, { "epoch": 0.11377565512958157, "grad_norm": 0.8984375, "learning_rate": 0.00019667460785588304, "loss": 1.0747, "step": 4431 }, { "epoch": 0.1138013323255034, "grad_norm": 0.85546875, "learning_rate": 0.00019667346609271165, "loss": 1.0347, "step": 4432 }, { "epoch": 0.11382700952142522, "grad_norm": 0.94140625, "learning_rate": 0.0001966723241368788, "loss": 1.2007, "step": 4433 }, { "epoch": 0.11385268671734704, "grad_norm": 0.8984375, "learning_rate": 0.00019667118198838672, "loss": 1.0939, "step": 4434 }, { "epoch": 0.11387836391326885, "grad_norm": 0.8046875, "learning_rate": 0.00019667003964723772, "loss": 1.1073, "step": 4435 }, { "epoch": 0.11390404110919067, "grad_norm": 0.953125, "learning_rate": 0.00019666889711343407, "loss": 1.029, "step": 4436 }, { "epoch": 0.11392971830511249, "grad_norm": 0.98046875, "learning_rate": 0.000196667754386978, "loss": 1.1443, "step": 4437 }, { "epoch": 0.11395539550103431, "grad_norm": 0.82421875, "learning_rate": 0.00019666661146787188, "loss": 1.0515, "step": 4438 }, { "epoch": 0.11398107269695613, "grad_norm": 1.015625, "learning_rate": 0.0001966654683561179, "loss": 1.2751, "step": 4439 }, { "epoch": 0.11400674989287794, "grad_norm": 1.1171875, "learning_rate": 0.00019666432505171836, "loss": 1.1691, "step": 4440 }, { "epoch": 0.11403242708879976, "grad_norm": 0.86328125, "learning_rate": 0.00019666318155467556, "loss": 1.1901, "step": 4441 }, { "epoch": 0.11405810428472159, "grad_norm": 0.87890625, "learning_rate": 0.0001966620378649918, "loss": 0.9891, "step": 4442 }, { "epoch": 0.11408378148064341, "grad_norm": 0.83984375, "learning_rate": 0.00019666089398266928, "loss": 1.0345, "step": 4443 }, { "epoch": 0.11410945867656523, "grad_norm": 0.8984375, "learning_rate": 0.00019665974990771038, "loss": 1.0991, "step": 4444 }, { "epoch": 0.11413513587248704, "grad_norm": 0.91796875, "learning_rate": 0.00019665860564011732, "loss": 1.3314, "step": 4445 }, { "epoch": 0.11416081306840886, "grad_norm": 0.9140625, "learning_rate": 0.00019665746117989235, "loss": 1.1289, "step": 4446 }, { "epoch": 0.11418649026433068, "grad_norm": 0.79296875, "learning_rate": 0.00019665631652703784, "loss": 1.1266, "step": 4447 }, { "epoch": 0.1142121674602525, "grad_norm": 0.8671875, "learning_rate": 0.00019665517168155602, "loss": 1.0861, "step": 4448 }, { "epoch": 0.11423784465617433, "grad_norm": 1.0390625, "learning_rate": 0.00019665402664344916, "loss": 1.401, "step": 4449 }, { "epoch": 0.11426352185209614, "grad_norm": 0.921875, "learning_rate": 0.00019665288141271954, "loss": 1.0166, "step": 4450 }, { "epoch": 0.11428919904801796, "grad_norm": 0.82421875, "learning_rate": 0.0001966517359893695, "loss": 1.0911, "step": 4451 }, { "epoch": 0.11431487624393978, "grad_norm": 0.921875, "learning_rate": 0.00019665059037340124, "loss": 1.0896, "step": 4452 }, { "epoch": 0.1143405534398616, "grad_norm": 0.9140625, "learning_rate": 0.00019664944456481708, "loss": 1.2043, "step": 4453 }, { "epoch": 0.11436623063578343, "grad_norm": 0.91796875, "learning_rate": 0.00019664829856361934, "loss": 1.3098, "step": 4454 }, { "epoch": 0.11439190783170523, "grad_norm": 0.8828125, "learning_rate": 0.00019664715236981023, "loss": 1.096, "step": 4455 }, { "epoch": 0.11441758502762706, "grad_norm": 0.90234375, "learning_rate": 0.00019664600598339211, "loss": 1.1094, "step": 4456 }, { "epoch": 0.11444326222354888, "grad_norm": 0.90234375, "learning_rate": 0.00019664485940436722, "loss": 1.2067, "step": 4457 }, { "epoch": 0.1144689394194707, "grad_norm": 0.8828125, "learning_rate": 0.00019664371263273785, "loss": 1.2284, "step": 4458 }, { "epoch": 0.11449461661539252, "grad_norm": 0.94140625, "learning_rate": 0.0001966425656685063, "loss": 1.3295, "step": 4459 }, { "epoch": 0.11452029381131433, "grad_norm": 0.87890625, "learning_rate": 0.00019664141851167486, "loss": 1.2329, "step": 4460 }, { "epoch": 0.11454597100723615, "grad_norm": 0.875, "learning_rate": 0.00019664027116224578, "loss": 1.1829, "step": 4461 }, { "epoch": 0.11457164820315797, "grad_norm": 0.87109375, "learning_rate": 0.00019663912362022137, "loss": 0.9928, "step": 4462 }, { "epoch": 0.1145973253990798, "grad_norm": 0.97265625, "learning_rate": 0.0001966379758856039, "loss": 1.1516, "step": 4463 }, { "epoch": 0.11462300259500162, "grad_norm": 1.0703125, "learning_rate": 0.0001966368279583957, "loss": 1.1388, "step": 4464 }, { "epoch": 0.11464867979092343, "grad_norm": 0.8359375, "learning_rate": 0.00019663567983859903, "loss": 0.9383, "step": 4465 }, { "epoch": 0.11467435698684525, "grad_norm": 0.84765625, "learning_rate": 0.0001966345315262162, "loss": 1.3203, "step": 4466 }, { "epoch": 0.11470003418276707, "grad_norm": 0.8359375, "learning_rate": 0.00019663338302124944, "loss": 1.0794, "step": 4467 }, { "epoch": 0.1147257113786889, "grad_norm": 0.85546875, "learning_rate": 0.0001966322343237011, "loss": 1.0802, "step": 4468 }, { "epoch": 0.11475138857461072, "grad_norm": 0.91015625, "learning_rate": 0.0001966310854335734, "loss": 1.0863, "step": 4469 }, { "epoch": 0.11477706577053252, "grad_norm": 0.89453125, "learning_rate": 0.0001966299363508687, "loss": 1.0766, "step": 4470 }, { "epoch": 0.11480274296645435, "grad_norm": 0.85546875, "learning_rate": 0.00019662878707558926, "loss": 1.0904, "step": 4471 }, { "epoch": 0.11482842016237617, "grad_norm": 0.9921875, "learning_rate": 0.00019662763760773741, "loss": 1.0656, "step": 4472 }, { "epoch": 0.11485409735829799, "grad_norm": 0.89453125, "learning_rate": 0.0001966264879473154, "loss": 1.1398, "step": 4473 }, { "epoch": 0.11487977455421981, "grad_norm": 0.87890625, "learning_rate": 0.0001966253380943255, "loss": 1.212, "step": 4474 }, { "epoch": 0.11490545175014162, "grad_norm": 0.921875, "learning_rate": 0.00019662418804877006, "loss": 1.1083, "step": 4475 }, { "epoch": 0.11493112894606344, "grad_norm": 0.8828125, "learning_rate": 0.00019662303781065133, "loss": 1.078, "step": 4476 }, { "epoch": 0.11495680614198527, "grad_norm": 0.78125, "learning_rate": 0.00019662188737997157, "loss": 1.1017, "step": 4477 }, { "epoch": 0.11498248333790709, "grad_norm": 0.8515625, "learning_rate": 0.00019662073675673317, "loss": 1.1055, "step": 4478 }, { "epoch": 0.11500816053382891, "grad_norm": 0.84765625, "learning_rate": 0.00019661958594093836, "loss": 1.0462, "step": 4479 }, { "epoch": 0.11503383772975072, "grad_norm": 0.82421875, "learning_rate": 0.00019661843493258942, "loss": 1.1504, "step": 4480 }, { "epoch": 0.11505951492567254, "grad_norm": 0.890625, "learning_rate": 0.0001966172837316887, "loss": 1.1799, "step": 4481 }, { "epoch": 0.11508519212159436, "grad_norm": 0.89453125, "learning_rate": 0.00019661613233823845, "loss": 1.1978, "step": 4482 }, { "epoch": 0.11511086931751618, "grad_norm": 0.82421875, "learning_rate": 0.00019661498075224096, "loss": 1.0961, "step": 4483 }, { "epoch": 0.115136546513438, "grad_norm": 0.85546875, "learning_rate": 0.00019661382897369855, "loss": 1.1549, "step": 4484 }, { "epoch": 0.11516222370935982, "grad_norm": 0.9296875, "learning_rate": 0.0001966126770026135, "loss": 1.2465, "step": 4485 }, { "epoch": 0.11518790090528164, "grad_norm": 0.80078125, "learning_rate": 0.00019661152483898813, "loss": 0.987, "step": 4486 }, { "epoch": 0.11521357810120346, "grad_norm": 0.82421875, "learning_rate": 0.0001966103724828247, "loss": 1.0413, "step": 4487 }, { "epoch": 0.11523925529712528, "grad_norm": 0.8359375, "learning_rate": 0.00019660921993412553, "loss": 1.0656, "step": 4488 }, { "epoch": 0.1152649324930471, "grad_norm": 0.875, "learning_rate": 0.00019660806719289293, "loss": 1.313, "step": 4489 }, { "epoch": 0.11529060968896891, "grad_norm": 0.77734375, "learning_rate": 0.00019660691425912915, "loss": 1.0155, "step": 4490 }, { "epoch": 0.11531628688489073, "grad_norm": 0.90625, "learning_rate": 0.0001966057611328365, "loss": 1.2718, "step": 4491 }, { "epoch": 0.11534196408081256, "grad_norm": 0.8828125, "learning_rate": 0.00019660460781401734, "loss": 1.143, "step": 4492 }, { "epoch": 0.11536764127673438, "grad_norm": 0.91015625, "learning_rate": 0.0001966034543026739, "loss": 1.2992, "step": 4493 }, { "epoch": 0.11539331847265619, "grad_norm": 0.84375, "learning_rate": 0.0001966023005988085, "loss": 1.2056, "step": 4494 }, { "epoch": 0.11541899566857801, "grad_norm": 0.84765625, "learning_rate": 0.00019660114670242345, "loss": 1.0985, "step": 4495 }, { "epoch": 0.11544467286449983, "grad_norm": 0.8359375, "learning_rate": 0.00019659999261352105, "loss": 1.129, "step": 4496 }, { "epoch": 0.11547035006042165, "grad_norm": 0.85546875, "learning_rate": 0.00019659883833210358, "loss": 1.0225, "step": 4497 }, { "epoch": 0.11549602725634348, "grad_norm": 0.921875, "learning_rate": 0.00019659768385817334, "loss": 1.2011, "step": 4498 }, { "epoch": 0.11552170445226528, "grad_norm": 0.91796875, "learning_rate": 0.00019659652919173267, "loss": 1.268, "step": 4499 }, { "epoch": 0.1155473816481871, "grad_norm": 0.7890625, "learning_rate": 0.0001965953743327838, "loss": 1.0652, "step": 4500 }, { "epoch": 0.11557305884410893, "grad_norm": 0.82421875, "learning_rate": 0.00019659421928132912, "loss": 1.123, "step": 4501 }, { "epoch": 0.11559873604003075, "grad_norm": 0.86328125, "learning_rate": 0.00019659306403737084, "loss": 1.1257, "step": 4502 }, { "epoch": 0.11562441323595257, "grad_norm": 0.86328125, "learning_rate": 0.00019659190860091134, "loss": 1.131, "step": 4503 }, { "epoch": 0.11565009043187438, "grad_norm": 0.89453125, "learning_rate": 0.0001965907529719529, "loss": 1.1464, "step": 4504 }, { "epoch": 0.1156757676277962, "grad_norm": 0.875, "learning_rate": 0.00019658959715049778, "loss": 1.2847, "step": 4505 }, { "epoch": 0.11570144482371802, "grad_norm": 0.8828125, "learning_rate": 0.00019658844113654836, "loss": 1.0838, "step": 4506 }, { "epoch": 0.11572712201963985, "grad_norm": 0.8671875, "learning_rate": 0.00019658728493010685, "loss": 1.2122, "step": 4507 }, { "epoch": 0.11575279921556167, "grad_norm": 0.8203125, "learning_rate": 0.00019658612853117564, "loss": 1.2119, "step": 4508 }, { "epoch": 0.11577847641148348, "grad_norm": 0.87890625, "learning_rate": 0.000196584971939757, "loss": 1.1499, "step": 4509 }, { "epoch": 0.1158041536074053, "grad_norm": 0.83984375, "learning_rate": 0.00019658381515585323, "loss": 0.9901, "step": 4510 }, { "epoch": 0.11582983080332712, "grad_norm": 0.95703125, "learning_rate": 0.00019658265817946663, "loss": 1.092, "step": 4511 }, { "epoch": 0.11585550799924894, "grad_norm": 0.78125, "learning_rate": 0.00019658150101059952, "loss": 1.0425, "step": 4512 }, { "epoch": 0.11588118519517077, "grad_norm": 0.84375, "learning_rate": 0.00019658034364925423, "loss": 1.0898, "step": 4513 }, { "epoch": 0.11590686239109257, "grad_norm": 0.84375, "learning_rate": 0.000196579186095433, "loss": 1.1453, "step": 4514 }, { "epoch": 0.1159325395870144, "grad_norm": 0.91015625, "learning_rate": 0.0001965780283491382, "loss": 1.2162, "step": 4515 }, { "epoch": 0.11595821678293622, "grad_norm": 0.8046875, "learning_rate": 0.0001965768704103721, "loss": 1.0376, "step": 4516 }, { "epoch": 0.11598389397885804, "grad_norm": 0.80859375, "learning_rate": 0.00019657571227913706, "loss": 1.0301, "step": 4517 }, { "epoch": 0.11600957117477986, "grad_norm": 0.9140625, "learning_rate": 0.0001965745539554353, "loss": 1.2176, "step": 4518 }, { "epoch": 0.11603524837070167, "grad_norm": 0.84375, "learning_rate": 0.00019657339543926918, "loss": 1.103, "step": 4519 }, { "epoch": 0.1160609255666235, "grad_norm": 0.81640625, "learning_rate": 0.00019657223673064103, "loss": 1.2105, "step": 4520 }, { "epoch": 0.11608660276254532, "grad_norm": 0.88671875, "learning_rate": 0.00019657107782955312, "loss": 1.1352, "step": 4521 }, { "epoch": 0.11611227995846714, "grad_norm": 0.96875, "learning_rate": 0.0001965699187360078, "loss": 1.3183, "step": 4522 }, { "epoch": 0.11613795715438896, "grad_norm": 0.8359375, "learning_rate": 0.00019656875945000732, "loss": 1.2554, "step": 4523 }, { "epoch": 0.11616363435031077, "grad_norm": 0.78515625, "learning_rate": 0.00019656759997155405, "loss": 1.1059, "step": 4524 }, { "epoch": 0.11618931154623259, "grad_norm": 0.8671875, "learning_rate": 0.00019656644030065028, "loss": 1.181, "step": 4525 }, { "epoch": 0.11621498874215441, "grad_norm": 0.92578125, "learning_rate": 0.0001965652804372983, "loss": 1.1281, "step": 4526 }, { "epoch": 0.11624066593807623, "grad_norm": 0.890625, "learning_rate": 0.00019656412038150043, "loss": 1.1935, "step": 4527 }, { "epoch": 0.11626634313399806, "grad_norm": 0.81640625, "learning_rate": 0.000196562960133259, "loss": 1.1323, "step": 4528 }, { "epoch": 0.11629202032991987, "grad_norm": 0.83203125, "learning_rate": 0.00019656179969257634, "loss": 1.2133, "step": 4529 }, { "epoch": 0.11631769752584169, "grad_norm": 0.8828125, "learning_rate": 0.00019656063905945472, "loss": 1.2033, "step": 4530 }, { "epoch": 0.11634337472176351, "grad_norm": 0.8359375, "learning_rate": 0.00019655947823389645, "loss": 1.3328, "step": 4531 }, { "epoch": 0.11636905191768533, "grad_norm": 0.8671875, "learning_rate": 0.00019655831721590388, "loss": 1.1042, "step": 4532 }, { "epoch": 0.11639472911360715, "grad_norm": 0.87109375, "learning_rate": 0.0001965571560054793, "loss": 1.1009, "step": 4533 }, { "epoch": 0.11642040630952896, "grad_norm": 0.83984375, "learning_rate": 0.000196555994602625, "loss": 1.094, "step": 4534 }, { "epoch": 0.11644608350545078, "grad_norm": 0.8046875, "learning_rate": 0.00019655483300734338, "loss": 1.1352, "step": 4535 }, { "epoch": 0.1164717607013726, "grad_norm": 0.7734375, "learning_rate": 0.00019655367121963668, "loss": 0.9495, "step": 4536 }, { "epoch": 0.11649743789729443, "grad_norm": 0.80078125, "learning_rate": 0.00019655250923950724, "loss": 1.1809, "step": 4537 }, { "epoch": 0.11652311509321625, "grad_norm": 0.8125, "learning_rate": 0.00019655134706695734, "loss": 1.1889, "step": 4538 }, { "epoch": 0.11654879228913806, "grad_norm": 0.88671875, "learning_rate": 0.00019655018470198936, "loss": 1.0143, "step": 4539 }, { "epoch": 0.11657446948505988, "grad_norm": 0.92578125, "learning_rate": 0.00019654902214460558, "loss": 1.3549, "step": 4540 }, { "epoch": 0.1166001466809817, "grad_norm": 0.80859375, "learning_rate": 0.0001965478593948083, "loss": 1.1045, "step": 4541 }, { "epoch": 0.11662582387690353, "grad_norm": 0.84765625, "learning_rate": 0.00019654669645259985, "loss": 1.17, "step": 4542 }, { "epoch": 0.11665150107282535, "grad_norm": 0.94140625, "learning_rate": 0.00019654553331798259, "loss": 1.2805, "step": 4543 }, { "epoch": 0.11667717826874716, "grad_norm": 0.890625, "learning_rate": 0.0001965443699909588, "loss": 1.293, "step": 4544 }, { "epoch": 0.11670285546466898, "grad_norm": 0.86328125, "learning_rate": 0.00019654320647153077, "loss": 1.1195, "step": 4545 }, { "epoch": 0.1167285326605908, "grad_norm": 0.7421875, "learning_rate": 0.00019654204275970088, "loss": 1.114, "step": 4546 }, { "epoch": 0.11675420985651262, "grad_norm": 0.90625, "learning_rate": 0.0001965408788554714, "loss": 1.1609, "step": 4547 }, { "epoch": 0.11677988705243444, "grad_norm": 0.91015625, "learning_rate": 0.00019653971475884466, "loss": 1.1413, "step": 4548 }, { "epoch": 0.11680556424835625, "grad_norm": 0.94140625, "learning_rate": 0.00019653855046982303, "loss": 1.2011, "step": 4549 }, { "epoch": 0.11683124144427808, "grad_norm": 0.87890625, "learning_rate": 0.00019653738598840876, "loss": 1.1532, "step": 4550 }, { "epoch": 0.1168569186401999, "grad_norm": 0.80078125, "learning_rate": 0.0001965362213146042, "loss": 1.0138, "step": 4551 }, { "epoch": 0.11688259583612172, "grad_norm": 0.859375, "learning_rate": 0.0001965350564484117, "loss": 1.1257, "step": 4552 }, { "epoch": 0.11690827303204354, "grad_norm": 0.83984375, "learning_rate": 0.00019653389138983348, "loss": 1.2371, "step": 4553 }, { "epoch": 0.11693395022796535, "grad_norm": 0.8203125, "learning_rate": 0.00019653272613887198, "loss": 1.1708, "step": 4554 }, { "epoch": 0.11695962742388717, "grad_norm": 0.87109375, "learning_rate": 0.00019653156069552948, "loss": 1.2279, "step": 4555 }, { "epoch": 0.116985304619809, "grad_norm": 0.8828125, "learning_rate": 0.00019653039505980832, "loss": 1.264, "step": 4556 }, { "epoch": 0.11701098181573082, "grad_norm": 0.8515625, "learning_rate": 0.00019652922923171076, "loss": 1.1824, "step": 4557 }, { "epoch": 0.11703665901165264, "grad_norm": 0.84765625, "learning_rate": 0.0001965280632112392, "loss": 1.0857, "step": 4558 }, { "epoch": 0.11706233620757445, "grad_norm": 0.90625, "learning_rate": 0.0001965268969983959, "loss": 1.3233, "step": 4559 }, { "epoch": 0.11708801340349627, "grad_norm": 0.9921875, "learning_rate": 0.00019652573059318325, "loss": 1.1407, "step": 4560 }, { "epoch": 0.11711369059941809, "grad_norm": 0.890625, "learning_rate": 0.0001965245639956035, "loss": 1.0737, "step": 4561 }, { "epoch": 0.11713936779533991, "grad_norm": 0.90234375, "learning_rate": 0.00019652339720565903, "loss": 1.1253, "step": 4562 }, { "epoch": 0.11716504499126174, "grad_norm": 0.8515625, "learning_rate": 0.00019652223022335216, "loss": 0.9734, "step": 4563 }, { "epoch": 0.11719072218718354, "grad_norm": 0.9140625, "learning_rate": 0.00019652106304868518, "loss": 1.3048, "step": 4564 }, { "epoch": 0.11721639938310537, "grad_norm": 0.8828125, "learning_rate": 0.00019651989568166047, "loss": 1.2064, "step": 4565 }, { "epoch": 0.11724207657902719, "grad_norm": 0.80078125, "learning_rate": 0.0001965187281222803, "loss": 1.0314, "step": 4566 }, { "epoch": 0.11726775377494901, "grad_norm": 0.83203125, "learning_rate": 0.00019651756037054703, "loss": 1.1929, "step": 4567 }, { "epoch": 0.11729343097087083, "grad_norm": 0.89453125, "learning_rate": 0.00019651639242646298, "loss": 1.2844, "step": 4568 }, { "epoch": 0.11731910816679264, "grad_norm": 0.8046875, "learning_rate": 0.00019651522429003048, "loss": 1.075, "step": 4569 }, { "epoch": 0.11734478536271446, "grad_norm": 0.8125, "learning_rate": 0.00019651405596125186, "loss": 1.2406, "step": 4570 }, { "epoch": 0.11737046255863628, "grad_norm": 0.8203125, "learning_rate": 0.00019651288744012944, "loss": 1.0709, "step": 4571 }, { "epoch": 0.11739613975455811, "grad_norm": 0.91796875, "learning_rate": 0.00019651171872666555, "loss": 1.1167, "step": 4572 }, { "epoch": 0.11742181695047993, "grad_norm": 0.85546875, "learning_rate": 0.00019651054982086253, "loss": 0.9901, "step": 4573 }, { "epoch": 0.11744749414640174, "grad_norm": 0.7578125, "learning_rate": 0.0001965093807227227, "loss": 1.0638, "step": 4574 }, { "epoch": 0.11747317134232356, "grad_norm": 0.83984375, "learning_rate": 0.0001965082114322484, "loss": 1.1583, "step": 4575 }, { "epoch": 0.11749884853824538, "grad_norm": 1.2890625, "learning_rate": 0.00019650704194944192, "loss": 1.2706, "step": 4576 }, { "epoch": 0.1175245257341672, "grad_norm": 1.296875, "learning_rate": 0.00019650587227430566, "loss": 1.1636, "step": 4577 }, { "epoch": 0.11755020293008903, "grad_norm": 0.8046875, "learning_rate": 0.00019650470240684188, "loss": 1.3368, "step": 4578 }, { "epoch": 0.11757588012601083, "grad_norm": 0.7578125, "learning_rate": 0.000196503532347053, "loss": 0.9642, "step": 4579 }, { "epoch": 0.11760155732193266, "grad_norm": 0.9140625, "learning_rate": 0.00019650236209494123, "loss": 1.2445, "step": 4580 }, { "epoch": 0.11762723451785448, "grad_norm": 0.80078125, "learning_rate": 0.00019650119165050902, "loss": 1.1156, "step": 4581 }, { "epoch": 0.1176529117137763, "grad_norm": 0.78515625, "learning_rate": 0.00019650002101375864, "loss": 0.9912, "step": 4582 }, { "epoch": 0.11767858890969812, "grad_norm": 0.88671875, "learning_rate": 0.0001964988501846924, "loss": 1.0878, "step": 4583 }, { "epoch": 0.11770426610561993, "grad_norm": 0.84375, "learning_rate": 0.00019649767916331273, "loss": 1.0455, "step": 4584 }, { "epoch": 0.11772994330154175, "grad_norm": 0.8828125, "learning_rate": 0.00019649650794962184, "loss": 1.3307, "step": 4585 }, { "epoch": 0.11775562049746358, "grad_norm": 0.82421875, "learning_rate": 0.0001964953365436222, "loss": 0.9464, "step": 4586 }, { "epoch": 0.1177812976933854, "grad_norm": 0.78125, "learning_rate": 0.00019649416494531597, "loss": 1.086, "step": 4587 }, { "epoch": 0.11780697488930722, "grad_norm": 0.90625, "learning_rate": 0.00019649299315470565, "loss": 1.2387, "step": 4588 }, { "epoch": 0.11783265208522903, "grad_norm": 0.84765625, "learning_rate": 0.0001964918211717935, "loss": 1.2032, "step": 4589 }, { "epoch": 0.11785832928115085, "grad_norm": 0.84765625, "learning_rate": 0.00019649064899658188, "loss": 1.1448, "step": 4590 }, { "epoch": 0.11788400647707267, "grad_norm": 0.8125, "learning_rate": 0.00019648947662907312, "loss": 1.1207, "step": 4591 }, { "epoch": 0.1179096836729945, "grad_norm": 0.8515625, "learning_rate": 0.0001964883040692695, "loss": 1.1616, "step": 4592 }, { "epoch": 0.11793536086891632, "grad_norm": 0.85546875, "learning_rate": 0.00019648713131717343, "loss": 1.1442, "step": 4593 }, { "epoch": 0.11796103806483813, "grad_norm": 0.8125, "learning_rate": 0.00019648595837278726, "loss": 1.2898, "step": 4594 }, { "epoch": 0.11798671526075995, "grad_norm": 0.86328125, "learning_rate": 0.00019648478523611324, "loss": 1.2665, "step": 4595 }, { "epoch": 0.11801239245668177, "grad_norm": 0.8203125, "learning_rate": 0.00019648361190715375, "loss": 1.0897, "step": 4596 }, { "epoch": 0.11803806965260359, "grad_norm": 0.87109375, "learning_rate": 0.00019648243838591117, "loss": 1.2045, "step": 4597 }, { "epoch": 0.1180637468485254, "grad_norm": 0.8828125, "learning_rate": 0.00019648126467238783, "loss": 1.2847, "step": 4598 }, { "epoch": 0.11808942404444722, "grad_norm": 0.87109375, "learning_rate": 0.000196480090766586, "loss": 1.1481, "step": 4599 }, { "epoch": 0.11811510124036904, "grad_norm": 0.87890625, "learning_rate": 0.0001964789166685081, "loss": 1.197, "step": 4600 }, { "epoch": 0.11814077843629087, "grad_norm": 0.91015625, "learning_rate": 0.0001964777423781564, "loss": 1.1816, "step": 4601 }, { "epoch": 0.11816645563221269, "grad_norm": 0.890625, "learning_rate": 0.00019647656789553329, "loss": 1.2544, "step": 4602 }, { "epoch": 0.1181921328281345, "grad_norm": 0.8515625, "learning_rate": 0.00019647539322064107, "loss": 1.1822, "step": 4603 }, { "epoch": 0.11821781002405632, "grad_norm": 0.90625, "learning_rate": 0.00019647421835348214, "loss": 1.102, "step": 4604 }, { "epoch": 0.11824348721997814, "grad_norm": 0.859375, "learning_rate": 0.00019647304329405878, "loss": 1.0123, "step": 4605 }, { "epoch": 0.11826916441589996, "grad_norm": 0.97265625, "learning_rate": 0.0001964718680423734, "loss": 1.2426, "step": 4606 }, { "epoch": 0.11829484161182179, "grad_norm": 0.953125, "learning_rate": 0.00019647069259842828, "loss": 1.19, "step": 4607 }, { "epoch": 0.1183205188077436, "grad_norm": 0.87109375, "learning_rate": 0.00019646951696222575, "loss": 1.0298, "step": 4608 }, { "epoch": 0.11834619600366542, "grad_norm": 0.78515625, "learning_rate": 0.00019646834113376822, "loss": 0.9986, "step": 4609 }, { "epoch": 0.11837187319958724, "grad_norm": 0.85546875, "learning_rate": 0.000196467165113058, "loss": 1.0611, "step": 4610 }, { "epoch": 0.11839755039550906, "grad_norm": 0.98046875, "learning_rate": 0.00019646598890009744, "loss": 1.1771, "step": 4611 }, { "epoch": 0.11842322759143088, "grad_norm": 0.88671875, "learning_rate": 0.00019646481249488887, "loss": 1.1579, "step": 4612 }, { "epoch": 0.11844890478735269, "grad_norm": 0.93359375, "learning_rate": 0.00019646363589743464, "loss": 1.124, "step": 4613 }, { "epoch": 0.11847458198327451, "grad_norm": 0.921875, "learning_rate": 0.00019646245910773707, "loss": 1.1439, "step": 4614 }, { "epoch": 0.11850025917919634, "grad_norm": 0.890625, "learning_rate": 0.00019646128212579857, "loss": 1.1502, "step": 4615 }, { "epoch": 0.11852593637511816, "grad_norm": 0.87109375, "learning_rate": 0.00019646010495162145, "loss": 0.9812, "step": 4616 }, { "epoch": 0.11855161357103998, "grad_norm": 0.875, "learning_rate": 0.00019645892758520802, "loss": 1.1556, "step": 4617 }, { "epoch": 0.11857729076696179, "grad_norm": 0.86328125, "learning_rate": 0.0001964577500265607, "loss": 1.1621, "step": 4618 }, { "epoch": 0.11860296796288361, "grad_norm": 0.79296875, "learning_rate": 0.00019645657227568178, "loss": 1.1799, "step": 4619 }, { "epoch": 0.11862864515880543, "grad_norm": 0.85546875, "learning_rate": 0.00019645539433257362, "loss": 0.9683, "step": 4620 }, { "epoch": 0.11865432235472725, "grad_norm": 0.83203125, "learning_rate": 0.00019645421619723856, "loss": 1.1017, "step": 4621 }, { "epoch": 0.11867999955064908, "grad_norm": 0.90625, "learning_rate": 0.000196453037869679, "loss": 1.197, "step": 4622 }, { "epoch": 0.11870567674657088, "grad_norm": 0.8828125, "learning_rate": 0.0001964518593498972, "loss": 0.9648, "step": 4623 }, { "epoch": 0.1187313539424927, "grad_norm": 0.875, "learning_rate": 0.00019645068063789558, "loss": 1.1391, "step": 4624 }, { "epoch": 0.11875703113841453, "grad_norm": 0.87890625, "learning_rate": 0.00019644950173367649, "loss": 1.178, "step": 4625 }, { "epoch": 0.11878270833433635, "grad_norm": 0.91015625, "learning_rate": 0.00019644832263724223, "loss": 1.1734, "step": 4626 }, { "epoch": 0.11880838553025817, "grad_norm": 0.87109375, "learning_rate": 0.00019644714334859516, "loss": 1.224, "step": 4627 }, { "epoch": 0.11883406272617998, "grad_norm": 0.84375, "learning_rate": 0.00019644596386773767, "loss": 1.1104, "step": 4628 }, { "epoch": 0.1188597399221018, "grad_norm": 0.79296875, "learning_rate": 0.00019644478419467208, "loss": 1.1939, "step": 4629 }, { "epoch": 0.11888541711802363, "grad_norm": 0.8828125, "learning_rate": 0.00019644360432940073, "loss": 1.0348, "step": 4630 }, { "epoch": 0.11891109431394545, "grad_norm": 0.8203125, "learning_rate": 0.000196442424271926, "loss": 1.2221, "step": 4631 }, { "epoch": 0.11893677150986727, "grad_norm": 0.85546875, "learning_rate": 0.00019644124402225024, "loss": 0.9793, "step": 4632 }, { "epoch": 0.11896244870578908, "grad_norm": 0.83203125, "learning_rate": 0.00019644006358037576, "loss": 1.2145, "step": 4633 }, { "epoch": 0.1189881259017109, "grad_norm": 0.9140625, "learning_rate": 0.000196438882946305, "loss": 1.1688, "step": 4634 }, { "epoch": 0.11901380309763272, "grad_norm": 0.84375, "learning_rate": 0.0001964377021200402, "loss": 0.9982, "step": 4635 }, { "epoch": 0.11903948029355454, "grad_norm": 0.90625, "learning_rate": 0.00019643652110158381, "loss": 1.298, "step": 4636 }, { "epoch": 0.11906515748947637, "grad_norm": 0.87890625, "learning_rate": 0.0001964353398909381, "loss": 1.1926, "step": 4637 }, { "epoch": 0.11909083468539818, "grad_norm": 0.8828125, "learning_rate": 0.00019643415848810553, "loss": 1.2176, "step": 4638 }, { "epoch": 0.11911651188132, "grad_norm": 0.81640625, "learning_rate": 0.00019643297689308836, "loss": 1.0444, "step": 4639 }, { "epoch": 0.11914218907724182, "grad_norm": 0.8359375, "learning_rate": 0.00019643179510588898, "loss": 1.1821, "step": 4640 }, { "epoch": 0.11916786627316364, "grad_norm": 0.953125, "learning_rate": 0.00019643061312650976, "loss": 1.3395, "step": 4641 }, { "epoch": 0.11919354346908546, "grad_norm": 0.83203125, "learning_rate": 0.00019642943095495304, "loss": 1.1629, "step": 4642 }, { "epoch": 0.11921922066500727, "grad_norm": 0.9375, "learning_rate": 0.00019642824859122113, "loss": 1.2726, "step": 4643 }, { "epoch": 0.1192448978609291, "grad_norm": 0.84375, "learning_rate": 0.00019642706603531645, "loss": 1.2212, "step": 4644 }, { "epoch": 0.11927057505685092, "grad_norm": 0.90234375, "learning_rate": 0.00019642588328724135, "loss": 1.1225, "step": 4645 }, { "epoch": 0.11929625225277274, "grad_norm": 0.8671875, "learning_rate": 0.00019642470034699817, "loss": 1.1385, "step": 4646 }, { "epoch": 0.11932192944869456, "grad_norm": 0.921875, "learning_rate": 0.00019642351721458926, "loss": 1.1913, "step": 4647 }, { "epoch": 0.11934760664461637, "grad_norm": 0.7890625, "learning_rate": 0.00019642233389001703, "loss": 1.096, "step": 4648 }, { "epoch": 0.11937328384053819, "grad_norm": 0.80859375, "learning_rate": 0.00019642115037328377, "loss": 1.1037, "step": 4649 }, { "epoch": 0.11939896103646001, "grad_norm": 0.87109375, "learning_rate": 0.00019641996666439187, "loss": 1.1913, "step": 4650 }, { "epoch": 0.11942463823238184, "grad_norm": 0.8359375, "learning_rate": 0.0001964187827633437, "loss": 1.1182, "step": 4651 }, { "epoch": 0.11945031542830366, "grad_norm": 0.90234375, "learning_rate": 0.0001964175986701416, "loss": 1.3625, "step": 4652 }, { "epoch": 0.11947599262422547, "grad_norm": 0.93359375, "learning_rate": 0.0001964164143847879, "loss": 1.0771, "step": 4653 }, { "epoch": 0.11950166982014729, "grad_norm": 0.875, "learning_rate": 0.000196415229907285, "loss": 1.1757, "step": 4654 }, { "epoch": 0.11952734701606911, "grad_norm": 0.93359375, "learning_rate": 0.00019641404523763529, "loss": 1.2092, "step": 4655 }, { "epoch": 0.11955302421199093, "grad_norm": 0.8515625, "learning_rate": 0.00019641286037584108, "loss": 1.269, "step": 4656 }, { "epoch": 0.11957870140791275, "grad_norm": 0.8828125, "learning_rate": 0.00019641167532190475, "loss": 1.2182, "step": 4657 }, { "epoch": 0.11960437860383456, "grad_norm": 0.8046875, "learning_rate": 0.00019641049007582866, "loss": 1.2165, "step": 4658 }, { "epoch": 0.11963005579975639, "grad_norm": 0.82421875, "learning_rate": 0.00019640930463761517, "loss": 1.1502, "step": 4659 }, { "epoch": 0.11965573299567821, "grad_norm": 0.77734375, "learning_rate": 0.00019640811900726665, "loss": 1.216, "step": 4660 }, { "epoch": 0.11968141019160003, "grad_norm": 0.87890625, "learning_rate": 0.00019640693318478546, "loss": 1.1196, "step": 4661 }, { "epoch": 0.11970708738752185, "grad_norm": 0.75, "learning_rate": 0.00019640574717017396, "loss": 1.0478, "step": 4662 }, { "epoch": 0.11973276458344366, "grad_norm": 0.82421875, "learning_rate": 0.00019640456096343452, "loss": 1.1307, "step": 4663 }, { "epoch": 0.11975844177936548, "grad_norm": 0.9375, "learning_rate": 0.0001964033745645695, "loss": 1.1884, "step": 4664 }, { "epoch": 0.1197841189752873, "grad_norm": 0.87890625, "learning_rate": 0.00019640218797358123, "loss": 1.1349, "step": 4665 }, { "epoch": 0.11980979617120913, "grad_norm": 1.234375, "learning_rate": 0.00019640100119047214, "loss": 1.1094, "step": 4666 }, { "epoch": 0.11983547336713095, "grad_norm": 0.890625, "learning_rate": 0.00019639981421524453, "loss": 1.1159, "step": 4667 }, { "epoch": 0.11986115056305276, "grad_norm": 0.88671875, "learning_rate": 0.00019639862704790085, "loss": 1.2426, "step": 4668 }, { "epoch": 0.11988682775897458, "grad_norm": 0.85546875, "learning_rate": 0.00019639743968844338, "loss": 1.0214, "step": 4669 }, { "epoch": 0.1199125049548964, "grad_norm": 0.859375, "learning_rate": 0.00019639625213687452, "loss": 1.0353, "step": 4670 }, { "epoch": 0.11993818215081822, "grad_norm": 0.7734375, "learning_rate": 0.00019639506439319662, "loss": 1.0138, "step": 4671 }, { "epoch": 0.11996385934674005, "grad_norm": 0.8828125, "learning_rate": 0.00019639387645741207, "loss": 1.1544, "step": 4672 }, { "epoch": 0.11998953654266185, "grad_norm": 0.8984375, "learning_rate": 0.00019639268832952325, "loss": 1.1938, "step": 4673 }, { "epoch": 0.12001521373858368, "grad_norm": 0.828125, "learning_rate": 0.00019639150000953253, "loss": 1.0858, "step": 4674 }, { "epoch": 0.1200408909345055, "grad_norm": 0.8125, "learning_rate": 0.00019639031149744222, "loss": 1.0626, "step": 4675 }, { "epoch": 0.12006656813042732, "grad_norm": 0.8203125, "learning_rate": 0.00019638912279325475, "loss": 1.0218, "step": 4676 }, { "epoch": 0.12009224532634914, "grad_norm": 1.0625, "learning_rate": 0.00019638793389697244, "loss": 1.4618, "step": 4677 }, { "epoch": 0.12011792252227095, "grad_norm": 0.859375, "learning_rate": 0.0001963867448085977, "loss": 1.1097, "step": 4678 }, { "epoch": 0.12014359971819277, "grad_norm": 0.875, "learning_rate": 0.00019638555552813286, "loss": 1.1767, "step": 4679 }, { "epoch": 0.1201692769141146, "grad_norm": 0.8671875, "learning_rate": 0.00019638436605558035, "loss": 1.0498, "step": 4680 }, { "epoch": 0.12019495411003642, "grad_norm": 0.84765625, "learning_rate": 0.0001963831763909425, "loss": 1.1342, "step": 4681 }, { "epoch": 0.12022063130595824, "grad_norm": 0.80078125, "learning_rate": 0.00019638198653422167, "loss": 1.1374, "step": 4682 }, { "epoch": 0.12024630850188005, "grad_norm": 0.796875, "learning_rate": 0.00019638079648542025, "loss": 1.194, "step": 4683 }, { "epoch": 0.12027198569780187, "grad_norm": 0.8515625, "learning_rate": 0.00019637960624454062, "loss": 1.2359, "step": 4684 }, { "epoch": 0.12029766289372369, "grad_norm": 0.81640625, "learning_rate": 0.00019637841581158512, "loss": 1.2062, "step": 4685 }, { "epoch": 0.12032334008964551, "grad_norm": 0.8828125, "learning_rate": 0.00019637722518655614, "loss": 1.0843, "step": 4686 }, { "epoch": 0.12034901728556734, "grad_norm": 0.82421875, "learning_rate": 0.0001963760343694561, "loss": 1.0391, "step": 4687 }, { "epoch": 0.12037469448148914, "grad_norm": 0.91796875, "learning_rate": 0.00019637484336028731, "loss": 1.209, "step": 4688 }, { "epoch": 0.12040037167741097, "grad_norm": 0.87890625, "learning_rate": 0.00019637365215905214, "loss": 1.2479, "step": 4689 }, { "epoch": 0.12042604887333279, "grad_norm": 0.84765625, "learning_rate": 0.000196372460765753, "loss": 1.0535, "step": 4690 }, { "epoch": 0.12045172606925461, "grad_norm": 0.890625, "learning_rate": 0.00019637126918039223, "loss": 1.2301, "step": 4691 }, { "epoch": 0.12047740326517643, "grad_norm": 0.8359375, "learning_rate": 0.00019637007740297226, "loss": 1.0723, "step": 4692 }, { "epoch": 0.12050308046109824, "grad_norm": 0.87109375, "learning_rate": 0.00019636888543349542, "loss": 1.0701, "step": 4693 }, { "epoch": 0.12052875765702006, "grad_norm": 0.8828125, "learning_rate": 0.0001963676932719641, "loss": 1.0935, "step": 4694 }, { "epoch": 0.12055443485294189, "grad_norm": 0.83203125, "learning_rate": 0.00019636650091838065, "loss": 1.0964, "step": 4695 }, { "epoch": 0.12058011204886371, "grad_norm": 0.8125, "learning_rate": 0.00019636530837274752, "loss": 1.0492, "step": 4696 }, { "epoch": 0.12060578924478553, "grad_norm": 0.91015625, "learning_rate": 0.00019636411563506698, "loss": 1.2571, "step": 4697 }, { "epoch": 0.12063146644070734, "grad_norm": 0.875, "learning_rate": 0.0001963629227053415, "loss": 1.1297, "step": 4698 }, { "epoch": 0.12065714363662916, "grad_norm": 0.78515625, "learning_rate": 0.0001963617295835734, "loss": 1.0579, "step": 4699 }, { "epoch": 0.12068282083255098, "grad_norm": 0.8671875, "learning_rate": 0.00019636053626976506, "loss": 1.2001, "step": 4700 }, { "epoch": 0.1207084980284728, "grad_norm": 0.76171875, "learning_rate": 0.0001963593427639189, "loss": 1.0797, "step": 4701 }, { "epoch": 0.12073417522439461, "grad_norm": 0.890625, "learning_rate": 0.00019635814906603726, "loss": 1.0497, "step": 4702 }, { "epoch": 0.12075985242031644, "grad_norm": 0.8515625, "learning_rate": 0.00019635695517612254, "loss": 1.2485, "step": 4703 }, { "epoch": 0.12078552961623826, "grad_norm": 0.86328125, "learning_rate": 0.00019635576109417713, "loss": 1.0424, "step": 4704 }, { "epoch": 0.12081120681216008, "grad_norm": 0.85546875, "learning_rate": 0.00019635456682020336, "loss": 1.24, "step": 4705 }, { "epoch": 0.1208368840080819, "grad_norm": 0.90625, "learning_rate": 0.00019635337235420365, "loss": 1.1381, "step": 4706 }, { "epoch": 0.12086256120400371, "grad_norm": 0.84375, "learning_rate": 0.0001963521776961804, "loss": 1.0498, "step": 4707 }, { "epoch": 0.12088823839992553, "grad_norm": 0.80078125, "learning_rate": 0.00019635098284613594, "loss": 0.9854, "step": 4708 }, { "epoch": 0.12091391559584735, "grad_norm": 0.8125, "learning_rate": 0.00019634978780407267, "loss": 1.0578, "step": 4709 }, { "epoch": 0.12093959279176918, "grad_norm": 0.859375, "learning_rate": 0.000196348592569993, "loss": 1.235, "step": 4710 }, { "epoch": 0.120965269987691, "grad_norm": 0.76171875, "learning_rate": 0.00019634739714389924, "loss": 1.0469, "step": 4711 }, { "epoch": 0.12099094718361281, "grad_norm": 0.796875, "learning_rate": 0.00019634620152579385, "loss": 1.0881, "step": 4712 }, { "epoch": 0.12101662437953463, "grad_norm": 0.87109375, "learning_rate": 0.0001963450057156792, "loss": 1.095, "step": 4713 }, { "epoch": 0.12104230157545645, "grad_norm": 0.94921875, "learning_rate": 0.00019634380971355762, "loss": 1.2729, "step": 4714 }, { "epoch": 0.12106797877137827, "grad_norm": 0.9140625, "learning_rate": 0.00019634261351943159, "loss": 1.2617, "step": 4715 }, { "epoch": 0.1210936559673001, "grad_norm": 0.8671875, "learning_rate": 0.00019634141713330337, "loss": 1.0808, "step": 4716 }, { "epoch": 0.1211193331632219, "grad_norm": 0.79296875, "learning_rate": 0.00019634022055517542, "loss": 1.138, "step": 4717 }, { "epoch": 0.12114501035914373, "grad_norm": 0.77734375, "learning_rate": 0.00019633902378505015, "loss": 1.0744, "step": 4718 }, { "epoch": 0.12117068755506555, "grad_norm": 0.89453125, "learning_rate": 0.0001963378268229299, "loss": 1.0494, "step": 4719 }, { "epoch": 0.12119636475098737, "grad_norm": 0.87890625, "learning_rate": 0.00019633662966881703, "loss": 1.225, "step": 4720 }, { "epoch": 0.12122204194690919, "grad_norm": 0.93359375, "learning_rate": 0.00019633543232271396, "loss": 1.1865, "step": 4721 }, { "epoch": 0.121247719142831, "grad_norm": 0.90625, "learning_rate": 0.0001963342347846231, "loss": 1.2493, "step": 4722 }, { "epoch": 0.12127339633875282, "grad_norm": 0.91796875, "learning_rate": 0.00019633303705454683, "loss": 1.1765, "step": 4723 }, { "epoch": 0.12129907353467465, "grad_norm": 0.796875, "learning_rate": 0.00019633183913248748, "loss": 1.2002, "step": 4724 }, { "epoch": 0.12132475073059647, "grad_norm": 0.83984375, "learning_rate": 0.00019633064101844748, "loss": 1.0999, "step": 4725 }, { "epoch": 0.12135042792651829, "grad_norm": 0.859375, "learning_rate": 0.00019632944271242924, "loss": 1.1491, "step": 4726 }, { "epoch": 0.1213761051224401, "grad_norm": 0.875, "learning_rate": 0.0001963282442144351, "loss": 1.1947, "step": 4727 }, { "epoch": 0.12140178231836192, "grad_norm": 0.84765625, "learning_rate": 0.00019632704552446746, "loss": 1.0792, "step": 4728 }, { "epoch": 0.12142745951428374, "grad_norm": 0.8203125, "learning_rate": 0.00019632584664252875, "loss": 1.1731, "step": 4729 }, { "epoch": 0.12145313671020556, "grad_norm": 0.875, "learning_rate": 0.0001963246475686213, "loss": 1.1002, "step": 4730 }, { "epoch": 0.12147881390612739, "grad_norm": 0.79296875, "learning_rate": 0.00019632344830274753, "loss": 1.0687, "step": 4731 }, { "epoch": 0.1215044911020492, "grad_norm": 0.80078125, "learning_rate": 0.00019632224884490987, "loss": 0.905, "step": 4732 }, { "epoch": 0.12153016829797102, "grad_norm": 0.86328125, "learning_rate": 0.00019632104919511064, "loss": 1.0447, "step": 4733 }, { "epoch": 0.12155584549389284, "grad_norm": 0.88671875, "learning_rate": 0.00019631984935335225, "loss": 1.046, "step": 4734 }, { "epoch": 0.12158152268981466, "grad_norm": 0.8828125, "learning_rate": 0.0001963186493196371, "loss": 1.2548, "step": 4735 }, { "epoch": 0.12160719988573648, "grad_norm": 0.84375, "learning_rate": 0.0001963174490939676, "loss": 1.1166, "step": 4736 }, { "epoch": 0.12163287708165829, "grad_norm": 0.8515625, "learning_rate": 0.00019631624867634613, "loss": 1.0353, "step": 4737 }, { "epoch": 0.12165855427758011, "grad_norm": 0.8515625, "learning_rate": 0.00019631504806677503, "loss": 1.1417, "step": 4738 }, { "epoch": 0.12168423147350194, "grad_norm": 0.76171875, "learning_rate": 0.00019631384726525678, "loss": 1.0503, "step": 4739 }, { "epoch": 0.12170990866942376, "grad_norm": 0.9140625, "learning_rate": 0.00019631264627179373, "loss": 1.2069, "step": 4740 }, { "epoch": 0.12173558586534558, "grad_norm": 0.90234375, "learning_rate": 0.00019631144508638828, "loss": 1.0997, "step": 4741 }, { "epoch": 0.12176126306126739, "grad_norm": 0.9375, "learning_rate": 0.0001963102437090428, "loss": 1.2427, "step": 4742 }, { "epoch": 0.12178694025718921, "grad_norm": 0.81640625, "learning_rate": 0.0001963090421397597, "loss": 1.0631, "step": 4743 }, { "epoch": 0.12181261745311103, "grad_norm": 1.375, "learning_rate": 0.0001963078403785414, "loss": 1.2697, "step": 4744 }, { "epoch": 0.12183829464903285, "grad_norm": 0.84765625, "learning_rate": 0.00019630663842539029, "loss": 1.3218, "step": 4745 }, { "epoch": 0.12186397184495468, "grad_norm": 0.89453125, "learning_rate": 0.0001963054362803087, "loss": 1.1738, "step": 4746 }, { "epoch": 0.12188964904087649, "grad_norm": 0.8828125, "learning_rate": 0.00019630423394329912, "loss": 1.259, "step": 4747 }, { "epoch": 0.12191532623679831, "grad_norm": 0.86328125, "learning_rate": 0.00019630303141436386, "loss": 1.156, "step": 4748 }, { "epoch": 0.12194100343272013, "grad_norm": 0.95703125, "learning_rate": 0.0001963018286935054, "loss": 1.0389, "step": 4749 }, { "epoch": 0.12196668062864195, "grad_norm": 0.8359375, "learning_rate": 0.00019630062578072607, "loss": 1.033, "step": 4750 }, { "epoch": 0.12199235782456377, "grad_norm": 0.84765625, "learning_rate": 0.00019629942267602828, "loss": 1.2311, "step": 4751 }, { "epoch": 0.12201803502048558, "grad_norm": 0.875, "learning_rate": 0.00019629821937941445, "loss": 1.2289, "step": 4752 }, { "epoch": 0.1220437122164074, "grad_norm": 0.86328125, "learning_rate": 0.00019629701589088698, "loss": 1.2037, "step": 4753 }, { "epoch": 0.12206938941232923, "grad_norm": 0.83203125, "learning_rate": 0.00019629581221044826, "loss": 1.1617, "step": 4754 }, { "epoch": 0.12209506660825105, "grad_norm": 0.828125, "learning_rate": 0.00019629460833810067, "loss": 1.0427, "step": 4755 }, { "epoch": 0.12212074380417287, "grad_norm": 0.890625, "learning_rate": 0.0001962934042738466, "loss": 1.1316, "step": 4756 }, { "epoch": 0.12214642100009468, "grad_norm": 0.81640625, "learning_rate": 0.0001962922000176885, "loss": 1.1493, "step": 4757 }, { "epoch": 0.1221720981960165, "grad_norm": 0.83203125, "learning_rate": 0.00019629099556962875, "loss": 1.1921, "step": 4758 }, { "epoch": 0.12219777539193832, "grad_norm": 0.84765625, "learning_rate": 0.00019628979092966976, "loss": 1.3526, "step": 4759 }, { "epoch": 0.12222345258786015, "grad_norm": 0.95703125, "learning_rate": 0.00019628858609781387, "loss": 1.1265, "step": 4760 }, { "epoch": 0.12224912978378197, "grad_norm": 0.84375, "learning_rate": 0.00019628738107406354, "loss": 1.0477, "step": 4761 }, { "epoch": 0.12227480697970378, "grad_norm": 0.890625, "learning_rate": 0.00019628617585842117, "loss": 1.1547, "step": 4762 }, { "epoch": 0.1223004841756256, "grad_norm": 0.84765625, "learning_rate": 0.00019628497045088913, "loss": 1.2688, "step": 4763 }, { "epoch": 0.12232616137154742, "grad_norm": 0.93359375, "learning_rate": 0.00019628376485146987, "loss": 1.0956, "step": 4764 }, { "epoch": 0.12235183856746924, "grad_norm": 0.828125, "learning_rate": 0.00019628255906016574, "loss": 1.1677, "step": 4765 }, { "epoch": 0.12237751576339106, "grad_norm": 0.8046875, "learning_rate": 0.0001962813530769792, "loss": 1.1376, "step": 4766 }, { "epoch": 0.12240319295931287, "grad_norm": 0.8046875, "learning_rate": 0.00019628014690191257, "loss": 1.0033, "step": 4767 }, { "epoch": 0.1224288701552347, "grad_norm": 0.92578125, "learning_rate": 0.0001962789405349683, "loss": 1.1906, "step": 4768 }, { "epoch": 0.12245454735115652, "grad_norm": 0.88671875, "learning_rate": 0.00019627773397614887, "loss": 1.1202, "step": 4769 }, { "epoch": 0.12248022454707834, "grad_norm": 0.859375, "learning_rate": 0.00019627652722545655, "loss": 1.1682, "step": 4770 }, { "epoch": 0.12250590174300016, "grad_norm": 0.8359375, "learning_rate": 0.00019627532028289383, "loss": 1.2021, "step": 4771 }, { "epoch": 0.12253157893892197, "grad_norm": 0.80078125, "learning_rate": 0.0001962741131484631, "loss": 1.0485, "step": 4772 }, { "epoch": 0.12255725613484379, "grad_norm": 0.7578125, "learning_rate": 0.00019627290582216675, "loss": 1.0002, "step": 4773 }, { "epoch": 0.12258293333076561, "grad_norm": 0.78515625, "learning_rate": 0.0001962716983040072, "loss": 1.1375, "step": 4774 }, { "epoch": 0.12260861052668744, "grad_norm": 0.85546875, "learning_rate": 0.00019627049059398684, "loss": 1.1885, "step": 4775 }, { "epoch": 0.12263428772260926, "grad_norm": 0.875, "learning_rate": 0.00019626928269210809, "loss": 1.0155, "step": 4776 }, { "epoch": 0.12265996491853107, "grad_norm": 0.92578125, "learning_rate": 0.00019626807459837336, "loss": 1.2558, "step": 4777 }, { "epoch": 0.12268564211445289, "grad_norm": 0.78125, "learning_rate": 0.00019626686631278505, "loss": 1.2798, "step": 4778 }, { "epoch": 0.12271131931037471, "grad_norm": 0.81640625, "learning_rate": 0.0001962656578353456, "loss": 1.0618, "step": 4779 }, { "epoch": 0.12273699650629653, "grad_norm": 0.84375, "learning_rate": 0.00019626444916605732, "loss": 1.0471, "step": 4780 }, { "epoch": 0.12276267370221836, "grad_norm": 0.8203125, "learning_rate": 0.00019626324030492276, "loss": 1.1483, "step": 4781 }, { "epoch": 0.12278835089814016, "grad_norm": 0.82421875, "learning_rate": 0.00019626203125194423, "loss": 1.1086, "step": 4782 }, { "epoch": 0.12281402809406199, "grad_norm": 0.8515625, "learning_rate": 0.00019626082200712417, "loss": 1.2572, "step": 4783 }, { "epoch": 0.12283970528998381, "grad_norm": 0.9375, "learning_rate": 0.00019625961257046498, "loss": 1.111, "step": 4784 }, { "epoch": 0.12286538248590563, "grad_norm": 0.87890625, "learning_rate": 0.0001962584029419691, "loss": 1.1688, "step": 4785 }, { "epoch": 0.12289105968182745, "grad_norm": 0.72265625, "learning_rate": 0.00019625719312163885, "loss": 1.0926, "step": 4786 }, { "epoch": 0.12291673687774926, "grad_norm": 0.8046875, "learning_rate": 0.0001962559831094768, "loss": 1.0221, "step": 4787 }, { "epoch": 0.12294241407367108, "grad_norm": 0.87890625, "learning_rate": 0.0001962547729054852, "loss": 1.066, "step": 4788 }, { "epoch": 0.1229680912695929, "grad_norm": 0.84765625, "learning_rate": 0.00019625356250966658, "loss": 1.2192, "step": 4789 }, { "epoch": 0.12299376846551473, "grad_norm": 0.83984375, "learning_rate": 0.00019625235192202327, "loss": 1.0265, "step": 4790 }, { "epoch": 0.12301944566143655, "grad_norm": 0.859375, "learning_rate": 0.00019625114114255773, "loss": 1.3292, "step": 4791 }, { "epoch": 0.12304512285735836, "grad_norm": 0.8359375, "learning_rate": 0.00019624993017127235, "loss": 1.2613, "step": 4792 }, { "epoch": 0.12307080005328018, "grad_norm": 0.85546875, "learning_rate": 0.0001962487190081696, "loss": 1.2086, "step": 4793 }, { "epoch": 0.123096477249202, "grad_norm": 0.84375, "learning_rate": 0.00019624750765325183, "loss": 1.122, "step": 4794 }, { "epoch": 0.12312215444512382, "grad_norm": 0.84765625, "learning_rate": 0.0001962462961065214, "loss": 1.0497, "step": 4795 }, { "epoch": 0.12314783164104565, "grad_norm": 0.84375, "learning_rate": 0.0001962450843679809, "loss": 1.1935, "step": 4796 }, { "epoch": 0.12317350883696745, "grad_norm": 0.79296875, "learning_rate": 0.00019624387243763258, "loss": 1.1587, "step": 4797 }, { "epoch": 0.12319918603288928, "grad_norm": 0.8125, "learning_rate": 0.0001962426603154789, "loss": 1.0522, "step": 4798 }, { "epoch": 0.1232248632288111, "grad_norm": 0.90625, "learning_rate": 0.00019624144800152232, "loss": 1.1987, "step": 4799 }, { "epoch": 0.12325054042473292, "grad_norm": 0.8671875, "learning_rate": 0.00019624023549576523, "loss": 1.1843, "step": 4800 }, { "epoch": 0.12327621762065474, "grad_norm": 0.8203125, "learning_rate": 0.00019623902279821006, "loss": 1.1285, "step": 4801 }, { "epoch": 0.12330189481657655, "grad_norm": 0.88671875, "learning_rate": 0.00019623780990885917, "loss": 1.2404, "step": 4802 }, { "epoch": 0.12332757201249837, "grad_norm": 0.83984375, "learning_rate": 0.00019623659682771504, "loss": 1.2721, "step": 4803 }, { "epoch": 0.1233532492084202, "grad_norm": 0.8359375, "learning_rate": 0.00019623538355478007, "loss": 1.0888, "step": 4804 }, { "epoch": 0.12337892640434202, "grad_norm": 0.7734375, "learning_rate": 0.00019623417009005667, "loss": 1.115, "step": 4805 }, { "epoch": 0.12340460360026383, "grad_norm": 0.89453125, "learning_rate": 0.00019623295643354726, "loss": 1.2433, "step": 4806 }, { "epoch": 0.12343028079618565, "grad_norm": 0.91796875, "learning_rate": 0.00019623174258525425, "loss": 1.1933, "step": 4807 }, { "epoch": 0.12345595799210747, "grad_norm": 0.82421875, "learning_rate": 0.00019623052854518007, "loss": 1.1017, "step": 4808 }, { "epoch": 0.12348163518802929, "grad_norm": 0.93359375, "learning_rate": 0.00019622931431332715, "loss": 1.0321, "step": 4809 }, { "epoch": 0.12350731238395111, "grad_norm": 0.89453125, "learning_rate": 0.0001962280998896979, "loss": 1.1081, "step": 4810 }, { "epoch": 0.12353298957987292, "grad_norm": 0.8359375, "learning_rate": 0.00019622688527429474, "loss": 1.0228, "step": 4811 }, { "epoch": 0.12355866677579475, "grad_norm": 1.0, "learning_rate": 0.0001962256704671201, "loss": 1.1021, "step": 4812 }, { "epoch": 0.12358434397171657, "grad_norm": 0.81640625, "learning_rate": 0.00019622445546817638, "loss": 0.9878, "step": 4813 }, { "epoch": 0.12361002116763839, "grad_norm": 0.8359375, "learning_rate": 0.00019622324027746598, "loss": 1.248, "step": 4814 }, { "epoch": 0.12363569836356021, "grad_norm": 0.859375, "learning_rate": 0.0001962220248949914, "loss": 1.2059, "step": 4815 }, { "epoch": 0.12366137555948202, "grad_norm": 0.80859375, "learning_rate": 0.000196220809320755, "loss": 1.2142, "step": 4816 }, { "epoch": 0.12368705275540384, "grad_norm": 0.77734375, "learning_rate": 0.00019621959355475922, "loss": 1.1784, "step": 4817 }, { "epoch": 0.12371272995132566, "grad_norm": 0.83203125, "learning_rate": 0.00019621837759700646, "loss": 1.1787, "step": 4818 }, { "epoch": 0.12373840714724749, "grad_norm": 0.84375, "learning_rate": 0.0001962171614474992, "loss": 1.2129, "step": 4819 }, { "epoch": 0.12376408434316931, "grad_norm": 0.9296875, "learning_rate": 0.0001962159451062398, "loss": 1.1869, "step": 4820 }, { "epoch": 0.12378976153909112, "grad_norm": 0.7578125, "learning_rate": 0.00019621472857323072, "loss": 0.9934, "step": 4821 }, { "epoch": 0.12381543873501294, "grad_norm": 0.8828125, "learning_rate": 0.0001962135118484744, "loss": 1.2491, "step": 4822 }, { "epoch": 0.12384111593093476, "grad_norm": 0.87109375, "learning_rate": 0.0001962122949319732, "loss": 1.1788, "step": 4823 }, { "epoch": 0.12386679312685658, "grad_norm": 0.79296875, "learning_rate": 0.00019621107782372965, "loss": 1.0768, "step": 4824 }, { "epoch": 0.1238924703227784, "grad_norm": 0.87109375, "learning_rate": 0.00019620986052374607, "loss": 1.1419, "step": 4825 }, { "epoch": 0.12391814751870021, "grad_norm": 0.875, "learning_rate": 0.00019620864303202491, "loss": 1.295, "step": 4826 }, { "epoch": 0.12394382471462204, "grad_norm": 0.86328125, "learning_rate": 0.00019620742534856865, "loss": 1.0448, "step": 4827 }, { "epoch": 0.12396950191054386, "grad_norm": 0.859375, "learning_rate": 0.00019620620747337966, "loss": 0.9929, "step": 4828 }, { "epoch": 0.12399517910646568, "grad_norm": 0.8515625, "learning_rate": 0.00019620498940646041, "loss": 1.1223, "step": 4829 }, { "epoch": 0.1240208563023875, "grad_norm": 0.83203125, "learning_rate": 0.0001962037711478133, "loss": 1.1307, "step": 4830 }, { "epoch": 0.12404653349830931, "grad_norm": 0.8828125, "learning_rate": 0.00019620255269744074, "loss": 1.2484, "step": 4831 }, { "epoch": 0.12407221069423113, "grad_norm": 0.9375, "learning_rate": 0.0001962013340553452, "loss": 1.201, "step": 4832 }, { "epoch": 0.12409788789015296, "grad_norm": 0.9375, "learning_rate": 0.0001962001152215291, "loss": 1.1945, "step": 4833 }, { "epoch": 0.12412356508607478, "grad_norm": 0.93359375, "learning_rate": 0.00019619889619599485, "loss": 1.2281, "step": 4834 }, { "epoch": 0.1241492422819966, "grad_norm": 0.88671875, "learning_rate": 0.0001961976769787449, "loss": 1.0452, "step": 4835 }, { "epoch": 0.12417491947791841, "grad_norm": 0.8359375, "learning_rate": 0.00019619645756978164, "loss": 1.2555, "step": 4836 }, { "epoch": 0.12420059667384023, "grad_norm": 0.8359375, "learning_rate": 0.00019619523796910758, "loss": 1.1461, "step": 4837 }, { "epoch": 0.12422627386976205, "grad_norm": 0.8203125, "learning_rate": 0.00019619401817672506, "loss": 1.132, "step": 4838 }, { "epoch": 0.12425195106568387, "grad_norm": 0.859375, "learning_rate": 0.00019619279819263653, "loss": 1.0668, "step": 4839 }, { "epoch": 0.1242776282616057, "grad_norm": 0.87890625, "learning_rate": 0.0001961915780168445, "loss": 1.2022, "step": 4840 }, { "epoch": 0.1243033054575275, "grad_norm": 0.859375, "learning_rate": 0.0001961903576493513, "loss": 0.9463, "step": 4841 }, { "epoch": 0.12432898265344933, "grad_norm": 0.9296875, "learning_rate": 0.00019618913709015944, "loss": 1.126, "step": 4842 }, { "epoch": 0.12435465984937115, "grad_norm": 0.90234375, "learning_rate": 0.0001961879163392713, "loss": 1.1874, "step": 4843 }, { "epoch": 0.12438033704529297, "grad_norm": 0.80078125, "learning_rate": 0.00019618669539668934, "loss": 1.051, "step": 4844 }, { "epoch": 0.1244060142412148, "grad_norm": 0.8046875, "learning_rate": 0.00019618547426241597, "loss": 1.1356, "step": 4845 }, { "epoch": 0.1244316914371366, "grad_norm": 0.8046875, "learning_rate": 0.00019618425293645365, "loss": 0.9883, "step": 4846 }, { "epoch": 0.12445736863305842, "grad_norm": 0.9453125, "learning_rate": 0.0001961830314188048, "loss": 1.3745, "step": 4847 }, { "epoch": 0.12448304582898025, "grad_norm": 0.890625, "learning_rate": 0.00019618180970947183, "loss": 1.1823, "step": 4848 }, { "epoch": 0.12450872302490207, "grad_norm": 0.8359375, "learning_rate": 0.00019618058780845722, "loss": 1.157, "step": 4849 }, { "epoch": 0.12453440022082389, "grad_norm": 0.7890625, "learning_rate": 0.0001961793657157634, "loss": 0.9198, "step": 4850 }, { "epoch": 0.1245600774167457, "grad_norm": 0.875, "learning_rate": 0.00019617814343139277, "loss": 1.1475, "step": 4851 }, { "epoch": 0.12458575461266752, "grad_norm": 0.8984375, "learning_rate": 0.00019617692095534782, "loss": 1.3017, "step": 4852 }, { "epoch": 0.12461143180858934, "grad_norm": 0.875, "learning_rate": 0.00019617569828763094, "loss": 1.2691, "step": 4853 }, { "epoch": 0.12463710900451117, "grad_norm": 0.86328125, "learning_rate": 0.0001961744754282446, "loss": 1.1431, "step": 4854 }, { "epoch": 0.12466278620043299, "grad_norm": 0.8515625, "learning_rate": 0.00019617325237719117, "loss": 1.1782, "step": 4855 }, { "epoch": 0.1246884633963548, "grad_norm": 0.796875, "learning_rate": 0.00019617202913447317, "loss": 1.1749, "step": 4856 }, { "epoch": 0.12471414059227662, "grad_norm": 0.796875, "learning_rate": 0.00019617080570009297, "loss": 0.9621, "step": 4857 }, { "epoch": 0.12473981778819844, "grad_norm": 0.88671875, "learning_rate": 0.0001961695820740531, "loss": 1.2087, "step": 4858 }, { "epoch": 0.12476549498412026, "grad_norm": 0.88671875, "learning_rate": 0.00019616835825635588, "loss": 1.3314, "step": 4859 }, { "epoch": 0.12479117218004208, "grad_norm": 1.0, "learning_rate": 0.00019616713424700383, "loss": 1.1065, "step": 4860 }, { "epoch": 0.12481684937596389, "grad_norm": 0.81640625, "learning_rate": 0.00019616591004599938, "loss": 0.99, "step": 4861 }, { "epoch": 0.12484252657188571, "grad_norm": 0.859375, "learning_rate": 0.00019616468565334495, "loss": 1.0928, "step": 4862 }, { "epoch": 0.12486820376780754, "grad_norm": 0.99609375, "learning_rate": 0.000196163461069043, "loss": 1.2424, "step": 4863 }, { "epoch": 0.12489388096372936, "grad_norm": 0.84765625, "learning_rate": 0.00019616223629309593, "loss": 1.1048, "step": 4864 }, { "epoch": 0.12491955815965118, "grad_norm": 0.8359375, "learning_rate": 0.00019616101132550621, "loss": 0.9482, "step": 4865 }, { "epoch": 0.12494523535557299, "grad_norm": 0.98046875, "learning_rate": 0.00019615978616627632, "loss": 1.1092, "step": 4866 }, { "epoch": 0.12497091255149481, "grad_norm": 0.84765625, "learning_rate": 0.0001961585608154086, "loss": 1.1406, "step": 4867 }, { "epoch": 0.12499658974741663, "grad_norm": 0.8984375, "learning_rate": 0.00019615733527290563, "loss": 0.9892, "step": 4868 }, { "epoch": 0.12502226694333846, "grad_norm": 0.83984375, "learning_rate": 0.00019615610953876972, "loss": 1.1247, "step": 4869 }, { "epoch": 0.12504794413926026, "grad_norm": 0.7578125, "learning_rate": 0.0001961548836130034, "loss": 0.8932, "step": 4870 }, { "epoch": 0.1250736213351821, "grad_norm": 1.3046875, "learning_rate": 0.00019615365749560905, "loss": 1.0679, "step": 4871 }, { "epoch": 0.1250992985311039, "grad_norm": 0.85546875, "learning_rate": 0.0001961524311865892, "loss": 1.1911, "step": 4872 }, { "epoch": 0.12512497572702572, "grad_norm": 0.83984375, "learning_rate": 0.00019615120468594618, "loss": 1.2696, "step": 4873 }, { "epoch": 0.12515065292294755, "grad_norm": 0.875, "learning_rate": 0.00019614997799368252, "loss": 1.1514, "step": 4874 }, { "epoch": 0.12517633011886936, "grad_norm": 0.91796875, "learning_rate": 0.00019614875110980068, "loss": 1.204, "step": 4875 }, { "epoch": 0.1252020073147912, "grad_norm": 0.921875, "learning_rate": 0.00019614752403430302, "loss": 1.0626, "step": 4876 }, { "epoch": 0.125227684510713, "grad_norm": 0.81640625, "learning_rate": 0.000196146296767192, "loss": 1.2106, "step": 4877 }, { "epoch": 0.1252533617066348, "grad_norm": 0.828125, "learning_rate": 0.00019614506930847015, "loss": 1.0541, "step": 4878 }, { "epoch": 0.12527903890255665, "grad_norm": 0.9609375, "learning_rate": 0.00019614384165813983, "loss": 1.301, "step": 4879 }, { "epoch": 0.12530471609847846, "grad_norm": 0.890625, "learning_rate": 0.00019614261381620355, "loss": 1.0051, "step": 4880 }, { "epoch": 0.1253303932944003, "grad_norm": 0.80078125, "learning_rate": 0.00019614138578266367, "loss": 1.1753, "step": 4881 }, { "epoch": 0.1253560704903221, "grad_norm": 0.82421875, "learning_rate": 0.00019614015755752275, "loss": 1.0177, "step": 4882 }, { "epoch": 0.1253817476862439, "grad_norm": 0.8203125, "learning_rate": 0.00019613892914078316, "loss": 1.0777, "step": 4883 }, { "epoch": 0.12540742488216575, "grad_norm": 0.8671875, "learning_rate": 0.00019613770053244735, "loss": 1.1055, "step": 4884 }, { "epoch": 0.12543310207808755, "grad_norm": 0.84375, "learning_rate": 0.00019613647173251782, "loss": 1.0318, "step": 4885 }, { "epoch": 0.1254587792740094, "grad_norm": 0.80078125, "learning_rate": 0.00019613524274099696, "loss": 1.162, "step": 4886 }, { "epoch": 0.1254844564699312, "grad_norm": 0.8203125, "learning_rate": 0.00019613401355788725, "loss": 1.0601, "step": 4887 }, { "epoch": 0.125510133665853, "grad_norm": 0.83203125, "learning_rate": 0.00019613278418319113, "loss": 1.0929, "step": 4888 }, { "epoch": 0.12553581086177484, "grad_norm": 0.78515625, "learning_rate": 0.0001961315546169111, "loss": 1.2317, "step": 4889 }, { "epoch": 0.12556148805769665, "grad_norm": 0.9375, "learning_rate": 0.0001961303248590495, "loss": 1.1623, "step": 4890 }, { "epoch": 0.1255871652536185, "grad_norm": 0.78125, "learning_rate": 0.0001961290949096089, "loss": 1.0933, "step": 4891 }, { "epoch": 0.1256128424495403, "grad_norm": 0.9375, "learning_rate": 0.00019612786476859166, "loss": 1.4328, "step": 4892 }, { "epoch": 0.1256385196454621, "grad_norm": 0.8984375, "learning_rate": 0.00019612663443600026, "loss": 1.1117, "step": 4893 }, { "epoch": 0.12566419684138394, "grad_norm": 0.87890625, "learning_rate": 0.0001961254039118372, "loss": 1.1505, "step": 4894 }, { "epoch": 0.12568987403730575, "grad_norm": 0.859375, "learning_rate": 0.00019612417319610486, "loss": 1.1488, "step": 4895 }, { "epoch": 0.12571555123322758, "grad_norm": 0.83984375, "learning_rate": 0.00019612294228880576, "loss": 1.1893, "step": 4896 }, { "epoch": 0.1257412284291494, "grad_norm": 0.80859375, "learning_rate": 0.00019612171118994228, "loss": 1.0535, "step": 4897 }, { "epoch": 0.1257669056250712, "grad_norm": 0.87109375, "learning_rate": 0.00019612047989951692, "loss": 1.2221, "step": 4898 }, { "epoch": 0.12579258282099304, "grad_norm": 0.82421875, "learning_rate": 0.00019611924841753215, "loss": 1.1187, "step": 4899 }, { "epoch": 0.12581826001691485, "grad_norm": 0.80859375, "learning_rate": 0.0001961180167439904, "loss": 1.1011, "step": 4900 }, { "epoch": 0.12584393721283668, "grad_norm": 0.90234375, "learning_rate": 0.00019611678487889411, "loss": 1.1543, "step": 4901 }, { "epoch": 0.1258696144087585, "grad_norm": 0.80078125, "learning_rate": 0.00019611555282224573, "loss": 1.0529, "step": 4902 }, { "epoch": 0.1258952916046803, "grad_norm": 0.8046875, "learning_rate": 0.00019611432057404778, "loss": 1.125, "step": 4903 }, { "epoch": 0.12592096880060213, "grad_norm": 0.88671875, "learning_rate": 0.00019611308813430266, "loss": 1.2201, "step": 4904 }, { "epoch": 0.12594664599652394, "grad_norm": 0.89453125, "learning_rate": 0.0001961118555030128, "loss": 1.163, "step": 4905 }, { "epoch": 0.12597232319244578, "grad_norm": 0.90234375, "learning_rate": 0.0001961106226801807, "loss": 1.1689, "step": 4906 }, { "epoch": 0.1259980003883676, "grad_norm": 0.77734375, "learning_rate": 0.00019610938966580885, "loss": 1.0845, "step": 4907 }, { "epoch": 0.1260236775842894, "grad_norm": 1.015625, "learning_rate": 0.00019610815645989966, "loss": 1.1709, "step": 4908 }, { "epoch": 0.12604935478021123, "grad_norm": 0.84375, "learning_rate": 0.00019610692306245558, "loss": 1.0393, "step": 4909 }, { "epoch": 0.12607503197613304, "grad_norm": 0.84375, "learning_rate": 0.0001961056894734791, "loss": 1.0965, "step": 4910 }, { "epoch": 0.12610070917205488, "grad_norm": 0.7734375, "learning_rate": 0.00019610445569297262, "loss": 1.0246, "step": 4911 }, { "epoch": 0.12612638636797668, "grad_norm": 0.87890625, "learning_rate": 0.0001961032217209387, "loss": 1.134, "step": 4912 }, { "epoch": 0.1261520635638985, "grad_norm": 0.81640625, "learning_rate": 0.0001961019875573797, "loss": 1.1532, "step": 4913 }, { "epoch": 0.12617774075982033, "grad_norm": 0.82421875, "learning_rate": 0.00019610075320229814, "loss": 1.0805, "step": 4914 }, { "epoch": 0.12620341795574214, "grad_norm": 0.82421875, "learning_rate": 0.00019609951865569641, "loss": 1.0931, "step": 4915 }, { "epoch": 0.12622909515166397, "grad_norm": 0.81640625, "learning_rate": 0.00019609828391757708, "loss": 1.101, "step": 4916 }, { "epoch": 0.12625477234758578, "grad_norm": 0.890625, "learning_rate": 0.00019609704898794253, "loss": 1.0719, "step": 4917 }, { "epoch": 0.1262804495435076, "grad_norm": 0.8515625, "learning_rate": 0.00019609581386679522, "loss": 1.1631, "step": 4918 }, { "epoch": 0.12630612673942943, "grad_norm": 1.1640625, "learning_rate": 0.00019609457855413766, "loss": 1.2496, "step": 4919 }, { "epoch": 0.12633180393535123, "grad_norm": 0.796875, "learning_rate": 0.0001960933430499723, "loss": 1.0863, "step": 4920 }, { "epoch": 0.12635748113127307, "grad_norm": 0.91015625, "learning_rate": 0.00019609210735430156, "loss": 1.3187, "step": 4921 }, { "epoch": 0.12638315832719488, "grad_norm": 0.91015625, "learning_rate": 0.00019609087146712795, "loss": 1.1718, "step": 4922 }, { "epoch": 0.12640883552311669, "grad_norm": 0.90234375, "learning_rate": 0.00019608963538845389, "loss": 1.0247, "step": 4923 }, { "epoch": 0.12643451271903852, "grad_norm": 0.83203125, "learning_rate": 0.00019608839911828188, "loss": 1.1957, "step": 4924 }, { "epoch": 0.12646018991496033, "grad_norm": 0.828125, "learning_rate": 0.00019608716265661434, "loss": 1.0755, "step": 4925 }, { "epoch": 0.12648586711088217, "grad_norm": 0.89453125, "learning_rate": 0.00019608592600345382, "loss": 1.1619, "step": 4926 }, { "epoch": 0.12651154430680397, "grad_norm": 0.7890625, "learning_rate": 0.00019608468915880269, "loss": 0.9376, "step": 4927 }, { "epoch": 0.12653722150272578, "grad_norm": 0.86328125, "learning_rate": 0.00019608345212266344, "loss": 1.1747, "step": 4928 }, { "epoch": 0.12656289869864762, "grad_norm": 0.859375, "learning_rate": 0.0001960822148950386, "loss": 1.0719, "step": 4929 }, { "epoch": 0.12658857589456943, "grad_norm": 0.76953125, "learning_rate": 0.00019608097747593055, "loss": 0.9465, "step": 4930 }, { "epoch": 0.12661425309049126, "grad_norm": 0.84375, "learning_rate": 0.0001960797398653418, "loss": 1.2008, "step": 4931 }, { "epoch": 0.12663993028641307, "grad_norm": 0.875, "learning_rate": 0.0001960785020632748, "loss": 1.1796, "step": 4932 }, { "epoch": 0.12666560748233488, "grad_norm": 0.85546875, "learning_rate": 0.00019607726406973202, "loss": 1.0526, "step": 4933 }, { "epoch": 0.12669128467825672, "grad_norm": 0.84765625, "learning_rate": 0.00019607602588471597, "loss": 1.2164, "step": 4934 }, { "epoch": 0.12671696187417852, "grad_norm": 0.9375, "learning_rate": 0.00019607478750822903, "loss": 1.0326, "step": 4935 }, { "epoch": 0.12674263907010036, "grad_norm": 0.8671875, "learning_rate": 0.00019607354894027375, "loss": 0.9984, "step": 4936 }, { "epoch": 0.12676831626602217, "grad_norm": 0.80078125, "learning_rate": 0.00019607231018085254, "loss": 1.1475, "step": 4937 }, { "epoch": 0.12679399346194398, "grad_norm": 0.81640625, "learning_rate": 0.00019607107122996791, "loss": 1.1168, "step": 4938 }, { "epoch": 0.1268196706578658, "grad_norm": 0.796875, "learning_rate": 0.00019606983208762232, "loss": 1.186, "step": 4939 }, { "epoch": 0.12684534785378762, "grad_norm": 0.91796875, "learning_rate": 0.0001960685927538182, "loss": 1.0917, "step": 4940 }, { "epoch": 0.12687102504970946, "grad_norm": 0.80078125, "learning_rate": 0.0001960673532285581, "loss": 1.1358, "step": 4941 }, { "epoch": 0.12689670224563127, "grad_norm": 0.78515625, "learning_rate": 0.0001960661135118444, "loss": 1.0844, "step": 4942 }, { "epoch": 0.12692237944155307, "grad_norm": 0.859375, "learning_rate": 0.00019606487360367966, "loss": 1.169, "step": 4943 }, { "epoch": 0.1269480566374749, "grad_norm": 0.88671875, "learning_rate": 0.00019606363350406625, "loss": 1.1809, "step": 4944 }, { "epoch": 0.12697373383339672, "grad_norm": 0.78125, "learning_rate": 0.00019606239321300672, "loss": 1.0028, "step": 4945 }, { "epoch": 0.12699941102931855, "grad_norm": 0.85546875, "learning_rate": 0.00019606115273050354, "loss": 1.1537, "step": 4946 }, { "epoch": 0.12702508822524036, "grad_norm": 0.83203125, "learning_rate": 0.0001960599120565591, "loss": 1.1579, "step": 4947 }, { "epoch": 0.12705076542116217, "grad_norm": 0.828125, "learning_rate": 0.000196058671191176, "loss": 1.1091, "step": 4948 }, { "epoch": 0.127076442617084, "grad_norm": 0.79296875, "learning_rate": 0.0001960574301343566, "loss": 1.0389, "step": 4949 }, { "epoch": 0.12710211981300581, "grad_norm": 0.87109375, "learning_rate": 0.00019605618888610344, "loss": 1.0951, "step": 4950 }, { "epoch": 0.12712779700892765, "grad_norm": 0.76953125, "learning_rate": 0.00019605494744641896, "loss": 1.0382, "step": 4951 }, { "epoch": 0.12715347420484946, "grad_norm": 0.8046875, "learning_rate": 0.00019605370581530566, "loss": 1.048, "step": 4952 }, { "epoch": 0.12717915140077127, "grad_norm": 0.7734375, "learning_rate": 0.000196052463992766, "loss": 1.1654, "step": 4953 }, { "epoch": 0.1272048285966931, "grad_norm": 0.87109375, "learning_rate": 0.00019605122197880243, "loss": 1.1932, "step": 4954 }, { "epoch": 0.1272305057926149, "grad_norm": 0.8359375, "learning_rate": 0.0001960499797734175, "loss": 1.1239, "step": 4955 }, { "epoch": 0.12725618298853675, "grad_norm": 0.91015625, "learning_rate": 0.0001960487373766136, "loss": 1.0339, "step": 4956 }, { "epoch": 0.12728186018445856, "grad_norm": 0.86328125, "learning_rate": 0.00019604749478839323, "loss": 1.0654, "step": 4957 }, { "epoch": 0.12730753738038036, "grad_norm": 1.2734375, "learning_rate": 0.00019604625200875893, "loss": 1.2851, "step": 4958 }, { "epoch": 0.1273332145763022, "grad_norm": 0.97265625, "learning_rate": 0.00019604500903771307, "loss": 1.0842, "step": 4959 }, { "epoch": 0.127358891772224, "grad_norm": 0.90234375, "learning_rate": 0.0001960437658752582, "loss": 1.1312, "step": 4960 }, { "epoch": 0.12738456896814584, "grad_norm": 0.94921875, "learning_rate": 0.0001960425225213968, "loss": 1.1016, "step": 4961 }, { "epoch": 0.12741024616406765, "grad_norm": 0.84765625, "learning_rate": 0.00019604127897613133, "loss": 0.9743, "step": 4962 }, { "epoch": 0.12743592335998946, "grad_norm": 0.8359375, "learning_rate": 0.00019604003523946423, "loss": 1.1589, "step": 4963 }, { "epoch": 0.1274616005559113, "grad_norm": 0.84765625, "learning_rate": 0.00019603879131139805, "loss": 0.9992, "step": 4964 }, { "epoch": 0.1274872777518331, "grad_norm": 0.81640625, "learning_rate": 0.00019603754719193522, "loss": 1.1546, "step": 4965 }, { "epoch": 0.12751295494775494, "grad_norm": 0.76171875, "learning_rate": 0.00019603630288107823, "loss": 1.1153, "step": 4966 }, { "epoch": 0.12753863214367675, "grad_norm": 0.765625, "learning_rate": 0.00019603505837882955, "loss": 1.0006, "step": 4967 }, { "epoch": 0.12756430933959856, "grad_norm": 0.76953125, "learning_rate": 0.0001960338136851917, "loss": 0.9941, "step": 4968 }, { "epoch": 0.1275899865355204, "grad_norm": 0.9375, "learning_rate": 0.00019603256880016713, "loss": 1.1484, "step": 4969 }, { "epoch": 0.1276156637314422, "grad_norm": 0.8359375, "learning_rate": 0.00019603132372375832, "loss": 1.0487, "step": 4970 }, { "epoch": 0.12764134092736404, "grad_norm": 0.921875, "learning_rate": 0.00019603007845596775, "loss": 1.0894, "step": 4971 }, { "epoch": 0.12766701812328585, "grad_norm": 1.3046875, "learning_rate": 0.0001960288329967979, "loss": 0.9929, "step": 4972 }, { "epoch": 0.12769269531920766, "grad_norm": 0.87109375, "learning_rate": 0.0001960275873462513, "loss": 1.0875, "step": 4973 }, { "epoch": 0.1277183725151295, "grad_norm": 0.87890625, "learning_rate": 0.00019602634150433037, "loss": 1.0438, "step": 4974 }, { "epoch": 0.1277440497110513, "grad_norm": 0.83203125, "learning_rate": 0.0001960250954710376, "loss": 1.0956, "step": 4975 }, { "epoch": 0.12776972690697314, "grad_norm": 0.80078125, "learning_rate": 0.0001960238492463755, "loss": 1.0332, "step": 4976 }, { "epoch": 0.12779540410289494, "grad_norm": 0.82421875, "learning_rate": 0.00019602260283034656, "loss": 1.0329, "step": 4977 }, { "epoch": 0.12782108129881675, "grad_norm": 0.90234375, "learning_rate": 0.0001960213562229532, "loss": 1.0597, "step": 4978 }, { "epoch": 0.1278467584947386, "grad_norm": 0.8984375, "learning_rate": 0.00019602010942419798, "loss": 1.3101, "step": 4979 }, { "epoch": 0.1278724356906604, "grad_norm": 0.87109375, "learning_rate": 0.00019601886243408336, "loss": 1.2395, "step": 4980 }, { "epoch": 0.12789811288658223, "grad_norm": 0.87890625, "learning_rate": 0.00019601761525261181, "loss": 1.2007, "step": 4981 }, { "epoch": 0.12792379008250404, "grad_norm": 1.1953125, "learning_rate": 0.00019601636787978585, "loss": 1.1747, "step": 4982 }, { "epoch": 0.12794946727842585, "grad_norm": 0.7890625, "learning_rate": 0.0001960151203156079, "loss": 1.1068, "step": 4983 }, { "epoch": 0.12797514447434769, "grad_norm": 0.796875, "learning_rate": 0.0001960138725600805, "loss": 1.1262, "step": 4984 }, { "epoch": 0.1280008216702695, "grad_norm": 0.88671875, "learning_rate": 0.00019601262461320617, "loss": 1.2648, "step": 4985 }, { "epoch": 0.12802649886619133, "grad_norm": 0.9140625, "learning_rate": 0.0001960113764749873, "loss": 1.0303, "step": 4986 }, { "epoch": 0.12805217606211314, "grad_norm": 0.828125, "learning_rate": 0.00019601012814542647, "loss": 1.0886, "step": 4987 }, { "epoch": 0.12807785325803495, "grad_norm": 0.82421875, "learning_rate": 0.0001960088796245261, "loss": 0.9259, "step": 4988 }, { "epoch": 0.12810353045395678, "grad_norm": 0.88671875, "learning_rate": 0.00019600763091228872, "loss": 1.1856, "step": 4989 }, { "epoch": 0.1281292076498786, "grad_norm": 0.87890625, "learning_rate": 0.0001960063820087168, "loss": 1.0781, "step": 4990 }, { "epoch": 0.12815488484580043, "grad_norm": 0.7421875, "learning_rate": 0.0001960051329138128, "loss": 1.063, "step": 4991 }, { "epoch": 0.12818056204172223, "grad_norm": 0.88671875, "learning_rate": 0.00019600388362757927, "loss": 1.2022, "step": 4992 }, { "epoch": 0.12820623923764404, "grad_norm": 0.87109375, "learning_rate": 0.00019600263415001868, "loss": 1.1342, "step": 4993 }, { "epoch": 0.12823191643356588, "grad_norm": 1.0546875, "learning_rate": 0.0001960013844811335, "loss": 1.0395, "step": 4994 }, { "epoch": 0.1282575936294877, "grad_norm": 0.81640625, "learning_rate": 0.00019600013462092624, "loss": 1.0326, "step": 4995 }, { "epoch": 0.12828327082540952, "grad_norm": 0.83203125, "learning_rate": 0.00019599888456939935, "loss": 1.1271, "step": 4996 }, { "epoch": 0.12830894802133133, "grad_norm": 0.96484375, "learning_rate": 0.0001959976343265554, "loss": 1.2464, "step": 4997 }, { "epoch": 0.12833462521725314, "grad_norm": 1.0703125, "learning_rate": 0.00019599638389239683, "loss": 1.0253, "step": 4998 }, { "epoch": 0.12836030241317498, "grad_norm": 0.89453125, "learning_rate": 0.00019599513326692614, "loss": 1.1934, "step": 4999 }, { "epoch": 0.12838597960909678, "grad_norm": 0.83203125, "learning_rate": 0.0001959938824501458, "loss": 0.9688, "step": 5000 }, { "epoch": 0.12838597960909678, "eval_loss": 1.1211611032485962, "eval_model_preparation_time": 0.0065, "eval_runtime": 407.3182, "eval_samples_per_second": 24.551, "eval_steps_per_second": 0.768, "step": 5000 }, { "epoch": 0.12841165680501862, "grad_norm": 0.8515625, "learning_rate": 0.00019599263144205834, "loss": 0.9984, "step": 5001 }, { "epoch": 0.12843733400094043, "grad_norm": 0.8515625, "learning_rate": 0.0001959913802426662, "loss": 1.1311, "step": 5002 }, { "epoch": 0.12846301119686224, "grad_norm": 0.89453125, "learning_rate": 0.00019599012885197198, "loss": 1.1383, "step": 5003 }, { "epoch": 0.12848868839278407, "grad_norm": 0.8828125, "learning_rate": 0.00019598887726997804, "loss": 1.1312, "step": 5004 }, { "epoch": 0.12851436558870588, "grad_norm": 0.8515625, "learning_rate": 0.00019598762549668695, "loss": 1.2542, "step": 5005 }, { "epoch": 0.12854004278462772, "grad_norm": 0.8203125, "learning_rate": 0.0001959863735321012, "loss": 0.9978, "step": 5006 }, { "epoch": 0.12856571998054953, "grad_norm": 0.84375, "learning_rate": 0.00019598512137622328, "loss": 1.1598, "step": 5007 }, { "epoch": 0.12859139717647133, "grad_norm": 0.8984375, "learning_rate": 0.0001959838690290557, "loss": 1.2139, "step": 5008 }, { "epoch": 0.12861707437239317, "grad_norm": 0.8046875, "learning_rate": 0.00019598261649060093, "loss": 1.2369, "step": 5009 }, { "epoch": 0.12864275156831498, "grad_norm": 0.80078125, "learning_rate": 0.00019598136376086146, "loss": 0.9782, "step": 5010 }, { "epoch": 0.12866842876423681, "grad_norm": 0.96875, "learning_rate": 0.0001959801108398398, "loss": 1.1326, "step": 5011 }, { "epoch": 0.12869410596015862, "grad_norm": 0.83984375, "learning_rate": 0.00019597885772753846, "loss": 1.1471, "step": 5012 }, { "epoch": 0.12871978315608043, "grad_norm": 0.83203125, "learning_rate": 0.00019597760442395995, "loss": 1.0486, "step": 5013 }, { "epoch": 0.12874546035200227, "grad_norm": 0.828125, "learning_rate": 0.0001959763509291067, "loss": 1.125, "step": 5014 }, { "epoch": 0.12877113754792407, "grad_norm": 0.79296875, "learning_rate": 0.00019597509724298128, "loss": 0.9806, "step": 5015 }, { "epoch": 0.1287968147438459, "grad_norm": 0.7734375, "learning_rate": 0.00019597384336558616, "loss": 0.896, "step": 5016 }, { "epoch": 0.12882249193976772, "grad_norm": 0.953125, "learning_rate": 0.00019597258929692383, "loss": 1.2608, "step": 5017 }, { "epoch": 0.12884816913568953, "grad_norm": 0.84765625, "learning_rate": 0.00019597133503699681, "loss": 1.266, "step": 5018 }, { "epoch": 0.12887384633161136, "grad_norm": 0.86328125, "learning_rate": 0.00019597008058580757, "loss": 1.0676, "step": 5019 }, { "epoch": 0.12889952352753317, "grad_norm": 0.8359375, "learning_rate": 0.00019596882594335863, "loss": 1.07, "step": 5020 }, { "epoch": 0.128925200723455, "grad_norm": 0.828125, "learning_rate": 0.0001959675711096525, "loss": 1.0196, "step": 5021 }, { "epoch": 0.12895087791937682, "grad_norm": 0.890625, "learning_rate": 0.00019596631608469168, "loss": 1.0639, "step": 5022 }, { "epoch": 0.12897655511529862, "grad_norm": 0.83984375, "learning_rate": 0.00019596506086847864, "loss": 1.173, "step": 5023 }, { "epoch": 0.12900223231122046, "grad_norm": 0.9921875, "learning_rate": 0.00019596380546101593, "loss": 1.1084, "step": 5024 }, { "epoch": 0.12902790950714227, "grad_norm": 0.796875, "learning_rate": 0.000195962549862306, "loss": 1.0465, "step": 5025 }, { "epoch": 0.1290535867030641, "grad_norm": 0.76953125, "learning_rate": 0.00019596129407235138, "loss": 1.247, "step": 5026 }, { "epoch": 0.1290792638989859, "grad_norm": 0.8671875, "learning_rate": 0.00019596003809115454, "loss": 1.2399, "step": 5027 }, { "epoch": 0.12910494109490772, "grad_norm": 0.8203125, "learning_rate": 0.00019595878191871804, "loss": 1.1822, "step": 5028 }, { "epoch": 0.12913061829082956, "grad_norm": 0.94140625, "learning_rate": 0.00019595752555504436, "loss": 1.1985, "step": 5029 }, { "epoch": 0.12915629548675137, "grad_norm": 0.84765625, "learning_rate": 0.00019595626900013603, "loss": 1.1947, "step": 5030 }, { "epoch": 0.1291819726826732, "grad_norm": 0.87890625, "learning_rate": 0.00019595501225399547, "loss": 1.0086, "step": 5031 }, { "epoch": 0.129207649878595, "grad_norm": 0.8125, "learning_rate": 0.00019595375531662526, "loss": 1.3009, "step": 5032 }, { "epoch": 0.12923332707451682, "grad_norm": 0.859375, "learning_rate": 0.00019595249818802788, "loss": 1.1247, "step": 5033 }, { "epoch": 0.12925900427043865, "grad_norm": 0.91796875, "learning_rate": 0.00019595124086820581, "loss": 1.2115, "step": 5034 }, { "epoch": 0.12928468146636046, "grad_norm": 0.83203125, "learning_rate": 0.00019594998335716163, "loss": 1.2046, "step": 5035 }, { "epoch": 0.1293103586622823, "grad_norm": 0.85546875, "learning_rate": 0.00019594872565489779, "loss": 1.2124, "step": 5036 }, { "epoch": 0.1293360358582041, "grad_norm": 0.83203125, "learning_rate": 0.0001959474677614168, "loss": 1.092, "step": 5037 }, { "epoch": 0.12936171305412592, "grad_norm": 0.8125, "learning_rate": 0.00019594620967672117, "loss": 1.127, "step": 5038 }, { "epoch": 0.12938739025004775, "grad_norm": 0.76171875, "learning_rate": 0.00019594495140081342, "loss": 1.0552, "step": 5039 }, { "epoch": 0.12941306744596956, "grad_norm": 0.89453125, "learning_rate": 0.00019594369293369606, "loss": 0.9447, "step": 5040 }, { "epoch": 0.1294387446418914, "grad_norm": 0.84765625, "learning_rate": 0.00019594243427537155, "loss": 1.1884, "step": 5041 }, { "epoch": 0.1294644218378132, "grad_norm": 0.8515625, "learning_rate": 0.00019594117542584246, "loss": 1.1712, "step": 5042 }, { "epoch": 0.129490099033735, "grad_norm": 0.81640625, "learning_rate": 0.00019593991638511128, "loss": 1.1149, "step": 5043 }, { "epoch": 0.12951577622965685, "grad_norm": 0.87890625, "learning_rate": 0.0001959386571531805, "loss": 1.2869, "step": 5044 }, { "epoch": 0.12954145342557866, "grad_norm": 0.84765625, "learning_rate": 0.00019593739773005262, "loss": 1.2338, "step": 5045 }, { "epoch": 0.1295671306215005, "grad_norm": 0.9921875, "learning_rate": 0.0001959361381157302, "loss": 1.0597, "step": 5046 }, { "epoch": 0.1295928078174223, "grad_norm": 0.8203125, "learning_rate": 0.00019593487831021572, "loss": 1.0304, "step": 5047 }, { "epoch": 0.1296184850133441, "grad_norm": 0.921875, "learning_rate": 0.0001959336183135117, "loss": 1.2443, "step": 5048 }, { "epoch": 0.12964416220926595, "grad_norm": 0.8125, "learning_rate": 0.00019593235812562064, "loss": 1.1094, "step": 5049 }, { "epoch": 0.12966983940518775, "grad_norm": 0.94921875, "learning_rate": 0.00019593109774654503, "loss": 1.2836, "step": 5050 }, { "epoch": 0.1296955166011096, "grad_norm": 0.88671875, "learning_rate": 0.00019592983717628746, "loss": 1.186, "step": 5051 }, { "epoch": 0.1297211937970314, "grad_norm": 0.80078125, "learning_rate": 0.00019592857641485037, "loss": 1.1957, "step": 5052 }, { "epoch": 0.1297468709929532, "grad_norm": 0.83203125, "learning_rate": 0.00019592731546223626, "loss": 1.0788, "step": 5053 }, { "epoch": 0.12977254818887504, "grad_norm": 0.75390625, "learning_rate": 0.0001959260543184477, "loss": 1.2097, "step": 5054 }, { "epoch": 0.12979822538479685, "grad_norm": 1.375, "learning_rate": 0.0001959247929834872, "loss": 1.0955, "step": 5055 }, { "epoch": 0.1298239025807187, "grad_norm": 0.83203125, "learning_rate": 0.0001959235314573572, "loss": 1.1163, "step": 5056 }, { "epoch": 0.1298495797766405, "grad_norm": 0.89453125, "learning_rate": 0.00019592226974006032, "loss": 1.2465, "step": 5057 }, { "epoch": 0.1298752569725623, "grad_norm": 0.859375, "learning_rate": 0.000195921007831599, "loss": 1.0545, "step": 5058 }, { "epoch": 0.12990093416848414, "grad_norm": 0.8671875, "learning_rate": 0.00019591974573197573, "loss": 1.1332, "step": 5059 }, { "epoch": 0.12992661136440595, "grad_norm": 1.0, "learning_rate": 0.00019591848344119311, "loss": 1.0457, "step": 5060 }, { "epoch": 0.12995228856032776, "grad_norm": 0.828125, "learning_rate": 0.00019591722095925365, "loss": 1.0803, "step": 5061 }, { "epoch": 0.1299779657562496, "grad_norm": 0.90234375, "learning_rate": 0.00019591595828615977, "loss": 1.0663, "step": 5062 }, { "epoch": 0.1300036429521714, "grad_norm": 0.86328125, "learning_rate": 0.00019591469542191411, "loss": 1.2551, "step": 5063 }, { "epoch": 0.13002932014809324, "grad_norm": 0.77734375, "learning_rate": 0.00019591343236651909, "loss": 1.0218, "step": 5064 }, { "epoch": 0.13005499734401504, "grad_norm": 0.828125, "learning_rate": 0.00019591216911997726, "loss": 1.1068, "step": 5065 }, { "epoch": 0.13008067453993685, "grad_norm": 0.80078125, "learning_rate": 0.00019591090568229117, "loss": 1.0894, "step": 5066 }, { "epoch": 0.1301063517358587, "grad_norm": 0.76171875, "learning_rate": 0.00019590964205346326, "loss": 1.0297, "step": 5067 }, { "epoch": 0.1301320289317805, "grad_norm": 0.828125, "learning_rate": 0.00019590837823349615, "loss": 1.0539, "step": 5068 }, { "epoch": 0.13015770612770233, "grad_norm": 0.875, "learning_rate": 0.00019590711422239228, "loss": 1.1416, "step": 5069 }, { "epoch": 0.13018338332362414, "grad_norm": 0.88671875, "learning_rate": 0.00019590585002015421, "loss": 1.1291, "step": 5070 }, { "epoch": 0.13020906051954595, "grad_norm": 0.859375, "learning_rate": 0.00019590458562678445, "loss": 1.2295, "step": 5071 }, { "epoch": 0.13023473771546779, "grad_norm": 0.90625, "learning_rate": 0.00019590332104228546, "loss": 1.1084, "step": 5072 }, { "epoch": 0.1302604149113896, "grad_norm": 0.796875, "learning_rate": 0.00019590205626665984, "loss": 1.2252, "step": 5073 }, { "epoch": 0.13028609210731143, "grad_norm": 0.8671875, "learning_rate": 0.0001959007912999101, "loss": 1.1078, "step": 5074 }, { "epoch": 0.13031176930323324, "grad_norm": 0.79296875, "learning_rate": 0.00019589952614203874, "loss": 1.1248, "step": 5075 }, { "epoch": 0.13033744649915505, "grad_norm": 0.86328125, "learning_rate": 0.0001958982607930483, "loss": 1.039, "step": 5076 }, { "epoch": 0.13036312369507688, "grad_norm": 0.90234375, "learning_rate": 0.00019589699525294127, "loss": 1.1286, "step": 5077 }, { "epoch": 0.1303888008909987, "grad_norm": 0.84765625, "learning_rate": 0.0001958957295217202, "loss": 1.1536, "step": 5078 }, { "epoch": 0.13041447808692053, "grad_norm": 0.875, "learning_rate": 0.00019589446359938762, "loss": 1.2547, "step": 5079 }, { "epoch": 0.13044015528284233, "grad_norm": 0.765625, "learning_rate": 0.00019589319748594602, "loss": 1.1203, "step": 5080 }, { "epoch": 0.13046583247876414, "grad_norm": 0.90234375, "learning_rate": 0.0001958919311813979, "loss": 1.0358, "step": 5081 }, { "epoch": 0.13049150967468598, "grad_norm": 0.87109375, "learning_rate": 0.00019589066468574587, "loss": 1.2583, "step": 5082 }, { "epoch": 0.1305171868706078, "grad_norm": 0.875, "learning_rate": 0.00019588939799899239, "loss": 1.1808, "step": 5083 }, { "epoch": 0.13054286406652962, "grad_norm": 0.8203125, "learning_rate": 0.00019588813112114, "loss": 1.0646, "step": 5084 }, { "epoch": 0.13056854126245143, "grad_norm": 0.8125, "learning_rate": 0.00019588686405219122, "loss": 1.1804, "step": 5085 }, { "epoch": 0.13059421845837324, "grad_norm": 0.85546875, "learning_rate": 0.00019588559679214863, "loss": 1.0969, "step": 5086 }, { "epoch": 0.13061989565429508, "grad_norm": 0.80859375, "learning_rate": 0.00019588432934101465, "loss": 1.0216, "step": 5087 }, { "epoch": 0.13064557285021688, "grad_norm": 0.82421875, "learning_rate": 0.0001958830616987919, "loss": 0.9896, "step": 5088 }, { "epoch": 0.13067125004613872, "grad_norm": 0.9296875, "learning_rate": 0.00019588179386548284, "loss": 1.0272, "step": 5089 }, { "epoch": 0.13069692724206053, "grad_norm": 0.80859375, "learning_rate": 0.00019588052584109005, "loss": 1.0275, "step": 5090 }, { "epoch": 0.13072260443798234, "grad_norm": 0.80078125, "learning_rate": 0.000195879257625616, "loss": 1.1623, "step": 5091 }, { "epoch": 0.13074828163390417, "grad_norm": 0.875, "learning_rate": 0.00019587798921906328, "loss": 1.2704, "step": 5092 }, { "epoch": 0.13077395882982598, "grad_norm": 0.80859375, "learning_rate": 0.00019587672062143437, "loss": 1.13, "step": 5093 }, { "epoch": 0.13079963602574782, "grad_norm": 0.78125, "learning_rate": 0.00019587545183273185, "loss": 1.0334, "step": 5094 }, { "epoch": 0.13082531322166963, "grad_norm": 0.828125, "learning_rate": 0.0001958741828529582, "loss": 1.0931, "step": 5095 }, { "epoch": 0.13085099041759143, "grad_norm": 0.80078125, "learning_rate": 0.00019587291368211593, "loss": 1.1827, "step": 5096 }, { "epoch": 0.13087666761351327, "grad_norm": 0.85546875, "learning_rate": 0.00019587164432020764, "loss": 1.1037, "step": 5097 }, { "epoch": 0.13090234480943508, "grad_norm": 0.86328125, "learning_rate": 0.00019587037476723583, "loss": 1.1701, "step": 5098 }, { "epoch": 0.13092802200535691, "grad_norm": 0.83984375, "learning_rate": 0.00019586910502320296, "loss": 1.15, "step": 5099 }, { "epoch": 0.13095369920127872, "grad_norm": 0.83984375, "learning_rate": 0.0001958678350881117, "loss": 1.0661, "step": 5100 }, { "epoch": 0.13097937639720053, "grad_norm": 0.9140625, "learning_rate": 0.00019586656496196447, "loss": 1.1498, "step": 5101 }, { "epoch": 0.13100505359312237, "grad_norm": 0.9375, "learning_rate": 0.00019586529464476384, "loss": 1.0938, "step": 5102 }, { "epoch": 0.13103073078904418, "grad_norm": 2.125, "learning_rate": 0.00019586402413651234, "loss": 1.1892, "step": 5103 }, { "epoch": 0.131056407984966, "grad_norm": 1.0859375, "learning_rate": 0.00019586275343721248, "loss": 1.0318, "step": 5104 }, { "epoch": 0.13108208518088782, "grad_norm": 0.859375, "learning_rate": 0.00019586148254686683, "loss": 1.1539, "step": 5105 }, { "epoch": 0.13110776237680963, "grad_norm": 1.2109375, "learning_rate": 0.0001958602114654779, "loss": 1.0735, "step": 5106 }, { "epoch": 0.13113343957273146, "grad_norm": 0.8828125, "learning_rate": 0.00019585894019304823, "loss": 0.9837, "step": 5107 }, { "epoch": 0.13115911676865327, "grad_norm": 0.7890625, "learning_rate": 0.00019585766872958033, "loss": 1.0502, "step": 5108 }, { "epoch": 0.1311847939645751, "grad_norm": 0.8359375, "learning_rate": 0.0001958563970750768, "loss": 0.9333, "step": 5109 }, { "epoch": 0.13121047116049692, "grad_norm": 0.84375, "learning_rate": 0.0001958551252295401, "loss": 1.0465, "step": 5110 }, { "epoch": 0.13123614835641872, "grad_norm": 1.0703125, "learning_rate": 0.00019585385319297277, "loss": 0.9225, "step": 5111 }, { "epoch": 0.13126182555234056, "grad_norm": 0.890625, "learning_rate": 0.00019585258096537742, "loss": 1.1357, "step": 5112 }, { "epoch": 0.13128750274826237, "grad_norm": 0.92578125, "learning_rate": 0.0001958513085467565, "loss": 1.1693, "step": 5113 }, { "epoch": 0.1313131799441842, "grad_norm": 0.921875, "learning_rate": 0.0001958500359371126, "loss": 1.1308, "step": 5114 }, { "epoch": 0.131338857140106, "grad_norm": 0.87109375, "learning_rate": 0.00019584876313644823, "loss": 1.1126, "step": 5115 }, { "epoch": 0.13136453433602782, "grad_norm": 0.81640625, "learning_rate": 0.00019584749014476592, "loss": 1.1646, "step": 5116 }, { "epoch": 0.13139021153194966, "grad_norm": 7.5625, "learning_rate": 0.00019584621696206825, "loss": 1.0343, "step": 5117 }, { "epoch": 0.13141588872787147, "grad_norm": 0.81640625, "learning_rate": 0.00019584494358835768, "loss": 1.0828, "step": 5118 }, { "epoch": 0.1314415659237933, "grad_norm": 0.8359375, "learning_rate": 0.00019584367002363685, "loss": 1.185, "step": 5119 }, { "epoch": 0.1314672431197151, "grad_norm": 0.82421875, "learning_rate": 0.00019584239626790822, "loss": 1.18, "step": 5120 }, { "epoch": 0.13149292031563692, "grad_norm": 0.9375, "learning_rate": 0.00019584112232117433, "loss": 0.9678, "step": 5121 }, { "epoch": 0.13151859751155875, "grad_norm": 1.109375, "learning_rate": 0.00019583984818343777, "loss": 1.1613, "step": 5122 }, { "epoch": 0.13154427470748056, "grad_norm": 1.0390625, "learning_rate": 0.000195838573854701, "loss": 1.1942, "step": 5123 }, { "epoch": 0.1315699519034024, "grad_norm": 0.8359375, "learning_rate": 0.00019583729933496667, "loss": 1.2156, "step": 5124 }, { "epoch": 0.1315956290993242, "grad_norm": 0.765625, "learning_rate": 0.00019583602462423723, "loss": 1.2716, "step": 5125 }, { "epoch": 0.13162130629524602, "grad_norm": 0.83984375, "learning_rate": 0.00019583474972251525, "loss": 1.1275, "step": 5126 }, { "epoch": 0.13164698349116785, "grad_norm": 0.8203125, "learning_rate": 0.0001958334746298033, "loss": 1.1052, "step": 5127 }, { "epoch": 0.13167266068708966, "grad_norm": 0.90625, "learning_rate": 0.00019583219934610386, "loss": 1.1944, "step": 5128 }, { "epoch": 0.1316983378830115, "grad_norm": 0.90234375, "learning_rate": 0.0001958309238714195, "loss": 1.1052, "step": 5129 }, { "epoch": 0.1317240150789333, "grad_norm": 0.87109375, "learning_rate": 0.0001958296482057528, "loss": 1.1581, "step": 5130 }, { "epoch": 0.1317496922748551, "grad_norm": 0.796875, "learning_rate": 0.00019582837234910624, "loss": 1.2337, "step": 5131 }, { "epoch": 0.13177536947077695, "grad_norm": 0.95703125, "learning_rate": 0.00019582709630148237, "loss": 1.1687, "step": 5132 }, { "epoch": 0.13180104666669876, "grad_norm": 0.84765625, "learning_rate": 0.00019582582006288379, "loss": 1.147, "step": 5133 }, { "epoch": 0.1318267238626206, "grad_norm": 0.96484375, "learning_rate": 0.000195824543633313, "loss": 1.1434, "step": 5134 }, { "epoch": 0.1318524010585424, "grad_norm": 0.8203125, "learning_rate": 0.00019582326701277254, "loss": 1.0363, "step": 5135 }, { "epoch": 0.1318780782544642, "grad_norm": 0.83984375, "learning_rate": 0.00019582199020126497, "loss": 1.175, "step": 5136 }, { "epoch": 0.13190375545038605, "grad_norm": 0.81640625, "learning_rate": 0.00019582071319879283, "loss": 1.1337, "step": 5137 }, { "epoch": 0.13192943264630785, "grad_norm": 0.8046875, "learning_rate": 0.00019581943600535865, "loss": 1.2228, "step": 5138 }, { "epoch": 0.1319551098422297, "grad_norm": 0.81640625, "learning_rate": 0.000195818158620965, "loss": 1.0055, "step": 5139 }, { "epoch": 0.1319807870381515, "grad_norm": 0.8125, "learning_rate": 0.00019581688104561442, "loss": 1.0445, "step": 5140 }, { "epoch": 0.1320064642340733, "grad_norm": 0.86328125, "learning_rate": 0.00019581560327930946, "loss": 1.1624, "step": 5141 }, { "epoch": 0.13203214142999514, "grad_norm": 0.87109375, "learning_rate": 0.00019581432532205263, "loss": 1.2063, "step": 5142 }, { "epoch": 0.13205781862591695, "grad_norm": 0.890625, "learning_rate": 0.00019581304717384654, "loss": 1.1939, "step": 5143 }, { "epoch": 0.1320834958218388, "grad_norm": 0.8046875, "learning_rate": 0.00019581176883469366, "loss": 0.9114, "step": 5144 }, { "epoch": 0.1321091730177606, "grad_norm": 0.90625, "learning_rate": 0.00019581049030459663, "loss": 1.1243, "step": 5145 }, { "epoch": 0.1321348502136824, "grad_norm": 0.9453125, "learning_rate": 0.0001958092115835579, "loss": 1.1779, "step": 5146 }, { "epoch": 0.13216052740960424, "grad_norm": 0.984375, "learning_rate": 0.0001958079326715801, "loss": 1.084, "step": 5147 }, { "epoch": 0.13218620460552605, "grad_norm": 0.8515625, "learning_rate": 0.0001958066535686657, "loss": 1.1193, "step": 5148 }, { "epoch": 0.13221188180144788, "grad_norm": 0.8359375, "learning_rate": 0.00019580537427481734, "loss": 1.1418, "step": 5149 }, { "epoch": 0.1322375589973697, "grad_norm": 0.87890625, "learning_rate": 0.0001958040947900375, "loss": 1.0332, "step": 5150 }, { "epoch": 0.1322632361932915, "grad_norm": 0.88671875, "learning_rate": 0.00019580281511432876, "loss": 1.0631, "step": 5151 }, { "epoch": 0.13228891338921334, "grad_norm": 0.84765625, "learning_rate": 0.00019580153524769367, "loss": 1.2038, "step": 5152 }, { "epoch": 0.13231459058513514, "grad_norm": 0.8125, "learning_rate": 0.00019580025519013475, "loss": 0.9595, "step": 5153 }, { "epoch": 0.13234026778105698, "grad_norm": 0.91015625, "learning_rate": 0.0001957989749416546, "loss": 1.1635, "step": 5154 }, { "epoch": 0.1323659449769788, "grad_norm": 0.890625, "learning_rate": 0.00019579769450225572, "loss": 1.2595, "step": 5155 }, { "epoch": 0.1323916221729006, "grad_norm": 0.76953125, "learning_rate": 0.0001957964138719407, "loss": 1.1027, "step": 5156 }, { "epoch": 0.13241729936882243, "grad_norm": 0.82421875, "learning_rate": 0.0001957951330507121, "loss": 1.0621, "step": 5157 }, { "epoch": 0.13244297656474424, "grad_norm": 0.92578125, "learning_rate": 0.00019579385203857244, "loss": 1.1161, "step": 5158 }, { "epoch": 0.13246865376066608, "grad_norm": 0.91015625, "learning_rate": 0.0001957925708355243, "loss": 1.2299, "step": 5159 }, { "epoch": 0.13249433095658789, "grad_norm": 0.96484375, "learning_rate": 0.0001957912894415702, "loss": 1.1955, "step": 5160 }, { "epoch": 0.1325200081525097, "grad_norm": 0.8828125, "learning_rate": 0.0001957900078567127, "loss": 1.0943, "step": 5161 }, { "epoch": 0.13254568534843153, "grad_norm": 0.91796875, "learning_rate": 0.00019578872608095438, "loss": 1.2024, "step": 5162 }, { "epoch": 0.13257136254435334, "grad_norm": 0.84765625, "learning_rate": 0.0001957874441142978, "loss": 0.9645, "step": 5163 }, { "epoch": 0.13259703974027517, "grad_norm": 0.89453125, "learning_rate": 0.00019578616195674547, "loss": 1.0736, "step": 5164 }, { "epoch": 0.13262271693619698, "grad_norm": 0.859375, "learning_rate": 0.0001957848796083, "loss": 1.1656, "step": 5165 }, { "epoch": 0.1326483941321188, "grad_norm": 0.86328125, "learning_rate": 0.00019578359706896387, "loss": 1.2073, "step": 5166 }, { "epoch": 0.13267407132804063, "grad_norm": 1.0078125, "learning_rate": 0.00019578231433873972, "loss": 0.9344, "step": 5167 }, { "epoch": 0.13269974852396244, "grad_norm": 0.875, "learning_rate": 0.0001957810314176301, "loss": 1.1713, "step": 5168 }, { "epoch": 0.13272542571988427, "grad_norm": 0.86328125, "learning_rate": 0.0001957797483056375, "loss": 1.0037, "step": 5169 }, { "epoch": 0.13275110291580608, "grad_norm": 0.87109375, "learning_rate": 0.00019577846500276448, "loss": 1.1763, "step": 5170 }, { "epoch": 0.1327767801117279, "grad_norm": 0.90625, "learning_rate": 0.00019577718150901366, "loss": 1.026, "step": 5171 }, { "epoch": 0.13280245730764972, "grad_norm": 0.83984375, "learning_rate": 0.0001957758978243876, "loss": 1.0096, "step": 5172 }, { "epoch": 0.13282813450357153, "grad_norm": 0.85546875, "learning_rate": 0.00019577461394888878, "loss": 1.1129, "step": 5173 }, { "epoch": 0.13285381169949337, "grad_norm": 0.85546875, "learning_rate": 0.00019577332988251982, "loss": 1.2449, "step": 5174 }, { "epoch": 0.13287948889541518, "grad_norm": 0.78125, "learning_rate": 0.00019577204562528326, "loss": 1.0788, "step": 5175 }, { "epoch": 0.13290516609133698, "grad_norm": 0.8125, "learning_rate": 0.0001957707611771817, "loss": 1.1226, "step": 5176 }, { "epoch": 0.13293084328725882, "grad_norm": 1.0546875, "learning_rate": 0.00019576947653821762, "loss": 1.1135, "step": 5177 }, { "epoch": 0.13295652048318063, "grad_norm": 0.890625, "learning_rate": 0.00019576819170839365, "loss": 1.0929, "step": 5178 }, { "epoch": 0.13298219767910247, "grad_norm": 0.859375, "learning_rate": 0.00019576690668771232, "loss": 1.0637, "step": 5179 }, { "epoch": 0.13300787487502427, "grad_norm": 0.8125, "learning_rate": 0.0001957656214761762, "loss": 1.1208, "step": 5180 }, { "epoch": 0.13303355207094608, "grad_norm": 0.875, "learning_rate": 0.00019576433607378782, "loss": 1.1096, "step": 5181 }, { "epoch": 0.13305922926686792, "grad_norm": 0.85546875, "learning_rate": 0.00019576305048054978, "loss": 1.1014, "step": 5182 }, { "epoch": 0.13308490646278973, "grad_norm": 0.8515625, "learning_rate": 0.00019576176469646462, "loss": 1.0457, "step": 5183 }, { "epoch": 0.13311058365871156, "grad_norm": 0.91796875, "learning_rate": 0.00019576047872153494, "loss": 1.2353, "step": 5184 }, { "epoch": 0.13313626085463337, "grad_norm": 0.97265625, "learning_rate": 0.00019575919255576327, "loss": 1.0463, "step": 5185 }, { "epoch": 0.13316193805055518, "grad_norm": 0.8359375, "learning_rate": 0.00019575790619915217, "loss": 1.0417, "step": 5186 }, { "epoch": 0.13318761524647701, "grad_norm": 0.96875, "learning_rate": 0.00019575661965170422, "loss": 1.0662, "step": 5187 }, { "epoch": 0.13321329244239882, "grad_norm": 1.0546875, "learning_rate": 0.000195755332913422, "loss": 1.2509, "step": 5188 }, { "epoch": 0.13323896963832066, "grad_norm": 0.84765625, "learning_rate": 0.00019575404598430802, "loss": 1.0449, "step": 5189 }, { "epoch": 0.13326464683424247, "grad_norm": 1.46875, "learning_rate": 0.0001957527588643649, "loss": 1.0214, "step": 5190 }, { "epoch": 0.13329032403016428, "grad_norm": 1.0078125, "learning_rate": 0.00019575147155359514, "loss": 1.148, "step": 5191 }, { "epoch": 0.1333160012260861, "grad_norm": 0.84765625, "learning_rate": 0.00019575018405200138, "loss": 1.0627, "step": 5192 }, { "epoch": 0.13334167842200792, "grad_norm": 0.90234375, "learning_rate": 0.00019574889635958612, "loss": 1.2881, "step": 5193 }, { "epoch": 0.13336735561792976, "grad_norm": 0.82421875, "learning_rate": 0.000195747608476352, "loss": 1.1318, "step": 5194 }, { "epoch": 0.13339303281385156, "grad_norm": 0.87109375, "learning_rate": 0.0001957463204023015, "loss": 1.1032, "step": 5195 }, { "epoch": 0.13341871000977337, "grad_norm": 0.8515625, "learning_rate": 0.00019574503213743726, "loss": 1.0418, "step": 5196 }, { "epoch": 0.1334443872056952, "grad_norm": 0.90234375, "learning_rate": 0.00019574374368176183, "loss": 0.9743, "step": 5197 }, { "epoch": 0.13347006440161702, "grad_norm": 0.8828125, "learning_rate": 0.00019574245503527777, "loss": 0.9677, "step": 5198 }, { "epoch": 0.13349574159753885, "grad_norm": 0.87109375, "learning_rate": 0.00019574116619798762, "loss": 1.1408, "step": 5199 }, { "epoch": 0.13352141879346066, "grad_norm": 0.875, "learning_rate": 0.000195739877169894, "loss": 1.3369, "step": 5200 }, { "epoch": 0.13354709598938247, "grad_norm": 0.84765625, "learning_rate": 0.00019573858795099943, "loss": 1.1653, "step": 5201 }, { "epoch": 0.1335727731853043, "grad_norm": 0.78515625, "learning_rate": 0.0001957372985413065, "loss": 1.0455, "step": 5202 }, { "epoch": 0.1335984503812261, "grad_norm": 0.859375, "learning_rate": 0.0001957360089408178, "loss": 1.0755, "step": 5203 }, { "epoch": 0.13362412757714795, "grad_norm": 0.8046875, "learning_rate": 0.00019573471914953588, "loss": 0.993, "step": 5204 }, { "epoch": 0.13364980477306976, "grad_norm": 0.80078125, "learning_rate": 0.00019573342916746328, "loss": 1.1053, "step": 5205 }, { "epoch": 0.13367548196899157, "grad_norm": 0.83203125, "learning_rate": 0.00019573213899460264, "loss": 1.149, "step": 5206 }, { "epoch": 0.1337011591649134, "grad_norm": 0.7734375, "learning_rate": 0.00019573084863095647, "loss": 1.117, "step": 5207 }, { "epoch": 0.1337268363608352, "grad_norm": 0.84375, "learning_rate": 0.0001957295580765274, "loss": 1.1055, "step": 5208 }, { "epoch": 0.13375251355675705, "grad_norm": 0.84375, "learning_rate": 0.00019572826733131794, "loss": 1.2711, "step": 5209 }, { "epoch": 0.13377819075267885, "grad_norm": 0.93359375, "learning_rate": 0.00019572697639533068, "loss": 1.1236, "step": 5210 }, { "epoch": 0.13380386794860066, "grad_norm": 0.8515625, "learning_rate": 0.00019572568526856824, "loss": 1.1375, "step": 5211 }, { "epoch": 0.1338295451445225, "grad_norm": 0.89453125, "learning_rate": 0.00019572439395103313, "loss": 1.0199, "step": 5212 }, { "epoch": 0.1338552223404443, "grad_norm": 0.78515625, "learning_rate": 0.00019572310244272796, "loss": 1.0353, "step": 5213 }, { "epoch": 0.13388089953636614, "grad_norm": 0.8671875, "learning_rate": 0.00019572181074365528, "loss": 1.0148, "step": 5214 }, { "epoch": 0.13390657673228795, "grad_norm": 0.9296875, "learning_rate": 0.0001957205188538177, "loss": 1.1784, "step": 5215 }, { "epoch": 0.13393225392820976, "grad_norm": 0.875, "learning_rate": 0.00019571922677321776, "loss": 1.1138, "step": 5216 }, { "epoch": 0.1339579311241316, "grad_norm": 0.85546875, "learning_rate": 0.00019571793450185804, "loss": 1.1439, "step": 5217 }, { "epoch": 0.1339836083200534, "grad_norm": 0.81640625, "learning_rate": 0.0001957166420397411, "loss": 1.1541, "step": 5218 }, { "epoch": 0.13400928551597524, "grad_norm": 0.85546875, "learning_rate": 0.0001957153493868696, "loss": 1.1751, "step": 5219 }, { "epoch": 0.13403496271189705, "grad_norm": 0.83203125, "learning_rate": 0.00019571405654324601, "loss": 1.0572, "step": 5220 }, { "epoch": 0.13406063990781886, "grad_norm": 0.828125, "learning_rate": 0.00019571276350887295, "loss": 1.1265, "step": 5221 }, { "epoch": 0.1340863171037407, "grad_norm": 0.765625, "learning_rate": 0.00019571147028375302, "loss": 1.1137, "step": 5222 }, { "epoch": 0.1341119942996625, "grad_norm": 0.8671875, "learning_rate": 0.00019571017686788878, "loss": 1.0535, "step": 5223 }, { "epoch": 0.13413767149558434, "grad_norm": 0.87890625, "learning_rate": 0.00019570888326128278, "loss": 1.1433, "step": 5224 }, { "epoch": 0.13416334869150615, "grad_norm": 0.8359375, "learning_rate": 0.00019570758946393764, "loss": 1.1044, "step": 5225 }, { "epoch": 0.13418902588742795, "grad_norm": 0.859375, "learning_rate": 0.0001957062954758559, "loss": 1.1386, "step": 5226 }, { "epoch": 0.1342147030833498, "grad_norm": 0.8671875, "learning_rate": 0.0001957050012970402, "loss": 1.1625, "step": 5227 }, { "epoch": 0.1342403802792716, "grad_norm": 0.8203125, "learning_rate": 0.00019570370692749303, "loss": 1.2971, "step": 5228 }, { "epoch": 0.13426605747519343, "grad_norm": 0.8515625, "learning_rate": 0.00019570241236721702, "loss": 1.0787, "step": 5229 }, { "epoch": 0.13429173467111524, "grad_norm": 0.80078125, "learning_rate": 0.00019570111761621476, "loss": 1.1955, "step": 5230 }, { "epoch": 0.13431741186703705, "grad_norm": 0.85546875, "learning_rate": 0.00019569982267448883, "loss": 1.0607, "step": 5231 }, { "epoch": 0.1343430890629589, "grad_norm": 0.83984375, "learning_rate": 0.00019569852754204177, "loss": 1.0817, "step": 5232 }, { "epoch": 0.1343687662588807, "grad_norm": 0.84765625, "learning_rate": 0.0001956972322188762, "loss": 1.0206, "step": 5233 }, { "epoch": 0.13439444345480253, "grad_norm": 0.82421875, "learning_rate": 0.0001956959367049947, "loss": 1.0286, "step": 5234 }, { "epoch": 0.13442012065072434, "grad_norm": 0.8359375, "learning_rate": 0.00019569464100039984, "loss": 1.0387, "step": 5235 }, { "epoch": 0.13444579784664615, "grad_norm": 0.875, "learning_rate": 0.0001956933451050942, "loss": 1.1679, "step": 5236 }, { "epoch": 0.13447147504256798, "grad_norm": 0.8984375, "learning_rate": 0.00019569204901908038, "loss": 1.1502, "step": 5237 }, { "epoch": 0.1344971522384898, "grad_norm": 0.83203125, "learning_rate": 0.00019569075274236095, "loss": 1.2262, "step": 5238 }, { "epoch": 0.13452282943441163, "grad_norm": 0.84765625, "learning_rate": 0.00019568945627493846, "loss": 1.141, "step": 5239 }, { "epoch": 0.13454850663033344, "grad_norm": 0.7734375, "learning_rate": 0.00019568815961681555, "loss": 0.9807, "step": 5240 }, { "epoch": 0.13457418382625524, "grad_norm": 0.75, "learning_rate": 0.00019568686276799478, "loss": 0.9869, "step": 5241 }, { "epoch": 0.13459986102217708, "grad_norm": 0.90234375, "learning_rate": 0.00019568556572847876, "loss": 1.3204, "step": 5242 }, { "epoch": 0.1346255382180989, "grad_norm": 0.859375, "learning_rate": 0.00019568426849826998, "loss": 1.1125, "step": 5243 }, { "epoch": 0.13465121541402073, "grad_norm": 0.80859375, "learning_rate": 0.00019568297107737119, "loss": 1.0567, "step": 5244 }, { "epoch": 0.13467689260994253, "grad_norm": 1.0703125, "learning_rate": 0.00019568167346578483, "loss": 1.1174, "step": 5245 }, { "epoch": 0.13470256980586434, "grad_norm": 0.83984375, "learning_rate": 0.00019568037566351353, "loss": 1.1333, "step": 5246 }, { "epoch": 0.13472824700178618, "grad_norm": 0.796875, "learning_rate": 0.00019567907767055988, "loss": 0.9702, "step": 5247 }, { "epoch": 0.13475392419770799, "grad_norm": 0.90625, "learning_rate": 0.0001956777794869265, "loss": 1.1605, "step": 5248 }, { "epoch": 0.13477960139362982, "grad_norm": 0.86328125, "learning_rate": 0.00019567648111261593, "loss": 1.2818, "step": 5249 }, { "epoch": 0.13480527858955163, "grad_norm": 0.80078125, "learning_rate": 0.0001956751825476308, "loss": 1.1231, "step": 5250 }, { "epoch": 0.13483095578547344, "grad_norm": 0.88671875, "learning_rate": 0.00019567388379197365, "loss": 1.1095, "step": 5251 }, { "epoch": 0.13485663298139527, "grad_norm": 0.76953125, "learning_rate": 0.0001956725848456471, "loss": 1.0195, "step": 5252 }, { "epoch": 0.13488231017731708, "grad_norm": 0.83203125, "learning_rate": 0.00019567128570865372, "loss": 1.1228, "step": 5253 }, { "epoch": 0.13490798737323892, "grad_norm": 0.828125, "learning_rate": 0.00019566998638099613, "loss": 1.2502, "step": 5254 }, { "epoch": 0.13493366456916073, "grad_norm": 0.8359375, "learning_rate": 0.00019566868686267687, "loss": 1.1333, "step": 5255 }, { "epoch": 0.13495934176508254, "grad_norm": 0.8515625, "learning_rate": 0.00019566738715369857, "loss": 1.1295, "step": 5256 }, { "epoch": 0.13498501896100437, "grad_norm": 0.8515625, "learning_rate": 0.00019566608725406383, "loss": 1.1675, "step": 5257 }, { "epoch": 0.13501069615692618, "grad_norm": 0.859375, "learning_rate": 0.0001956647871637752, "loss": 1.2221, "step": 5258 }, { "epoch": 0.13503637335284802, "grad_norm": 0.875, "learning_rate": 0.0001956634868828353, "loss": 1.16, "step": 5259 }, { "epoch": 0.13506205054876982, "grad_norm": 0.80078125, "learning_rate": 0.0001956621864112467, "loss": 1.0909, "step": 5260 }, { "epoch": 0.13508772774469163, "grad_norm": 0.92578125, "learning_rate": 0.000195660885749012, "loss": 1.2334, "step": 5261 }, { "epoch": 0.13511340494061347, "grad_norm": 0.83984375, "learning_rate": 0.0001956595848961338, "loss": 1.0181, "step": 5262 }, { "epoch": 0.13513908213653528, "grad_norm": 0.8359375, "learning_rate": 0.00019565828385261467, "loss": 1.0997, "step": 5263 }, { "epoch": 0.1351647593324571, "grad_norm": 0.84375, "learning_rate": 0.00019565698261845728, "loss": 1.0426, "step": 5264 }, { "epoch": 0.13519043652837892, "grad_norm": 0.8125, "learning_rate": 0.00019565568119366412, "loss": 1.0119, "step": 5265 }, { "epoch": 0.13521611372430073, "grad_norm": 0.890625, "learning_rate": 0.0001956543795782378, "loss": 1.1663, "step": 5266 }, { "epoch": 0.13524179092022257, "grad_norm": 1.0625, "learning_rate": 0.000195653077772181, "loss": 1.0827, "step": 5267 }, { "epoch": 0.13526746811614437, "grad_norm": 0.85546875, "learning_rate": 0.00019565177577549625, "loss": 1.0295, "step": 5268 }, { "epoch": 0.13529314531206618, "grad_norm": 2.203125, "learning_rate": 0.00019565047358818612, "loss": 1.0522, "step": 5269 }, { "epoch": 0.13531882250798802, "grad_norm": 0.8359375, "learning_rate": 0.00019564917121025327, "loss": 1.1734, "step": 5270 }, { "epoch": 0.13534449970390983, "grad_norm": 0.83203125, "learning_rate": 0.00019564786864170023, "loss": 1.1503, "step": 5271 }, { "epoch": 0.13537017689983166, "grad_norm": 0.80859375, "learning_rate": 0.00019564656588252964, "loss": 0.9812, "step": 5272 }, { "epoch": 0.13539585409575347, "grad_norm": 0.78515625, "learning_rate": 0.0001956452629327441, "loss": 1.0963, "step": 5273 }, { "epoch": 0.13542153129167528, "grad_norm": 0.765625, "learning_rate": 0.00019564395979234614, "loss": 0.9288, "step": 5274 }, { "epoch": 0.13544720848759711, "grad_norm": 0.82421875, "learning_rate": 0.0001956426564613385, "loss": 0.9904, "step": 5275 }, { "epoch": 0.13547288568351892, "grad_norm": 0.76171875, "learning_rate": 0.0001956413529397236, "loss": 1.0333, "step": 5276 }, { "epoch": 0.13549856287944076, "grad_norm": 0.8671875, "learning_rate": 0.00019564004922750417, "loss": 1.1128, "step": 5277 }, { "epoch": 0.13552424007536257, "grad_norm": 0.79296875, "learning_rate": 0.0001956387453246827, "loss": 1.0214, "step": 5278 }, { "epoch": 0.13554991727128438, "grad_norm": 0.859375, "learning_rate": 0.00019563744123126192, "loss": 1.1385, "step": 5279 }, { "epoch": 0.1355755944672062, "grad_norm": 0.95703125, "learning_rate": 0.00019563613694724432, "loss": 0.9838, "step": 5280 }, { "epoch": 0.13560127166312802, "grad_norm": 0.828125, "learning_rate": 0.00019563483247263258, "loss": 1.014, "step": 5281 }, { "epoch": 0.13562694885904986, "grad_norm": 0.91796875, "learning_rate": 0.00019563352780742918, "loss": 1.206, "step": 5282 }, { "epoch": 0.13565262605497166, "grad_norm": 0.79296875, "learning_rate": 0.0001956322229516369, "loss": 0.98, "step": 5283 }, { "epoch": 0.13567830325089347, "grad_norm": 0.84765625, "learning_rate": 0.00019563091790525815, "loss": 1.1209, "step": 5284 }, { "epoch": 0.1357039804468153, "grad_norm": 0.8203125, "learning_rate": 0.00019562961266829566, "loss": 1.2444, "step": 5285 }, { "epoch": 0.13572965764273712, "grad_norm": 0.84765625, "learning_rate": 0.00019562830724075196, "loss": 1.1827, "step": 5286 }, { "epoch": 0.13575533483865895, "grad_norm": 0.76953125, "learning_rate": 0.0001956270016226297, "loss": 1.0943, "step": 5287 }, { "epoch": 0.13578101203458076, "grad_norm": 0.9140625, "learning_rate": 0.00019562569581393148, "loss": 1.0363, "step": 5288 }, { "epoch": 0.13580668923050257, "grad_norm": 0.90625, "learning_rate": 0.00019562438981465986, "loss": 1.2217, "step": 5289 }, { "epoch": 0.1358323664264244, "grad_norm": 0.86328125, "learning_rate": 0.0001956230836248175, "loss": 1.005, "step": 5290 }, { "epoch": 0.13585804362234621, "grad_norm": 0.921875, "learning_rate": 0.00019562177724440693, "loss": 1.2815, "step": 5291 }, { "epoch": 0.13588372081826805, "grad_norm": 0.81640625, "learning_rate": 0.00019562047067343081, "loss": 1.0745, "step": 5292 }, { "epoch": 0.13590939801418986, "grad_norm": 0.88671875, "learning_rate": 0.00019561916391189175, "loss": 1.1021, "step": 5293 }, { "epoch": 0.13593507521011167, "grad_norm": 0.875, "learning_rate": 0.0001956178569597923, "loss": 1.2195, "step": 5294 }, { "epoch": 0.1359607524060335, "grad_norm": 0.82421875, "learning_rate": 0.00019561654981713512, "loss": 1.1256, "step": 5295 }, { "epoch": 0.1359864296019553, "grad_norm": 0.79296875, "learning_rate": 0.0001956152424839228, "loss": 1.0482, "step": 5296 }, { "epoch": 0.13601210679787715, "grad_norm": 0.94140625, "learning_rate": 0.00019561393496015793, "loss": 1.164, "step": 5297 }, { "epoch": 0.13603778399379896, "grad_norm": 1.0703125, "learning_rate": 0.00019561262724584314, "loss": 1.1224, "step": 5298 }, { "epoch": 0.13606346118972076, "grad_norm": 1.234375, "learning_rate": 0.000195611319340981, "loss": 0.9536, "step": 5299 }, { "epoch": 0.1360891383856426, "grad_norm": 0.89453125, "learning_rate": 0.00019561001124557414, "loss": 1.084, "step": 5300 }, { "epoch": 0.1361148155815644, "grad_norm": 0.875, "learning_rate": 0.0001956087029596252, "loss": 1.0999, "step": 5301 }, { "epoch": 0.13614049277748624, "grad_norm": 0.8984375, "learning_rate": 0.0001956073944831367, "loss": 1.1511, "step": 5302 }, { "epoch": 0.13616616997340805, "grad_norm": 0.84765625, "learning_rate": 0.00019560608581611132, "loss": 1.1255, "step": 5303 }, { "epoch": 0.13619184716932986, "grad_norm": 0.890625, "learning_rate": 0.00019560477695855163, "loss": 1.0949, "step": 5304 }, { "epoch": 0.1362175243652517, "grad_norm": 0.84765625, "learning_rate": 0.00019560346791046027, "loss": 1.0007, "step": 5305 }, { "epoch": 0.1362432015611735, "grad_norm": 0.83203125, "learning_rate": 0.00019560215867183984, "loss": 1.2987, "step": 5306 }, { "epoch": 0.13626887875709534, "grad_norm": 0.8203125, "learning_rate": 0.00019560084924269292, "loss": 1.0661, "step": 5307 }, { "epoch": 0.13629455595301715, "grad_norm": 0.84765625, "learning_rate": 0.00019559953962302218, "loss": 0.9379, "step": 5308 }, { "epoch": 0.13632023314893896, "grad_norm": 0.859375, "learning_rate": 0.00019559822981283018, "loss": 1.2353, "step": 5309 }, { "epoch": 0.1363459103448608, "grad_norm": 0.89453125, "learning_rate": 0.0001955969198121195, "loss": 1.154, "step": 5310 }, { "epoch": 0.1363715875407826, "grad_norm": 0.8046875, "learning_rate": 0.00019559560962089284, "loss": 1.1146, "step": 5311 }, { "epoch": 0.13639726473670444, "grad_norm": 0.82421875, "learning_rate": 0.00019559429923915277, "loss": 0.9957, "step": 5312 }, { "epoch": 0.13642294193262625, "grad_norm": 0.8828125, "learning_rate": 0.0001955929886669019, "loss": 1.0216, "step": 5313 }, { "epoch": 0.13644861912854805, "grad_norm": 0.8828125, "learning_rate": 0.00019559167790414282, "loss": 1.2073, "step": 5314 }, { "epoch": 0.1364742963244699, "grad_norm": 0.78125, "learning_rate": 0.00019559036695087815, "loss": 1.0172, "step": 5315 }, { "epoch": 0.1364999735203917, "grad_norm": 0.79296875, "learning_rate": 0.0001955890558071105, "loss": 1.1338, "step": 5316 }, { "epoch": 0.13652565071631353, "grad_norm": 0.91796875, "learning_rate": 0.00019558774447284253, "loss": 1.2025, "step": 5317 }, { "epoch": 0.13655132791223534, "grad_norm": 0.890625, "learning_rate": 0.0001955864329480768, "loss": 1.1881, "step": 5318 }, { "epoch": 0.13657700510815715, "grad_norm": 0.828125, "learning_rate": 0.000195585121232816, "loss": 1.0948, "step": 5319 }, { "epoch": 0.136602682304079, "grad_norm": 1.515625, "learning_rate": 0.00019558380932706263, "loss": 1.0107, "step": 5320 }, { "epoch": 0.1366283595000008, "grad_norm": 0.875, "learning_rate": 0.0001955824972308194, "loss": 1.2289, "step": 5321 }, { "epoch": 0.13665403669592263, "grad_norm": 0.875, "learning_rate": 0.0001955811849440888, "loss": 1.1472, "step": 5322 }, { "epoch": 0.13667971389184444, "grad_norm": 0.86328125, "learning_rate": 0.00019557987246687363, "loss": 1.2444, "step": 5323 }, { "epoch": 0.13670539108776625, "grad_norm": 0.98828125, "learning_rate": 0.00019557855979917635, "loss": 0.9302, "step": 5324 }, { "epoch": 0.13673106828368808, "grad_norm": 0.828125, "learning_rate": 0.0001955772469409997, "loss": 1.1297, "step": 5325 }, { "epoch": 0.1367567454796099, "grad_norm": 0.76953125, "learning_rate": 0.00019557593389234613, "loss": 0.9862, "step": 5326 }, { "epoch": 0.13678242267553173, "grad_norm": 0.79296875, "learning_rate": 0.00019557462065321844, "loss": 1.1493, "step": 5327 }, { "epoch": 0.13680809987145354, "grad_norm": 0.92578125, "learning_rate": 0.00019557330722361912, "loss": 1.0205, "step": 5328 }, { "epoch": 0.13683377706737535, "grad_norm": 0.78125, "learning_rate": 0.00019557199360355085, "loss": 1.0462, "step": 5329 }, { "epoch": 0.13685945426329718, "grad_norm": 0.8046875, "learning_rate": 0.00019557067979301623, "loss": 1.0353, "step": 5330 }, { "epoch": 0.136885131459219, "grad_norm": 0.80078125, "learning_rate": 0.0001955693657920179, "loss": 1.0965, "step": 5331 }, { "epoch": 0.13691080865514083, "grad_norm": 0.85546875, "learning_rate": 0.00019556805160055843, "loss": 1.0338, "step": 5332 }, { "epoch": 0.13693648585106263, "grad_norm": 0.90625, "learning_rate": 0.00019556673721864045, "loss": 1.0623, "step": 5333 }, { "epoch": 0.13696216304698444, "grad_norm": 0.8671875, "learning_rate": 0.0001955654226462666, "loss": 1.1404, "step": 5334 }, { "epoch": 0.13698784024290628, "grad_norm": 0.86328125, "learning_rate": 0.00019556410788343953, "loss": 0.9778, "step": 5335 }, { "epoch": 0.1370135174388281, "grad_norm": 0.87109375, "learning_rate": 0.0001955627929301618, "loss": 1.0776, "step": 5336 }, { "epoch": 0.13703919463474992, "grad_norm": 0.8515625, "learning_rate": 0.00019556147778643609, "loss": 1.0616, "step": 5337 }, { "epoch": 0.13706487183067173, "grad_norm": 0.80078125, "learning_rate": 0.00019556016245226494, "loss": 0.9994, "step": 5338 }, { "epoch": 0.13709054902659354, "grad_norm": 0.890625, "learning_rate": 0.00019555884692765103, "loss": 1.1858, "step": 5339 }, { "epoch": 0.13711622622251537, "grad_norm": 0.8359375, "learning_rate": 0.00019555753121259698, "loss": 1.2145, "step": 5340 }, { "epoch": 0.13714190341843718, "grad_norm": 0.75390625, "learning_rate": 0.00019555621530710537, "loss": 1.0513, "step": 5341 }, { "epoch": 0.13716758061435902, "grad_norm": 0.75390625, "learning_rate": 0.0001955548992111789, "loss": 1.1226, "step": 5342 }, { "epoch": 0.13719325781028083, "grad_norm": 0.8046875, "learning_rate": 0.00019555358292482012, "loss": 1.0786, "step": 5343 }, { "epoch": 0.13721893500620264, "grad_norm": 0.8671875, "learning_rate": 0.00019555226644803168, "loss": 1.0746, "step": 5344 }, { "epoch": 0.13724461220212447, "grad_norm": 0.8359375, "learning_rate": 0.00019555094978081623, "loss": 1.1921, "step": 5345 }, { "epoch": 0.13727028939804628, "grad_norm": 0.88671875, "learning_rate": 0.00019554963292317635, "loss": 1.1459, "step": 5346 }, { "epoch": 0.13729596659396812, "grad_norm": 0.99609375, "learning_rate": 0.00019554831587511467, "loss": 1.0717, "step": 5347 }, { "epoch": 0.13732164378988992, "grad_norm": 0.828125, "learning_rate": 0.00019554699863663384, "loss": 0.9574, "step": 5348 }, { "epoch": 0.13734732098581173, "grad_norm": 0.86328125, "learning_rate": 0.0001955456812077365, "loss": 1.2309, "step": 5349 }, { "epoch": 0.13737299818173357, "grad_norm": 0.875, "learning_rate": 0.0001955443635884252, "loss": 1.2162, "step": 5350 }, { "epoch": 0.13739867537765538, "grad_norm": 0.86328125, "learning_rate": 0.00019554304577870264, "loss": 1.2666, "step": 5351 }, { "epoch": 0.1374243525735772, "grad_norm": 0.8359375, "learning_rate": 0.00019554172777857143, "loss": 1.0966, "step": 5352 }, { "epoch": 0.13745002976949902, "grad_norm": 0.8671875, "learning_rate": 0.00019554040958803416, "loss": 1.224, "step": 5353 }, { "epoch": 0.13747570696542083, "grad_norm": 0.84765625, "learning_rate": 0.00019553909120709347, "loss": 1.1977, "step": 5354 }, { "epoch": 0.13750138416134267, "grad_norm": 1.0390625, "learning_rate": 0.00019553777263575206, "loss": 1.0661, "step": 5355 }, { "epoch": 0.13752706135726447, "grad_norm": 0.828125, "learning_rate": 0.00019553645387401244, "loss": 1.2073, "step": 5356 }, { "epoch": 0.1375527385531863, "grad_norm": 0.83203125, "learning_rate": 0.00019553513492187734, "loss": 1.117, "step": 5357 }, { "epoch": 0.13757841574910812, "grad_norm": 0.83984375, "learning_rate": 0.00019553381577934933, "loss": 1.2928, "step": 5358 }, { "epoch": 0.13760409294502993, "grad_norm": 0.84375, "learning_rate": 0.00019553249644643105, "loss": 1.207, "step": 5359 }, { "epoch": 0.13762977014095176, "grad_norm": 0.93359375, "learning_rate": 0.00019553117692312514, "loss": 0.9513, "step": 5360 }, { "epoch": 0.13765544733687357, "grad_norm": 1.015625, "learning_rate": 0.00019552985720943422, "loss": 1.0005, "step": 5361 }, { "epoch": 0.1376811245327954, "grad_norm": 0.84765625, "learning_rate": 0.00019552853730536095, "loss": 1.1137, "step": 5362 }, { "epoch": 0.13770680172871722, "grad_norm": 0.8203125, "learning_rate": 0.0001955272172109079, "loss": 1.1863, "step": 5363 }, { "epoch": 0.13773247892463902, "grad_norm": 0.8046875, "learning_rate": 0.00019552589692607776, "loss": 1.0071, "step": 5364 }, { "epoch": 0.13775815612056086, "grad_norm": 0.8359375, "learning_rate": 0.0001955245764508731, "loss": 1.1096, "step": 5365 }, { "epoch": 0.13778383331648267, "grad_norm": 0.83984375, "learning_rate": 0.00019552325578529663, "loss": 1.0189, "step": 5366 }, { "epoch": 0.1378095105124045, "grad_norm": 0.79296875, "learning_rate": 0.00019552193492935088, "loss": 1.1871, "step": 5367 }, { "epoch": 0.1378351877083263, "grad_norm": 0.859375, "learning_rate": 0.00019552061388303858, "loss": 1.1915, "step": 5368 }, { "epoch": 0.13786086490424812, "grad_norm": 0.76171875, "learning_rate": 0.00019551929264636234, "loss": 1.0877, "step": 5369 }, { "epoch": 0.13788654210016996, "grad_norm": 0.8671875, "learning_rate": 0.00019551797121932478, "loss": 1.1508, "step": 5370 }, { "epoch": 0.13791221929609176, "grad_norm": 0.80078125, "learning_rate": 0.0001955166496019285, "loss": 1.1478, "step": 5371 }, { "epoch": 0.1379378964920136, "grad_norm": 0.765625, "learning_rate": 0.0001955153277941762, "loss": 0.9269, "step": 5372 }, { "epoch": 0.1379635736879354, "grad_norm": 0.82421875, "learning_rate": 0.00019551400579607045, "loss": 0.9681, "step": 5373 }, { "epoch": 0.13798925088385722, "grad_norm": 0.8125, "learning_rate": 0.0001955126836076139, "loss": 1.1745, "step": 5374 }, { "epoch": 0.13801492807977905, "grad_norm": 0.82421875, "learning_rate": 0.00019551136122880925, "loss": 1.1993, "step": 5375 }, { "epoch": 0.13804060527570086, "grad_norm": 0.8203125, "learning_rate": 0.00019551003865965905, "loss": 1.1238, "step": 5376 }, { "epoch": 0.1380662824716227, "grad_norm": 0.76953125, "learning_rate": 0.00019550871590016597, "loss": 1.0545, "step": 5377 }, { "epoch": 0.1380919596675445, "grad_norm": 0.8203125, "learning_rate": 0.00019550739295033266, "loss": 1.0024, "step": 5378 }, { "epoch": 0.13811763686346631, "grad_norm": 0.83203125, "learning_rate": 0.00019550606981016174, "loss": 1.079, "step": 5379 }, { "epoch": 0.13814331405938815, "grad_norm": 0.82421875, "learning_rate": 0.00019550474647965586, "loss": 1.0615, "step": 5380 }, { "epoch": 0.13816899125530996, "grad_norm": 0.8828125, "learning_rate": 0.00019550342295881763, "loss": 1.0768, "step": 5381 }, { "epoch": 0.1381946684512318, "grad_norm": 0.8125, "learning_rate": 0.00019550209924764972, "loss": 0.9945, "step": 5382 }, { "epoch": 0.1382203456471536, "grad_norm": 0.8671875, "learning_rate": 0.00019550077534615472, "loss": 1.2699, "step": 5383 }, { "epoch": 0.1382460228430754, "grad_norm": 0.8125, "learning_rate": 0.00019549945125433532, "loss": 1.0786, "step": 5384 }, { "epoch": 0.13827170003899725, "grad_norm": 0.78515625, "learning_rate": 0.00019549812697219413, "loss": 1.0885, "step": 5385 }, { "epoch": 0.13829737723491906, "grad_norm": 0.77734375, "learning_rate": 0.00019549680249973382, "loss": 1.0153, "step": 5386 }, { "epoch": 0.1383230544308409, "grad_norm": 0.83203125, "learning_rate": 0.000195495477836957, "loss": 1.0412, "step": 5387 }, { "epoch": 0.1383487316267627, "grad_norm": 0.81640625, "learning_rate": 0.00019549415298386635, "loss": 1.0159, "step": 5388 }, { "epoch": 0.1383744088226845, "grad_norm": 0.796875, "learning_rate": 0.00019549282794046444, "loss": 1.0028, "step": 5389 }, { "epoch": 0.13840008601860634, "grad_norm": 0.875, "learning_rate": 0.00019549150270675397, "loss": 1.0898, "step": 5390 }, { "epoch": 0.13842576321452815, "grad_norm": 0.8515625, "learning_rate": 0.00019549017728273755, "loss": 1.1698, "step": 5391 }, { "epoch": 0.13845144041045, "grad_norm": 0.8203125, "learning_rate": 0.00019548885166841782, "loss": 1.0326, "step": 5392 }, { "epoch": 0.1384771176063718, "grad_norm": 0.90234375, "learning_rate": 0.00019548752586379746, "loss": 1.247, "step": 5393 }, { "epoch": 0.1385027948022936, "grad_norm": 0.85546875, "learning_rate": 0.00019548619986887905, "loss": 1.1687, "step": 5394 }, { "epoch": 0.13852847199821544, "grad_norm": 0.8828125, "learning_rate": 0.0001954848736836653, "loss": 1.0303, "step": 5395 }, { "epoch": 0.13855414919413725, "grad_norm": 0.8125, "learning_rate": 0.00019548354730815882, "loss": 1.0941, "step": 5396 }, { "epoch": 0.13857982639005909, "grad_norm": 0.77734375, "learning_rate": 0.00019548222074236224, "loss": 1.0911, "step": 5397 }, { "epoch": 0.1386055035859809, "grad_norm": 0.89453125, "learning_rate": 0.00019548089398627823, "loss": 1.22, "step": 5398 }, { "epoch": 0.1386311807819027, "grad_norm": 0.8671875, "learning_rate": 0.00019547956703990944, "loss": 1.0926, "step": 5399 }, { "epoch": 0.13865685797782454, "grad_norm": 0.8359375, "learning_rate": 0.00019547823990325846, "loss": 1.1337, "step": 5400 }, { "epoch": 0.13868253517374635, "grad_norm": 0.921875, "learning_rate": 0.00019547691257632802, "loss": 1.1356, "step": 5401 }, { "epoch": 0.13870821236966818, "grad_norm": 0.8359375, "learning_rate": 0.00019547558505912064, "loss": 1.0225, "step": 5402 }, { "epoch": 0.13873388956559, "grad_norm": 0.91796875, "learning_rate": 0.0001954742573516391, "loss": 1.13, "step": 5403 }, { "epoch": 0.1387595667615118, "grad_norm": 0.828125, "learning_rate": 0.000195472929453886, "loss": 1.1526, "step": 5404 }, { "epoch": 0.13878524395743363, "grad_norm": 0.765625, "learning_rate": 0.00019547160136586397, "loss": 1.0308, "step": 5405 }, { "epoch": 0.13881092115335544, "grad_norm": 0.828125, "learning_rate": 0.0001954702730875756, "loss": 1.0768, "step": 5406 }, { "epoch": 0.13883659834927728, "grad_norm": 0.87109375, "learning_rate": 0.00019546894461902367, "loss": 1.1163, "step": 5407 }, { "epoch": 0.1388622755451991, "grad_norm": 0.828125, "learning_rate": 0.00019546761596021074, "loss": 1.0551, "step": 5408 }, { "epoch": 0.1388879527411209, "grad_norm": 0.7890625, "learning_rate": 0.00019546628711113948, "loss": 1.0128, "step": 5409 }, { "epoch": 0.13891362993704273, "grad_norm": 0.90625, "learning_rate": 0.00019546495807181252, "loss": 1.1514, "step": 5410 }, { "epoch": 0.13893930713296454, "grad_norm": 0.8828125, "learning_rate": 0.0001954636288422325, "loss": 1.1856, "step": 5411 }, { "epoch": 0.13896498432888638, "grad_norm": 0.81640625, "learning_rate": 0.00019546229942240216, "loss": 1.0659, "step": 5412 }, { "epoch": 0.13899066152480818, "grad_norm": 0.7890625, "learning_rate": 0.00019546096981232404, "loss": 1.1529, "step": 5413 }, { "epoch": 0.13901633872073, "grad_norm": 0.8984375, "learning_rate": 0.00019545964001200082, "loss": 1.1262, "step": 5414 }, { "epoch": 0.13904201591665183, "grad_norm": 0.81640625, "learning_rate": 0.00019545831002143514, "loss": 1.1008, "step": 5415 }, { "epoch": 0.13906769311257364, "grad_norm": 0.85546875, "learning_rate": 0.00019545697984062972, "loss": 1.107, "step": 5416 }, { "epoch": 0.13909337030849547, "grad_norm": 0.7734375, "learning_rate": 0.0001954556494695871, "loss": 0.9361, "step": 5417 }, { "epoch": 0.13911904750441728, "grad_norm": 0.8046875, "learning_rate": 0.00019545431890831004, "loss": 1.0991, "step": 5418 }, { "epoch": 0.1391447247003391, "grad_norm": 0.921875, "learning_rate": 0.00019545298815680116, "loss": 0.9903, "step": 5419 }, { "epoch": 0.13917040189626093, "grad_norm": 0.88671875, "learning_rate": 0.0001954516572150631, "loss": 1.1735, "step": 5420 }, { "epoch": 0.13919607909218273, "grad_norm": 0.984375, "learning_rate": 0.00019545032608309846, "loss": 1.1363, "step": 5421 }, { "epoch": 0.13922175628810457, "grad_norm": 0.79296875, "learning_rate": 0.00019544899476090996, "loss": 1.0746, "step": 5422 }, { "epoch": 0.13924743348402638, "grad_norm": 0.7890625, "learning_rate": 0.00019544766324850024, "loss": 1.2291, "step": 5423 }, { "epoch": 0.1392731106799482, "grad_norm": 0.83984375, "learning_rate": 0.00019544633154587197, "loss": 0.9952, "step": 5424 }, { "epoch": 0.13929878787587002, "grad_norm": 0.80078125, "learning_rate": 0.00019544499965302775, "loss": 1.1094, "step": 5425 }, { "epoch": 0.13932446507179183, "grad_norm": 0.8828125, "learning_rate": 0.00019544366756997028, "loss": 1.0566, "step": 5426 }, { "epoch": 0.13935014226771367, "grad_norm": 0.8125, "learning_rate": 0.0001954423352967022, "loss": 1.1296, "step": 5427 }, { "epoch": 0.13937581946363548, "grad_norm": 0.88671875, "learning_rate": 0.00019544100283322617, "loss": 0.999, "step": 5428 }, { "epoch": 0.13940149665955728, "grad_norm": 0.8828125, "learning_rate": 0.00019543967017954486, "loss": 1.0786, "step": 5429 }, { "epoch": 0.13942717385547912, "grad_norm": 0.7265625, "learning_rate": 0.00019543833733566088, "loss": 0.9039, "step": 5430 }, { "epoch": 0.13945285105140093, "grad_norm": 0.7890625, "learning_rate": 0.0001954370043015769, "loss": 1.0596, "step": 5431 }, { "epoch": 0.13947852824732276, "grad_norm": 0.828125, "learning_rate": 0.00019543567107729564, "loss": 1.0116, "step": 5432 }, { "epoch": 0.13950420544324457, "grad_norm": 0.82421875, "learning_rate": 0.0001954343376628197, "loss": 1.1336, "step": 5433 }, { "epoch": 0.13952988263916638, "grad_norm": 0.76953125, "learning_rate": 0.0001954330040581517, "loss": 1.0049, "step": 5434 }, { "epoch": 0.13955555983508822, "grad_norm": 0.83203125, "learning_rate": 0.0001954316702632944, "loss": 1.0605, "step": 5435 }, { "epoch": 0.13958123703101002, "grad_norm": 0.81640625, "learning_rate": 0.00019543033627825035, "loss": 1.0209, "step": 5436 }, { "epoch": 0.13960691422693186, "grad_norm": 0.93359375, "learning_rate": 0.0001954290021030223, "loss": 1.076, "step": 5437 }, { "epoch": 0.13963259142285367, "grad_norm": 0.80859375, "learning_rate": 0.00019542766773761284, "loss": 0.9794, "step": 5438 }, { "epoch": 0.13965826861877548, "grad_norm": 0.78125, "learning_rate": 0.00019542633318202468, "loss": 1.1235, "step": 5439 }, { "epoch": 0.1396839458146973, "grad_norm": 0.828125, "learning_rate": 0.00019542499843626043, "loss": 1.2405, "step": 5440 }, { "epoch": 0.13970962301061912, "grad_norm": 0.8984375, "learning_rate": 0.00019542366350032282, "loss": 1.1309, "step": 5441 }, { "epoch": 0.13973530020654096, "grad_norm": 0.875, "learning_rate": 0.00019542232837421443, "loss": 1.2183, "step": 5442 }, { "epoch": 0.13976097740246277, "grad_norm": 0.8359375, "learning_rate": 0.000195420993057938, "loss": 1.1083, "step": 5443 }, { "epoch": 0.13978665459838457, "grad_norm": 0.81640625, "learning_rate": 0.00019541965755149612, "loss": 1.2822, "step": 5444 }, { "epoch": 0.1398123317943064, "grad_norm": 0.80859375, "learning_rate": 0.00019541832185489147, "loss": 1.0442, "step": 5445 }, { "epoch": 0.13983800899022822, "grad_norm": 0.8828125, "learning_rate": 0.00019541698596812674, "loss": 1.1899, "step": 5446 }, { "epoch": 0.13986368618615005, "grad_norm": 0.90234375, "learning_rate": 0.00019541564989120457, "loss": 1.0519, "step": 5447 }, { "epoch": 0.13988936338207186, "grad_norm": 0.82421875, "learning_rate": 0.00019541431362412763, "loss": 1.0387, "step": 5448 }, { "epoch": 0.13991504057799367, "grad_norm": 0.79296875, "learning_rate": 0.0001954129771668986, "loss": 1.1742, "step": 5449 }, { "epoch": 0.1399407177739155, "grad_norm": 0.8671875, "learning_rate": 0.0001954116405195201, "loss": 1.1443, "step": 5450 }, { "epoch": 0.13996639496983732, "grad_norm": 0.86328125, "learning_rate": 0.00019541030368199484, "loss": 1.1125, "step": 5451 }, { "epoch": 0.13999207216575915, "grad_norm": 0.83984375, "learning_rate": 0.00019540896665432546, "loss": 1.1144, "step": 5452 }, { "epoch": 0.14001774936168096, "grad_norm": 0.87890625, "learning_rate": 0.0001954076294365146, "loss": 1.0587, "step": 5453 }, { "epoch": 0.14004342655760277, "grad_norm": 0.84375, "learning_rate": 0.000195406292028565, "loss": 1.0972, "step": 5454 }, { "epoch": 0.1400691037535246, "grad_norm": 0.95703125, "learning_rate": 0.00019540495443047924, "loss": 1.2129, "step": 5455 }, { "epoch": 0.1400947809494464, "grad_norm": 0.9453125, "learning_rate": 0.00019540361664226004, "loss": 1.1816, "step": 5456 }, { "epoch": 0.14012045814536825, "grad_norm": 0.9453125, "learning_rate": 0.00019540227866391002, "loss": 1.2356, "step": 5457 }, { "epoch": 0.14014613534129006, "grad_norm": 0.8046875, "learning_rate": 0.00019540094049543192, "loss": 0.8519, "step": 5458 }, { "epoch": 0.14017181253721187, "grad_norm": 0.82421875, "learning_rate": 0.00019539960213682832, "loss": 1.0375, "step": 5459 }, { "epoch": 0.1401974897331337, "grad_norm": 0.8671875, "learning_rate": 0.00019539826358810198, "loss": 1.1814, "step": 5460 }, { "epoch": 0.1402231669290555, "grad_norm": 0.81640625, "learning_rate": 0.0001953969248492555, "loss": 1.0826, "step": 5461 }, { "epoch": 0.14024884412497735, "grad_norm": 1.3203125, "learning_rate": 0.00019539558592029154, "loss": 1.0599, "step": 5462 }, { "epoch": 0.14027452132089915, "grad_norm": 0.8671875, "learning_rate": 0.00019539424680121284, "loss": 1.1228, "step": 5463 }, { "epoch": 0.14030019851682096, "grad_norm": 0.80078125, "learning_rate": 0.000195392907492022, "loss": 1.0403, "step": 5464 }, { "epoch": 0.1403258757127428, "grad_norm": 0.80078125, "learning_rate": 0.0001953915679927217, "loss": 1.0189, "step": 5465 }, { "epoch": 0.1403515529086646, "grad_norm": 0.96875, "learning_rate": 0.0001953902283033146, "loss": 1.1393, "step": 5466 }, { "epoch": 0.14037723010458644, "grad_norm": 0.7578125, "learning_rate": 0.00019538888842380344, "loss": 0.9994, "step": 5467 }, { "epoch": 0.14040290730050825, "grad_norm": 0.83984375, "learning_rate": 0.00019538754835419082, "loss": 1.1218, "step": 5468 }, { "epoch": 0.14042858449643006, "grad_norm": 0.8515625, "learning_rate": 0.00019538620809447945, "loss": 1.1614, "step": 5469 }, { "epoch": 0.1404542616923519, "grad_norm": 0.828125, "learning_rate": 0.00019538486764467195, "loss": 0.9373, "step": 5470 }, { "epoch": 0.1404799388882737, "grad_norm": 0.80078125, "learning_rate": 0.00019538352700477105, "loss": 1.0548, "step": 5471 }, { "epoch": 0.14050561608419554, "grad_norm": 0.921875, "learning_rate": 0.0001953821861747794, "loss": 1.1274, "step": 5472 }, { "epoch": 0.14053129328011735, "grad_norm": 0.765625, "learning_rate": 0.00019538084515469966, "loss": 1.1416, "step": 5473 }, { "epoch": 0.14055697047603916, "grad_norm": 0.8203125, "learning_rate": 0.0001953795039445345, "loss": 1.2499, "step": 5474 }, { "epoch": 0.140582647671961, "grad_norm": 0.83984375, "learning_rate": 0.00019537816254428664, "loss": 1.1749, "step": 5475 }, { "epoch": 0.1406083248678828, "grad_norm": 0.78515625, "learning_rate": 0.00019537682095395868, "loss": 1.0407, "step": 5476 }, { "epoch": 0.1406340020638046, "grad_norm": 0.890625, "learning_rate": 0.0001953754791735533, "loss": 1.0606, "step": 5477 }, { "epoch": 0.14065967925972644, "grad_norm": 0.84375, "learning_rate": 0.0001953741372030733, "loss": 1.152, "step": 5478 }, { "epoch": 0.14068535645564825, "grad_norm": 0.80859375, "learning_rate": 0.00019537279504252118, "loss": 1.0811, "step": 5479 }, { "epoch": 0.1407110336515701, "grad_norm": 0.8671875, "learning_rate": 0.00019537145269189974, "loss": 1.0313, "step": 5480 }, { "epoch": 0.1407367108474919, "grad_norm": 0.85546875, "learning_rate": 0.00019537011015121158, "loss": 1.0857, "step": 5481 }, { "epoch": 0.1407623880434137, "grad_norm": 0.96484375, "learning_rate": 0.00019536876742045945, "loss": 1.0292, "step": 5482 }, { "epoch": 0.14078806523933554, "grad_norm": 0.97265625, "learning_rate": 0.0001953674244996459, "loss": 1.1686, "step": 5483 }, { "epoch": 0.14081374243525735, "grad_norm": 0.83203125, "learning_rate": 0.00019536608138877374, "loss": 1.1719, "step": 5484 }, { "epoch": 0.14083941963117919, "grad_norm": 0.87890625, "learning_rate": 0.0001953647380878456, "loss": 1.0346, "step": 5485 }, { "epoch": 0.140865096827101, "grad_norm": 0.8125, "learning_rate": 0.00019536339459686416, "loss": 1.0725, "step": 5486 }, { "epoch": 0.1408907740230228, "grad_norm": 0.9453125, "learning_rate": 0.00019536205091583207, "loss": 1.3894, "step": 5487 }, { "epoch": 0.14091645121894464, "grad_norm": 0.86328125, "learning_rate": 0.000195360707044752, "loss": 1.1301, "step": 5488 }, { "epoch": 0.14094212841486645, "grad_norm": 0.87890625, "learning_rate": 0.0001953593629836267, "loss": 1.0653, "step": 5489 }, { "epoch": 0.14096780561078828, "grad_norm": 0.86328125, "learning_rate": 0.0001953580187324588, "loss": 0.9406, "step": 5490 }, { "epoch": 0.1409934828067101, "grad_norm": 0.8125, "learning_rate": 0.00019535667429125095, "loss": 1.0241, "step": 5491 }, { "epoch": 0.1410191600026319, "grad_norm": 0.859375, "learning_rate": 0.00019535532966000587, "loss": 1.0028, "step": 5492 }, { "epoch": 0.14104483719855374, "grad_norm": 0.91796875, "learning_rate": 0.00019535398483872625, "loss": 0.9644, "step": 5493 }, { "epoch": 0.14107051439447554, "grad_norm": 0.81640625, "learning_rate": 0.00019535263982741477, "loss": 1.077, "step": 5494 }, { "epoch": 0.14109619159039738, "grad_norm": 0.78125, "learning_rate": 0.00019535129462607405, "loss": 1.1183, "step": 5495 }, { "epoch": 0.1411218687863192, "grad_norm": 1.1171875, "learning_rate": 0.00019534994923470683, "loss": 1.2411, "step": 5496 }, { "epoch": 0.141147545982241, "grad_norm": 0.8359375, "learning_rate": 0.00019534860365331578, "loss": 1.1714, "step": 5497 }, { "epoch": 0.14117322317816283, "grad_norm": 0.87109375, "learning_rate": 0.00019534725788190356, "loss": 1.1835, "step": 5498 }, { "epoch": 0.14119890037408464, "grad_norm": 0.84375, "learning_rate": 0.00019534591192047287, "loss": 1.1452, "step": 5499 }, { "epoch": 0.14122457757000648, "grad_norm": 2.46875, "learning_rate": 0.0001953445657690264, "loss": 1.0825, "step": 5500 }, { "epoch": 0.14125025476592828, "grad_norm": 0.84765625, "learning_rate": 0.0001953432194275668, "loss": 1.1382, "step": 5501 }, { "epoch": 0.1412759319618501, "grad_norm": 0.86328125, "learning_rate": 0.0001953418728960968, "loss": 1.174, "step": 5502 }, { "epoch": 0.14130160915777193, "grad_norm": 0.88671875, "learning_rate": 0.00019534052617461908, "loss": 1.0755, "step": 5503 }, { "epoch": 0.14132728635369374, "grad_norm": 0.859375, "learning_rate": 0.00019533917926313626, "loss": 1.1035, "step": 5504 }, { "epoch": 0.14135296354961557, "grad_norm": 0.8359375, "learning_rate": 0.00019533783216165107, "loss": 1.1292, "step": 5505 }, { "epoch": 0.14137864074553738, "grad_norm": 0.859375, "learning_rate": 0.0001953364848701662, "loss": 0.9693, "step": 5506 }, { "epoch": 0.1414043179414592, "grad_norm": 0.88671875, "learning_rate": 0.00019533513738868434, "loss": 1.1054, "step": 5507 }, { "epoch": 0.14142999513738103, "grad_norm": 0.8203125, "learning_rate": 0.00019533378971720814, "loss": 1.1826, "step": 5508 }, { "epoch": 0.14145567233330283, "grad_norm": 0.859375, "learning_rate": 0.00019533244185574033, "loss": 1.0236, "step": 5509 }, { "epoch": 0.14148134952922467, "grad_norm": 0.88671875, "learning_rate": 0.00019533109380428357, "loss": 1.2192, "step": 5510 }, { "epoch": 0.14150702672514648, "grad_norm": 0.89453125, "learning_rate": 0.00019532974556284058, "loss": 1.2624, "step": 5511 }, { "epoch": 0.1415327039210683, "grad_norm": 0.89453125, "learning_rate": 0.00019532839713141397, "loss": 1.2218, "step": 5512 }, { "epoch": 0.14155838111699012, "grad_norm": 0.81640625, "learning_rate": 0.0001953270485100065, "loss": 1.0664, "step": 5513 }, { "epoch": 0.14158405831291193, "grad_norm": 0.84765625, "learning_rate": 0.00019532569969862084, "loss": 1.1519, "step": 5514 }, { "epoch": 0.14160973550883377, "grad_norm": 0.83203125, "learning_rate": 0.00019532435069725965, "loss": 1.1531, "step": 5515 }, { "epoch": 0.14163541270475558, "grad_norm": 0.8203125, "learning_rate": 0.00019532300150592566, "loss": 1.0435, "step": 5516 }, { "epoch": 0.14166108990067738, "grad_norm": 0.84765625, "learning_rate": 0.0001953216521246215, "loss": 1.0469, "step": 5517 }, { "epoch": 0.14168676709659922, "grad_norm": 0.8828125, "learning_rate": 0.00019532030255334993, "loss": 1.1594, "step": 5518 }, { "epoch": 0.14171244429252103, "grad_norm": 0.7734375, "learning_rate": 0.00019531895279211364, "loss": 0.9564, "step": 5519 }, { "epoch": 0.14173812148844286, "grad_norm": 0.921875, "learning_rate": 0.00019531760284091524, "loss": 1.2537, "step": 5520 }, { "epoch": 0.14176379868436467, "grad_norm": 0.83203125, "learning_rate": 0.00019531625269975745, "loss": 1.076, "step": 5521 }, { "epoch": 0.14178947588028648, "grad_norm": 0.8125, "learning_rate": 0.000195314902368643, "loss": 1.049, "step": 5522 }, { "epoch": 0.14181515307620832, "grad_norm": 0.81640625, "learning_rate": 0.00019531355184757457, "loss": 1.0817, "step": 5523 }, { "epoch": 0.14184083027213013, "grad_norm": 0.8125, "learning_rate": 0.00019531220113655487, "loss": 1.2074, "step": 5524 }, { "epoch": 0.14186650746805196, "grad_norm": 0.828125, "learning_rate": 0.0001953108502355865, "loss": 1.0882, "step": 5525 }, { "epoch": 0.14189218466397377, "grad_norm": 0.75390625, "learning_rate": 0.00019530949914467225, "loss": 0.8971, "step": 5526 }, { "epoch": 0.14191786185989558, "grad_norm": 0.8515625, "learning_rate": 0.00019530814786381477, "loss": 1.1371, "step": 5527 }, { "epoch": 0.1419435390558174, "grad_norm": 0.8125, "learning_rate": 0.00019530679639301676, "loss": 1.0552, "step": 5528 }, { "epoch": 0.14196921625173922, "grad_norm": 0.77734375, "learning_rate": 0.00019530544473228092, "loss": 1.0434, "step": 5529 }, { "epoch": 0.14199489344766106, "grad_norm": 0.8203125, "learning_rate": 0.00019530409288160994, "loss": 1.0911, "step": 5530 }, { "epoch": 0.14202057064358287, "grad_norm": 0.84765625, "learning_rate": 0.0001953027408410065, "loss": 1.1388, "step": 5531 }, { "epoch": 0.14204624783950467, "grad_norm": 0.84765625, "learning_rate": 0.0001953013886104733, "loss": 1.1968, "step": 5532 }, { "epoch": 0.1420719250354265, "grad_norm": 0.859375, "learning_rate": 0.00019530003619001307, "loss": 0.9786, "step": 5533 }, { "epoch": 0.14209760223134832, "grad_norm": 0.90234375, "learning_rate": 0.00019529868357962845, "loss": 1.1405, "step": 5534 }, { "epoch": 0.14212327942727015, "grad_norm": 0.84765625, "learning_rate": 0.00019529733077932218, "loss": 1.1069, "step": 5535 }, { "epoch": 0.14214895662319196, "grad_norm": 0.7578125, "learning_rate": 0.0001952959777890969, "loss": 1.0187, "step": 5536 }, { "epoch": 0.14217463381911377, "grad_norm": 0.82421875, "learning_rate": 0.00019529462460895537, "loss": 1.0191, "step": 5537 }, { "epoch": 0.1422003110150356, "grad_norm": 0.8359375, "learning_rate": 0.00019529327123890027, "loss": 1.1948, "step": 5538 }, { "epoch": 0.14222598821095742, "grad_norm": 0.87109375, "learning_rate": 0.00019529191767893428, "loss": 1.0144, "step": 5539 }, { "epoch": 0.14225166540687925, "grad_norm": 0.765625, "learning_rate": 0.00019529056392906011, "loss": 1.2352, "step": 5540 }, { "epoch": 0.14227734260280106, "grad_norm": 0.90234375, "learning_rate": 0.00019528920998928046, "loss": 1.1419, "step": 5541 }, { "epoch": 0.14230301979872287, "grad_norm": 0.85546875, "learning_rate": 0.000195287855859598, "loss": 1.1513, "step": 5542 }, { "epoch": 0.1423286969946447, "grad_norm": 0.82421875, "learning_rate": 0.00019528650154001546, "loss": 1.1626, "step": 5543 }, { "epoch": 0.1423543741905665, "grad_norm": 0.875, "learning_rate": 0.00019528514703053554, "loss": 1.1747, "step": 5544 }, { "epoch": 0.14238005138648835, "grad_norm": 0.828125, "learning_rate": 0.00019528379233116092, "loss": 1.1148, "step": 5545 }, { "epoch": 0.14240572858241016, "grad_norm": 0.87890625, "learning_rate": 0.0001952824374418943, "loss": 1.1333, "step": 5546 }, { "epoch": 0.14243140577833197, "grad_norm": 0.86328125, "learning_rate": 0.0001952810823627384, "loss": 1.2867, "step": 5547 }, { "epoch": 0.1424570829742538, "grad_norm": 0.8359375, "learning_rate": 0.0001952797270936959, "loss": 1.0196, "step": 5548 }, { "epoch": 0.1424827601701756, "grad_norm": 0.90625, "learning_rate": 0.00019527837163476954, "loss": 1.1678, "step": 5549 }, { "epoch": 0.14250843736609745, "grad_norm": 0.87109375, "learning_rate": 0.00019527701598596195, "loss": 1.107, "step": 5550 }, { "epoch": 0.14253411456201925, "grad_norm": 0.8359375, "learning_rate": 0.00019527566014727592, "loss": 1.2786, "step": 5551 }, { "epoch": 0.14255979175794106, "grad_norm": 0.77734375, "learning_rate": 0.00019527430411871407, "loss": 1.0924, "step": 5552 }, { "epoch": 0.1425854689538629, "grad_norm": 0.9296875, "learning_rate": 0.00019527294790027915, "loss": 1.1034, "step": 5553 }, { "epoch": 0.1426111461497847, "grad_norm": 0.8359375, "learning_rate": 0.00019527159149197384, "loss": 1.1034, "step": 5554 }, { "epoch": 0.14263682334570654, "grad_norm": 0.78125, "learning_rate": 0.00019527023489380088, "loss": 1.0599, "step": 5555 }, { "epoch": 0.14266250054162835, "grad_norm": 0.8515625, "learning_rate": 0.00019526887810576292, "loss": 1.1967, "step": 5556 }, { "epoch": 0.14268817773755016, "grad_norm": 0.93359375, "learning_rate": 0.0001952675211278627, "loss": 1.0405, "step": 5557 }, { "epoch": 0.142713854933472, "grad_norm": 0.796875, "learning_rate": 0.0001952661639601029, "loss": 0.942, "step": 5558 }, { "epoch": 0.1427395321293938, "grad_norm": 0.875, "learning_rate": 0.0001952648066024863, "loss": 1.1026, "step": 5559 }, { "epoch": 0.14276520932531564, "grad_norm": 0.78125, "learning_rate": 0.0001952634490550155, "loss": 1.0713, "step": 5560 }, { "epoch": 0.14279088652123745, "grad_norm": 0.77734375, "learning_rate": 0.00019526209131769324, "loss": 0.9305, "step": 5561 }, { "epoch": 0.14281656371715926, "grad_norm": 0.92578125, "learning_rate": 0.00019526073339052227, "loss": 1.1161, "step": 5562 }, { "epoch": 0.1428422409130811, "grad_norm": 0.9609375, "learning_rate": 0.00019525937527350523, "loss": 1.0683, "step": 5563 }, { "epoch": 0.1428679181090029, "grad_norm": 0.93359375, "learning_rate": 0.00019525801696664488, "loss": 1.1693, "step": 5564 }, { "epoch": 0.14289359530492474, "grad_norm": 0.87109375, "learning_rate": 0.0001952566584699439, "loss": 1.0183, "step": 5565 }, { "epoch": 0.14291927250084654, "grad_norm": 0.796875, "learning_rate": 0.000195255299783405, "loss": 1.0909, "step": 5566 }, { "epoch": 0.14294494969676835, "grad_norm": 0.9140625, "learning_rate": 0.00019525394090703091, "loss": 1.0479, "step": 5567 }, { "epoch": 0.1429706268926902, "grad_norm": 0.92578125, "learning_rate": 0.0001952525818408243, "loss": 1.2945, "step": 5568 }, { "epoch": 0.142996304088612, "grad_norm": 0.88671875, "learning_rate": 0.0001952512225847879, "loss": 1.12, "step": 5569 }, { "epoch": 0.14302198128453383, "grad_norm": 0.8125, "learning_rate": 0.00019524986313892444, "loss": 1.1044, "step": 5570 }, { "epoch": 0.14304765848045564, "grad_norm": 0.859375, "learning_rate": 0.00019524850350323658, "loss": 1.058, "step": 5571 }, { "epoch": 0.14307333567637745, "grad_norm": 0.83984375, "learning_rate": 0.00019524714367772706, "loss": 1.0635, "step": 5572 }, { "epoch": 0.14309901287229929, "grad_norm": 0.80859375, "learning_rate": 0.0001952457836623986, "loss": 1.3089, "step": 5573 }, { "epoch": 0.1431246900682211, "grad_norm": 0.890625, "learning_rate": 0.0001952444234572539, "loss": 1.1019, "step": 5574 }, { "epoch": 0.14315036726414293, "grad_norm": 0.8046875, "learning_rate": 0.00019524306306229565, "loss": 1.1909, "step": 5575 }, { "epoch": 0.14317604446006474, "grad_norm": 0.828125, "learning_rate": 0.0001952417024775266, "loss": 1.1139, "step": 5576 }, { "epoch": 0.14320172165598655, "grad_norm": 0.7578125, "learning_rate": 0.0001952403417029494, "loss": 0.9468, "step": 5577 }, { "epoch": 0.14322739885190838, "grad_norm": 0.8046875, "learning_rate": 0.00019523898073856683, "loss": 1.1197, "step": 5578 }, { "epoch": 0.1432530760478302, "grad_norm": 0.8671875, "learning_rate": 0.0001952376195843816, "loss": 1.116, "step": 5579 }, { "epoch": 0.14327875324375203, "grad_norm": 0.80859375, "learning_rate": 0.00019523625824039635, "loss": 0.9357, "step": 5580 }, { "epoch": 0.14330443043967384, "grad_norm": 0.82421875, "learning_rate": 0.00019523489670661385, "loss": 1.1744, "step": 5581 }, { "epoch": 0.14333010763559564, "grad_norm": 0.8203125, "learning_rate": 0.00019523353498303683, "loss": 1.0431, "step": 5582 }, { "epoch": 0.14335578483151748, "grad_norm": 0.8984375, "learning_rate": 0.00019523217306966796, "loss": 1.208, "step": 5583 }, { "epoch": 0.1433814620274393, "grad_norm": 0.8359375, "learning_rate": 0.00019523081096650997, "loss": 1.1407, "step": 5584 }, { "epoch": 0.14340713922336112, "grad_norm": 0.8203125, "learning_rate": 0.00019522944867356556, "loss": 1.1713, "step": 5585 }, { "epoch": 0.14343281641928293, "grad_norm": 0.7578125, "learning_rate": 0.0001952280861908375, "loss": 1.0407, "step": 5586 }, { "epoch": 0.14345849361520474, "grad_norm": 0.81640625, "learning_rate": 0.00019522672351832845, "loss": 1.0366, "step": 5587 }, { "epoch": 0.14348417081112658, "grad_norm": 0.8046875, "learning_rate": 0.0001952253606560411, "loss": 1.0726, "step": 5588 }, { "epoch": 0.14350984800704839, "grad_norm": 0.86328125, "learning_rate": 0.00019522399760397828, "loss": 1.1699, "step": 5589 }, { "epoch": 0.14353552520297022, "grad_norm": 0.7734375, "learning_rate": 0.0001952226343621426, "loss": 1.0147, "step": 5590 }, { "epoch": 0.14356120239889203, "grad_norm": 0.8046875, "learning_rate": 0.00019522127093053683, "loss": 1.0671, "step": 5591 }, { "epoch": 0.14358687959481384, "grad_norm": 0.91796875, "learning_rate": 0.00019521990730916362, "loss": 1.0492, "step": 5592 }, { "epoch": 0.14361255679073567, "grad_norm": 0.90234375, "learning_rate": 0.0001952185434980258, "loss": 1.1005, "step": 5593 }, { "epoch": 0.14363823398665748, "grad_norm": 0.8515625, "learning_rate": 0.00019521717949712596, "loss": 1.1159, "step": 5594 }, { "epoch": 0.14366391118257932, "grad_norm": 0.8125, "learning_rate": 0.0001952158153064669, "loss": 1.2665, "step": 5595 }, { "epoch": 0.14368958837850113, "grad_norm": 0.84375, "learning_rate": 0.00019521445092605133, "loss": 1.0412, "step": 5596 }, { "epoch": 0.14371526557442293, "grad_norm": 0.77734375, "learning_rate": 0.00019521308635588196, "loss": 0.845, "step": 5597 }, { "epoch": 0.14374094277034477, "grad_norm": 0.78515625, "learning_rate": 0.00019521172159596152, "loss": 1.1118, "step": 5598 }, { "epoch": 0.14376661996626658, "grad_norm": 0.8203125, "learning_rate": 0.0001952103566462927, "loss": 1.0553, "step": 5599 }, { "epoch": 0.14379229716218841, "grad_norm": 0.76953125, "learning_rate": 0.00019520899150687825, "loss": 1.0366, "step": 5600 }, { "epoch": 0.14381797435811022, "grad_norm": 0.85546875, "learning_rate": 0.00019520762617772087, "loss": 1.1655, "step": 5601 }, { "epoch": 0.14384365155403203, "grad_norm": 0.81640625, "learning_rate": 0.00019520626065882332, "loss": 1.2953, "step": 5602 }, { "epoch": 0.14386932874995387, "grad_norm": 0.78125, "learning_rate": 0.00019520489495018827, "loss": 1.0098, "step": 5603 }, { "epoch": 0.14389500594587568, "grad_norm": 0.83984375, "learning_rate": 0.00019520352905181846, "loss": 1.0901, "step": 5604 }, { "epoch": 0.1439206831417975, "grad_norm": 0.875, "learning_rate": 0.00019520216296371664, "loss": 1.1233, "step": 5605 }, { "epoch": 0.14394636033771932, "grad_norm": 0.87890625, "learning_rate": 0.00019520079668588548, "loss": 1.0524, "step": 5606 }, { "epoch": 0.14397203753364113, "grad_norm": 0.91015625, "learning_rate": 0.00019519943021832775, "loss": 1.2018, "step": 5607 }, { "epoch": 0.14399771472956296, "grad_norm": 0.875, "learning_rate": 0.00019519806356104613, "loss": 1.0047, "step": 5608 }, { "epoch": 0.14402339192548477, "grad_norm": 0.93359375, "learning_rate": 0.00019519669671404338, "loss": 1.1565, "step": 5609 }, { "epoch": 0.1440490691214066, "grad_norm": 0.9296875, "learning_rate": 0.0001951953296773222, "loss": 0.8961, "step": 5610 }, { "epoch": 0.14407474631732842, "grad_norm": 1.3671875, "learning_rate": 0.0001951939624508854, "loss": 1.0768, "step": 5611 }, { "epoch": 0.14410042351325023, "grad_norm": 0.83203125, "learning_rate": 0.00019519259503473558, "loss": 1.1685, "step": 5612 }, { "epoch": 0.14412610070917206, "grad_norm": 0.83984375, "learning_rate": 0.0001951912274288755, "loss": 1.2515, "step": 5613 }, { "epoch": 0.14415177790509387, "grad_norm": 0.83203125, "learning_rate": 0.0001951898596333079, "loss": 1.0383, "step": 5614 }, { "epoch": 0.1441774551010157, "grad_norm": 0.8125, "learning_rate": 0.00019518849164803554, "loss": 1.0626, "step": 5615 }, { "epoch": 0.14420313229693751, "grad_norm": 0.9765625, "learning_rate": 0.0001951871234730611, "loss": 1.2082, "step": 5616 }, { "epoch": 0.14422880949285932, "grad_norm": 0.82421875, "learning_rate": 0.0001951857551083873, "loss": 1.0263, "step": 5617 }, { "epoch": 0.14425448668878116, "grad_norm": 0.83203125, "learning_rate": 0.00019518438655401692, "loss": 0.9861, "step": 5618 }, { "epoch": 0.14428016388470297, "grad_norm": 0.83203125, "learning_rate": 0.00019518301780995265, "loss": 0.9827, "step": 5619 }, { "epoch": 0.1443058410806248, "grad_norm": 0.828125, "learning_rate": 0.00019518164887619724, "loss": 1.0145, "step": 5620 }, { "epoch": 0.1443315182765466, "grad_norm": 0.8515625, "learning_rate": 0.00019518027975275336, "loss": 0.9783, "step": 5621 }, { "epoch": 0.14435719547246842, "grad_norm": 1.1640625, "learning_rate": 0.0001951789104396238, "loss": 1.1259, "step": 5622 }, { "epoch": 0.14438287266839026, "grad_norm": 0.859375, "learning_rate": 0.00019517754093681126, "loss": 1.1222, "step": 5623 }, { "epoch": 0.14440854986431206, "grad_norm": 0.80078125, "learning_rate": 0.00019517617124431848, "loss": 1.0373, "step": 5624 }, { "epoch": 0.1444342270602339, "grad_norm": 0.7421875, "learning_rate": 0.00019517480136214821, "loss": 1.0217, "step": 5625 }, { "epoch": 0.1444599042561557, "grad_norm": 0.8671875, "learning_rate": 0.00019517343129030312, "loss": 1.0884, "step": 5626 }, { "epoch": 0.14448558145207752, "grad_norm": 0.8046875, "learning_rate": 0.00019517206102878602, "loss": 1.1342, "step": 5627 }, { "epoch": 0.14451125864799935, "grad_norm": 0.81640625, "learning_rate": 0.00019517069057759957, "loss": 1.0671, "step": 5628 }, { "epoch": 0.14453693584392116, "grad_norm": 0.80859375, "learning_rate": 0.00019516931993674653, "loss": 0.9796, "step": 5629 }, { "epoch": 0.144562613039843, "grad_norm": 0.765625, "learning_rate": 0.00019516794910622967, "loss": 1.0974, "step": 5630 }, { "epoch": 0.1445882902357648, "grad_norm": 0.8125, "learning_rate": 0.00019516657808605167, "loss": 1.0711, "step": 5631 }, { "epoch": 0.1446139674316866, "grad_norm": 0.828125, "learning_rate": 0.00019516520687621523, "loss": 1.1833, "step": 5632 }, { "epoch": 0.14463964462760845, "grad_norm": 0.93359375, "learning_rate": 0.00019516383547672317, "loss": 1.2363, "step": 5633 }, { "epoch": 0.14466532182353026, "grad_norm": 0.83984375, "learning_rate": 0.00019516246388757817, "loss": 1.1509, "step": 5634 }, { "epoch": 0.1446909990194521, "grad_norm": 0.7890625, "learning_rate": 0.00019516109210878296, "loss": 1.0785, "step": 5635 }, { "epoch": 0.1447166762153739, "grad_norm": 0.78125, "learning_rate": 0.00019515972014034035, "loss": 1.0525, "step": 5636 }, { "epoch": 0.1447423534112957, "grad_norm": 0.85546875, "learning_rate": 0.00019515834798225298, "loss": 1.1325, "step": 5637 }, { "epoch": 0.14476803060721755, "grad_norm": 0.80859375, "learning_rate": 0.0001951569756345236, "loss": 1.0622, "step": 5638 }, { "epoch": 0.14479370780313935, "grad_norm": 0.83984375, "learning_rate": 0.00019515560309715495, "loss": 1.1208, "step": 5639 }, { "epoch": 0.1448193849990612, "grad_norm": 0.86328125, "learning_rate": 0.0001951542303701498, "loss": 1.0576, "step": 5640 }, { "epoch": 0.144845062194983, "grad_norm": 0.8671875, "learning_rate": 0.0001951528574535109, "loss": 0.9965, "step": 5641 }, { "epoch": 0.1448707393909048, "grad_norm": 0.83203125, "learning_rate": 0.00019515148434724088, "loss": 1.072, "step": 5642 }, { "epoch": 0.14489641658682664, "grad_norm": 0.890625, "learning_rate": 0.0001951501110513426, "loss": 1.0219, "step": 5643 }, { "epoch": 0.14492209378274845, "grad_norm": 0.83203125, "learning_rate": 0.00019514873756581872, "loss": 1.103, "step": 5644 }, { "epoch": 0.1449477709786703, "grad_norm": 0.80859375, "learning_rate": 0.000195147363890672, "loss": 1.0814, "step": 5645 }, { "epoch": 0.1449734481745921, "grad_norm": 0.7890625, "learning_rate": 0.0001951459900259052, "loss": 1.1072, "step": 5646 }, { "epoch": 0.1449991253705139, "grad_norm": 0.8203125, "learning_rate": 0.00019514461597152104, "loss": 1.0545, "step": 5647 }, { "epoch": 0.14502480256643574, "grad_norm": 0.80078125, "learning_rate": 0.00019514324172752222, "loss": 1.0316, "step": 5648 }, { "epoch": 0.14505047976235755, "grad_norm": 0.79296875, "learning_rate": 0.00019514186729391154, "loss": 1.0307, "step": 5649 }, { "epoch": 0.14507615695827938, "grad_norm": 0.80859375, "learning_rate": 0.00019514049267069168, "loss": 1.0068, "step": 5650 }, { "epoch": 0.1451018341542012, "grad_norm": 0.8046875, "learning_rate": 0.00019513911785786544, "loss": 1.011, "step": 5651 }, { "epoch": 0.145127511350123, "grad_norm": 0.85546875, "learning_rate": 0.00019513774285543554, "loss": 1.0049, "step": 5652 }, { "epoch": 0.14515318854604484, "grad_norm": 0.8046875, "learning_rate": 0.00019513636766340468, "loss": 1.0593, "step": 5653 }, { "epoch": 0.14517886574196665, "grad_norm": 0.8125, "learning_rate": 0.00019513499228177565, "loss": 1.0958, "step": 5654 }, { "epoch": 0.14520454293788848, "grad_norm": 0.74609375, "learning_rate": 0.00019513361671055117, "loss": 1.0921, "step": 5655 }, { "epoch": 0.1452302201338103, "grad_norm": 0.890625, "learning_rate": 0.000195132240949734, "loss": 1.2288, "step": 5656 }, { "epoch": 0.1452558973297321, "grad_norm": 0.8046875, "learning_rate": 0.00019513086499932687, "loss": 0.9877, "step": 5657 }, { "epoch": 0.14528157452565393, "grad_norm": 0.87890625, "learning_rate": 0.0001951294888593325, "loss": 1.2084, "step": 5658 }, { "epoch": 0.14530725172157574, "grad_norm": 0.81640625, "learning_rate": 0.00019512811252975367, "loss": 1.1838, "step": 5659 }, { "epoch": 0.14533292891749758, "grad_norm": 0.79296875, "learning_rate": 0.0001951267360105931, "loss": 0.9294, "step": 5660 }, { "epoch": 0.1453586061134194, "grad_norm": 0.859375, "learning_rate": 0.00019512535930185353, "loss": 1.0799, "step": 5661 }, { "epoch": 0.1453842833093412, "grad_norm": 0.828125, "learning_rate": 0.0001951239824035377, "loss": 1.0515, "step": 5662 }, { "epoch": 0.14540996050526303, "grad_norm": 0.7890625, "learning_rate": 0.0001951226053156484, "loss": 1.0885, "step": 5663 }, { "epoch": 0.14543563770118484, "grad_norm": 0.8515625, "learning_rate": 0.0001951212280381883, "loss": 1.0888, "step": 5664 }, { "epoch": 0.14546131489710667, "grad_norm": 0.90234375, "learning_rate": 0.0001951198505711602, "loss": 1.1584, "step": 5665 }, { "epoch": 0.14548699209302848, "grad_norm": 0.85546875, "learning_rate": 0.00019511847291456684, "loss": 1.0818, "step": 5666 }, { "epoch": 0.1455126692889503, "grad_norm": 0.82421875, "learning_rate": 0.00019511709506841093, "loss": 0.9998, "step": 5667 }, { "epoch": 0.14553834648487213, "grad_norm": 0.92578125, "learning_rate": 0.00019511571703269528, "loss": 1.0985, "step": 5668 }, { "epoch": 0.14556402368079394, "grad_norm": 0.80859375, "learning_rate": 0.00019511433880742258, "loss": 0.9619, "step": 5669 }, { "epoch": 0.14558970087671577, "grad_norm": 0.79296875, "learning_rate": 0.00019511296039259558, "loss": 1.1056, "step": 5670 }, { "epoch": 0.14561537807263758, "grad_norm": 0.84375, "learning_rate": 0.00019511158178821706, "loss": 1.2011, "step": 5671 }, { "epoch": 0.1456410552685594, "grad_norm": 0.96875, "learning_rate": 0.00019511020299428972, "loss": 1.2402, "step": 5672 }, { "epoch": 0.14566673246448122, "grad_norm": 0.84375, "learning_rate": 0.00019510882401081638, "loss": 1.0843, "step": 5673 }, { "epoch": 0.14569240966040303, "grad_norm": 0.88671875, "learning_rate": 0.00019510744483779973, "loss": 1.1654, "step": 5674 }, { "epoch": 0.14571808685632487, "grad_norm": 0.828125, "learning_rate": 0.00019510606547524254, "loss": 1.0859, "step": 5675 }, { "epoch": 0.14574376405224668, "grad_norm": 0.84375, "learning_rate": 0.00019510468592314753, "loss": 1.2175, "step": 5676 }, { "epoch": 0.14576944124816849, "grad_norm": 0.890625, "learning_rate": 0.00019510330618151747, "loss": 1.0833, "step": 5677 }, { "epoch": 0.14579511844409032, "grad_norm": 0.84765625, "learning_rate": 0.00019510192625035513, "loss": 1.0054, "step": 5678 }, { "epoch": 0.14582079564001213, "grad_norm": 0.75390625, "learning_rate": 0.00019510054612966325, "loss": 1.0538, "step": 5679 }, { "epoch": 0.14584647283593394, "grad_norm": 0.85546875, "learning_rate": 0.00019509916581944453, "loss": 1.2728, "step": 5680 }, { "epoch": 0.14587215003185577, "grad_norm": 0.890625, "learning_rate": 0.0001950977853197018, "loss": 1.2422, "step": 5681 }, { "epoch": 0.14589782722777758, "grad_norm": 0.8203125, "learning_rate": 0.00019509640463043776, "loss": 1.0464, "step": 5682 }, { "epoch": 0.14592350442369942, "grad_norm": 0.87890625, "learning_rate": 0.00019509502375165519, "loss": 1.2628, "step": 5683 }, { "epoch": 0.14594918161962123, "grad_norm": 0.86328125, "learning_rate": 0.0001950936426833568, "loss": 1.1298, "step": 5684 }, { "epoch": 0.14597485881554303, "grad_norm": 1.2890625, "learning_rate": 0.0001950922614255454, "loss": 1.1637, "step": 5685 }, { "epoch": 0.14600053601146487, "grad_norm": 0.8203125, "learning_rate": 0.0001950908799782237, "loss": 1.1942, "step": 5686 }, { "epoch": 0.14602621320738668, "grad_norm": 0.8515625, "learning_rate": 0.00019508949834139445, "loss": 1.0772, "step": 5687 }, { "epoch": 0.14605189040330852, "grad_norm": 0.828125, "learning_rate": 0.00019508811651506046, "loss": 0.9831, "step": 5688 }, { "epoch": 0.14607756759923032, "grad_norm": 0.8515625, "learning_rate": 0.00019508673449922438, "loss": 1.058, "step": 5689 }, { "epoch": 0.14610324479515213, "grad_norm": 0.7890625, "learning_rate": 0.00019508535229388908, "loss": 1.0737, "step": 5690 }, { "epoch": 0.14612892199107397, "grad_norm": 0.9140625, "learning_rate": 0.00019508396989905723, "loss": 1.194, "step": 5691 }, { "epoch": 0.14615459918699578, "grad_norm": 0.80859375, "learning_rate": 0.00019508258731473167, "loss": 1.0551, "step": 5692 }, { "epoch": 0.1461802763829176, "grad_norm": 0.76953125, "learning_rate": 0.00019508120454091505, "loss": 1.1063, "step": 5693 }, { "epoch": 0.14620595357883942, "grad_norm": 1.0078125, "learning_rate": 0.0001950798215776102, "loss": 1.1183, "step": 5694 }, { "epoch": 0.14623163077476123, "grad_norm": 0.8515625, "learning_rate": 0.00019507843842481984, "loss": 1.1871, "step": 5695 }, { "epoch": 0.14625730797068306, "grad_norm": 0.78125, "learning_rate": 0.00019507705508254675, "loss": 1.2376, "step": 5696 }, { "epoch": 0.14628298516660487, "grad_norm": 0.80859375, "learning_rate": 0.00019507567155079366, "loss": 1.1538, "step": 5697 }, { "epoch": 0.1463086623625267, "grad_norm": 0.828125, "learning_rate": 0.00019507428782956338, "loss": 1.0638, "step": 5698 }, { "epoch": 0.14633433955844852, "grad_norm": 0.83984375, "learning_rate": 0.00019507290391885862, "loss": 1.1533, "step": 5699 }, { "epoch": 0.14636001675437033, "grad_norm": 0.78125, "learning_rate": 0.00019507151981868213, "loss": 1.0632, "step": 5700 }, { "epoch": 0.14638569395029216, "grad_norm": 0.859375, "learning_rate": 0.0001950701355290367, "loss": 1.1593, "step": 5701 }, { "epoch": 0.14641137114621397, "grad_norm": 0.81640625, "learning_rate": 0.0001950687510499251, "loss": 0.893, "step": 5702 }, { "epoch": 0.1464370483421358, "grad_norm": 0.8203125, "learning_rate": 0.00019506736638135003, "loss": 1.1319, "step": 5703 }, { "epoch": 0.14646272553805761, "grad_norm": 0.83984375, "learning_rate": 0.0001950659815233143, "loss": 1.1954, "step": 5704 }, { "epoch": 0.14648840273397942, "grad_norm": 0.7578125, "learning_rate": 0.00019506459647582064, "loss": 1.0523, "step": 5705 }, { "epoch": 0.14651407992990126, "grad_norm": 0.77734375, "learning_rate": 0.00019506321123887187, "loss": 1.0812, "step": 5706 }, { "epoch": 0.14653975712582307, "grad_norm": 0.8046875, "learning_rate": 0.0001950618258124707, "loss": 0.9249, "step": 5707 }, { "epoch": 0.1465654343217449, "grad_norm": 0.8671875, "learning_rate": 0.00019506044019661987, "loss": 1.0555, "step": 5708 }, { "epoch": 0.1465911115176667, "grad_norm": 0.8671875, "learning_rate": 0.00019505905439132218, "loss": 1.0876, "step": 5709 }, { "epoch": 0.14661678871358852, "grad_norm": 0.8828125, "learning_rate": 0.00019505766839658038, "loss": 1.1076, "step": 5710 }, { "epoch": 0.14664246590951036, "grad_norm": 0.76953125, "learning_rate": 0.00019505628221239723, "loss": 1.1632, "step": 5711 }, { "epoch": 0.14666814310543216, "grad_norm": 0.78515625, "learning_rate": 0.0001950548958387755, "loss": 1.2013, "step": 5712 }, { "epoch": 0.146693820301354, "grad_norm": 0.81640625, "learning_rate": 0.00019505350927571791, "loss": 1.1852, "step": 5713 }, { "epoch": 0.1467194974972758, "grad_norm": 0.79296875, "learning_rate": 0.00019505212252322732, "loss": 1.1725, "step": 5714 }, { "epoch": 0.14674517469319762, "grad_norm": 0.76953125, "learning_rate": 0.00019505073558130645, "loss": 0.9762, "step": 5715 }, { "epoch": 0.14677085188911945, "grad_norm": 0.8359375, "learning_rate": 0.000195049348449958, "loss": 0.9923, "step": 5716 }, { "epoch": 0.14679652908504126, "grad_norm": 0.80078125, "learning_rate": 0.0001950479611291848, "loss": 1.1498, "step": 5717 }, { "epoch": 0.1468222062809631, "grad_norm": 0.82421875, "learning_rate": 0.00019504657361898962, "loss": 1.082, "step": 5718 }, { "epoch": 0.1468478834768849, "grad_norm": 0.87109375, "learning_rate": 0.00019504518591937514, "loss": 1.0857, "step": 5719 }, { "epoch": 0.1468735606728067, "grad_norm": 0.828125, "learning_rate": 0.00019504379803034425, "loss": 1.0475, "step": 5720 }, { "epoch": 0.14689923786872855, "grad_norm": 0.890625, "learning_rate": 0.00019504240995189966, "loss": 1.2142, "step": 5721 }, { "epoch": 0.14692491506465036, "grad_norm": 0.890625, "learning_rate": 0.0001950410216840441, "loss": 1.1217, "step": 5722 }, { "epoch": 0.1469505922605722, "grad_norm": 0.90625, "learning_rate": 0.00019503963322678038, "loss": 1.2274, "step": 5723 }, { "epoch": 0.146976269456494, "grad_norm": 1.0390625, "learning_rate": 0.00019503824458011126, "loss": 1.1303, "step": 5724 }, { "epoch": 0.1470019466524158, "grad_norm": 0.8359375, "learning_rate": 0.00019503685574403948, "loss": 1.096, "step": 5725 }, { "epoch": 0.14702762384833765, "grad_norm": 0.85546875, "learning_rate": 0.00019503546671856788, "loss": 0.9993, "step": 5726 }, { "epoch": 0.14705330104425945, "grad_norm": 0.87109375, "learning_rate": 0.00019503407750369914, "loss": 1.0674, "step": 5727 }, { "epoch": 0.1470789782401813, "grad_norm": 0.80078125, "learning_rate": 0.00019503268809943606, "loss": 1.059, "step": 5728 }, { "epoch": 0.1471046554361031, "grad_norm": 0.96875, "learning_rate": 0.00019503129850578145, "loss": 1.25, "step": 5729 }, { "epoch": 0.1471303326320249, "grad_norm": 0.8359375, "learning_rate": 0.00019502990872273803, "loss": 1.0524, "step": 5730 }, { "epoch": 0.14715600982794674, "grad_norm": 0.76953125, "learning_rate": 0.0001950285187503086, "loss": 1.0442, "step": 5731 }, { "epoch": 0.14718168702386855, "grad_norm": 0.9140625, "learning_rate": 0.0001950271285884959, "loss": 1.125, "step": 5732 }, { "epoch": 0.1472073642197904, "grad_norm": 0.828125, "learning_rate": 0.00019502573823730273, "loss": 1.2309, "step": 5733 }, { "epoch": 0.1472330414157122, "grad_norm": 0.8515625, "learning_rate": 0.00019502434769673183, "loss": 1.0122, "step": 5734 }, { "epoch": 0.147258718611634, "grad_norm": 0.84375, "learning_rate": 0.000195022956966786, "loss": 1.0008, "step": 5735 }, { "epoch": 0.14728439580755584, "grad_norm": 0.80078125, "learning_rate": 0.000195021566047468, "loss": 1.0745, "step": 5736 }, { "epoch": 0.14731007300347765, "grad_norm": 0.828125, "learning_rate": 0.0001950201749387806, "loss": 1.0711, "step": 5737 }, { "epoch": 0.14733575019939948, "grad_norm": 1.34375, "learning_rate": 0.00019501878364072656, "loss": 1.0719, "step": 5738 }, { "epoch": 0.1473614273953213, "grad_norm": 0.7578125, "learning_rate": 0.00019501739215330868, "loss": 1.089, "step": 5739 }, { "epoch": 0.1473871045912431, "grad_norm": 0.83984375, "learning_rate": 0.00019501600047652974, "loss": 1.1924, "step": 5740 }, { "epoch": 0.14741278178716494, "grad_norm": 0.82421875, "learning_rate": 0.00019501460861039247, "loss": 1.0791, "step": 5741 }, { "epoch": 0.14743845898308675, "grad_norm": 0.82421875, "learning_rate": 0.00019501321655489965, "loss": 1.2021, "step": 5742 }, { "epoch": 0.14746413617900858, "grad_norm": 0.7890625, "learning_rate": 0.0001950118243100541, "loss": 1.0447, "step": 5743 }, { "epoch": 0.1474898133749304, "grad_norm": 0.8046875, "learning_rate": 0.00019501043187585858, "loss": 1.0357, "step": 5744 }, { "epoch": 0.1475154905708522, "grad_norm": 0.79296875, "learning_rate": 0.0001950090392523158, "loss": 1.0568, "step": 5745 }, { "epoch": 0.14754116776677403, "grad_norm": 0.83203125, "learning_rate": 0.00019500764643942865, "loss": 1.0999, "step": 5746 }, { "epoch": 0.14756684496269584, "grad_norm": 0.75, "learning_rate": 0.00019500625343719978, "loss": 1.0064, "step": 5747 }, { "epoch": 0.14759252215861768, "grad_norm": 0.89453125, "learning_rate": 0.00019500486024563206, "loss": 1.1481, "step": 5748 }, { "epoch": 0.1476181993545395, "grad_norm": 0.80859375, "learning_rate": 0.00019500346686472826, "loss": 0.9884, "step": 5749 }, { "epoch": 0.1476438765504613, "grad_norm": 0.84765625, "learning_rate": 0.00019500207329449108, "loss": 1.1011, "step": 5750 }, { "epoch": 0.14766955374638313, "grad_norm": 0.8984375, "learning_rate": 0.0001950006795349234, "loss": 1.1909, "step": 5751 }, { "epoch": 0.14769523094230494, "grad_norm": 0.81640625, "learning_rate": 0.00019499928558602792, "loss": 1.2041, "step": 5752 }, { "epoch": 0.14772090813822678, "grad_norm": 0.8515625, "learning_rate": 0.00019499789144780745, "loss": 1.1551, "step": 5753 }, { "epoch": 0.14774658533414858, "grad_norm": 0.83984375, "learning_rate": 0.00019499649712026478, "loss": 1.0723, "step": 5754 }, { "epoch": 0.1477722625300704, "grad_norm": 0.84765625, "learning_rate": 0.00019499510260340263, "loss": 1.1008, "step": 5755 }, { "epoch": 0.14779793972599223, "grad_norm": 0.7890625, "learning_rate": 0.00019499370789722385, "loss": 1.0906, "step": 5756 }, { "epoch": 0.14782361692191404, "grad_norm": 0.9375, "learning_rate": 0.0001949923130017312, "loss": 1.0534, "step": 5757 }, { "epoch": 0.14784929411783587, "grad_norm": 0.80078125, "learning_rate": 0.00019499091791692742, "loss": 1.0691, "step": 5758 }, { "epoch": 0.14787497131375768, "grad_norm": 0.796875, "learning_rate": 0.00019498952264281532, "loss": 1.0925, "step": 5759 }, { "epoch": 0.1479006485096795, "grad_norm": 0.82421875, "learning_rate": 0.0001949881271793977, "loss": 1.0203, "step": 5760 }, { "epoch": 0.14792632570560132, "grad_norm": 0.890625, "learning_rate": 0.00019498673152667733, "loss": 1.0748, "step": 5761 }, { "epoch": 0.14795200290152313, "grad_norm": 0.859375, "learning_rate": 0.000194985335684657, "loss": 1.1232, "step": 5762 }, { "epoch": 0.14797768009744497, "grad_norm": 0.8203125, "learning_rate": 0.00019498393965333946, "loss": 1.1366, "step": 5763 }, { "epoch": 0.14800335729336678, "grad_norm": 0.87109375, "learning_rate": 0.00019498254343272749, "loss": 1.0376, "step": 5764 }, { "epoch": 0.14802903448928859, "grad_norm": 0.75, "learning_rate": 0.0001949811470228239, "loss": 1.042, "step": 5765 }, { "epoch": 0.14805471168521042, "grad_norm": 0.78515625, "learning_rate": 0.00019497975042363148, "loss": 1.1531, "step": 5766 }, { "epoch": 0.14808038888113223, "grad_norm": 0.8046875, "learning_rate": 0.00019497835363515298, "loss": 1.0636, "step": 5767 }, { "epoch": 0.14810606607705407, "grad_norm": 0.859375, "learning_rate": 0.00019497695665739121, "loss": 1.0666, "step": 5768 }, { "epoch": 0.14813174327297587, "grad_norm": 0.78125, "learning_rate": 0.00019497555949034894, "loss": 1.0637, "step": 5769 }, { "epoch": 0.14815742046889768, "grad_norm": 0.765625, "learning_rate": 0.00019497416213402896, "loss": 1.101, "step": 5770 }, { "epoch": 0.14818309766481952, "grad_norm": 0.83203125, "learning_rate": 0.00019497276458843406, "loss": 1.0145, "step": 5771 }, { "epoch": 0.14820877486074133, "grad_norm": 0.92578125, "learning_rate": 0.000194971366853567, "loss": 0.9899, "step": 5772 }, { "epoch": 0.14823445205666316, "grad_norm": 0.79296875, "learning_rate": 0.0001949699689294306, "loss": 0.9206, "step": 5773 }, { "epoch": 0.14826012925258497, "grad_norm": 0.875, "learning_rate": 0.0001949685708160276, "loss": 1.0852, "step": 5774 }, { "epoch": 0.14828580644850678, "grad_norm": 0.8203125, "learning_rate": 0.00019496717251336085, "loss": 1.1082, "step": 5775 }, { "epoch": 0.14831148364442862, "grad_norm": 0.8125, "learning_rate": 0.0001949657740214331, "loss": 1.1009, "step": 5776 }, { "epoch": 0.14833716084035042, "grad_norm": 0.8203125, "learning_rate": 0.0001949643753402471, "loss": 1.1027, "step": 5777 }, { "epoch": 0.14836283803627226, "grad_norm": 0.7421875, "learning_rate": 0.00019496297646980575, "loss": 0.974, "step": 5778 }, { "epoch": 0.14838851523219407, "grad_norm": 0.7578125, "learning_rate": 0.00019496157741011173, "loss": 1.0451, "step": 5779 }, { "epoch": 0.14841419242811588, "grad_norm": 0.8046875, "learning_rate": 0.00019496017816116786, "loss": 1.0561, "step": 5780 }, { "epoch": 0.1484398696240377, "grad_norm": 0.828125, "learning_rate": 0.00019495877872297693, "loss": 0.9441, "step": 5781 }, { "epoch": 0.14846554681995952, "grad_norm": 0.80078125, "learning_rate": 0.00019495737909554174, "loss": 0.9701, "step": 5782 }, { "epoch": 0.14849122401588136, "grad_norm": 0.84765625, "learning_rate": 0.00019495597927886508, "loss": 1.0793, "step": 5783 }, { "epoch": 0.14851690121180317, "grad_norm": 0.91015625, "learning_rate": 0.00019495457927294973, "loss": 1.214, "step": 5784 }, { "epoch": 0.14854257840772497, "grad_norm": 0.91796875, "learning_rate": 0.00019495317907779845, "loss": 1.213, "step": 5785 }, { "epoch": 0.1485682556036468, "grad_norm": 0.8125, "learning_rate": 0.00019495177869341407, "loss": 1.0544, "step": 5786 }, { "epoch": 0.14859393279956862, "grad_norm": 0.8125, "learning_rate": 0.00019495037811979938, "loss": 1.0301, "step": 5787 }, { "epoch": 0.14861960999549045, "grad_norm": 0.8203125, "learning_rate": 0.00019494897735695718, "loss": 1.172, "step": 5788 }, { "epoch": 0.14864528719141226, "grad_norm": 0.7421875, "learning_rate": 0.00019494757640489026, "loss": 0.943, "step": 5789 }, { "epoch": 0.14867096438733407, "grad_norm": 0.8203125, "learning_rate": 0.00019494617526360136, "loss": 1.0238, "step": 5790 }, { "epoch": 0.1486966415832559, "grad_norm": 0.86328125, "learning_rate": 0.00019494477393309332, "loss": 1.2207, "step": 5791 }, { "epoch": 0.14872231877917771, "grad_norm": 0.8515625, "learning_rate": 0.0001949433724133689, "loss": 1.0595, "step": 5792 }, { "epoch": 0.14874799597509955, "grad_norm": 0.83203125, "learning_rate": 0.00019494197070443094, "loss": 1.0757, "step": 5793 }, { "epoch": 0.14877367317102136, "grad_norm": 0.89453125, "learning_rate": 0.00019494056880628219, "loss": 1.1335, "step": 5794 }, { "epoch": 0.14879935036694317, "grad_norm": 1.15625, "learning_rate": 0.0001949391667189255, "loss": 1.0925, "step": 5795 }, { "epoch": 0.148825027562865, "grad_norm": 0.859375, "learning_rate": 0.0001949377644423636, "loss": 1.0902, "step": 5796 }, { "epoch": 0.1488507047587868, "grad_norm": 0.89453125, "learning_rate": 0.00019493636197659932, "loss": 1.2355, "step": 5797 }, { "epoch": 0.14887638195470865, "grad_norm": 0.8125, "learning_rate": 0.0001949349593216354, "loss": 1.1798, "step": 5798 }, { "epoch": 0.14890205915063046, "grad_norm": 0.796875, "learning_rate": 0.00019493355647747475, "loss": 1.0503, "step": 5799 }, { "epoch": 0.14892773634655226, "grad_norm": 0.79296875, "learning_rate": 0.00019493215344412005, "loss": 1.0008, "step": 5800 }, { "epoch": 0.1489534135424741, "grad_norm": 0.8671875, "learning_rate": 0.00019493075022157416, "loss": 1.089, "step": 5801 }, { "epoch": 0.1489790907383959, "grad_norm": 0.84765625, "learning_rate": 0.00019492934680983987, "loss": 1.0759, "step": 5802 }, { "epoch": 0.14900476793431774, "grad_norm": 0.81640625, "learning_rate": 0.00019492794320891997, "loss": 1.053, "step": 5803 }, { "epoch": 0.14903044513023955, "grad_norm": 0.796875, "learning_rate": 0.00019492653941881725, "loss": 1.085, "step": 5804 }, { "epoch": 0.14905612232616136, "grad_norm": 0.8203125, "learning_rate": 0.0001949251354395345, "loss": 1.0976, "step": 5805 }, { "epoch": 0.1490817995220832, "grad_norm": 0.8203125, "learning_rate": 0.00019492373127107454, "loss": 1.128, "step": 5806 }, { "epoch": 0.149107476718005, "grad_norm": 0.84375, "learning_rate": 0.00019492232691344015, "loss": 1.1151, "step": 5807 }, { "epoch": 0.14913315391392684, "grad_norm": 0.8203125, "learning_rate": 0.00019492092236663412, "loss": 1.0438, "step": 5808 }, { "epoch": 0.14915883110984865, "grad_norm": 0.8046875, "learning_rate": 0.0001949195176306593, "loss": 1.0795, "step": 5809 }, { "epoch": 0.14918450830577046, "grad_norm": 0.8203125, "learning_rate": 0.00019491811270551843, "loss": 1.1134, "step": 5810 }, { "epoch": 0.1492101855016923, "grad_norm": 0.890625, "learning_rate": 0.00019491670759121433, "loss": 1.0996, "step": 5811 }, { "epoch": 0.1492358626976141, "grad_norm": 0.89453125, "learning_rate": 0.00019491530228774983, "loss": 1.1189, "step": 5812 }, { "epoch": 0.14926153989353594, "grad_norm": 0.76953125, "learning_rate": 0.0001949138967951277, "loss": 0.9576, "step": 5813 }, { "epoch": 0.14928721708945775, "grad_norm": 0.765625, "learning_rate": 0.00019491249111335075, "loss": 1.0389, "step": 5814 }, { "epoch": 0.14931289428537955, "grad_norm": 0.80859375, "learning_rate": 0.00019491108524242177, "loss": 1.0922, "step": 5815 }, { "epoch": 0.1493385714813014, "grad_norm": 0.86328125, "learning_rate": 0.00019490967918234359, "loss": 1.0053, "step": 5816 }, { "epoch": 0.1493642486772232, "grad_norm": 0.8828125, "learning_rate": 0.00019490827293311896, "loss": 1.2209, "step": 5817 }, { "epoch": 0.14938992587314504, "grad_norm": 0.8125, "learning_rate": 0.00019490686649475074, "loss": 1.0546, "step": 5818 }, { "epoch": 0.14941560306906684, "grad_norm": 0.80859375, "learning_rate": 0.0001949054598672417, "loss": 1.1419, "step": 5819 }, { "epoch": 0.14944128026498865, "grad_norm": 0.87109375, "learning_rate": 0.00019490405305059465, "loss": 1.103, "step": 5820 }, { "epoch": 0.1494669574609105, "grad_norm": 0.84765625, "learning_rate": 0.00019490264604481237, "loss": 1.2194, "step": 5821 }, { "epoch": 0.1494926346568323, "grad_norm": 0.88671875, "learning_rate": 0.00019490123884989772, "loss": 1.2272, "step": 5822 }, { "epoch": 0.14951831185275413, "grad_norm": 0.828125, "learning_rate": 0.00019489983146585348, "loss": 1.0524, "step": 5823 }, { "epoch": 0.14954398904867594, "grad_norm": 0.9453125, "learning_rate": 0.00019489842389268242, "loss": 1.1712, "step": 5824 }, { "epoch": 0.14956966624459775, "grad_norm": 0.83203125, "learning_rate": 0.0001948970161303874, "loss": 1.1924, "step": 5825 }, { "epoch": 0.14959534344051958, "grad_norm": 0.828125, "learning_rate": 0.00019489560817897118, "loss": 1.1005, "step": 5826 }, { "epoch": 0.1496210206364414, "grad_norm": 0.984375, "learning_rate": 0.0001948942000384366, "loss": 1.1643, "step": 5827 }, { "epoch": 0.14964669783236323, "grad_norm": 0.81640625, "learning_rate": 0.00019489279170878642, "loss": 0.9992, "step": 5828 }, { "epoch": 0.14967237502828504, "grad_norm": 0.83203125, "learning_rate": 0.0001948913831900235, "loss": 1.1019, "step": 5829 }, { "epoch": 0.14969805222420685, "grad_norm": 0.82421875, "learning_rate": 0.00019488997448215064, "loss": 0.9314, "step": 5830 }, { "epoch": 0.14972372942012868, "grad_norm": 1.3515625, "learning_rate": 0.00019488856558517062, "loss": 1.2072, "step": 5831 }, { "epoch": 0.1497494066160505, "grad_norm": 0.91796875, "learning_rate": 0.00019488715649908627, "loss": 1.1818, "step": 5832 }, { "epoch": 0.14977508381197233, "grad_norm": 0.890625, "learning_rate": 0.00019488574722390035, "loss": 1.1798, "step": 5833 }, { "epoch": 0.14980076100789413, "grad_norm": 0.8125, "learning_rate": 0.00019488433775961575, "loss": 1.0033, "step": 5834 }, { "epoch": 0.14982643820381594, "grad_norm": 0.79296875, "learning_rate": 0.0001948829281062352, "loss": 1.0686, "step": 5835 }, { "epoch": 0.14985211539973778, "grad_norm": 0.7890625, "learning_rate": 0.00019488151826376157, "loss": 1.222, "step": 5836 }, { "epoch": 0.1498777925956596, "grad_norm": 0.87109375, "learning_rate": 0.00019488010823219765, "loss": 1.2059, "step": 5837 }, { "epoch": 0.14990346979158142, "grad_norm": 0.73828125, "learning_rate": 0.00019487869801154624, "loss": 0.9916, "step": 5838 }, { "epoch": 0.14992914698750323, "grad_norm": 0.82421875, "learning_rate": 0.00019487728760181014, "loss": 1.08, "step": 5839 }, { "epoch": 0.14995482418342504, "grad_norm": 0.9609375, "learning_rate": 0.00019487587700299218, "loss": 1.225, "step": 5840 }, { "epoch": 0.14998050137934688, "grad_norm": 0.8828125, "learning_rate": 0.00019487446621509517, "loss": 1.2521, "step": 5841 }, { "epoch": 0.15000617857526868, "grad_norm": 0.80859375, "learning_rate": 0.00019487305523812194, "loss": 1.1042, "step": 5842 }, { "epoch": 0.15003185577119052, "grad_norm": 0.76953125, "learning_rate": 0.00019487164407207527, "loss": 0.9846, "step": 5843 }, { "epoch": 0.15005753296711233, "grad_norm": 0.79296875, "learning_rate": 0.00019487023271695794, "loss": 1.1247, "step": 5844 }, { "epoch": 0.15008321016303414, "grad_norm": 0.78515625, "learning_rate": 0.00019486882117277285, "loss": 1.0209, "step": 5845 }, { "epoch": 0.15010888735895597, "grad_norm": 1.46875, "learning_rate": 0.00019486740943952275, "loss": 1.1093, "step": 5846 }, { "epoch": 0.15013456455487778, "grad_norm": 0.8359375, "learning_rate": 0.00019486599751721047, "loss": 1.0277, "step": 5847 }, { "epoch": 0.15016024175079962, "grad_norm": 0.76953125, "learning_rate": 0.00019486458540583885, "loss": 1.0026, "step": 5848 }, { "epoch": 0.15018591894672143, "grad_norm": 0.84375, "learning_rate": 0.00019486317310541068, "loss": 1.072, "step": 5849 }, { "epoch": 0.15021159614264323, "grad_norm": 0.796875, "learning_rate": 0.00019486176061592875, "loss": 1.135, "step": 5850 }, { "epoch": 0.15023727333856507, "grad_norm": 0.83203125, "learning_rate": 0.00019486034793739592, "loss": 1.0572, "step": 5851 }, { "epoch": 0.15026295053448688, "grad_norm": 0.828125, "learning_rate": 0.000194858935069815, "loss": 1.0883, "step": 5852 }, { "epoch": 0.1502886277304087, "grad_norm": 0.80859375, "learning_rate": 0.00019485752201318875, "loss": 1.0477, "step": 5853 }, { "epoch": 0.15031430492633052, "grad_norm": 0.85546875, "learning_rate": 0.00019485610876752003, "loss": 1.0004, "step": 5854 }, { "epoch": 0.15033998212225233, "grad_norm": 0.8359375, "learning_rate": 0.00019485469533281168, "loss": 1.0961, "step": 5855 }, { "epoch": 0.15036565931817417, "grad_norm": 0.8828125, "learning_rate": 0.00019485328170906646, "loss": 1.0497, "step": 5856 }, { "epoch": 0.15039133651409597, "grad_norm": 0.81640625, "learning_rate": 0.00019485186789628726, "loss": 1.2042, "step": 5857 }, { "epoch": 0.1504170137100178, "grad_norm": 0.875, "learning_rate": 0.00019485045389447682, "loss": 1.0901, "step": 5858 }, { "epoch": 0.15044269090593962, "grad_norm": 0.8671875, "learning_rate": 0.000194849039703638, "loss": 1.064, "step": 5859 }, { "epoch": 0.15046836810186143, "grad_norm": 0.8515625, "learning_rate": 0.00019484762532377364, "loss": 1.2158, "step": 5860 }, { "epoch": 0.15049404529778326, "grad_norm": 0.80078125, "learning_rate": 0.0001948462107548865, "loss": 1.0467, "step": 5861 }, { "epoch": 0.15051972249370507, "grad_norm": 0.79296875, "learning_rate": 0.0001948447959969794, "loss": 0.9769, "step": 5862 }, { "epoch": 0.1505453996896269, "grad_norm": 0.87109375, "learning_rate": 0.00019484338105005527, "loss": 1.0628, "step": 5863 }, { "epoch": 0.15057107688554872, "grad_norm": 0.8203125, "learning_rate": 0.0001948419659141168, "loss": 0.8864, "step": 5864 }, { "epoch": 0.15059675408147052, "grad_norm": 0.8671875, "learning_rate": 0.00019484055058916687, "loss": 0.9558, "step": 5865 }, { "epoch": 0.15062243127739236, "grad_norm": 0.8125, "learning_rate": 0.00019483913507520827, "loss": 1.0137, "step": 5866 }, { "epoch": 0.15064810847331417, "grad_norm": 0.8125, "learning_rate": 0.00019483771937224385, "loss": 1.0777, "step": 5867 }, { "epoch": 0.150673785669236, "grad_norm": 0.76953125, "learning_rate": 0.00019483630348027643, "loss": 1.021, "step": 5868 }, { "epoch": 0.1506994628651578, "grad_norm": 0.89453125, "learning_rate": 0.0001948348873993088, "loss": 1.1762, "step": 5869 }, { "epoch": 0.15072514006107962, "grad_norm": 0.78515625, "learning_rate": 0.00019483347112934384, "loss": 1.0083, "step": 5870 }, { "epoch": 0.15075081725700146, "grad_norm": 0.73828125, "learning_rate": 0.00019483205467038436, "loss": 0.975, "step": 5871 }, { "epoch": 0.15077649445292327, "grad_norm": 0.86328125, "learning_rate": 0.0001948306380224331, "loss": 1.1614, "step": 5872 }, { "epoch": 0.1508021716488451, "grad_norm": 0.84375, "learning_rate": 0.00019482922118549295, "loss": 0.9835, "step": 5873 }, { "epoch": 0.1508278488447669, "grad_norm": 0.81640625, "learning_rate": 0.00019482780415956677, "loss": 1.0859, "step": 5874 }, { "epoch": 0.15085352604068872, "grad_norm": 0.796875, "learning_rate": 0.0001948263869446573, "loss": 1.06, "step": 5875 }, { "epoch": 0.15087920323661055, "grad_norm": 0.75390625, "learning_rate": 0.00019482496954076743, "loss": 1.0264, "step": 5876 }, { "epoch": 0.15090488043253236, "grad_norm": 0.8359375, "learning_rate": 0.00019482355194789996, "loss": 1.0576, "step": 5877 }, { "epoch": 0.1509305576284542, "grad_norm": 0.84375, "learning_rate": 0.00019482213416605773, "loss": 1.2093, "step": 5878 }, { "epoch": 0.150956234824376, "grad_norm": 0.7421875, "learning_rate": 0.0001948207161952435, "loss": 1.0242, "step": 5879 }, { "epoch": 0.15098191202029781, "grad_norm": 0.87890625, "learning_rate": 0.0001948192980354602, "loss": 1.1742, "step": 5880 }, { "epoch": 0.15100758921621965, "grad_norm": 0.859375, "learning_rate": 0.0001948178796867106, "loss": 1.1378, "step": 5881 }, { "epoch": 0.15103326641214146, "grad_norm": 0.8984375, "learning_rate": 0.0001948164611489975, "loss": 1.201, "step": 5882 }, { "epoch": 0.1510589436080633, "grad_norm": 0.8125, "learning_rate": 0.0001948150424223238, "loss": 1.3102, "step": 5883 }, { "epoch": 0.1510846208039851, "grad_norm": 0.8203125, "learning_rate": 0.00019481362350669225, "loss": 1.1804, "step": 5884 }, { "epoch": 0.1511102979999069, "grad_norm": 0.875, "learning_rate": 0.00019481220440210572, "loss": 1.3638, "step": 5885 }, { "epoch": 0.15113597519582875, "grad_norm": 0.85546875, "learning_rate": 0.00019481078510856704, "loss": 1.0594, "step": 5886 }, { "epoch": 0.15116165239175056, "grad_norm": 0.80078125, "learning_rate": 0.00019480936562607903, "loss": 1.0398, "step": 5887 }, { "epoch": 0.15118732958767236, "grad_norm": 0.83203125, "learning_rate": 0.0001948079459546445, "loss": 1.1567, "step": 5888 }, { "epoch": 0.1512130067835942, "grad_norm": 0.8359375, "learning_rate": 0.0001948065260942663, "loss": 1.1217, "step": 5889 }, { "epoch": 0.151238683979516, "grad_norm": 0.8046875, "learning_rate": 0.00019480510604494725, "loss": 0.9918, "step": 5890 }, { "epoch": 0.15126436117543784, "grad_norm": 0.9609375, "learning_rate": 0.00019480368580669023, "loss": 1.0968, "step": 5891 }, { "epoch": 0.15129003837135965, "grad_norm": 0.8125, "learning_rate": 0.000194802265379498, "loss": 1.0929, "step": 5892 }, { "epoch": 0.15131571556728146, "grad_norm": 0.84375, "learning_rate": 0.00019480084476337345, "loss": 1.0799, "step": 5893 }, { "epoch": 0.1513413927632033, "grad_norm": 0.76171875, "learning_rate": 0.00019479942395831933, "loss": 0.8989, "step": 5894 }, { "epoch": 0.1513670699591251, "grad_norm": 0.76953125, "learning_rate": 0.00019479800296433853, "loss": 1.1352, "step": 5895 }, { "epoch": 0.15139274715504694, "grad_norm": 0.8671875, "learning_rate": 0.00019479658178143388, "loss": 1.1677, "step": 5896 }, { "epoch": 0.15141842435096875, "grad_norm": 0.83203125, "learning_rate": 0.00019479516040960823, "loss": 1.0731, "step": 5897 }, { "epoch": 0.15144410154689056, "grad_norm": 0.875, "learning_rate": 0.00019479373884886435, "loss": 0.9052, "step": 5898 }, { "epoch": 0.1514697787428124, "grad_norm": 0.75390625, "learning_rate": 0.00019479231709920513, "loss": 0.9209, "step": 5899 }, { "epoch": 0.1514954559387342, "grad_norm": 0.8515625, "learning_rate": 0.0001947908951606334, "loss": 1.1112, "step": 5900 }, { "epoch": 0.15152113313465604, "grad_norm": 0.890625, "learning_rate": 0.00019478947303315196, "loss": 1.2, "step": 5901 }, { "epoch": 0.15154681033057785, "grad_norm": 0.75390625, "learning_rate": 0.00019478805071676365, "loss": 1.0994, "step": 5902 }, { "epoch": 0.15157248752649966, "grad_norm": 0.86328125, "learning_rate": 0.00019478662821147133, "loss": 1.1451, "step": 5903 }, { "epoch": 0.1515981647224215, "grad_norm": 0.875, "learning_rate": 0.00019478520551727784, "loss": 1.2253, "step": 5904 }, { "epoch": 0.1516238419183433, "grad_norm": 0.859375, "learning_rate": 0.00019478378263418597, "loss": 1.1253, "step": 5905 }, { "epoch": 0.15164951911426514, "grad_norm": 0.81640625, "learning_rate": 0.00019478235956219863, "loss": 1.0973, "step": 5906 }, { "epoch": 0.15167519631018694, "grad_norm": 0.83203125, "learning_rate": 0.00019478093630131852, "loss": 1.0235, "step": 5907 }, { "epoch": 0.15170087350610875, "grad_norm": 0.80078125, "learning_rate": 0.00019477951285154865, "loss": 1.0398, "step": 5908 }, { "epoch": 0.1517265507020306, "grad_norm": 0.7734375, "learning_rate": 0.00019477808921289174, "loss": 1.0783, "step": 5909 }, { "epoch": 0.1517522278979524, "grad_norm": 0.8046875, "learning_rate": 0.00019477666538535065, "loss": 1.2295, "step": 5910 }, { "epoch": 0.15177790509387423, "grad_norm": 0.82421875, "learning_rate": 0.00019477524136892824, "loss": 1.0956, "step": 5911 }, { "epoch": 0.15180358228979604, "grad_norm": 0.84375, "learning_rate": 0.00019477381716362735, "loss": 0.9847, "step": 5912 }, { "epoch": 0.15182925948571785, "grad_norm": 0.828125, "learning_rate": 0.0001947723927694508, "loss": 1.0965, "step": 5913 }, { "epoch": 0.15185493668163969, "grad_norm": 0.8203125, "learning_rate": 0.0001947709681864014, "loss": 1.1403, "step": 5914 }, { "epoch": 0.1518806138775615, "grad_norm": 0.90234375, "learning_rate": 0.00019476954341448202, "loss": 1.0677, "step": 5915 }, { "epoch": 0.15190629107348333, "grad_norm": 0.875, "learning_rate": 0.00019476811845369553, "loss": 1.1182, "step": 5916 }, { "epoch": 0.15193196826940514, "grad_norm": 0.8359375, "learning_rate": 0.0001947666933040447, "loss": 1.117, "step": 5917 }, { "epoch": 0.15195764546532695, "grad_norm": 0.91015625, "learning_rate": 0.00019476526796553248, "loss": 1.2004, "step": 5918 }, { "epoch": 0.15198332266124878, "grad_norm": 0.82421875, "learning_rate": 0.00019476384243816158, "loss": 1.2507, "step": 5919 }, { "epoch": 0.1520089998571706, "grad_norm": 0.84765625, "learning_rate": 0.00019476241672193494, "loss": 1.0971, "step": 5920 }, { "epoch": 0.15203467705309243, "grad_norm": 0.828125, "learning_rate": 0.00019476099081685533, "loss": 1.2621, "step": 5921 }, { "epoch": 0.15206035424901423, "grad_norm": 0.8828125, "learning_rate": 0.00019475956472292565, "loss": 1.1016, "step": 5922 }, { "epoch": 0.15208603144493604, "grad_norm": 0.85546875, "learning_rate": 0.00019475813844014873, "loss": 1.051, "step": 5923 }, { "epoch": 0.15211170864085788, "grad_norm": 0.80078125, "learning_rate": 0.00019475671196852735, "loss": 1.112, "step": 5924 }, { "epoch": 0.1521373858367797, "grad_norm": 0.84375, "learning_rate": 0.00019475528530806443, "loss": 1.0857, "step": 5925 }, { "epoch": 0.15216306303270152, "grad_norm": 0.87109375, "learning_rate": 0.0001947538584587628, "loss": 1.0712, "step": 5926 }, { "epoch": 0.15218874022862333, "grad_norm": 0.8359375, "learning_rate": 0.00019475243142062527, "loss": 1.0456, "step": 5927 }, { "epoch": 0.15221441742454514, "grad_norm": 0.8359375, "learning_rate": 0.0001947510041936547, "loss": 0.9992, "step": 5928 }, { "epoch": 0.15224009462046698, "grad_norm": 0.765625, "learning_rate": 0.00019474957677785394, "loss": 0.9702, "step": 5929 }, { "epoch": 0.15226577181638878, "grad_norm": 0.9296875, "learning_rate": 0.00019474814917322584, "loss": 1.168, "step": 5930 }, { "epoch": 0.15229144901231062, "grad_norm": 0.875, "learning_rate": 0.00019474672137977325, "loss": 1.0427, "step": 5931 }, { "epoch": 0.15231712620823243, "grad_norm": 0.80078125, "learning_rate": 0.000194745293397499, "loss": 1.1387, "step": 5932 }, { "epoch": 0.15234280340415424, "grad_norm": 0.80859375, "learning_rate": 0.00019474386522640592, "loss": 1.034, "step": 5933 }, { "epoch": 0.15236848060007607, "grad_norm": 0.796875, "learning_rate": 0.00019474243686649686, "loss": 1.1741, "step": 5934 }, { "epoch": 0.15239415779599788, "grad_norm": 0.80859375, "learning_rate": 0.0001947410083177747, "loss": 1.1665, "step": 5935 }, { "epoch": 0.15241983499191972, "grad_norm": 0.9296875, "learning_rate": 0.00019473957958024229, "loss": 1.0716, "step": 5936 }, { "epoch": 0.15244551218784153, "grad_norm": 0.84765625, "learning_rate": 0.00019473815065390243, "loss": 1.0451, "step": 5937 }, { "epoch": 0.15247118938376333, "grad_norm": 0.81640625, "learning_rate": 0.000194736721538758, "loss": 1.0837, "step": 5938 }, { "epoch": 0.15249686657968517, "grad_norm": 0.875, "learning_rate": 0.00019473529223481184, "loss": 1.1885, "step": 5939 }, { "epoch": 0.15252254377560698, "grad_norm": 0.80859375, "learning_rate": 0.00019473386274206683, "loss": 1.0767, "step": 5940 }, { "epoch": 0.15254822097152881, "grad_norm": 0.80078125, "learning_rate": 0.00019473243306052573, "loss": 1.0671, "step": 5941 }, { "epoch": 0.15257389816745062, "grad_norm": 0.875, "learning_rate": 0.00019473100319019148, "loss": 1.2469, "step": 5942 }, { "epoch": 0.15259957536337243, "grad_norm": 0.91015625, "learning_rate": 0.0001947295731310669, "loss": 1.0783, "step": 5943 }, { "epoch": 0.15262525255929427, "grad_norm": 0.76953125, "learning_rate": 0.00019472814288315485, "loss": 1.0396, "step": 5944 }, { "epoch": 0.15265092975521607, "grad_norm": 0.921875, "learning_rate": 0.00019472671244645816, "loss": 1.1789, "step": 5945 }, { "epoch": 0.1526766069511379, "grad_norm": 1.0546875, "learning_rate": 0.00019472528182097967, "loss": 1.1704, "step": 5946 }, { "epoch": 0.15270228414705972, "grad_norm": 0.82421875, "learning_rate": 0.00019472385100672223, "loss": 1.0395, "step": 5947 }, { "epoch": 0.15272796134298153, "grad_norm": 0.80859375, "learning_rate": 0.00019472242000368874, "loss": 1.025, "step": 5948 }, { "epoch": 0.15275363853890336, "grad_norm": 0.8125, "learning_rate": 0.00019472098881188204, "loss": 1.0492, "step": 5949 }, { "epoch": 0.15277931573482517, "grad_norm": 0.8515625, "learning_rate": 0.00019471955743130494, "loss": 0.975, "step": 5950 }, { "epoch": 0.152804992930747, "grad_norm": 0.83203125, "learning_rate": 0.00019471812586196034, "loss": 1.0289, "step": 5951 }, { "epoch": 0.15283067012666882, "grad_norm": 0.82421875, "learning_rate": 0.00019471669410385105, "loss": 0.9706, "step": 5952 }, { "epoch": 0.15285634732259062, "grad_norm": 0.80078125, "learning_rate": 0.00019471526215697997, "loss": 1.1338, "step": 5953 }, { "epoch": 0.15288202451851246, "grad_norm": 0.7890625, "learning_rate": 0.00019471383002134991, "loss": 1.0879, "step": 5954 }, { "epoch": 0.15290770171443427, "grad_norm": 0.78125, "learning_rate": 0.00019471239769696376, "loss": 1.0088, "step": 5955 }, { "epoch": 0.1529333789103561, "grad_norm": 0.78125, "learning_rate": 0.00019471096518382431, "loss": 1.0458, "step": 5956 }, { "epoch": 0.1529590561062779, "grad_norm": 0.875, "learning_rate": 0.00019470953248193452, "loss": 1.048, "step": 5957 }, { "epoch": 0.15298473330219972, "grad_norm": 0.765625, "learning_rate": 0.00019470809959129716, "loss": 0.9338, "step": 5958 }, { "epoch": 0.15301041049812156, "grad_norm": 0.82421875, "learning_rate": 0.00019470666651191508, "loss": 1.0274, "step": 5959 }, { "epoch": 0.15303608769404337, "grad_norm": 0.7890625, "learning_rate": 0.0001947052332437912, "loss": 0.9167, "step": 5960 }, { "epoch": 0.1530617648899652, "grad_norm": 0.796875, "learning_rate": 0.00019470379978692837, "loss": 1.0083, "step": 5961 }, { "epoch": 0.153087442085887, "grad_norm": 0.79296875, "learning_rate": 0.00019470236614132938, "loss": 1.0598, "step": 5962 }, { "epoch": 0.15311311928180882, "grad_norm": 0.81640625, "learning_rate": 0.00019470093230699716, "loss": 1.0752, "step": 5963 }, { "epoch": 0.15313879647773065, "grad_norm": 0.77734375, "learning_rate": 0.0001946994982839345, "loss": 1.0394, "step": 5964 }, { "epoch": 0.15316447367365246, "grad_norm": 0.80859375, "learning_rate": 0.00019469806407214433, "loss": 0.9591, "step": 5965 }, { "epoch": 0.1531901508695743, "grad_norm": 0.8203125, "learning_rate": 0.00019469662967162946, "loss": 1.0596, "step": 5966 }, { "epoch": 0.1532158280654961, "grad_norm": 0.9296875, "learning_rate": 0.00019469519508239275, "loss": 0.9725, "step": 5967 }, { "epoch": 0.15324150526141792, "grad_norm": 0.85546875, "learning_rate": 0.00019469376030443707, "loss": 1.1805, "step": 5968 }, { "epoch": 0.15326718245733975, "grad_norm": 0.8984375, "learning_rate": 0.00019469232533776533, "loss": 1.21, "step": 5969 }, { "epoch": 0.15329285965326156, "grad_norm": 0.890625, "learning_rate": 0.00019469089018238028, "loss": 1.1231, "step": 5970 }, { "epoch": 0.1533185368491834, "grad_norm": 0.80859375, "learning_rate": 0.00019468945483828485, "loss": 1.187, "step": 5971 }, { "epoch": 0.1533442140451052, "grad_norm": 0.8125, "learning_rate": 0.0001946880193054819, "loss": 1.0387, "step": 5972 }, { "epoch": 0.153369891241027, "grad_norm": 0.80078125, "learning_rate": 0.00019468658358397426, "loss": 1.0667, "step": 5973 }, { "epoch": 0.15339556843694885, "grad_norm": 0.83984375, "learning_rate": 0.00019468514767376482, "loss": 1.0722, "step": 5974 }, { "epoch": 0.15342124563287066, "grad_norm": 0.83984375, "learning_rate": 0.00019468371157485646, "loss": 0.9845, "step": 5975 }, { "epoch": 0.1534469228287925, "grad_norm": 0.8515625, "learning_rate": 0.000194682275287252, "loss": 1.0668, "step": 5976 }, { "epoch": 0.1534726000247143, "grad_norm": 0.94140625, "learning_rate": 0.00019468083881095432, "loss": 1.0804, "step": 5977 }, { "epoch": 0.1534982772206361, "grad_norm": 0.71875, "learning_rate": 0.00019467940214596628, "loss": 0.9507, "step": 5978 }, { "epoch": 0.15352395441655795, "grad_norm": 0.8515625, "learning_rate": 0.00019467796529229071, "loss": 1.03, "step": 5979 }, { "epoch": 0.15354963161247975, "grad_norm": 0.82421875, "learning_rate": 0.00019467652824993055, "loss": 1.0918, "step": 5980 }, { "epoch": 0.1535753088084016, "grad_norm": 0.8515625, "learning_rate": 0.00019467509101888864, "loss": 1.1139, "step": 5981 }, { "epoch": 0.1536009860043234, "grad_norm": 0.7890625, "learning_rate": 0.0001946736535991678, "loss": 1.2347, "step": 5982 }, { "epoch": 0.1536266632002452, "grad_norm": 0.828125, "learning_rate": 0.00019467221599077088, "loss": 1.1614, "step": 5983 }, { "epoch": 0.15365234039616704, "grad_norm": 0.86328125, "learning_rate": 0.00019467077819370084, "loss": 1.1518, "step": 5984 }, { "epoch": 0.15367801759208885, "grad_norm": 0.87890625, "learning_rate": 0.00019466934020796045, "loss": 1.0268, "step": 5985 }, { "epoch": 0.1537036947880107, "grad_norm": 0.8359375, "learning_rate": 0.00019466790203355265, "loss": 1.0554, "step": 5986 }, { "epoch": 0.1537293719839325, "grad_norm": 0.80859375, "learning_rate": 0.00019466646367048023, "loss": 1.0793, "step": 5987 }, { "epoch": 0.1537550491798543, "grad_norm": 0.8046875, "learning_rate": 0.00019466502511874615, "loss": 1.046, "step": 5988 }, { "epoch": 0.15378072637577614, "grad_norm": 0.828125, "learning_rate": 0.0001946635863783532, "loss": 0.9658, "step": 5989 }, { "epoch": 0.15380640357169795, "grad_norm": 0.859375, "learning_rate": 0.0001946621474493043, "loss": 1.0275, "step": 5990 }, { "epoch": 0.15383208076761978, "grad_norm": 0.828125, "learning_rate": 0.00019466070833160226, "loss": 1.1074, "step": 5991 }, { "epoch": 0.1538577579635416, "grad_norm": 0.80859375, "learning_rate": 0.00019465926902525001, "loss": 1.1391, "step": 5992 }, { "epoch": 0.1538834351594634, "grad_norm": 0.82421875, "learning_rate": 0.00019465782953025036, "loss": 1.1955, "step": 5993 }, { "epoch": 0.15390911235538524, "grad_norm": 0.86328125, "learning_rate": 0.0001946563898466062, "loss": 1.1746, "step": 5994 }, { "epoch": 0.15393478955130704, "grad_norm": 0.84765625, "learning_rate": 0.00019465494997432042, "loss": 1.0656, "step": 5995 }, { "epoch": 0.15396046674722888, "grad_norm": 0.8671875, "learning_rate": 0.00019465350991339586, "loss": 1.1727, "step": 5996 }, { "epoch": 0.1539861439431507, "grad_norm": 0.8515625, "learning_rate": 0.00019465206966383543, "loss": 1.0244, "step": 5997 }, { "epoch": 0.1540118211390725, "grad_norm": 0.8828125, "learning_rate": 0.00019465062922564196, "loss": 1.0508, "step": 5998 }, { "epoch": 0.15403749833499433, "grad_norm": 0.81640625, "learning_rate": 0.00019464918859881834, "loss": 1.0789, "step": 5999 }, { "epoch": 0.15406317553091614, "grad_norm": 0.8046875, "learning_rate": 0.00019464774778336742, "loss": 1.2009, "step": 6000 }, { "epoch": 0.15406317553091614, "eval_loss": 1.0851809978485107, "eval_model_preparation_time": 0.0065, "eval_runtime": 404.3626, "eval_samples_per_second": 24.73, "eval_steps_per_second": 0.774, "step": 6000 }, { "epoch": 0.15408885272683798, "grad_norm": 0.828125, "learning_rate": 0.0001946463067792921, "loss": 0.9998, "step": 6001 }, { "epoch": 0.15411452992275979, "grad_norm": 0.80078125, "learning_rate": 0.00019464486558659524, "loss": 1.0888, "step": 6002 }, { "epoch": 0.1541402071186816, "grad_norm": 0.8515625, "learning_rate": 0.00019464342420527972, "loss": 1.1131, "step": 6003 }, { "epoch": 0.15416588431460343, "grad_norm": 0.86328125, "learning_rate": 0.0001946419826353484, "loss": 1.1634, "step": 6004 }, { "epoch": 0.15419156151052524, "grad_norm": 0.8203125, "learning_rate": 0.00019464054087680417, "loss": 1.0768, "step": 6005 }, { "epoch": 0.15421723870644707, "grad_norm": 0.83984375, "learning_rate": 0.00019463909892964988, "loss": 1.2743, "step": 6006 }, { "epoch": 0.15424291590236888, "grad_norm": 0.83984375, "learning_rate": 0.0001946376567938884, "loss": 1.1455, "step": 6007 }, { "epoch": 0.1542685930982907, "grad_norm": 0.8125, "learning_rate": 0.00019463621446952264, "loss": 1.1447, "step": 6008 }, { "epoch": 0.15429427029421253, "grad_norm": 0.8359375, "learning_rate": 0.0001946347719565554, "loss": 1.0407, "step": 6009 }, { "epoch": 0.15431994749013433, "grad_norm": 0.875, "learning_rate": 0.0001946333292549897, "loss": 1.0727, "step": 6010 }, { "epoch": 0.15434562468605617, "grad_norm": 0.828125, "learning_rate": 0.00019463188636482826, "loss": 1.0912, "step": 6011 }, { "epoch": 0.15437130188197798, "grad_norm": 0.796875, "learning_rate": 0.00019463044328607402, "loss": 1.2039, "step": 6012 }, { "epoch": 0.1543969790778998, "grad_norm": 0.79296875, "learning_rate": 0.00019462900001872987, "loss": 1.0206, "step": 6013 }, { "epoch": 0.15442265627382162, "grad_norm": 0.80078125, "learning_rate": 0.00019462755656279867, "loss": 1.1256, "step": 6014 }, { "epoch": 0.15444833346974343, "grad_norm": 0.875, "learning_rate": 0.0001946261129182833, "loss": 1.1931, "step": 6015 }, { "epoch": 0.15447401066566527, "grad_norm": 0.875, "learning_rate": 0.00019462466908518663, "loss": 1.0245, "step": 6016 }, { "epoch": 0.15449968786158708, "grad_norm": 0.84375, "learning_rate": 0.00019462322506351153, "loss": 0.9794, "step": 6017 }, { "epoch": 0.15452536505750888, "grad_norm": 0.83984375, "learning_rate": 0.00019462178085326092, "loss": 1.1259, "step": 6018 }, { "epoch": 0.15455104225343072, "grad_norm": 0.90234375, "learning_rate": 0.0001946203364544376, "loss": 1.2062, "step": 6019 }, { "epoch": 0.15457671944935253, "grad_norm": 0.859375, "learning_rate": 0.00019461889186704454, "loss": 1.0224, "step": 6020 }, { "epoch": 0.15460239664527436, "grad_norm": 0.82421875, "learning_rate": 0.00019461744709108457, "loss": 0.9961, "step": 6021 }, { "epoch": 0.15462807384119617, "grad_norm": 0.8671875, "learning_rate": 0.00019461600212656056, "loss": 1.0231, "step": 6022 }, { "epoch": 0.15465375103711798, "grad_norm": 0.859375, "learning_rate": 0.00019461455697347541, "loss": 0.9687, "step": 6023 }, { "epoch": 0.15467942823303982, "grad_norm": 0.8359375, "learning_rate": 0.000194613111631832, "loss": 1.0788, "step": 6024 }, { "epoch": 0.15470510542896163, "grad_norm": 0.83203125, "learning_rate": 0.00019461166610163319, "loss": 1.1093, "step": 6025 }, { "epoch": 0.15473078262488346, "grad_norm": 0.85546875, "learning_rate": 0.0001946102203828819, "loss": 1.0648, "step": 6026 }, { "epoch": 0.15475645982080527, "grad_norm": 0.83984375, "learning_rate": 0.00019460877447558097, "loss": 1.0379, "step": 6027 }, { "epoch": 0.15478213701672708, "grad_norm": 0.8125, "learning_rate": 0.0001946073283797333, "loss": 1.0047, "step": 6028 }, { "epoch": 0.15480781421264891, "grad_norm": 0.83984375, "learning_rate": 0.00019460588209534177, "loss": 1.1892, "step": 6029 }, { "epoch": 0.15483349140857072, "grad_norm": 0.9296875, "learning_rate": 0.00019460443562240926, "loss": 1.1795, "step": 6030 }, { "epoch": 0.15485916860449256, "grad_norm": 0.796875, "learning_rate": 0.00019460298896093866, "loss": 1.0261, "step": 6031 }, { "epoch": 0.15488484580041437, "grad_norm": 0.80078125, "learning_rate": 0.00019460154211093283, "loss": 1.1103, "step": 6032 }, { "epoch": 0.15491052299633618, "grad_norm": 0.81640625, "learning_rate": 0.0001946000950723947, "loss": 0.9444, "step": 6033 }, { "epoch": 0.154936200192258, "grad_norm": 0.75, "learning_rate": 0.00019459864784532714, "loss": 0.783, "step": 6034 }, { "epoch": 0.15496187738817982, "grad_norm": 0.77734375, "learning_rate": 0.000194597200429733, "loss": 1.1611, "step": 6035 }, { "epoch": 0.15498755458410166, "grad_norm": 0.78125, "learning_rate": 0.00019459575282561518, "loss": 1.1165, "step": 6036 }, { "epoch": 0.15501323178002346, "grad_norm": 0.76953125, "learning_rate": 0.00019459430503297655, "loss": 1.0885, "step": 6037 }, { "epoch": 0.15503890897594527, "grad_norm": 0.8203125, "learning_rate": 0.00019459285705182003, "loss": 1.1182, "step": 6038 }, { "epoch": 0.1550645861718671, "grad_norm": 0.7734375, "learning_rate": 0.0001945914088821485, "loss": 0.9116, "step": 6039 }, { "epoch": 0.15509026336778892, "grad_norm": 0.8046875, "learning_rate": 0.00019458996052396484, "loss": 1.0503, "step": 6040 }, { "epoch": 0.15511594056371075, "grad_norm": 0.83984375, "learning_rate": 0.0001945885119772719, "loss": 1.1879, "step": 6041 }, { "epoch": 0.15514161775963256, "grad_norm": 0.90625, "learning_rate": 0.00019458706324207263, "loss": 1.1278, "step": 6042 }, { "epoch": 0.15516729495555437, "grad_norm": 0.85546875, "learning_rate": 0.00019458561431836988, "loss": 1.0797, "step": 6043 }, { "epoch": 0.1551929721514762, "grad_norm": 0.78125, "learning_rate": 0.00019458416520616657, "loss": 1.0581, "step": 6044 }, { "epoch": 0.155218649347398, "grad_norm": 0.86328125, "learning_rate": 0.00019458271590546553, "loss": 1.0967, "step": 6045 }, { "epoch": 0.15524432654331985, "grad_norm": 0.7421875, "learning_rate": 0.00019458126641626967, "loss": 1.0004, "step": 6046 }, { "epoch": 0.15527000373924166, "grad_norm": 0.828125, "learning_rate": 0.0001945798167385819, "loss": 1.1525, "step": 6047 }, { "epoch": 0.15529568093516347, "grad_norm": 0.92578125, "learning_rate": 0.00019457836687240514, "loss": 1.1435, "step": 6048 }, { "epoch": 0.1553213581310853, "grad_norm": 0.8046875, "learning_rate": 0.00019457691681774218, "loss": 1.0954, "step": 6049 }, { "epoch": 0.1553470353270071, "grad_norm": 0.88671875, "learning_rate": 0.000194575466574596, "loss": 0.9812, "step": 6050 }, { "epoch": 0.15537271252292895, "grad_norm": 0.84375, "learning_rate": 0.00019457401614296943, "loss": 1.1133, "step": 6051 }, { "epoch": 0.15539838971885075, "grad_norm": 0.8515625, "learning_rate": 0.00019457256552286544, "loss": 1.1167, "step": 6052 }, { "epoch": 0.15542406691477256, "grad_norm": 0.8515625, "learning_rate": 0.00019457111471428683, "loss": 1.1115, "step": 6053 }, { "epoch": 0.1554497441106944, "grad_norm": 0.85546875, "learning_rate": 0.00019456966371723654, "loss": 1.0732, "step": 6054 }, { "epoch": 0.1554754213066162, "grad_norm": 0.83203125, "learning_rate": 0.00019456821253171746, "loss": 1.1798, "step": 6055 }, { "epoch": 0.15550109850253804, "grad_norm": 0.85546875, "learning_rate": 0.00019456676115773246, "loss": 1.0509, "step": 6056 }, { "epoch": 0.15552677569845985, "grad_norm": 0.77734375, "learning_rate": 0.00019456530959528445, "loss": 1.0832, "step": 6057 }, { "epoch": 0.15555245289438166, "grad_norm": 0.890625, "learning_rate": 0.00019456385784437635, "loss": 1.2718, "step": 6058 }, { "epoch": 0.1555781300903035, "grad_norm": 0.8046875, "learning_rate": 0.00019456240590501098, "loss": 0.9842, "step": 6059 }, { "epoch": 0.1556038072862253, "grad_norm": 0.83984375, "learning_rate": 0.0001945609537771913, "loss": 1.1404, "step": 6060 }, { "epoch": 0.15562948448214714, "grad_norm": 0.79296875, "learning_rate": 0.00019455950146092017, "loss": 1.0205, "step": 6061 }, { "epoch": 0.15565516167806895, "grad_norm": 0.8515625, "learning_rate": 0.00019455804895620048, "loss": 1.1504, "step": 6062 }, { "epoch": 0.15568083887399076, "grad_norm": 0.796875, "learning_rate": 0.00019455659626303517, "loss": 1.035, "step": 6063 }, { "epoch": 0.1557065160699126, "grad_norm": 0.90234375, "learning_rate": 0.0001945551433814271, "loss": 1.1498, "step": 6064 }, { "epoch": 0.1557321932658344, "grad_norm": 0.82421875, "learning_rate": 0.00019455369031137916, "loss": 1.1355, "step": 6065 }, { "epoch": 0.15575787046175624, "grad_norm": 0.91015625, "learning_rate": 0.00019455223705289426, "loss": 1.1004, "step": 6066 }, { "epoch": 0.15578354765767805, "grad_norm": 0.8359375, "learning_rate": 0.0001945507836059753, "loss": 1.0897, "step": 6067 }, { "epoch": 0.15580922485359985, "grad_norm": 0.94140625, "learning_rate": 0.00019454932997062517, "loss": 1.1273, "step": 6068 }, { "epoch": 0.1558349020495217, "grad_norm": 0.82421875, "learning_rate": 0.00019454787614684674, "loss": 1.0991, "step": 6069 }, { "epoch": 0.1558605792454435, "grad_norm": 0.80859375, "learning_rate": 0.00019454642213464295, "loss": 0.9566, "step": 6070 }, { "epoch": 0.15588625644136533, "grad_norm": 0.8359375, "learning_rate": 0.00019454496793401668, "loss": 1.0151, "step": 6071 }, { "epoch": 0.15591193363728714, "grad_norm": 0.84375, "learning_rate": 0.00019454351354497085, "loss": 1.1543, "step": 6072 }, { "epoch": 0.15593761083320895, "grad_norm": 0.8984375, "learning_rate": 0.00019454205896750833, "loss": 1.0971, "step": 6073 }, { "epoch": 0.1559632880291308, "grad_norm": 0.828125, "learning_rate": 0.000194540604201632, "loss": 1.1455, "step": 6074 }, { "epoch": 0.1559889652250526, "grad_norm": 0.76953125, "learning_rate": 0.00019453914924734482, "loss": 1.0705, "step": 6075 }, { "epoch": 0.15601464242097443, "grad_norm": 0.83203125, "learning_rate": 0.00019453769410464965, "loss": 1.2085, "step": 6076 }, { "epoch": 0.15604031961689624, "grad_norm": 0.8671875, "learning_rate": 0.00019453623877354936, "loss": 1.232, "step": 6077 }, { "epoch": 0.15606599681281805, "grad_norm": 0.81640625, "learning_rate": 0.0001945347832540469, "loss": 1.0325, "step": 6078 }, { "epoch": 0.15609167400873988, "grad_norm": 0.765625, "learning_rate": 0.00019453332754614517, "loss": 1.0719, "step": 6079 }, { "epoch": 0.1561173512046617, "grad_norm": 0.90234375, "learning_rate": 0.00019453187164984707, "loss": 1.15, "step": 6080 }, { "epoch": 0.15614302840058353, "grad_norm": 0.90625, "learning_rate": 0.00019453041556515544, "loss": 0.9665, "step": 6081 }, { "epoch": 0.15616870559650534, "grad_norm": 0.8515625, "learning_rate": 0.00019452895929207329, "loss": 1.0646, "step": 6082 }, { "epoch": 0.15619438279242714, "grad_norm": 0.81640625, "learning_rate": 0.0001945275028306034, "loss": 0.9959, "step": 6083 }, { "epoch": 0.15622005998834898, "grad_norm": 0.87890625, "learning_rate": 0.0001945260461807488, "loss": 1.0477, "step": 6084 }, { "epoch": 0.1562457371842708, "grad_norm": 0.78515625, "learning_rate": 0.00019452458934251227, "loss": 1.1142, "step": 6085 }, { "epoch": 0.15627141438019262, "grad_norm": 0.8984375, "learning_rate": 0.00019452313231589684, "loss": 1.0748, "step": 6086 }, { "epoch": 0.15629709157611443, "grad_norm": 0.8671875, "learning_rate": 0.0001945216751009053, "loss": 1.147, "step": 6087 }, { "epoch": 0.15632276877203624, "grad_norm": 0.9375, "learning_rate": 0.0001945202176975406, "loss": 1.0541, "step": 6088 }, { "epoch": 0.15634844596795808, "grad_norm": 0.83203125, "learning_rate": 0.00019451876010580566, "loss": 1.1023, "step": 6089 }, { "epoch": 0.15637412316387989, "grad_norm": 0.79296875, "learning_rate": 0.00019451730232570333, "loss": 1.1736, "step": 6090 }, { "epoch": 0.15639980035980172, "grad_norm": 0.91015625, "learning_rate": 0.00019451584435723658, "loss": 1.2036, "step": 6091 }, { "epoch": 0.15642547755572353, "grad_norm": 0.8046875, "learning_rate": 0.0001945143862004083, "loss": 1.0188, "step": 6092 }, { "epoch": 0.15645115475164534, "grad_norm": 0.8125, "learning_rate": 0.0001945129278552214, "loss": 1.007, "step": 6093 }, { "epoch": 0.15647683194756717, "grad_norm": 0.87890625, "learning_rate": 0.00019451146932167874, "loss": 1.1962, "step": 6094 }, { "epoch": 0.15650250914348898, "grad_norm": 0.828125, "learning_rate": 0.00019451001059978324, "loss": 1.1459, "step": 6095 }, { "epoch": 0.1565281863394108, "grad_norm": 0.83984375, "learning_rate": 0.00019450855168953789, "loss": 0.9963, "step": 6096 }, { "epoch": 0.15655386353533263, "grad_norm": 0.859375, "learning_rate": 0.00019450709259094548, "loss": 1.1184, "step": 6097 }, { "epoch": 0.15657954073125444, "grad_norm": 0.75, "learning_rate": 0.000194505633304009, "loss": 1.0024, "step": 6098 }, { "epoch": 0.15660521792717627, "grad_norm": 0.8515625, "learning_rate": 0.0001945041738287313, "loss": 0.9962, "step": 6099 }, { "epoch": 0.15663089512309808, "grad_norm": 0.87109375, "learning_rate": 0.00019450271416511535, "loss": 1.2535, "step": 6100 }, { "epoch": 0.1566565723190199, "grad_norm": 0.8125, "learning_rate": 0.000194501254313164, "loss": 1.0959, "step": 6101 }, { "epoch": 0.15668224951494172, "grad_norm": 0.78125, "learning_rate": 0.0001944997942728802, "loss": 0.9177, "step": 6102 }, { "epoch": 0.15670792671086353, "grad_norm": 0.8359375, "learning_rate": 0.00019449833404426684, "loss": 1.0809, "step": 6103 }, { "epoch": 0.15673360390678537, "grad_norm": 0.8515625, "learning_rate": 0.00019449687362732684, "loss": 1.1151, "step": 6104 }, { "epoch": 0.15675928110270718, "grad_norm": 0.8828125, "learning_rate": 0.00019449541302206312, "loss": 0.9837, "step": 6105 }, { "epoch": 0.15678495829862898, "grad_norm": 0.84375, "learning_rate": 0.00019449395222847859, "loss": 1.1523, "step": 6106 }, { "epoch": 0.15681063549455082, "grad_norm": 0.7890625, "learning_rate": 0.00019449249124657613, "loss": 1.2253, "step": 6107 }, { "epoch": 0.15683631269047263, "grad_norm": 0.8203125, "learning_rate": 0.00019449103007635865, "loss": 1.0471, "step": 6108 }, { "epoch": 0.15686198988639447, "grad_norm": 0.73828125, "learning_rate": 0.0001944895687178291, "loss": 1.2108, "step": 6109 }, { "epoch": 0.15688766708231627, "grad_norm": 0.80859375, "learning_rate": 0.00019448810717099038, "loss": 1.0581, "step": 6110 }, { "epoch": 0.15691334427823808, "grad_norm": 0.84375, "learning_rate": 0.00019448664543584538, "loss": 1.0152, "step": 6111 }, { "epoch": 0.15693902147415992, "grad_norm": 0.7734375, "learning_rate": 0.00019448518351239703, "loss": 1.0241, "step": 6112 }, { "epoch": 0.15696469867008173, "grad_norm": 0.9453125, "learning_rate": 0.00019448372140064824, "loss": 1.315, "step": 6113 }, { "epoch": 0.15699037586600356, "grad_norm": 0.84375, "learning_rate": 0.00019448225910060195, "loss": 1.1319, "step": 6114 }, { "epoch": 0.15701605306192537, "grad_norm": 0.86328125, "learning_rate": 0.0001944807966122611, "loss": 1.0867, "step": 6115 }, { "epoch": 0.15704173025784718, "grad_norm": 0.8125, "learning_rate": 0.00019447933393562848, "loss": 1.041, "step": 6116 }, { "epoch": 0.15706740745376901, "grad_norm": 0.83984375, "learning_rate": 0.0001944778710707071, "loss": 1.0734, "step": 6117 }, { "epoch": 0.15709308464969082, "grad_norm": 0.81640625, "learning_rate": 0.00019447640801749985, "loss": 1.0504, "step": 6118 }, { "epoch": 0.15711876184561266, "grad_norm": 0.8125, "learning_rate": 0.00019447494477600966, "loss": 1.1174, "step": 6119 }, { "epoch": 0.15714443904153447, "grad_norm": 0.796875, "learning_rate": 0.00019447348134623944, "loss": 1.1113, "step": 6120 }, { "epoch": 0.15717011623745628, "grad_norm": 0.828125, "learning_rate": 0.00019447201772819213, "loss": 1.0023, "step": 6121 }, { "epoch": 0.1571957934333781, "grad_norm": 0.79296875, "learning_rate": 0.00019447055392187058, "loss": 0.995, "step": 6122 }, { "epoch": 0.15722147062929992, "grad_norm": 0.8359375, "learning_rate": 0.00019446908992727777, "loss": 1.1349, "step": 6123 }, { "epoch": 0.15724714782522176, "grad_norm": 0.82421875, "learning_rate": 0.0001944676257444166, "loss": 1.0351, "step": 6124 }, { "epoch": 0.15727282502114356, "grad_norm": 0.92578125, "learning_rate": 0.00019446616137329, "loss": 1.08, "step": 6125 }, { "epoch": 0.15729850221706537, "grad_norm": 0.8828125, "learning_rate": 0.00019446469681390084, "loss": 1.1271, "step": 6126 }, { "epoch": 0.1573241794129872, "grad_norm": 0.8359375, "learning_rate": 0.0001944632320662521, "loss": 0.9418, "step": 6127 }, { "epoch": 0.15734985660890902, "grad_norm": 0.88671875, "learning_rate": 0.00019446176713034668, "loss": 1.2227, "step": 6128 }, { "epoch": 0.15737553380483085, "grad_norm": 0.85546875, "learning_rate": 0.00019446030200618742, "loss": 1.0116, "step": 6129 }, { "epoch": 0.15740121100075266, "grad_norm": 0.84375, "learning_rate": 0.0001944588366937774, "loss": 1.1094, "step": 6130 }, { "epoch": 0.15742688819667447, "grad_norm": 0.8359375, "learning_rate": 0.00019445737119311939, "loss": 1.0561, "step": 6131 }, { "epoch": 0.1574525653925963, "grad_norm": 0.81640625, "learning_rate": 0.00019445590550421643, "loss": 1.0, "step": 6132 }, { "epoch": 0.1574782425885181, "grad_norm": 0.81640625, "learning_rate": 0.00019445443962707134, "loss": 1.0785, "step": 6133 }, { "epoch": 0.15750391978443995, "grad_norm": 0.83203125, "learning_rate": 0.0001944529735616871, "loss": 1.2712, "step": 6134 }, { "epoch": 0.15752959698036176, "grad_norm": 0.80078125, "learning_rate": 0.00019445150730806657, "loss": 1.0988, "step": 6135 }, { "epoch": 0.15755527417628357, "grad_norm": 0.8125, "learning_rate": 0.00019445004086621274, "loss": 1.0208, "step": 6136 }, { "epoch": 0.1575809513722054, "grad_norm": 0.91015625, "learning_rate": 0.00019444857423612854, "loss": 1.1941, "step": 6137 }, { "epoch": 0.1576066285681272, "grad_norm": 0.8125, "learning_rate": 0.00019444710741781687, "loss": 1.0027, "step": 6138 }, { "epoch": 0.15763230576404905, "grad_norm": 0.8203125, "learning_rate": 0.0001944456404112806, "loss": 0.9189, "step": 6139 }, { "epoch": 0.15765798295997085, "grad_norm": 0.87109375, "learning_rate": 0.00019444417321652272, "loss": 1.0851, "step": 6140 }, { "epoch": 0.15768366015589266, "grad_norm": 0.7578125, "learning_rate": 0.00019444270583354614, "loss": 0.9577, "step": 6141 }, { "epoch": 0.1577093373518145, "grad_norm": 0.78125, "learning_rate": 0.00019444123826235375, "loss": 1.1332, "step": 6142 }, { "epoch": 0.1577350145477363, "grad_norm": 0.7734375, "learning_rate": 0.00019443977050294855, "loss": 1.003, "step": 6143 }, { "epoch": 0.15776069174365814, "grad_norm": 0.81640625, "learning_rate": 0.0001944383025553334, "loss": 1.1188, "step": 6144 }, { "epoch": 0.15778636893957995, "grad_norm": 0.8359375, "learning_rate": 0.00019443683441951125, "loss": 1.0885, "step": 6145 }, { "epoch": 0.15781204613550176, "grad_norm": 0.78515625, "learning_rate": 0.000194435366095485, "loss": 1.1755, "step": 6146 }, { "epoch": 0.1578377233314236, "grad_norm": 0.80078125, "learning_rate": 0.0001944338975832576, "loss": 1.0333, "step": 6147 }, { "epoch": 0.1578634005273454, "grad_norm": 0.84375, "learning_rate": 0.00019443242888283197, "loss": 1.0598, "step": 6148 }, { "epoch": 0.15788907772326724, "grad_norm": 0.80078125, "learning_rate": 0.00019443095999421104, "loss": 1.1494, "step": 6149 }, { "epoch": 0.15791475491918905, "grad_norm": 0.87890625, "learning_rate": 0.00019442949091739778, "loss": 1.1391, "step": 6150 }, { "epoch": 0.15794043211511086, "grad_norm": 0.80859375, "learning_rate": 0.000194428021652395, "loss": 0.962, "step": 6151 }, { "epoch": 0.1579661093110327, "grad_norm": 0.828125, "learning_rate": 0.00019442655219920573, "loss": 1.2519, "step": 6152 }, { "epoch": 0.1579917865069545, "grad_norm": 0.765625, "learning_rate": 0.0001944250825578329, "loss": 1.088, "step": 6153 }, { "epoch": 0.15801746370287634, "grad_norm": 0.85546875, "learning_rate": 0.00019442361272827937, "loss": 1.1046, "step": 6154 }, { "epoch": 0.15804314089879815, "grad_norm": 0.81640625, "learning_rate": 0.00019442214271054816, "loss": 1.1516, "step": 6155 }, { "epoch": 0.15806881809471995, "grad_norm": 0.87109375, "learning_rate": 0.00019442067250464211, "loss": 1.0381, "step": 6156 }, { "epoch": 0.1580944952906418, "grad_norm": 0.76953125, "learning_rate": 0.0001944192021105642, "loss": 1.06, "step": 6157 }, { "epoch": 0.1581201724865636, "grad_norm": 0.83203125, "learning_rate": 0.00019441773152831734, "loss": 1.0494, "step": 6158 }, { "epoch": 0.15814584968248543, "grad_norm": 0.8359375, "learning_rate": 0.0001944162607579045, "loss": 1.1321, "step": 6159 }, { "epoch": 0.15817152687840724, "grad_norm": 0.87109375, "learning_rate": 0.00019441478979932852, "loss": 1.0787, "step": 6160 }, { "epoch": 0.15819720407432905, "grad_norm": 0.87109375, "learning_rate": 0.00019441331865259246, "loss": 1.0384, "step": 6161 }, { "epoch": 0.1582228812702509, "grad_norm": 0.8359375, "learning_rate": 0.00019441184731769915, "loss": 1.0395, "step": 6162 }, { "epoch": 0.1582485584661727, "grad_norm": 0.83984375, "learning_rate": 0.00019441037579465156, "loss": 1.11, "step": 6163 }, { "epoch": 0.15827423566209453, "grad_norm": 0.8515625, "learning_rate": 0.0001944089040834526, "loss": 1.0986, "step": 6164 }, { "epoch": 0.15829991285801634, "grad_norm": 0.81640625, "learning_rate": 0.00019440743218410525, "loss": 1.0757, "step": 6165 }, { "epoch": 0.15832559005393815, "grad_norm": 0.83203125, "learning_rate": 0.0001944059600966124, "loss": 1.0748, "step": 6166 }, { "epoch": 0.15835126724985998, "grad_norm": 0.8203125, "learning_rate": 0.00019440448782097702, "loss": 1.1537, "step": 6167 }, { "epoch": 0.1583769444457818, "grad_norm": 0.80859375, "learning_rate": 0.000194403015357202, "loss": 1.1438, "step": 6168 }, { "epoch": 0.15840262164170363, "grad_norm": 0.78515625, "learning_rate": 0.00019440154270529032, "loss": 1.028, "step": 6169 }, { "epoch": 0.15842829883762544, "grad_norm": 0.96875, "learning_rate": 0.00019440006986524486, "loss": 1.1326, "step": 6170 }, { "epoch": 0.15845397603354724, "grad_norm": 0.84375, "learning_rate": 0.0001943985968370686, "loss": 1.1927, "step": 6171 }, { "epoch": 0.15847965322946908, "grad_norm": 0.7890625, "learning_rate": 0.00019439712362076447, "loss": 0.9654, "step": 6172 }, { "epoch": 0.1585053304253909, "grad_norm": 0.84375, "learning_rate": 0.0001943956502163354, "loss": 1.0672, "step": 6173 }, { "epoch": 0.15853100762131273, "grad_norm": 0.828125, "learning_rate": 0.00019439417662378433, "loss": 1.1257, "step": 6174 }, { "epoch": 0.15855668481723453, "grad_norm": 0.8046875, "learning_rate": 0.00019439270284311419, "loss": 1.0827, "step": 6175 }, { "epoch": 0.15858236201315634, "grad_norm": 0.78515625, "learning_rate": 0.0001943912288743279, "loss": 1.1645, "step": 6176 }, { "epoch": 0.15860803920907818, "grad_norm": 0.828125, "learning_rate": 0.00019438975471742847, "loss": 1.1818, "step": 6177 }, { "epoch": 0.15863371640499999, "grad_norm": 0.8515625, "learning_rate": 0.00019438828037241873, "loss": 1.1555, "step": 6178 }, { "epoch": 0.15865939360092182, "grad_norm": 0.80859375, "learning_rate": 0.00019438680583930168, "loss": 1.0079, "step": 6179 }, { "epoch": 0.15868507079684363, "grad_norm": 0.828125, "learning_rate": 0.0001943853311180803, "loss": 1.0836, "step": 6180 }, { "epoch": 0.15871074799276544, "grad_norm": 0.7890625, "learning_rate": 0.00019438385620875744, "loss": 1.006, "step": 6181 }, { "epoch": 0.15873642518868727, "grad_norm": 0.828125, "learning_rate": 0.00019438238111133606, "loss": 1.083, "step": 6182 }, { "epoch": 0.15876210238460908, "grad_norm": 0.8359375, "learning_rate": 0.00019438090582581916, "loss": 0.9426, "step": 6183 }, { "epoch": 0.15878777958053092, "grad_norm": 0.8515625, "learning_rate": 0.00019437943035220966, "loss": 1.064, "step": 6184 }, { "epoch": 0.15881345677645273, "grad_norm": 0.79296875, "learning_rate": 0.0001943779546905104, "loss": 1.0255, "step": 6185 }, { "epoch": 0.15883913397237454, "grad_norm": 0.78515625, "learning_rate": 0.00019437647884072446, "loss": 0.9664, "step": 6186 }, { "epoch": 0.15886481116829637, "grad_norm": 0.88671875, "learning_rate": 0.00019437500280285473, "loss": 1.028, "step": 6187 }, { "epoch": 0.15889048836421818, "grad_norm": 0.80078125, "learning_rate": 0.00019437352657690414, "loss": 1.1007, "step": 6188 }, { "epoch": 0.15891616556014002, "grad_norm": 0.84375, "learning_rate": 0.0001943720501628756, "loss": 1.118, "step": 6189 }, { "epoch": 0.15894184275606182, "grad_norm": 0.7734375, "learning_rate": 0.00019437057356077212, "loss": 1.0241, "step": 6190 }, { "epoch": 0.15896751995198363, "grad_norm": 0.84765625, "learning_rate": 0.0001943690967705966, "loss": 1.1271, "step": 6191 }, { "epoch": 0.15899319714790547, "grad_norm": 0.91796875, "learning_rate": 0.000194367619792352, "loss": 1.1453, "step": 6192 }, { "epoch": 0.15901887434382728, "grad_norm": 0.8125, "learning_rate": 0.00019436614262604126, "loss": 1.1376, "step": 6193 }, { "epoch": 0.1590445515397491, "grad_norm": 0.83203125, "learning_rate": 0.0001943646652716673, "loss": 1.1731, "step": 6194 }, { "epoch": 0.15907022873567092, "grad_norm": 0.91796875, "learning_rate": 0.0001943631877292331, "loss": 1.0898, "step": 6195 }, { "epoch": 0.15909590593159273, "grad_norm": 0.828125, "learning_rate": 0.0001943617099987416, "loss": 1.1441, "step": 6196 }, { "epoch": 0.15912158312751457, "grad_norm": 0.8359375, "learning_rate": 0.00019436023208019572, "loss": 1.1081, "step": 6197 }, { "epoch": 0.15914726032343637, "grad_norm": 0.80078125, "learning_rate": 0.00019435875397359845, "loss": 1.1328, "step": 6198 }, { "epoch": 0.1591729375193582, "grad_norm": 0.7578125, "learning_rate": 0.00019435727567895268, "loss": 1.0016, "step": 6199 }, { "epoch": 0.15919861471528002, "grad_norm": 0.859375, "learning_rate": 0.00019435579719626137, "loss": 1.2292, "step": 6200 }, { "epoch": 0.15922429191120183, "grad_norm": 0.8515625, "learning_rate": 0.0001943543185255275, "loss": 1.2627, "step": 6201 }, { "epoch": 0.15924996910712366, "grad_norm": 0.8046875, "learning_rate": 0.000194352839666754, "loss": 0.9791, "step": 6202 }, { "epoch": 0.15927564630304547, "grad_norm": 0.83984375, "learning_rate": 0.00019435136061994382, "loss": 0.9924, "step": 6203 }, { "epoch": 0.1593013234989673, "grad_norm": 0.84375, "learning_rate": 0.00019434988138509985, "loss": 1.0551, "step": 6204 }, { "epoch": 0.15932700069488911, "grad_norm": 0.80078125, "learning_rate": 0.00019434840196222515, "loss": 1.1417, "step": 6205 }, { "epoch": 0.15935267789081092, "grad_norm": 0.77734375, "learning_rate": 0.00019434692235132257, "loss": 1.0267, "step": 6206 }, { "epoch": 0.15937835508673276, "grad_norm": 0.88671875, "learning_rate": 0.00019434544255239513, "loss": 1.0968, "step": 6207 }, { "epoch": 0.15940403228265457, "grad_norm": 0.81640625, "learning_rate": 0.00019434396256544573, "loss": 1.0393, "step": 6208 }, { "epoch": 0.1594297094785764, "grad_norm": 0.83203125, "learning_rate": 0.00019434248239047733, "loss": 1.0693, "step": 6209 }, { "epoch": 0.1594553866744982, "grad_norm": 0.8125, "learning_rate": 0.00019434100202749287, "loss": 1.0262, "step": 6210 }, { "epoch": 0.15948106387042002, "grad_norm": 0.90625, "learning_rate": 0.0001943395214764953, "loss": 1.1673, "step": 6211 }, { "epoch": 0.15950674106634186, "grad_norm": 0.84375, "learning_rate": 0.00019433804073748764, "loss": 1.0051, "step": 6212 }, { "epoch": 0.15953241826226366, "grad_norm": 0.85546875, "learning_rate": 0.00019433655981047278, "loss": 1.0838, "step": 6213 }, { "epoch": 0.1595580954581855, "grad_norm": 0.90234375, "learning_rate": 0.00019433507869545365, "loss": 1.084, "step": 6214 }, { "epoch": 0.1595837726541073, "grad_norm": 0.84765625, "learning_rate": 0.00019433359739243325, "loss": 0.9971, "step": 6215 }, { "epoch": 0.15960944985002912, "grad_norm": 0.828125, "learning_rate": 0.0001943321159014145, "loss": 1.1289, "step": 6216 }, { "epoch": 0.15963512704595095, "grad_norm": 0.95703125, "learning_rate": 0.00019433063422240036, "loss": 1.1298, "step": 6217 }, { "epoch": 0.15966080424187276, "grad_norm": 0.8203125, "learning_rate": 0.0001943291523553938, "loss": 1.133, "step": 6218 }, { "epoch": 0.1596864814377946, "grad_norm": 0.84765625, "learning_rate": 0.00019432767030039773, "loss": 1.2181, "step": 6219 }, { "epoch": 0.1597121586337164, "grad_norm": 0.83984375, "learning_rate": 0.00019432618805741516, "loss": 1.0111, "step": 6220 }, { "epoch": 0.15973783582963821, "grad_norm": 0.8515625, "learning_rate": 0.000194324705626449, "loss": 1.1041, "step": 6221 }, { "epoch": 0.15976351302556005, "grad_norm": 0.7890625, "learning_rate": 0.00019432322300750227, "loss": 1.1576, "step": 6222 }, { "epoch": 0.15978919022148186, "grad_norm": 0.828125, "learning_rate": 0.00019432174020057782, "loss": 1.0478, "step": 6223 }, { "epoch": 0.1598148674174037, "grad_norm": 0.71875, "learning_rate": 0.00019432025720567868, "loss": 1.1809, "step": 6224 }, { "epoch": 0.1598405446133255, "grad_norm": 0.7734375, "learning_rate": 0.0001943187740228078, "loss": 0.9984, "step": 6225 }, { "epoch": 0.1598662218092473, "grad_norm": 0.8203125, "learning_rate": 0.0001943172906519681, "loss": 0.9325, "step": 6226 }, { "epoch": 0.15989189900516915, "grad_norm": 0.8671875, "learning_rate": 0.0001943158070931626, "loss": 0.9927, "step": 6227 }, { "epoch": 0.15991757620109096, "grad_norm": 0.828125, "learning_rate": 0.00019431432334639416, "loss": 1.1364, "step": 6228 }, { "epoch": 0.1599432533970128, "grad_norm": 0.8828125, "learning_rate": 0.00019431283941166583, "loss": 1.0296, "step": 6229 }, { "epoch": 0.1599689305929346, "grad_norm": 0.8359375, "learning_rate": 0.00019431135528898052, "loss": 1.131, "step": 6230 }, { "epoch": 0.1599946077888564, "grad_norm": 0.85546875, "learning_rate": 0.00019430987097834122, "loss": 1.1306, "step": 6231 }, { "epoch": 0.16002028498477824, "grad_norm": 0.87109375, "learning_rate": 0.00019430838647975084, "loss": 1.0486, "step": 6232 }, { "epoch": 0.16004596218070005, "grad_norm": 0.84375, "learning_rate": 0.0001943069017932124, "loss": 1.1732, "step": 6233 }, { "epoch": 0.1600716393766219, "grad_norm": 0.796875, "learning_rate": 0.00019430541691872874, "loss": 0.9618, "step": 6234 }, { "epoch": 0.1600973165725437, "grad_norm": 0.87890625, "learning_rate": 0.00019430393185630298, "loss": 1.0072, "step": 6235 }, { "epoch": 0.1601229937684655, "grad_norm": 0.75, "learning_rate": 0.00019430244660593798, "loss": 1.0817, "step": 6236 }, { "epoch": 0.16014867096438734, "grad_norm": 0.734375, "learning_rate": 0.00019430096116763673, "loss": 1.1069, "step": 6237 }, { "epoch": 0.16017434816030915, "grad_norm": 0.78515625, "learning_rate": 0.00019429947554140217, "loss": 1.2286, "step": 6238 }, { "epoch": 0.16020002535623099, "grad_norm": 0.859375, "learning_rate": 0.00019429798972723727, "loss": 1.1716, "step": 6239 }, { "epoch": 0.1602257025521528, "grad_norm": 0.76953125, "learning_rate": 0.000194296503725145, "loss": 1.1649, "step": 6240 }, { "epoch": 0.1602513797480746, "grad_norm": 0.78515625, "learning_rate": 0.0001942950175351283, "loss": 0.9757, "step": 6241 }, { "epoch": 0.16027705694399644, "grad_norm": 0.828125, "learning_rate": 0.00019429353115719018, "loss": 1.0162, "step": 6242 }, { "epoch": 0.16030273413991825, "grad_norm": 0.828125, "learning_rate": 0.00019429204459133357, "loss": 1.254, "step": 6243 }, { "epoch": 0.16032841133584008, "grad_norm": 0.82421875, "learning_rate": 0.0001942905578375614, "loss": 1.0868, "step": 6244 }, { "epoch": 0.1603540885317619, "grad_norm": 0.8125, "learning_rate": 0.0001942890708958767, "loss": 1.0188, "step": 6245 }, { "epoch": 0.1603797657276837, "grad_norm": 0.8828125, "learning_rate": 0.00019428758376628238, "loss": 1.1057, "step": 6246 }, { "epoch": 0.16040544292360553, "grad_norm": 0.78515625, "learning_rate": 0.00019428609644878142, "loss": 0.98, "step": 6247 }, { "epoch": 0.16043112011952734, "grad_norm": 0.81640625, "learning_rate": 0.0001942846089433768, "loss": 1.0562, "step": 6248 }, { "epoch": 0.16045679731544918, "grad_norm": 0.83984375, "learning_rate": 0.00019428312125007144, "loss": 1.112, "step": 6249 }, { "epoch": 0.160482474511371, "grad_norm": 0.83984375, "learning_rate": 0.00019428163336886834, "loss": 1.0981, "step": 6250 }, { "epoch": 0.1605081517072928, "grad_norm": 0.7734375, "learning_rate": 0.0001942801452997705, "loss": 0.9996, "step": 6251 }, { "epoch": 0.16053382890321463, "grad_norm": 0.84765625, "learning_rate": 0.00019427865704278082, "loss": 1.0372, "step": 6252 }, { "epoch": 0.16055950609913644, "grad_norm": 0.8203125, "learning_rate": 0.0001942771685979023, "loss": 1.0138, "step": 6253 }, { "epoch": 0.16058518329505828, "grad_norm": 0.82421875, "learning_rate": 0.00019427567996513793, "loss": 1.1162, "step": 6254 }, { "epoch": 0.16061086049098008, "grad_norm": 0.87890625, "learning_rate": 0.0001942741911444906, "loss": 1.1036, "step": 6255 }, { "epoch": 0.1606365376869019, "grad_norm": 0.76953125, "learning_rate": 0.00019427270213596333, "loss": 0.994, "step": 6256 }, { "epoch": 0.16066221488282373, "grad_norm": 0.8046875, "learning_rate": 0.0001942712129395591, "loss": 0.9784, "step": 6257 }, { "epoch": 0.16068789207874554, "grad_norm": 0.84375, "learning_rate": 0.00019426972355528083, "loss": 1.107, "step": 6258 }, { "epoch": 0.16071356927466737, "grad_norm": 0.8515625, "learning_rate": 0.00019426823398313153, "loss": 1.0503, "step": 6259 }, { "epoch": 0.16073924647058918, "grad_norm": 0.80859375, "learning_rate": 0.00019426674422311414, "loss": 1.166, "step": 6260 }, { "epoch": 0.160764923666511, "grad_norm": 0.796875, "learning_rate": 0.00019426525427523167, "loss": 1.1391, "step": 6261 }, { "epoch": 0.16079060086243283, "grad_norm": 0.765625, "learning_rate": 0.00019426376413948706, "loss": 1.1458, "step": 6262 }, { "epoch": 0.16081627805835463, "grad_norm": 0.8203125, "learning_rate": 0.0001942622738158833, "loss": 1.0929, "step": 6263 }, { "epoch": 0.16084195525427647, "grad_norm": 0.8046875, "learning_rate": 0.0001942607833044233, "loss": 1.1892, "step": 6264 }, { "epoch": 0.16086763245019828, "grad_norm": 0.80078125, "learning_rate": 0.00019425929260511007, "loss": 1.2022, "step": 6265 }, { "epoch": 0.1608933096461201, "grad_norm": 0.78125, "learning_rate": 0.0001942578017179466, "loss": 1.0443, "step": 6266 }, { "epoch": 0.16091898684204192, "grad_norm": 0.83203125, "learning_rate": 0.00019425631064293585, "loss": 0.9666, "step": 6267 }, { "epoch": 0.16094466403796373, "grad_norm": 0.84375, "learning_rate": 0.0001942548193800808, "loss": 1.051, "step": 6268 }, { "epoch": 0.16097034123388557, "grad_norm": 0.73828125, "learning_rate": 0.0001942533279293844, "loss": 0.8988, "step": 6269 }, { "epoch": 0.16099601842980737, "grad_norm": 0.8984375, "learning_rate": 0.00019425183629084963, "loss": 1.0332, "step": 6270 }, { "epoch": 0.16102169562572918, "grad_norm": 0.77734375, "learning_rate": 0.00019425034446447946, "loss": 1.1481, "step": 6271 }, { "epoch": 0.16104737282165102, "grad_norm": 0.8515625, "learning_rate": 0.0001942488524502769, "loss": 1.109, "step": 6272 }, { "epoch": 0.16107305001757283, "grad_norm": 0.81640625, "learning_rate": 0.00019424736024824485, "loss": 1.0792, "step": 6273 }, { "epoch": 0.16109872721349466, "grad_norm": 0.91796875, "learning_rate": 0.00019424586785838632, "loss": 1.1161, "step": 6274 }, { "epoch": 0.16112440440941647, "grad_norm": 0.84765625, "learning_rate": 0.0001942443752807043, "loss": 0.976, "step": 6275 }, { "epoch": 0.16115008160533828, "grad_norm": 0.89453125, "learning_rate": 0.00019424288251520175, "loss": 1.1484, "step": 6276 }, { "epoch": 0.16117575880126012, "grad_norm": 0.8203125, "learning_rate": 0.00019424138956188167, "loss": 1.0702, "step": 6277 }, { "epoch": 0.16120143599718192, "grad_norm": 0.8125, "learning_rate": 0.00019423989642074698, "loss": 1.0562, "step": 6278 }, { "epoch": 0.16122711319310376, "grad_norm": 0.81640625, "learning_rate": 0.00019423840309180072, "loss": 1.0498, "step": 6279 }, { "epoch": 0.16125279038902557, "grad_norm": 0.7734375, "learning_rate": 0.00019423690957504582, "loss": 1.0373, "step": 6280 }, { "epoch": 0.16127846758494738, "grad_norm": 0.8203125, "learning_rate": 0.00019423541587048528, "loss": 1.1354, "step": 6281 }, { "epoch": 0.1613041447808692, "grad_norm": 0.84375, "learning_rate": 0.00019423392197812206, "loss": 1.1892, "step": 6282 }, { "epoch": 0.16132982197679102, "grad_norm": 0.83984375, "learning_rate": 0.00019423242789795914, "loss": 1.028, "step": 6283 }, { "epoch": 0.16135549917271286, "grad_norm": 0.796875, "learning_rate": 0.00019423093362999953, "loss": 0.9083, "step": 6284 }, { "epoch": 0.16138117636863467, "grad_norm": 0.81640625, "learning_rate": 0.00019422943917424618, "loss": 1.1995, "step": 6285 }, { "epoch": 0.16140685356455647, "grad_norm": 0.87890625, "learning_rate": 0.00019422794453070202, "loss": 1.2902, "step": 6286 }, { "epoch": 0.1614325307604783, "grad_norm": 0.81640625, "learning_rate": 0.00019422644969937012, "loss": 0.9506, "step": 6287 }, { "epoch": 0.16145820795640012, "grad_norm": 0.828125, "learning_rate": 0.0001942249546802534, "loss": 1.0282, "step": 6288 }, { "epoch": 0.16148388515232195, "grad_norm": 0.91015625, "learning_rate": 0.00019422345947335489, "loss": 1.037, "step": 6289 }, { "epoch": 0.16150956234824376, "grad_norm": 0.75, "learning_rate": 0.00019422196407867747, "loss": 1.1191, "step": 6290 }, { "epoch": 0.16153523954416557, "grad_norm": 0.88671875, "learning_rate": 0.00019422046849622423, "loss": 1.1482, "step": 6291 }, { "epoch": 0.1615609167400874, "grad_norm": 0.82421875, "learning_rate": 0.00019421897272599813, "loss": 1.0987, "step": 6292 }, { "epoch": 0.16158659393600922, "grad_norm": 1.6015625, "learning_rate": 0.00019421747676800209, "loss": 1.1847, "step": 6293 }, { "epoch": 0.16161227113193105, "grad_norm": 0.84765625, "learning_rate": 0.00019421598062223914, "loss": 1.1041, "step": 6294 }, { "epoch": 0.16163794832785286, "grad_norm": 0.83984375, "learning_rate": 0.00019421448428871225, "loss": 1.1547, "step": 6295 }, { "epoch": 0.16166362552377467, "grad_norm": 0.85546875, "learning_rate": 0.00019421298776742442, "loss": 1.1676, "step": 6296 }, { "epoch": 0.1616893027196965, "grad_norm": 0.77734375, "learning_rate": 0.0001942114910583786, "loss": 1.0102, "step": 6297 }, { "epoch": 0.1617149799156183, "grad_norm": 0.7734375, "learning_rate": 0.00019420999416157778, "loss": 0.9418, "step": 6298 }, { "epoch": 0.16174065711154015, "grad_norm": 0.78515625, "learning_rate": 0.00019420849707702496, "loss": 1.1251, "step": 6299 }, { "epoch": 0.16176633430746196, "grad_norm": 0.76953125, "learning_rate": 0.00019420699980472313, "loss": 1.083, "step": 6300 }, { "epoch": 0.16179201150338376, "grad_norm": 0.81640625, "learning_rate": 0.00019420550234467522, "loss": 0.9916, "step": 6301 }, { "epoch": 0.1618176886993056, "grad_norm": 0.8125, "learning_rate": 0.00019420400469688428, "loss": 1.1267, "step": 6302 }, { "epoch": 0.1618433658952274, "grad_norm": 0.84765625, "learning_rate": 0.00019420250686135327, "loss": 1.3763, "step": 6303 }, { "epoch": 0.16186904309114922, "grad_norm": 0.7890625, "learning_rate": 0.00019420100883808518, "loss": 0.9073, "step": 6304 }, { "epoch": 0.16189472028707105, "grad_norm": 0.7890625, "learning_rate": 0.00019419951062708298, "loss": 1.1359, "step": 6305 }, { "epoch": 0.16192039748299286, "grad_norm": 0.8125, "learning_rate": 0.00019419801222834965, "loss": 0.9859, "step": 6306 }, { "epoch": 0.1619460746789147, "grad_norm": 0.81640625, "learning_rate": 0.00019419651364188821, "loss": 1.0763, "step": 6307 }, { "epoch": 0.1619717518748365, "grad_norm": 0.8359375, "learning_rate": 0.0001941950148677016, "loss": 1.0639, "step": 6308 }, { "epoch": 0.16199742907075831, "grad_norm": 0.828125, "learning_rate": 0.00019419351590579286, "loss": 1.1248, "step": 6309 }, { "epoch": 0.16202310626668015, "grad_norm": 1.1171875, "learning_rate": 0.00019419201675616496, "loss": 1.1127, "step": 6310 }, { "epoch": 0.16204878346260196, "grad_norm": 0.76171875, "learning_rate": 0.00019419051741882085, "loss": 0.9315, "step": 6311 }, { "epoch": 0.1620744606585238, "grad_norm": 0.8046875, "learning_rate": 0.00019418901789376359, "loss": 0.972, "step": 6312 }, { "epoch": 0.1621001378544456, "grad_norm": 0.859375, "learning_rate": 0.00019418751818099607, "loss": 1.0294, "step": 6313 }, { "epoch": 0.1621258150503674, "grad_norm": 0.8046875, "learning_rate": 0.00019418601828052136, "loss": 1.1021, "step": 6314 }, { "epoch": 0.16215149224628925, "grad_norm": 1.09375, "learning_rate": 0.0001941845181923424, "loss": 1.2379, "step": 6315 }, { "epoch": 0.16217716944221106, "grad_norm": 0.765625, "learning_rate": 0.00019418301791646227, "loss": 0.8657, "step": 6316 }, { "epoch": 0.1622028466381329, "grad_norm": 0.90625, "learning_rate": 0.00019418151745288385, "loss": 1.0596, "step": 6317 }, { "epoch": 0.1622285238340547, "grad_norm": 0.859375, "learning_rate": 0.00019418001680161017, "loss": 1.0519, "step": 6318 }, { "epoch": 0.1622542010299765, "grad_norm": 0.921875, "learning_rate": 0.00019417851596264423, "loss": 0.9655, "step": 6319 }, { "epoch": 0.16227987822589834, "grad_norm": 0.83984375, "learning_rate": 0.000194177014935989, "loss": 0.9116, "step": 6320 }, { "epoch": 0.16230555542182015, "grad_norm": 0.82421875, "learning_rate": 0.00019417551372164751, "loss": 1.1511, "step": 6321 }, { "epoch": 0.162331232617742, "grad_norm": 0.8125, "learning_rate": 0.0001941740123196227, "loss": 1.0437, "step": 6322 }, { "epoch": 0.1623569098136638, "grad_norm": 0.98046875, "learning_rate": 0.0001941725107299176, "loss": 0.9798, "step": 6323 }, { "epoch": 0.1623825870095856, "grad_norm": 0.8984375, "learning_rate": 0.00019417100895253523, "loss": 0.9242, "step": 6324 }, { "epoch": 0.16240826420550744, "grad_norm": 0.828125, "learning_rate": 0.00019416950698747848, "loss": 1.1557, "step": 6325 }, { "epoch": 0.16243394140142925, "grad_norm": 0.84375, "learning_rate": 0.00019416800483475045, "loss": 1.0456, "step": 6326 }, { "epoch": 0.16245961859735109, "grad_norm": 0.82421875, "learning_rate": 0.0001941665024943541, "loss": 1.0852, "step": 6327 }, { "epoch": 0.1624852957932729, "grad_norm": 0.9375, "learning_rate": 0.0001941649999662924, "loss": 1.162, "step": 6328 }, { "epoch": 0.1625109729891947, "grad_norm": 2.765625, "learning_rate": 0.00019416349725056838, "loss": 0.9654, "step": 6329 }, { "epoch": 0.16253665018511654, "grad_norm": 0.8828125, "learning_rate": 0.00019416199434718498, "loss": 1.3226, "step": 6330 }, { "epoch": 0.16256232738103835, "grad_norm": 0.98046875, "learning_rate": 0.00019416049125614527, "loss": 1.0373, "step": 6331 }, { "epoch": 0.16258800457696018, "grad_norm": 0.8828125, "learning_rate": 0.00019415898797745218, "loss": 1.0616, "step": 6332 }, { "epoch": 0.162613681772882, "grad_norm": 0.875, "learning_rate": 0.0001941574845111087, "loss": 1.0933, "step": 6333 }, { "epoch": 0.1626393589688038, "grad_norm": 0.8046875, "learning_rate": 0.0001941559808571179, "loss": 1.0688, "step": 6334 }, { "epoch": 0.16266503616472563, "grad_norm": 0.81640625, "learning_rate": 0.00019415447701548276, "loss": 0.9489, "step": 6335 }, { "epoch": 0.16269071336064744, "grad_norm": 0.89453125, "learning_rate": 0.0001941529729862062, "loss": 1.0447, "step": 6336 }, { "epoch": 0.16271639055656928, "grad_norm": 0.83203125, "learning_rate": 0.0001941514687692913, "loss": 1.0753, "step": 6337 }, { "epoch": 0.1627420677524911, "grad_norm": 0.78125, "learning_rate": 0.00019414996436474104, "loss": 1.1647, "step": 6338 }, { "epoch": 0.1627677449484129, "grad_norm": 0.85546875, "learning_rate": 0.00019414845977255838, "loss": 0.9797, "step": 6339 }, { "epoch": 0.16279342214433473, "grad_norm": 0.796875, "learning_rate": 0.00019414695499274636, "loss": 0.9883, "step": 6340 }, { "epoch": 0.16281909934025654, "grad_norm": 0.82421875, "learning_rate": 0.00019414545002530796, "loss": 1.0721, "step": 6341 }, { "epoch": 0.16284477653617838, "grad_norm": 0.76953125, "learning_rate": 0.00019414394487024617, "loss": 1.0202, "step": 6342 }, { "epoch": 0.16287045373210018, "grad_norm": 0.83984375, "learning_rate": 0.000194142439527564, "loss": 1.1866, "step": 6343 }, { "epoch": 0.162896130928022, "grad_norm": 0.7734375, "learning_rate": 0.00019414093399726444, "loss": 1.08, "step": 6344 }, { "epoch": 0.16292180812394383, "grad_norm": 0.84375, "learning_rate": 0.00019413942827935053, "loss": 1.1168, "step": 6345 }, { "epoch": 0.16294748531986564, "grad_norm": 0.83203125, "learning_rate": 0.00019413792237382526, "loss": 1.0864, "step": 6346 }, { "epoch": 0.16297316251578747, "grad_norm": 0.890625, "learning_rate": 0.00019413641628069156, "loss": 1.0358, "step": 6347 }, { "epoch": 0.16299883971170928, "grad_norm": 0.76953125, "learning_rate": 0.00019413490999995253, "loss": 0.9823, "step": 6348 }, { "epoch": 0.1630245169076311, "grad_norm": 0.83203125, "learning_rate": 0.0001941334035316111, "loss": 1.0927, "step": 6349 }, { "epoch": 0.16305019410355293, "grad_norm": 0.75, "learning_rate": 0.00019413189687567033, "loss": 1.0451, "step": 6350 }, { "epoch": 0.16307587129947473, "grad_norm": 0.8203125, "learning_rate": 0.00019413039003213318, "loss": 1.0946, "step": 6351 }, { "epoch": 0.16310154849539657, "grad_norm": 0.86328125, "learning_rate": 0.00019412888300100267, "loss": 1.0891, "step": 6352 }, { "epoch": 0.16312722569131838, "grad_norm": 0.7890625, "learning_rate": 0.00019412737578228178, "loss": 1.0825, "step": 6353 }, { "epoch": 0.1631529028872402, "grad_norm": 1.0859375, "learning_rate": 0.00019412586837597352, "loss": 0.9314, "step": 6354 }, { "epoch": 0.16317858008316202, "grad_norm": 0.80078125, "learning_rate": 0.00019412436078208094, "loss": 1.189, "step": 6355 }, { "epoch": 0.16320425727908383, "grad_norm": 0.8671875, "learning_rate": 0.000194122853000607, "loss": 1.0216, "step": 6356 }, { "epoch": 0.16322993447500567, "grad_norm": 0.83203125, "learning_rate": 0.0001941213450315547, "loss": 1.0096, "step": 6357 }, { "epoch": 0.16325561167092748, "grad_norm": 0.80078125, "learning_rate": 0.0001941198368749271, "loss": 1.1286, "step": 6358 }, { "epoch": 0.16328128886684928, "grad_norm": 0.80859375, "learning_rate": 0.00019411832853072713, "loss": 1.1185, "step": 6359 }, { "epoch": 0.16330696606277112, "grad_norm": 0.8203125, "learning_rate": 0.00019411681999895785, "loss": 1.1582, "step": 6360 }, { "epoch": 0.16333264325869293, "grad_norm": 0.80078125, "learning_rate": 0.00019411531127962227, "loss": 1.0717, "step": 6361 }, { "epoch": 0.16335832045461476, "grad_norm": 0.828125, "learning_rate": 0.00019411380237272332, "loss": 1.0748, "step": 6362 }, { "epoch": 0.16338399765053657, "grad_norm": 0.81640625, "learning_rate": 0.0001941122932782641, "loss": 1.1038, "step": 6363 }, { "epoch": 0.16340967484645838, "grad_norm": 1.0859375, "learning_rate": 0.00019411078399624758, "loss": 1.1703, "step": 6364 }, { "epoch": 0.16343535204238022, "grad_norm": 0.84375, "learning_rate": 0.0001941092745266768, "loss": 0.8899, "step": 6365 }, { "epoch": 0.16346102923830202, "grad_norm": 0.7734375, "learning_rate": 0.00019410776486955466, "loss": 1.0852, "step": 6366 }, { "epoch": 0.16348670643422386, "grad_norm": 0.87890625, "learning_rate": 0.0001941062550248843, "loss": 1.075, "step": 6367 }, { "epoch": 0.16351238363014567, "grad_norm": 0.83203125, "learning_rate": 0.00019410474499266867, "loss": 0.9424, "step": 6368 }, { "epoch": 0.16353806082606748, "grad_norm": 0.80078125, "learning_rate": 0.00019410323477291077, "loss": 0.9926, "step": 6369 }, { "epoch": 0.1635637380219893, "grad_norm": 0.98046875, "learning_rate": 0.00019410172436561362, "loss": 0.9391, "step": 6370 }, { "epoch": 0.16358941521791112, "grad_norm": 0.83984375, "learning_rate": 0.0001941002137707802, "loss": 1.0382, "step": 6371 }, { "epoch": 0.16361509241383296, "grad_norm": 0.74609375, "learning_rate": 0.00019409870298841363, "loss": 1.1562, "step": 6372 }, { "epoch": 0.16364076960975477, "grad_norm": 0.77734375, "learning_rate": 0.00019409719201851682, "loss": 1.0703, "step": 6373 }, { "epoch": 0.16366644680567657, "grad_norm": 1.140625, "learning_rate": 0.0001940956808610928, "loss": 0.9631, "step": 6374 }, { "epoch": 0.1636921240015984, "grad_norm": 0.85546875, "learning_rate": 0.0001940941695161446, "loss": 1.1208, "step": 6375 }, { "epoch": 0.16371780119752022, "grad_norm": 0.85546875, "learning_rate": 0.00019409265798367518, "loss": 1.1185, "step": 6376 }, { "epoch": 0.16374347839344205, "grad_norm": 0.90234375, "learning_rate": 0.00019409114626368763, "loss": 1.0542, "step": 6377 }, { "epoch": 0.16376915558936386, "grad_norm": 0.84375, "learning_rate": 0.0001940896343561849, "loss": 1.081, "step": 6378 }, { "epoch": 0.16379483278528567, "grad_norm": 0.78515625, "learning_rate": 0.00019408812226117005, "loss": 0.9049, "step": 6379 }, { "epoch": 0.1638205099812075, "grad_norm": 0.8125, "learning_rate": 0.00019408660997864601, "loss": 1.0037, "step": 6380 }, { "epoch": 0.16384618717712932, "grad_norm": 0.84375, "learning_rate": 0.00019408509750861594, "loss": 1.0733, "step": 6381 }, { "epoch": 0.16387186437305115, "grad_norm": 0.8203125, "learning_rate": 0.00019408358485108274, "loss": 1.2632, "step": 6382 }, { "epoch": 0.16389754156897296, "grad_norm": 0.82421875, "learning_rate": 0.00019408207200604944, "loss": 0.9607, "step": 6383 }, { "epoch": 0.16392321876489477, "grad_norm": 0.8671875, "learning_rate": 0.00019408055897351907, "loss": 1.2046, "step": 6384 }, { "epoch": 0.1639488959608166, "grad_norm": 0.80859375, "learning_rate": 0.00019407904575349466, "loss": 1.1603, "step": 6385 }, { "epoch": 0.1639745731567384, "grad_norm": 0.81640625, "learning_rate": 0.0001940775323459792, "loss": 1.1114, "step": 6386 }, { "epoch": 0.16400025035266025, "grad_norm": 0.828125, "learning_rate": 0.0001940760187509757, "loss": 1.034, "step": 6387 }, { "epoch": 0.16402592754858206, "grad_norm": 0.83203125, "learning_rate": 0.00019407450496848722, "loss": 1.0424, "step": 6388 }, { "epoch": 0.16405160474450386, "grad_norm": 0.84765625, "learning_rate": 0.00019407299099851675, "loss": 1.0374, "step": 6389 }, { "epoch": 0.1640772819404257, "grad_norm": 0.78125, "learning_rate": 0.0001940714768410673, "loss": 1.129, "step": 6390 }, { "epoch": 0.1641029591363475, "grad_norm": 0.8046875, "learning_rate": 0.00019406996249614187, "loss": 0.9241, "step": 6391 }, { "epoch": 0.16412863633226935, "grad_norm": 0.7734375, "learning_rate": 0.00019406844796374353, "loss": 1.0626, "step": 6392 }, { "epoch": 0.16415431352819115, "grad_norm": 0.72265625, "learning_rate": 0.00019406693324387525, "loss": 1.0975, "step": 6393 }, { "epoch": 0.16417999072411296, "grad_norm": 0.90234375, "learning_rate": 0.0001940654183365401, "loss": 1.199, "step": 6394 }, { "epoch": 0.1642056679200348, "grad_norm": 0.984375, "learning_rate": 0.00019406390324174105, "loss": 1.0193, "step": 6395 }, { "epoch": 0.1642313451159566, "grad_norm": 0.7734375, "learning_rate": 0.00019406238795948114, "loss": 1.0841, "step": 6396 }, { "epoch": 0.16425702231187844, "grad_norm": 0.91015625, "learning_rate": 0.00019406087248976338, "loss": 1.1184, "step": 6397 }, { "epoch": 0.16428269950780025, "grad_norm": 0.984375, "learning_rate": 0.0001940593568325908, "loss": 1.0929, "step": 6398 }, { "epoch": 0.16430837670372206, "grad_norm": 0.83984375, "learning_rate": 0.0001940578409879664, "loss": 1.1008, "step": 6399 }, { "epoch": 0.1643340538996439, "grad_norm": 0.84375, "learning_rate": 0.00019405632495589324, "loss": 1.0957, "step": 6400 }, { "epoch": 0.1643597310955657, "grad_norm": 0.86328125, "learning_rate": 0.00019405480873637432, "loss": 1.0054, "step": 6401 }, { "epoch": 0.16438540829148754, "grad_norm": 0.8671875, "learning_rate": 0.00019405329232941268, "loss": 1.0646, "step": 6402 }, { "epoch": 0.16441108548740935, "grad_norm": 0.84375, "learning_rate": 0.0001940517757350113, "loss": 1.1465, "step": 6403 }, { "epoch": 0.16443676268333116, "grad_norm": 0.7890625, "learning_rate": 0.00019405025895317325, "loss": 0.9406, "step": 6404 }, { "epoch": 0.164462439879253, "grad_norm": 0.90234375, "learning_rate": 0.00019404874198390148, "loss": 1.0478, "step": 6405 }, { "epoch": 0.1644881170751748, "grad_norm": 0.83203125, "learning_rate": 0.0001940472248271991, "loss": 1.0065, "step": 6406 }, { "epoch": 0.16451379427109664, "grad_norm": 0.8203125, "learning_rate": 0.0001940457074830691, "loss": 1.0311, "step": 6407 }, { "epoch": 0.16453947146701844, "grad_norm": 0.82421875, "learning_rate": 0.00019404418995151447, "loss": 1.0518, "step": 6408 }, { "epoch": 0.16456514866294025, "grad_norm": 0.984375, "learning_rate": 0.00019404267223253828, "loss": 1.1086, "step": 6409 }, { "epoch": 0.1645908258588621, "grad_norm": 0.828125, "learning_rate": 0.00019404115432614355, "loss": 0.938, "step": 6410 }, { "epoch": 0.1646165030547839, "grad_norm": 0.8046875, "learning_rate": 0.0001940396362323333, "loss": 0.9732, "step": 6411 }, { "epoch": 0.16464218025070573, "grad_norm": 0.93359375, "learning_rate": 0.00019403811795111052, "loss": 1.0483, "step": 6412 }, { "epoch": 0.16466785744662754, "grad_norm": 0.8828125, "learning_rate": 0.0001940365994824783, "loss": 1.1147, "step": 6413 }, { "epoch": 0.16469353464254935, "grad_norm": 0.875, "learning_rate": 0.00019403508082643963, "loss": 0.9717, "step": 6414 }, { "epoch": 0.16471921183847119, "grad_norm": 0.87109375, "learning_rate": 0.00019403356198299752, "loss": 1.2198, "step": 6415 }, { "epoch": 0.164744889034393, "grad_norm": 0.93359375, "learning_rate": 0.000194032042952155, "loss": 1.0888, "step": 6416 }, { "epoch": 0.16477056623031483, "grad_norm": 0.9609375, "learning_rate": 0.00019403052373391516, "loss": 1.1721, "step": 6417 }, { "epoch": 0.16479624342623664, "grad_norm": 0.8515625, "learning_rate": 0.00019402900432828092, "loss": 0.9075, "step": 6418 }, { "epoch": 0.16482192062215845, "grad_norm": 1.0, "learning_rate": 0.0001940274847352554, "loss": 1.1783, "step": 6419 }, { "epoch": 0.16484759781808028, "grad_norm": 0.91796875, "learning_rate": 0.0001940259649548416, "loss": 1.0849, "step": 6420 }, { "epoch": 0.1648732750140021, "grad_norm": 0.8671875, "learning_rate": 0.00019402444498704252, "loss": 1.1229, "step": 6421 }, { "epoch": 0.16489895220992393, "grad_norm": 0.78125, "learning_rate": 0.00019402292483186123, "loss": 1.1491, "step": 6422 }, { "epoch": 0.16492462940584574, "grad_norm": 0.84765625, "learning_rate": 0.00019402140448930078, "loss": 1.0217, "step": 6423 }, { "epoch": 0.16495030660176754, "grad_norm": 0.89453125, "learning_rate": 0.0001940198839593641, "loss": 1.1031, "step": 6424 }, { "epoch": 0.16497598379768938, "grad_norm": 0.85546875, "learning_rate": 0.00019401836324205434, "loss": 1.2159, "step": 6425 }, { "epoch": 0.1650016609936112, "grad_norm": 0.91796875, "learning_rate": 0.00019401684233737445, "loss": 0.9826, "step": 6426 }, { "epoch": 0.16502733818953302, "grad_norm": 0.9140625, "learning_rate": 0.00019401532124532748, "loss": 1.0865, "step": 6427 }, { "epoch": 0.16505301538545483, "grad_norm": 0.8515625, "learning_rate": 0.00019401379996591647, "loss": 1.1564, "step": 6428 }, { "epoch": 0.16507869258137664, "grad_norm": 0.81640625, "learning_rate": 0.00019401227849914445, "loss": 1.0156, "step": 6429 }, { "epoch": 0.16510436977729848, "grad_norm": 0.85546875, "learning_rate": 0.00019401075684501447, "loss": 1.1359, "step": 6430 }, { "epoch": 0.16513004697322028, "grad_norm": 0.8046875, "learning_rate": 0.0001940092350035295, "loss": 1.003, "step": 6431 }, { "epoch": 0.16515572416914212, "grad_norm": 0.81640625, "learning_rate": 0.00019400771297469266, "loss": 1.1651, "step": 6432 }, { "epoch": 0.16518140136506393, "grad_norm": 0.7890625, "learning_rate": 0.0001940061907585069, "loss": 1.1641, "step": 6433 }, { "epoch": 0.16520707856098574, "grad_norm": 0.88671875, "learning_rate": 0.00019400466835497532, "loss": 1.2577, "step": 6434 }, { "epoch": 0.16523275575690757, "grad_norm": 1.21875, "learning_rate": 0.00019400314576410095, "loss": 0.9743, "step": 6435 }, { "epoch": 0.16525843295282938, "grad_norm": 0.80859375, "learning_rate": 0.00019400162298588674, "loss": 1.1442, "step": 6436 }, { "epoch": 0.16528411014875122, "grad_norm": 0.8515625, "learning_rate": 0.00019400010002033584, "loss": 1.1145, "step": 6437 }, { "epoch": 0.16530978734467303, "grad_norm": 0.84375, "learning_rate": 0.0001939985768674512, "loss": 1.029, "step": 6438 }, { "epoch": 0.16533546454059483, "grad_norm": 0.79296875, "learning_rate": 0.00019399705352723593, "loss": 0.9281, "step": 6439 }, { "epoch": 0.16536114173651667, "grad_norm": 0.86328125, "learning_rate": 0.000193995529999693, "loss": 1.0946, "step": 6440 }, { "epoch": 0.16538681893243848, "grad_norm": 0.875, "learning_rate": 0.00019399400628482543, "loss": 1.0941, "step": 6441 }, { "epoch": 0.16541249612836031, "grad_norm": 0.8203125, "learning_rate": 0.00019399248238263634, "loss": 1.0129, "step": 6442 }, { "epoch": 0.16543817332428212, "grad_norm": 0.7890625, "learning_rate": 0.0001939909582931287, "loss": 1.2948, "step": 6443 }, { "epoch": 0.16546385052020393, "grad_norm": 0.84375, "learning_rate": 0.00019398943401630558, "loss": 1.1079, "step": 6444 }, { "epoch": 0.16548952771612577, "grad_norm": 0.84375, "learning_rate": 0.00019398790955217003, "loss": 0.9616, "step": 6445 }, { "epoch": 0.16551520491204758, "grad_norm": 0.859375, "learning_rate": 0.00019398638490072503, "loss": 0.966, "step": 6446 }, { "epoch": 0.1655408821079694, "grad_norm": 0.859375, "learning_rate": 0.00019398486006197367, "loss": 1.0149, "step": 6447 }, { "epoch": 0.16556655930389122, "grad_norm": 0.84765625, "learning_rate": 0.000193983335035919, "loss": 1.0742, "step": 6448 }, { "epoch": 0.16559223649981303, "grad_norm": 0.90625, "learning_rate": 0.00019398180982256399, "loss": 1.0885, "step": 6449 }, { "epoch": 0.16561791369573486, "grad_norm": 0.87109375, "learning_rate": 0.00019398028442191173, "loss": 1.1131, "step": 6450 }, { "epoch": 0.16564359089165667, "grad_norm": 0.84375, "learning_rate": 0.00019397875883396526, "loss": 1.0122, "step": 6451 }, { "epoch": 0.1656692680875785, "grad_norm": 0.86328125, "learning_rate": 0.00019397723305872763, "loss": 1.1066, "step": 6452 }, { "epoch": 0.16569494528350032, "grad_norm": 0.99609375, "learning_rate": 0.00019397570709620183, "loss": 1.0553, "step": 6453 }, { "epoch": 0.16572062247942212, "grad_norm": 0.74609375, "learning_rate": 0.00019397418094639095, "loss": 0.9962, "step": 6454 }, { "epoch": 0.16574629967534396, "grad_norm": 0.80859375, "learning_rate": 0.00019397265460929802, "loss": 1.0074, "step": 6455 }, { "epoch": 0.16577197687126577, "grad_norm": 0.79296875, "learning_rate": 0.00019397112808492608, "loss": 1.0063, "step": 6456 }, { "epoch": 0.1657976540671876, "grad_norm": 0.76171875, "learning_rate": 0.0001939696013732782, "loss": 1.1077, "step": 6457 }, { "epoch": 0.1658233312631094, "grad_norm": 0.83203125, "learning_rate": 0.00019396807447435733, "loss": 1.2133, "step": 6458 }, { "epoch": 0.16584900845903122, "grad_norm": 0.7578125, "learning_rate": 0.00019396654738816662, "loss": 1.1652, "step": 6459 }, { "epoch": 0.16587468565495306, "grad_norm": 0.84765625, "learning_rate": 0.00019396502011470904, "loss": 1.0141, "step": 6460 }, { "epoch": 0.16590036285087487, "grad_norm": 0.859375, "learning_rate": 0.00019396349265398769, "loss": 1.0401, "step": 6461 }, { "epoch": 0.1659260400467967, "grad_norm": 0.7734375, "learning_rate": 0.00019396196500600556, "loss": 0.9747, "step": 6462 }, { "epoch": 0.1659517172427185, "grad_norm": 0.7890625, "learning_rate": 0.00019396043717076573, "loss": 0.9153, "step": 6463 }, { "epoch": 0.16597739443864032, "grad_norm": 0.796875, "learning_rate": 0.00019395890914827125, "loss": 1.041, "step": 6464 }, { "epoch": 0.16600307163456215, "grad_norm": 0.83984375, "learning_rate": 0.00019395738093852514, "loss": 1.1705, "step": 6465 }, { "epoch": 0.16602874883048396, "grad_norm": 0.7890625, "learning_rate": 0.00019395585254153045, "loss": 0.9217, "step": 6466 }, { "epoch": 0.1660544260264058, "grad_norm": 0.80078125, "learning_rate": 0.00019395432395729025, "loss": 1.0481, "step": 6467 }, { "epoch": 0.1660801032223276, "grad_norm": 0.8671875, "learning_rate": 0.00019395279518580756, "loss": 1.0491, "step": 6468 }, { "epoch": 0.16610578041824942, "grad_norm": 0.859375, "learning_rate": 0.00019395126622708545, "loss": 1.2177, "step": 6469 }, { "epoch": 0.16613145761417125, "grad_norm": 0.77734375, "learning_rate": 0.0001939497370811269, "loss": 1.0234, "step": 6470 }, { "epoch": 0.16615713481009306, "grad_norm": 0.8359375, "learning_rate": 0.0001939482077479351, "loss": 1.1803, "step": 6471 }, { "epoch": 0.1661828120060149, "grad_norm": 0.86328125, "learning_rate": 0.00019394667822751293, "loss": 1.136, "step": 6472 }, { "epoch": 0.1662084892019367, "grad_norm": 0.875, "learning_rate": 0.00019394514851986356, "loss": 1.1039, "step": 6473 }, { "epoch": 0.1662341663978585, "grad_norm": 0.84375, "learning_rate": 0.00019394361862498996, "loss": 1.0588, "step": 6474 }, { "epoch": 0.16625984359378035, "grad_norm": 0.8828125, "learning_rate": 0.00019394208854289525, "loss": 1.0235, "step": 6475 }, { "epoch": 0.16628552078970216, "grad_norm": 0.84375, "learning_rate": 0.00019394055827358242, "loss": 1.1215, "step": 6476 }, { "epoch": 0.166311197985624, "grad_norm": 0.90234375, "learning_rate": 0.00019393902781705456, "loss": 1.0981, "step": 6477 }, { "epoch": 0.1663368751815458, "grad_norm": 0.8046875, "learning_rate": 0.00019393749717331468, "loss": 0.9029, "step": 6478 }, { "epoch": 0.1663625523774676, "grad_norm": 0.74609375, "learning_rate": 0.0001939359663423659, "loss": 1.1119, "step": 6479 }, { "epoch": 0.16638822957338945, "grad_norm": 0.90625, "learning_rate": 0.00019393443532421116, "loss": 1.1811, "step": 6480 }, { "epoch": 0.16641390676931125, "grad_norm": 0.8046875, "learning_rate": 0.0001939329041188536, "loss": 1.2285, "step": 6481 }, { "epoch": 0.1664395839652331, "grad_norm": 0.86328125, "learning_rate": 0.00019393137272629628, "loss": 0.9814, "step": 6482 }, { "epoch": 0.1664652611611549, "grad_norm": 0.87109375, "learning_rate": 0.00019392984114654217, "loss": 1.1091, "step": 6483 }, { "epoch": 0.1664909383570767, "grad_norm": 0.8359375, "learning_rate": 0.0001939283093795944, "loss": 1.1673, "step": 6484 }, { "epoch": 0.16651661555299854, "grad_norm": 0.9296875, "learning_rate": 0.000193926777425456, "loss": 0.9268, "step": 6485 }, { "epoch": 0.16654229274892035, "grad_norm": 0.88671875, "learning_rate": 0.00019392524528413, "loss": 1.3653, "step": 6486 }, { "epoch": 0.1665679699448422, "grad_norm": 0.80859375, "learning_rate": 0.00019392371295561946, "loss": 1.0497, "step": 6487 }, { "epoch": 0.166593647140764, "grad_norm": 0.76953125, "learning_rate": 0.00019392218043992748, "loss": 1.1183, "step": 6488 }, { "epoch": 0.1666193243366858, "grad_norm": 0.984375, "learning_rate": 0.00019392064773705704, "loss": 1.0809, "step": 6489 }, { "epoch": 0.16664500153260764, "grad_norm": 0.8828125, "learning_rate": 0.00019391911484701124, "loss": 1.1659, "step": 6490 }, { "epoch": 0.16667067872852945, "grad_norm": 0.8828125, "learning_rate": 0.00019391758176979317, "loss": 1.1248, "step": 6491 }, { "epoch": 0.16669635592445128, "grad_norm": 1.0078125, "learning_rate": 0.00019391604850540578, "loss": 1.0933, "step": 6492 }, { "epoch": 0.1667220331203731, "grad_norm": 0.97265625, "learning_rate": 0.00019391451505385224, "loss": 1.088, "step": 6493 }, { "epoch": 0.1667477103162949, "grad_norm": 0.77734375, "learning_rate": 0.00019391298141513552, "loss": 1.1473, "step": 6494 }, { "epoch": 0.16677338751221674, "grad_norm": 0.921875, "learning_rate": 0.00019391144758925871, "loss": 1.0634, "step": 6495 }, { "epoch": 0.16679906470813854, "grad_norm": 0.77734375, "learning_rate": 0.0001939099135762249, "loss": 0.9573, "step": 6496 }, { "epoch": 0.16682474190406038, "grad_norm": 0.89453125, "learning_rate": 0.0001939083793760371, "loss": 1.1798, "step": 6497 }, { "epoch": 0.1668504190999822, "grad_norm": 0.7890625, "learning_rate": 0.0001939068449886984, "loss": 1.1331, "step": 6498 }, { "epoch": 0.166876096295904, "grad_norm": 0.8203125, "learning_rate": 0.0001939053104142118, "loss": 1.0612, "step": 6499 }, { "epoch": 0.16690177349182583, "grad_norm": 0.79296875, "learning_rate": 0.00019390377565258043, "loss": 1.091, "step": 6500 }, { "epoch": 0.16692745068774764, "grad_norm": 0.79296875, "learning_rate": 0.0001939022407038073, "loss": 1.0298, "step": 6501 }, { "epoch": 0.16695312788366948, "grad_norm": 0.81640625, "learning_rate": 0.0001939007055678955, "loss": 0.9919, "step": 6502 }, { "epoch": 0.16697880507959129, "grad_norm": 0.83984375, "learning_rate": 0.00019389917024484807, "loss": 1.0407, "step": 6503 }, { "epoch": 0.1670044822755131, "grad_norm": 0.80078125, "learning_rate": 0.00019389763473466807, "loss": 1.1524, "step": 6504 }, { "epoch": 0.16703015947143493, "grad_norm": 0.83984375, "learning_rate": 0.00019389609903735857, "loss": 1.238, "step": 6505 }, { "epoch": 0.16705583666735674, "grad_norm": 0.85546875, "learning_rate": 0.00019389456315292263, "loss": 0.951, "step": 6506 }, { "epoch": 0.16708151386327855, "grad_norm": 0.890625, "learning_rate": 0.0001938930270813633, "loss": 0.9859, "step": 6507 }, { "epoch": 0.16710719105920038, "grad_norm": 0.82421875, "learning_rate": 0.00019389149082268365, "loss": 1.0221, "step": 6508 }, { "epoch": 0.1671328682551222, "grad_norm": 0.7890625, "learning_rate": 0.00019388995437688675, "loss": 1.0569, "step": 6509 }, { "epoch": 0.16715854545104403, "grad_norm": 0.82421875, "learning_rate": 0.00019388841774397567, "loss": 1.004, "step": 6510 }, { "epoch": 0.16718422264696584, "grad_norm": 0.80078125, "learning_rate": 0.0001938868809239534, "loss": 1.2186, "step": 6511 }, { "epoch": 0.16720989984288764, "grad_norm": 0.80078125, "learning_rate": 0.0001938853439168231, "loss": 0.991, "step": 6512 }, { "epoch": 0.16723557703880948, "grad_norm": 0.81640625, "learning_rate": 0.0001938838067225878, "loss": 1.1458, "step": 6513 }, { "epoch": 0.1672612542347313, "grad_norm": 0.8046875, "learning_rate": 0.0001938822693412505, "loss": 1.035, "step": 6514 }, { "epoch": 0.16728693143065312, "grad_norm": 0.796875, "learning_rate": 0.00019388073177281436, "loss": 0.9739, "step": 6515 }, { "epoch": 0.16731260862657493, "grad_norm": 0.8671875, "learning_rate": 0.00019387919401728237, "loss": 1.2579, "step": 6516 }, { "epoch": 0.16733828582249674, "grad_norm": 0.7421875, "learning_rate": 0.00019387765607465767, "loss": 1.0661, "step": 6517 }, { "epoch": 0.16736396301841858, "grad_norm": 0.875, "learning_rate": 0.00019387611794494325, "loss": 1.0749, "step": 6518 }, { "epoch": 0.16738964021434038, "grad_norm": 0.77734375, "learning_rate": 0.00019387457962814222, "loss": 1.2006, "step": 6519 }, { "epoch": 0.16741531741026222, "grad_norm": 0.8046875, "learning_rate": 0.00019387304112425762, "loss": 1.2715, "step": 6520 }, { "epoch": 0.16744099460618403, "grad_norm": 0.79296875, "learning_rate": 0.00019387150243329254, "loss": 1.1168, "step": 6521 }, { "epoch": 0.16746667180210584, "grad_norm": 0.78515625, "learning_rate": 0.00019386996355525002, "loss": 1.0753, "step": 6522 }, { "epoch": 0.16749234899802767, "grad_norm": 0.7890625, "learning_rate": 0.00019386842449013316, "loss": 1.1082, "step": 6523 }, { "epoch": 0.16751802619394948, "grad_norm": 0.89453125, "learning_rate": 0.00019386688523794502, "loss": 1.2608, "step": 6524 }, { "epoch": 0.16754370338987132, "grad_norm": 0.7890625, "learning_rate": 0.00019386534579868864, "loss": 0.9255, "step": 6525 }, { "epoch": 0.16756938058579313, "grad_norm": 0.796875, "learning_rate": 0.00019386380617236707, "loss": 1.0905, "step": 6526 }, { "epoch": 0.16759505778171493, "grad_norm": 0.75390625, "learning_rate": 0.00019386226635898345, "loss": 0.9231, "step": 6527 }, { "epoch": 0.16762073497763677, "grad_norm": 0.77734375, "learning_rate": 0.0001938607263585408, "loss": 1.0436, "step": 6528 }, { "epoch": 0.16764641217355858, "grad_norm": 0.80078125, "learning_rate": 0.00019385918617104223, "loss": 1.0361, "step": 6529 }, { "epoch": 0.16767208936948041, "grad_norm": 0.76171875, "learning_rate": 0.00019385764579649074, "loss": 1.1418, "step": 6530 }, { "epoch": 0.16769776656540222, "grad_norm": 0.921875, "learning_rate": 0.00019385610523488947, "loss": 1.2061, "step": 6531 }, { "epoch": 0.16772344376132403, "grad_norm": 0.8671875, "learning_rate": 0.00019385456448624147, "loss": 1.1858, "step": 6532 }, { "epoch": 0.16774912095724587, "grad_norm": 0.83203125, "learning_rate": 0.00019385302355054976, "loss": 1.1093, "step": 6533 }, { "epoch": 0.16777479815316768, "grad_norm": 0.93359375, "learning_rate": 0.00019385148242781749, "loss": 1.1656, "step": 6534 }, { "epoch": 0.1678004753490895, "grad_norm": 0.796875, "learning_rate": 0.00019384994111804765, "loss": 0.9884, "step": 6535 }, { "epoch": 0.16782615254501132, "grad_norm": 0.80078125, "learning_rate": 0.00019384839962124337, "loss": 0.9514, "step": 6536 }, { "epoch": 0.16785182974093313, "grad_norm": 0.91796875, "learning_rate": 0.00019384685793740774, "loss": 1.347, "step": 6537 }, { "epoch": 0.16787750693685496, "grad_norm": 0.78125, "learning_rate": 0.00019384531606654376, "loss": 1.0769, "step": 6538 }, { "epoch": 0.16790318413277677, "grad_norm": 0.8671875, "learning_rate": 0.00019384377400865455, "loss": 0.9756, "step": 6539 }, { "epoch": 0.1679288613286986, "grad_norm": 0.8125, "learning_rate": 0.0001938422317637432, "loss": 1.0467, "step": 6540 }, { "epoch": 0.16795453852462042, "grad_norm": 0.79296875, "learning_rate": 0.00019384068933181274, "loss": 0.8997, "step": 6541 }, { "epoch": 0.16798021572054223, "grad_norm": 0.796875, "learning_rate": 0.00019383914671286626, "loss": 1.0795, "step": 6542 }, { "epoch": 0.16800589291646406, "grad_norm": 0.7890625, "learning_rate": 0.00019383760390690683, "loss": 1.052, "step": 6543 }, { "epoch": 0.16803157011238587, "grad_norm": 0.78125, "learning_rate": 0.00019383606091393757, "loss": 1.0528, "step": 6544 }, { "epoch": 0.1680572473083077, "grad_norm": 0.79296875, "learning_rate": 0.00019383451773396146, "loss": 1.0503, "step": 6545 }, { "epoch": 0.16808292450422951, "grad_norm": 0.83203125, "learning_rate": 0.0001938329743669817, "loss": 0.9549, "step": 6546 }, { "epoch": 0.16810860170015132, "grad_norm": 0.7578125, "learning_rate": 0.00019383143081300123, "loss": 1.088, "step": 6547 }, { "epoch": 0.16813427889607316, "grad_norm": 0.8828125, "learning_rate": 0.00019382988707202322, "loss": 1.0002, "step": 6548 }, { "epoch": 0.16815995609199497, "grad_norm": 0.953125, "learning_rate": 0.00019382834314405075, "loss": 1.1272, "step": 6549 }, { "epoch": 0.1681856332879168, "grad_norm": 0.796875, "learning_rate": 0.00019382679902908682, "loss": 1.058, "step": 6550 }, { "epoch": 0.1682113104838386, "grad_norm": 0.828125, "learning_rate": 0.0001938252547271346, "loss": 1.0989, "step": 6551 }, { "epoch": 0.16823698767976042, "grad_norm": 0.84765625, "learning_rate": 0.0001938237102381971, "loss": 1.1215, "step": 6552 }, { "epoch": 0.16826266487568226, "grad_norm": 0.79296875, "learning_rate": 0.0001938221655622774, "loss": 1.0972, "step": 6553 }, { "epoch": 0.16828834207160406, "grad_norm": 0.8203125, "learning_rate": 0.00019382062069937862, "loss": 0.9396, "step": 6554 }, { "epoch": 0.1683140192675259, "grad_norm": 0.890625, "learning_rate": 0.00019381907564950383, "loss": 1.1721, "step": 6555 }, { "epoch": 0.1683396964634477, "grad_norm": 0.8046875, "learning_rate": 0.00019381753041265606, "loss": 1.1349, "step": 6556 }, { "epoch": 0.16836537365936952, "grad_norm": 0.8671875, "learning_rate": 0.00019381598498883847, "loss": 1.0598, "step": 6557 }, { "epoch": 0.16839105085529135, "grad_norm": 1.4140625, "learning_rate": 0.00019381443937805407, "loss": 1.0701, "step": 6558 }, { "epoch": 0.16841672805121316, "grad_norm": 0.83203125, "learning_rate": 0.000193812893580306, "loss": 0.9166, "step": 6559 }, { "epoch": 0.168442405247135, "grad_norm": 0.8515625, "learning_rate": 0.00019381134759559725, "loss": 1.0586, "step": 6560 }, { "epoch": 0.1684680824430568, "grad_norm": 0.9296875, "learning_rate": 0.000193809801423931, "loss": 1.1598, "step": 6561 }, { "epoch": 0.1684937596389786, "grad_norm": 0.82421875, "learning_rate": 0.00019380825506531027, "loss": 1.0685, "step": 6562 }, { "epoch": 0.16851943683490045, "grad_norm": 0.828125, "learning_rate": 0.00019380670851973817, "loss": 0.9097, "step": 6563 }, { "epoch": 0.16854511403082226, "grad_norm": 0.8828125, "learning_rate": 0.00019380516178721778, "loss": 1.097, "step": 6564 }, { "epoch": 0.1685707912267441, "grad_norm": 0.85546875, "learning_rate": 0.00019380361486775216, "loss": 1.0805, "step": 6565 }, { "epoch": 0.1685964684226659, "grad_norm": 0.87890625, "learning_rate": 0.0001938020677613444, "loss": 1.233, "step": 6566 }, { "epoch": 0.1686221456185877, "grad_norm": 0.8046875, "learning_rate": 0.0001938005204679976, "loss": 1.1098, "step": 6567 }, { "epoch": 0.16864782281450955, "grad_norm": 0.84765625, "learning_rate": 0.00019379897298771488, "loss": 1.0828, "step": 6568 }, { "epoch": 0.16867350001043135, "grad_norm": 0.82421875, "learning_rate": 0.00019379742532049923, "loss": 1.1666, "step": 6569 }, { "epoch": 0.1686991772063532, "grad_norm": 0.90625, "learning_rate": 0.0001937958774663538, "loss": 1.0104, "step": 6570 }, { "epoch": 0.168724854402275, "grad_norm": 0.828125, "learning_rate": 0.00019379432942528166, "loss": 1.076, "step": 6571 }, { "epoch": 0.1687505315981968, "grad_norm": 0.90625, "learning_rate": 0.0001937927811972859, "loss": 1.0833, "step": 6572 }, { "epoch": 0.16877620879411864, "grad_norm": 0.86328125, "learning_rate": 0.0001937912327823696, "loss": 1.0496, "step": 6573 }, { "epoch": 0.16880188599004045, "grad_norm": 0.91796875, "learning_rate": 0.00019378968418053583, "loss": 1.0618, "step": 6574 }, { "epoch": 0.1688275631859623, "grad_norm": 1.078125, "learning_rate": 0.00019378813539178767, "loss": 1.1769, "step": 6575 }, { "epoch": 0.1688532403818841, "grad_norm": 0.83984375, "learning_rate": 0.00019378658641612827, "loss": 1.0452, "step": 6576 }, { "epoch": 0.1688789175778059, "grad_norm": 0.84765625, "learning_rate": 0.00019378503725356064, "loss": 1.1125, "step": 6577 }, { "epoch": 0.16890459477372774, "grad_norm": 0.77734375, "learning_rate": 0.00019378348790408792, "loss": 1.0271, "step": 6578 }, { "epoch": 0.16893027196964955, "grad_norm": 0.97265625, "learning_rate": 0.00019378193836771322, "loss": 1.1298, "step": 6579 }, { "epoch": 0.16895594916557138, "grad_norm": 0.83984375, "learning_rate": 0.00019378038864443955, "loss": 1.0857, "step": 6580 }, { "epoch": 0.1689816263614932, "grad_norm": 0.8359375, "learning_rate": 0.00019377883873427003, "loss": 1.1192, "step": 6581 }, { "epoch": 0.169007303557415, "grad_norm": 0.8359375, "learning_rate": 0.00019377728863720776, "loss": 1.0918, "step": 6582 }, { "epoch": 0.16903298075333684, "grad_norm": 0.98046875, "learning_rate": 0.00019377573835325582, "loss": 1.0605, "step": 6583 }, { "epoch": 0.16905865794925864, "grad_norm": 1.0390625, "learning_rate": 0.00019377418788241733, "loss": 0.935, "step": 6584 }, { "epoch": 0.16908433514518048, "grad_norm": 0.8125, "learning_rate": 0.00019377263722469533, "loss": 1.0552, "step": 6585 }, { "epoch": 0.1691100123411023, "grad_norm": 0.953125, "learning_rate": 0.00019377108638009294, "loss": 1.0483, "step": 6586 }, { "epoch": 0.1691356895370241, "grad_norm": 0.88671875, "learning_rate": 0.00019376953534861325, "loss": 0.9702, "step": 6587 }, { "epoch": 0.16916136673294593, "grad_norm": 0.79296875, "learning_rate": 0.00019376798413025934, "loss": 1.186, "step": 6588 }, { "epoch": 0.16918704392886774, "grad_norm": 0.86328125, "learning_rate": 0.00019376643272503433, "loss": 1.059, "step": 6589 }, { "epoch": 0.16921272112478958, "grad_norm": 0.8359375, "learning_rate": 0.00019376488113294124, "loss": 1.1336, "step": 6590 }, { "epoch": 0.1692383983207114, "grad_norm": 0.8203125, "learning_rate": 0.00019376332935398325, "loss": 1.0705, "step": 6591 }, { "epoch": 0.1692640755166332, "grad_norm": 0.80859375, "learning_rate": 0.0001937617773881634, "loss": 1.0757, "step": 6592 }, { "epoch": 0.16928975271255503, "grad_norm": 1.453125, "learning_rate": 0.0001937602252354848, "loss": 1.1859, "step": 6593 }, { "epoch": 0.16931542990847684, "grad_norm": 0.82421875, "learning_rate": 0.00019375867289595053, "loss": 1.1671, "step": 6594 }, { "epoch": 0.16934110710439867, "grad_norm": 0.9375, "learning_rate": 0.00019375712036956373, "loss": 0.9687, "step": 6595 }, { "epoch": 0.16936678430032048, "grad_norm": 0.82421875, "learning_rate": 0.00019375556765632742, "loss": 1.1042, "step": 6596 }, { "epoch": 0.1693924614962423, "grad_norm": 0.8515625, "learning_rate": 0.00019375401475624476, "loss": 1.1346, "step": 6597 }, { "epoch": 0.16941813869216413, "grad_norm": 0.8203125, "learning_rate": 0.0001937524616693188, "loss": 1.1375, "step": 6598 }, { "epoch": 0.16944381588808594, "grad_norm": 0.93359375, "learning_rate": 0.00019375090839555265, "loss": 1.0177, "step": 6599 }, { "epoch": 0.16946949308400777, "grad_norm": 0.84765625, "learning_rate": 0.00019374935493494944, "loss": 1.0061, "step": 6600 }, { "epoch": 0.16949517027992958, "grad_norm": 0.91796875, "learning_rate": 0.0001937478012875122, "loss": 1.0844, "step": 6601 }, { "epoch": 0.1695208474758514, "grad_norm": 0.8125, "learning_rate": 0.00019374624745324405, "loss": 1.0074, "step": 6602 }, { "epoch": 0.16954652467177322, "grad_norm": 0.91015625, "learning_rate": 0.0001937446934321481, "loss": 1.1918, "step": 6603 }, { "epoch": 0.16957220186769503, "grad_norm": 0.76953125, "learning_rate": 0.0001937431392242275, "loss": 1.0323, "step": 6604 }, { "epoch": 0.16959787906361687, "grad_norm": 0.84375, "learning_rate": 0.00019374158482948523, "loss": 0.9703, "step": 6605 }, { "epoch": 0.16962355625953868, "grad_norm": 0.79296875, "learning_rate": 0.00019374003024792447, "loss": 1.048, "step": 6606 }, { "epoch": 0.16964923345546049, "grad_norm": 0.84765625, "learning_rate": 0.00019373847547954828, "loss": 1.1344, "step": 6607 }, { "epoch": 0.16967491065138232, "grad_norm": 0.73828125, "learning_rate": 0.0001937369205243598, "loss": 0.922, "step": 6608 }, { "epoch": 0.16970058784730413, "grad_norm": 0.82421875, "learning_rate": 0.0001937353653823621, "loss": 1.1824, "step": 6609 }, { "epoch": 0.16972626504322597, "grad_norm": 0.9921875, "learning_rate": 0.0001937338100535583, "loss": 0.9519, "step": 6610 }, { "epoch": 0.16975194223914777, "grad_norm": 0.8203125, "learning_rate": 0.00019373225453795146, "loss": 1.0503, "step": 6611 }, { "epoch": 0.16977761943506958, "grad_norm": 0.84765625, "learning_rate": 0.00019373069883554472, "loss": 1.1883, "step": 6612 }, { "epoch": 0.16980329663099142, "grad_norm": 0.83984375, "learning_rate": 0.00019372914294634114, "loss": 1.1278, "step": 6613 }, { "epoch": 0.16982897382691323, "grad_norm": 0.859375, "learning_rate": 0.00019372758687034386, "loss": 1.1289, "step": 6614 }, { "epoch": 0.16985465102283506, "grad_norm": 0.8828125, "learning_rate": 0.00019372603060755595, "loss": 1.0247, "step": 6615 }, { "epoch": 0.16988032821875687, "grad_norm": 0.81640625, "learning_rate": 0.00019372447415798053, "loss": 1.0035, "step": 6616 }, { "epoch": 0.16990600541467868, "grad_norm": 0.94140625, "learning_rate": 0.00019372291752162073, "loss": 1.1155, "step": 6617 }, { "epoch": 0.16993168261060052, "grad_norm": 0.8125, "learning_rate": 0.00019372136069847963, "loss": 1.0547, "step": 6618 }, { "epoch": 0.16995735980652232, "grad_norm": 0.82421875, "learning_rate": 0.00019371980368856028, "loss": 1.2446, "step": 6619 }, { "epoch": 0.16998303700244416, "grad_norm": 0.87109375, "learning_rate": 0.00019371824649186582, "loss": 1.0506, "step": 6620 }, { "epoch": 0.17000871419836597, "grad_norm": 0.9296875, "learning_rate": 0.00019371668910839939, "loss": 1.0506, "step": 6621 }, { "epoch": 0.17003439139428778, "grad_norm": 0.81640625, "learning_rate": 0.00019371513153816405, "loss": 1.2007, "step": 6622 }, { "epoch": 0.1700600685902096, "grad_norm": 0.86328125, "learning_rate": 0.00019371357378116292, "loss": 1.0279, "step": 6623 }, { "epoch": 0.17008574578613142, "grad_norm": 0.875, "learning_rate": 0.00019371201583739912, "loss": 1.1031, "step": 6624 }, { "epoch": 0.17011142298205326, "grad_norm": 0.859375, "learning_rate": 0.00019371045770687572, "loss": 1.0328, "step": 6625 }, { "epoch": 0.17013710017797506, "grad_norm": 0.73046875, "learning_rate": 0.00019370889938959588, "loss": 0.8935, "step": 6626 }, { "epoch": 0.17016277737389687, "grad_norm": 0.84375, "learning_rate": 0.0001937073408855626, "loss": 1.0113, "step": 6627 }, { "epoch": 0.1701884545698187, "grad_norm": 0.84375, "learning_rate": 0.0001937057821947791, "loss": 0.9002, "step": 6628 }, { "epoch": 0.17021413176574052, "grad_norm": 0.78515625, "learning_rate": 0.00019370422331724843, "loss": 1.0393, "step": 6629 }, { "epoch": 0.17023980896166235, "grad_norm": 0.8046875, "learning_rate": 0.0001937026642529737, "loss": 1.1059, "step": 6630 }, { "epoch": 0.17026548615758416, "grad_norm": 0.8046875, "learning_rate": 0.000193701105001958, "loss": 0.9743, "step": 6631 }, { "epoch": 0.17029116335350597, "grad_norm": 0.80078125, "learning_rate": 0.00019369954556420452, "loss": 0.9792, "step": 6632 }, { "epoch": 0.1703168405494278, "grad_norm": 0.7890625, "learning_rate": 0.0001936979859397163, "loss": 1.1112, "step": 6633 }, { "epoch": 0.17034251774534961, "grad_norm": 0.83203125, "learning_rate": 0.0001936964261284964, "loss": 0.962, "step": 6634 }, { "epoch": 0.17036819494127145, "grad_norm": 0.8125, "learning_rate": 0.00019369486613054804, "loss": 0.9813, "step": 6635 }, { "epoch": 0.17039387213719326, "grad_norm": 0.82421875, "learning_rate": 0.00019369330594587428, "loss": 1.1589, "step": 6636 }, { "epoch": 0.17041954933311507, "grad_norm": 0.80078125, "learning_rate": 0.0001936917455744782, "loss": 1.1057, "step": 6637 }, { "epoch": 0.1704452265290369, "grad_norm": 0.77734375, "learning_rate": 0.00019369018501636293, "loss": 1.0364, "step": 6638 }, { "epoch": 0.1704709037249587, "grad_norm": 0.83984375, "learning_rate": 0.0001936886242715316, "loss": 1.1432, "step": 6639 }, { "epoch": 0.17049658092088055, "grad_norm": 0.875, "learning_rate": 0.00019368706333998728, "loss": 1.2336, "step": 6640 }, { "epoch": 0.17052225811680236, "grad_norm": 0.890625, "learning_rate": 0.00019368550222173317, "loss": 1.171, "step": 6641 }, { "epoch": 0.17054793531272416, "grad_norm": 0.87890625, "learning_rate": 0.00019368394091677225, "loss": 1.0212, "step": 6642 }, { "epoch": 0.170573612508646, "grad_norm": 0.80859375, "learning_rate": 0.0001936823794251077, "loss": 1.0734, "step": 6643 }, { "epoch": 0.1705992897045678, "grad_norm": 0.76953125, "learning_rate": 0.00019368081774674266, "loss": 1.19, "step": 6644 }, { "epoch": 0.17062496690048964, "grad_norm": 0.8203125, "learning_rate": 0.0001936792558816802, "loss": 1.1057, "step": 6645 }, { "epoch": 0.17065064409641145, "grad_norm": 0.79296875, "learning_rate": 0.00019367769382992346, "loss": 1.1274, "step": 6646 }, { "epoch": 0.17067632129233326, "grad_norm": 0.859375, "learning_rate": 0.0001936761315914755, "loss": 1.029, "step": 6647 }, { "epoch": 0.1707019984882551, "grad_norm": 1.1484375, "learning_rate": 0.00019367456916633954, "loss": 1.0015, "step": 6648 }, { "epoch": 0.1707276756841769, "grad_norm": 0.87890625, "learning_rate": 0.00019367300655451857, "loss": 1.056, "step": 6649 }, { "epoch": 0.17075335288009874, "grad_norm": 0.85546875, "learning_rate": 0.00019367144375601576, "loss": 0.9782, "step": 6650 }, { "epoch": 0.17077903007602055, "grad_norm": 0.8515625, "learning_rate": 0.00019366988077083424, "loss": 1.0874, "step": 6651 }, { "epoch": 0.17080470727194236, "grad_norm": 0.8203125, "learning_rate": 0.0001936683175989771, "loss": 1.0458, "step": 6652 }, { "epoch": 0.1708303844678642, "grad_norm": 0.7890625, "learning_rate": 0.00019366675424044747, "loss": 0.9765, "step": 6653 }, { "epoch": 0.170856061663786, "grad_norm": 0.8828125, "learning_rate": 0.00019366519069524847, "loss": 0.978, "step": 6654 }, { "epoch": 0.17088173885970784, "grad_norm": 1.109375, "learning_rate": 0.00019366362696338318, "loss": 1.0501, "step": 6655 }, { "epoch": 0.17090741605562965, "grad_norm": 0.91015625, "learning_rate": 0.00019366206304485475, "loss": 1.1146, "step": 6656 }, { "epoch": 0.17093309325155145, "grad_norm": 0.85546875, "learning_rate": 0.0001936604989396663, "loss": 1.1466, "step": 6657 }, { "epoch": 0.1709587704474733, "grad_norm": 0.83203125, "learning_rate": 0.00019365893464782092, "loss": 0.9967, "step": 6658 }, { "epoch": 0.1709844476433951, "grad_norm": 0.86328125, "learning_rate": 0.00019365737016932176, "loss": 1.0576, "step": 6659 }, { "epoch": 0.17101012483931693, "grad_norm": 0.953125, "learning_rate": 0.0001936558055041719, "loss": 1.1051, "step": 6660 }, { "epoch": 0.17103580203523874, "grad_norm": 0.87890625, "learning_rate": 0.00019365424065237452, "loss": 1.1596, "step": 6661 }, { "epoch": 0.17106147923116055, "grad_norm": 0.87890625, "learning_rate": 0.00019365267561393266, "loss": 1.0675, "step": 6662 }, { "epoch": 0.1710871564270824, "grad_norm": 0.83203125, "learning_rate": 0.0001936511103888495, "loss": 0.989, "step": 6663 }, { "epoch": 0.1711128336230042, "grad_norm": 0.88671875, "learning_rate": 0.0001936495449771281, "loss": 1.0491, "step": 6664 }, { "epoch": 0.17113851081892603, "grad_norm": 0.79296875, "learning_rate": 0.00019364797937877163, "loss": 1.0776, "step": 6665 }, { "epoch": 0.17116418801484784, "grad_norm": 0.8046875, "learning_rate": 0.00019364641359378323, "loss": 1.0204, "step": 6666 }, { "epoch": 0.17118986521076965, "grad_norm": 0.91015625, "learning_rate": 0.00019364484762216598, "loss": 1.1543, "step": 6667 }, { "epoch": 0.17121554240669148, "grad_norm": 0.765625, "learning_rate": 0.00019364328146392299, "loss": 1.012, "step": 6668 }, { "epoch": 0.1712412196026133, "grad_norm": 0.83203125, "learning_rate": 0.0001936417151190574, "loss": 1.0743, "step": 6669 }, { "epoch": 0.17126689679853513, "grad_norm": 0.73828125, "learning_rate": 0.00019364014858757234, "loss": 1.0581, "step": 6670 }, { "epoch": 0.17129257399445694, "grad_norm": 0.859375, "learning_rate": 0.0001936385818694709, "loss": 1.1541, "step": 6671 }, { "epoch": 0.17131825119037875, "grad_norm": 0.859375, "learning_rate": 0.00019363701496475624, "loss": 1.0562, "step": 6672 }, { "epoch": 0.17134392838630058, "grad_norm": 0.8359375, "learning_rate": 0.00019363544787343146, "loss": 1.2816, "step": 6673 }, { "epoch": 0.1713696055822224, "grad_norm": 0.84375, "learning_rate": 0.00019363388059549973, "loss": 0.9891, "step": 6674 }, { "epoch": 0.17139528277814423, "grad_norm": 0.75, "learning_rate": 0.0001936323131309641, "loss": 1.1793, "step": 6675 }, { "epoch": 0.17142095997406603, "grad_norm": 0.86328125, "learning_rate": 0.00019363074547982772, "loss": 1.1919, "step": 6676 }, { "epoch": 0.17144663716998784, "grad_norm": 0.78515625, "learning_rate": 0.00019362917764209373, "loss": 1.0974, "step": 6677 }, { "epoch": 0.17147231436590968, "grad_norm": 0.7890625, "learning_rate": 0.00019362760961776524, "loss": 0.9887, "step": 6678 }, { "epoch": 0.1714979915618315, "grad_norm": 0.88671875, "learning_rate": 0.00019362604140684537, "loss": 1.2771, "step": 6679 }, { "epoch": 0.17152366875775332, "grad_norm": 0.86328125, "learning_rate": 0.00019362447300933728, "loss": 1.017, "step": 6680 }, { "epoch": 0.17154934595367513, "grad_norm": 0.875, "learning_rate": 0.00019362290442524407, "loss": 1.0573, "step": 6681 }, { "epoch": 0.17157502314959694, "grad_norm": 0.76953125, "learning_rate": 0.00019362133565456886, "loss": 1.0829, "step": 6682 }, { "epoch": 0.17160070034551878, "grad_norm": 0.8515625, "learning_rate": 0.00019361976669731478, "loss": 1.124, "step": 6683 }, { "epoch": 0.17162637754144058, "grad_norm": 0.80078125, "learning_rate": 0.000193618197553485, "loss": 1.0134, "step": 6684 }, { "epoch": 0.17165205473736242, "grad_norm": 0.80078125, "learning_rate": 0.00019361662822308255, "loss": 1.0446, "step": 6685 }, { "epoch": 0.17167773193328423, "grad_norm": 0.8671875, "learning_rate": 0.00019361505870611065, "loss": 1.103, "step": 6686 }, { "epoch": 0.17170340912920604, "grad_norm": 0.92578125, "learning_rate": 0.00019361348900257238, "loss": 0.949, "step": 6687 }, { "epoch": 0.17172908632512787, "grad_norm": 0.82421875, "learning_rate": 0.00019361191911247092, "loss": 1.0354, "step": 6688 }, { "epoch": 0.17175476352104968, "grad_norm": 0.91796875, "learning_rate": 0.00019361034903580933, "loss": 1.094, "step": 6689 }, { "epoch": 0.17178044071697152, "grad_norm": 0.8359375, "learning_rate": 0.00019360877877259077, "loss": 1.1202, "step": 6690 }, { "epoch": 0.17180611791289332, "grad_norm": 0.828125, "learning_rate": 0.00019360720832281838, "loss": 1.0428, "step": 6691 }, { "epoch": 0.17183179510881513, "grad_norm": 0.81640625, "learning_rate": 0.00019360563768649526, "loss": 0.9898, "step": 6692 }, { "epoch": 0.17185747230473697, "grad_norm": 0.8125, "learning_rate": 0.00019360406686362458, "loss": 1.1242, "step": 6693 }, { "epoch": 0.17188314950065878, "grad_norm": 0.859375, "learning_rate": 0.00019360249585420944, "loss": 1.1775, "step": 6694 }, { "epoch": 0.1719088266965806, "grad_norm": 0.75, "learning_rate": 0.000193600924658253, "loss": 0.9785, "step": 6695 }, { "epoch": 0.17193450389250242, "grad_norm": 0.92578125, "learning_rate": 0.00019359935327575834, "loss": 1.198, "step": 6696 }, { "epoch": 0.17196018108842423, "grad_norm": 0.81640625, "learning_rate": 0.00019359778170672864, "loss": 1.1357, "step": 6697 }, { "epoch": 0.17198585828434607, "grad_norm": 0.828125, "learning_rate": 0.000193596209951167, "loss": 1.1356, "step": 6698 }, { "epoch": 0.17201153548026787, "grad_norm": 0.8125, "learning_rate": 0.0001935946380090766, "loss": 1.0544, "step": 6699 }, { "epoch": 0.1720372126761897, "grad_norm": 0.89453125, "learning_rate": 0.00019359306588046053, "loss": 1.0363, "step": 6700 }, { "epoch": 0.17206288987211152, "grad_norm": 0.83984375, "learning_rate": 0.00019359149356532192, "loss": 1.219, "step": 6701 }, { "epoch": 0.17208856706803333, "grad_norm": 0.8125, "learning_rate": 0.00019358992106366393, "loss": 1.1402, "step": 6702 }, { "epoch": 0.17211424426395516, "grad_norm": 0.8359375, "learning_rate": 0.00019358834837548966, "loss": 1.1689, "step": 6703 }, { "epoch": 0.17213992145987697, "grad_norm": 0.87890625, "learning_rate": 0.00019358677550080231, "loss": 1.1342, "step": 6704 }, { "epoch": 0.1721655986557988, "grad_norm": 0.8515625, "learning_rate": 0.00019358520243960495, "loss": 1.2576, "step": 6705 }, { "epoch": 0.17219127585172062, "grad_norm": 0.82421875, "learning_rate": 0.00019358362919190072, "loss": 1.1264, "step": 6706 }, { "epoch": 0.17221695304764242, "grad_norm": 0.78515625, "learning_rate": 0.00019358205575769277, "loss": 0.9777, "step": 6707 }, { "epoch": 0.17224263024356426, "grad_norm": 0.76171875, "learning_rate": 0.00019358048213698425, "loss": 1.1237, "step": 6708 }, { "epoch": 0.17226830743948607, "grad_norm": 0.765625, "learning_rate": 0.00019357890832977828, "loss": 0.9438, "step": 6709 }, { "epoch": 0.1722939846354079, "grad_norm": 0.80859375, "learning_rate": 0.00019357733433607798, "loss": 1.0556, "step": 6710 }, { "epoch": 0.1723196618313297, "grad_norm": 0.77734375, "learning_rate": 0.0001935757601558865, "loss": 0.951, "step": 6711 }, { "epoch": 0.17234533902725152, "grad_norm": 0.8203125, "learning_rate": 0.00019357418578920703, "loss": 1.1262, "step": 6712 }, { "epoch": 0.17237101622317336, "grad_norm": 0.81640625, "learning_rate": 0.0001935726112360426, "loss": 1.0621, "step": 6713 }, { "epoch": 0.17239669341909516, "grad_norm": 0.8046875, "learning_rate": 0.00019357103649639647, "loss": 0.9973, "step": 6714 }, { "epoch": 0.17242237061501697, "grad_norm": 0.8203125, "learning_rate": 0.00019356946157027168, "loss": 1.1047, "step": 6715 }, { "epoch": 0.1724480478109388, "grad_norm": 0.83984375, "learning_rate": 0.00019356788645767144, "loss": 1.1604, "step": 6716 }, { "epoch": 0.17247372500686062, "grad_norm": 0.84375, "learning_rate": 0.0001935663111585988, "loss": 0.9497, "step": 6717 }, { "epoch": 0.17249940220278245, "grad_norm": 0.83984375, "learning_rate": 0.000193564735673057, "loss": 1.2195, "step": 6718 }, { "epoch": 0.17252507939870426, "grad_norm": 0.74609375, "learning_rate": 0.00019356316000104912, "loss": 1.0018, "step": 6719 }, { "epoch": 0.17255075659462607, "grad_norm": 1.078125, "learning_rate": 0.0001935615841425783, "loss": 1.1726, "step": 6720 }, { "epoch": 0.1725764337905479, "grad_norm": 0.80859375, "learning_rate": 0.00019356000809764773, "loss": 1.0026, "step": 6721 }, { "epoch": 0.17260211098646971, "grad_norm": 0.91015625, "learning_rate": 0.00019355843186626045, "loss": 1.1006, "step": 6722 }, { "epoch": 0.17262778818239155, "grad_norm": 0.8515625, "learning_rate": 0.00019355685544841973, "loss": 1.0561, "step": 6723 }, { "epoch": 0.17265346537831336, "grad_norm": 0.83984375, "learning_rate": 0.0001935552788441286, "loss": 1.0954, "step": 6724 }, { "epoch": 0.17267914257423517, "grad_norm": 0.81640625, "learning_rate": 0.00019355370205339028, "loss": 1.0302, "step": 6725 }, { "epoch": 0.172704819770157, "grad_norm": 0.89453125, "learning_rate": 0.00019355212507620787, "loss": 1.0387, "step": 6726 }, { "epoch": 0.1727304969660788, "grad_norm": 0.875, "learning_rate": 0.00019355054791258454, "loss": 1.0704, "step": 6727 }, { "epoch": 0.17275617416200065, "grad_norm": 0.83203125, "learning_rate": 0.0001935489705625234, "loss": 1.3437, "step": 6728 }, { "epoch": 0.17278185135792246, "grad_norm": 0.828125, "learning_rate": 0.00019354739302602763, "loss": 1.1673, "step": 6729 }, { "epoch": 0.17280752855384426, "grad_norm": 0.96484375, "learning_rate": 0.00019354581530310033, "loss": 1.1677, "step": 6730 }, { "epoch": 0.1728332057497661, "grad_norm": 0.890625, "learning_rate": 0.0001935442373937447, "loss": 0.993, "step": 6731 }, { "epoch": 0.1728588829456879, "grad_norm": 0.8125, "learning_rate": 0.00019354265929796385, "loss": 0.9717, "step": 6732 }, { "epoch": 0.17288456014160974, "grad_norm": 0.8203125, "learning_rate": 0.00019354108101576092, "loss": 0.9796, "step": 6733 }, { "epoch": 0.17291023733753155, "grad_norm": 0.8046875, "learning_rate": 0.00019353950254713905, "loss": 0.9648, "step": 6734 }, { "epoch": 0.17293591453345336, "grad_norm": 0.7421875, "learning_rate": 0.00019353792389210141, "loss": 0.9523, "step": 6735 }, { "epoch": 0.1729615917293752, "grad_norm": 0.81640625, "learning_rate": 0.00019353634505065115, "loss": 1.1512, "step": 6736 }, { "epoch": 0.172987268925297, "grad_norm": 0.79296875, "learning_rate": 0.0001935347660227914, "loss": 1.0439, "step": 6737 }, { "epoch": 0.17301294612121884, "grad_norm": 0.828125, "learning_rate": 0.0001935331868085253, "loss": 1.105, "step": 6738 }, { "epoch": 0.17303862331714065, "grad_norm": 0.8359375, "learning_rate": 0.00019353160740785601, "loss": 0.9373, "step": 6739 }, { "epoch": 0.17306430051306246, "grad_norm": 0.79296875, "learning_rate": 0.0001935300278207867, "loss": 1.0477, "step": 6740 }, { "epoch": 0.1730899777089843, "grad_norm": 0.80078125, "learning_rate": 0.00019352844804732046, "loss": 0.9785, "step": 6741 }, { "epoch": 0.1731156549049061, "grad_norm": 0.77734375, "learning_rate": 0.00019352686808746047, "loss": 1.15, "step": 6742 }, { "epoch": 0.17314133210082794, "grad_norm": 0.80859375, "learning_rate": 0.00019352528794120989, "loss": 0.9869, "step": 6743 }, { "epoch": 0.17316700929674975, "grad_norm": 0.78515625, "learning_rate": 0.00019352370760857186, "loss": 0.9945, "step": 6744 }, { "epoch": 0.17319268649267155, "grad_norm": 0.8515625, "learning_rate": 0.00019352212708954953, "loss": 1.0822, "step": 6745 }, { "epoch": 0.1732183636885934, "grad_norm": 0.76171875, "learning_rate": 0.00019352054638414607, "loss": 0.9807, "step": 6746 }, { "epoch": 0.1732440408845152, "grad_norm": 0.90625, "learning_rate": 0.00019351896549236458, "loss": 0.9452, "step": 6747 }, { "epoch": 0.17326971808043704, "grad_norm": 0.78125, "learning_rate": 0.00019351738441420827, "loss": 1.1442, "step": 6748 }, { "epoch": 0.17329539527635884, "grad_norm": 0.92578125, "learning_rate": 0.0001935158031496802, "loss": 1.061, "step": 6749 }, { "epoch": 0.17332107247228065, "grad_norm": 0.84375, "learning_rate": 0.00019351422169878367, "loss": 0.9948, "step": 6750 }, { "epoch": 0.1733467496682025, "grad_norm": 0.8046875, "learning_rate": 0.00019351264006152165, "loss": 1.1056, "step": 6751 }, { "epoch": 0.1733724268641243, "grad_norm": 0.8203125, "learning_rate": 0.00019351105823789744, "loss": 0.8882, "step": 6752 }, { "epoch": 0.17339810406004613, "grad_norm": 0.87890625, "learning_rate": 0.00019350947622791414, "loss": 1.165, "step": 6753 }, { "epoch": 0.17342378125596794, "grad_norm": 0.82421875, "learning_rate": 0.0001935078940315749, "loss": 0.9235, "step": 6754 }, { "epoch": 0.17344945845188975, "grad_norm": 0.85546875, "learning_rate": 0.00019350631164888283, "loss": 1.056, "step": 6755 }, { "epoch": 0.17347513564781158, "grad_norm": 0.83203125, "learning_rate": 0.00019350472907984117, "loss": 1.0048, "step": 6756 }, { "epoch": 0.1735008128437334, "grad_norm": 0.8828125, "learning_rate": 0.00019350314632445302, "loss": 0.9998, "step": 6757 }, { "epoch": 0.17352649003965523, "grad_norm": 0.93359375, "learning_rate": 0.00019350156338272153, "loss": 1.1614, "step": 6758 }, { "epoch": 0.17355216723557704, "grad_norm": 0.8671875, "learning_rate": 0.0001934999802546499, "loss": 1.0921, "step": 6759 }, { "epoch": 0.17357784443149885, "grad_norm": 0.78125, "learning_rate": 0.00019349839694024127, "loss": 0.9653, "step": 6760 }, { "epoch": 0.17360352162742068, "grad_norm": 0.86328125, "learning_rate": 0.00019349681343949873, "loss": 1.2791, "step": 6761 }, { "epoch": 0.1736291988233425, "grad_norm": 0.81640625, "learning_rate": 0.00019349522975242552, "loss": 1.1495, "step": 6762 }, { "epoch": 0.17365487601926433, "grad_norm": 0.77734375, "learning_rate": 0.00019349364587902473, "loss": 1.0708, "step": 6763 }, { "epoch": 0.17368055321518613, "grad_norm": 0.921875, "learning_rate": 0.00019349206181929954, "loss": 1.0043, "step": 6764 }, { "epoch": 0.17370623041110794, "grad_norm": 0.82421875, "learning_rate": 0.00019349047757325315, "loss": 1.0484, "step": 6765 }, { "epoch": 0.17373190760702978, "grad_norm": 0.83203125, "learning_rate": 0.00019348889314088868, "loss": 0.9964, "step": 6766 }, { "epoch": 0.1737575848029516, "grad_norm": 0.87890625, "learning_rate": 0.0001934873085222093, "loss": 0.9944, "step": 6767 }, { "epoch": 0.17378326199887342, "grad_norm": 0.83203125, "learning_rate": 0.00019348572371721815, "loss": 1.0373, "step": 6768 }, { "epoch": 0.17380893919479523, "grad_norm": 0.79296875, "learning_rate": 0.0001934841387259184, "loss": 0.9951, "step": 6769 }, { "epoch": 0.17383461639071704, "grad_norm": 0.8046875, "learning_rate": 0.0001934825535483132, "loss": 1.1941, "step": 6770 }, { "epoch": 0.17386029358663888, "grad_norm": 0.76171875, "learning_rate": 0.00019348096818440572, "loss": 0.9891, "step": 6771 }, { "epoch": 0.17388597078256068, "grad_norm": 0.86328125, "learning_rate": 0.0001934793826341991, "loss": 1.0242, "step": 6772 }, { "epoch": 0.17391164797848252, "grad_norm": 0.796875, "learning_rate": 0.00019347779689769654, "loss": 0.9416, "step": 6773 }, { "epoch": 0.17393732517440433, "grad_norm": 0.76953125, "learning_rate": 0.00019347621097490114, "loss": 1.1338, "step": 6774 }, { "epoch": 0.17396300237032614, "grad_norm": 0.76171875, "learning_rate": 0.00019347462486581614, "loss": 0.9457, "step": 6775 }, { "epoch": 0.17398867956624797, "grad_norm": 0.83984375, "learning_rate": 0.0001934730385704446, "loss": 1.0424, "step": 6776 }, { "epoch": 0.17401435676216978, "grad_norm": 0.8671875, "learning_rate": 0.0001934714520887898, "loss": 1.1899, "step": 6777 }, { "epoch": 0.17404003395809162, "grad_norm": 0.81640625, "learning_rate": 0.00019346986542085478, "loss": 1.0432, "step": 6778 }, { "epoch": 0.17406571115401342, "grad_norm": 0.87890625, "learning_rate": 0.00019346827856664281, "loss": 1.0451, "step": 6779 }, { "epoch": 0.17409138834993523, "grad_norm": 0.76171875, "learning_rate": 0.00019346669152615697, "loss": 1.0255, "step": 6780 }, { "epoch": 0.17411706554585707, "grad_norm": 0.88671875, "learning_rate": 0.00019346510429940045, "loss": 1.1141, "step": 6781 }, { "epoch": 0.17414274274177888, "grad_norm": 0.82421875, "learning_rate": 0.00019346351688637645, "loss": 1.0172, "step": 6782 }, { "epoch": 0.1741684199377007, "grad_norm": 0.828125, "learning_rate": 0.00019346192928708811, "loss": 1.0813, "step": 6783 }, { "epoch": 0.17419409713362252, "grad_norm": 0.92578125, "learning_rate": 0.00019346034150153856, "loss": 1.1609, "step": 6784 }, { "epoch": 0.17421977432954433, "grad_norm": 1.0546875, "learning_rate": 0.000193458753529731, "loss": 1.0149, "step": 6785 }, { "epoch": 0.17424545152546617, "grad_norm": 0.83984375, "learning_rate": 0.00019345716537166858, "loss": 1.0983, "step": 6786 }, { "epoch": 0.17427112872138797, "grad_norm": 0.796875, "learning_rate": 0.00019345557702735448, "loss": 0.9373, "step": 6787 }, { "epoch": 0.1742968059173098, "grad_norm": 0.84765625, "learning_rate": 0.00019345398849679186, "loss": 1.1779, "step": 6788 }, { "epoch": 0.17432248311323162, "grad_norm": 0.859375, "learning_rate": 0.00019345239977998387, "loss": 1.0711, "step": 6789 }, { "epoch": 0.17434816030915343, "grad_norm": 0.90234375, "learning_rate": 0.00019345081087693367, "loss": 1.0844, "step": 6790 }, { "epoch": 0.17437383750507526, "grad_norm": 0.91015625, "learning_rate": 0.0001934492217876445, "loss": 1.1015, "step": 6791 }, { "epoch": 0.17439951470099707, "grad_norm": 0.75, "learning_rate": 0.00019344763251211947, "loss": 0.9054, "step": 6792 }, { "epoch": 0.1744251918969189, "grad_norm": 0.87109375, "learning_rate": 0.0001934460430503617, "loss": 1.049, "step": 6793 }, { "epoch": 0.17445086909284072, "grad_norm": 0.80859375, "learning_rate": 0.00019344445340237443, "loss": 1.2361, "step": 6794 }, { "epoch": 0.17447654628876252, "grad_norm": 0.87109375, "learning_rate": 0.0001934428635681608, "loss": 1.1661, "step": 6795 }, { "epoch": 0.17450222348468436, "grad_norm": 0.875, "learning_rate": 0.00019344127354772398, "loss": 1.0409, "step": 6796 }, { "epoch": 0.17452790068060617, "grad_norm": 0.8671875, "learning_rate": 0.00019343968334106716, "loss": 1.0993, "step": 6797 }, { "epoch": 0.174553577876528, "grad_norm": 0.85546875, "learning_rate": 0.00019343809294819348, "loss": 1.0362, "step": 6798 }, { "epoch": 0.1745792550724498, "grad_norm": 0.88671875, "learning_rate": 0.00019343650236910612, "loss": 1.181, "step": 6799 }, { "epoch": 0.17460493226837162, "grad_norm": 0.8046875, "learning_rate": 0.00019343491160380826, "loss": 1.1184, "step": 6800 }, { "epoch": 0.17463060946429346, "grad_norm": 0.9609375, "learning_rate": 0.00019343332065230307, "loss": 1.1051, "step": 6801 }, { "epoch": 0.17465628666021527, "grad_norm": 0.85546875, "learning_rate": 0.00019343172951459368, "loss": 1.1491, "step": 6802 }, { "epoch": 0.1746819638561371, "grad_norm": 0.81640625, "learning_rate": 0.0001934301381906833, "loss": 1.0645, "step": 6803 }, { "epoch": 0.1747076410520589, "grad_norm": 0.92578125, "learning_rate": 0.00019342854668057512, "loss": 1.1471, "step": 6804 }, { "epoch": 0.17473331824798072, "grad_norm": 0.8203125, "learning_rate": 0.00019342695498427229, "loss": 1.0741, "step": 6805 }, { "epoch": 0.17475899544390255, "grad_norm": 0.87109375, "learning_rate": 0.00019342536310177795, "loss": 1.1709, "step": 6806 }, { "epoch": 0.17478467263982436, "grad_norm": 0.796875, "learning_rate": 0.0001934237710330953, "loss": 1.0157, "step": 6807 }, { "epoch": 0.1748103498357462, "grad_norm": 0.8203125, "learning_rate": 0.00019342217877822753, "loss": 1.0223, "step": 6808 }, { "epoch": 0.174836027031668, "grad_norm": 0.8515625, "learning_rate": 0.00019342058633717779, "loss": 1.2611, "step": 6809 }, { "epoch": 0.17486170422758981, "grad_norm": 0.88671875, "learning_rate": 0.00019341899370994926, "loss": 1.1252, "step": 6810 }, { "epoch": 0.17488738142351165, "grad_norm": 0.8046875, "learning_rate": 0.0001934174008965451, "loss": 0.9692, "step": 6811 }, { "epoch": 0.17491305861943346, "grad_norm": 0.8828125, "learning_rate": 0.0001934158078969685, "loss": 1.0555, "step": 6812 }, { "epoch": 0.1749387358153553, "grad_norm": 0.8671875, "learning_rate": 0.00019341421471122264, "loss": 1.0173, "step": 6813 }, { "epoch": 0.1749644130112771, "grad_norm": 0.84375, "learning_rate": 0.00019341262133931072, "loss": 1.1623, "step": 6814 }, { "epoch": 0.1749900902071989, "grad_norm": 0.78515625, "learning_rate": 0.00019341102778123586, "loss": 1.0314, "step": 6815 }, { "epoch": 0.17501576740312075, "grad_norm": 0.95703125, "learning_rate": 0.00019340943403700124, "loss": 1.1775, "step": 6816 }, { "epoch": 0.17504144459904256, "grad_norm": 0.79296875, "learning_rate": 0.00019340784010661008, "loss": 1.0872, "step": 6817 }, { "epoch": 0.1750671217949644, "grad_norm": 0.859375, "learning_rate": 0.00019340624599006553, "loss": 1.0823, "step": 6818 }, { "epoch": 0.1750927989908862, "grad_norm": 0.78125, "learning_rate": 0.00019340465168737075, "loss": 0.9142, "step": 6819 }, { "epoch": 0.175118476186808, "grad_norm": 0.86328125, "learning_rate": 0.00019340305719852893, "loss": 1.0186, "step": 6820 }, { "epoch": 0.17514415338272984, "grad_norm": 0.796875, "learning_rate": 0.0001934014625235433, "loss": 1.1951, "step": 6821 }, { "epoch": 0.17516983057865165, "grad_norm": 0.8671875, "learning_rate": 0.00019339986766241695, "loss": 1.0925, "step": 6822 }, { "epoch": 0.1751955077745735, "grad_norm": 0.90625, "learning_rate": 0.00019339827261515314, "loss": 1.0987, "step": 6823 }, { "epoch": 0.1752211849704953, "grad_norm": 0.83984375, "learning_rate": 0.00019339667738175497, "loss": 1.1113, "step": 6824 }, { "epoch": 0.1752468621664171, "grad_norm": 0.87890625, "learning_rate": 0.00019339508196222568, "loss": 1.0786, "step": 6825 }, { "epoch": 0.17527253936233894, "grad_norm": 0.8125, "learning_rate": 0.00019339348635656842, "loss": 1.0009, "step": 6826 }, { "epoch": 0.17529821655826075, "grad_norm": 0.87109375, "learning_rate": 0.00019339189056478636, "loss": 1.1608, "step": 6827 }, { "epoch": 0.17532389375418259, "grad_norm": 0.84375, "learning_rate": 0.0001933902945868827, "loss": 1.0461, "step": 6828 }, { "epoch": 0.1753495709501044, "grad_norm": 0.828125, "learning_rate": 0.00019338869842286066, "loss": 1.1072, "step": 6829 }, { "epoch": 0.1753752481460262, "grad_norm": 0.81640625, "learning_rate": 0.00019338710207272337, "loss": 1.128, "step": 6830 }, { "epoch": 0.17540092534194804, "grad_norm": 0.8125, "learning_rate": 0.00019338550553647398, "loss": 1.1523, "step": 6831 }, { "epoch": 0.17542660253786985, "grad_norm": 0.83984375, "learning_rate": 0.00019338390881411575, "loss": 1.0719, "step": 6832 }, { "epoch": 0.17545227973379168, "grad_norm": 0.82421875, "learning_rate": 0.0001933823119056518, "loss": 1.1472, "step": 6833 }, { "epoch": 0.1754779569297135, "grad_norm": 0.80078125, "learning_rate": 0.00019338071481108537, "loss": 1.0532, "step": 6834 }, { "epoch": 0.1755036341256353, "grad_norm": 0.76953125, "learning_rate": 0.0001933791175304196, "loss": 1.0777, "step": 6835 }, { "epoch": 0.17552931132155714, "grad_norm": 0.8203125, "learning_rate": 0.00019337752006365767, "loss": 0.9368, "step": 6836 }, { "epoch": 0.17555498851747894, "grad_norm": 0.828125, "learning_rate": 0.00019337592241080277, "loss": 1.0373, "step": 6837 }, { "epoch": 0.17558066571340078, "grad_norm": 0.9609375, "learning_rate": 0.00019337432457185814, "loss": 1.2867, "step": 6838 }, { "epoch": 0.1756063429093226, "grad_norm": 0.81640625, "learning_rate": 0.00019337272654682686, "loss": 1.1301, "step": 6839 }, { "epoch": 0.1756320201052444, "grad_norm": 0.78515625, "learning_rate": 0.0001933711283357122, "loss": 1.1365, "step": 6840 }, { "epoch": 0.17565769730116623, "grad_norm": 1.140625, "learning_rate": 0.0001933695299385173, "loss": 1.0841, "step": 6841 }, { "epoch": 0.17568337449708804, "grad_norm": 0.78515625, "learning_rate": 0.00019336793135524535, "loss": 1.0111, "step": 6842 }, { "epoch": 0.17570905169300988, "grad_norm": 0.81640625, "learning_rate": 0.00019336633258589957, "loss": 1.1585, "step": 6843 }, { "epoch": 0.17573472888893168, "grad_norm": 0.80859375, "learning_rate": 0.00019336473363048312, "loss": 1.1928, "step": 6844 }, { "epoch": 0.1757604060848535, "grad_norm": 0.8515625, "learning_rate": 0.0001933631344889992, "loss": 1.0859, "step": 6845 }, { "epoch": 0.17578608328077533, "grad_norm": 0.9609375, "learning_rate": 0.00019336153516145096, "loss": 1.0583, "step": 6846 }, { "epoch": 0.17581176047669714, "grad_norm": 0.86328125, "learning_rate": 0.00019335993564784164, "loss": 1.1701, "step": 6847 }, { "epoch": 0.17583743767261897, "grad_norm": 0.75, "learning_rate": 0.00019335833594817436, "loss": 0.9903, "step": 6848 }, { "epoch": 0.17586311486854078, "grad_norm": 0.85546875, "learning_rate": 0.00019335673606245239, "loss": 1.1605, "step": 6849 }, { "epoch": 0.1758887920644626, "grad_norm": 0.84765625, "learning_rate": 0.00019335513599067888, "loss": 1.0804, "step": 6850 }, { "epoch": 0.17591446926038443, "grad_norm": 0.83984375, "learning_rate": 0.000193353535732857, "loss": 1.0618, "step": 6851 }, { "epoch": 0.17594014645630623, "grad_norm": 0.7578125, "learning_rate": 0.00019335193528898997, "loss": 1.1191, "step": 6852 }, { "epoch": 0.17596582365222807, "grad_norm": 0.8125, "learning_rate": 0.00019335033465908095, "loss": 0.9843, "step": 6853 }, { "epoch": 0.17599150084814988, "grad_norm": 0.7890625, "learning_rate": 0.00019334873384313316, "loss": 1.0119, "step": 6854 }, { "epoch": 0.1760171780440717, "grad_norm": 0.80859375, "learning_rate": 0.00019334713284114975, "loss": 1.1407, "step": 6855 }, { "epoch": 0.17604285523999352, "grad_norm": 0.81640625, "learning_rate": 0.00019334553165313394, "loss": 1.1278, "step": 6856 }, { "epoch": 0.17606853243591533, "grad_norm": 0.76953125, "learning_rate": 0.00019334393027908893, "loss": 1.0517, "step": 6857 }, { "epoch": 0.17609420963183717, "grad_norm": 0.79296875, "learning_rate": 0.0001933423287190179, "loss": 0.9945, "step": 6858 }, { "epoch": 0.17611988682775898, "grad_norm": 0.9140625, "learning_rate": 0.000193340726972924, "loss": 1.1166, "step": 6859 }, { "epoch": 0.17614556402368078, "grad_norm": 0.8359375, "learning_rate": 0.00019333912504081052, "loss": 1.1933, "step": 6860 }, { "epoch": 0.17617124121960262, "grad_norm": 0.78125, "learning_rate": 0.0001933375229226806, "loss": 1.0141, "step": 6861 }, { "epoch": 0.17619691841552443, "grad_norm": 0.8125, "learning_rate": 0.00019333592061853737, "loss": 1.0988, "step": 6862 }, { "epoch": 0.17622259561144626, "grad_norm": 0.828125, "learning_rate": 0.00019333431812838413, "loss": 1.0464, "step": 6863 }, { "epoch": 0.17624827280736807, "grad_norm": 0.859375, "learning_rate": 0.000193332715452224, "loss": 1.0949, "step": 6864 }, { "epoch": 0.17627395000328988, "grad_norm": 0.78515625, "learning_rate": 0.00019333111259006022, "loss": 0.8652, "step": 6865 }, { "epoch": 0.17629962719921172, "grad_norm": 0.78125, "learning_rate": 0.00019332950954189596, "loss": 1.1865, "step": 6866 }, { "epoch": 0.17632530439513353, "grad_norm": 0.76953125, "learning_rate": 0.00019332790630773437, "loss": 0.9806, "step": 6867 }, { "epoch": 0.17635098159105536, "grad_norm": 0.84375, "learning_rate": 0.00019332630288757875, "loss": 0.9773, "step": 6868 }, { "epoch": 0.17637665878697717, "grad_norm": 0.79296875, "learning_rate": 0.00019332469928143222, "loss": 0.9864, "step": 6869 }, { "epoch": 0.17640233598289898, "grad_norm": 0.87109375, "learning_rate": 0.000193323095489298, "loss": 1.1813, "step": 6870 }, { "epoch": 0.17642801317882081, "grad_norm": 0.76171875, "learning_rate": 0.00019332149151117927, "loss": 1.1337, "step": 6871 }, { "epoch": 0.17645369037474262, "grad_norm": 0.80078125, "learning_rate": 0.00019331988734707923, "loss": 1.0493, "step": 6872 }, { "epoch": 0.17647936757066446, "grad_norm": 0.8046875, "learning_rate": 0.0001933182829970011, "loss": 1.0701, "step": 6873 }, { "epoch": 0.17650504476658627, "grad_norm": 0.921875, "learning_rate": 0.00019331667846094806, "loss": 0.937, "step": 6874 }, { "epoch": 0.17653072196250807, "grad_norm": 0.80859375, "learning_rate": 0.00019331507373892333, "loss": 1.1971, "step": 6875 }, { "epoch": 0.1765563991584299, "grad_norm": 0.8046875, "learning_rate": 0.00019331346883093005, "loss": 1.0248, "step": 6876 }, { "epoch": 0.17658207635435172, "grad_norm": 0.859375, "learning_rate": 0.00019331186373697148, "loss": 0.9741, "step": 6877 }, { "epoch": 0.17660775355027356, "grad_norm": 0.75390625, "learning_rate": 0.00019331025845705074, "loss": 1.0236, "step": 6878 }, { "epoch": 0.17663343074619536, "grad_norm": 0.96875, "learning_rate": 0.00019330865299117114, "loss": 1.0245, "step": 6879 }, { "epoch": 0.17665910794211717, "grad_norm": 0.796875, "learning_rate": 0.00019330704733933583, "loss": 1.091, "step": 6880 }, { "epoch": 0.176684785138039, "grad_norm": 0.7890625, "learning_rate": 0.00019330544150154797, "loss": 1.1133, "step": 6881 }, { "epoch": 0.17671046233396082, "grad_norm": 0.76171875, "learning_rate": 0.00019330383547781082, "loss": 1.1252, "step": 6882 }, { "epoch": 0.17673613952988265, "grad_norm": 0.8203125, "learning_rate": 0.00019330222926812753, "loss": 1.0666, "step": 6883 }, { "epoch": 0.17676181672580446, "grad_norm": 0.734375, "learning_rate": 0.00019330062287250133, "loss": 1.192, "step": 6884 }, { "epoch": 0.17678749392172627, "grad_norm": 0.8515625, "learning_rate": 0.00019329901629093542, "loss": 1.0749, "step": 6885 }, { "epoch": 0.1768131711176481, "grad_norm": 0.81640625, "learning_rate": 0.000193297409523433, "loss": 0.9735, "step": 6886 }, { "epoch": 0.1768388483135699, "grad_norm": 0.81640625, "learning_rate": 0.0001932958025699973, "loss": 1.106, "step": 6887 }, { "epoch": 0.17686452550949175, "grad_norm": 0.90625, "learning_rate": 0.00019329419543063145, "loss": 1.0728, "step": 6888 }, { "epoch": 0.17689020270541356, "grad_norm": 0.8046875, "learning_rate": 0.0001932925881053387, "loss": 0.8729, "step": 6889 }, { "epoch": 0.17691587990133537, "grad_norm": 0.921875, "learning_rate": 0.00019329098059412227, "loss": 1.1179, "step": 6890 }, { "epoch": 0.1769415570972572, "grad_norm": 0.7578125, "learning_rate": 0.00019328937289698532, "loss": 1.1201, "step": 6891 }, { "epoch": 0.176967234293179, "grad_norm": 0.8515625, "learning_rate": 0.00019328776501393108, "loss": 1.1873, "step": 6892 }, { "epoch": 0.17699291148910085, "grad_norm": 1.7421875, "learning_rate": 0.00019328615694496278, "loss": 1.1393, "step": 6893 }, { "epoch": 0.17701858868502265, "grad_norm": 0.8359375, "learning_rate": 0.00019328454869008358, "loss": 1.0055, "step": 6894 }, { "epoch": 0.17704426588094446, "grad_norm": 0.875, "learning_rate": 0.00019328294024929669, "loss": 1.239, "step": 6895 }, { "epoch": 0.1770699430768663, "grad_norm": 0.8671875, "learning_rate": 0.00019328133162260533, "loss": 1.1768, "step": 6896 }, { "epoch": 0.1770956202727881, "grad_norm": 0.89453125, "learning_rate": 0.00019327972281001267, "loss": 0.8635, "step": 6897 }, { "epoch": 0.17712129746870994, "grad_norm": 0.81640625, "learning_rate": 0.00019327811381152202, "loss": 0.9252, "step": 6898 }, { "epoch": 0.17714697466463175, "grad_norm": 0.83984375, "learning_rate": 0.00019327650462713645, "loss": 1.1695, "step": 6899 }, { "epoch": 0.17717265186055356, "grad_norm": 0.7734375, "learning_rate": 0.00019327489525685927, "loss": 0.9398, "step": 6900 }, { "epoch": 0.1771983290564754, "grad_norm": 0.8203125, "learning_rate": 0.00019327328570069362, "loss": 1.0617, "step": 6901 }, { "epoch": 0.1772240062523972, "grad_norm": 0.80859375, "learning_rate": 0.00019327167595864278, "loss": 1.0113, "step": 6902 }, { "epoch": 0.17724968344831904, "grad_norm": 0.80078125, "learning_rate": 0.0001932700660307099, "loss": 1.0467, "step": 6903 }, { "epoch": 0.17727536064424085, "grad_norm": 0.8515625, "learning_rate": 0.00019326845591689817, "loss": 1.0777, "step": 6904 }, { "epoch": 0.17730103784016266, "grad_norm": 0.84765625, "learning_rate": 0.00019326684561721087, "loss": 1.0711, "step": 6905 }, { "epoch": 0.1773267150360845, "grad_norm": 0.859375, "learning_rate": 0.00019326523513165116, "loss": 1.1526, "step": 6906 }, { "epoch": 0.1773523922320063, "grad_norm": 0.7734375, "learning_rate": 0.00019326362446022227, "loss": 0.9643, "step": 6907 }, { "epoch": 0.17737806942792814, "grad_norm": 0.8203125, "learning_rate": 0.00019326201360292736, "loss": 0.9558, "step": 6908 }, { "epoch": 0.17740374662384994, "grad_norm": 0.87109375, "learning_rate": 0.00019326040255976973, "loss": 1.1264, "step": 6909 }, { "epoch": 0.17742942381977175, "grad_norm": 0.8203125, "learning_rate": 0.0001932587913307525, "loss": 1.1566, "step": 6910 }, { "epoch": 0.1774551010156936, "grad_norm": 0.91015625, "learning_rate": 0.00019325717991587894, "loss": 1.0336, "step": 6911 }, { "epoch": 0.1774807782116154, "grad_norm": 0.81640625, "learning_rate": 0.00019325556831515227, "loss": 1.1755, "step": 6912 }, { "epoch": 0.17750645540753723, "grad_norm": 0.8046875, "learning_rate": 0.00019325395652857564, "loss": 0.974, "step": 6913 }, { "epoch": 0.17753213260345904, "grad_norm": 0.80859375, "learning_rate": 0.0001932523445561523, "loss": 0.9632, "step": 6914 }, { "epoch": 0.17755780979938085, "grad_norm": 0.80859375, "learning_rate": 0.00019325073239788548, "loss": 1.2544, "step": 6915 }, { "epoch": 0.1775834869953027, "grad_norm": 0.8359375, "learning_rate": 0.00019324912005377838, "loss": 1.1179, "step": 6916 }, { "epoch": 0.1776091641912245, "grad_norm": 0.82421875, "learning_rate": 0.0001932475075238342, "loss": 0.9038, "step": 6917 }, { "epoch": 0.17763484138714633, "grad_norm": 0.75390625, "learning_rate": 0.00019324589480805614, "loss": 0.9963, "step": 6918 }, { "epoch": 0.17766051858306814, "grad_norm": 0.921875, "learning_rate": 0.00019324428190644745, "loss": 1.1402, "step": 6919 }, { "epoch": 0.17768619577898995, "grad_norm": 0.80078125, "learning_rate": 0.00019324266881901134, "loss": 1.1729, "step": 6920 }, { "epoch": 0.17771187297491178, "grad_norm": 0.87109375, "learning_rate": 0.00019324105554575097, "loss": 1.0017, "step": 6921 }, { "epoch": 0.1777375501708336, "grad_norm": 0.87109375, "learning_rate": 0.00019323944208666965, "loss": 1.2143, "step": 6922 }, { "epoch": 0.1777632273667554, "grad_norm": 0.828125, "learning_rate": 0.00019323782844177055, "loss": 1.1533, "step": 6923 }, { "epoch": 0.17778890456267724, "grad_norm": 0.90625, "learning_rate": 0.00019323621461105685, "loss": 1.186, "step": 6924 }, { "epoch": 0.17781458175859904, "grad_norm": 0.90625, "learning_rate": 0.0001932346005945318, "loss": 1.0948, "step": 6925 }, { "epoch": 0.17784025895452088, "grad_norm": 0.85546875, "learning_rate": 0.0001932329863921986, "loss": 1.0888, "step": 6926 }, { "epoch": 0.1778659361504427, "grad_norm": 0.83203125, "learning_rate": 0.00019323137200406048, "loss": 1.0565, "step": 6927 }, { "epoch": 0.1778916133463645, "grad_norm": 0.84375, "learning_rate": 0.0001932297574301207, "loss": 0.9835, "step": 6928 }, { "epoch": 0.17791729054228633, "grad_norm": 0.81640625, "learning_rate": 0.0001932281426703824, "loss": 1.0773, "step": 6929 }, { "epoch": 0.17794296773820814, "grad_norm": 0.80859375, "learning_rate": 0.00019322652772484883, "loss": 0.9982, "step": 6930 }, { "epoch": 0.17796864493412998, "grad_norm": 0.79296875, "learning_rate": 0.00019322491259352322, "loss": 0.8881, "step": 6931 }, { "epoch": 0.17799432213005179, "grad_norm": 1.8515625, "learning_rate": 0.0001932232972764088, "loss": 1.045, "step": 6932 }, { "epoch": 0.1780199993259736, "grad_norm": 1.0, "learning_rate": 0.00019322168177350873, "loss": 1.0553, "step": 6933 }, { "epoch": 0.17804567652189543, "grad_norm": 0.83984375, "learning_rate": 0.00019322006608482627, "loss": 1.0622, "step": 6934 }, { "epoch": 0.17807135371781724, "grad_norm": 0.859375, "learning_rate": 0.00019321845021036467, "loss": 0.9724, "step": 6935 }, { "epoch": 0.17809703091373907, "grad_norm": 0.80859375, "learning_rate": 0.00019321683415012713, "loss": 1.0094, "step": 6936 }, { "epoch": 0.17812270810966088, "grad_norm": 1.015625, "learning_rate": 0.00019321521790411681, "loss": 1.0273, "step": 6937 }, { "epoch": 0.1781483853055827, "grad_norm": 0.8203125, "learning_rate": 0.00019321360147233702, "loss": 0.981, "step": 6938 }, { "epoch": 0.17817406250150453, "grad_norm": 0.765625, "learning_rate": 0.00019321198485479092, "loss": 1.0205, "step": 6939 }, { "epoch": 0.17819973969742633, "grad_norm": 0.73828125, "learning_rate": 0.00019321036805148176, "loss": 0.9976, "step": 6940 }, { "epoch": 0.17822541689334817, "grad_norm": 0.91015625, "learning_rate": 0.00019320875106241275, "loss": 1.0071, "step": 6941 }, { "epoch": 0.17825109408926998, "grad_norm": 0.91015625, "learning_rate": 0.00019320713388758714, "loss": 1.0806, "step": 6942 }, { "epoch": 0.1782767712851918, "grad_norm": 0.86328125, "learning_rate": 0.00019320551652700813, "loss": 1.092, "step": 6943 }, { "epoch": 0.17830244848111362, "grad_norm": 0.8359375, "learning_rate": 0.00019320389898067893, "loss": 1.1022, "step": 6944 }, { "epoch": 0.17832812567703543, "grad_norm": 0.8515625, "learning_rate": 0.0001932022812486028, "loss": 0.7357, "step": 6945 }, { "epoch": 0.17835380287295727, "grad_norm": 0.83203125, "learning_rate": 0.00019320066333078291, "loss": 1.2401, "step": 6946 }, { "epoch": 0.17837948006887908, "grad_norm": 0.875, "learning_rate": 0.00019319904522722254, "loss": 1.0205, "step": 6947 }, { "epoch": 0.17840515726480088, "grad_norm": 0.890625, "learning_rate": 0.00019319742693792486, "loss": 1.0763, "step": 6948 }, { "epoch": 0.17843083446072272, "grad_norm": 0.81640625, "learning_rate": 0.00019319580846289319, "loss": 1.0401, "step": 6949 }, { "epoch": 0.17845651165664453, "grad_norm": 0.84765625, "learning_rate": 0.00019319418980213063, "loss": 0.9397, "step": 6950 }, { "epoch": 0.17848218885256636, "grad_norm": 0.75, "learning_rate": 0.0001931925709556405, "loss": 1.1207, "step": 6951 }, { "epoch": 0.17850786604848817, "grad_norm": 0.76953125, "learning_rate": 0.00019319095192342597, "loss": 0.9345, "step": 6952 }, { "epoch": 0.17853354324440998, "grad_norm": 0.828125, "learning_rate": 0.00019318933270549032, "loss": 1.1094, "step": 6953 }, { "epoch": 0.17855922044033182, "grad_norm": 0.9921875, "learning_rate": 0.00019318771330183672, "loss": 1.0865, "step": 6954 }, { "epoch": 0.17858489763625363, "grad_norm": 0.8046875, "learning_rate": 0.00019318609371246845, "loss": 0.9268, "step": 6955 }, { "epoch": 0.17861057483217546, "grad_norm": 0.77734375, "learning_rate": 0.0001931844739373887, "loss": 1.1092, "step": 6956 }, { "epoch": 0.17863625202809727, "grad_norm": 0.796875, "learning_rate": 0.0001931828539766007, "loss": 1.0499, "step": 6957 }, { "epoch": 0.17866192922401908, "grad_norm": 0.84375, "learning_rate": 0.00019318123383010768, "loss": 1.1733, "step": 6958 }, { "epoch": 0.17868760641994091, "grad_norm": 0.86328125, "learning_rate": 0.00019317961349791293, "loss": 1.0692, "step": 6959 }, { "epoch": 0.17871328361586272, "grad_norm": 0.8671875, "learning_rate": 0.00019317799298001958, "loss": 1.0162, "step": 6960 }, { "epoch": 0.17873896081178456, "grad_norm": 0.828125, "learning_rate": 0.00019317637227643094, "loss": 0.8428, "step": 6961 }, { "epoch": 0.17876463800770637, "grad_norm": 0.7890625, "learning_rate": 0.00019317475138715018, "loss": 1.0475, "step": 6962 }, { "epoch": 0.17879031520362818, "grad_norm": 0.921875, "learning_rate": 0.00019317313031218055, "loss": 1.1562, "step": 6963 }, { "epoch": 0.17881599239955, "grad_norm": 0.80078125, "learning_rate": 0.00019317150905152534, "loss": 1.1192, "step": 6964 }, { "epoch": 0.17884166959547182, "grad_norm": 0.80859375, "learning_rate": 0.00019316988760518768, "loss": 1.0898, "step": 6965 }, { "epoch": 0.17886734679139366, "grad_norm": 0.8828125, "learning_rate": 0.00019316826597317083, "loss": 0.9094, "step": 6966 }, { "epoch": 0.17889302398731546, "grad_norm": 0.796875, "learning_rate": 0.0001931666441554781, "loss": 1.0306, "step": 6967 }, { "epoch": 0.17891870118323727, "grad_norm": 0.87109375, "learning_rate": 0.00019316502215211263, "loss": 1.0748, "step": 6968 }, { "epoch": 0.1789443783791591, "grad_norm": 0.7734375, "learning_rate": 0.0001931633999630777, "loss": 1.0108, "step": 6969 }, { "epoch": 0.17897005557508092, "grad_norm": 0.85546875, "learning_rate": 0.0001931617775883765, "loss": 1.068, "step": 6970 }, { "epoch": 0.17899573277100275, "grad_norm": 0.90234375, "learning_rate": 0.00019316015502801235, "loss": 1.1516, "step": 6971 }, { "epoch": 0.17902140996692456, "grad_norm": 0.80859375, "learning_rate": 0.0001931585322819884, "loss": 0.8707, "step": 6972 }, { "epoch": 0.17904708716284637, "grad_norm": 2.03125, "learning_rate": 0.00019315690935030788, "loss": 1.1376, "step": 6973 }, { "epoch": 0.1790727643587682, "grad_norm": 0.8671875, "learning_rate": 0.00019315528623297407, "loss": 1.2153, "step": 6974 }, { "epoch": 0.17909844155469, "grad_norm": 0.84375, "learning_rate": 0.0001931536629299902, "loss": 1.0765, "step": 6975 }, { "epoch": 0.17912411875061185, "grad_norm": 0.7890625, "learning_rate": 0.00019315203944135948, "loss": 1.1195, "step": 6976 }, { "epoch": 0.17914979594653366, "grad_norm": 0.890625, "learning_rate": 0.00019315041576708518, "loss": 1.0646, "step": 6977 }, { "epoch": 0.17917547314245547, "grad_norm": 0.8046875, "learning_rate": 0.0001931487919071705, "loss": 1.1108, "step": 6978 }, { "epoch": 0.1792011503383773, "grad_norm": 0.83203125, "learning_rate": 0.00019314716786161871, "loss": 1.1002, "step": 6979 }, { "epoch": 0.1792268275342991, "grad_norm": 0.859375, "learning_rate": 0.00019314554363043302, "loss": 1.0576, "step": 6980 }, { "epoch": 0.17925250473022095, "grad_norm": 0.80859375, "learning_rate": 0.00019314391921361669, "loss": 1.105, "step": 6981 }, { "epoch": 0.17927818192614275, "grad_norm": 0.74609375, "learning_rate": 0.0001931422946111729, "loss": 1.1338, "step": 6982 }, { "epoch": 0.17930385912206456, "grad_norm": 0.85546875, "learning_rate": 0.00019314066982310495, "loss": 1.0731, "step": 6983 }, { "epoch": 0.1793295363179864, "grad_norm": 0.78515625, "learning_rate": 0.00019313904484941606, "loss": 0.9885, "step": 6984 }, { "epoch": 0.1793552135139082, "grad_norm": 0.78515625, "learning_rate": 0.00019313741969010945, "loss": 0.9538, "step": 6985 }, { "epoch": 0.17938089070983004, "grad_norm": 0.79296875, "learning_rate": 0.0001931357943451884, "loss": 1.0677, "step": 6986 }, { "epoch": 0.17940656790575185, "grad_norm": 0.87890625, "learning_rate": 0.00019313416881465613, "loss": 1.0397, "step": 6987 }, { "epoch": 0.17943224510167366, "grad_norm": 0.8515625, "learning_rate": 0.00019313254309851586, "loss": 1.1038, "step": 6988 }, { "epoch": 0.1794579222975955, "grad_norm": 0.8046875, "learning_rate": 0.00019313091719677086, "loss": 1.0292, "step": 6989 }, { "epoch": 0.1794835994935173, "grad_norm": 0.8828125, "learning_rate": 0.00019312929110942433, "loss": 0.9131, "step": 6990 }, { "epoch": 0.17950927668943914, "grad_norm": 0.875, "learning_rate": 0.00019312766483647957, "loss": 1.1145, "step": 6991 }, { "epoch": 0.17953495388536095, "grad_norm": 0.8671875, "learning_rate": 0.00019312603837793978, "loss": 1.0569, "step": 6992 }, { "epoch": 0.17956063108128276, "grad_norm": 0.73046875, "learning_rate": 0.00019312441173380816, "loss": 0.9411, "step": 6993 }, { "epoch": 0.1795863082772046, "grad_norm": 0.8828125, "learning_rate": 0.00019312278490408804, "loss": 1.0825, "step": 6994 }, { "epoch": 0.1796119854731264, "grad_norm": 1.0703125, "learning_rate": 0.0001931211578887826, "loss": 1.0772, "step": 6995 }, { "epoch": 0.17963766266904824, "grad_norm": 0.890625, "learning_rate": 0.0001931195306878951, "loss": 1.0048, "step": 6996 }, { "epoch": 0.17966333986497005, "grad_norm": 0.85546875, "learning_rate": 0.00019311790330142883, "loss": 1.1726, "step": 6997 }, { "epoch": 0.17968901706089185, "grad_norm": 0.8203125, "learning_rate": 0.00019311627572938695, "loss": 1.0056, "step": 6998 }, { "epoch": 0.1797146942568137, "grad_norm": 0.81640625, "learning_rate": 0.00019311464797177276, "loss": 1.0246, "step": 6999 }, { "epoch": 0.1797403714527355, "grad_norm": 0.78515625, "learning_rate": 0.00019311302002858948, "loss": 0.9698, "step": 7000 }, { "epoch": 0.1797403714527355, "eval_loss": 1.0615228414535522, "eval_model_preparation_time": 0.0065, "eval_runtime": 402.9474, "eval_samples_per_second": 24.817, "eval_steps_per_second": 0.777, "step": 7000 }, { "epoch": 0.17976604864865733, "grad_norm": 0.8203125, "learning_rate": 0.00019311139189984037, "loss": 1.1696, "step": 7001 }, { "epoch": 0.17979172584457914, "grad_norm": 0.8125, "learning_rate": 0.00019310976358552867, "loss": 1.1284, "step": 7002 }, { "epoch": 0.17981740304050095, "grad_norm": 0.81640625, "learning_rate": 0.0001931081350856576, "loss": 1.1105, "step": 7003 }, { "epoch": 0.1798430802364228, "grad_norm": 0.82421875, "learning_rate": 0.00019310650640023047, "loss": 0.9855, "step": 7004 }, { "epoch": 0.1798687574323446, "grad_norm": 0.85546875, "learning_rate": 0.00019310487752925045, "loss": 1.0835, "step": 7005 }, { "epoch": 0.17989443462826643, "grad_norm": 0.859375, "learning_rate": 0.00019310324847272083, "loss": 1.1517, "step": 7006 }, { "epoch": 0.17992011182418824, "grad_norm": 0.83203125, "learning_rate": 0.00019310161923064484, "loss": 1.0414, "step": 7007 }, { "epoch": 0.17994578902011005, "grad_norm": 1.9765625, "learning_rate": 0.00019309998980302572, "loss": 1.1861, "step": 7008 }, { "epoch": 0.17997146621603188, "grad_norm": 0.875, "learning_rate": 0.00019309836018986673, "loss": 1.1194, "step": 7009 }, { "epoch": 0.1799971434119537, "grad_norm": 0.796875, "learning_rate": 0.00019309673039117112, "loss": 0.9878, "step": 7010 }, { "epoch": 0.18002282060787553, "grad_norm": 1.015625, "learning_rate": 0.00019309510040694215, "loss": 1.0351, "step": 7011 }, { "epoch": 0.18004849780379734, "grad_norm": 0.83203125, "learning_rate": 0.00019309347023718307, "loss": 1.1684, "step": 7012 }, { "epoch": 0.18007417499971914, "grad_norm": 0.828125, "learning_rate": 0.0001930918398818971, "loss": 1.0114, "step": 7013 }, { "epoch": 0.18009985219564098, "grad_norm": 0.83984375, "learning_rate": 0.00019309020934108747, "loss": 1.1173, "step": 7014 }, { "epoch": 0.1801255293915628, "grad_norm": 0.84765625, "learning_rate": 0.0001930885786147575, "loss": 1.1324, "step": 7015 }, { "epoch": 0.18015120658748462, "grad_norm": 0.83984375, "learning_rate": 0.00019308694770291037, "loss": 1.0613, "step": 7016 }, { "epoch": 0.18017688378340643, "grad_norm": 0.76953125, "learning_rate": 0.0001930853166055494, "loss": 1.0185, "step": 7017 }, { "epoch": 0.18020256097932824, "grad_norm": 0.890625, "learning_rate": 0.0001930836853226778, "loss": 1.0693, "step": 7018 }, { "epoch": 0.18022823817525008, "grad_norm": 0.83203125, "learning_rate": 0.00019308205385429878, "loss": 1.2065, "step": 7019 }, { "epoch": 0.18025391537117189, "grad_norm": 0.84375, "learning_rate": 0.00019308042220041568, "loss": 0.9957, "step": 7020 }, { "epoch": 0.18027959256709372, "grad_norm": 0.80859375, "learning_rate": 0.0001930787903610317, "loss": 0.9307, "step": 7021 }, { "epoch": 0.18030526976301553, "grad_norm": 0.8671875, "learning_rate": 0.00019307715833615007, "loss": 1.1348, "step": 7022 }, { "epoch": 0.18033094695893734, "grad_norm": 0.8828125, "learning_rate": 0.0001930755261257741, "loss": 1.1561, "step": 7023 }, { "epoch": 0.18035662415485917, "grad_norm": 1.234375, "learning_rate": 0.000193073893729907, "loss": 1.0737, "step": 7024 }, { "epoch": 0.18038230135078098, "grad_norm": 0.875, "learning_rate": 0.00019307226114855203, "loss": 1.0615, "step": 7025 }, { "epoch": 0.18040797854670282, "grad_norm": 0.84765625, "learning_rate": 0.00019307062838171243, "loss": 1.0305, "step": 7026 }, { "epoch": 0.18043365574262463, "grad_norm": 0.81640625, "learning_rate": 0.00019306899542939152, "loss": 0.9947, "step": 7027 }, { "epoch": 0.18045933293854644, "grad_norm": 0.84375, "learning_rate": 0.00019306736229159248, "loss": 1.2872, "step": 7028 }, { "epoch": 0.18048501013446827, "grad_norm": 0.8125, "learning_rate": 0.0001930657289683186, "loss": 1.035, "step": 7029 }, { "epoch": 0.18051068733039008, "grad_norm": 0.796875, "learning_rate": 0.00019306409545957314, "loss": 1.0835, "step": 7030 }, { "epoch": 0.18053636452631192, "grad_norm": 0.8125, "learning_rate": 0.00019306246176535935, "loss": 1.0925, "step": 7031 }, { "epoch": 0.18056204172223372, "grad_norm": 0.765625, "learning_rate": 0.00019306082788568047, "loss": 1.091, "step": 7032 }, { "epoch": 0.18058771891815553, "grad_norm": 0.76953125, "learning_rate": 0.00019305919382053976, "loss": 0.9454, "step": 7033 }, { "epoch": 0.18061339611407737, "grad_norm": 0.859375, "learning_rate": 0.0001930575595699405, "loss": 1.0808, "step": 7034 }, { "epoch": 0.18063907330999918, "grad_norm": 0.8125, "learning_rate": 0.0001930559251338859, "loss": 1.0616, "step": 7035 }, { "epoch": 0.180664750505921, "grad_norm": 3.703125, "learning_rate": 0.00019305429051237927, "loss": 1.0407, "step": 7036 }, { "epoch": 0.18069042770184282, "grad_norm": 0.85546875, "learning_rate": 0.00019305265570542383, "loss": 1.0811, "step": 7037 }, { "epoch": 0.18071610489776463, "grad_norm": 0.81640625, "learning_rate": 0.00019305102071302287, "loss": 1.2382, "step": 7038 }, { "epoch": 0.18074178209368646, "grad_norm": 0.90234375, "learning_rate": 0.00019304938553517964, "loss": 1.1468, "step": 7039 }, { "epoch": 0.18076745928960827, "grad_norm": 0.7890625, "learning_rate": 0.00019304775017189735, "loss": 1.0609, "step": 7040 }, { "epoch": 0.1807931364855301, "grad_norm": 0.7578125, "learning_rate": 0.00019304611462317932, "loss": 0.9621, "step": 7041 }, { "epoch": 0.18081881368145192, "grad_norm": 0.7578125, "learning_rate": 0.0001930444788890288, "loss": 0.981, "step": 7042 }, { "epoch": 0.18084449087737373, "grad_norm": 0.76171875, "learning_rate": 0.00019304284296944903, "loss": 0.9671, "step": 7043 }, { "epoch": 0.18087016807329556, "grad_norm": 0.86328125, "learning_rate": 0.00019304120686444327, "loss": 1.0816, "step": 7044 }, { "epoch": 0.18089584526921737, "grad_norm": 0.86328125, "learning_rate": 0.0001930395705740148, "loss": 1.2648, "step": 7045 }, { "epoch": 0.1809215224651392, "grad_norm": 0.796875, "learning_rate": 0.00019303793409816686, "loss": 1.1498, "step": 7046 }, { "epoch": 0.18094719966106101, "grad_norm": 0.80859375, "learning_rate": 0.00019303629743690272, "loss": 0.9724, "step": 7047 }, { "epoch": 0.18097287685698282, "grad_norm": 0.8359375, "learning_rate": 0.00019303466059022566, "loss": 1.0057, "step": 7048 }, { "epoch": 0.18099855405290466, "grad_norm": 0.7734375, "learning_rate": 0.00019303302355813893, "loss": 1.0168, "step": 7049 }, { "epoch": 0.18102423124882647, "grad_norm": 0.8203125, "learning_rate": 0.00019303138634064576, "loss": 1.0586, "step": 7050 }, { "epoch": 0.1810499084447483, "grad_norm": 0.76171875, "learning_rate": 0.00019302974893774946, "loss": 1.0247, "step": 7051 }, { "epoch": 0.1810755856406701, "grad_norm": 0.859375, "learning_rate": 0.00019302811134945324, "loss": 1.0708, "step": 7052 }, { "epoch": 0.18110126283659192, "grad_norm": 0.796875, "learning_rate": 0.00019302647357576045, "loss": 1.053, "step": 7053 }, { "epoch": 0.18112694003251376, "grad_norm": 0.83984375, "learning_rate": 0.00019302483561667427, "loss": 1.0707, "step": 7054 }, { "epoch": 0.18115261722843556, "grad_norm": 0.78515625, "learning_rate": 0.000193023197472198, "loss": 1.1359, "step": 7055 }, { "epoch": 0.1811782944243574, "grad_norm": 0.84765625, "learning_rate": 0.0001930215591423349, "loss": 1.1007, "step": 7056 }, { "epoch": 0.1812039716202792, "grad_norm": 0.88671875, "learning_rate": 0.0001930199206270882, "loss": 1.0364, "step": 7057 }, { "epoch": 0.18122964881620102, "grad_norm": 0.96484375, "learning_rate": 0.00019301828192646125, "loss": 1.2782, "step": 7058 }, { "epoch": 0.18125532601212285, "grad_norm": 0.78515625, "learning_rate": 0.0001930166430404573, "loss": 0.9833, "step": 7059 }, { "epoch": 0.18128100320804466, "grad_norm": 0.90625, "learning_rate": 0.0001930150039690795, "loss": 0.9594, "step": 7060 }, { "epoch": 0.1813066804039665, "grad_norm": 0.77734375, "learning_rate": 0.00019301336471233123, "loss": 1.0811, "step": 7061 }, { "epoch": 0.1813323575998883, "grad_norm": 0.84765625, "learning_rate": 0.00019301172527021575, "loss": 0.9756, "step": 7062 }, { "epoch": 0.1813580347958101, "grad_norm": 0.72265625, "learning_rate": 0.00019301008564273628, "loss": 1.0798, "step": 7063 }, { "epoch": 0.18138371199173195, "grad_norm": 0.890625, "learning_rate": 0.0001930084458298961, "loss": 1.1568, "step": 7064 }, { "epoch": 0.18140938918765376, "grad_norm": 0.78515625, "learning_rate": 0.0001930068058316985, "loss": 0.9642, "step": 7065 }, { "epoch": 0.1814350663835756, "grad_norm": 0.75, "learning_rate": 0.00019300516564814676, "loss": 0.951, "step": 7066 }, { "epoch": 0.1814607435794974, "grad_norm": 0.84765625, "learning_rate": 0.00019300352527924411, "loss": 1.2233, "step": 7067 }, { "epoch": 0.1814864207754192, "grad_norm": 0.78125, "learning_rate": 0.00019300188472499383, "loss": 1.073, "step": 7068 }, { "epoch": 0.18151209797134105, "grad_norm": 0.828125, "learning_rate": 0.0001930002439853992, "loss": 1.048, "step": 7069 }, { "epoch": 0.18153777516726285, "grad_norm": 0.91796875, "learning_rate": 0.0001929986030604635, "loss": 1.0955, "step": 7070 }, { "epoch": 0.1815634523631847, "grad_norm": 0.84765625, "learning_rate": 0.00019299696195018993, "loss": 1.1104, "step": 7071 }, { "epoch": 0.1815891295591065, "grad_norm": 0.81640625, "learning_rate": 0.00019299532065458186, "loss": 1.0022, "step": 7072 }, { "epoch": 0.1816148067550283, "grad_norm": 0.8984375, "learning_rate": 0.0001929936791736425, "loss": 1.2496, "step": 7073 }, { "epoch": 0.18164048395095014, "grad_norm": 0.80859375, "learning_rate": 0.00019299203750737515, "loss": 1.0139, "step": 7074 }, { "epoch": 0.18166616114687195, "grad_norm": 0.78515625, "learning_rate": 0.00019299039565578307, "loss": 1.088, "step": 7075 }, { "epoch": 0.1816918383427938, "grad_norm": 0.859375, "learning_rate": 0.00019298875361886951, "loss": 1.106, "step": 7076 }, { "epoch": 0.1817175155387156, "grad_norm": 0.8671875, "learning_rate": 0.00019298711139663778, "loss": 1.1174, "step": 7077 }, { "epoch": 0.1817431927346374, "grad_norm": 0.81640625, "learning_rate": 0.00019298546898909114, "loss": 1.0379, "step": 7078 }, { "epoch": 0.18176886993055924, "grad_norm": 0.796875, "learning_rate": 0.00019298382639623286, "loss": 0.9673, "step": 7079 }, { "epoch": 0.18179454712648105, "grad_norm": 0.80078125, "learning_rate": 0.00019298218361806622, "loss": 1.076, "step": 7080 }, { "epoch": 0.18182022432240288, "grad_norm": 0.7890625, "learning_rate": 0.00019298054065459446, "loss": 1.0146, "step": 7081 }, { "epoch": 0.1818459015183247, "grad_norm": 0.78515625, "learning_rate": 0.0001929788975058209, "loss": 1.1071, "step": 7082 }, { "epoch": 0.1818715787142465, "grad_norm": 1.1484375, "learning_rate": 0.0001929772541717488, "loss": 1.1165, "step": 7083 }, { "epoch": 0.18189725591016834, "grad_norm": 0.87109375, "learning_rate": 0.0001929756106523814, "loss": 1.0616, "step": 7084 }, { "epoch": 0.18192293310609015, "grad_norm": 0.83203125, "learning_rate": 0.00019297396694772204, "loss": 0.9986, "step": 7085 }, { "epoch": 0.18194861030201198, "grad_norm": 0.8203125, "learning_rate": 0.00019297232305777397, "loss": 1.0319, "step": 7086 }, { "epoch": 0.1819742874979338, "grad_norm": 0.84375, "learning_rate": 0.00019297067898254043, "loss": 1.0889, "step": 7087 }, { "epoch": 0.1819999646938556, "grad_norm": 0.828125, "learning_rate": 0.0001929690347220247, "loss": 1.0895, "step": 7088 }, { "epoch": 0.18202564188977743, "grad_norm": 0.87890625, "learning_rate": 0.00019296739027623017, "loss": 1.1306, "step": 7089 }, { "epoch": 0.18205131908569924, "grad_norm": 0.84765625, "learning_rate": 0.00019296574564515995, "loss": 1.0832, "step": 7090 }, { "epoch": 0.18207699628162108, "grad_norm": 0.8671875, "learning_rate": 0.00019296410082881744, "loss": 1.0668, "step": 7091 }, { "epoch": 0.1821026734775429, "grad_norm": 0.80078125, "learning_rate": 0.00019296245582720584, "loss": 1.0769, "step": 7092 }, { "epoch": 0.1821283506734647, "grad_norm": 0.8125, "learning_rate": 0.0001929608106403285, "loss": 1.0686, "step": 7093 }, { "epoch": 0.18215402786938653, "grad_norm": 0.8359375, "learning_rate": 0.00019295916526818863, "loss": 1.146, "step": 7094 }, { "epoch": 0.18217970506530834, "grad_norm": 0.80078125, "learning_rate": 0.00019295751971078954, "loss": 1.0867, "step": 7095 }, { "epoch": 0.18220538226123018, "grad_norm": 0.828125, "learning_rate": 0.00019295587396813451, "loss": 1.0328, "step": 7096 }, { "epoch": 0.18223105945715198, "grad_norm": 0.8359375, "learning_rate": 0.00019295422804022685, "loss": 0.9967, "step": 7097 }, { "epoch": 0.1822567366530738, "grad_norm": 0.77734375, "learning_rate": 0.0001929525819270698, "loss": 0.978, "step": 7098 }, { "epoch": 0.18228241384899563, "grad_norm": 0.83203125, "learning_rate": 0.00019295093562866664, "loss": 1.0472, "step": 7099 }, { "epoch": 0.18230809104491744, "grad_norm": 0.80078125, "learning_rate": 0.00019294928914502066, "loss": 1.088, "step": 7100 }, { "epoch": 0.18233376824083927, "grad_norm": 0.84765625, "learning_rate": 0.00019294764247613515, "loss": 1.1663, "step": 7101 }, { "epoch": 0.18235944543676108, "grad_norm": 0.94140625, "learning_rate": 0.00019294599562201338, "loss": 1.2472, "step": 7102 }, { "epoch": 0.1823851226326829, "grad_norm": 0.73828125, "learning_rate": 0.00019294434858265863, "loss": 1.0875, "step": 7103 }, { "epoch": 0.18241079982860472, "grad_norm": 0.80859375, "learning_rate": 0.0001929427013580742, "loss": 1.0857, "step": 7104 }, { "epoch": 0.18243647702452653, "grad_norm": 0.8359375, "learning_rate": 0.00019294105394826336, "loss": 1.12, "step": 7105 }, { "epoch": 0.18246215422044837, "grad_norm": 0.8046875, "learning_rate": 0.0001929394063532294, "loss": 1.172, "step": 7106 }, { "epoch": 0.18248783141637018, "grad_norm": 0.828125, "learning_rate": 0.00019293775857297558, "loss": 0.9966, "step": 7107 }, { "epoch": 0.18251350861229199, "grad_norm": 0.92578125, "learning_rate": 0.0001929361106075052, "loss": 1.1352, "step": 7108 }, { "epoch": 0.18253918580821382, "grad_norm": 0.76953125, "learning_rate": 0.00019293446245682158, "loss": 1.0731, "step": 7109 }, { "epoch": 0.18256486300413563, "grad_norm": 0.74609375, "learning_rate": 0.00019293281412092794, "loss": 0.9768, "step": 7110 }, { "epoch": 0.18259054020005747, "grad_norm": 0.828125, "learning_rate": 0.0001929311655998276, "loss": 1.0918, "step": 7111 }, { "epoch": 0.18261621739597927, "grad_norm": 0.84375, "learning_rate": 0.00019292951689352386, "loss": 1.2249, "step": 7112 }, { "epoch": 0.18264189459190108, "grad_norm": 0.8359375, "learning_rate": 0.00019292786800202, "loss": 1.0513, "step": 7113 }, { "epoch": 0.18266757178782292, "grad_norm": 0.83984375, "learning_rate": 0.00019292621892531923, "loss": 1.1333, "step": 7114 }, { "epoch": 0.18269324898374473, "grad_norm": 0.828125, "learning_rate": 0.00019292456966342496, "loss": 1.2115, "step": 7115 }, { "epoch": 0.18271892617966656, "grad_norm": 0.76953125, "learning_rate": 0.00019292292021634038, "loss": 0.9803, "step": 7116 }, { "epoch": 0.18274460337558837, "grad_norm": 0.7734375, "learning_rate": 0.00019292127058406883, "loss": 1.125, "step": 7117 }, { "epoch": 0.18277028057151018, "grad_norm": 0.8046875, "learning_rate": 0.0001929196207666136, "loss": 1.1373, "step": 7118 }, { "epoch": 0.18279595776743202, "grad_norm": 0.8203125, "learning_rate": 0.00019291797076397793, "loss": 1.0231, "step": 7119 }, { "epoch": 0.18282163496335382, "grad_norm": 0.8671875, "learning_rate": 0.00019291632057616512, "loss": 1.0733, "step": 7120 }, { "epoch": 0.18284731215927566, "grad_norm": 0.7734375, "learning_rate": 0.00019291467020317852, "loss": 0.982, "step": 7121 }, { "epoch": 0.18287298935519747, "grad_norm": 0.79296875, "learning_rate": 0.00019291301964502138, "loss": 1.1271, "step": 7122 }, { "epoch": 0.18289866655111928, "grad_norm": 0.82421875, "learning_rate": 0.00019291136890169693, "loss": 1.0211, "step": 7123 }, { "epoch": 0.1829243437470411, "grad_norm": 0.8125, "learning_rate": 0.00019290971797320856, "loss": 1.0397, "step": 7124 }, { "epoch": 0.18295002094296292, "grad_norm": 0.80078125, "learning_rate": 0.0001929080668595595, "loss": 1.1406, "step": 7125 }, { "epoch": 0.18297569813888473, "grad_norm": 0.86328125, "learning_rate": 0.00019290641556075306, "loss": 1.1256, "step": 7126 }, { "epoch": 0.18300137533480657, "grad_norm": 0.72265625, "learning_rate": 0.0001929047640767925, "loss": 0.9563, "step": 7127 }, { "epoch": 0.18302705253072837, "grad_norm": 0.890625, "learning_rate": 0.00019290311240768116, "loss": 0.974, "step": 7128 }, { "epoch": 0.1830527297266502, "grad_norm": 0.77734375, "learning_rate": 0.00019290146055342232, "loss": 1.0726, "step": 7129 }, { "epoch": 0.18307840692257202, "grad_norm": 0.84765625, "learning_rate": 0.00019289980851401928, "loss": 0.9987, "step": 7130 }, { "epoch": 0.18310408411849383, "grad_norm": 0.875, "learning_rate": 0.00019289815628947525, "loss": 1.0743, "step": 7131 }, { "epoch": 0.18312976131441566, "grad_norm": 0.890625, "learning_rate": 0.00019289650387979362, "loss": 1.1024, "step": 7132 }, { "epoch": 0.18315543851033747, "grad_norm": 0.77734375, "learning_rate": 0.00019289485128497766, "loss": 1.2485, "step": 7133 }, { "epoch": 0.1831811157062593, "grad_norm": 0.7890625, "learning_rate": 0.00019289319850503063, "loss": 1.1168, "step": 7134 }, { "epoch": 0.18320679290218111, "grad_norm": 0.86328125, "learning_rate": 0.00019289154553995584, "loss": 1.0531, "step": 7135 }, { "epoch": 0.18323247009810292, "grad_norm": 0.78125, "learning_rate": 0.00019288989238975664, "loss": 1.0684, "step": 7136 }, { "epoch": 0.18325814729402476, "grad_norm": 0.7109375, "learning_rate": 0.00019288823905443624, "loss": 0.9193, "step": 7137 }, { "epoch": 0.18328382448994657, "grad_norm": 0.796875, "learning_rate": 0.000192886585533998, "loss": 1.0242, "step": 7138 }, { "epoch": 0.1833095016858684, "grad_norm": 0.8046875, "learning_rate": 0.00019288493182844513, "loss": 0.9409, "step": 7139 }, { "epoch": 0.1833351788817902, "grad_norm": 0.76953125, "learning_rate": 0.000192883277937781, "loss": 0.9727, "step": 7140 }, { "epoch": 0.18336085607771202, "grad_norm": 0.796875, "learning_rate": 0.0001928816238620089, "loss": 1.0923, "step": 7141 }, { "epoch": 0.18338653327363386, "grad_norm": 0.80078125, "learning_rate": 0.00019287996960113213, "loss": 1.0756, "step": 7142 }, { "epoch": 0.18341221046955566, "grad_norm": 0.8671875, "learning_rate": 0.00019287831515515393, "loss": 1.1563, "step": 7143 }, { "epoch": 0.1834378876654775, "grad_norm": 0.90625, "learning_rate": 0.00019287666052407765, "loss": 1.0208, "step": 7144 }, { "epoch": 0.1834635648613993, "grad_norm": 0.8046875, "learning_rate": 0.0001928750057079066, "loss": 0.9609, "step": 7145 }, { "epoch": 0.18348924205732112, "grad_norm": 0.875, "learning_rate": 0.00019287335070664402, "loss": 1.0793, "step": 7146 }, { "epoch": 0.18351491925324295, "grad_norm": 0.890625, "learning_rate": 0.00019287169552029327, "loss": 1.0699, "step": 7147 }, { "epoch": 0.18354059644916476, "grad_norm": 0.8515625, "learning_rate": 0.00019287004014885762, "loss": 1.0508, "step": 7148 }, { "epoch": 0.1835662736450866, "grad_norm": 0.7890625, "learning_rate": 0.00019286838459234033, "loss": 0.949, "step": 7149 }, { "epoch": 0.1835919508410084, "grad_norm": 0.7578125, "learning_rate": 0.00019286672885074477, "loss": 0.9347, "step": 7150 }, { "epoch": 0.1836176280369302, "grad_norm": 0.94140625, "learning_rate": 0.0001928650729240742, "loss": 1.0136, "step": 7151 }, { "epoch": 0.18364330523285205, "grad_norm": 0.90234375, "learning_rate": 0.00019286341681233195, "loss": 1.1008, "step": 7152 }, { "epoch": 0.18366898242877386, "grad_norm": 0.78125, "learning_rate": 0.00019286176051552128, "loss": 1.0155, "step": 7153 }, { "epoch": 0.1836946596246957, "grad_norm": 0.80078125, "learning_rate": 0.00019286010403364552, "loss": 1.0366, "step": 7154 }, { "epoch": 0.1837203368206175, "grad_norm": 0.79296875, "learning_rate": 0.00019285844736670793, "loss": 1.0281, "step": 7155 }, { "epoch": 0.1837460140165393, "grad_norm": 0.85546875, "learning_rate": 0.00019285679051471186, "loss": 1.028, "step": 7156 }, { "epoch": 0.18377169121246115, "grad_norm": 0.921875, "learning_rate": 0.0001928551334776606, "loss": 1.0353, "step": 7157 }, { "epoch": 0.18379736840838296, "grad_norm": 0.83203125, "learning_rate": 0.00019285347625555744, "loss": 1.0957, "step": 7158 }, { "epoch": 0.1838230456043048, "grad_norm": 0.76171875, "learning_rate": 0.00019285181884840568, "loss": 1.2185, "step": 7159 }, { "epoch": 0.1838487228002266, "grad_norm": 0.8046875, "learning_rate": 0.00019285016125620866, "loss": 0.9623, "step": 7160 }, { "epoch": 0.1838743999961484, "grad_norm": 0.83203125, "learning_rate": 0.00019284850347896964, "loss": 0.9444, "step": 7161 }, { "epoch": 0.18390007719207024, "grad_norm": 0.82421875, "learning_rate": 0.00019284684551669194, "loss": 1.0395, "step": 7162 }, { "epoch": 0.18392575438799205, "grad_norm": 0.80859375, "learning_rate": 0.00019284518736937886, "loss": 1.0039, "step": 7163 }, { "epoch": 0.1839514315839139, "grad_norm": 0.8515625, "learning_rate": 0.00019284352903703371, "loss": 0.8838, "step": 7164 }, { "epoch": 0.1839771087798357, "grad_norm": 0.828125, "learning_rate": 0.0001928418705196598, "loss": 1.243, "step": 7165 }, { "epoch": 0.1840027859757575, "grad_norm": 0.80859375, "learning_rate": 0.00019284021181726043, "loss": 1.0762, "step": 7166 }, { "epoch": 0.18402846317167934, "grad_norm": 0.80859375, "learning_rate": 0.00019283855292983888, "loss": 1.049, "step": 7167 }, { "epoch": 0.18405414036760115, "grad_norm": 0.8046875, "learning_rate": 0.0001928368938573985, "loss": 0.9824, "step": 7168 }, { "epoch": 0.18407981756352298, "grad_norm": 0.9140625, "learning_rate": 0.00019283523459994256, "loss": 1.2787, "step": 7169 }, { "epoch": 0.1841054947594448, "grad_norm": 0.796875, "learning_rate": 0.0001928335751574744, "loss": 1.1944, "step": 7170 }, { "epoch": 0.1841311719553666, "grad_norm": 0.8984375, "learning_rate": 0.0001928319155299973, "loss": 1.0327, "step": 7171 }, { "epoch": 0.18415684915128844, "grad_norm": 0.83203125, "learning_rate": 0.0001928302557175146, "loss": 0.9691, "step": 7172 }, { "epoch": 0.18418252634721025, "grad_norm": 0.796875, "learning_rate": 0.00019282859572002957, "loss": 0.9827, "step": 7173 }, { "epoch": 0.18420820354313208, "grad_norm": 0.9140625, "learning_rate": 0.00019282693553754554, "loss": 1.1087, "step": 7174 }, { "epoch": 0.1842338807390539, "grad_norm": 0.85546875, "learning_rate": 0.00019282527517006578, "loss": 1.1001, "step": 7175 }, { "epoch": 0.1842595579349757, "grad_norm": 0.86328125, "learning_rate": 0.0001928236146175937, "loss": 1.16, "step": 7176 }, { "epoch": 0.18428523513089753, "grad_norm": 0.83984375, "learning_rate": 0.0001928219538801325, "loss": 1.0234, "step": 7177 }, { "epoch": 0.18431091232681934, "grad_norm": 0.953125, "learning_rate": 0.00019282029295768552, "loss": 1.2025, "step": 7178 }, { "epoch": 0.18433658952274118, "grad_norm": 0.96875, "learning_rate": 0.00019281863185025607, "loss": 1.0963, "step": 7179 }, { "epoch": 0.184362266718663, "grad_norm": 0.82421875, "learning_rate": 0.00019281697055784747, "loss": 0.9719, "step": 7180 }, { "epoch": 0.1843879439145848, "grad_norm": 0.890625, "learning_rate": 0.0001928153090804631, "loss": 1.0496, "step": 7181 }, { "epoch": 0.18441362111050663, "grad_norm": 0.796875, "learning_rate": 0.00019281364741810614, "loss": 1.023, "step": 7182 }, { "epoch": 0.18443929830642844, "grad_norm": 0.7890625, "learning_rate": 0.00019281198557077997, "loss": 1.0248, "step": 7183 }, { "epoch": 0.18446497550235028, "grad_norm": 0.8125, "learning_rate": 0.0001928103235384879, "loss": 1.0558, "step": 7184 }, { "epoch": 0.18449065269827208, "grad_norm": 0.83984375, "learning_rate": 0.00019280866132123325, "loss": 1.153, "step": 7185 }, { "epoch": 0.1845163298941939, "grad_norm": 0.8515625, "learning_rate": 0.00019280699891901932, "loss": 1.0176, "step": 7186 }, { "epoch": 0.18454200709011573, "grad_norm": 1.40625, "learning_rate": 0.00019280533633184944, "loss": 0.9979, "step": 7187 }, { "epoch": 0.18456768428603754, "grad_norm": 0.859375, "learning_rate": 0.00019280367355972686, "loss": 1.0273, "step": 7188 }, { "epoch": 0.18459336148195937, "grad_norm": 0.9609375, "learning_rate": 0.00019280201060265499, "loss": 1.0985, "step": 7189 }, { "epoch": 0.18461903867788118, "grad_norm": 0.7890625, "learning_rate": 0.00019280034746063706, "loss": 0.9411, "step": 7190 }, { "epoch": 0.184644715873803, "grad_norm": 0.8203125, "learning_rate": 0.00019279868413367646, "loss": 0.9774, "step": 7191 }, { "epoch": 0.18467039306972483, "grad_norm": 0.91796875, "learning_rate": 0.00019279702062177643, "loss": 1.0248, "step": 7192 }, { "epoch": 0.18469607026564663, "grad_norm": 0.7734375, "learning_rate": 0.00019279535692494034, "loss": 0.9225, "step": 7193 }, { "epoch": 0.18472174746156847, "grad_norm": 0.76953125, "learning_rate": 0.00019279369304317148, "loss": 0.9754, "step": 7194 }, { "epoch": 0.18474742465749028, "grad_norm": 0.80078125, "learning_rate": 0.00019279202897647316, "loss": 1.1526, "step": 7195 }, { "epoch": 0.1847731018534121, "grad_norm": 0.89453125, "learning_rate": 0.00019279036472484871, "loss": 1.1293, "step": 7196 }, { "epoch": 0.18479877904933392, "grad_norm": 0.80859375, "learning_rate": 0.00019278870028830144, "loss": 1.0922, "step": 7197 }, { "epoch": 0.18482445624525573, "grad_norm": 0.78515625, "learning_rate": 0.0001927870356668347, "loss": 0.972, "step": 7198 }, { "epoch": 0.18485013344117757, "grad_norm": 0.84765625, "learning_rate": 0.00019278537086045176, "loss": 0.9926, "step": 7199 }, { "epoch": 0.18487581063709937, "grad_norm": 0.88671875, "learning_rate": 0.00019278370586915596, "loss": 1.1193, "step": 7200 }, { "epoch": 0.18490148783302118, "grad_norm": 0.83203125, "learning_rate": 0.00019278204069295062, "loss": 1.006, "step": 7201 }, { "epoch": 0.18492716502894302, "grad_norm": 0.859375, "learning_rate": 0.00019278037533183905, "loss": 1.1176, "step": 7202 }, { "epoch": 0.18495284222486483, "grad_norm": 0.90625, "learning_rate": 0.00019277870978582457, "loss": 1.0001, "step": 7203 }, { "epoch": 0.18497851942078666, "grad_norm": 0.88671875, "learning_rate": 0.0001927770440549105, "loss": 0.9396, "step": 7204 }, { "epoch": 0.18500419661670847, "grad_norm": 0.796875, "learning_rate": 0.00019277537813910017, "loss": 1.0216, "step": 7205 }, { "epoch": 0.18502987381263028, "grad_norm": 1.265625, "learning_rate": 0.0001927737120383969, "loss": 1.2669, "step": 7206 }, { "epoch": 0.18505555100855212, "grad_norm": 0.796875, "learning_rate": 0.000192772045752804, "loss": 1.0766, "step": 7207 }, { "epoch": 0.18508122820447392, "grad_norm": 0.76953125, "learning_rate": 0.00019277037928232477, "loss": 0.9506, "step": 7208 }, { "epoch": 0.18510690540039576, "grad_norm": 0.796875, "learning_rate": 0.00019276871262696258, "loss": 1.2871, "step": 7209 }, { "epoch": 0.18513258259631757, "grad_norm": 0.90234375, "learning_rate": 0.00019276704578672068, "loss": 1.1571, "step": 7210 }, { "epoch": 0.18515825979223938, "grad_norm": 0.78515625, "learning_rate": 0.00019276537876160247, "loss": 1.0586, "step": 7211 }, { "epoch": 0.1851839369881612, "grad_norm": 0.8046875, "learning_rate": 0.00019276371155161126, "loss": 1.1069, "step": 7212 }, { "epoch": 0.18520961418408302, "grad_norm": 0.8125, "learning_rate": 0.00019276204415675033, "loss": 0.9842, "step": 7213 }, { "epoch": 0.18523529138000486, "grad_norm": 0.71875, "learning_rate": 0.00019276037657702303, "loss": 1.1145, "step": 7214 }, { "epoch": 0.18526096857592667, "grad_norm": 0.80859375, "learning_rate": 0.00019275870881243264, "loss": 1.0712, "step": 7215 }, { "epoch": 0.18528664577184847, "grad_norm": 0.82421875, "learning_rate": 0.0001927570408629826, "loss": 0.9964, "step": 7216 }, { "epoch": 0.1853123229677703, "grad_norm": 0.8046875, "learning_rate": 0.0001927553727286761, "loss": 0.983, "step": 7217 }, { "epoch": 0.18533800016369212, "grad_norm": 0.76171875, "learning_rate": 0.00019275370440951652, "loss": 1.1615, "step": 7218 }, { "epoch": 0.18536367735961395, "grad_norm": 0.7890625, "learning_rate": 0.00019275203590550722, "loss": 1.0426, "step": 7219 }, { "epoch": 0.18538935455553576, "grad_norm": 0.7421875, "learning_rate": 0.00019275036721665148, "loss": 0.9977, "step": 7220 }, { "epoch": 0.18541503175145757, "grad_norm": 0.80859375, "learning_rate": 0.0001927486983429526, "loss": 1.0675, "step": 7221 }, { "epoch": 0.1854407089473794, "grad_norm": 0.8203125, "learning_rate": 0.00019274702928441398, "loss": 1.0039, "step": 7222 }, { "epoch": 0.18546638614330122, "grad_norm": 0.84765625, "learning_rate": 0.0001927453600410389, "loss": 1.1128, "step": 7223 }, { "epoch": 0.18549206333922305, "grad_norm": 0.85546875, "learning_rate": 0.0001927436906128307, "loss": 0.891, "step": 7224 }, { "epoch": 0.18551774053514486, "grad_norm": 0.8515625, "learning_rate": 0.00019274202099979268, "loss": 0.8705, "step": 7225 }, { "epoch": 0.18554341773106667, "grad_norm": 0.83984375, "learning_rate": 0.00019274035120192822, "loss": 1.0289, "step": 7226 }, { "epoch": 0.1855690949269885, "grad_norm": 0.83984375, "learning_rate": 0.00019273868121924057, "loss": 0.9676, "step": 7227 }, { "epoch": 0.1855947721229103, "grad_norm": 0.77734375, "learning_rate": 0.00019273701105173316, "loss": 1.0815, "step": 7228 }, { "epoch": 0.18562044931883215, "grad_norm": 0.984375, "learning_rate": 0.00019273534069940923, "loss": 0.9336, "step": 7229 }, { "epoch": 0.18564612651475396, "grad_norm": 0.81640625, "learning_rate": 0.00019273367016227214, "loss": 0.9934, "step": 7230 }, { "epoch": 0.18567180371067576, "grad_norm": 0.8359375, "learning_rate": 0.00019273199944032525, "loss": 1.167, "step": 7231 }, { "epoch": 0.1856974809065976, "grad_norm": 0.81640625, "learning_rate": 0.00019273032853357185, "loss": 0.9286, "step": 7232 }, { "epoch": 0.1857231581025194, "grad_norm": 0.91015625, "learning_rate": 0.00019272865744201527, "loss": 1.0635, "step": 7233 }, { "epoch": 0.18574883529844124, "grad_norm": 0.78515625, "learning_rate": 0.00019272698616565887, "loss": 1.0363, "step": 7234 }, { "epoch": 0.18577451249436305, "grad_norm": 0.98046875, "learning_rate": 0.00019272531470450595, "loss": 1.0323, "step": 7235 }, { "epoch": 0.18580018969028486, "grad_norm": 0.81640625, "learning_rate": 0.00019272364305855986, "loss": 1.1195, "step": 7236 }, { "epoch": 0.1858258668862067, "grad_norm": 0.87109375, "learning_rate": 0.00019272197122782391, "loss": 1.18, "step": 7237 }, { "epoch": 0.1858515440821285, "grad_norm": 0.796875, "learning_rate": 0.00019272029921230146, "loss": 1.0663, "step": 7238 }, { "epoch": 0.18587722127805034, "grad_norm": 0.85546875, "learning_rate": 0.00019271862701199583, "loss": 1.0943, "step": 7239 }, { "epoch": 0.18590289847397215, "grad_norm": 0.796875, "learning_rate": 0.00019271695462691035, "loss": 1.1705, "step": 7240 }, { "epoch": 0.18592857566989396, "grad_norm": 0.796875, "learning_rate": 0.00019271528205704836, "loss": 1.1508, "step": 7241 }, { "epoch": 0.1859542528658158, "grad_norm": 0.8515625, "learning_rate": 0.00019271360930241317, "loss": 1.0598, "step": 7242 }, { "epoch": 0.1859799300617376, "grad_norm": 1.2578125, "learning_rate": 0.00019271193636300816, "loss": 0.8719, "step": 7243 }, { "epoch": 0.18600560725765944, "grad_norm": 0.84375, "learning_rate": 0.00019271026323883662, "loss": 1.1475, "step": 7244 }, { "epoch": 0.18603128445358125, "grad_norm": 0.8359375, "learning_rate": 0.0001927085899299019, "loss": 1.1726, "step": 7245 }, { "epoch": 0.18605696164950306, "grad_norm": 0.87109375, "learning_rate": 0.00019270691643620734, "loss": 1.1236, "step": 7246 }, { "epoch": 0.1860826388454249, "grad_norm": 0.86328125, "learning_rate": 0.00019270524275775624, "loss": 1.0411, "step": 7247 }, { "epoch": 0.1861083160413467, "grad_norm": 0.83203125, "learning_rate": 0.00019270356889455198, "loss": 1.1508, "step": 7248 }, { "epoch": 0.18613399323726854, "grad_norm": 0.88671875, "learning_rate": 0.0001927018948465979, "loss": 1.087, "step": 7249 }, { "epoch": 0.18615967043319034, "grad_norm": 0.7734375, "learning_rate": 0.0001927002206138973, "loss": 0.9906, "step": 7250 }, { "epoch": 0.18618534762911215, "grad_norm": 0.9453125, "learning_rate": 0.00019269854619645353, "loss": 0.9821, "step": 7251 }, { "epoch": 0.186211024825034, "grad_norm": 0.8515625, "learning_rate": 0.00019269687159426994, "loss": 0.8603, "step": 7252 }, { "epoch": 0.1862367020209558, "grad_norm": 0.83984375, "learning_rate": 0.00019269519680734987, "loss": 1.0325, "step": 7253 }, { "epoch": 0.18626237921687763, "grad_norm": 0.7734375, "learning_rate": 0.0001926935218356966, "loss": 1.0374, "step": 7254 }, { "epoch": 0.18628805641279944, "grad_norm": 0.83203125, "learning_rate": 0.0001926918466793136, "loss": 1.0827, "step": 7255 }, { "epoch": 0.18631373360872125, "grad_norm": 0.80859375, "learning_rate": 0.00019269017133820407, "loss": 1.0298, "step": 7256 }, { "epoch": 0.18633941080464309, "grad_norm": 0.765625, "learning_rate": 0.00019268849581237138, "loss": 1.1179, "step": 7257 }, { "epoch": 0.1863650880005649, "grad_norm": 0.8671875, "learning_rate": 0.0001926868201018189, "loss": 1.1323, "step": 7258 }, { "epoch": 0.18639076519648673, "grad_norm": 0.828125, "learning_rate": 0.00019268514420655, "loss": 1.0862, "step": 7259 }, { "epoch": 0.18641644239240854, "grad_norm": 0.85546875, "learning_rate": 0.00019268346812656794, "loss": 1.0203, "step": 7260 }, { "epoch": 0.18644211958833035, "grad_norm": 0.8828125, "learning_rate": 0.0001926817918618761, "loss": 1.0118, "step": 7261 }, { "epoch": 0.18646779678425218, "grad_norm": 0.81640625, "learning_rate": 0.00019268011541247787, "loss": 1.0126, "step": 7262 }, { "epoch": 0.186493473980174, "grad_norm": 0.8515625, "learning_rate": 0.00019267843877837648, "loss": 1.199, "step": 7263 }, { "epoch": 0.18651915117609583, "grad_norm": 0.75, "learning_rate": 0.00019267676195957535, "loss": 0.8663, "step": 7264 }, { "epoch": 0.18654482837201763, "grad_norm": 0.78125, "learning_rate": 0.00019267508495607782, "loss": 0.9906, "step": 7265 }, { "epoch": 0.18657050556793944, "grad_norm": 0.77734375, "learning_rate": 0.0001926734077678872, "loss": 0.9581, "step": 7266 }, { "epoch": 0.18659618276386128, "grad_norm": 0.83203125, "learning_rate": 0.00019267173039500687, "loss": 0.9714, "step": 7267 }, { "epoch": 0.1866218599597831, "grad_norm": 0.83203125, "learning_rate": 0.00019267005283744016, "loss": 0.9488, "step": 7268 }, { "epoch": 0.18664753715570492, "grad_norm": 0.81640625, "learning_rate": 0.00019266837509519035, "loss": 0.9545, "step": 7269 }, { "epoch": 0.18667321435162673, "grad_norm": 0.8828125, "learning_rate": 0.0001926666971682609, "loss": 1.1111, "step": 7270 }, { "epoch": 0.18669889154754854, "grad_norm": 0.92578125, "learning_rate": 0.00019266501905665506, "loss": 0.9965, "step": 7271 }, { "epoch": 0.18672456874347038, "grad_norm": 0.8359375, "learning_rate": 0.00019266334076037622, "loss": 1.0164, "step": 7272 }, { "epoch": 0.18675024593939218, "grad_norm": 0.79296875, "learning_rate": 0.00019266166227942773, "loss": 0.9913, "step": 7273 }, { "epoch": 0.18677592313531402, "grad_norm": 0.859375, "learning_rate": 0.00019265998361381288, "loss": 1.0392, "step": 7274 }, { "epoch": 0.18680160033123583, "grad_norm": 0.765625, "learning_rate": 0.00019265830476353508, "loss": 1.1915, "step": 7275 }, { "epoch": 0.18682727752715764, "grad_norm": 0.84375, "learning_rate": 0.0001926566257285976, "loss": 1.0255, "step": 7276 }, { "epoch": 0.18685295472307947, "grad_norm": 0.83984375, "learning_rate": 0.0001926549465090039, "loss": 1.1962, "step": 7277 }, { "epoch": 0.18687863191900128, "grad_norm": 0.859375, "learning_rate": 0.00019265326710475722, "loss": 1.1343, "step": 7278 }, { "epoch": 0.18690430911492312, "grad_norm": 0.87890625, "learning_rate": 0.00019265158751586098, "loss": 1.0692, "step": 7279 }, { "epoch": 0.18692998631084493, "grad_norm": 0.82421875, "learning_rate": 0.00019264990774231846, "loss": 1.0187, "step": 7280 }, { "epoch": 0.18695566350676673, "grad_norm": 0.80859375, "learning_rate": 0.00019264822778413304, "loss": 0.9874, "step": 7281 }, { "epoch": 0.18698134070268857, "grad_norm": 0.828125, "learning_rate": 0.00019264654764130813, "loss": 0.9966, "step": 7282 }, { "epoch": 0.18700701789861038, "grad_norm": 0.78515625, "learning_rate": 0.00019264486731384697, "loss": 1.1516, "step": 7283 }, { "epoch": 0.18703269509453221, "grad_norm": 0.78515625, "learning_rate": 0.00019264318680175296, "loss": 1.0105, "step": 7284 }, { "epoch": 0.18705837229045402, "grad_norm": 0.90234375, "learning_rate": 0.00019264150610502945, "loss": 1.0893, "step": 7285 }, { "epoch": 0.18708404948637583, "grad_norm": 0.96875, "learning_rate": 0.0001926398252236798, "loss": 1.0928, "step": 7286 }, { "epoch": 0.18710972668229767, "grad_norm": 0.80859375, "learning_rate": 0.00019263814415770733, "loss": 0.8376, "step": 7287 }, { "epoch": 0.18713540387821948, "grad_norm": 0.79296875, "learning_rate": 0.0001926364629071154, "loss": 1.041, "step": 7288 }, { "epoch": 0.1871610810741413, "grad_norm": 0.86328125, "learning_rate": 0.00019263478147190738, "loss": 1.0813, "step": 7289 }, { "epoch": 0.18718675827006312, "grad_norm": 0.84375, "learning_rate": 0.0001926330998520866, "loss": 1.0645, "step": 7290 }, { "epoch": 0.18721243546598493, "grad_norm": 0.85546875, "learning_rate": 0.00019263141804765646, "loss": 1.1034, "step": 7291 }, { "epoch": 0.18723811266190676, "grad_norm": 0.81640625, "learning_rate": 0.00019262973605862024, "loss": 1.1094, "step": 7292 }, { "epoch": 0.18726378985782857, "grad_norm": 0.90234375, "learning_rate": 0.0001926280538849813, "loss": 1.1591, "step": 7293 }, { "epoch": 0.1872894670537504, "grad_norm": 0.88671875, "learning_rate": 0.00019262637152674307, "loss": 1.1695, "step": 7294 }, { "epoch": 0.18731514424967222, "grad_norm": 0.76953125, "learning_rate": 0.00019262468898390882, "loss": 0.9694, "step": 7295 }, { "epoch": 0.18734082144559402, "grad_norm": 0.77734375, "learning_rate": 0.0001926230062564819, "loss": 1.0242, "step": 7296 }, { "epoch": 0.18736649864151586, "grad_norm": 0.8359375, "learning_rate": 0.00019262132334446573, "loss": 0.8934, "step": 7297 }, { "epoch": 0.18739217583743767, "grad_norm": 0.87890625, "learning_rate": 0.00019261964024786364, "loss": 1.0467, "step": 7298 }, { "epoch": 0.1874178530333595, "grad_norm": 0.83984375, "learning_rate": 0.00019261795696667896, "loss": 1.1557, "step": 7299 }, { "epoch": 0.1874435302292813, "grad_norm": 0.76953125, "learning_rate": 0.00019261627350091506, "loss": 0.9663, "step": 7300 }, { "epoch": 0.18746920742520312, "grad_norm": 0.8359375, "learning_rate": 0.00019261458985057525, "loss": 1.1337, "step": 7301 }, { "epoch": 0.18749488462112496, "grad_norm": 0.78125, "learning_rate": 0.00019261290601566298, "loss": 1.0442, "step": 7302 }, { "epoch": 0.18752056181704677, "grad_norm": 0.82421875, "learning_rate": 0.00019261122199618153, "loss": 0.9733, "step": 7303 }, { "epoch": 0.1875462390129686, "grad_norm": 0.7734375, "learning_rate": 0.00019260953779213428, "loss": 1.0547, "step": 7304 }, { "epoch": 0.1875719162088904, "grad_norm": 0.80078125, "learning_rate": 0.00019260785340352463, "loss": 1.0853, "step": 7305 }, { "epoch": 0.18759759340481222, "grad_norm": 0.75, "learning_rate": 0.00019260616883035584, "loss": 0.9217, "step": 7306 }, { "epoch": 0.18762327060073405, "grad_norm": 0.90234375, "learning_rate": 0.00019260448407263137, "loss": 1.0132, "step": 7307 }, { "epoch": 0.18764894779665586, "grad_norm": 0.8359375, "learning_rate": 0.00019260279913035447, "loss": 1.0158, "step": 7308 }, { "epoch": 0.1876746249925777, "grad_norm": 0.80078125, "learning_rate": 0.0001926011140035286, "loss": 1.0518, "step": 7309 }, { "epoch": 0.1877003021884995, "grad_norm": 0.8515625, "learning_rate": 0.00019259942869215703, "loss": 0.929, "step": 7310 }, { "epoch": 0.18772597938442132, "grad_norm": 0.8359375, "learning_rate": 0.0001925977431962432, "loss": 0.8723, "step": 7311 }, { "epoch": 0.18775165658034315, "grad_norm": 0.8046875, "learning_rate": 0.00019259605751579043, "loss": 1.1093, "step": 7312 }, { "epoch": 0.18777733377626496, "grad_norm": 0.9765625, "learning_rate": 0.00019259437165080207, "loss": 1.0334, "step": 7313 }, { "epoch": 0.1878030109721868, "grad_norm": 1.1328125, "learning_rate": 0.0001925926856012815, "loss": 1.0187, "step": 7314 }, { "epoch": 0.1878286881681086, "grad_norm": 0.73828125, "learning_rate": 0.0001925909993672321, "loss": 0.9141, "step": 7315 }, { "epoch": 0.1878543653640304, "grad_norm": 0.828125, "learning_rate": 0.00019258931294865715, "loss": 0.9645, "step": 7316 }, { "epoch": 0.18788004255995225, "grad_norm": 0.84765625, "learning_rate": 0.00019258762634556006, "loss": 1.1121, "step": 7317 }, { "epoch": 0.18790571975587406, "grad_norm": 0.78125, "learning_rate": 0.00019258593955794425, "loss": 1.074, "step": 7318 }, { "epoch": 0.1879313969517959, "grad_norm": 0.81640625, "learning_rate": 0.00019258425258581298, "loss": 0.9116, "step": 7319 }, { "epoch": 0.1879570741477177, "grad_norm": 0.890625, "learning_rate": 0.00019258256542916967, "loss": 1.1059, "step": 7320 }, { "epoch": 0.1879827513436395, "grad_norm": 0.77734375, "learning_rate": 0.00019258087808801766, "loss": 1.0636, "step": 7321 }, { "epoch": 0.18800842853956135, "grad_norm": 0.85546875, "learning_rate": 0.00019257919056236037, "loss": 1.1475, "step": 7322 }, { "epoch": 0.18803410573548315, "grad_norm": 0.8828125, "learning_rate": 0.00019257750285220105, "loss": 1.1447, "step": 7323 }, { "epoch": 0.188059782931405, "grad_norm": 0.82421875, "learning_rate": 0.00019257581495754319, "loss": 1.0258, "step": 7324 }, { "epoch": 0.1880854601273268, "grad_norm": 0.859375, "learning_rate": 0.00019257412687839005, "loss": 1.0755, "step": 7325 }, { "epoch": 0.1881111373232486, "grad_norm": 0.875, "learning_rate": 0.00019257243861474504, "loss": 1.1173, "step": 7326 }, { "epoch": 0.18813681451917044, "grad_norm": 0.765625, "learning_rate": 0.00019257075016661154, "loss": 0.9694, "step": 7327 }, { "epoch": 0.18816249171509225, "grad_norm": 0.7890625, "learning_rate": 0.00019256906153399287, "loss": 1.1132, "step": 7328 }, { "epoch": 0.1881881689110141, "grad_norm": 0.828125, "learning_rate": 0.00019256737271689244, "loss": 1.0062, "step": 7329 }, { "epoch": 0.1882138461069359, "grad_norm": 0.765625, "learning_rate": 0.0001925656837153136, "loss": 0.9185, "step": 7330 }, { "epoch": 0.1882395233028577, "grad_norm": 0.87109375, "learning_rate": 0.0001925639945292597, "loss": 0.9631, "step": 7331 }, { "epoch": 0.18826520049877954, "grad_norm": 0.77734375, "learning_rate": 0.00019256230515873416, "loss": 1.0779, "step": 7332 }, { "epoch": 0.18829087769470135, "grad_norm": 0.7578125, "learning_rate": 0.00019256061560374025, "loss": 1.1849, "step": 7333 }, { "epoch": 0.18831655489062316, "grad_norm": 0.94140625, "learning_rate": 0.00019255892586428144, "loss": 1.0803, "step": 7334 }, { "epoch": 0.188342232086545, "grad_norm": 0.8046875, "learning_rate": 0.00019255723594036105, "loss": 0.9991, "step": 7335 }, { "epoch": 0.1883679092824668, "grad_norm": 0.77734375, "learning_rate": 0.0001925555458319824, "loss": 1.1113, "step": 7336 }, { "epoch": 0.18839358647838864, "grad_norm": 0.79296875, "learning_rate": 0.00019255385553914895, "loss": 1.1248, "step": 7337 }, { "epoch": 0.18841926367431044, "grad_norm": 0.7890625, "learning_rate": 0.00019255216506186402, "loss": 1.1272, "step": 7338 }, { "epoch": 0.18844494087023225, "grad_norm": 0.83984375, "learning_rate": 0.00019255047440013099, "loss": 0.9631, "step": 7339 }, { "epoch": 0.1884706180661541, "grad_norm": 0.80859375, "learning_rate": 0.00019254878355395323, "loss": 1.0256, "step": 7340 }, { "epoch": 0.1884962952620759, "grad_norm": 0.74609375, "learning_rate": 0.00019254709252333407, "loss": 1.0039, "step": 7341 }, { "epoch": 0.18852197245799773, "grad_norm": 0.84765625, "learning_rate": 0.00019254540130827694, "loss": 1.1042, "step": 7342 }, { "epoch": 0.18854764965391954, "grad_norm": 0.8203125, "learning_rate": 0.00019254370990878518, "loss": 1.1243, "step": 7343 }, { "epoch": 0.18857332684984135, "grad_norm": 0.7890625, "learning_rate": 0.00019254201832486216, "loss": 0.9947, "step": 7344 }, { "epoch": 0.18859900404576319, "grad_norm": 0.83984375, "learning_rate": 0.0001925403265565113, "loss": 1.1229, "step": 7345 }, { "epoch": 0.188624681241685, "grad_norm": 0.82421875, "learning_rate": 0.00019253863460373584, "loss": 1.0673, "step": 7346 }, { "epoch": 0.18865035843760683, "grad_norm": 0.77734375, "learning_rate": 0.00019253694246653931, "loss": 1.1391, "step": 7347 }, { "epoch": 0.18867603563352864, "grad_norm": 0.80078125, "learning_rate": 0.00019253525014492498, "loss": 1.0791, "step": 7348 }, { "epoch": 0.18870171282945045, "grad_norm": 0.8046875, "learning_rate": 0.0001925335576388963, "loss": 0.9558, "step": 7349 }, { "epoch": 0.18872739002537228, "grad_norm": 0.765625, "learning_rate": 0.00019253186494845657, "loss": 0.9704, "step": 7350 }, { "epoch": 0.1887530672212941, "grad_norm": 0.80859375, "learning_rate": 0.00019253017207360916, "loss": 0.988, "step": 7351 }, { "epoch": 0.18877874441721593, "grad_norm": 0.8671875, "learning_rate": 0.0001925284790143575, "loss": 1.0431, "step": 7352 }, { "epoch": 0.18880442161313774, "grad_norm": 0.83984375, "learning_rate": 0.00019252678577070498, "loss": 1.2003, "step": 7353 }, { "epoch": 0.18883009880905954, "grad_norm": 0.85546875, "learning_rate": 0.00019252509234265488, "loss": 1.001, "step": 7354 }, { "epoch": 0.18885577600498138, "grad_norm": 0.828125, "learning_rate": 0.00019252339873021063, "loss": 1.048, "step": 7355 }, { "epoch": 0.1888814532009032, "grad_norm": 0.796875, "learning_rate": 0.0001925217049333756, "loss": 1.0897, "step": 7356 }, { "epoch": 0.18890713039682502, "grad_norm": 0.77734375, "learning_rate": 0.00019252001095215323, "loss": 1.0339, "step": 7357 }, { "epoch": 0.18893280759274683, "grad_norm": 0.9296875, "learning_rate": 0.00019251831678654679, "loss": 1.0525, "step": 7358 }, { "epoch": 0.18895848478866864, "grad_norm": 0.86328125, "learning_rate": 0.0001925166224365597, "loss": 1.0202, "step": 7359 }, { "epoch": 0.18898416198459048, "grad_norm": 0.81640625, "learning_rate": 0.00019251492790219534, "loss": 0.9823, "step": 7360 }, { "epoch": 0.18900983918051228, "grad_norm": 0.80078125, "learning_rate": 0.0001925132331834571, "loss": 0.9724, "step": 7361 }, { "epoch": 0.18903551637643412, "grad_norm": 0.890625, "learning_rate": 0.00019251153828034833, "loss": 1.0214, "step": 7362 }, { "epoch": 0.18906119357235593, "grad_norm": 0.93359375, "learning_rate": 0.00019250984319287243, "loss": 1.0783, "step": 7363 }, { "epoch": 0.18908687076827774, "grad_norm": 0.828125, "learning_rate": 0.00019250814792103274, "loss": 1.0184, "step": 7364 }, { "epoch": 0.18911254796419957, "grad_norm": 0.953125, "learning_rate": 0.0001925064524648327, "loss": 1.0069, "step": 7365 }, { "epoch": 0.18913822516012138, "grad_norm": 0.83203125, "learning_rate": 0.00019250475682427567, "loss": 1.003, "step": 7366 }, { "epoch": 0.18916390235604322, "grad_norm": 0.828125, "learning_rate": 0.000192503060999365, "loss": 1.0069, "step": 7367 }, { "epoch": 0.18918957955196503, "grad_norm": 0.90234375, "learning_rate": 0.00019250136499010404, "loss": 1.104, "step": 7368 }, { "epoch": 0.18921525674788683, "grad_norm": 0.83984375, "learning_rate": 0.00019249966879649627, "loss": 0.9509, "step": 7369 }, { "epoch": 0.18924093394380867, "grad_norm": 0.80078125, "learning_rate": 0.000192497972418545, "loss": 1.1125, "step": 7370 }, { "epoch": 0.18926661113973048, "grad_norm": 0.84765625, "learning_rate": 0.00019249627585625362, "loss": 1.0897, "step": 7371 }, { "epoch": 0.18929228833565231, "grad_norm": 1.28125, "learning_rate": 0.00019249457910962555, "loss": 1.1876, "step": 7372 }, { "epoch": 0.18931796553157412, "grad_norm": 0.828125, "learning_rate": 0.0001924928821786641, "loss": 1.0014, "step": 7373 }, { "epoch": 0.18934364272749593, "grad_norm": 0.87109375, "learning_rate": 0.0001924911850633727, "loss": 1.133, "step": 7374 }, { "epoch": 0.18936931992341777, "grad_norm": 0.93359375, "learning_rate": 0.00019248948776375474, "loss": 1.0279, "step": 7375 }, { "epoch": 0.18939499711933958, "grad_norm": 0.76171875, "learning_rate": 0.00019248779027981354, "loss": 0.8717, "step": 7376 }, { "epoch": 0.1894206743152614, "grad_norm": 0.84765625, "learning_rate": 0.00019248609261155255, "loss": 1.0225, "step": 7377 }, { "epoch": 0.18944635151118322, "grad_norm": 0.78515625, "learning_rate": 0.00019248439475897514, "loss": 1.0779, "step": 7378 }, { "epoch": 0.18947202870710503, "grad_norm": 0.84375, "learning_rate": 0.0001924826967220847, "loss": 1.1464, "step": 7379 }, { "epoch": 0.18949770590302686, "grad_norm": 1.15625, "learning_rate": 0.0001924809985008846, "loss": 0.9914, "step": 7380 }, { "epoch": 0.18952338309894867, "grad_norm": 0.71875, "learning_rate": 0.0001924793000953782, "loss": 1.0232, "step": 7381 }, { "epoch": 0.1895490602948705, "grad_norm": 0.78125, "learning_rate": 0.00019247760150556894, "loss": 1.1286, "step": 7382 }, { "epoch": 0.18957473749079232, "grad_norm": 0.90234375, "learning_rate": 0.00019247590273146012, "loss": 0.9517, "step": 7383 }, { "epoch": 0.18960041468671412, "grad_norm": 1.921875, "learning_rate": 0.00019247420377305522, "loss": 1.1183, "step": 7384 }, { "epoch": 0.18962609188263596, "grad_norm": 0.796875, "learning_rate": 0.00019247250463035758, "loss": 1.1047, "step": 7385 }, { "epoch": 0.18965176907855777, "grad_norm": 0.80078125, "learning_rate": 0.00019247080530337057, "loss": 1.0489, "step": 7386 }, { "epoch": 0.1896774462744796, "grad_norm": 0.81640625, "learning_rate": 0.00019246910579209762, "loss": 1.0127, "step": 7387 }, { "epoch": 0.1897031234704014, "grad_norm": 0.8515625, "learning_rate": 0.00019246740609654207, "loss": 0.9344, "step": 7388 }, { "epoch": 0.18972880066632322, "grad_norm": 0.8125, "learning_rate": 0.00019246570621670735, "loss": 0.9804, "step": 7389 }, { "epoch": 0.18975447786224506, "grad_norm": 0.9140625, "learning_rate": 0.00019246400615259684, "loss": 1.1161, "step": 7390 }, { "epoch": 0.18978015505816687, "grad_norm": 0.76171875, "learning_rate": 0.00019246230590421392, "loss": 0.9662, "step": 7391 }, { "epoch": 0.1898058322540887, "grad_norm": 0.76171875, "learning_rate": 0.00019246060547156195, "loss": 0.9851, "step": 7392 }, { "epoch": 0.1898315094500105, "grad_norm": 0.86328125, "learning_rate": 0.00019245890485464437, "loss": 0.9861, "step": 7393 }, { "epoch": 0.18985718664593232, "grad_norm": 0.7578125, "learning_rate": 0.00019245720405346455, "loss": 1.0263, "step": 7394 }, { "epoch": 0.18988286384185415, "grad_norm": 0.84375, "learning_rate": 0.00019245550306802583, "loss": 1.0054, "step": 7395 }, { "epoch": 0.18990854103777596, "grad_norm": 0.77734375, "learning_rate": 0.00019245380189833168, "loss": 1.032, "step": 7396 }, { "epoch": 0.1899342182336978, "grad_norm": 0.87890625, "learning_rate": 0.00019245210054438547, "loss": 1.0359, "step": 7397 }, { "epoch": 0.1899598954296196, "grad_norm": 0.9375, "learning_rate": 0.00019245039900619054, "loss": 1.1514, "step": 7398 }, { "epoch": 0.18998557262554142, "grad_norm": 0.83203125, "learning_rate": 0.00019244869728375034, "loss": 1.0537, "step": 7399 }, { "epoch": 0.19001124982146325, "grad_norm": 0.84375, "learning_rate": 0.0001924469953770682, "loss": 1.1326, "step": 7400 }, { "epoch": 0.19003692701738506, "grad_norm": 0.8984375, "learning_rate": 0.00019244529328614758, "loss": 1.1064, "step": 7401 }, { "epoch": 0.1900626042133069, "grad_norm": 0.73828125, "learning_rate": 0.00019244359101099184, "loss": 0.874, "step": 7402 }, { "epoch": 0.1900882814092287, "grad_norm": 0.875, "learning_rate": 0.00019244188855160437, "loss": 1.0421, "step": 7403 }, { "epoch": 0.1901139586051505, "grad_norm": 0.9375, "learning_rate": 0.00019244018590798857, "loss": 1.1166, "step": 7404 }, { "epoch": 0.19013963580107235, "grad_norm": 0.86328125, "learning_rate": 0.0001924384830801478, "loss": 1.1961, "step": 7405 }, { "epoch": 0.19016531299699416, "grad_norm": 0.86328125, "learning_rate": 0.00019243678006808552, "loss": 1.0177, "step": 7406 }, { "epoch": 0.190190990192916, "grad_norm": 0.78125, "learning_rate": 0.00019243507687180506, "loss": 1.1522, "step": 7407 }, { "epoch": 0.1902166673888378, "grad_norm": 0.9296875, "learning_rate": 0.00019243337349130987, "loss": 0.9353, "step": 7408 }, { "epoch": 0.1902423445847596, "grad_norm": 0.8671875, "learning_rate": 0.0001924316699266033, "loss": 1.0069, "step": 7409 }, { "epoch": 0.19026802178068145, "grad_norm": 0.80078125, "learning_rate": 0.00019242996617768876, "loss": 0.9012, "step": 7410 }, { "epoch": 0.19029369897660325, "grad_norm": 0.9140625, "learning_rate": 0.00019242826224456967, "loss": 1.0586, "step": 7411 }, { "epoch": 0.1903193761725251, "grad_norm": 0.8359375, "learning_rate": 0.00019242655812724938, "loss": 1.0621, "step": 7412 }, { "epoch": 0.1903450533684469, "grad_norm": 0.7578125, "learning_rate": 0.0001924248538257313, "loss": 1.0018, "step": 7413 }, { "epoch": 0.1903707305643687, "grad_norm": 0.87890625, "learning_rate": 0.00019242314934001886, "loss": 1.1351, "step": 7414 }, { "epoch": 0.19039640776029054, "grad_norm": 0.82421875, "learning_rate": 0.0001924214446701154, "loss": 1.0927, "step": 7415 }, { "epoch": 0.19042208495621235, "grad_norm": 0.92578125, "learning_rate": 0.00019241973981602438, "loss": 1.155, "step": 7416 }, { "epoch": 0.1904477621521342, "grad_norm": 0.828125, "learning_rate": 0.00019241803477774915, "loss": 0.9822, "step": 7417 }, { "epoch": 0.190473439348056, "grad_norm": 0.70703125, "learning_rate": 0.00019241632955529313, "loss": 1.0451, "step": 7418 }, { "epoch": 0.1904991165439778, "grad_norm": 1.015625, "learning_rate": 0.0001924146241486597, "loss": 1.179, "step": 7419 }, { "epoch": 0.19052479373989964, "grad_norm": 0.8359375, "learning_rate": 0.00019241291855785227, "loss": 1.129, "step": 7420 }, { "epoch": 0.19055047093582145, "grad_norm": 0.83984375, "learning_rate": 0.00019241121278287427, "loss": 0.9891, "step": 7421 }, { "epoch": 0.19057614813174328, "grad_norm": 0.8046875, "learning_rate": 0.00019240950682372906, "loss": 0.8937, "step": 7422 }, { "epoch": 0.1906018253276651, "grad_norm": 0.84765625, "learning_rate": 0.00019240780068042004, "loss": 1.0518, "step": 7423 }, { "epoch": 0.1906275025235869, "grad_norm": 0.9609375, "learning_rate": 0.0001924060943529506, "loss": 1.2029, "step": 7424 }, { "epoch": 0.19065317971950874, "grad_norm": 3.921875, "learning_rate": 0.00019240438784132418, "loss": 1.0961, "step": 7425 }, { "epoch": 0.19067885691543054, "grad_norm": 0.765625, "learning_rate": 0.0001924026811455442, "loss": 0.9043, "step": 7426 }, { "epoch": 0.19070453411135238, "grad_norm": 0.77734375, "learning_rate": 0.000192400974265614, "loss": 1.1205, "step": 7427 }, { "epoch": 0.1907302113072742, "grad_norm": 0.79296875, "learning_rate": 0.000192399267201537, "loss": 1.0739, "step": 7428 }, { "epoch": 0.190755888503196, "grad_norm": 0.80859375, "learning_rate": 0.0001923975599533166, "loss": 1.1429, "step": 7429 }, { "epoch": 0.19078156569911783, "grad_norm": 0.8359375, "learning_rate": 0.0001923958525209562, "loss": 1.1212, "step": 7430 }, { "epoch": 0.19080724289503964, "grad_norm": 0.796875, "learning_rate": 0.00019239414490445923, "loss": 1.2941, "step": 7431 }, { "epoch": 0.19083292009096148, "grad_norm": 0.875, "learning_rate": 0.00019239243710382907, "loss": 1.069, "step": 7432 }, { "epoch": 0.19085859728688329, "grad_norm": 0.80078125, "learning_rate": 0.00019239072911906912, "loss": 1.0045, "step": 7433 }, { "epoch": 0.1908842744828051, "grad_norm": 0.8984375, "learning_rate": 0.0001923890209501828, "loss": 1.1633, "step": 7434 }, { "epoch": 0.19090995167872693, "grad_norm": 0.80859375, "learning_rate": 0.00019238731259717351, "loss": 0.9809, "step": 7435 }, { "epoch": 0.19093562887464874, "grad_norm": 0.84765625, "learning_rate": 0.00019238560406004466, "loss": 0.9719, "step": 7436 }, { "epoch": 0.19096130607057057, "grad_norm": 0.91796875, "learning_rate": 0.00019238389533879962, "loss": 1.0556, "step": 7437 }, { "epoch": 0.19098698326649238, "grad_norm": 0.8515625, "learning_rate": 0.00019238218643344185, "loss": 1.1331, "step": 7438 }, { "epoch": 0.1910126604624142, "grad_norm": 0.80859375, "learning_rate": 0.0001923804773439747, "loss": 1.0724, "step": 7439 }, { "epoch": 0.19103833765833603, "grad_norm": 0.90234375, "learning_rate": 0.00019237876807040163, "loss": 1.1073, "step": 7440 }, { "epoch": 0.19106401485425784, "grad_norm": 0.99609375, "learning_rate": 0.00019237705861272598, "loss": 1.1212, "step": 7441 }, { "epoch": 0.19108969205017967, "grad_norm": 0.84375, "learning_rate": 0.00019237534897095123, "loss": 1.0882, "step": 7442 }, { "epoch": 0.19111536924610148, "grad_norm": 0.8671875, "learning_rate": 0.00019237363914508074, "loss": 1.1237, "step": 7443 }, { "epoch": 0.1911410464420233, "grad_norm": 1.1015625, "learning_rate": 0.00019237192913511796, "loss": 1.0441, "step": 7444 }, { "epoch": 0.19116672363794512, "grad_norm": 0.828125, "learning_rate": 0.00019237021894106623, "loss": 1.0807, "step": 7445 }, { "epoch": 0.19119240083386693, "grad_norm": 0.8515625, "learning_rate": 0.00019236850856292902, "loss": 0.9731, "step": 7446 }, { "epoch": 0.19121807802978877, "grad_norm": 0.859375, "learning_rate": 0.00019236679800070972, "loss": 0.9587, "step": 7447 }, { "epoch": 0.19124375522571058, "grad_norm": 0.859375, "learning_rate": 0.0001923650872544117, "loss": 1.0137, "step": 7448 }, { "epoch": 0.19126943242163238, "grad_norm": 0.8203125, "learning_rate": 0.00019236337632403845, "loss": 1.1476, "step": 7449 }, { "epoch": 0.19129510961755422, "grad_norm": 0.84375, "learning_rate": 0.00019236166520959332, "loss": 0.989, "step": 7450 }, { "epoch": 0.19132078681347603, "grad_norm": 0.8359375, "learning_rate": 0.00019235995391107973, "loss": 1.1055, "step": 7451 }, { "epoch": 0.19134646400939787, "grad_norm": 0.7578125, "learning_rate": 0.00019235824242850108, "loss": 0.8599, "step": 7452 }, { "epoch": 0.19137214120531967, "grad_norm": 0.82421875, "learning_rate": 0.00019235653076186079, "loss": 1.0123, "step": 7453 }, { "epoch": 0.19139781840124148, "grad_norm": 0.84375, "learning_rate": 0.00019235481891116232, "loss": 1.1529, "step": 7454 }, { "epoch": 0.19142349559716332, "grad_norm": 0.765625, "learning_rate": 0.00019235310687640902, "loss": 1.0034, "step": 7455 }, { "epoch": 0.19144917279308513, "grad_norm": 0.7890625, "learning_rate": 0.0001923513946576043, "loss": 0.9173, "step": 7456 }, { "epoch": 0.19147484998900696, "grad_norm": 0.875, "learning_rate": 0.0001923496822547516, "loss": 0.9185, "step": 7457 }, { "epoch": 0.19150052718492877, "grad_norm": 0.77734375, "learning_rate": 0.00019234796966785433, "loss": 1.0483, "step": 7458 }, { "epoch": 0.19152620438085058, "grad_norm": 0.83203125, "learning_rate": 0.0001923462568969159, "loss": 1.0307, "step": 7459 }, { "epoch": 0.19155188157677241, "grad_norm": 0.82421875, "learning_rate": 0.00019234454394193975, "loss": 1.0942, "step": 7460 }, { "epoch": 0.19157755877269422, "grad_norm": 0.83984375, "learning_rate": 0.00019234283080292924, "loss": 1.0592, "step": 7461 }, { "epoch": 0.19160323596861606, "grad_norm": 0.84375, "learning_rate": 0.00019234111747988778, "loss": 0.9467, "step": 7462 }, { "epoch": 0.19162891316453787, "grad_norm": 1.453125, "learning_rate": 0.00019233940397281885, "loss": 1.2077, "step": 7463 }, { "epoch": 0.19165459036045968, "grad_norm": 0.7890625, "learning_rate": 0.00019233769028172582, "loss": 1.0777, "step": 7464 }, { "epoch": 0.1916802675563815, "grad_norm": 0.81640625, "learning_rate": 0.0001923359764066121, "loss": 1.1108, "step": 7465 }, { "epoch": 0.19170594475230332, "grad_norm": 0.8046875, "learning_rate": 0.00019233426234748112, "loss": 1.0858, "step": 7466 }, { "epoch": 0.19173162194822516, "grad_norm": 0.765625, "learning_rate": 0.0001923325481043363, "loss": 0.9374, "step": 7467 }, { "epoch": 0.19175729914414696, "grad_norm": 0.83203125, "learning_rate": 0.0001923308336771811, "loss": 1.1393, "step": 7468 }, { "epoch": 0.19178297634006877, "grad_norm": 0.90234375, "learning_rate": 0.00019232911906601885, "loss": 0.9453, "step": 7469 }, { "epoch": 0.1918086535359906, "grad_norm": 0.8359375, "learning_rate": 0.000192327404270853, "loss": 1.0593, "step": 7470 }, { "epoch": 0.19183433073191242, "grad_norm": 0.89453125, "learning_rate": 0.00019232568929168696, "loss": 1.0635, "step": 7471 }, { "epoch": 0.19186000792783425, "grad_norm": 1.0546875, "learning_rate": 0.00019232397412852418, "loss": 1.157, "step": 7472 }, { "epoch": 0.19188568512375606, "grad_norm": 0.81640625, "learning_rate": 0.00019232225878136806, "loss": 1.0754, "step": 7473 }, { "epoch": 0.19191136231967787, "grad_norm": 0.7890625, "learning_rate": 0.00019232054325022202, "loss": 1.0596, "step": 7474 }, { "epoch": 0.1919370395155997, "grad_norm": 0.7578125, "learning_rate": 0.00019231882753508947, "loss": 0.9173, "step": 7475 }, { "epoch": 0.1919627167115215, "grad_norm": 0.875, "learning_rate": 0.00019231711163597383, "loss": 1.0152, "step": 7476 }, { "epoch": 0.19198839390744335, "grad_norm": 0.87890625, "learning_rate": 0.00019231539555287857, "loss": 1.0574, "step": 7477 }, { "epoch": 0.19201407110336516, "grad_norm": 0.82421875, "learning_rate": 0.00019231367928580702, "loss": 1.0128, "step": 7478 }, { "epoch": 0.19203974829928697, "grad_norm": 0.82421875, "learning_rate": 0.00019231196283476265, "loss": 0.9983, "step": 7479 }, { "epoch": 0.1920654254952088, "grad_norm": 0.875, "learning_rate": 0.00019231024619974888, "loss": 1.0603, "step": 7480 }, { "epoch": 0.1920911026911306, "grad_norm": 0.8125, "learning_rate": 0.00019230852938076912, "loss": 1.0774, "step": 7481 }, { "epoch": 0.19211677988705245, "grad_norm": 0.80859375, "learning_rate": 0.00019230681237782682, "loss": 1.008, "step": 7482 }, { "epoch": 0.19214245708297426, "grad_norm": 0.83984375, "learning_rate": 0.00019230509519092536, "loss": 1.0848, "step": 7483 }, { "epoch": 0.19216813427889606, "grad_norm": 0.98828125, "learning_rate": 0.0001923033778200682, "loss": 1.1044, "step": 7484 }, { "epoch": 0.1921938114748179, "grad_norm": 0.87109375, "learning_rate": 0.00019230166026525875, "loss": 1.0889, "step": 7485 }, { "epoch": 0.1922194886707397, "grad_norm": 0.87890625, "learning_rate": 0.00019229994252650042, "loss": 1.0538, "step": 7486 }, { "epoch": 0.19224516586666154, "grad_norm": 0.79296875, "learning_rate": 0.00019229822460379662, "loss": 1.089, "step": 7487 }, { "epoch": 0.19227084306258335, "grad_norm": 0.828125, "learning_rate": 0.00019229650649715084, "loss": 1.0796, "step": 7488 }, { "epoch": 0.19229652025850516, "grad_norm": 0.83984375, "learning_rate": 0.00019229478820656642, "loss": 1.3061, "step": 7489 }, { "epoch": 0.192322197454427, "grad_norm": 0.8046875, "learning_rate": 0.0001922930697320468, "loss": 0.9724, "step": 7490 }, { "epoch": 0.1923478746503488, "grad_norm": 0.90234375, "learning_rate": 0.0001922913510735955, "loss": 1.1723, "step": 7491 }, { "epoch": 0.19237355184627064, "grad_norm": 0.890625, "learning_rate": 0.00019228963223121587, "loss": 1.1512, "step": 7492 }, { "epoch": 0.19239922904219245, "grad_norm": 0.6796875, "learning_rate": 0.0001922879132049113, "loss": 0.9012, "step": 7493 }, { "epoch": 0.19242490623811426, "grad_norm": 0.78125, "learning_rate": 0.00019228619399468526, "loss": 1.0602, "step": 7494 }, { "epoch": 0.1924505834340361, "grad_norm": 0.8828125, "learning_rate": 0.00019228447460054116, "loss": 1.1031, "step": 7495 }, { "epoch": 0.1924762606299579, "grad_norm": 0.9609375, "learning_rate": 0.00019228275502248247, "loss": 1.1691, "step": 7496 }, { "epoch": 0.19250193782587974, "grad_norm": 0.83203125, "learning_rate": 0.00019228103526051257, "loss": 1.0169, "step": 7497 }, { "epoch": 0.19252761502180155, "grad_norm": 0.82421875, "learning_rate": 0.0001922793153146349, "loss": 0.9026, "step": 7498 }, { "epoch": 0.19255329221772335, "grad_norm": 0.76953125, "learning_rate": 0.00019227759518485288, "loss": 0.9215, "step": 7499 }, { "epoch": 0.1925789694136452, "grad_norm": 0.8671875, "learning_rate": 0.00019227587487116997, "loss": 1.0534, "step": 7500 }, { "epoch": 0.192604646609567, "grad_norm": 0.765625, "learning_rate": 0.00019227415437358958, "loss": 0.9593, "step": 7501 }, { "epoch": 0.19263032380548883, "grad_norm": 0.76953125, "learning_rate": 0.00019227243369211513, "loss": 1.0144, "step": 7502 }, { "epoch": 0.19265600100141064, "grad_norm": 0.79296875, "learning_rate": 0.00019227071282675002, "loss": 1.2012, "step": 7503 }, { "epoch": 0.19268167819733245, "grad_norm": 0.78515625, "learning_rate": 0.00019226899177749774, "loss": 1.0291, "step": 7504 }, { "epoch": 0.1927073553932543, "grad_norm": 0.8046875, "learning_rate": 0.00019226727054436171, "loss": 1.0438, "step": 7505 }, { "epoch": 0.1927330325891761, "grad_norm": 0.796875, "learning_rate": 0.00019226554912734532, "loss": 1.0195, "step": 7506 }, { "epoch": 0.19275870978509793, "grad_norm": 0.89453125, "learning_rate": 0.00019226382752645204, "loss": 1.0862, "step": 7507 }, { "epoch": 0.19278438698101974, "grad_norm": 0.84765625, "learning_rate": 0.00019226210574168527, "loss": 1.127, "step": 7508 }, { "epoch": 0.19281006417694155, "grad_norm": 0.75390625, "learning_rate": 0.00019226038377304848, "loss": 1.0158, "step": 7509 }, { "epoch": 0.19283574137286338, "grad_norm": 0.8125, "learning_rate": 0.00019225866162054505, "loss": 0.9793, "step": 7510 }, { "epoch": 0.1928614185687852, "grad_norm": 0.79296875, "learning_rate": 0.00019225693928417848, "loss": 1.0821, "step": 7511 }, { "epoch": 0.19288709576470703, "grad_norm": 0.91796875, "learning_rate": 0.00019225521676395215, "loss": 1.2316, "step": 7512 }, { "epoch": 0.19291277296062884, "grad_norm": 0.82421875, "learning_rate": 0.0001922534940598695, "loss": 1.1652, "step": 7513 }, { "epoch": 0.19293845015655064, "grad_norm": 0.7890625, "learning_rate": 0.00019225177117193395, "loss": 0.992, "step": 7514 }, { "epoch": 0.19296412735247248, "grad_norm": 0.83203125, "learning_rate": 0.00019225004810014898, "loss": 0.9657, "step": 7515 }, { "epoch": 0.1929898045483943, "grad_norm": 0.84375, "learning_rate": 0.000192248324844518, "loss": 1.044, "step": 7516 }, { "epoch": 0.19301548174431613, "grad_norm": 0.796875, "learning_rate": 0.0001922466014050444, "loss": 0.9417, "step": 7517 }, { "epoch": 0.19304115894023793, "grad_norm": 0.77734375, "learning_rate": 0.0001922448777817317, "loss": 0.9535, "step": 7518 }, { "epoch": 0.19306683613615974, "grad_norm": 0.8671875, "learning_rate": 0.00019224315397458328, "loss": 1.2015, "step": 7519 }, { "epoch": 0.19309251333208158, "grad_norm": 0.9296875, "learning_rate": 0.00019224142998360257, "loss": 1.0224, "step": 7520 }, { "epoch": 0.1931181905280034, "grad_norm": 0.85546875, "learning_rate": 0.00019223970580879306, "loss": 1.0589, "step": 7521 }, { "epoch": 0.19314386772392522, "grad_norm": 0.76953125, "learning_rate": 0.00019223798145015812, "loss": 0.9402, "step": 7522 }, { "epoch": 0.19316954491984703, "grad_norm": 0.81640625, "learning_rate": 0.0001922362569077012, "loss": 0.9866, "step": 7523 }, { "epoch": 0.19319522211576884, "grad_norm": 0.81640625, "learning_rate": 0.00019223453218142578, "loss": 1.1004, "step": 7524 }, { "epoch": 0.19322089931169067, "grad_norm": 0.8203125, "learning_rate": 0.00019223280727133525, "loss": 1.1023, "step": 7525 }, { "epoch": 0.19324657650761248, "grad_norm": 0.83203125, "learning_rate": 0.00019223108217743308, "loss": 1.1153, "step": 7526 }, { "epoch": 0.19327225370353432, "grad_norm": 0.8671875, "learning_rate": 0.0001922293568997227, "loss": 1.1758, "step": 7527 }, { "epoch": 0.19329793089945613, "grad_norm": 0.87890625, "learning_rate": 0.00019222763143820752, "loss": 1.2769, "step": 7528 }, { "epoch": 0.19332360809537794, "grad_norm": 0.78515625, "learning_rate": 0.00019222590579289102, "loss": 0.9506, "step": 7529 }, { "epoch": 0.19334928529129977, "grad_norm": 0.83203125, "learning_rate": 0.0001922241799637766, "loss": 1.089, "step": 7530 }, { "epoch": 0.19337496248722158, "grad_norm": 1.0625, "learning_rate": 0.00019222245395086776, "loss": 1.0955, "step": 7531 }, { "epoch": 0.19340063968314342, "grad_norm": 0.79296875, "learning_rate": 0.00019222072775416785, "loss": 1.1937, "step": 7532 }, { "epoch": 0.19342631687906522, "grad_norm": 0.85546875, "learning_rate": 0.0001922190013736804, "loss": 1.1282, "step": 7533 }, { "epoch": 0.19345199407498703, "grad_norm": 0.79296875, "learning_rate": 0.0001922172748094088, "loss": 1.0045, "step": 7534 }, { "epoch": 0.19347767127090887, "grad_norm": 0.8046875, "learning_rate": 0.0001922155480613565, "loss": 1.0708, "step": 7535 }, { "epoch": 0.19350334846683068, "grad_norm": 0.79296875, "learning_rate": 0.00019221382112952692, "loss": 1.0665, "step": 7536 }, { "epoch": 0.1935290256627525, "grad_norm": 0.80859375, "learning_rate": 0.00019221209401392355, "loss": 1.1013, "step": 7537 }, { "epoch": 0.19355470285867432, "grad_norm": 0.8203125, "learning_rate": 0.0001922103667145498, "loss": 1.1453, "step": 7538 }, { "epoch": 0.19358038005459613, "grad_norm": 0.8046875, "learning_rate": 0.00019220863923140912, "loss": 0.9894, "step": 7539 }, { "epoch": 0.19360605725051797, "grad_norm": 0.9140625, "learning_rate": 0.00019220691156450495, "loss": 1.0909, "step": 7540 }, { "epoch": 0.19363173444643977, "grad_norm": 0.86328125, "learning_rate": 0.00019220518371384072, "loss": 1.2175, "step": 7541 }, { "epoch": 0.19365741164236158, "grad_norm": 0.875, "learning_rate": 0.0001922034556794199, "loss": 1.0685, "step": 7542 }, { "epoch": 0.19368308883828342, "grad_norm": 0.87109375, "learning_rate": 0.00019220172746124594, "loss": 1.0722, "step": 7543 }, { "epoch": 0.19370876603420523, "grad_norm": 0.76171875, "learning_rate": 0.00019219999905932222, "loss": 1.032, "step": 7544 }, { "epoch": 0.19373444323012706, "grad_norm": 0.828125, "learning_rate": 0.00019219827047365227, "loss": 1.1192, "step": 7545 }, { "epoch": 0.19376012042604887, "grad_norm": 0.890625, "learning_rate": 0.0001921965417042395, "loss": 0.9928, "step": 7546 }, { "epoch": 0.19378579762197068, "grad_norm": 0.8359375, "learning_rate": 0.0001921948127510873, "loss": 1.0096, "step": 7547 }, { "epoch": 0.19381147481789252, "grad_norm": 0.8046875, "learning_rate": 0.0001921930836141992, "loss": 1.0563, "step": 7548 }, { "epoch": 0.19383715201381432, "grad_norm": 0.83984375, "learning_rate": 0.0001921913542935786, "loss": 0.9704, "step": 7549 }, { "epoch": 0.19386282920973616, "grad_norm": 0.828125, "learning_rate": 0.000192189624789229, "loss": 1.1676, "step": 7550 }, { "epoch": 0.19388850640565797, "grad_norm": 0.89453125, "learning_rate": 0.00019218789510115376, "loss": 0.9635, "step": 7551 }, { "epoch": 0.19391418360157978, "grad_norm": 0.828125, "learning_rate": 0.0001921861652293564, "loss": 1.1194, "step": 7552 }, { "epoch": 0.1939398607975016, "grad_norm": 0.765625, "learning_rate": 0.00019218443517384032, "loss": 0.994, "step": 7553 }, { "epoch": 0.19396553799342342, "grad_norm": 0.796875, "learning_rate": 0.000192182704934609, "loss": 1.0627, "step": 7554 }, { "epoch": 0.19399121518934526, "grad_norm": 0.828125, "learning_rate": 0.00019218097451166588, "loss": 1.138, "step": 7555 }, { "epoch": 0.19401689238526706, "grad_norm": 0.79296875, "learning_rate": 0.00019217924390501438, "loss": 1.032, "step": 7556 }, { "epoch": 0.19404256958118887, "grad_norm": 0.78125, "learning_rate": 0.000192177513114658, "loss": 1.0214, "step": 7557 }, { "epoch": 0.1940682467771107, "grad_norm": 0.8046875, "learning_rate": 0.00019217578214060014, "loss": 1.1026, "step": 7558 }, { "epoch": 0.19409392397303252, "grad_norm": 0.84375, "learning_rate": 0.00019217405098284428, "loss": 1.0291, "step": 7559 }, { "epoch": 0.19411960116895435, "grad_norm": 0.8125, "learning_rate": 0.00019217231964139387, "loss": 1.0473, "step": 7560 }, { "epoch": 0.19414527836487616, "grad_norm": 0.84375, "learning_rate": 0.00019217058811625236, "loss": 1.0014, "step": 7561 }, { "epoch": 0.19417095556079797, "grad_norm": 0.84375, "learning_rate": 0.0001921688564074232, "loss": 1.0568, "step": 7562 }, { "epoch": 0.1941966327567198, "grad_norm": 0.77734375, "learning_rate": 0.00019216712451490978, "loss": 1.0506, "step": 7563 }, { "epoch": 0.19422230995264161, "grad_norm": 0.828125, "learning_rate": 0.00019216539243871567, "loss": 1.0603, "step": 7564 }, { "epoch": 0.19424798714856345, "grad_norm": 0.71484375, "learning_rate": 0.0001921636601788442, "loss": 0.915, "step": 7565 }, { "epoch": 0.19427366434448526, "grad_norm": 1.0234375, "learning_rate": 0.00019216192773529892, "loss": 1.0717, "step": 7566 }, { "epoch": 0.19429934154040707, "grad_norm": 0.75, "learning_rate": 0.00019216019510808326, "loss": 1.0453, "step": 7567 }, { "epoch": 0.1943250187363289, "grad_norm": 0.84765625, "learning_rate": 0.00019215846229720063, "loss": 1.0035, "step": 7568 }, { "epoch": 0.1943506959322507, "grad_norm": 0.91015625, "learning_rate": 0.0001921567293026545, "loss": 1.0443, "step": 7569 }, { "epoch": 0.19437637312817255, "grad_norm": 0.8203125, "learning_rate": 0.00019215499612444835, "loss": 1.0909, "step": 7570 }, { "epoch": 0.19440205032409436, "grad_norm": 0.8359375, "learning_rate": 0.00019215326276258562, "loss": 0.9698, "step": 7571 }, { "epoch": 0.19442772752001616, "grad_norm": 0.87890625, "learning_rate": 0.00019215152921706974, "loss": 0.99, "step": 7572 }, { "epoch": 0.194453404715938, "grad_norm": 0.90625, "learning_rate": 0.0001921497954879042, "loss": 1.149, "step": 7573 }, { "epoch": 0.1944790819118598, "grad_norm": 0.890625, "learning_rate": 0.00019214806157509244, "loss": 1.0165, "step": 7574 }, { "epoch": 0.19450475910778164, "grad_norm": 0.77734375, "learning_rate": 0.00019214632747863793, "loss": 0.9598, "step": 7575 }, { "epoch": 0.19453043630370345, "grad_norm": 0.80078125, "learning_rate": 0.00019214459319854407, "loss": 0.9942, "step": 7576 }, { "epoch": 0.19455611349962526, "grad_norm": 0.8125, "learning_rate": 0.00019214285873481442, "loss": 1.0083, "step": 7577 }, { "epoch": 0.1945817906955471, "grad_norm": 0.90234375, "learning_rate": 0.0001921411240874523, "loss": 1.1178, "step": 7578 }, { "epoch": 0.1946074678914689, "grad_norm": 0.796875, "learning_rate": 0.00019213938925646132, "loss": 0.9725, "step": 7579 }, { "epoch": 0.19463314508739074, "grad_norm": 0.8125, "learning_rate": 0.0001921376542418448, "loss": 0.9906, "step": 7580 }, { "epoch": 0.19465882228331255, "grad_norm": 0.8828125, "learning_rate": 0.00019213591904360632, "loss": 1.1178, "step": 7581 }, { "epoch": 0.19468449947923436, "grad_norm": 1.0234375, "learning_rate": 0.00019213418366174922, "loss": 1.0544, "step": 7582 }, { "epoch": 0.1947101766751562, "grad_norm": 0.81640625, "learning_rate": 0.00019213244809627704, "loss": 1.1267, "step": 7583 }, { "epoch": 0.194735853871078, "grad_norm": 0.8125, "learning_rate": 0.0001921307123471932, "loss": 1.0107, "step": 7584 }, { "epoch": 0.19476153106699984, "grad_norm": 0.77734375, "learning_rate": 0.0001921289764145012, "loss": 1.0085, "step": 7585 }, { "epoch": 0.19478720826292165, "grad_norm": 0.84375, "learning_rate": 0.00019212724029820443, "loss": 0.9379, "step": 7586 }, { "epoch": 0.19481288545884345, "grad_norm": 0.84375, "learning_rate": 0.00019212550399830644, "loss": 0.9993, "step": 7587 }, { "epoch": 0.1948385626547653, "grad_norm": 0.83984375, "learning_rate": 0.0001921237675148106, "loss": 1.1011, "step": 7588 }, { "epoch": 0.1948642398506871, "grad_norm": 0.74609375, "learning_rate": 0.00019212203084772043, "loss": 1.0272, "step": 7589 }, { "epoch": 0.19488991704660893, "grad_norm": 0.859375, "learning_rate": 0.00019212029399703936, "loss": 0.9974, "step": 7590 }, { "epoch": 0.19491559424253074, "grad_norm": 1.2734375, "learning_rate": 0.0001921185569627709, "loss": 1.0642, "step": 7591 }, { "epoch": 0.19494127143845255, "grad_norm": 0.8046875, "learning_rate": 0.00019211681974491845, "loss": 1.1888, "step": 7592 }, { "epoch": 0.1949669486343744, "grad_norm": 1.015625, "learning_rate": 0.0001921150823434855, "loss": 1.1542, "step": 7593 }, { "epoch": 0.1949926258302962, "grad_norm": 0.84765625, "learning_rate": 0.00019211334475847553, "loss": 1.1031, "step": 7594 }, { "epoch": 0.19501830302621803, "grad_norm": 0.84375, "learning_rate": 0.00019211160698989196, "loss": 0.9894, "step": 7595 }, { "epoch": 0.19504398022213984, "grad_norm": 0.81640625, "learning_rate": 0.0001921098690377383, "loss": 1.1069, "step": 7596 }, { "epoch": 0.19506965741806165, "grad_norm": 0.828125, "learning_rate": 0.00019210813090201798, "loss": 0.9711, "step": 7597 }, { "epoch": 0.19509533461398348, "grad_norm": 0.74609375, "learning_rate": 0.0001921063925827345, "loss": 1.0334, "step": 7598 }, { "epoch": 0.1951210118099053, "grad_norm": 0.80078125, "learning_rate": 0.00019210465407989123, "loss": 0.9441, "step": 7599 }, { "epoch": 0.19514668900582713, "grad_norm": 0.8203125, "learning_rate": 0.00019210291539349176, "loss": 1.0081, "step": 7600 }, { "epoch": 0.19517236620174894, "grad_norm": 0.8359375, "learning_rate": 0.0001921011765235395, "loss": 0.9565, "step": 7601 }, { "epoch": 0.19519804339767075, "grad_norm": 0.8046875, "learning_rate": 0.0001920994374700379, "loss": 1.0724, "step": 7602 }, { "epoch": 0.19522372059359258, "grad_norm": 0.83203125, "learning_rate": 0.00019209769823299043, "loss": 1.1451, "step": 7603 }, { "epoch": 0.1952493977895144, "grad_norm": 0.83203125, "learning_rate": 0.00019209595881240058, "loss": 1.0401, "step": 7604 }, { "epoch": 0.19527507498543623, "grad_norm": 0.80859375, "learning_rate": 0.0001920942192082718, "loss": 1.0445, "step": 7605 }, { "epoch": 0.19530075218135803, "grad_norm": 0.8515625, "learning_rate": 0.0001920924794206076, "loss": 1.0629, "step": 7606 }, { "epoch": 0.19532642937727984, "grad_norm": 0.80078125, "learning_rate": 0.00019209073944941136, "loss": 0.9773, "step": 7607 }, { "epoch": 0.19535210657320168, "grad_norm": 0.94140625, "learning_rate": 0.0001920889992946866, "loss": 1.0073, "step": 7608 }, { "epoch": 0.1953777837691235, "grad_norm": 0.83203125, "learning_rate": 0.00019208725895643677, "loss": 0.9887, "step": 7609 }, { "epoch": 0.19540346096504532, "grad_norm": 0.83984375, "learning_rate": 0.00019208551843466538, "loss": 1.0415, "step": 7610 }, { "epoch": 0.19542913816096713, "grad_norm": 0.8046875, "learning_rate": 0.00019208377772937584, "loss": 0.9429, "step": 7611 }, { "epoch": 0.19545481535688894, "grad_norm": 1.1875, "learning_rate": 0.00019208203684057169, "loss": 0.9213, "step": 7612 }, { "epoch": 0.19548049255281078, "grad_norm": 0.83203125, "learning_rate": 0.0001920802957682563, "loss": 1.0642, "step": 7613 }, { "epoch": 0.19550616974873258, "grad_norm": 0.8125, "learning_rate": 0.00019207855451243327, "loss": 1.1378, "step": 7614 }, { "epoch": 0.19553184694465442, "grad_norm": 0.765625, "learning_rate": 0.00019207681307310598, "loss": 0.9514, "step": 7615 }, { "epoch": 0.19555752414057623, "grad_norm": 0.85546875, "learning_rate": 0.00019207507145027787, "loss": 1.0506, "step": 7616 }, { "epoch": 0.19558320133649804, "grad_norm": 0.7734375, "learning_rate": 0.0001920733296439525, "loss": 0.9173, "step": 7617 }, { "epoch": 0.19560887853241987, "grad_norm": 0.72265625, "learning_rate": 0.0001920715876541333, "loss": 0.9706, "step": 7618 }, { "epoch": 0.19563455572834168, "grad_norm": 0.87109375, "learning_rate": 0.0001920698454808237, "loss": 1.1007, "step": 7619 }, { "epoch": 0.19566023292426352, "grad_norm": 0.8359375, "learning_rate": 0.00019206810312402727, "loss": 1.0734, "step": 7620 }, { "epoch": 0.19568591012018532, "grad_norm": 0.796875, "learning_rate": 0.00019206636058374742, "loss": 1.1431, "step": 7621 }, { "epoch": 0.19571158731610713, "grad_norm": 0.7578125, "learning_rate": 0.0001920646178599876, "loss": 0.9817, "step": 7622 }, { "epoch": 0.19573726451202897, "grad_norm": 0.765625, "learning_rate": 0.00019206287495275132, "loss": 1.1095, "step": 7623 }, { "epoch": 0.19576294170795078, "grad_norm": 0.77734375, "learning_rate": 0.00019206113186204208, "loss": 1.1722, "step": 7624 }, { "epoch": 0.1957886189038726, "grad_norm": 1.1328125, "learning_rate": 0.0001920593885878633, "loss": 1.0129, "step": 7625 }, { "epoch": 0.19581429609979442, "grad_norm": 0.8828125, "learning_rate": 0.00019205764513021846, "loss": 1.0865, "step": 7626 }, { "epoch": 0.19583997329571623, "grad_norm": 0.8203125, "learning_rate": 0.00019205590148911107, "loss": 0.9872, "step": 7627 }, { "epoch": 0.19586565049163807, "grad_norm": 0.8203125, "learning_rate": 0.00019205415766454455, "loss": 1.0414, "step": 7628 }, { "epoch": 0.19589132768755987, "grad_norm": 0.8515625, "learning_rate": 0.00019205241365652246, "loss": 1.1834, "step": 7629 }, { "epoch": 0.1959170048834817, "grad_norm": 0.80859375, "learning_rate": 0.00019205066946504818, "loss": 1.2094, "step": 7630 }, { "epoch": 0.19594268207940352, "grad_norm": 0.78125, "learning_rate": 0.00019204892509012522, "loss": 0.9233, "step": 7631 }, { "epoch": 0.19596835927532533, "grad_norm": 0.8203125, "learning_rate": 0.0001920471805317571, "loss": 1.249, "step": 7632 }, { "epoch": 0.19599403647124716, "grad_norm": 0.75390625, "learning_rate": 0.00019204543578994727, "loss": 0.9657, "step": 7633 }, { "epoch": 0.19601971366716897, "grad_norm": 0.76953125, "learning_rate": 0.00019204369086469917, "loss": 1.0467, "step": 7634 }, { "epoch": 0.1960453908630908, "grad_norm": 0.8125, "learning_rate": 0.00019204194575601636, "loss": 1.2782, "step": 7635 }, { "epoch": 0.19607106805901262, "grad_norm": 0.84765625, "learning_rate": 0.00019204020046390222, "loss": 1.1342, "step": 7636 }, { "epoch": 0.19609674525493442, "grad_norm": 0.79296875, "learning_rate": 0.00019203845498836025, "loss": 1.0114, "step": 7637 }, { "epoch": 0.19612242245085626, "grad_norm": 0.78125, "learning_rate": 0.00019203670932939403, "loss": 1.0036, "step": 7638 }, { "epoch": 0.19614809964677807, "grad_norm": 0.83984375, "learning_rate": 0.0001920349634870069, "loss": 1.0056, "step": 7639 }, { "epoch": 0.1961737768426999, "grad_norm": 0.87109375, "learning_rate": 0.00019203321746120243, "loss": 1.0721, "step": 7640 }, { "epoch": 0.1961994540386217, "grad_norm": 0.7578125, "learning_rate": 0.00019203147125198406, "loss": 0.9489, "step": 7641 }, { "epoch": 0.19622513123454352, "grad_norm": 0.8515625, "learning_rate": 0.0001920297248593553, "loss": 0.9313, "step": 7642 }, { "epoch": 0.19625080843046536, "grad_norm": 0.82421875, "learning_rate": 0.0001920279782833196, "loss": 1.0216, "step": 7643 }, { "epoch": 0.19627648562638716, "grad_norm": 0.83984375, "learning_rate": 0.00019202623152388042, "loss": 1.1178, "step": 7644 }, { "epoch": 0.196302162822309, "grad_norm": 0.79296875, "learning_rate": 0.0001920244845810413, "loss": 1.0677, "step": 7645 }, { "epoch": 0.1963278400182308, "grad_norm": 0.90625, "learning_rate": 0.00019202273745480572, "loss": 1.1172, "step": 7646 }, { "epoch": 0.19635351721415262, "grad_norm": 0.7734375, "learning_rate": 0.0001920209901451771, "loss": 0.9195, "step": 7647 }, { "epoch": 0.19637919441007445, "grad_norm": 0.77734375, "learning_rate": 0.00019201924265215898, "loss": 1.0706, "step": 7648 }, { "epoch": 0.19640487160599626, "grad_norm": 0.86328125, "learning_rate": 0.0001920174949757548, "loss": 1.0856, "step": 7649 }, { "epoch": 0.1964305488019181, "grad_norm": 0.78515625, "learning_rate": 0.00019201574711596807, "loss": 0.9847, "step": 7650 }, { "epoch": 0.1964562259978399, "grad_norm": 0.8125, "learning_rate": 0.0001920139990728023, "loss": 1.1923, "step": 7651 }, { "epoch": 0.19648190319376171, "grad_norm": 0.890625, "learning_rate": 0.00019201225084626092, "loss": 1.0995, "step": 7652 }, { "epoch": 0.19650758038968355, "grad_norm": 0.76953125, "learning_rate": 0.0001920105024363474, "loss": 1.1649, "step": 7653 }, { "epoch": 0.19653325758560536, "grad_norm": 0.7734375, "learning_rate": 0.0001920087538430653, "loss": 1.0038, "step": 7654 }, { "epoch": 0.1965589347815272, "grad_norm": 0.83203125, "learning_rate": 0.00019200700506641808, "loss": 1.004, "step": 7655 }, { "epoch": 0.196584611977449, "grad_norm": 0.84765625, "learning_rate": 0.0001920052561064092, "loss": 1.0871, "step": 7656 }, { "epoch": 0.1966102891733708, "grad_norm": 0.85546875, "learning_rate": 0.0001920035069630421, "loss": 1.0398, "step": 7657 }, { "epoch": 0.19663596636929265, "grad_norm": 0.94921875, "learning_rate": 0.00019200175763632037, "loss": 0.986, "step": 7658 }, { "epoch": 0.19666164356521446, "grad_norm": 0.875, "learning_rate": 0.00019200000812624746, "loss": 1.0629, "step": 7659 }, { "epoch": 0.1966873207611363, "grad_norm": 0.875, "learning_rate": 0.00019199825843282682, "loss": 1.0544, "step": 7660 }, { "epoch": 0.1967129979570581, "grad_norm": 0.86328125, "learning_rate": 0.00019199650855606195, "loss": 0.9222, "step": 7661 }, { "epoch": 0.1967386751529799, "grad_norm": 0.80078125, "learning_rate": 0.00019199475849595636, "loss": 1.0001, "step": 7662 }, { "epoch": 0.19676435234890174, "grad_norm": 0.78515625, "learning_rate": 0.00019199300825251353, "loss": 1.0623, "step": 7663 }, { "epoch": 0.19679002954482355, "grad_norm": 0.91015625, "learning_rate": 0.00019199125782573697, "loss": 1.0394, "step": 7664 }, { "epoch": 0.1968157067407454, "grad_norm": 0.859375, "learning_rate": 0.0001919895072156301, "loss": 1.0688, "step": 7665 }, { "epoch": 0.1968413839366672, "grad_norm": 0.84375, "learning_rate": 0.0001919877564221965, "loss": 1.1134, "step": 7666 }, { "epoch": 0.196867061132589, "grad_norm": 0.8046875, "learning_rate": 0.00019198600544543956, "loss": 1.0966, "step": 7667 }, { "epoch": 0.19689273832851084, "grad_norm": 0.80078125, "learning_rate": 0.00019198425428536284, "loss": 0.9432, "step": 7668 }, { "epoch": 0.19691841552443265, "grad_norm": 0.7890625, "learning_rate": 0.00019198250294196983, "loss": 0.9428, "step": 7669 }, { "epoch": 0.19694409272035449, "grad_norm": 0.8046875, "learning_rate": 0.00019198075141526398, "loss": 1.1586, "step": 7670 }, { "epoch": 0.1969697699162763, "grad_norm": 0.87890625, "learning_rate": 0.00019197899970524877, "loss": 1.0008, "step": 7671 }, { "epoch": 0.1969954471121981, "grad_norm": 0.81640625, "learning_rate": 0.00019197724781192777, "loss": 0.9556, "step": 7672 }, { "epoch": 0.19702112430811994, "grad_norm": 0.8203125, "learning_rate": 0.0001919754957353044, "loss": 1.1311, "step": 7673 }, { "epoch": 0.19704680150404175, "grad_norm": 1.2734375, "learning_rate": 0.00019197374347538218, "loss": 1.0303, "step": 7674 }, { "epoch": 0.19707247869996358, "grad_norm": 0.8046875, "learning_rate": 0.0001919719910321646, "loss": 1.1035, "step": 7675 }, { "epoch": 0.1970981558958854, "grad_norm": 0.7890625, "learning_rate": 0.00019197023840565516, "loss": 0.9916, "step": 7676 }, { "epoch": 0.1971238330918072, "grad_norm": 0.73828125, "learning_rate": 0.00019196848559585732, "loss": 0.969, "step": 7677 }, { "epoch": 0.19714951028772904, "grad_norm": 0.80859375, "learning_rate": 0.0001919667326027746, "loss": 1.0664, "step": 7678 }, { "epoch": 0.19717518748365084, "grad_norm": 0.828125, "learning_rate": 0.0001919649794264105, "loss": 0.8708, "step": 7679 }, { "epoch": 0.19720086467957268, "grad_norm": 0.8515625, "learning_rate": 0.0001919632260667685, "loss": 1.0862, "step": 7680 }, { "epoch": 0.1972265418754945, "grad_norm": 0.85546875, "learning_rate": 0.0001919614725238521, "loss": 1.118, "step": 7681 }, { "epoch": 0.1972522190714163, "grad_norm": 0.83203125, "learning_rate": 0.0001919597187976648, "loss": 1.105, "step": 7682 }, { "epoch": 0.19727789626733813, "grad_norm": 0.79296875, "learning_rate": 0.00019195796488821008, "loss": 0.9046, "step": 7683 }, { "epoch": 0.19730357346325994, "grad_norm": 0.86328125, "learning_rate": 0.00019195621079549143, "loss": 1.1209, "step": 7684 }, { "epoch": 0.19732925065918178, "grad_norm": 0.796875, "learning_rate": 0.00019195445651951236, "loss": 0.9748, "step": 7685 }, { "epoch": 0.19735492785510358, "grad_norm": 0.765625, "learning_rate": 0.00019195270206027635, "loss": 0.8894, "step": 7686 }, { "epoch": 0.1973806050510254, "grad_norm": 0.8359375, "learning_rate": 0.00019195094741778697, "loss": 1.0724, "step": 7687 }, { "epoch": 0.19740628224694723, "grad_norm": 0.8125, "learning_rate": 0.0001919491925920476, "loss": 0.9117, "step": 7688 }, { "epoch": 0.19743195944286904, "grad_norm": 0.79296875, "learning_rate": 0.0001919474375830618, "loss": 1.1814, "step": 7689 }, { "epoch": 0.19745763663879087, "grad_norm": 0.8359375, "learning_rate": 0.00019194568239083308, "loss": 1.0038, "step": 7690 }, { "epoch": 0.19748331383471268, "grad_norm": 0.84765625, "learning_rate": 0.00019194392701536494, "loss": 0.9369, "step": 7691 }, { "epoch": 0.1975089910306345, "grad_norm": 0.89453125, "learning_rate": 0.0001919421714566608, "loss": 1.1964, "step": 7692 }, { "epoch": 0.19753466822655633, "grad_norm": 0.7734375, "learning_rate": 0.0001919404157147243, "loss": 0.9805, "step": 7693 }, { "epoch": 0.19756034542247813, "grad_norm": 0.796875, "learning_rate": 0.0001919386597895588, "loss": 1.114, "step": 7694 }, { "epoch": 0.19758602261839997, "grad_norm": 0.8125, "learning_rate": 0.00019193690368116785, "loss": 1.119, "step": 7695 }, { "epoch": 0.19761169981432178, "grad_norm": 0.7578125, "learning_rate": 0.00019193514738955497, "loss": 0.9277, "step": 7696 }, { "epoch": 0.1976373770102436, "grad_norm": 0.71875, "learning_rate": 0.00019193339091472367, "loss": 1.0693, "step": 7697 }, { "epoch": 0.19766305420616542, "grad_norm": 0.8671875, "learning_rate": 0.0001919316342566774, "loss": 1.2216, "step": 7698 }, { "epoch": 0.19768873140208723, "grad_norm": 0.84765625, "learning_rate": 0.00019192987741541967, "loss": 1.0876, "step": 7699 }, { "epoch": 0.19771440859800907, "grad_norm": 0.85546875, "learning_rate": 0.00019192812039095402, "loss": 0.9911, "step": 7700 }, { "epoch": 0.19774008579393088, "grad_norm": 0.81640625, "learning_rate": 0.00019192636318328393, "loss": 1.0883, "step": 7701 }, { "epoch": 0.19776576298985268, "grad_norm": 0.78515625, "learning_rate": 0.0001919246057924129, "loss": 1.1521, "step": 7702 }, { "epoch": 0.19779144018577452, "grad_norm": 0.78515625, "learning_rate": 0.00019192284821834444, "loss": 1.0761, "step": 7703 }, { "epoch": 0.19781711738169633, "grad_norm": 0.82421875, "learning_rate": 0.00019192109046108205, "loss": 1.0734, "step": 7704 }, { "epoch": 0.19784279457761816, "grad_norm": 0.875, "learning_rate": 0.00019191933252062923, "loss": 1.0013, "step": 7705 }, { "epoch": 0.19786847177353997, "grad_norm": 0.94140625, "learning_rate": 0.00019191757439698947, "loss": 1.098, "step": 7706 }, { "epoch": 0.19789414896946178, "grad_norm": 0.80078125, "learning_rate": 0.00019191581609016628, "loss": 1.1605, "step": 7707 }, { "epoch": 0.19791982616538362, "grad_norm": 0.7890625, "learning_rate": 0.0001919140576001632, "loss": 1.0756, "step": 7708 }, { "epoch": 0.19794550336130542, "grad_norm": 0.7421875, "learning_rate": 0.00019191229892698367, "loss": 0.9418, "step": 7709 }, { "epoch": 0.19797118055722726, "grad_norm": 0.80078125, "learning_rate": 0.00019191054007063125, "loss": 0.9664, "step": 7710 }, { "epoch": 0.19799685775314907, "grad_norm": 0.7734375, "learning_rate": 0.00019190878103110944, "loss": 1.1381, "step": 7711 }, { "epoch": 0.19802253494907088, "grad_norm": 0.78515625, "learning_rate": 0.00019190702180842172, "loss": 0.8941, "step": 7712 }, { "epoch": 0.1980482121449927, "grad_norm": 0.76953125, "learning_rate": 0.0001919052624025716, "loss": 1.1102, "step": 7713 }, { "epoch": 0.19807388934091452, "grad_norm": 0.87890625, "learning_rate": 0.0001919035028135626, "loss": 1.0846, "step": 7714 }, { "epoch": 0.19809956653683636, "grad_norm": 0.83203125, "learning_rate": 0.0001919017430413982, "loss": 1.0907, "step": 7715 }, { "epoch": 0.19812524373275817, "grad_norm": 0.8203125, "learning_rate": 0.00019189998308608196, "loss": 1.1139, "step": 7716 }, { "epoch": 0.19815092092867997, "grad_norm": 0.80859375, "learning_rate": 0.00019189822294761734, "loss": 0.9555, "step": 7717 }, { "epoch": 0.1981765981246018, "grad_norm": 0.859375, "learning_rate": 0.00019189646262600784, "loss": 1.0702, "step": 7718 }, { "epoch": 0.19820227532052362, "grad_norm": 0.93359375, "learning_rate": 0.000191894702121257, "loss": 1.0462, "step": 7719 }, { "epoch": 0.19822795251644545, "grad_norm": 1.0390625, "learning_rate": 0.00019189294143336837, "loss": 1.0005, "step": 7720 }, { "epoch": 0.19825362971236726, "grad_norm": 0.7578125, "learning_rate": 0.00019189118056234537, "loss": 0.9765, "step": 7721 }, { "epoch": 0.19827930690828907, "grad_norm": 0.79296875, "learning_rate": 0.00019188941950819155, "loss": 0.9385, "step": 7722 }, { "epoch": 0.1983049841042109, "grad_norm": 0.77734375, "learning_rate": 0.00019188765827091042, "loss": 1.0848, "step": 7723 }, { "epoch": 0.19833066130013272, "grad_norm": 0.76171875, "learning_rate": 0.0001918858968505055, "loss": 1.0876, "step": 7724 }, { "epoch": 0.19835633849605455, "grad_norm": 0.78125, "learning_rate": 0.00019188413524698025, "loss": 1.1984, "step": 7725 }, { "epoch": 0.19838201569197636, "grad_norm": 0.80078125, "learning_rate": 0.00019188237346033825, "loss": 0.9838, "step": 7726 }, { "epoch": 0.19840769288789817, "grad_norm": 0.83984375, "learning_rate": 0.00019188061149058295, "loss": 1.0618, "step": 7727 }, { "epoch": 0.19843337008382, "grad_norm": 0.82421875, "learning_rate": 0.00019187884933771795, "loss": 0.9696, "step": 7728 }, { "epoch": 0.1984590472797418, "grad_norm": 0.828125, "learning_rate": 0.00019187708700174665, "loss": 1.0715, "step": 7729 }, { "epoch": 0.19848472447566365, "grad_norm": 0.8203125, "learning_rate": 0.00019187532448267263, "loss": 1.1038, "step": 7730 }, { "epoch": 0.19851040167158546, "grad_norm": 0.8671875, "learning_rate": 0.00019187356178049938, "loss": 1.0831, "step": 7731 }, { "epoch": 0.19853607886750727, "grad_norm": 0.77734375, "learning_rate": 0.0001918717988952304, "loss": 1.0951, "step": 7732 }, { "epoch": 0.1985617560634291, "grad_norm": 0.87890625, "learning_rate": 0.0001918700358268693, "loss": 1.1308, "step": 7733 }, { "epoch": 0.1985874332593509, "grad_norm": 0.86328125, "learning_rate": 0.00019186827257541944, "loss": 1.0417, "step": 7734 }, { "epoch": 0.19861311045527275, "grad_norm": 0.8125, "learning_rate": 0.00019186650914088444, "loss": 1.0841, "step": 7735 }, { "epoch": 0.19863878765119455, "grad_norm": 0.859375, "learning_rate": 0.0001918647455232678, "loss": 1.0289, "step": 7736 }, { "epoch": 0.19866446484711636, "grad_norm": 0.80859375, "learning_rate": 0.000191862981722573, "loss": 1.1111, "step": 7737 }, { "epoch": 0.1986901420430382, "grad_norm": 0.8359375, "learning_rate": 0.00019186121773880356, "loss": 1.1366, "step": 7738 }, { "epoch": 0.19871581923896, "grad_norm": 0.8359375, "learning_rate": 0.000191859453571963, "loss": 1.059, "step": 7739 }, { "epoch": 0.19874149643488184, "grad_norm": 0.85546875, "learning_rate": 0.00019185768922205492, "loss": 1.0734, "step": 7740 }, { "epoch": 0.19876717363080365, "grad_norm": 0.87890625, "learning_rate": 0.0001918559246890827, "loss": 1.1045, "step": 7741 }, { "epoch": 0.19879285082672546, "grad_norm": 0.7734375, "learning_rate": 0.00019185415997304995, "loss": 0.8498, "step": 7742 }, { "epoch": 0.1988185280226473, "grad_norm": 0.87109375, "learning_rate": 0.00019185239507396012, "loss": 1.2322, "step": 7743 }, { "epoch": 0.1988442052185691, "grad_norm": 0.734375, "learning_rate": 0.0001918506299918168, "loss": 0.9482, "step": 7744 }, { "epoch": 0.19886988241449094, "grad_norm": 0.74609375, "learning_rate": 0.0001918488647266235, "loss": 1.0335, "step": 7745 }, { "epoch": 0.19889555961041275, "grad_norm": 0.8125, "learning_rate": 0.00019184709927838365, "loss": 0.9778, "step": 7746 }, { "epoch": 0.19892123680633456, "grad_norm": 0.84375, "learning_rate": 0.00019184533364710084, "loss": 1.108, "step": 7747 }, { "epoch": 0.1989469140022564, "grad_norm": 0.8359375, "learning_rate": 0.00019184356783277857, "loss": 1.2696, "step": 7748 }, { "epoch": 0.1989725911981782, "grad_norm": 0.765625, "learning_rate": 0.0001918418018354204, "loss": 0.9185, "step": 7749 }, { "epoch": 0.1989982683941, "grad_norm": 0.91796875, "learning_rate": 0.0001918400356550298, "loss": 1.169, "step": 7750 }, { "epoch": 0.19902394559002184, "grad_norm": 0.76953125, "learning_rate": 0.00019183826929161028, "loss": 0.881, "step": 7751 }, { "epoch": 0.19904962278594365, "grad_norm": 0.7734375, "learning_rate": 0.0001918365027451654, "loss": 1.0453, "step": 7752 }, { "epoch": 0.1990752999818655, "grad_norm": 0.82421875, "learning_rate": 0.00019183473601569869, "loss": 0.9221, "step": 7753 }, { "epoch": 0.1991009771777873, "grad_norm": 0.76953125, "learning_rate": 0.0001918329691032136, "loss": 0.9788, "step": 7754 }, { "epoch": 0.1991266543737091, "grad_norm": 0.78125, "learning_rate": 0.00019183120200771375, "loss": 1.0323, "step": 7755 }, { "epoch": 0.19915233156963094, "grad_norm": 0.7734375, "learning_rate": 0.00019182943472920258, "loss": 1.1532, "step": 7756 }, { "epoch": 0.19917800876555275, "grad_norm": 0.859375, "learning_rate": 0.00019182766726768364, "loss": 1.1824, "step": 7757 }, { "epoch": 0.19920368596147459, "grad_norm": 0.85546875, "learning_rate": 0.00019182589962316046, "loss": 1.0943, "step": 7758 }, { "epoch": 0.1992293631573964, "grad_norm": 0.86328125, "learning_rate": 0.00019182413179563655, "loss": 1.2022, "step": 7759 }, { "epoch": 0.1992550403533182, "grad_norm": 0.84765625, "learning_rate": 0.00019182236378511544, "loss": 1.0797, "step": 7760 }, { "epoch": 0.19928071754924004, "grad_norm": 0.7421875, "learning_rate": 0.00019182059559160066, "loss": 1.0551, "step": 7761 }, { "epoch": 0.19930639474516185, "grad_norm": 0.93359375, "learning_rate": 0.00019181882721509572, "loss": 1.0555, "step": 7762 }, { "epoch": 0.19933207194108368, "grad_norm": 0.8046875, "learning_rate": 0.00019181705865560416, "loss": 1.0656, "step": 7763 }, { "epoch": 0.1993577491370055, "grad_norm": 0.8515625, "learning_rate": 0.0001918152899131295, "loss": 1.2318, "step": 7764 }, { "epoch": 0.1993834263329273, "grad_norm": 0.79296875, "learning_rate": 0.00019181352098767524, "loss": 1.0931, "step": 7765 }, { "epoch": 0.19940910352884914, "grad_norm": 0.83203125, "learning_rate": 0.00019181175187924496, "loss": 1.0366, "step": 7766 }, { "epoch": 0.19943478072477094, "grad_norm": 0.8359375, "learning_rate": 0.0001918099825878421, "loss": 0.9426, "step": 7767 }, { "epoch": 0.19946045792069278, "grad_norm": 0.8046875, "learning_rate": 0.00019180821311347028, "loss": 1.1029, "step": 7768 }, { "epoch": 0.1994861351166146, "grad_norm": 0.8046875, "learning_rate": 0.000191806443456133, "loss": 1.102, "step": 7769 }, { "epoch": 0.1995118123125364, "grad_norm": 0.78125, "learning_rate": 0.00019180467361583375, "loss": 0.9352, "step": 7770 }, { "epoch": 0.19953748950845823, "grad_norm": 0.796875, "learning_rate": 0.00019180290359257604, "loss": 0.9876, "step": 7771 }, { "epoch": 0.19956316670438004, "grad_norm": 0.83984375, "learning_rate": 0.0001918011333863635, "loss": 0.9737, "step": 7772 }, { "epoch": 0.19958884390030188, "grad_norm": 0.8125, "learning_rate": 0.00019179936299719956, "loss": 1.082, "step": 7773 }, { "epoch": 0.19961452109622368, "grad_norm": 0.921875, "learning_rate": 0.00019179759242508778, "loss": 1.0201, "step": 7774 }, { "epoch": 0.1996401982921455, "grad_norm": 0.74609375, "learning_rate": 0.00019179582167003169, "loss": 1.1005, "step": 7775 }, { "epoch": 0.19966587548806733, "grad_norm": 0.76953125, "learning_rate": 0.00019179405073203483, "loss": 1.1122, "step": 7776 }, { "epoch": 0.19969155268398914, "grad_norm": 0.86328125, "learning_rate": 0.0001917922796111007, "loss": 0.9648, "step": 7777 }, { "epoch": 0.19971722987991097, "grad_norm": 0.8515625, "learning_rate": 0.00019179050830723287, "loss": 1.0956, "step": 7778 }, { "epoch": 0.19974290707583278, "grad_norm": 0.79296875, "learning_rate": 0.00019178873682043483, "loss": 1.0512, "step": 7779 }, { "epoch": 0.1997685842717546, "grad_norm": 0.73046875, "learning_rate": 0.00019178696515071014, "loss": 0.9835, "step": 7780 }, { "epoch": 0.19979426146767643, "grad_norm": 0.921875, "learning_rate": 0.00019178519329806232, "loss": 0.9894, "step": 7781 }, { "epoch": 0.19981993866359823, "grad_norm": 0.8671875, "learning_rate": 0.0001917834212624949, "loss": 1.1552, "step": 7782 }, { "epoch": 0.19984561585952007, "grad_norm": 0.8203125, "learning_rate": 0.0001917816490440114, "loss": 1.0802, "step": 7783 }, { "epoch": 0.19987129305544188, "grad_norm": 0.78515625, "learning_rate": 0.00019177987664261537, "loss": 1.0596, "step": 7784 }, { "epoch": 0.1998969702513637, "grad_norm": 0.76171875, "learning_rate": 0.00019177810405831034, "loss": 0.9165, "step": 7785 }, { "epoch": 0.19992264744728552, "grad_norm": 0.80859375, "learning_rate": 0.00019177633129109983, "loss": 1.0946, "step": 7786 }, { "epoch": 0.19994832464320733, "grad_norm": 0.7734375, "learning_rate": 0.00019177455834098737, "loss": 0.9073, "step": 7787 }, { "epoch": 0.19997400183912917, "grad_norm": 0.80859375, "learning_rate": 0.00019177278520797657, "loss": 1.0705, "step": 7788 }, { "epoch": 0.19999967903505098, "grad_norm": 0.87890625, "learning_rate": 0.00019177101189207086, "loss": 0.9632, "step": 7789 }, { "epoch": 0.20002535623097278, "grad_norm": 0.78515625, "learning_rate": 0.0001917692383932738, "loss": 0.9494, "step": 7790 }, { "epoch": 0.20005103342689462, "grad_norm": 0.77734375, "learning_rate": 0.00019176746471158892, "loss": 1.005, "step": 7791 }, { "epoch": 0.20007671062281643, "grad_norm": 0.80078125, "learning_rate": 0.00019176569084701983, "loss": 0.9687, "step": 7792 }, { "epoch": 0.20010238781873826, "grad_norm": 0.7578125, "learning_rate": 0.00019176391679956995, "loss": 1.0203, "step": 7793 }, { "epoch": 0.20012806501466007, "grad_norm": 0.7890625, "learning_rate": 0.0001917621425692429, "loss": 1.0254, "step": 7794 }, { "epoch": 0.20015374221058188, "grad_norm": 0.78515625, "learning_rate": 0.00019176036815604219, "loss": 1.1491, "step": 7795 }, { "epoch": 0.20017941940650372, "grad_norm": 0.84375, "learning_rate": 0.00019175859355997137, "loss": 1.1864, "step": 7796 }, { "epoch": 0.20020509660242553, "grad_norm": 0.828125, "learning_rate": 0.0001917568187810339, "loss": 0.9288, "step": 7797 }, { "epoch": 0.20023077379834736, "grad_norm": 0.8046875, "learning_rate": 0.00019175504381923345, "loss": 0.9888, "step": 7798 }, { "epoch": 0.20025645099426917, "grad_norm": 0.79296875, "learning_rate": 0.00019175326867457347, "loss": 1.0744, "step": 7799 }, { "epoch": 0.20028212819019098, "grad_norm": 0.82421875, "learning_rate": 0.00019175149334705748, "loss": 1.0676, "step": 7800 }, { "epoch": 0.2003078053861128, "grad_norm": 0.765625, "learning_rate": 0.00019174971783668908, "loss": 1.1545, "step": 7801 }, { "epoch": 0.20033348258203462, "grad_norm": 0.77734375, "learning_rate": 0.00019174794214347175, "loss": 1.0596, "step": 7802 }, { "epoch": 0.20035915977795646, "grad_norm": 0.80859375, "learning_rate": 0.00019174616626740912, "loss": 1.1052, "step": 7803 }, { "epoch": 0.20038483697387827, "grad_norm": 0.86328125, "learning_rate": 0.0001917443902085046, "loss": 1.0088, "step": 7804 }, { "epoch": 0.20041051416980007, "grad_norm": 0.8359375, "learning_rate": 0.00019174261396676183, "loss": 1.0261, "step": 7805 }, { "epoch": 0.2004361913657219, "grad_norm": 0.80859375, "learning_rate": 0.0001917408375421843, "loss": 1.0019, "step": 7806 }, { "epoch": 0.20046186856164372, "grad_norm": 0.828125, "learning_rate": 0.0001917390609347756, "loss": 1.1267, "step": 7807 }, { "epoch": 0.20048754575756556, "grad_norm": 0.79296875, "learning_rate": 0.0001917372841445392, "loss": 0.8947, "step": 7808 }, { "epoch": 0.20051322295348736, "grad_norm": 0.78515625, "learning_rate": 0.00019173550717147873, "loss": 0.9682, "step": 7809 }, { "epoch": 0.20053890014940917, "grad_norm": 0.79296875, "learning_rate": 0.00019173373001559764, "loss": 1.0702, "step": 7810 }, { "epoch": 0.200564577345331, "grad_norm": 0.77734375, "learning_rate": 0.00019173195267689952, "loss": 1.1214, "step": 7811 }, { "epoch": 0.20059025454125282, "grad_norm": 0.8359375, "learning_rate": 0.00019173017515538792, "loss": 1.013, "step": 7812 }, { "epoch": 0.20061593173717465, "grad_norm": 0.83984375, "learning_rate": 0.00019172839745106634, "loss": 0.9856, "step": 7813 }, { "epoch": 0.20064160893309646, "grad_norm": 0.8046875, "learning_rate": 0.00019172661956393835, "loss": 1.0073, "step": 7814 }, { "epoch": 0.20066728612901827, "grad_norm": 0.76171875, "learning_rate": 0.0001917248414940075, "loss": 0.9271, "step": 7815 }, { "epoch": 0.2006929633249401, "grad_norm": 0.76171875, "learning_rate": 0.00019172306324127735, "loss": 1.0241, "step": 7816 }, { "epoch": 0.2007186405208619, "grad_norm": 0.8125, "learning_rate": 0.00019172128480575141, "loss": 0.9467, "step": 7817 }, { "epoch": 0.20074431771678375, "grad_norm": 0.8125, "learning_rate": 0.00019171950618743325, "loss": 0.9448, "step": 7818 }, { "epoch": 0.20076999491270556, "grad_norm": 0.81640625, "learning_rate": 0.00019171772738632635, "loss": 1.031, "step": 7819 }, { "epoch": 0.20079567210862737, "grad_norm": 0.7578125, "learning_rate": 0.00019171594840243432, "loss": 1.1801, "step": 7820 }, { "epoch": 0.2008213493045492, "grad_norm": 0.83984375, "learning_rate": 0.00019171416923576074, "loss": 1.0011, "step": 7821 }, { "epoch": 0.200847026500471, "grad_norm": 0.88671875, "learning_rate": 0.00019171238988630905, "loss": 0.9688, "step": 7822 }, { "epoch": 0.20087270369639285, "grad_norm": 0.80078125, "learning_rate": 0.00019171061035408283, "loss": 1.2182, "step": 7823 }, { "epoch": 0.20089838089231465, "grad_norm": 0.85546875, "learning_rate": 0.00019170883063908571, "loss": 1.1073, "step": 7824 }, { "epoch": 0.20092405808823646, "grad_norm": 0.77734375, "learning_rate": 0.00019170705074132114, "loss": 1.0262, "step": 7825 }, { "epoch": 0.2009497352841583, "grad_norm": 0.76953125, "learning_rate": 0.00019170527066079274, "loss": 0.9106, "step": 7826 }, { "epoch": 0.2009754124800801, "grad_norm": 0.78515625, "learning_rate": 0.00019170349039750398, "loss": 1.1274, "step": 7827 }, { "epoch": 0.20100108967600194, "grad_norm": 0.84375, "learning_rate": 0.00019170170995145848, "loss": 1.1453, "step": 7828 }, { "epoch": 0.20102676687192375, "grad_norm": 0.765625, "learning_rate": 0.0001916999293226597, "loss": 1.0972, "step": 7829 }, { "epoch": 0.20105244406784556, "grad_norm": 0.76953125, "learning_rate": 0.00019169814851111128, "loss": 1.0009, "step": 7830 }, { "epoch": 0.2010781212637674, "grad_norm": 0.80078125, "learning_rate": 0.00019169636751681672, "loss": 1.0869, "step": 7831 }, { "epoch": 0.2011037984596892, "grad_norm": 0.7734375, "learning_rate": 0.0001916945863397796, "loss": 1.0535, "step": 7832 }, { "epoch": 0.20112947565561104, "grad_norm": 0.78125, "learning_rate": 0.00019169280498000342, "loss": 0.942, "step": 7833 }, { "epoch": 0.20115515285153285, "grad_norm": 0.8203125, "learning_rate": 0.00019169102343749182, "loss": 0.9414, "step": 7834 }, { "epoch": 0.20118083004745466, "grad_norm": 0.74609375, "learning_rate": 0.00019168924171224824, "loss": 1.1162, "step": 7835 }, { "epoch": 0.2012065072433765, "grad_norm": 0.82421875, "learning_rate": 0.00019168745980427631, "loss": 1.0903, "step": 7836 }, { "epoch": 0.2012321844392983, "grad_norm": 0.83203125, "learning_rate": 0.00019168567771357954, "loss": 0.985, "step": 7837 }, { "epoch": 0.20125786163522014, "grad_norm": 0.80859375, "learning_rate": 0.00019168389544016153, "loss": 0.9505, "step": 7838 }, { "epoch": 0.20128353883114194, "grad_norm": 0.8984375, "learning_rate": 0.00019168211298402574, "loss": 1.1574, "step": 7839 }, { "epoch": 0.20130921602706375, "grad_norm": 0.80078125, "learning_rate": 0.00019168033034517584, "loss": 1.0243, "step": 7840 }, { "epoch": 0.2013348932229856, "grad_norm": 0.8984375, "learning_rate": 0.00019167854752361526, "loss": 1.0456, "step": 7841 }, { "epoch": 0.2013605704189074, "grad_norm": 0.79296875, "learning_rate": 0.00019167676451934765, "loss": 1.1959, "step": 7842 }, { "epoch": 0.20138624761482923, "grad_norm": 0.78515625, "learning_rate": 0.00019167498133237655, "loss": 1.0562, "step": 7843 }, { "epoch": 0.20141192481075104, "grad_norm": 0.7890625, "learning_rate": 0.00019167319796270548, "loss": 0.9569, "step": 7844 }, { "epoch": 0.20143760200667285, "grad_norm": 0.78125, "learning_rate": 0.000191671414410338, "loss": 1.1482, "step": 7845 }, { "epoch": 0.2014632792025947, "grad_norm": 0.875, "learning_rate": 0.00019166963067527768, "loss": 1.0247, "step": 7846 }, { "epoch": 0.2014889563985165, "grad_norm": 0.83984375, "learning_rate": 0.00019166784675752805, "loss": 1.0248, "step": 7847 }, { "epoch": 0.20151463359443833, "grad_norm": 0.85546875, "learning_rate": 0.0001916660626570927, "loss": 0.9521, "step": 7848 }, { "epoch": 0.20154031079036014, "grad_norm": 0.80078125, "learning_rate": 0.00019166427837397516, "loss": 1.0837, "step": 7849 }, { "epoch": 0.20156598798628195, "grad_norm": 0.9140625, "learning_rate": 0.000191662493908179, "loss": 1.062, "step": 7850 }, { "epoch": 0.20159166518220378, "grad_norm": 0.86328125, "learning_rate": 0.00019166070925970776, "loss": 1.1165, "step": 7851 }, { "epoch": 0.2016173423781256, "grad_norm": 0.82421875, "learning_rate": 0.00019165892442856501, "loss": 1.0127, "step": 7852 }, { "epoch": 0.20164301957404743, "grad_norm": 0.83984375, "learning_rate": 0.0001916571394147543, "loss": 1.1172, "step": 7853 }, { "epoch": 0.20166869676996924, "grad_norm": 0.765625, "learning_rate": 0.0001916553542182792, "loss": 1.1527, "step": 7854 }, { "epoch": 0.20169437396589104, "grad_norm": 0.87109375, "learning_rate": 0.00019165356883914323, "loss": 0.9721, "step": 7855 }, { "epoch": 0.20172005116181288, "grad_norm": 0.7890625, "learning_rate": 0.00019165178327734998, "loss": 1.0866, "step": 7856 }, { "epoch": 0.2017457283577347, "grad_norm": 0.7265625, "learning_rate": 0.00019164999753290305, "loss": 1.0578, "step": 7857 }, { "epoch": 0.20177140555365652, "grad_norm": 0.79296875, "learning_rate": 0.0001916482116058059, "loss": 1.1387, "step": 7858 }, { "epoch": 0.20179708274957833, "grad_norm": 0.80078125, "learning_rate": 0.00019164642549606217, "loss": 1.0841, "step": 7859 }, { "epoch": 0.20182275994550014, "grad_norm": 0.8515625, "learning_rate": 0.00019164463920367538, "loss": 0.9487, "step": 7860 }, { "epoch": 0.20184843714142198, "grad_norm": 0.8203125, "learning_rate": 0.00019164285272864912, "loss": 1.0793, "step": 7861 }, { "epoch": 0.20187411433734379, "grad_norm": 0.8203125, "learning_rate": 0.00019164106607098694, "loss": 0.9776, "step": 7862 }, { "epoch": 0.20189979153326562, "grad_norm": 0.85546875, "learning_rate": 0.00019163927923069237, "loss": 1.1705, "step": 7863 }, { "epoch": 0.20192546872918743, "grad_norm": 0.73828125, "learning_rate": 0.00019163749220776902, "loss": 0.8113, "step": 7864 }, { "epoch": 0.20195114592510924, "grad_norm": 0.79296875, "learning_rate": 0.0001916357050022204, "loss": 1.0344, "step": 7865 }, { "epoch": 0.20197682312103107, "grad_norm": 0.8203125, "learning_rate": 0.0001916339176140501, "loss": 0.9799, "step": 7866 }, { "epoch": 0.20200250031695288, "grad_norm": 0.79296875, "learning_rate": 0.00019163213004326168, "loss": 0.9744, "step": 7867 }, { "epoch": 0.20202817751287472, "grad_norm": 0.78125, "learning_rate": 0.0001916303422898587, "loss": 1.0554, "step": 7868 }, { "epoch": 0.20205385470879653, "grad_norm": 0.80078125, "learning_rate": 0.00019162855435384476, "loss": 1.0664, "step": 7869 }, { "epoch": 0.20207953190471833, "grad_norm": 0.8125, "learning_rate": 0.00019162676623522333, "loss": 1.152, "step": 7870 }, { "epoch": 0.20210520910064017, "grad_norm": 0.8515625, "learning_rate": 0.00019162497793399807, "loss": 0.9417, "step": 7871 }, { "epoch": 0.20213088629656198, "grad_norm": 0.80859375, "learning_rate": 0.00019162318945017251, "loss": 1.0648, "step": 7872 }, { "epoch": 0.20215656349248382, "grad_norm": 0.8125, "learning_rate": 0.00019162140078375018, "loss": 0.9962, "step": 7873 }, { "epoch": 0.20218224068840562, "grad_norm": 0.86328125, "learning_rate": 0.0001916196119347347, "loss": 1.0128, "step": 7874 }, { "epoch": 0.20220791788432743, "grad_norm": 0.8203125, "learning_rate": 0.00019161782290312958, "loss": 0.9306, "step": 7875 }, { "epoch": 0.20223359508024927, "grad_norm": 0.80078125, "learning_rate": 0.00019161603368893844, "loss": 1.0321, "step": 7876 }, { "epoch": 0.20225927227617108, "grad_norm": 0.83203125, "learning_rate": 0.0001916142442921648, "loss": 1.1142, "step": 7877 }, { "epoch": 0.2022849494720929, "grad_norm": 0.7890625, "learning_rate": 0.0001916124547128123, "loss": 0.9569, "step": 7878 }, { "epoch": 0.20231062666801472, "grad_norm": 0.80078125, "learning_rate": 0.00019161066495088438, "loss": 1.0135, "step": 7879 }, { "epoch": 0.20233630386393653, "grad_norm": 0.8203125, "learning_rate": 0.00019160887500638474, "loss": 1.0358, "step": 7880 }, { "epoch": 0.20236198105985836, "grad_norm": 0.80859375, "learning_rate": 0.00019160708487931684, "loss": 1.0282, "step": 7881 }, { "epoch": 0.20238765825578017, "grad_norm": 0.82421875, "learning_rate": 0.0001916052945696843, "loss": 1.0157, "step": 7882 }, { "epoch": 0.202413335451702, "grad_norm": 0.859375, "learning_rate": 0.0001916035040774907, "loss": 1.1417, "step": 7883 }, { "epoch": 0.20243901264762382, "grad_norm": 0.7578125, "learning_rate": 0.0001916017134027396, "loss": 0.8907, "step": 7884 }, { "epoch": 0.20246468984354563, "grad_norm": 0.7578125, "learning_rate": 0.00019159992254543456, "loss": 0.9851, "step": 7885 }, { "epoch": 0.20249036703946746, "grad_norm": 0.8125, "learning_rate": 0.00019159813150557912, "loss": 1.1479, "step": 7886 }, { "epoch": 0.20251604423538927, "grad_norm": 0.75, "learning_rate": 0.0001915963402831769, "loss": 0.9464, "step": 7887 }, { "epoch": 0.2025417214313111, "grad_norm": 0.79296875, "learning_rate": 0.00019159454887823145, "loss": 0.9196, "step": 7888 }, { "epoch": 0.20256739862723291, "grad_norm": 0.76953125, "learning_rate": 0.00019159275729074634, "loss": 1.045, "step": 7889 }, { "epoch": 0.20259307582315472, "grad_norm": 0.8125, "learning_rate": 0.00019159096552072514, "loss": 0.9648, "step": 7890 }, { "epoch": 0.20261875301907656, "grad_norm": 0.765625, "learning_rate": 0.00019158917356817138, "loss": 1.034, "step": 7891 }, { "epoch": 0.20264443021499837, "grad_norm": 0.8125, "learning_rate": 0.0001915873814330887, "loss": 0.94, "step": 7892 }, { "epoch": 0.2026701074109202, "grad_norm": 0.765625, "learning_rate": 0.00019158558911548066, "loss": 1.0179, "step": 7893 }, { "epoch": 0.202695784606842, "grad_norm": 0.8984375, "learning_rate": 0.00019158379661535078, "loss": 1.0122, "step": 7894 }, { "epoch": 0.20272146180276382, "grad_norm": 0.8046875, "learning_rate": 0.00019158200393270267, "loss": 1.0074, "step": 7895 }, { "epoch": 0.20274713899868566, "grad_norm": 0.98828125, "learning_rate": 0.00019158021106753992, "loss": 0.9826, "step": 7896 }, { "epoch": 0.20277281619460746, "grad_norm": 0.8125, "learning_rate": 0.00019157841801986606, "loss": 0.9461, "step": 7897 }, { "epoch": 0.2027984933905293, "grad_norm": 0.76171875, "learning_rate": 0.00019157662478968472, "loss": 1.0955, "step": 7898 }, { "epoch": 0.2028241705864511, "grad_norm": 0.8671875, "learning_rate": 0.00019157483137699938, "loss": 1.0174, "step": 7899 }, { "epoch": 0.20284984778237292, "grad_norm": 0.7109375, "learning_rate": 0.00019157303778181373, "loss": 1.0404, "step": 7900 }, { "epoch": 0.20287552497829475, "grad_norm": 1.03125, "learning_rate": 0.00019157124400413127, "loss": 1.0849, "step": 7901 }, { "epoch": 0.20290120217421656, "grad_norm": 0.73046875, "learning_rate": 0.0001915694500439556, "loss": 0.8527, "step": 7902 }, { "epoch": 0.2029268793701384, "grad_norm": 0.81640625, "learning_rate": 0.00019156765590129028, "loss": 1.194, "step": 7903 }, { "epoch": 0.2029525565660602, "grad_norm": 0.84765625, "learning_rate": 0.00019156586157613887, "loss": 1.1809, "step": 7904 }, { "epoch": 0.202978233761982, "grad_norm": 0.8359375, "learning_rate": 0.000191564067068505, "loss": 1.1018, "step": 7905 }, { "epoch": 0.20300391095790385, "grad_norm": 0.79296875, "learning_rate": 0.00019156227237839216, "loss": 1.1169, "step": 7906 }, { "epoch": 0.20302958815382566, "grad_norm": 0.80859375, "learning_rate": 0.00019156047750580406, "loss": 1.0017, "step": 7907 }, { "epoch": 0.2030552653497475, "grad_norm": 0.7890625, "learning_rate": 0.00019155868245074415, "loss": 1.1513, "step": 7908 }, { "epoch": 0.2030809425456693, "grad_norm": 0.796875, "learning_rate": 0.00019155688721321607, "loss": 0.9601, "step": 7909 }, { "epoch": 0.2031066197415911, "grad_norm": 1.25, "learning_rate": 0.0001915550917932234, "loss": 0.9789, "step": 7910 }, { "epoch": 0.20313229693751295, "grad_norm": 0.80078125, "learning_rate": 0.00019155329619076968, "loss": 0.9556, "step": 7911 }, { "epoch": 0.20315797413343475, "grad_norm": 0.7578125, "learning_rate": 0.0001915515004058585, "loss": 0.9977, "step": 7912 }, { "epoch": 0.2031836513293566, "grad_norm": 0.8125, "learning_rate": 0.00019154970443849346, "loss": 0.9915, "step": 7913 }, { "epoch": 0.2032093285252784, "grad_norm": 0.87109375, "learning_rate": 0.00019154790828867816, "loss": 1.0272, "step": 7914 }, { "epoch": 0.2032350057212002, "grad_norm": 0.81640625, "learning_rate": 0.0001915461119564161, "loss": 1.1953, "step": 7915 }, { "epoch": 0.20326068291712204, "grad_norm": 0.7890625, "learning_rate": 0.00019154431544171095, "loss": 1.0026, "step": 7916 }, { "epoch": 0.20328636011304385, "grad_norm": 0.8359375, "learning_rate": 0.0001915425187445662, "loss": 0.9967, "step": 7917 }, { "epoch": 0.2033120373089657, "grad_norm": 0.93359375, "learning_rate": 0.00019154072186498554, "loss": 1.03, "step": 7918 }, { "epoch": 0.2033377145048875, "grad_norm": 0.86328125, "learning_rate": 0.00019153892480297245, "loss": 1.0953, "step": 7919 }, { "epoch": 0.2033633917008093, "grad_norm": 0.8671875, "learning_rate": 0.00019153712755853056, "loss": 0.9283, "step": 7920 }, { "epoch": 0.20338906889673114, "grad_norm": 0.90234375, "learning_rate": 0.00019153533013166344, "loss": 1.1464, "step": 7921 }, { "epoch": 0.20341474609265295, "grad_norm": 0.765625, "learning_rate": 0.00019153353252237466, "loss": 1.0918, "step": 7922 }, { "epoch": 0.20344042328857478, "grad_norm": 0.7578125, "learning_rate": 0.00019153173473066785, "loss": 0.9842, "step": 7923 }, { "epoch": 0.2034661004844966, "grad_norm": 0.8046875, "learning_rate": 0.00019152993675654653, "loss": 1.1335, "step": 7924 }, { "epoch": 0.2034917776804184, "grad_norm": 0.73046875, "learning_rate": 0.00019152813860001436, "loss": 1.0045, "step": 7925 }, { "epoch": 0.20351745487634024, "grad_norm": 0.84375, "learning_rate": 0.00019152634026107485, "loss": 0.9708, "step": 7926 }, { "epoch": 0.20354313207226205, "grad_norm": 1.65625, "learning_rate": 0.0001915245417397316, "loss": 0.9682, "step": 7927 }, { "epoch": 0.20356880926818388, "grad_norm": 0.84765625, "learning_rate": 0.00019152274303598822, "loss": 1.0, "step": 7928 }, { "epoch": 0.2035944864641057, "grad_norm": 0.828125, "learning_rate": 0.00019152094414984826, "loss": 1.117, "step": 7929 }, { "epoch": 0.2036201636600275, "grad_norm": 0.7890625, "learning_rate": 0.00019151914508131536, "loss": 0.9556, "step": 7930 }, { "epoch": 0.20364584085594933, "grad_norm": 0.77734375, "learning_rate": 0.00019151734583039304, "loss": 1.0057, "step": 7931 }, { "epoch": 0.20367151805187114, "grad_norm": 0.86328125, "learning_rate": 0.00019151554639708492, "loss": 0.9593, "step": 7932 }, { "epoch": 0.20369719524779298, "grad_norm": 0.87890625, "learning_rate": 0.0001915137467813946, "loss": 1.1147, "step": 7933 }, { "epoch": 0.2037228724437148, "grad_norm": 0.91796875, "learning_rate": 0.00019151194698332564, "loss": 1.1885, "step": 7934 }, { "epoch": 0.2037485496396366, "grad_norm": 0.80859375, "learning_rate": 0.00019151014700288164, "loss": 1.1313, "step": 7935 }, { "epoch": 0.20377422683555843, "grad_norm": 0.8125, "learning_rate": 0.0001915083468400662, "loss": 1.1751, "step": 7936 }, { "epoch": 0.20379990403148024, "grad_norm": 0.79296875, "learning_rate": 0.00019150654649488285, "loss": 1.0364, "step": 7937 }, { "epoch": 0.20382558122740208, "grad_norm": 0.77734375, "learning_rate": 0.00019150474596733524, "loss": 0.9206, "step": 7938 }, { "epoch": 0.20385125842332388, "grad_norm": 0.828125, "learning_rate": 0.00019150294525742694, "loss": 1.2113, "step": 7939 }, { "epoch": 0.2038769356192457, "grad_norm": 0.921875, "learning_rate": 0.00019150114436516154, "loss": 1.0569, "step": 7940 }, { "epoch": 0.20390261281516753, "grad_norm": 0.8515625, "learning_rate": 0.0001914993432905426, "loss": 1.1269, "step": 7941 }, { "epoch": 0.20392829001108934, "grad_norm": 0.79296875, "learning_rate": 0.00019149754203357376, "loss": 0.9416, "step": 7942 }, { "epoch": 0.20395396720701117, "grad_norm": 0.7421875, "learning_rate": 0.00019149574059425858, "loss": 0.98, "step": 7943 }, { "epoch": 0.20397964440293298, "grad_norm": 0.79296875, "learning_rate": 0.00019149393897260062, "loss": 1.2279, "step": 7944 }, { "epoch": 0.2040053215988548, "grad_norm": 0.84375, "learning_rate": 0.00019149213716860354, "loss": 1.0358, "step": 7945 }, { "epoch": 0.20403099879477662, "grad_norm": 0.84765625, "learning_rate": 0.0001914903351822709, "loss": 0.9995, "step": 7946 }, { "epoch": 0.20405667599069843, "grad_norm": 0.76953125, "learning_rate": 0.00019148853301360625, "loss": 1.1279, "step": 7947 }, { "epoch": 0.20408235318662027, "grad_norm": 0.85546875, "learning_rate": 0.00019148673066261327, "loss": 1.1481, "step": 7948 }, { "epoch": 0.20410803038254208, "grad_norm": 0.7734375, "learning_rate": 0.00019148492812929546, "loss": 1.0003, "step": 7949 }, { "epoch": 0.20413370757846389, "grad_norm": 0.75390625, "learning_rate": 0.00019148312541365648, "loss": 0.8517, "step": 7950 }, { "epoch": 0.20415938477438572, "grad_norm": 0.73046875, "learning_rate": 0.00019148132251569985, "loss": 1.0507, "step": 7951 }, { "epoch": 0.20418506197030753, "grad_norm": 0.8359375, "learning_rate": 0.00019147951943542925, "loss": 1.1222, "step": 7952 }, { "epoch": 0.20421073916622934, "grad_norm": 0.7734375, "learning_rate": 0.00019147771617284822, "loss": 1.1932, "step": 7953 }, { "epoch": 0.20423641636215117, "grad_norm": 0.9296875, "learning_rate": 0.00019147591272796036, "loss": 1.0079, "step": 7954 }, { "epoch": 0.20426209355807298, "grad_norm": 0.8359375, "learning_rate": 0.00019147410910076926, "loss": 1.1816, "step": 7955 }, { "epoch": 0.20428777075399482, "grad_norm": 0.875, "learning_rate": 0.00019147230529127854, "loss": 1.0376, "step": 7956 }, { "epoch": 0.20431344794991663, "grad_norm": 0.78125, "learning_rate": 0.00019147050129949175, "loss": 1.0426, "step": 7957 }, { "epoch": 0.20433912514583844, "grad_norm": 0.82421875, "learning_rate": 0.00019146869712541252, "loss": 1.0556, "step": 7958 }, { "epoch": 0.20436480234176027, "grad_norm": 0.80078125, "learning_rate": 0.00019146689276904448, "loss": 0.9565, "step": 7959 }, { "epoch": 0.20439047953768208, "grad_norm": 0.796875, "learning_rate": 0.00019146508823039116, "loss": 1.0127, "step": 7960 }, { "epoch": 0.20441615673360392, "grad_norm": 0.79296875, "learning_rate": 0.00019146328350945618, "loss": 1.0808, "step": 7961 }, { "epoch": 0.20444183392952572, "grad_norm": 0.86328125, "learning_rate": 0.00019146147860624315, "loss": 0.9713, "step": 7962 }, { "epoch": 0.20446751112544753, "grad_norm": 0.875, "learning_rate": 0.00019145967352075563, "loss": 1.1327, "step": 7963 }, { "epoch": 0.20449318832136937, "grad_norm": 0.828125, "learning_rate": 0.00019145786825299724, "loss": 1.0573, "step": 7964 }, { "epoch": 0.20451886551729118, "grad_norm": 0.7734375, "learning_rate": 0.00019145606280297162, "loss": 1.0264, "step": 7965 }, { "epoch": 0.204544542713213, "grad_norm": 0.85546875, "learning_rate": 0.0001914542571706823, "loss": 1.0834, "step": 7966 }, { "epoch": 0.20457021990913482, "grad_norm": 0.83984375, "learning_rate": 0.00019145245135613288, "loss": 1.0374, "step": 7967 }, { "epoch": 0.20459589710505663, "grad_norm": 0.84375, "learning_rate": 0.00019145064535932703, "loss": 1.2052, "step": 7968 }, { "epoch": 0.20462157430097846, "grad_norm": 0.87109375, "learning_rate": 0.00019144883918026829, "loss": 1.0319, "step": 7969 }, { "epoch": 0.20464725149690027, "grad_norm": 0.83984375, "learning_rate": 0.00019144703281896028, "loss": 1.1907, "step": 7970 }, { "epoch": 0.2046729286928221, "grad_norm": 0.8046875, "learning_rate": 0.00019144522627540658, "loss": 0.9617, "step": 7971 }, { "epoch": 0.20469860588874392, "grad_norm": 0.8046875, "learning_rate": 0.0001914434195496108, "loss": 0.8927, "step": 7972 }, { "epoch": 0.20472428308466573, "grad_norm": 0.76953125, "learning_rate": 0.00019144161264157656, "loss": 0.9021, "step": 7973 }, { "epoch": 0.20474996028058756, "grad_norm": 0.78125, "learning_rate": 0.00019143980555130744, "loss": 0.8647, "step": 7974 }, { "epoch": 0.20477563747650937, "grad_norm": 0.8125, "learning_rate": 0.00019143799827880703, "loss": 1.0244, "step": 7975 }, { "epoch": 0.2048013146724312, "grad_norm": 0.8984375, "learning_rate": 0.00019143619082407897, "loss": 1.2023, "step": 7976 }, { "epoch": 0.20482699186835301, "grad_norm": 0.88671875, "learning_rate": 0.00019143438318712684, "loss": 1.0182, "step": 7977 }, { "epoch": 0.20485266906427482, "grad_norm": 0.8359375, "learning_rate": 0.00019143257536795427, "loss": 1.1297, "step": 7978 }, { "epoch": 0.20487834626019666, "grad_norm": 0.77734375, "learning_rate": 0.0001914307673665648, "loss": 0.9236, "step": 7979 }, { "epoch": 0.20490402345611847, "grad_norm": 0.78125, "learning_rate": 0.00019142895918296206, "loss": 1.0644, "step": 7980 }, { "epoch": 0.2049297006520403, "grad_norm": 0.83203125, "learning_rate": 0.0001914271508171497, "loss": 1.0794, "step": 7981 }, { "epoch": 0.2049553778479621, "grad_norm": 0.83984375, "learning_rate": 0.00019142534226913128, "loss": 1.1889, "step": 7982 }, { "epoch": 0.20498105504388392, "grad_norm": 0.83203125, "learning_rate": 0.00019142353353891037, "loss": 0.9369, "step": 7983 }, { "epoch": 0.20500673223980576, "grad_norm": 0.83984375, "learning_rate": 0.00019142172462649067, "loss": 1.1351, "step": 7984 }, { "epoch": 0.20503240943572756, "grad_norm": 0.796875, "learning_rate": 0.0001914199155318757, "loss": 0.9375, "step": 7985 }, { "epoch": 0.2050580866316494, "grad_norm": 0.80078125, "learning_rate": 0.00019141810625506912, "loss": 1.0098, "step": 7986 }, { "epoch": 0.2050837638275712, "grad_norm": 0.8359375, "learning_rate": 0.00019141629679607448, "loss": 1.0062, "step": 7987 }, { "epoch": 0.20510944102349302, "grad_norm": 0.80078125, "learning_rate": 0.00019141448715489544, "loss": 1.0156, "step": 7988 }, { "epoch": 0.20513511821941485, "grad_norm": 0.82421875, "learning_rate": 0.00019141267733153558, "loss": 1.0619, "step": 7989 }, { "epoch": 0.20516079541533666, "grad_norm": 0.8203125, "learning_rate": 0.0001914108673259985, "loss": 0.9761, "step": 7990 }, { "epoch": 0.2051864726112585, "grad_norm": 0.8984375, "learning_rate": 0.00019140905713828783, "loss": 1.1458, "step": 7991 }, { "epoch": 0.2052121498071803, "grad_norm": 0.78515625, "learning_rate": 0.00019140724676840716, "loss": 1.1126, "step": 7992 }, { "epoch": 0.2052378270031021, "grad_norm": 0.87890625, "learning_rate": 0.0001914054362163601, "loss": 1.0954, "step": 7993 }, { "epoch": 0.20526350419902395, "grad_norm": 0.7265625, "learning_rate": 0.00019140362548215027, "loss": 0.8008, "step": 7994 }, { "epoch": 0.20528918139494576, "grad_norm": 0.80078125, "learning_rate": 0.0001914018145657813, "loss": 1.0457, "step": 7995 }, { "epoch": 0.2053148585908676, "grad_norm": 0.82421875, "learning_rate": 0.00019140000346725672, "loss": 1.123, "step": 7996 }, { "epoch": 0.2053405357867894, "grad_norm": 0.75390625, "learning_rate": 0.00019139819218658022, "loss": 0.9748, "step": 7997 }, { "epoch": 0.2053662129827112, "grad_norm": 0.86328125, "learning_rate": 0.00019139638072375537, "loss": 1.0205, "step": 7998 }, { "epoch": 0.20539189017863305, "grad_norm": 0.953125, "learning_rate": 0.00019139456907878578, "loss": 1.1858, "step": 7999 }, { "epoch": 0.20541756737455485, "grad_norm": 0.7578125, "learning_rate": 0.00019139275725167508, "loss": 1.131, "step": 8000 }, { "epoch": 0.20541756737455485, "eval_loss": 1.038010835647583, "eval_model_preparation_time": 0.0065, "eval_runtime": 404.8423, "eval_samples_per_second": 24.701, "eval_steps_per_second": 0.773, "step": 8000 }, { "epoch": 0.2054432445704767, "grad_norm": 0.81640625, "learning_rate": 0.00019139094524242686, "loss": 1.116, "step": 8001 }, { "epoch": 0.2054689217663985, "grad_norm": 0.81640625, "learning_rate": 0.00019138913305104474, "loss": 1.0927, "step": 8002 }, { "epoch": 0.2054945989623203, "grad_norm": 0.8203125, "learning_rate": 0.00019138732067753233, "loss": 1.0554, "step": 8003 }, { "epoch": 0.20552027615824214, "grad_norm": 0.875, "learning_rate": 0.00019138550812189327, "loss": 1.0614, "step": 8004 }, { "epoch": 0.20554595335416395, "grad_norm": 0.81640625, "learning_rate": 0.00019138369538413113, "loss": 0.9675, "step": 8005 }, { "epoch": 0.2055716305500858, "grad_norm": 0.796875, "learning_rate": 0.00019138188246424956, "loss": 0.996, "step": 8006 }, { "epoch": 0.2055973077460076, "grad_norm": 0.89453125, "learning_rate": 0.00019138006936225214, "loss": 1.1084, "step": 8007 }, { "epoch": 0.2056229849419294, "grad_norm": 0.89453125, "learning_rate": 0.0001913782560781425, "loss": 1.1995, "step": 8008 }, { "epoch": 0.20564866213785124, "grad_norm": 0.796875, "learning_rate": 0.00019137644261192424, "loss": 0.9029, "step": 8009 }, { "epoch": 0.20567433933377305, "grad_norm": 0.82421875, "learning_rate": 0.00019137462896360097, "loss": 0.9828, "step": 8010 }, { "epoch": 0.20570001652969488, "grad_norm": 0.83203125, "learning_rate": 0.00019137281513317633, "loss": 0.8547, "step": 8011 }, { "epoch": 0.2057256937256167, "grad_norm": 0.84375, "learning_rate": 0.00019137100112065394, "loss": 1.15, "step": 8012 }, { "epoch": 0.2057513709215385, "grad_norm": 0.796875, "learning_rate": 0.00019136918692603736, "loss": 1.0153, "step": 8013 }, { "epoch": 0.20577704811746034, "grad_norm": 0.78125, "learning_rate": 0.0001913673725493303, "loss": 0.9804, "step": 8014 }, { "epoch": 0.20580272531338215, "grad_norm": 0.796875, "learning_rate": 0.00019136555799053627, "loss": 0.8899, "step": 8015 }, { "epoch": 0.20582840250930398, "grad_norm": 0.8046875, "learning_rate": 0.00019136374324965895, "loss": 1.036, "step": 8016 }, { "epoch": 0.2058540797052258, "grad_norm": 0.75, "learning_rate": 0.00019136192832670194, "loss": 1.0029, "step": 8017 }, { "epoch": 0.2058797569011476, "grad_norm": 0.76171875, "learning_rate": 0.00019136011322166887, "loss": 1.0905, "step": 8018 }, { "epoch": 0.20590543409706943, "grad_norm": 0.78125, "learning_rate": 0.00019135829793456334, "loss": 1.0147, "step": 8019 }, { "epoch": 0.20593111129299124, "grad_norm": 0.83984375, "learning_rate": 0.00019135648246538899, "loss": 0.9724, "step": 8020 }, { "epoch": 0.20595678848891308, "grad_norm": 0.78515625, "learning_rate": 0.00019135466681414939, "loss": 0.8668, "step": 8021 }, { "epoch": 0.2059824656848349, "grad_norm": 0.78125, "learning_rate": 0.00019135285098084823, "loss": 1.0945, "step": 8022 }, { "epoch": 0.2060081428807567, "grad_norm": 0.88671875, "learning_rate": 0.00019135103496548909, "loss": 1.0605, "step": 8023 }, { "epoch": 0.20603382007667853, "grad_norm": 0.8671875, "learning_rate": 0.00019134921876807557, "loss": 1.0285, "step": 8024 }, { "epoch": 0.20605949727260034, "grad_norm": 0.7890625, "learning_rate": 0.0001913474023886113, "loss": 1.0314, "step": 8025 }, { "epoch": 0.20608517446852218, "grad_norm": 0.8203125, "learning_rate": 0.0001913455858270999, "loss": 1.0328, "step": 8026 }, { "epoch": 0.20611085166444398, "grad_norm": 0.7578125, "learning_rate": 0.00019134376908354505, "loss": 0.9257, "step": 8027 }, { "epoch": 0.2061365288603658, "grad_norm": 0.765625, "learning_rate": 0.00019134195215795026, "loss": 1.1135, "step": 8028 }, { "epoch": 0.20616220605628763, "grad_norm": 0.8125, "learning_rate": 0.00019134013505031926, "loss": 1.1573, "step": 8029 }, { "epoch": 0.20618788325220944, "grad_norm": 0.75, "learning_rate": 0.0001913383177606556, "loss": 0.9861, "step": 8030 }, { "epoch": 0.20621356044813127, "grad_norm": 0.8515625, "learning_rate": 0.0001913365002889629, "loss": 1.0478, "step": 8031 }, { "epoch": 0.20623923764405308, "grad_norm": 0.84765625, "learning_rate": 0.00019133468263524483, "loss": 1.0244, "step": 8032 }, { "epoch": 0.2062649148399749, "grad_norm": 0.72265625, "learning_rate": 0.000191332864799505, "loss": 1.0029, "step": 8033 }, { "epoch": 0.20629059203589672, "grad_norm": 0.7734375, "learning_rate": 0.00019133104678174698, "loss": 1.3331, "step": 8034 }, { "epoch": 0.20631626923181853, "grad_norm": 0.84375, "learning_rate": 0.00019132922858197446, "loss": 1.1568, "step": 8035 }, { "epoch": 0.20634194642774037, "grad_norm": 0.8203125, "learning_rate": 0.00019132741020019104, "loss": 0.9361, "step": 8036 }, { "epoch": 0.20636762362366218, "grad_norm": 0.76171875, "learning_rate": 0.00019132559163640033, "loss": 0.8697, "step": 8037 }, { "epoch": 0.20639330081958399, "grad_norm": 0.79296875, "learning_rate": 0.00019132377289060598, "loss": 1.0241, "step": 8038 }, { "epoch": 0.20641897801550582, "grad_norm": 0.8046875, "learning_rate": 0.0001913219539628116, "loss": 1.0121, "step": 8039 }, { "epoch": 0.20644465521142763, "grad_norm": 0.75, "learning_rate": 0.0001913201348530208, "loss": 0.8954, "step": 8040 }, { "epoch": 0.20647033240734947, "grad_norm": 0.79296875, "learning_rate": 0.00019131831556123722, "loss": 1.0843, "step": 8041 }, { "epoch": 0.20649600960327127, "grad_norm": 0.72265625, "learning_rate": 0.00019131649608746448, "loss": 1.1213, "step": 8042 }, { "epoch": 0.20652168679919308, "grad_norm": 0.74609375, "learning_rate": 0.00019131467643170624, "loss": 0.9842, "step": 8043 }, { "epoch": 0.20654736399511492, "grad_norm": 0.82421875, "learning_rate": 0.0001913128565939661, "loss": 1.0779, "step": 8044 }, { "epoch": 0.20657304119103673, "grad_norm": 0.82421875, "learning_rate": 0.00019131103657424764, "loss": 1.0418, "step": 8045 }, { "epoch": 0.20659871838695856, "grad_norm": 0.7734375, "learning_rate": 0.00019130921637255458, "loss": 0.9214, "step": 8046 }, { "epoch": 0.20662439558288037, "grad_norm": 0.859375, "learning_rate": 0.00019130739598889046, "loss": 1.1524, "step": 8047 }, { "epoch": 0.20665007277880218, "grad_norm": 0.8359375, "learning_rate": 0.00019130557542325897, "loss": 1.0605, "step": 8048 }, { "epoch": 0.20667574997472402, "grad_norm": 0.83984375, "learning_rate": 0.0001913037546756637, "loss": 1.0532, "step": 8049 }, { "epoch": 0.20670142717064582, "grad_norm": 0.828125, "learning_rate": 0.00019130193374610835, "loss": 1.0683, "step": 8050 }, { "epoch": 0.20672710436656766, "grad_norm": 0.80078125, "learning_rate": 0.00019130011263459643, "loss": 0.9846, "step": 8051 }, { "epoch": 0.20675278156248947, "grad_norm": 0.83203125, "learning_rate": 0.00019129829134113165, "loss": 1.097, "step": 8052 }, { "epoch": 0.20677845875841128, "grad_norm": 0.8359375, "learning_rate": 0.00019129646986571763, "loss": 1.1127, "step": 8053 }, { "epoch": 0.2068041359543331, "grad_norm": 0.76171875, "learning_rate": 0.00019129464820835798, "loss": 0.8658, "step": 8054 }, { "epoch": 0.20682981315025492, "grad_norm": 0.8359375, "learning_rate": 0.00019129282636905638, "loss": 1.0536, "step": 8055 }, { "epoch": 0.20685549034617676, "grad_norm": 0.7890625, "learning_rate": 0.00019129100434781636, "loss": 1.0836, "step": 8056 }, { "epoch": 0.20688116754209857, "grad_norm": 0.8203125, "learning_rate": 0.00019128918214464163, "loss": 0.9535, "step": 8057 }, { "epoch": 0.20690684473802037, "grad_norm": 0.73828125, "learning_rate": 0.00019128735975953586, "loss": 1.0701, "step": 8058 }, { "epoch": 0.2069325219339422, "grad_norm": 0.83203125, "learning_rate": 0.0001912855371925026, "loss": 1.1281, "step": 8059 }, { "epoch": 0.20695819912986402, "grad_norm": 0.75390625, "learning_rate": 0.0001912837144435455, "loss": 0.9238, "step": 8060 }, { "epoch": 0.20698387632578585, "grad_norm": 0.74609375, "learning_rate": 0.0001912818915126682, "loss": 0.8374, "step": 8061 }, { "epoch": 0.20700955352170766, "grad_norm": 0.8359375, "learning_rate": 0.00019128006839987433, "loss": 0.9854, "step": 8062 }, { "epoch": 0.20703523071762947, "grad_norm": 0.76953125, "learning_rate": 0.00019127824510516755, "loss": 1.007, "step": 8063 }, { "epoch": 0.2070609079135513, "grad_norm": 0.75, "learning_rate": 0.00019127642162855145, "loss": 0.9887, "step": 8064 }, { "epoch": 0.20708658510947311, "grad_norm": 0.87890625, "learning_rate": 0.0001912745979700297, "loss": 1.047, "step": 8065 }, { "epoch": 0.20711226230539495, "grad_norm": 0.8046875, "learning_rate": 0.0001912727741296059, "loss": 1.0986, "step": 8066 }, { "epoch": 0.20713793950131676, "grad_norm": 0.78515625, "learning_rate": 0.00019127095010728376, "loss": 0.8369, "step": 8067 }, { "epoch": 0.20716361669723857, "grad_norm": 0.71875, "learning_rate": 0.00019126912590306682, "loss": 1.0421, "step": 8068 }, { "epoch": 0.2071892938931604, "grad_norm": 0.8046875, "learning_rate": 0.00019126730151695876, "loss": 1.1018, "step": 8069 }, { "epoch": 0.2072149710890822, "grad_norm": 0.83984375, "learning_rate": 0.0001912654769489632, "loss": 0.9782, "step": 8070 }, { "epoch": 0.20724064828500405, "grad_norm": 0.8125, "learning_rate": 0.00019126365219908383, "loss": 0.9386, "step": 8071 }, { "epoch": 0.20726632548092586, "grad_norm": 0.83203125, "learning_rate": 0.0001912618272673242, "loss": 1.0366, "step": 8072 }, { "epoch": 0.20729200267684766, "grad_norm": 0.859375, "learning_rate": 0.00019126000215368802, "loss": 0.9391, "step": 8073 }, { "epoch": 0.2073176798727695, "grad_norm": 0.8203125, "learning_rate": 0.00019125817685817888, "loss": 0.9942, "step": 8074 }, { "epoch": 0.2073433570686913, "grad_norm": 1.03125, "learning_rate": 0.00019125635138080046, "loss": 1.1366, "step": 8075 }, { "epoch": 0.20736903426461314, "grad_norm": 0.82421875, "learning_rate": 0.00019125452572155637, "loss": 1.0851, "step": 8076 }, { "epoch": 0.20739471146053495, "grad_norm": 0.84375, "learning_rate": 0.00019125269988045024, "loss": 0.946, "step": 8077 }, { "epoch": 0.20742038865645676, "grad_norm": 0.85546875, "learning_rate": 0.00019125087385748574, "loss": 1.0769, "step": 8078 }, { "epoch": 0.2074460658523786, "grad_norm": 0.85546875, "learning_rate": 0.00019124904765266648, "loss": 1.0885, "step": 8079 }, { "epoch": 0.2074717430483004, "grad_norm": 0.75390625, "learning_rate": 0.0001912472212659961, "loss": 1.063, "step": 8080 }, { "epoch": 0.20749742024422224, "grad_norm": 0.8828125, "learning_rate": 0.00019124539469747825, "loss": 1.1091, "step": 8081 }, { "epoch": 0.20752309744014405, "grad_norm": 0.89453125, "learning_rate": 0.0001912435679471166, "loss": 1.0789, "step": 8082 }, { "epoch": 0.20754877463606586, "grad_norm": 0.796875, "learning_rate": 0.00019124174101491472, "loss": 0.9917, "step": 8083 }, { "epoch": 0.2075744518319877, "grad_norm": 0.77734375, "learning_rate": 0.0001912399139008763, "loss": 0.9004, "step": 8084 }, { "epoch": 0.2076001290279095, "grad_norm": 0.734375, "learning_rate": 0.00019123808660500503, "loss": 0.9719, "step": 8085 }, { "epoch": 0.20762580622383134, "grad_norm": 0.86328125, "learning_rate": 0.00019123625912730442, "loss": 1.1041, "step": 8086 }, { "epoch": 0.20765148341975315, "grad_norm": 0.8515625, "learning_rate": 0.00019123443146777823, "loss": 1.1403, "step": 8087 }, { "epoch": 0.20767716061567496, "grad_norm": 0.7734375, "learning_rate": 0.00019123260362643004, "loss": 1.0173, "step": 8088 }, { "epoch": 0.2077028378115968, "grad_norm": 0.75, "learning_rate": 0.0001912307756032635, "loss": 1.0861, "step": 8089 }, { "epoch": 0.2077285150075186, "grad_norm": 0.8203125, "learning_rate": 0.0001912289473982823, "loss": 1.0214, "step": 8090 }, { "epoch": 0.20775419220344044, "grad_norm": 0.86328125, "learning_rate": 0.00019122711901149004, "loss": 1.1676, "step": 8091 }, { "epoch": 0.20777986939936224, "grad_norm": 0.84375, "learning_rate": 0.00019122529044289036, "loss": 1.0264, "step": 8092 }, { "epoch": 0.20780554659528405, "grad_norm": 0.91015625, "learning_rate": 0.00019122346169248692, "loss": 1.0472, "step": 8093 }, { "epoch": 0.2078312237912059, "grad_norm": 0.89453125, "learning_rate": 0.00019122163276028337, "loss": 1.0199, "step": 8094 }, { "epoch": 0.2078569009871277, "grad_norm": 0.8515625, "learning_rate": 0.00019121980364628332, "loss": 1.0681, "step": 8095 }, { "epoch": 0.20788257818304953, "grad_norm": 0.80078125, "learning_rate": 0.00019121797435049046, "loss": 0.9338, "step": 8096 }, { "epoch": 0.20790825537897134, "grad_norm": 0.73046875, "learning_rate": 0.00019121614487290845, "loss": 1.0921, "step": 8097 }, { "epoch": 0.20793393257489315, "grad_norm": 0.875, "learning_rate": 0.00019121431521354086, "loss": 1.0198, "step": 8098 }, { "epoch": 0.20795960977081498, "grad_norm": 0.84765625, "learning_rate": 0.00019121248537239138, "loss": 1.1139, "step": 8099 }, { "epoch": 0.2079852869667368, "grad_norm": 0.76171875, "learning_rate": 0.00019121065534946364, "loss": 1.0422, "step": 8100 }, { "epoch": 0.20801096416265863, "grad_norm": 0.76953125, "learning_rate": 0.00019120882514476136, "loss": 0.9961, "step": 8101 }, { "epoch": 0.20803664135858044, "grad_norm": 0.83984375, "learning_rate": 0.00019120699475828808, "loss": 1.0864, "step": 8102 }, { "epoch": 0.20806231855450225, "grad_norm": 0.80859375, "learning_rate": 0.0001912051641900475, "loss": 1.1226, "step": 8103 }, { "epoch": 0.20808799575042408, "grad_norm": 0.8515625, "learning_rate": 0.0001912033334400433, "loss": 1.0398, "step": 8104 }, { "epoch": 0.2081136729463459, "grad_norm": 0.80859375, "learning_rate": 0.00019120150250827908, "loss": 0.9449, "step": 8105 }, { "epoch": 0.20813935014226773, "grad_norm": 0.87109375, "learning_rate": 0.0001911996713947585, "loss": 0.9563, "step": 8106 }, { "epoch": 0.20816502733818953, "grad_norm": 0.82421875, "learning_rate": 0.0001911978400994852, "loss": 1.066, "step": 8107 }, { "epoch": 0.20819070453411134, "grad_norm": 0.84375, "learning_rate": 0.00019119600862246285, "loss": 1.1004, "step": 8108 }, { "epoch": 0.20821638173003318, "grad_norm": 0.86328125, "learning_rate": 0.00019119417696369513, "loss": 1.0249, "step": 8109 }, { "epoch": 0.208242058925955, "grad_norm": 0.796875, "learning_rate": 0.0001911923451231856, "loss": 1.1011, "step": 8110 }, { "epoch": 0.20826773612187682, "grad_norm": 0.796875, "learning_rate": 0.00019119051310093798, "loss": 1.0533, "step": 8111 }, { "epoch": 0.20829341331779863, "grad_norm": 0.78125, "learning_rate": 0.0001911886808969559, "loss": 1.0502, "step": 8112 }, { "epoch": 0.20831909051372044, "grad_norm": 0.81640625, "learning_rate": 0.00019118684851124305, "loss": 1.1615, "step": 8113 }, { "epoch": 0.20834476770964228, "grad_norm": 0.75, "learning_rate": 0.00019118501594380302, "loss": 0.9136, "step": 8114 }, { "epoch": 0.20837044490556408, "grad_norm": 0.80859375, "learning_rate": 0.0001911831831946395, "loss": 1.0915, "step": 8115 }, { "epoch": 0.20839612210148592, "grad_norm": 0.9375, "learning_rate": 0.0001911813502637561, "loss": 1.0078, "step": 8116 }, { "epoch": 0.20842179929740773, "grad_norm": 0.80078125, "learning_rate": 0.00019117951715115657, "loss": 1.1058, "step": 8117 }, { "epoch": 0.20844747649332954, "grad_norm": 1.03125, "learning_rate": 0.00019117768385684442, "loss": 1.0897, "step": 8118 }, { "epoch": 0.20847315368925137, "grad_norm": 0.80859375, "learning_rate": 0.00019117585038082343, "loss": 0.9739, "step": 8119 }, { "epoch": 0.20849883088517318, "grad_norm": 0.86328125, "learning_rate": 0.0001911740167230972, "loss": 1.0145, "step": 8120 }, { "epoch": 0.20852450808109502, "grad_norm": 0.84375, "learning_rate": 0.0001911721828836694, "loss": 1.0702, "step": 8121 }, { "epoch": 0.20855018527701683, "grad_norm": 0.7578125, "learning_rate": 0.00019117034886254364, "loss": 0.9201, "step": 8122 }, { "epoch": 0.20857586247293863, "grad_norm": 0.8359375, "learning_rate": 0.00019116851465972366, "loss": 1.0679, "step": 8123 }, { "epoch": 0.20860153966886047, "grad_norm": 0.9375, "learning_rate": 0.00019116668027521302, "loss": 0.9671, "step": 8124 }, { "epoch": 0.20862721686478228, "grad_norm": 0.78125, "learning_rate": 0.00019116484570901545, "loss": 0.9311, "step": 8125 }, { "epoch": 0.2086528940607041, "grad_norm": 0.84375, "learning_rate": 0.00019116301096113456, "loss": 1.0572, "step": 8126 }, { "epoch": 0.20867857125662592, "grad_norm": 0.8046875, "learning_rate": 0.00019116117603157405, "loss": 1.0568, "step": 8127 }, { "epoch": 0.20870424845254773, "grad_norm": 0.81640625, "learning_rate": 0.00019115934092033752, "loss": 0.9447, "step": 8128 }, { "epoch": 0.20872992564846957, "grad_norm": 1.0, "learning_rate": 0.00019115750562742867, "loss": 1.0254, "step": 8129 }, { "epoch": 0.20875560284439137, "grad_norm": 0.8515625, "learning_rate": 0.00019115567015285114, "loss": 1.1939, "step": 8130 }, { "epoch": 0.2087812800403132, "grad_norm": 0.78515625, "learning_rate": 0.0001911538344966086, "loss": 1.0571, "step": 8131 }, { "epoch": 0.20880695723623502, "grad_norm": 0.765625, "learning_rate": 0.0001911519986587047, "loss": 0.9996, "step": 8132 }, { "epoch": 0.20883263443215683, "grad_norm": 0.921875, "learning_rate": 0.0001911501626391431, "loss": 1.0065, "step": 8133 }, { "epoch": 0.20885831162807866, "grad_norm": 0.859375, "learning_rate": 0.00019114832643792745, "loss": 1.1657, "step": 8134 }, { "epoch": 0.20888398882400047, "grad_norm": 0.82421875, "learning_rate": 0.00019114649005506142, "loss": 1.0293, "step": 8135 }, { "epoch": 0.2089096660199223, "grad_norm": 0.85546875, "learning_rate": 0.00019114465349054867, "loss": 1.2605, "step": 8136 }, { "epoch": 0.20893534321584412, "grad_norm": 0.8203125, "learning_rate": 0.0001911428167443929, "loss": 1.0798, "step": 8137 }, { "epoch": 0.20896102041176592, "grad_norm": 0.81640625, "learning_rate": 0.0001911409798165977, "loss": 1.0367, "step": 8138 }, { "epoch": 0.20898669760768776, "grad_norm": 0.921875, "learning_rate": 0.00019113914270716675, "loss": 0.9337, "step": 8139 }, { "epoch": 0.20901237480360957, "grad_norm": 0.7421875, "learning_rate": 0.00019113730541610373, "loss": 1.1258, "step": 8140 }, { "epoch": 0.2090380519995314, "grad_norm": 0.8515625, "learning_rate": 0.00019113546794341228, "loss": 1.0588, "step": 8141 }, { "epoch": 0.2090637291954532, "grad_norm": 0.796875, "learning_rate": 0.00019113363028909608, "loss": 1.1393, "step": 8142 }, { "epoch": 0.20908940639137502, "grad_norm": 0.890625, "learning_rate": 0.0001911317924531588, "loss": 1.0997, "step": 8143 }, { "epoch": 0.20911508358729686, "grad_norm": 0.8984375, "learning_rate": 0.0001911299544356041, "loss": 1.2124, "step": 8144 }, { "epoch": 0.20914076078321867, "grad_norm": 0.83984375, "learning_rate": 0.0001911281162364356, "loss": 1.0024, "step": 8145 }, { "epoch": 0.2091664379791405, "grad_norm": 0.75390625, "learning_rate": 0.000191126277855657, "loss": 1.0379, "step": 8146 }, { "epoch": 0.2091921151750623, "grad_norm": 0.796875, "learning_rate": 0.00019112443929327196, "loss": 1.0996, "step": 8147 }, { "epoch": 0.20921779237098412, "grad_norm": 0.87890625, "learning_rate": 0.00019112260054928413, "loss": 1.0635, "step": 8148 }, { "epoch": 0.20924346956690595, "grad_norm": 0.89453125, "learning_rate": 0.00019112076162369724, "loss": 0.8901, "step": 8149 }, { "epoch": 0.20926914676282776, "grad_norm": 0.76953125, "learning_rate": 0.00019111892251651486, "loss": 0.9742, "step": 8150 }, { "epoch": 0.2092948239587496, "grad_norm": 0.8359375, "learning_rate": 0.00019111708322774073, "loss": 1.0481, "step": 8151 }, { "epoch": 0.2093205011546714, "grad_norm": 0.765625, "learning_rate": 0.00019111524375737843, "loss": 0.9652, "step": 8152 }, { "epoch": 0.20934617835059322, "grad_norm": 0.8671875, "learning_rate": 0.00019111340410543176, "loss": 1.1146, "step": 8153 }, { "epoch": 0.20937185554651505, "grad_norm": 0.78515625, "learning_rate": 0.00019111156427190423, "loss": 0.9694, "step": 8154 }, { "epoch": 0.20939753274243686, "grad_norm": 0.87890625, "learning_rate": 0.00019110972425679962, "loss": 1.0426, "step": 8155 }, { "epoch": 0.2094232099383587, "grad_norm": 0.78125, "learning_rate": 0.00019110788406012156, "loss": 1.0383, "step": 8156 }, { "epoch": 0.2094488871342805, "grad_norm": 0.80078125, "learning_rate": 0.0001911060436818737, "loss": 1.0423, "step": 8157 }, { "epoch": 0.2094745643302023, "grad_norm": 0.83203125, "learning_rate": 0.00019110420312205975, "loss": 1.0382, "step": 8158 }, { "epoch": 0.20950024152612415, "grad_norm": 0.81640625, "learning_rate": 0.0001911023623806833, "loss": 1.0015, "step": 8159 }, { "epoch": 0.20952591872204596, "grad_norm": 0.7734375, "learning_rate": 0.00019110052145774814, "loss": 1.0658, "step": 8160 }, { "epoch": 0.20955159591796776, "grad_norm": 0.7734375, "learning_rate": 0.00019109868035325781, "loss": 1.0888, "step": 8161 }, { "epoch": 0.2095772731138896, "grad_norm": 0.9921875, "learning_rate": 0.0001910968390672161, "loss": 1.2058, "step": 8162 }, { "epoch": 0.2096029503098114, "grad_norm": 0.80859375, "learning_rate": 0.00019109499759962657, "loss": 1.0726, "step": 8163 }, { "epoch": 0.20962862750573324, "grad_norm": 0.8515625, "learning_rate": 0.00019109315595049295, "loss": 1.0799, "step": 8164 }, { "epoch": 0.20965430470165505, "grad_norm": 0.84375, "learning_rate": 0.00019109131411981892, "loss": 1.1652, "step": 8165 }, { "epoch": 0.20967998189757686, "grad_norm": 0.78125, "learning_rate": 0.0001910894721076081, "loss": 0.9864, "step": 8166 }, { "epoch": 0.2097056590934987, "grad_norm": 0.78125, "learning_rate": 0.0001910876299138642, "loss": 1.0531, "step": 8167 }, { "epoch": 0.2097313362894205, "grad_norm": 0.8359375, "learning_rate": 0.00019108578753859086, "loss": 1.1386, "step": 8168 }, { "epoch": 0.20975701348534234, "grad_norm": 0.78125, "learning_rate": 0.00019108394498179183, "loss": 0.9706, "step": 8169 }, { "epoch": 0.20978269068126415, "grad_norm": 0.8125, "learning_rate": 0.00019108210224347069, "loss": 1.163, "step": 8170 }, { "epoch": 0.20980836787718596, "grad_norm": 0.7421875, "learning_rate": 0.0001910802593236311, "loss": 0.99, "step": 8171 }, { "epoch": 0.2098340450731078, "grad_norm": 0.79296875, "learning_rate": 0.00019107841622227688, "loss": 0.9514, "step": 8172 }, { "epoch": 0.2098597222690296, "grad_norm": 0.8046875, "learning_rate": 0.00019107657293941156, "loss": 0.8824, "step": 8173 }, { "epoch": 0.20988539946495144, "grad_norm": 0.8125, "learning_rate": 0.0001910747294750388, "loss": 1.0957, "step": 8174 }, { "epoch": 0.20991107666087325, "grad_norm": 0.73828125, "learning_rate": 0.00019107288582916239, "loss": 0.9548, "step": 8175 }, { "epoch": 0.20993675385679506, "grad_norm": 0.80078125, "learning_rate": 0.00019107104200178596, "loss": 1.0483, "step": 8176 }, { "epoch": 0.2099624310527169, "grad_norm": 1.09375, "learning_rate": 0.00019106919799291312, "loss": 1.047, "step": 8177 }, { "epoch": 0.2099881082486387, "grad_norm": 0.8203125, "learning_rate": 0.00019106735380254762, "loss": 1.113, "step": 8178 }, { "epoch": 0.21001378544456054, "grad_norm": 0.9453125, "learning_rate": 0.00019106550943069313, "loss": 1.0991, "step": 8179 }, { "epoch": 0.21003946264048234, "grad_norm": 0.80078125, "learning_rate": 0.00019106366487735328, "loss": 0.955, "step": 8180 }, { "epoch": 0.21006513983640415, "grad_norm": 0.77734375, "learning_rate": 0.0001910618201425318, "loss": 1.0214, "step": 8181 }, { "epoch": 0.210090817032326, "grad_norm": 0.828125, "learning_rate": 0.0001910599752262323, "loss": 1.0913, "step": 8182 }, { "epoch": 0.2101164942282478, "grad_norm": 0.9140625, "learning_rate": 0.0001910581301284585, "loss": 1.1212, "step": 8183 }, { "epoch": 0.21014217142416963, "grad_norm": 0.75390625, "learning_rate": 0.00019105628484921406, "loss": 0.99, "step": 8184 }, { "epoch": 0.21016784862009144, "grad_norm": 0.73046875, "learning_rate": 0.0001910544393885027, "loss": 0.7977, "step": 8185 }, { "epoch": 0.21019352581601325, "grad_norm": 0.77734375, "learning_rate": 0.00019105259374632808, "loss": 1.0291, "step": 8186 }, { "epoch": 0.21021920301193509, "grad_norm": 0.796875, "learning_rate": 0.0001910507479226938, "loss": 1.077, "step": 8187 }, { "epoch": 0.2102448802078569, "grad_norm": 0.76171875, "learning_rate": 0.00019104890191760368, "loss": 1.001, "step": 8188 }, { "epoch": 0.21027055740377873, "grad_norm": 0.796875, "learning_rate": 0.0001910470557310613, "loss": 0.9952, "step": 8189 }, { "epoch": 0.21029623459970054, "grad_norm": 0.875, "learning_rate": 0.00019104520936307035, "loss": 1.0625, "step": 8190 }, { "epoch": 0.21032191179562235, "grad_norm": 0.84765625, "learning_rate": 0.0001910433628136345, "loss": 1.1011, "step": 8191 }, { "epoch": 0.21034758899154418, "grad_norm": 0.80078125, "learning_rate": 0.00019104151608275748, "loss": 0.9899, "step": 8192 }, { "epoch": 0.210373266187466, "grad_norm": 0.8125, "learning_rate": 0.00019103966917044296, "loss": 1.0156, "step": 8193 }, { "epoch": 0.21039894338338783, "grad_norm": 0.8359375, "learning_rate": 0.00019103782207669458, "loss": 1.1437, "step": 8194 }, { "epoch": 0.21042462057930963, "grad_norm": 0.8046875, "learning_rate": 0.00019103597480151603, "loss": 1.0661, "step": 8195 }, { "epoch": 0.21045029777523144, "grad_norm": 0.73046875, "learning_rate": 0.00019103412734491104, "loss": 1.0757, "step": 8196 }, { "epoch": 0.21047597497115328, "grad_norm": 0.8671875, "learning_rate": 0.00019103227970688322, "loss": 0.9137, "step": 8197 }, { "epoch": 0.2105016521670751, "grad_norm": 0.77734375, "learning_rate": 0.0001910304318874363, "loss": 1.207, "step": 8198 }, { "epoch": 0.21052732936299692, "grad_norm": 0.80859375, "learning_rate": 0.00019102858388657395, "loss": 0.9845, "step": 8199 }, { "epoch": 0.21055300655891873, "grad_norm": 0.79296875, "learning_rate": 0.0001910267357042999, "loss": 1.1136, "step": 8200 }, { "epoch": 0.21057868375484054, "grad_norm": 0.86328125, "learning_rate": 0.00019102488734061774, "loss": 1.195, "step": 8201 }, { "epoch": 0.21060436095076238, "grad_norm": 0.953125, "learning_rate": 0.00019102303879553123, "loss": 0.8757, "step": 8202 }, { "epoch": 0.21063003814668418, "grad_norm": 0.8984375, "learning_rate": 0.00019102119006904399, "loss": 1.0022, "step": 8203 }, { "epoch": 0.21065571534260602, "grad_norm": 0.80859375, "learning_rate": 0.00019101934116115977, "loss": 0.9648, "step": 8204 }, { "epoch": 0.21068139253852783, "grad_norm": 0.8046875, "learning_rate": 0.00019101749207188221, "loss": 0.9186, "step": 8205 }, { "epoch": 0.21070706973444964, "grad_norm": 0.78125, "learning_rate": 0.00019101564280121503, "loss": 1.0405, "step": 8206 }, { "epoch": 0.21073274693037147, "grad_norm": 0.8046875, "learning_rate": 0.0001910137933491619, "loss": 1.0491, "step": 8207 }, { "epoch": 0.21075842412629328, "grad_norm": 0.83203125, "learning_rate": 0.00019101194371572646, "loss": 0.9023, "step": 8208 }, { "epoch": 0.21078410132221512, "grad_norm": 0.7421875, "learning_rate": 0.00019101009390091244, "loss": 0.9838, "step": 8209 }, { "epoch": 0.21080977851813693, "grad_norm": 0.78125, "learning_rate": 0.00019100824390472356, "loss": 1.0019, "step": 8210 }, { "epoch": 0.21083545571405873, "grad_norm": 0.70703125, "learning_rate": 0.00019100639372716344, "loss": 0.8686, "step": 8211 }, { "epoch": 0.21086113290998057, "grad_norm": 0.828125, "learning_rate": 0.00019100454336823584, "loss": 1.0825, "step": 8212 }, { "epoch": 0.21088681010590238, "grad_norm": 0.8515625, "learning_rate": 0.00019100269282794433, "loss": 0.9149, "step": 8213 }, { "epoch": 0.21091248730182421, "grad_norm": 0.8203125, "learning_rate": 0.00019100084210629275, "loss": 1.0566, "step": 8214 }, { "epoch": 0.21093816449774602, "grad_norm": 0.84375, "learning_rate": 0.00019099899120328466, "loss": 1.0499, "step": 8215 }, { "epoch": 0.21096384169366783, "grad_norm": 0.80078125, "learning_rate": 0.00019099714011892386, "loss": 0.9924, "step": 8216 }, { "epoch": 0.21098951888958967, "grad_norm": 0.74609375, "learning_rate": 0.00019099528885321393, "loss": 1.0475, "step": 8217 }, { "epoch": 0.21101519608551147, "grad_norm": 0.796875, "learning_rate": 0.0001909934374061586, "loss": 1.0938, "step": 8218 }, { "epoch": 0.2110408732814333, "grad_norm": 0.83203125, "learning_rate": 0.00019099158577776158, "loss": 1.1154, "step": 8219 }, { "epoch": 0.21106655047735512, "grad_norm": 0.76171875, "learning_rate": 0.00019098973396802657, "loss": 0.8962, "step": 8220 }, { "epoch": 0.21109222767327693, "grad_norm": 0.78515625, "learning_rate": 0.00019098788197695723, "loss": 1.0575, "step": 8221 }, { "epoch": 0.21111790486919876, "grad_norm": 0.8359375, "learning_rate": 0.00019098602980455724, "loss": 1.1799, "step": 8222 }, { "epoch": 0.21114358206512057, "grad_norm": 0.74609375, "learning_rate": 0.00019098417745083034, "loss": 1.0576, "step": 8223 }, { "epoch": 0.2111692592610424, "grad_norm": 0.80859375, "learning_rate": 0.00019098232491578018, "loss": 1.188, "step": 8224 }, { "epoch": 0.21119493645696422, "grad_norm": 0.71484375, "learning_rate": 0.00019098047219941048, "loss": 1.0117, "step": 8225 }, { "epoch": 0.21122061365288602, "grad_norm": 0.79296875, "learning_rate": 0.00019097861930172488, "loss": 1.0158, "step": 8226 }, { "epoch": 0.21124629084880786, "grad_norm": 0.8125, "learning_rate": 0.00019097676622272715, "loss": 1.0968, "step": 8227 }, { "epoch": 0.21127196804472967, "grad_norm": 0.7734375, "learning_rate": 0.00019097491296242095, "loss": 0.902, "step": 8228 }, { "epoch": 0.2112976452406515, "grad_norm": 0.86328125, "learning_rate": 0.0001909730595208099, "loss": 1.0233, "step": 8229 }, { "epoch": 0.2113233224365733, "grad_norm": 0.7578125, "learning_rate": 0.0001909712058978978, "loss": 1.0726, "step": 8230 }, { "epoch": 0.21134899963249512, "grad_norm": 0.7734375, "learning_rate": 0.00019096935209368833, "loss": 0.997, "step": 8231 }, { "epoch": 0.21137467682841696, "grad_norm": 0.7734375, "learning_rate": 0.00019096749810818513, "loss": 0.9907, "step": 8232 }, { "epoch": 0.21140035402433877, "grad_norm": 0.78125, "learning_rate": 0.00019096564394139193, "loss": 0.9467, "step": 8233 }, { "epoch": 0.2114260312202606, "grad_norm": 0.83203125, "learning_rate": 0.0001909637895933124, "loss": 1.078, "step": 8234 }, { "epoch": 0.2114517084161824, "grad_norm": 0.74609375, "learning_rate": 0.0001909619350639503, "loss": 0.9067, "step": 8235 }, { "epoch": 0.21147738561210422, "grad_norm": 0.7890625, "learning_rate": 0.00019096008035330924, "loss": 1.0115, "step": 8236 }, { "epoch": 0.21150306280802605, "grad_norm": 0.80859375, "learning_rate": 0.00019095822546139296, "loss": 1.0764, "step": 8237 }, { "epoch": 0.21152874000394786, "grad_norm": 0.875, "learning_rate": 0.00019095637038820518, "loss": 1.0517, "step": 8238 }, { "epoch": 0.2115544171998697, "grad_norm": 0.8046875, "learning_rate": 0.00019095451513374953, "loss": 1.027, "step": 8239 }, { "epoch": 0.2115800943957915, "grad_norm": 0.8515625, "learning_rate": 0.00019095265969802978, "loss": 0.9968, "step": 8240 }, { "epoch": 0.21160577159171332, "grad_norm": 0.84375, "learning_rate": 0.00019095080408104958, "loss": 1.0555, "step": 8241 }, { "epoch": 0.21163144878763515, "grad_norm": 0.8359375, "learning_rate": 0.00019094894828281263, "loss": 1.0032, "step": 8242 }, { "epoch": 0.21165712598355696, "grad_norm": 0.84375, "learning_rate": 0.00019094709230332266, "loss": 1.1045, "step": 8243 }, { "epoch": 0.2116828031794788, "grad_norm": 0.7734375, "learning_rate": 0.00019094523614258336, "loss": 1.0081, "step": 8244 }, { "epoch": 0.2117084803754006, "grad_norm": 0.73046875, "learning_rate": 0.0001909433798005984, "loss": 1.12, "step": 8245 }, { "epoch": 0.2117341575713224, "grad_norm": 0.7421875, "learning_rate": 0.00019094152327737152, "loss": 0.8963, "step": 8246 }, { "epoch": 0.21175983476724425, "grad_norm": 0.84765625, "learning_rate": 0.00019093966657290635, "loss": 1.1481, "step": 8247 }, { "epoch": 0.21178551196316606, "grad_norm": 0.86328125, "learning_rate": 0.0001909378096872067, "loss": 1.0306, "step": 8248 }, { "epoch": 0.2118111891590879, "grad_norm": 0.83984375, "learning_rate": 0.00019093595262027616, "loss": 1.1693, "step": 8249 }, { "epoch": 0.2118368663550097, "grad_norm": 0.78125, "learning_rate": 0.0001909340953721185, "loss": 1.1214, "step": 8250 }, { "epoch": 0.2118625435509315, "grad_norm": 0.8125, "learning_rate": 0.0001909322379427374, "loss": 0.9656, "step": 8251 }, { "epoch": 0.21188822074685335, "grad_norm": 0.80859375, "learning_rate": 0.0001909303803321366, "loss": 0.982, "step": 8252 }, { "epoch": 0.21191389794277515, "grad_norm": 0.78125, "learning_rate": 0.00019092852254031973, "loss": 1.1104, "step": 8253 }, { "epoch": 0.211939575138697, "grad_norm": 0.7734375, "learning_rate": 0.0001909266645672905, "loss": 1.037, "step": 8254 }, { "epoch": 0.2119652523346188, "grad_norm": 0.85546875, "learning_rate": 0.0001909248064130527, "loss": 1.085, "step": 8255 }, { "epoch": 0.2119909295305406, "grad_norm": 0.77734375, "learning_rate": 0.00019092294807760994, "loss": 0.9214, "step": 8256 }, { "epoch": 0.21201660672646244, "grad_norm": 0.828125, "learning_rate": 0.00019092108956096597, "loss": 1.1064, "step": 8257 }, { "epoch": 0.21204228392238425, "grad_norm": 0.76953125, "learning_rate": 0.00019091923086312447, "loss": 0.9876, "step": 8258 }, { "epoch": 0.2120679611183061, "grad_norm": 0.83203125, "learning_rate": 0.00019091737198408918, "loss": 1.0225, "step": 8259 }, { "epoch": 0.2120936383142279, "grad_norm": 0.84765625, "learning_rate": 0.00019091551292386373, "loss": 1.151, "step": 8260 }, { "epoch": 0.2121193155101497, "grad_norm": 0.79296875, "learning_rate": 0.00019091365368245194, "loss": 0.9231, "step": 8261 }, { "epoch": 0.21214499270607154, "grad_norm": 0.82421875, "learning_rate": 0.00019091179425985742, "loss": 1.123, "step": 8262 }, { "epoch": 0.21217066990199335, "grad_norm": 0.86328125, "learning_rate": 0.00019090993465608388, "loss": 0.9144, "step": 8263 }, { "epoch": 0.21219634709791518, "grad_norm": 0.80859375, "learning_rate": 0.00019090807487113506, "loss": 0.9565, "step": 8264 }, { "epoch": 0.212222024293837, "grad_norm": 0.76171875, "learning_rate": 0.0001909062149050147, "loss": 1.0345, "step": 8265 }, { "epoch": 0.2122477014897588, "grad_norm": 0.828125, "learning_rate": 0.00019090435475772644, "loss": 1.029, "step": 8266 }, { "epoch": 0.21227337868568064, "grad_norm": 0.7578125, "learning_rate": 0.000190902494429274, "loss": 1.1406, "step": 8267 }, { "epoch": 0.21229905588160244, "grad_norm": 0.859375, "learning_rate": 0.0001909006339196611, "loss": 1.0519, "step": 8268 }, { "epoch": 0.21232473307752428, "grad_norm": 0.828125, "learning_rate": 0.00019089877322889148, "loss": 1.039, "step": 8269 }, { "epoch": 0.2123504102734461, "grad_norm": 0.87109375, "learning_rate": 0.00019089691235696878, "loss": 1.1068, "step": 8270 }, { "epoch": 0.2123760874693679, "grad_norm": 0.8515625, "learning_rate": 0.00019089505130389677, "loss": 1.1281, "step": 8271 }, { "epoch": 0.21240176466528973, "grad_norm": 0.80078125, "learning_rate": 0.00019089319006967913, "loss": 1.0747, "step": 8272 }, { "epoch": 0.21242744186121154, "grad_norm": 0.78515625, "learning_rate": 0.00019089132865431955, "loss": 1.1592, "step": 8273 }, { "epoch": 0.21245311905713338, "grad_norm": 0.87890625, "learning_rate": 0.0001908894670578218, "loss": 0.9923, "step": 8274 }, { "epoch": 0.21247879625305519, "grad_norm": 0.78125, "learning_rate": 0.0001908876052801895, "loss": 0.9274, "step": 8275 }, { "epoch": 0.212504473448977, "grad_norm": 0.91796875, "learning_rate": 0.00019088574332142645, "loss": 1.1325, "step": 8276 }, { "epoch": 0.21253015064489883, "grad_norm": 0.84375, "learning_rate": 0.00019088388118153633, "loss": 0.9671, "step": 8277 }, { "epoch": 0.21255582784082064, "grad_norm": 0.85546875, "learning_rate": 0.00019088201886052284, "loss": 0.9946, "step": 8278 }, { "epoch": 0.21258150503674247, "grad_norm": 0.8515625, "learning_rate": 0.00019088015635838966, "loss": 1.1555, "step": 8279 }, { "epoch": 0.21260718223266428, "grad_norm": 0.8359375, "learning_rate": 0.00019087829367514059, "loss": 1.0693, "step": 8280 }, { "epoch": 0.2126328594285861, "grad_norm": 0.83203125, "learning_rate": 0.00019087643081077923, "loss": 1.1039, "step": 8281 }, { "epoch": 0.21265853662450793, "grad_norm": 0.765625, "learning_rate": 0.00019087456776530938, "loss": 0.8985, "step": 8282 }, { "epoch": 0.21268421382042973, "grad_norm": 0.82421875, "learning_rate": 0.00019087270453873475, "loss": 1.0391, "step": 8283 }, { "epoch": 0.21270989101635157, "grad_norm": 0.859375, "learning_rate": 0.000190870841131059, "loss": 0.989, "step": 8284 }, { "epoch": 0.21273556821227338, "grad_norm": 0.78515625, "learning_rate": 0.00019086897754228588, "loss": 1.0441, "step": 8285 }, { "epoch": 0.2127612454081952, "grad_norm": 0.83984375, "learning_rate": 0.0001908671137724191, "loss": 1.2159, "step": 8286 }, { "epoch": 0.21278692260411702, "grad_norm": 0.77734375, "learning_rate": 0.00019086524982146237, "loss": 1.1071, "step": 8287 }, { "epoch": 0.21281259980003883, "grad_norm": 0.87890625, "learning_rate": 0.00019086338568941939, "loss": 0.9795, "step": 8288 }, { "epoch": 0.21283827699596067, "grad_norm": 0.796875, "learning_rate": 0.0001908615213762939, "loss": 1.0153, "step": 8289 }, { "epoch": 0.21286395419188248, "grad_norm": 0.84765625, "learning_rate": 0.0001908596568820896, "loss": 1.0673, "step": 8290 }, { "epoch": 0.21288963138780428, "grad_norm": 0.80078125, "learning_rate": 0.00019085779220681025, "loss": 1.1081, "step": 8291 }, { "epoch": 0.21291530858372612, "grad_norm": 0.734375, "learning_rate": 0.00019085592735045947, "loss": 0.9946, "step": 8292 }, { "epoch": 0.21294098577964793, "grad_norm": 0.77734375, "learning_rate": 0.00019085406231304105, "loss": 1.1198, "step": 8293 }, { "epoch": 0.21296666297556976, "grad_norm": 0.87890625, "learning_rate": 0.0001908521970945587, "loss": 1.0479, "step": 8294 }, { "epoch": 0.21299234017149157, "grad_norm": 0.75, "learning_rate": 0.00019085033169501613, "loss": 1.0345, "step": 8295 }, { "epoch": 0.21301801736741338, "grad_norm": 0.76171875, "learning_rate": 0.00019084846611441706, "loss": 0.9117, "step": 8296 }, { "epoch": 0.21304369456333522, "grad_norm": 0.94140625, "learning_rate": 0.0001908466003527652, "loss": 1.0584, "step": 8297 }, { "epoch": 0.21306937175925703, "grad_norm": 0.78125, "learning_rate": 0.00019084473441006427, "loss": 0.949, "step": 8298 }, { "epoch": 0.21309504895517886, "grad_norm": 0.734375, "learning_rate": 0.000190842868286318, "loss": 0.974, "step": 8299 }, { "epoch": 0.21312072615110067, "grad_norm": 0.82421875, "learning_rate": 0.00019084100198153006, "loss": 1.0155, "step": 8300 }, { "epoch": 0.21314640334702248, "grad_norm": 0.8828125, "learning_rate": 0.00019083913549570425, "loss": 1.0305, "step": 8301 }, { "epoch": 0.21317208054294431, "grad_norm": 1.0078125, "learning_rate": 0.00019083726882884424, "loss": 1.0463, "step": 8302 }, { "epoch": 0.21319775773886612, "grad_norm": 0.86328125, "learning_rate": 0.00019083540198095374, "loss": 1.0815, "step": 8303 }, { "epoch": 0.21322343493478796, "grad_norm": 0.8125, "learning_rate": 0.0001908335349520365, "loss": 1.083, "step": 8304 }, { "epoch": 0.21324911213070977, "grad_norm": 0.875, "learning_rate": 0.00019083166774209624, "loss": 1.222, "step": 8305 }, { "epoch": 0.21327478932663158, "grad_norm": 0.84375, "learning_rate": 0.00019082980035113665, "loss": 1.0517, "step": 8306 }, { "epoch": 0.2133004665225534, "grad_norm": 1.0703125, "learning_rate": 0.00019082793277916152, "loss": 1.0597, "step": 8307 }, { "epoch": 0.21332614371847522, "grad_norm": 0.8359375, "learning_rate": 0.00019082606502617447, "loss": 0.9656, "step": 8308 }, { "epoch": 0.21335182091439706, "grad_norm": 0.7890625, "learning_rate": 0.0001908241970921793, "loss": 1.075, "step": 8309 }, { "epoch": 0.21337749811031886, "grad_norm": 0.76953125, "learning_rate": 0.0001908223289771797, "loss": 1.0532, "step": 8310 }, { "epoch": 0.21340317530624067, "grad_norm": 0.83984375, "learning_rate": 0.0001908204606811794, "loss": 1.0726, "step": 8311 }, { "epoch": 0.2134288525021625, "grad_norm": 0.7890625, "learning_rate": 0.00019081859220418214, "loss": 1.0128, "step": 8312 }, { "epoch": 0.21345452969808432, "grad_norm": 0.8046875, "learning_rate": 0.0001908167235461916, "loss": 1.0255, "step": 8313 }, { "epoch": 0.21348020689400615, "grad_norm": 0.8046875, "learning_rate": 0.0001908148547072116, "loss": 0.9351, "step": 8314 }, { "epoch": 0.21350588408992796, "grad_norm": 0.83203125, "learning_rate": 0.00019081298568724572, "loss": 1.0285, "step": 8315 }, { "epoch": 0.21353156128584977, "grad_norm": 0.8046875, "learning_rate": 0.0001908111164862978, "loss": 0.992, "step": 8316 }, { "epoch": 0.2135572384817716, "grad_norm": 0.79296875, "learning_rate": 0.00019080924710437153, "loss": 1.0216, "step": 8317 }, { "epoch": 0.2135829156776934, "grad_norm": 0.84765625, "learning_rate": 0.0001908073775414706, "loss": 1.0959, "step": 8318 }, { "epoch": 0.21360859287361525, "grad_norm": 0.796875, "learning_rate": 0.00019080550779759884, "loss": 1.0172, "step": 8319 }, { "epoch": 0.21363427006953706, "grad_norm": 0.84765625, "learning_rate": 0.00019080363787275984, "loss": 1.1929, "step": 8320 }, { "epoch": 0.21365994726545887, "grad_norm": 0.81640625, "learning_rate": 0.00019080176776695745, "loss": 1.0657, "step": 8321 }, { "epoch": 0.2136856244613807, "grad_norm": 0.796875, "learning_rate": 0.00019079989748019526, "loss": 1.2408, "step": 8322 }, { "epoch": 0.2137113016573025, "grad_norm": 0.87890625, "learning_rate": 0.00019079802701247713, "loss": 1.1078, "step": 8323 }, { "epoch": 0.21373697885322435, "grad_norm": 0.79296875, "learning_rate": 0.00019079615636380672, "loss": 0.9788, "step": 8324 }, { "epoch": 0.21376265604914615, "grad_norm": 0.87890625, "learning_rate": 0.00019079428553418778, "loss": 1.2283, "step": 8325 }, { "epoch": 0.21378833324506796, "grad_norm": 0.83203125, "learning_rate": 0.00019079241452362404, "loss": 1.035, "step": 8326 }, { "epoch": 0.2138140104409898, "grad_norm": 0.75390625, "learning_rate": 0.00019079054333211919, "loss": 1.0985, "step": 8327 }, { "epoch": 0.2138396876369116, "grad_norm": 0.8046875, "learning_rate": 0.00019078867195967703, "loss": 1.0075, "step": 8328 }, { "epoch": 0.21386536483283344, "grad_norm": 0.7890625, "learning_rate": 0.0001907868004063012, "loss": 1.1101, "step": 8329 }, { "epoch": 0.21389104202875525, "grad_norm": 0.76171875, "learning_rate": 0.0001907849286719955, "loss": 0.9965, "step": 8330 }, { "epoch": 0.21391671922467706, "grad_norm": 0.77734375, "learning_rate": 0.00019078305675676363, "loss": 1.0692, "step": 8331 }, { "epoch": 0.2139423964205989, "grad_norm": 0.7734375, "learning_rate": 0.00019078118466060934, "loss": 1.0822, "step": 8332 }, { "epoch": 0.2139680736165207, "grad_norm": 0.734375, "learning_rate": 0.00019077931238353633, "loss": 1.0139, "step": 8333 }, { "epoch": 0.21399375081244254, "grad_norm": 0.79296875, "learning_rate": 0.00019077743992554838, "loss": 1.0718, "step": 8334 }, { "epoch": 0.21401942800836435, "grad_norm": 0.84765625, "learning_rate": 0.00019077556728664918, "loss": 1.0062, "step": 8335 }, { "epoch": 0.21404510520428616, "grad_norm": 0.81640625, "learning_rate": 0.00019077369446684247, "loss": 0.9238, "step": 8336 }, { "epoch": 0.214070782400208, "grad_norm": 0.98046875, "learning_rate": 0.00019077182146613197, "loss": 1.1277, "step": 8337 }, { "epoch": 0.2140964595961298, "grad_norm": 0.85546875, "learning_rate": 0.00019076994828452147, "loss": 1.0023, "step": 8338 }, { "epoch": 0.21412213679205164, "grad_norm": 0.859375, "learning_rate": 0.0001907680749220146, "loss": 1.1076, "step": 8339 }, { "epoch": 0.21414781398797345, "grad_norm": 0.79296875, "learning_rate": 0.0001907662013786152, "loss": 1.0647, "step": 8340 }, { "epoch": 0.21417349118389525, "grad_norm": 0.9375, "learning_rate": 0.00019076432765432696, "loss": 1.0079, "step": 8341 }, { "epoch": 0.2141991683798171, "grad_norm": 1.0390625, "learning_rate": 0.0001907624537491536, "loss": 1.1754, "step": 8342 }, { "epoch": 0.2142248455757389, "grad_norm": 0.75390625, "learning_rate": 0.0001907605796630989, "loss": 0.9353, "step": 8343 }, { "epoch": 0.21425052277166073, "grad_norm": 0.85546875, "learning_rate": 0.00019075870539616655, "loss": 1.0688, "step": 8344 }, { "epoch": 0.21427619996758254, "grad_norm": 0.828125, "learning_rate": 0.00019075683094836026, "loss": 1.1561, "step": 8345 }, { "epoch": 0.21430187716350435, "grad_norm": 0.7734375, "learning_rate": 0.00019075495631968383, "loss": 0.9339, "step": 8346 }, { "epoch": 0.2143275543594262, "grad_norm": 0.8046875, "learning_rate": 0.00019075308151014097, "loss": 0.9911, "step": 8347 }, { "epoch": 0.214353231555348, "grad_norm": 0.8359375, "learning_rate": 0.00019075120651973544, "loss": 1.009, "step": 8348 }, { "epoch": 0.21437890875126983, "grad_norm": 0.76171875, "learning_rate": 0.00019074933134847092, "loss": 0.9105, "step": 8349 }, { "epoch": 0.21440458594719164, "grad_norm": 0.8203125, "learning_rate": 0.00019074745599635117, "loss": 1.0604, "step": 8350 }, { "epoch": 0.21443026314311345, "grad_norm": 0.8203125, "learning_rate": 0.00019074558046337998, "loss": 0.8888, "step": 8351 }, { "epoch": 0.21445594033903528, "grad_norm": 0.77734375, "learning_rate": 0.000190743704749561, "loss": 0.9389, "step": 8352 }, { "epoch": 0.2144816175349571, "grad_norm": 0.8125, "learning_rate": 0.00019074182885489804, "loss": 1.1038, "step": 8353 }, { "epoch": 0.21450729473087893, "grad_norm": 0.92578125, "learning_rate": 0.00019073995277939481, "loss": 1.1426, "step": 8354 }, { "epoch": 0.21453297192680074, "grad_norm": 0.765625, "learning_rate": 0.00019073807652305504, "loss": 0.878, "step": 8355 }, { "epoch": 0.21455864912272254, "grad_norm": 0.8125, "learning_rate": 0.0001907362000858825, "loss": 0.9894, "step": 8356 }, { "epoch": 0.21458432631864438, "grad_norm": 0.79296875, "learning_rate": 0.0001907343234678809, "loss": 0.984, "step": 8357 }, { "epoch": 0.2146100035145662, "grad_norm": 0.8203125, "learning_rate": 0.00019073244666905397, "loss": 1.0084, "step": 8358 }, { "epoch": 0.21463568071048802, "grad_norm": 0.77734375, "learning_rate": 0.0001907305696894055, "loss": 0.8917, "step": 8359 }, { "epoch": 0.21466135790640983, "grad_norm": 0.78515625, "learning_rate": 0.00019072869252893918, "loss": 1.0646, "step": 8360 }, { "epoch": 0.21468703510233164, "grad_norm": 0.80859375, "learning_rate": 0.00019072681518765878, "loss": 1.0925, "step": 8361 }, { "epoch": 0.21471271229825348, "grad_norm": 0.76171875, "learning_rate": 0.00019072493766556802, "loss": 1.0, "step": 8362 }, { "epoch": 0.21473838949417529, "grad_norm": 1.0, "learning_rate": 0.0001907230599626707, "loss": 1.08, "step": 8363 }, { "epoch": 0.21476406669009712, "grad_norm": 0.828125, "learning_rate": 0.00019072118207897045, "loss": 0.9908, "step": 8364 }, { "epoch": 0.21478974388601893, "grad_norm": 0.87109375, "learning_rate": 0.00019071930401447112, "loss": 1.1384, "step": 8365 }, { "epoch": 0.21481542108194074, "grad_norm": 0.90625, "learning_rate": 0.0001907174257691764, "loss": 1.0778, "step": 8366 }, { "epoch": 0.21484109827786257, "grad_norm": 1.3515625, "learning_rate": 0.00019071554734309003, "loss": 1.0624, "step": 8367 }, { "epoch": 0.21486677547378438, "grad_norm": 0.76171875, "learning_rate": 0.00019071366873621578, "loss": 0.9289, "step": 8368 }, { "epoch": 0.2148924526697062, "grad_norm": 0.81640625, "learning_rate": 0.0001907117899485574, "loss": 0.8671, "step": 8369 }, { "epoch": 0.21491812986562803, "grad_norm": 0.85546875, "learning_rate": 0.0001907099109801186, "loss": 1.0526, "step": 8370 }, { "epoch": 0.21494380706154984, "grad_norm": 0.75, "learning_rate": 0.00019070803183090317, "loss": 0.9325, "step": 8371 }, { "epoch": 0.21496948425747167, "grad_norm": 0.77734375, "learning_rate": 0.0001907061525009148, "loss": 0.9369, "step": 8372 }, { "epoch": 0.21499516145339348, "grad_norm": 0.78125, "learning_rate": 0.00019070427299015726, "loss": 0.994, "step": 8373 }, { "epoch": 0.2150208386493153, "grad_norm": 0.7890625, "learning_rate": 0.00019070239329863428, "loss": 1.1153, "step": 8374 }, { "epoch": 0.21504651584523712, "grad_norm": 0.8125, "learning_rate": 0.00019070051342634964, "loss": 0.9584, "step": 8375 }, { "epoch": 0.21507219304115893, "grad_norm": 0.7890625, "learning_rate": 0.00019069863337330709, "loss": 0.9682, "step": 8376 }, { "epoch": 0.21509787023708077, "grad_norm": 0.8671875, "learning_rate": 0.00019069675313951033, "loss": 1.044, "step": 8377 }, { "epoch": 0.21512354743300258, "grad_norm": 0.828125, "learning_rate": 0.00019069487272496317, "loss": 1.113, "step": 8378 }, { "epoch": 0.21514922462892438, "grad_norm": 0.87890625, "learning_rate": 0.00019069299212966926, "loss": 1.0959, "step": 8379 }, { "epoch": 0.21517490182484622, "grad_norm": 0.81640625, "learning_rate": 0.00019069111135363245, "loss": 0.8998, "step": 8380 }, { "epoch": 0.21520057902076803, "grad_norm": 0.796875, "learning_rate": 0.00019068923039685645, "loss": 0.9413, "step": 8381 }, { "epoch": 0.21522625621668987, "grad_norm": 0.8203125, "learning_rate": 0.00019068734925934498, "loss": 1.0699, "step": 8382 }, { "epoch": 0.21525193341261167, "grad_norm": 0.80859375, "learning_rate": 0.00019068546794110184, "loss": 1.1936, "step": 8383 }, { "epoch": 0.21527761060853348, "grad_norm": 0.83984375, "learning_rate": 0.00019068358644213075, "loss": 0.9758, "step": 8384 }, { "epoch": 0.21530328780445532, "grad_norm": 0.78515625, "learning_rate": 0.00019068170476243545, "loss": 1.0209, "step": 8385 }, { "epoch": 0.21532896500037713, "grad_norm": 0.76953125, "learning_rate": 0.00019067982290201974, "loss": 1.0956, "step": 8386 }, { "epoch": 0.21535464219629896, "grad_norm": 0.78125, "learning_rate": 0.0001906779408608873, "loss": 0.9688, "step": 8387 }, { "epoch": 0.21538031939222077, "grad_norm": 0.80078125, "learning_rate": 0.0001906760586390419, "loss": 1.1504, "step": 8388 }, { "epoch": 0.21540599658814258, "grad_norm": 0.7265625, "learning_rate": 0.00019067417623648734, "loss": 1.0594, "step": 8389 }, { "epoch": 0.21543167378406441, "grad_norm": 0.76953125, "learning_rate": 0.00019067229365322732, "loss": 0.9089, "step": 8390 }, { "epoch": 0.21545735097998622, "grad_norm": 0.83984375, "learning_rate": 0.0001906704108892656, "loss": 1.0942, "step": 8391 }, { "epoch": 0.21548302817590806, "grad_norm": 0.88671875, "learning_rate": 0.00019066852794460596, "loss": 1.2153, "step": 8392 }, { "epoch": 0.21550870537182987, "grad_norm": 0.796875, "learning_rate": 0.00019066664481925215, "loss": 0.926, "step": 8393 }, { "epoch": 0.21553438256775168, "grad_norm": 0.8125, "learning_rate": 0.00019066476151320785, "loss": 1.2066, "step": 8394 }, { "epoch": 0.2155600597636735, "grad_norm": 0.8515625, "learning_rate": 0.00019066287802647694, "loss": 1.0359, "step": 8395 }, { "epoch": 0.21558573695959532, "grad_norm": 0.77734375, "learning_rate": 0.00019066099435906307, "loss": 1.1326, "step": 8396 }, { "epoch": 0.21561141415551716, "grad_norm": 0.89453125, "learning_rate": 0.00019065911051097, "loss": 1.0277, "step": 8397 }, { "epoch": 0.21563709135143896, "grad_norm": 0.80859375, "learning_rate": 0.00019065722648220154, "loss": 0.9959, "step": 8398 }, { "epoch": 0.21566276854736077, "grad_norm": 0.80859375, "learning_rate": 0.0001906553422727614, "loss": 1.0235, "step": 8399 }, { "epoch": 0.2156884457432826, "grad_norm": 0.8671875, "learning_rate": 0.0001906534578826534, "loss": 0.9975, "step": 8400 }, { "epoch": 0.21571412293920442, "grad_norm": 0.75390625, "learning_rate": 0.0001906515733118812, "loss": 0.9492, "step": 8401 }, { "epoch": 0.21573980013512625, "grad_norm": 0.87109375, "learning_rate": 0.0001906496885604486, "loss": 1.0301, "step": 8402 }, { "epoch": 0.21576547733104806, "grad_norm": 0.7421875, "learning_rate": 0.0001906478036283594, "loss": 1.0049, "step": 8403 }, { "epoch": 0.21579115452696987, "grad_norm": 0.85546875, "learning_rate": 0.00019064591851561727, "loss": 0.921, "step": 8404 }, { "epoch": 0.2158168317228917, "grad_norm": 0.7734375, "learning_rate": 0.00019064403322222604, "loss": 0.9262, "step": 8405 }, { "epoch": 0.2158425089188135, "grad_norm": 0.859375, "learning_rate": 0.00019064214774818945, "loss": 1.0445, "step": 8406 }, { "epoch": 0.21586818611473535, "grad_norm": 0.81640625, "learning_rate": 0.00019064026209351123, "loss": 1.0586, "step": 8407 }, { "epoch": 0.21589386331065716, "grad_norm": 0.82421875, "learning_rate": 0.00019063837625819515, "loss": 1.0784, "step": 8408 }, { "epoch": 0.21591954050657897, "grad_norm": 0.7890625, "learning_rate": 0.00019063649024224496, "loss": 1.0711, "step": 8409 }, { "epoch": 0.2159452177025008, "grad_norm": 0.828125, "learning_rate": 0.00019063460404566447, "loss": 1.0418, "step": 8410 }, { "epoch": 0.2159708948984226, "grad_norm": 0.75390625, "learning_rate": 0.0001906327176684574, "loss": 0.9929, "step": 8411 }, { "epoch": 0.21599657209434445, "grad_norm": 0.78125, "learning_rate": 0.00019063083111062746, "loss": 1.1043, "step": 8412 }, { "epoch": 0.21602224929026625, "grad_norm": 0.8203125, "learning_rate": 0.0001906289443721785, "loss": 1.189, "step": 8413 }, { "epoch": 0.21604792648618806, "grad_norm": 0.7890625, "learning_rate": 0.00019062705745311425, "loss": 1.1143, "step": 8414 }, { "epoch": 0.2160736036821099, "grad_norm": 0.7734375, "learning_rate": 0.00019062517035343843, "loss": 1.0437, "step": 8415 }, { "epoch": 0.2160992808780317, "grad_norm": 0.84375, "learning_rate": 0.00019062328307315485, "loss": 1.1038, "step": 8416 }, { "epoch": 0.21612495807395354, "grad_norm": 0.75390625, "learning_rate": 0.00019062139561226723, "loss": 1.0625, "step": 8417 }, { "epoch": 0.21615063526987535, "grad_norm": 0.72265625, "learning_rate": 0.0001906195079707794, "loss": 0.8922, "step": 8418 }, { "epoch": 0.21617631246579716, "grad_norm": 0.83203125, "learning_rate": 0.00019061762014869503, "loss": 0.8703, "step": 8419 }, { "epoch": 0.216201989661719, "grad_norm": 0.8359375, "learning_rate": 0.00019061573214601796, "loss": 1.084, "step": 8420 }, { "epoch": 0.2162276668576408, "grad_norm": 0.81640625, "learning_rate": 0.0001906138439627519, "loss": 1.0382, "step": 8421 }, { "epoch": 0.21625334405356264, "grad_norm": 0.765625, "learning_rate": 0.00019061195559890064, "loss": 0.9166, "step": 8422 }, { "epoch": 0.21627902124948445, "grad_norm": 0.80078125, "learning_rate": 0.00019061006705446793, "loss": 1.0975, "step": 8423 }, { "epoch": 0.21630469844540626, "grad_norm": 0.88671875, "learning_rate": 0.00019060817832945755, "loss": 1.173, "step": 8424 }, { "epoch": 0.2163303756413281, "grad_norm": 0.81640625, "learning_rate": 0.00019060628942387328, "loss": 1.1713, "step": 8425 }, { "epoch": 0.2163560528372499, "grad_norm": 0.76953125, "learning_rate": 0.00019060440033771882, "loss": 1.13, "step": 8426 }, { "epoch": 0.21638173003317174, "grad_norm": 0.85546875, "learning_rate": 0.000190602511070998, "loss": 1.0435, "step": 8427 }, { "epoch": 0.21640740722909355, "grad_norm": 0.75390625, "learning_rate": 0.00019060062162371455, "loss": 0.9706, "step": 8428 }, { "epoch": 0.21643308442501535, "grad_norm": 0.81640625, "learning_rate": 0.00019059873199587225, "loss": 1.1103, "step": 8429 }, { "epoch": 0.2164587616209372, "grad_norm": 0.78515625, "learning_rate": 0.00019059684218747486, "loss": 0.993, "step": 8430 }, { "epoch": 0.216484438816859, "grad_norm": 0.7890625, "learning_rate": 0.00019059495219852615, "loss": 0.9618, "step": 8431 }, { "epoch": 0.21651011601278083, "grad_norm": 0.75, "learning_rate": 0.00019059306202902986, "loss": 1.1334, "step": 8432 }, { "epoch": 0.21653579320870264, "grad_norm": 0.90625, "learning_rate": 0.00019059117167898978, "loss": 1.1325, "step": 8433 }, { "epoch": 0.21656147040462445, "grad_norm": 0.79296875, "learning_rate": 0.00019058928114840972, "loss": 1.0129, "step": 8434 }, { "epoch": 0.2165871476005463, "grad_norm": 0.75390625, "learning_rate": 0.00019058739043729337, "loss": 1.0255, "step": 8435 }, { "epoch": 0.2166128247964681, "grad_norm": 0.84375, "learning_rate": 0.00019058549954564455, "loss": 1.0149, "step": 8436 }, { "epoch": 0.21663850199238993, "grad_norm": 0.859375, "learning_rate": 0.00019058360847346698, "loss": 1.1038, "step": 8437 }, { "epoch": 0.21666417918831174, "grad_norm": 0.8203125, "learning_rate": 0.0001905817172207645, "loss": 0.9753, "step": 8438 }, { "epoch": 0.21668985638423355, "grad_norm": 0.828125, "learning_rate": 0.00019057982578754085, "loss": 0.992, "step": 8439 }, { "epoch": 0.21671553358015538, "grad_norm": 0.8515625, "learning_rate": 0.00019057793417379975, "loss": 1.0559, "step": 8440 }, { "epoch": 0.2167412107760772, "grad_norm": 0.796875, "learning_rate": 0.00019057604237954502, "loss": 1.0568, "step": 8441 }, { "epoch": 0.21676688797199903, "grad_norm": 0.953125, "learning_rate": 0.00019057415040478045, "loss": 1.0925, "step": 8442 }, { "epoch": 0.21679256516792084, "grad_norm": 0.82421875, "learning_rate": 0.00019057225824950974, "loss": 1.037, "step": 8443 }, { "epoch": 0.21681824236384264, "grad_norm": 0.71484375, "learning_rate": 0.00019057036591373672, "loss": 0.9142, "step": 8444 }, { "epoch": 0.21684391955976448, "grad_norm": 0.83984375, "learning_rate": 0.00019056847339746512, "loss": 1.2133, "step": 8445 }, { "epoch": 0.2168695967556863, "grad_norm": 0.73828125, "learning_rate": 0.00019056658070069874, "loss": 0.8706, "step": 8446 }, { "epoch": 0.21689527395160813, "grad_norm": 0.7734375, "learning_rate": 0.00019056468782344138, "loss": 0.8823, "step": 8447 }, { "epoch": 0.21692095114752993, "grad_norm": 0.8828125, "learning_rate": 0.00019056279476569677, "loss": 0.9994, "step": 8448 }, { "epoch": 0.21694662834345174, "grad_norm": 0.8359375, "learning_rate": 0.00019056090152746866, "loss": 1.0318, "step": 8449 }, { "epoch": 0.21697230553937358, "grad_norm": 0.8046875, "learning_rate": 0.00019055900810876087, "loss": 1.0697, "step": 8450 }, { "epoch": 0.21699798273529539, "grad_norm": 0.8515625, "learning_rate": 0.00019055711450957717, "loss": 1.0051, "step": 8451 }, { "epoch": 0.21702365993121722, "grad_norm": 0.77734375, "learning_rate": 0.0001905552207299213, "loss": 1.037, "step": 8452 }, { "epoch": 0.21704933712713903, "grad_norm": 0.84375, "learning_rate": 0.00019055332676979706, "loss": 0.9907, "step": 8453 }, { "epoch": 0.21707501432306084, "grad_norm": 0.796875, "learning_rate": 0.00019055143262920821, "loss": 1.0429, "step": 8454 }, { "epoch": 0.21710069151898267, "grad_norm": 0.74609375, "learning_rate": 0.00019054953830815853, "loss": 0.8957, "step": 8455 }, { "epoch": 0.21712636871490448, "grad_norm": 0.80078125, "learning_rate": 0.00019054764380665182, "loss": 1.0461, "step": 8456 }, { "epoch": 0.21715204591082632, "grad_norm": 0.85546875, "learning_rate": 0.00019054574912469183, "loss": 1.125, "step": 8457 }, { "epoch": 0.21717772310674813, "grad_norm": 0.89453125, "learning_rate": 0.00019054385426228233, "loss": 1.0125, "step": 8458 }, { "epoch": 0.21720340030266994, "grad_norm": 0.76953125, "learning_rate": 0.00019054195921942713, "loss": 0.9712, "step": 8459 }, { "epoch": 0.21722907749859177, "grad_norm": 0.81640625, "learning_rate": 0.00019054006399612998, "loss": 1.1127, "step": 8460 }, { "epoch": 0.21725475469451358, "grad_norm": 0.81640625, "learning_rate": 0.00019053816859239463, "loss": 1.0928, "step": 8461 }, { "epoch": 0.21728043189043542, "grad_norm": 0.82421875, "learning_rate": 0.0001905362730082249, "loss": 1.0313, "step": 8462 }, { "epoch": 0.21730610908635722, "grad_norm": 0.796875, "learning_rate": 0.0001905343772436246, "loss": 0.9083, "step": 8463 }, { "epoch": 0.21733178628227903, "grad_norm": 0.9140625, "learning_rate": 0.0001905324812985974, "loss": 1.0126, "step": 8464 }, { "epoch": 0.21735746347820087, "grad_norm": 0.79296875, "learning_rate": 0.0001905305851731472, "loss": 0.9812, "step": 8465 }, { "epoch": 0.21738314067412268, "grad_norm": 0.87890625, "learning_rate": 0.00019052868886727768, "loss": 1.232, "step": 8466 }, { "epoch": 0.2174088178700445, "grad_norm": 0.80078125, "learning_rate": 0.00019052679238099266, "loss": 0.9925, "step": 8467 }, { "epoch": 0.21743449506596632, "grad_norm": 0.76953125, "learning_rate": 0.00019052489571429594, "loss": 0.8642, "step": 8468 }, { "epoch": 0.21746017226188813, "grad_norm": 0.80859375, "learning_rate": 0.00019052299886719126, "loss": 1.0588, "step": 8469 }, { "epoch": 0.21748584945780997, "grad_norm": 0.81640625, "learning_rate": 0.00019052110183968247, "loss": 0.9162, "step": 8470 }, { "epoch": 0.21751152665373177, "grad_norm": 0.84765625, "learning_rate": 0.00019051920463177325, "loss": 0.8968, "step": 8471 }, { "epoch": 0.2175372038496536, "grad_norm": 1.3984375, "learning_rate": 0.00019051730724346745, "loss": 1.1884, "step": 8472 }, { "epoch": 0.21756288104557542, "grad_norm": 0.80859375, "learning_rate": 0.00019051540967476881, "loss": 1.2066, "step": 8473 }, { "epoch": 0.21758855824149723, "grad_norm": 0.734375, "learning_rate": 0.00019051351192568116, "loss": 0.9694, "step": 8474 }, { "epoch": 0.21761423543741906, "grad_norm": 0.7734375, "learning_rate": 0.00019051161399620827, "loss": 1.1368, "step": 8475 }, { "epoch": 0.21763991263334087, "grad_norm": 0.75390625, "learning_rate": 0.0001905097158863539, "loss": 0.9308, "step": 8476 }, { "epoch": 0.2176655898292627, "grad_norm": 0.796875, "learning_rate": 0.00019050781759612182, "loss": 1.0181, "step": 8477 }, { "epoch": 0.21769126702518451, "grad_norm": 0.87109375, "learning_rate": 0.00019050591912551584, "loss": 0.8571, "step": 8478 }, { "epoch": 0.21771694422110632, "grad_norm": 0.84765625, "learning_rate": 0.00019050402047453977, "loss": 1.132, "step": 8479 }, { "epoch": 0.21774262141702816, "grad_norm": 0.9296875, "learning_rate": 0.00019050212164319733, "loss": 1.0015, "step": 8480 }, { "epoch": 0.21776829861294997, "grad_norm": 0.83203125, "learning_rate": 0.00019050022263149234, "loss": 0.9407, "step": 8481 }, { "epoch": 0.2177939758088718, "grad_norm": 0.8203125, "learning_rate": 0.0001904983234394286, "loss": 1.0263, "step": 8482 }, { "epoch": 0.2178196530047936, "grad_norm": 0.7734375, "learning_rate": 0.00019049642406700985, "loss": 1.1486, "step": 8483 }, { "epoch": 0.21784533020071542, "grad_norm": 0.8515625, "learning_rate": 0.0001904945245142399, "loss": 1.0772, "step": 8484 }, { "epoch": 0.21787100739663726, "grad_norm": 0.78125, "learning_rate": 0.00019049262478112254, "loss": 1.0529, "step": 8485 }, { "epoch": 0.21789668459255906, "grad_norm": 0.8046875, "learning_rate": 0.0001904907248676616, "loss": 1.0163, "step": 8486 }, { "epoch": 0.2179223617884809, "grad_norm": 0.8046875, "learning_rate": 0.00019048882477386078, "loss": 0.8913, "step": 8487 }, { "epoch": 0.2179480389844027, "grad_norm": 0.81640625, "learning_rate": 0.0001904869244997239, "loss": 1.0282, "step": 8488 }, { "epoch": 0.21797371618032452, "grad_norm": 0.796875, "learning_rate": 0.00019048502404525478, "loss": 1.0365, "step": 8489 }, { "epoch": 0.21799939337624635, "grad_norm": 0.8203125, "learning_rate": 0.00019048312341045715, "loss": 0.8751, "step": 8490 }, { "epoch": 0.21802507057216816, "grad_norm": 0.8203125, "learning_rate": 0.00019048122259533482, "loss": 1.0291, "step": 8491 }, { "epoch": 0.21805074776809, "grad_norm": 0.765625, "learning_rate": 0.00019047932159989165, "loss": 1.0298, "step": 8492 }, { "epoch": 0.2180764249640118, "grad_norm": 0.9140625, "learning_rate": 0.0001904774204241313, "loss": 0.9829, "step": 8493 }, { "epoch": 0.21810210215993361, "grad_norm": 0.85546875, "learning_rate": 0.00019047551906805764, "loss": 1.046, "step": 8494 }, { "epoch": 0.21812777935585545, "grad_norm": 0.83984375, "learning_rate": 0.00019047361753167445, "loss": 1.1415, "step": 8495 }, { "epoch": 0.21815345655177726, "grad_norm": 0.8203125, "learning_rate": 0.00019047171581498552, "loss": 1.108, "step": 8496 }, { "epoch": 0.2181791337476991, "grad_norm": 0.82421875, "learning_rate": 0.0001904698139179946, "loss": 1.0876, "step": 8497 }, { "epoch": 0.2182048109436209, "grad_norm": 0.8203125, "learning_rate": 0.00019046791184070555, "loss": 1.0449, "step": 8498 }, { "epoch": 0.2182304881395427, "grad_norm": 0.8046875, "learning_rate": 0.0001904660095831221, "loss": 1.0745, "step": 8499 }, { "epoch": 0.21825616533546455, "grad_norm": 0.83203125, "learning_rate": 0.0001904641071452481, "loss": 1.0383, "step": 8500 }, { "epoch": 0.21828184253138636, "grad_norm": 0.73046875, "learning_rate": 0.00019046220452708726, "loss": 0.8797, "step": 8501 }, { "epoch": 0.2183075197273082, "grad_norm": 0.80859375, "learning_rate": 0.00019046030172864344, "loss": 1.0236, "step": 8502 }, { "epoch": 0.21833319692323, "grad_norm": 0.85546875, "learning_rate": 0.00019045839874992044, "loss": 1.0703, "step": 8503 }, { "epoch": 0.2183588741191518, "grad_norm": 0.83984375, "learning_rate": 0.00019045649559092199, "loss": 1.0898, "step": 8504 }, { "epoch": 0.21838455131507364, "grad_norm": 0.77734375, "learning_rate": 0.00019045459225165189, "loss": 0.8921, "step": 8505 }, { "epoch": 0.21841022851099545, "grad_norm": 0.91796875, "learning_rate": 0.00019045268873211398, "loss": 0.9458, "step": 8506 }, { "epoch": 0.2184359057069173, "grad_norm": 0.77734375, "learning_rate": 0.00019045078503231207, "loss": 1.1165, "step": 8507 }, { "epoch": 0.2184615829028391, "grad_norm": 0.78125, "learning_rate": 0.00019044888115224987, "loss": 1.2112, "step": 8508 }, { "epoch": 0.2184872600987609, "grad_norm": 0.78515625, "learning_rate": 0.00019044697709193122, "loss": 1.0611, "step": 8509 }, { "epoch": 0.21851293729468274, "grad_norm": 0.84765625, "learning_rate": 0.00019044507285135995, "loss": 1.0463, "step": 8510 }, { "epoch": 0.21853861449060455, "grad_norm": 0.7421875, "learning_rate": 0.0001904431684305398, "loss": 0.9371, "step": 8511 }, { "epoch": 0.21856429168652639, "grad_norm": 0.84375, "learning_rate": 0.00019044126382947459, "loss": 1.009, "step": 8512 }, { "epoch": 0.2185899688824482, "grad_norm": 0.80078125, "learning_rate": 0.0001904393590481681, "loss": 0.9287, "step": 8513 }, { "epoch": 0.21861564607837, "grad_norm": 0.8046875, "learning_rate": 0.00019043745408662416, "loss": 1.1097, "step": 8514 }, { "epoch": 0.21864132327429184, "grad_norm": 0.83203125, "learning_rate": 0.0001904355489448465, "loss": 0.9413, "step": 8515 }, { "epoch": 0.21866700047021365, "grad_norm": 0.81640625, "learning_rate": 0.00019043364362283897, "loss": 1.1488, "step": 8516 }, { "epoch": 0.21869267766613548, "grad_norm": 0.7578125, "learning_rate": 0.00019043173812060537, "loss": 0.9045, "step": 8517 }, { "epoch": 0.2187183548620573, "grad_norm": 0.80859375, "learning_rate": 0.0001904298324381495, "loss": 0.9905, "step": 8518 }, { "epoch": 0.2187440320579791, "grad_norm": 0.8671875, "learning_rate": 0.00019042792657547513, "loss": 0.9671, "step": 8519 }, { "epoch": 0.21876970925390093, "grad_norm": 0.7578125, "learning_rate": 0.00019042602053258607, "loss": 0.9752, "step": 8520 }, { "epoch": 0.21879538644982274, "grad_norm": 0.83203125, "learning_rate": 0.00019042411430948613, "loss": 1.0222, "step": 8521 }, { "epoch": 0.21882106364574458, "grad_norm": 0.80078125, "learning_rate": 0.00019042220790617908, "loss": 1.0393, "step": 8522 }, { "epoch": 0.2188467408416664, "grad_norm": 0.76953125, "learning_rate": 0.00019042030132266873, "loss": 1.1346, "step": 8523 }, { "epoch": 0.2188724180375882, "grad_norm": 0.7734375, "learning_rate": 0.00019041839455895892, "loss": 0.8965, "step": 8524 }, { "epoch": 0.21889809523351003, "grad_norm": 0.81640625, "learning_rate": 0.00019041648761505337, "loss": 1.0976, "step": 8525 }, { "epoch": 0.21892377242943184, "grad_norm": 0.77734375, "learning_rate": 0.00019041458049095597, "loss": 0.8779, "step": 8526 }, { "epoch": 0.21894944962535368, "grad_norm": 0.8203125, "learning_rate": 0.00019041267318667044, "loss": 0.9391, "step": 8527 }, { "epoch": 0.21897512682127548, "grad_norm": 0.84765625, "learning_rate": 0.00019041076570220066, "loss": 1.1055, "step": 8528 }, { "epoch": 0.2190008040171973, "grad_norm": 0.8125, "learning_rate": 0.00019040885803755036, "loss": 1.1178, "step": 8529 }, { "epoch": 0.21902648121311913, "grad_norm": 0.81640625, "learning_rate": 0.0001904069501927234, "loss": 1.0836, "step": 8530 }, { "epoch": 0.21905215840904094, "grad_norm": 0.76171875, "learning_rate": 0.0001904050421677235, "loss": 1.0271, "step": 8531 }, { "epoch": 0.21907783560496277, "grad_norm": 0.7578125, "learning_rate": 0.00019040313396255458, "loss": 1.093, "step": 8532 }, { "epoch": 0.21910351280088458, "grad_norm": 0.82421875, "learning_rate": 0.00019040122557722038, "loss": 1.0533, "step": 8533 }, { "epoch": 0.2191291899968064, "grad_norm": 0.71875, "learning_rate": 0.00019039931701172468, "loss": 0.9221, "step": 8534 }, { "epoch": 0.21915486719272823, "grad_norm": 0.7578125, "learning_rate": 0.00019039740826607131, "loss": 1.0102, "step": 8535 }, { "epoch": 0.21918054438865003, "grad_norm": 0.90234375, "learning_rate": 0.00019039549934026407, "loss": 1.0727, "step": 8536 }, { "epoch": 0.21920622158457187, "grad_norm": 0.83984375, "learning_rate": 0.00019039359023430676, "loss": 0.9837, "step": 8537 }, { "epoch": 0.21923189878049368, "grad_norm": 0.875, "learning_rate": 0.0001903916809482032, "loss": 1.0306, "step": 8538 }, { "epoch": 0.2192575759764155, "grad_norm": 0.84765625, "learning_rate": 0.00019038977148195719, "loss": 1.0405, "step": 8539 }, { "epoch": 0.21928325317233732, "grad_norm": 0.86328125, "learning_rate": 0.0001903878618355725, "loss": 1.1126, "step": 8540 }, { "epoch": 0.21930893036825913, "grad_norm": 0.84375, "learning_rate": 0.00019038595200905297, "loss": 1.0232, "step": 8541 }, { "epoch": 0.21933460756418097, "grad_norm": 0.8828125, "learning_rate": 0.00019038404200240244, "loss": 0.9985, "step": 8542 }, { "epoch": 0.21936028476010277, "grad_norm": 0.8125, "learning_rate": 0.00019038213181562467, "loss": 1.104, "step": 8543 }, { "epoch": 0.21938596195602458, "grad_norm": 0.91015625, "learning_rate": 0.00019038022144872346, "loss": 1.0606, "step": 8544 }, { "epoch": 0.21941163915194642, "grad_norm": 0.76953125, "learning_rate": 0.00019037831090170263, "loss": 0.9857, "step": 8545 }, { "epoch": 0.21943731634786823, "grad_norm": 0.82421875, "learning_rate": 0.00019037640017456598, "loss": 1.0751, "step": 8546 }, { "epoch": 0.21946299354379006, "grad_norm": 0.796875, "learning_rate": 0.00019037448926731732, "loss": 1.196, "step": 8547 }, { "epoch": 0.21948867073971187, "grad_norm": 0.86328125, "learning_rate": 0.00019037257817996052, "loss": 0.9803, "step": 8548 }, { "epoch": 0.21951434793563368, "grad_norm": 0.84375, "learning_rate": 0.0001903706669124993, "loss": 0.997, "step": 8549 }, { "epoch": 0.21954002513155552, "grad_norm": 0.83203125, "learning_rate": 0.00019036875546493752, "loss": 0.9239, "step": 8550 }, { "epoch": 0.21956570232747732, "grad_norm": 0.74609375, "learning_rate": 0.00019036684383727894, "loss": 1.1344, "step": 8551 }, { "epoch": 0.21959137952339916, "grad_norm": 0.765625, "learning_rate": 0.00019036493202952744, "loss": 1.0595, "step": 8552 }, { "epoch": 0.21961705671932097, "grad_norm": 0.7890625, "learning_rate": 0.00019036302004168677, "loss": 1.082, "step": 8553 }, { "epoch": 0.21964273391524278, "grad_norm": 0.74609375, "learning_rate": 0.0001903611078737608, "loss": 0.8654, "step": 8554 }, { "epoch": 0.2196684111111646, "grad_norm": 0.734375, "learning_rate": 0.00019035919552575322, "loss": 1.0626, "step": 8555 }, { "epoch": 0.21969408830708642, "grad_norm": 0.828125, "learning_rate": 0.000190357282997668, "loss": 1.056, "step": 8556 }, { "epoch": 0.21971976550300826, "grad_norm": 0.765625, "learning_rate": 0.00019035537028950884, "loss": 1.1107, "step": 8557 }, { "epoch": 0.21974544269893007, "grad_norm": 0.84375, "learning_rate": 0.00019035345740127962, "loss": 0.9014, "step": 8558 }, { "epoch": 0.21977111989485187, "grad_norm": 0.765625, "learning_rate": 0.0001903515443329841, "loss": 1.0324, "step": 8559 }, { "epoch": 0.2197967970907737, "grad_norm": 0.7734375, "learning_rate": 0.00019034963108462613, "loss": 1.07, "step": 8560 }, { "epoch": 0.21982247428669552, "grad_norm": 0.78125, "learning_rate": 0.0001903477176562095, "loss": 0.9595, "step": 8561 }, { "epoch": 0.21984815148261735, "grad_norm": 0.76171875, "learning_rate": 0.00019034580404773803, "loss": 0.999, "step": 8562 }, { "epoch": 0.21987382867853916, "grad_norm": 0.8359375, "learning_rate": 0.00019034389025921554, "loss": 1.0035, "step": 8563 }, { "epoch": 0.21989950587446097, "grad_norm": 0.80859375, "learning_rate": 0.0001903419762906458, "loss": 1.0328, "step": 8564 }, { "epoch": 0.2199251830703828, "grad_norm": 1.4765625, "learning_rate": 0.0001903400621420327, "loss": 0.9602, "step": 8565 }, { "epoch": 0.21995086026630462, "grad_norm": 0.80859375, "learning_rate": 0.00019033814781338002, "loss": 1.1476, "step": 8566 }, { "epoch": 0.21997653746222645, "grad_norm": 0.78515625, "learning_rate": 0.00019033623330469155, "loss": 0.9558, "step": 8567 }, { "epoch": 0.22000221465814826, "grad_norm": 0.76171875, "learning_rate": 0.00019033431861597113, "loss": 1.0657, "step": 8568 }, { "epoch": 0.22002789185407007, "grad_norm": 0.88671875, "learning_rate": 0.0001903324037472226, "loss": 1.1347, "step": 8569 }, { "epoch": 0.2200535690499919, "grad_norm": 0.83203125, "learning_rate": 0.0001903304886984497, "loss": 0.9005, "step": 8570 }, { "epoch": 0.2200792462459137, "grad_norm": 0.90234375, "learning_rate": 0.00019032857346965634, "loss": 1.0999, "step": 8571 }, { "epoch": 0.22010492344183552, "grad_norm": 0.80859375, "learning_rate": 0.00019032665806084626, "loss": 0.9525, "step": 8572 }, { "epoch": 0.22013060063775736, "grad_norm": 0.78125, "learning_rate": 0.00019032474247202333, "loss": 0.9868, "step": 8573 }, { "epoch": 0.22015627783367916, "grad_norm": 0.76953125, "learning_rate": 0.00019032282670319135, "loss": 0.8776, "step": 8574 }, { "epoch": 0.220181955029601, "grad_norm": 0.80078125, "learning_rate": 0.00019032091075435413, "loss": 1.0874, "step": 8575 }, { "epoch": 0.2202076322255228, "grad_norm": 0.7890625, "learning_rate": 0.00019031899462551547, "loss": 1.0586, "step": 8576 }, { "epoch": 0.22023330942144462, "grad_norm": 0.8125, "learning_rate": 0.00019031707831667925, "loss": 1.0443, "step": 8577 }, { "epoch": 0.22025898661736645, "grad_norm": 0.75, "learning_rate": 0.00019031516182784923, "loss": 0.9686, "step": 8578 }, { "epoch": 0.22028466381328826, "grad_norm": 0.84765625, "learning_rate": 0.00019031324515902926, "loss": 1.1104, "step": 8579 }, { "epoch": 0.2203103410092101, "grad_norm": 0.80078125, "learning_rate": 0.00019031132831022314, "loss": 1.0438, "step": 8580 }, { "epoch": 0.2203360182051319, "grad_norm": 0.80078125, "learning_rate": 0.00019030941128143471, "loss": 0.9475, "step": 8581 }, { "epoch": 0.22036169540105371, "grad_norm": 0.82421875, "learning_rate": 0.00019030749407266778, "loss": 0.948, "step": 8582 }, { "epoch": 0.22038737259697555, "grad_norm": 0.75390625, "learning_rate": 0.00019030557668392616, "loss": 1.058, "step": 8583 }, { "epoch": 0.22041304979289736, "grad_norm": 0.78125, "learning_rate": 0.00019030365911521367, "loss": 0.9861, "step": 8584 }, { "epoch": 0.2204387269888192, "grad_norm": 0.87890625, "learning_rate": 0.00019030174136653417, "loss": 1.1219, "step": 8585 }, { "epoch": 0.220464404184741, "grad_norm": 0.84375, "learning_rate": 0.00019029982343789147, "loss": 1.0271, "step": 8586 }, { "epoch": 0.2204900813806628, "grad_norm": 0.7890625, "learning_rate": 0.00019029790532928934, "loss": 0.9958, "step": 8587 }, { "epoch": 0.22051575857658465, "grad_norm": 0.75, "learning_rate": 0.0001902959870407317, "loss": 0.9906, "step": 8588 }, { "epoch": 0.22054143577250646, "grad_norm": 0.796875, "learning_rate": 0.00019029406857222226, "loss": 1.081, "step": 8589 }, { "epoch": 0.2205671129684283, "grad_norm": 0.75, "learning_rate": 0.00019029214992376492, "loss": 1.0075, "step": 8590 }, { "epoch": 0.2205927901643501, "grad_norm": 0.7890625, "learning_rate": 0.00019029023109536346, "loss": 1.1331, "step": 8591 }, { "epoch": 0.2206184673602719, "grad_norm": 0.87109375, "learning_rate": 0.00019028831208702177, "loss": 0.991, "step": 8592 }, { "epoch": 0.22064414455619374, "grad_norm": 0.84375, "learning_rate": 0.00019028639289874358, "loss": 1.0349, "step": 8593 }, { "epoch": 0.22066982175211555, "grad_norm": 0.8359375, "learning_rate": 0.00019028447353053277, "loss": 0.991, "step": 8594 }, { "epoch": 0.2206954989480374, "grad_norm": 0.796875, "learning_rate": 0.0001902825539823932, "loss": 1.0104, "step": 8595 }, { "epoch": 0.2207211761439592, "grad_norm": 0.78515625, "learning_rate": 0.0001902806342543286, "loss": 1.1315, "step": 8596 }, { "epoch": 0.220746853339881, "grad_norm": 0.84375, "learning_rate": 0.00019027871434634287, "loss": 1.0294, "step": 8597 }, { "epoch": 0.22077253053580284, "grad_norm": 0.85546875, "learning_rate": 0.00019027679425843985, "loss": 1.0455, "step": 8598 }, { "epoch": 0.22079820773172465, "grad_norm": 0.8203125, "learning_rate": 0.00019027487399062332, "loss": 0.9524, "step": 8599 }, { "epoch": 0.22082388492764649, "grad_norm": 0.8203125, "learning_rate": 0.0001902729535428971, "loss": 0.9889, "step": 8600 }, { "epoch": 0.2208495621235683, "grad_norm": 0.77734375, "learning_rate": 0.00019027103291526505, "loss": 1.0903, "step": 8601 }, { "epoch": 0.2208752393194901, "grad_norm": 0.78515625, "learning_rate": 0.000190269112107731, "loss": 1.0203, "step": 8602 }, { "epoch": 0.22090091651541194, "grad_norm": 0.828125, "learning_rate": 0.00019026719112029875, "loss": 1.0265, "step": 8603 }, { "epoch": 0.22092659371133375, "grad_norm": 0.7890625, "learning_rate": 0.00019026526995297212, "loss": 1.0376, "step": 8604 }, { "epoch": 0.22095227090725558, "grad_norm": 0.80859375, "learning_rate": 0.000190263348605755, "loss": 0.9469, "step": 8605 }, { "epoch": 0.2209779481031774, "grad_norm": 0.76953125, "learning_rate": 0.00019026142707865114, "loss": 1.0969, "step": 8606 }, { "epoch": 0.2210036252990992, "grad_norm": 0.8359375, "learning_rate": 0.00019025950537166443, "loss": 1.1192, "step": 8607 }, { "epoch": 0.22102930249502103, "grad_norm": 0.82421875, "learning_rate": 0.0001902575834847987, "loss": 1.1607, "step": 8608 }, { "epoch": 0.22105497969094284, "grad_norm": 0.8671875, "learning_rate": 0.00019025566141805773, "loss": 1.1557, "step": 8609 }, { "epoch": 0.22108065688686468, "grad_norm": 0.7890625, "learning_rate": 0.0001902537391714454, "loss": 0.9404, "step": 8610 }, { "epoch": 0.2211063340827865, "grad_norm": 0.76171875, "learning_rate": 0.00019025181674496547, "loss": 0.9756, "step": 8611 }, { "epoch": 0.2211320112787083, "grad_norm": 0.80859375, "learning_rate": 0.0001902498941386219, "loss": 0.8952, "step": 8612 }, { "epoch": 0.22115768847463013, "grad_norm": 0.8046875, "learning_rate": 0.0001902479713524184, "loss": 0.9838, "step": 8613 }, { "epoch": 0.22118336567055194, "grad_norm": 0.8046875, "learning_rate": 0.0001902460483863588, "loss": 1.0404, "step": 8614 }, { "epoch": 0.22120904286647378, "grad_norm": 0.82421875, "learning_rate": 0.00019024412524044703, "loss": 1.0043, "step": 8615 }, { "epoch": 0.22123472006239558, "grad_norm": 0.703125, "learning_rate": 0.0001902422019146869, "loss": 0.9674, "step": 8616 }, { "epoch": 0.2212603972583174, "grad_norm": 0.78125, "learning_rate": 0.00019024027840908216, "loss": 0.8838, "step": 8617 }, { "epoch": 0.22128607445423923, "grad_norm": 0.84375, "learning_rate": 0.0001902383547236367, "loss": 0.8831, "step": 8618 }, { "epoch": 0.22131175165016104, "grad_norm": 0.875, "learning_rate": 0.00019023643085835438, "loss": 1.0747, "step": 8619 }, { "epoch": 0.22133742884608287, "grad_norm": 0.828125, "learning_rate": 0.000190234506813239, "loss": 0.973, "step": 8620 }, { "epoch": 0.22136310604200468, "grad_norm": 0.83984375, "learning_rate": 0.00019023258258829437, "loss": 1.0828, "step": 8621 }, { "epoch": 0.2213887832379265, "grad_norm": 0.80859375, "learning_rate": 0.0001902306581835244, "loss": 0.9904, "step": 8622 }, { "epoch": 0.22141446043384833, "grad_norm": 0.84375, "learning_rate": 0.00019022873359893282, "loss": 1.0207, "step": 8623 }, { "epoch": 0.22144013762977013, "grad_norm": 0.8125, "learning_rate": 0.00019022680883452355, "loss": 1.1883, "step": 8624 }, { "epoch": 0.22146581482569197, "grad_norm": 0.77734375, "learning_rate": 0.00019022488389030043, "loss": 0.9653, "step": 8625 }, { "epoch": 0.22149149202161378, "grad_norm": 0.83984375, "learning_rate": 0.00019022295876626723, "loss": 1.0094, "step": 8626 }, { "epoch": 0.2215171692175356, "grad_norm": 0.78515625, "learning_rate": 0.00019022103346242784, "loss": 1.0695, "step": 8627 }, { "epoch": 0.22154284641345742, "grad_norm": 0.7734375, "learning_rate": 0.00019021910797878606, "loss": 0.9947, "step": 8628 }, { "epoch": 0.22156852360937923, "grad_norm": 0.75390625, "learning_rate": 0.00019021718231534577, "loss": 1.0903, "step": 8629 }, { "epoch": 0.22159420080530107, "grad_norm": 0.78515625, "learning_rate": 0.00019021525647211078, "loss": 1.0524, "step": 8630 }, { "epoch": 0.22161987800122288, "grad_norm": 0.78125, "learning_rate": 0.0001902133304490849, "loss": 1.1325, "step": 8631 }, { "epoch": 0.22164555519714468, "grad_norm": 0.75, "learning_rate": 0.00019021140424627206, "loss": 1.0422, "step": 8632 }, { "epoch": 0.22167123239306652, "grad_norm": 0.83203125, "learning_rate": 0.000190209477863676, "loss": 1.0898, "step": 8633 }, { "epoch": 0.22169690958898833, "grad_norm": 0.796875, "learning_rate": 0.00019020755130130063, "loss": 1.0189, "step": 8634 }, { "epoch": 0.22172258678491016, "grad_norm": 0.88671875, "learning_rate": 0.00019020562455914974, "loss": 1.1236, "step": 8635 }, { "epoch": 0.22174826398083197, "grad_norm": 0.76953125, "learning_rate": 0.00019020369763722718, "loss": 0.9937, "step": 8636 }, { "epoch": 0.22177394117675378, "grad_norm": 0.80078125, "learning_rate": 0.0001902017705355368, "loss": 1.1009, "step": 8637 }, { "epoch": 0.22179961837267562, "grad_norm": 0.87109375, "learning_rate": 0.00019019984325408245, "loss": 1.0221, "step": 8638 }, { "epoch": 0.22182529556859742, "grad_norm": 0.7265625, "learning_rate": 0.00019019791579286794, "loss": 0.8972, "step": 8639 }, { "epoch": 0.22185097276451926, "grad_norm": 0.76953125, "learning_rate": 0.00019019598815189716, "loss": 1.0229, "step": 8640 }, { "epoch": 0.22187664996044107, "grad_norm": 0.72265625, "learning_rate": 0.0001901940603311739, "loss": 0.8379, "step": 8641 }, { "epoch": 0.22190232715636288, "grad_norm": 0.83203125, "learning_rate": 0.00019019213233070206, "loss": 1.1232, "step": 8642 }, { "epoch": 0.2219280043522847, "grad_norm": 0.8125, "learning_rate": 0.0001901902041504854, "loss": 0.9993, "step": 8643 }, { "epoch": 0.22195368154820652, "grad_norm": 0.7890625, "learning_rate": 0.00019018827579052783, "loss": 0.968, "step": 8644 }, { "epoch": 0.22197935874412836, "grad_norm": 0.79296875, "learning_rate": 0.00019018634725083316, "loss": 0.9612, "step": 8645 }, { "epoch": 0.22200503594005017, "grad_norm": 0.8359375, "learning_rate": 0.00019018441853140526, "loss": 1.0712, "step": 8646 }, { "epoch": 0.22203071313597197, "grad_norm": 0.88671875, "learning_rate": 0.00019018248963224797, "loss": 1.1397, "step": 8647 }, { "epoch": 0.2220563903318938, "grad_norm": 0.8515625, "learning_rate": 0.00019018056055336512, "loss": 1.114, "step": 8648 }, { "epoch": 0.22208206752781562, "grad_norm": 0.796875, "learning_rate": 0.00019017863129476057, "loss": 1.0915, "step": 8649 }, { "epoch": 0.22210774472373745, "grad_norm": 0.7109375, "learning_rate": 0.0001901767018564381, "loss": 0.9451, "step": 8650 }, { "epoch": 0.22213342191965926, "grad_norm": 0.79296875, "learning_rate": 0.00019017477223840168, "loss": 1.1048, "step": 8651 }, { "epoch": 0.22215909911558107, "grad_norm": 0.76171875, "learning_rate": 0.00019017284244065506, "loss": 0.8542, "step": 8652 }, { "epoch": 0.2221847763115029, "grad_norm": 0.76171875, "learning_rate": 0.00019017091246320211, "loss": 1.1159, "step": 8653 }, { "epoch": 0.22221045350742472, "grad_norm": 0.84765625, "learning_rate": 0.00019016898230604664, "loss": 1.0089, "step": 8654 }, { "epoch": 0.22223613070334655, "grad_norm": 0.875, "learning_rate": 0.00019016705196919258, "loss": 0.966, "step": 8655 }, { "epoch": 0.22226180789926836, "grad_norm": 0.78125, "learning_rate": 0.00019016512145264368, "loss": 1.2469, "step": 8656 }, { "epoch": 0.22228748509519017, "grad_norm": 1.140625, "learning_rate": 0.0001901631907564039, "loss": 1.1072, "step": 8657 }, { "epoch": 0.222313162291112, "grad_norm": 0.78515625, "learning_rate": 0.000190161259880477, "loss": 0.9975, "step": 8658 }, { "epoch": 0.2223388394870338, "grad_norm": 0.79296875, "learning_rate": 0.00019015932882486682, "loss": 1.1319, "step": 8659 }, { "epoch": 0.22236451668295565, "grad_norm": 0.79296875, "learning_rate": 0.00019015739758957727, "loss": 0.9555, "step": 8660 }, { "epoch": 0.22239019387887746, "grad_norm": 0.80859375, "learning_rate": 0.0001901554661746122, "loss": 1.0052, "step": 8661 }, { "epoch": 0.22241587107479927, "grad_norm": 0.7578125, "learning_rate": 0.00019015353457997537, "loss": 0.9955, "step": 8662 }, { "epoch": 0.2224415482707211, "grad_norm": 0.80078125, "learning_rate": 0.00019015160280567073, "loss": 0.998, "step": 8663 }, { "epoch": 0.2224672254666429, "grad_norm": 0.76171875, "learning_rate": 0.00019014967085170207, "loss": 0.962, "step": 8664 }, { "epoch": 0.22249290266256475, "grad_norm": 0.86328125, "learning_rate": 0.00019014773871807328, "loss": 1.0543, "step": 8665 }, { "epoch": 0.22251857985848655, "grad_norm": 0.78125, "learning_rate": 0.00019014580640478815, "loss": 0.9549, "step": 8666 }, { "epoch": 0.22254425705440836, "grad_norm": 0.8515625, "learning_rate": 0.0001901438739118506, "loss": 0.912, "step": 8667 }, { "epoch": 0.2225699342503302, "grad_norm": 0.80078125, "learning_rate": 0.0001901419412392645, "loss": 0.9696, "step": 8668 }, { "epoch": 0.222595611446252, "grad_norm": 0.81640625, "learning_rate": 0.00019014000838703357, "loss": 1.0194, "step": 8669 }, { "epoch": 0.22262128864217384, "grad_norm": 0.8046875, "learning_rate": 0.00019013807535516177, "loss": 0.9458, "step": 8670 }, { "epoch": 0.22264696583809565, "grad_norm": 0.8359375, "learning_rate": 0.00019013614214365295, "loss": 1.1599, "step": 8671 }, { "epoch": 0.22267264303401746, "grad_norm": 0.7890625, "learning_rate": 0.00019013420875251092, "loss": 0.892, "step": 8672 }, { "epoch": 0.2226983202299393, "grad_norm": 0.77734375, "learning_rate": 0.00019013227518173954, "loss": 0.9467, "step": 8673 }, { "epoch": 0.2227239974258611, "grad_norm": 0.8359375, "learning_rate": 0.00019013034143134272, "loss": 1.0087, "step": 8674 }, { "epoch": 0.22274967462178294, "grad_norm": 0.765625, "learning_rate": 0.00019012840750132424, "loss": 0.9742, "step": 8675 }, { "epoch": 0.22277535181770475, "grad_norm": 0.75, "learning_rate": 0.000190126473391688, "loss": 1.0762, "step": 8676 }, { "epoch": 0.22280102901362656, "grad_norm": 0.828125, "learning_rate": 0.00019012453910243783, "loss": 0.9861, "step": 8677 }, { "epoch": 0.2228267062095484, "grad_norm": 0.78125, "learning_rate": 0.00019012260463357761, "loss": 1.081, "step": 8678 }, { "epoch": 0.2228523834054702, "grad_norm": 0.8046875, "learning_rate": 0.00019012066998511115, "loss": 1.0535, "step": 8679 }, { "epoch": 0.22287806060139204, "grad_norm": 0.8359375, "learning_rate": 0.00019011873515704236, "loss": 1.0082, "step": 8680 }, { "epoch": 0.22290373779731384, "grad_norm": 0.80078125, "learning_rate": 0.00019011680014937508, "loss": 1.0475, "step": 8681 }, { "epoch": 0.22292941499323565, "grad_norm": 0.87109375, "learning_rate": 0.00019011486496211313, "loss": 1.0849, "step": 8682 }, { "epoch": 0.2229550921891575, "grad_norm": 1.0546875, "learning_rate": 0.00019011292959526043, "loss": 1.0472, "step": 8683 }, { "epoch": 0.2229807693850793, "grad_norm": 0.8046875, "learning_rate": 0.00019011099404882075, "loss": 1.0122, "step": 8684 }, { "epoch": 0.22300644658100113, "grad_norm": 0.890625, "learning_rate": 0.00019010905832279803, "loss": 0.971, "step": 8685 }, { "epoch": 0.22303212377692294, "grad_norm": 0.953125, "learning_rate": 0.0001901071224171961, "loss": 1.0062, "step": 8686 }, { "epoch": 0.22305780097284475, "grad_norm": 0.83203125, "learning_rate": 0.0001901051863320188, "loss": 1.182, "step": 8687 }, { "epoch": 0.22308347816876659, "grad_norm": 0.84765625, "learning_rate": 0.00019010325006727, "loss": 0.9731, "step": 8688 }, { "epoch": 0.2231091553646884, "grad_norm": 0.84375, "learning_rate": 0.0001901013136229536, "loss": 1.0277, "step": 8689 }, { "epoch": 0.22313483256061023, "grad_norm": 0.91796875, "learning_rate": 0.00019009937699907337, "loss": 1.0468, "step": 8690 }, { "epoch": 0.22316050975653204, "grad_norm": 0.84375, "learning_rate": 0.00019009744019563325, "loss": 1.0626, "step": 8691 }, { "epoch": 0.22318618695245385, "grad_norm": 0.78515625, "learning_rate": 0.00019009550321263705, "loss": 1.0533, "step": 8692 }, { "epoch": 0.22321186414837568, "grad_norm": 0.8046875, "learning_rate": 0.00019009356605008865, "loss": 1.0326, "step": 8693 }, { "epoch": 0.2232375413442975, "grad_norm": 0.79296875, "learning_rate": 0.00019009162870799192, "loss": 1.1371, "step": 8694 }, { "epoch": 0.22326321854021933, "grad_norm": 0.8125, "learning_rate": 0.00019008969118635075, "loss": 1.0384, "step": 8695 }, { "epoch": 0.22328889573614114, "grad_norm": 0.8125, "learning_rate": 0.00019008775348516892, "loss": 1.0173, "step": 8696 }, { "epoch": 0.22331457293206294, "grad_norm": 0.80859375, "learning_rate": 0.00019008581560445034, "loss": 1.0306, "step": 8697 }, { "epoch": 0.22334025012798478, "grad_norm": 0.79296875, "learning_rate": 0.00019008387754419886, "loss": 0.9187, "step": 8698 }, { "epoch": 0.2233659273239066, "grad_norm": 0.84375, "learning_rate": 0.00019008193930441838, "loss": 1.0013, "step": 8699 }, { "epoch": 0.22339160451982842, "grad_norm": 0.76953125, "learning_rate": 0.0001900800008851127, "loss": 0.9672, "step": 8700 }, { "epoch": 0.22341728171575023, "grad_norm": 0.76171875, "learning_rate": 0.0001900780622862857, "loss": 1.0392, "step": 8701 }, { "epoch": 0.22344295891167204, "grad_norm": 0.73828125, "learning_rate": 0.0001900761235079413, "loss": 1.0313, "step": 8702 }, { "epoch": 0.22346863610759388, "grad_norm": 0.8203125, "learning_rate": 0.0001900741845500833, "loss": 0.9229, "step": 8703 }, { "epoch": 0.22349431330351568, "grad_norm": 0.78515625, "learning_rate": 0.0001900722454127156, "loss": 1.1409, "step": 8704 }, { "epoch": 0.22351999049943752, "grad_norm": 0.75390625, "learning_rate": 0.00019007030609584206, "loss": 0.8917, "step": 8705 }, { "epoch": 0.22354566769535933, "grad_norm": 0.82421875, "learning_rate": 0.00019006836659946652, "loss": 1.072, "step": 8706 }, { "epoch": 0.22357134489128114, "grad_norm": 0.7734375, "learning_rate": 0.00019006642692359288, "loss": 1.0891, "step": 8707 }, { "epoch": 0.22359702208720297, "grad_norm": 0.8359375, "learning_rate": 0.00019006448706822496, "loss": 1.0279, "step": 8708 }, { "epoch": 0.22362269928312478, "grad_norm": 0.9765625, "learning_rate": 0.00019006254703336667, "loss": 0.9834, "step": 8709 }, { "epoch": 0.22364837647904662, "grad_norm": 0.75, "learning_rate": 0.00019006060681902182, "loss": 0.9832, "step": 8710 }, { "epoch": 0.22367405367496843, "grad_norm": 0.78125, "learning_rate": 0.00019005866642519436, "loss": 1.1563, "step": 8711 }, { "epoch": 0.22369973087089023, "grad_norm": 0.79296875, "learning_rate": 0.0001900567258518881, "loss": 1.1457, "step": 8712 }, { "epoch": 0.22372540806681207, "grad_norm": 0.8046875, "learning_rate": 0.00019005478509910692, "loss": 0.9978, "step": 8713 }, { "epoch": 0.22375108526273388, "grad_norm": 0.8515625, "learning_rate": 0.00019005284416685468, "loss": 1.0162, "step": 8714 }, { "epoch": 0.22377676245865571, "grad_norm": 0.82421875, "learning_rate": 0.00019005090305513527, "loss": 0.9262, "step": 8715 }, { "epoch": 0.22380243965457752, "grad_norm": 0.8984375, "learning_rate": 0.00019004896176395253, "loss": 1.3515, "step": 8716 }, { "epoch": 0.22382811685049933, "grad_norm": 0.76171875, "learning_rate": 0.00019004702029331037, "loss": 1.0841, "step": 8717 }, { "epoch": 0.22385379404642117, "grad_norm": 1.03125, "learning_rate": 0.0001900450786432126, "loss": 0.8998, "step": 8718 }, { "epoch": 0.22387947124234298, "grad_norm": 0.8046875, "learning_rate": 0.00019004313681366314, "loss": 0.9639, "step": 8719 }, { "epoch": 0.2239051484382648, "grad_norm": 0.83203125, "learning_rate": 0.0001900411948046658, "loss": 1.1406, "step": 8720 }, { "epoch": 0.22393082563418662, "grad_norm": 0.828125, "learning_rate": 0.00019003925261622454, "loss": 1.0159, "step": 8721 }, { "epoch": 0.22395650283010843, "grad_norm": 0.7890625, "learning_rate": 0.0001900373102483432, "loss": 0.9072, "step": 8722 }, { "epoch": 0.22398218002603026, "grad_norm": 0.828125, "learning_rate": 0.0001900353677010256, "loss": 1.0213, "step": 8723 }, { "epoch": 0.22400785722195207, "grad_norm": 0.81640625, "learning_rate": 0.00019003342497427565, "loss": 1.032, "step": 8724 }, { "epoch": 0.2240335344178739, "grad_norm": 0.859375, "learning_rate": 0.00019003148206809723, "loss": 1.0202, "step": 8725 }, { "epoch": 0.22405921161379572, "grad_norm": 0.7890625, "learning_rate": 0.0001900295389824942, "loss": 1.0106, "step": 8726 }, { "epoch": 0.22408488880971753, "grad_norm": 0.78515625, "learning_rate": 0.0001900275957174704, "loss": 0.9194, "step": 8727 }, { "epoch": 0.22411056600563936, "grad_norm": 0.875, "learning_rate": 0.00019002565227302975, "loss": 1.1077, "step": 8728 }, { "epoch": 0.22413624320156117, "grad_norm": 0.765625, "learning_rate": 0.00019002370864917612, "loss": 1.033, "step": 8729 }, { "epoch": 0.224161920397483, "grad_norm": 0.8203125, "learning_rate": 0.00019002176484591337, "loss": 1.1848, "step": 8730 }, { "epoch": 0.2241875975934048, "grad_norm": 0.80078125, "learning_rate": 0.00019001982086324534, "loss": 0.979, "step": 8731 }, { "epoch": 0.22421327478932662, "grad_norm": 0.73828125, "learning_rate": 0.00019001787670117596, "loss": 0.8966, "step": 8732 }, { "epoch": 0.22423895198524846, "grad_norm": 0.828125, "learning_rate": 0.00019001593235970908, "loss": 1.0538, "step": 8733 }, { "epoch": 0.22426462918117027, "grad_norm": 0.7578125, "learning_rate": 0.0001900139878388486, "loss": 0.945, "step": 8734 }, { "epoch": 0.2242903063770921, "grad_norm": 0.828125, "learning_rate": 0.00019001204313859836, "loss": 1.0126, "step": 8735 }, { "epoch": 0.2243159835730139, "grad_norm": 0.875, "learning_rate": 0.00019001009825896222, "loss": 0.9716, "step": 8736 }, { "epoch": 0.22434166076893572, "grad_norm": 0.79296875, "learning_rate": 0.0001900081531999441, "loss": 1.0068, "step": 8737 }, { "epoch": 0.22436733796485755, "grad_norm": 0.78125, "learning_rate": 0.00019000620796154788, "loss": 0.9529, "step": 8738 }, { "epoch": 0.22439301516077936, "grad_norm": 0.8671875, "learning_rate": 0.0001900042625437774, "loss": 0.9138, "step": 8739 }, { "epoch": 0.2244186923567012, "grad_norm": 0.75390625, "learning_rate": 0.0001900023169466366, "loss": 1.0348, "step": 8740 }, { "epoch": 0.224444369552623, "grad_norm": 0.79296875, "learning_rate": 0.00019000037117012926, "loss": 1.0168, "step": 8741 }, { "epoch": 0.22447004674854482, "grad_norm": 0.78515625, "learning_rate": 0.00018999842521425934, "loss": 0.9948, "step": 8742 }, { "epoch": 0.22449572394446665, "grad_norm": 0.85546875, "learning_rate": 0.00018999647907903065, "loss": 1.0397, "step": 8743 }, { "epoch": 0.22452140114038846, "grad_norm": 0.80078125, "learning_rate": 0.00018999453276444714, "loss": 0.9386, "step": 8744 }, { "epoch": 0.2245470783363103, "grad_norm": 0.83203125, "learning_rate": 0.00018999258627051265, "loss": 1.0861, "step": 8745 }, { "epoch": 0.2245727555322321, "grad_norm": 0.74609375, "learning_rate": 0.00018999063959723107, "loss": 1.044, "step": 8746 }, { "epoch": 0.2245984327281539, "grad_norm": 0.82421875, "learning_rate": 0.00018998869274460626, "loss": 0.9908, "step": 8747 }, { "epoch": 0.22462410992407575, "grad_norm": 0.84765625, "learning_rate": 0.00018998674571264212, "loss": 1.1096, "step": 8748 }, { "epoch": 0.22464978711999756, "grad_norm": 0.7890625, "learning_rate": 0.0001899847985013425, "loss": 1.0982, "step": 8749 }, { "epoch": 0.2246754643159194, "grad_norm": 0.8828125, "learning_rate": 0.00018998285111071134, "loss": 1.1614, "step": 8750 }, { "epoch": 0.2247011415118412, "grad_norm": 0.82421875, "learning_rate": 0.0001899809035407525, "loss": 1.0216, "step": 8751 }, { "epoch": 0.224726818707763, "grad_norm": 0.7578125, "learning_rate": 0.00018997895579146978, "loss": 1.0333, "step": 8752 }, { "epoch": 0.22475249590368485, "grad_norm": 0.75390625, "learning_rate": 0.00018997700786286718, "loss": 1.1013, "step": 8753 }, { "epoch": 0.22477817309960665, "grad_norm": 0.83984375, "learning_rate": 0.00018997505975494852, "loss": 1.0576, "step": 8754 }, { "epoch": 0.2248038502955285, "grad_norm": 0.765625, "learning_rate": 0.0001899731114677177, "loss": 0.9128, "step": 8755 }, { "epoch": 0.2248295274914503, "grad_norm": 0.7578125, "learning_rate": 0.0001899711630011786, "loss": 0.8875, "step": 8756 }, { "epoch": 0.2248552046873721, "grad_norm": 0.71484375, "learning_rate": 0.0001899692143553351, "loss": 0.9186, "step": 8757 }, { "epoch": 0.22488088188329394, "grad_norm": 0.7734375, "learning_rate": 0.00018996726553019105, "loss": 1.0116, "step": 8758 }, { "epoch": 0.22490655907921575, "grad_norm": 0.796875, "learning_rate": 0.00018996531652575038, "loss": 1.0279, "step": 8759 }, { "epoch": 0.2249322362751376, "grad_norm": 0.81640625, "learning_rate": 0.00018996336734201696, "loss": 0.9048, "step": 8760 }, { "epoch": 0.2249579134710594, "grad_norm": 0.79296875, "learning_rate": 0.0001899614179789947, "loss": 1.0751, "step": 8761 }, { "epoch": 0.2249835906669812, "grad_norm": 0.76953125, "learning_rate": 0.00018995946843668743, "loss": 0.9742, "step": 8762 }, { "epoch": 0.22500926786290304, "grad_norm": 0.87109375, "learning_rate": 0.00018995751871509907, "loss": 1.0668, "step": 8763 }, { "epoch": 0.22503494505882485, "grad_norm": 0.81640625, "learning_rate": 0.00018995556881423354, "loss": 1.0046, "step": 8764 }, { "epoch": 0.22506062225474668, "grad_norm": 0.80859375, "learning_rate": 0.00018995361873409464, "loss": 1.0267, "step": 8765 }, { "epoch": 0.2250862994506685, "grad_norm": 0.85546875, "learning_rate": 0.00018995166847468633, "loss": 1.1304, "step": 8766 }, { "epoch": 0.2251119766465903, "grad_norm": 0.82421875, "learning_rate": 0.00018994971803601245, "loss": 0.9087, "step": 8767 }, { "epoch": 0.22513765384251214, "grad_norm": 0.7421875, "learning_rate": 0.0001899477674180769, "loss": 1.0053, "step": 8768 }, { "epoch": 0.22516333103843394, "grad_norm": 0.75390625, "learning_rate": 0.0001899458166208836, "loss": 1.0153, "step": 8769 }, { "epoch": 0.22518900823435578, "grad_norm": 0.84375, "learning_rate": 0.00018994386564443643, "loss": 1.0753, "step": 8770 }, { "epoch": 0.2252146854302776, "grad_norm": 0.9140625, "learning_rate": 0.00018994191448873924, "loss": 1.1601, "step": 8771 }, { "epoch": 0.2252403626261994, "grad_norm": 0.7890625, "learning_rate": 0.00018993996315379592, "loss": 0.9589, "step": 8772 }, { "epoch": 0.22526603982212123, "grad_norm": 0.75390625, "learning_rate": 0.00018993801163961037, "loss": 0.9481, "step": 8773 }, { "epoch": 0.22529171701804304, "grad_norm": 0.82421875, "learning_rate": 0.00018993605994618653, "loss": 1.111, "step": 8774 }, { "epoch": 0.22531739421396488, "grad_norm": 0.796875, "learning_rate": 0.00018993410807352824, "loss": 0.9544, "step": 8775 }, { "epoch": 0.22534307140988669, "grad_norm": 0.796875, "learning_rate": 0.00018993215602163937, "loss": 1.1313, "step": 8776 }, { "epoch": 0.2253687486058085, "grad_norm": 0.8203125, "learning_rate": 0.00018993020379052384, "loss": 1.0992, "step": 8777 }, { "epoch": 0.22539442580173033, "grad_norm": 0.80078125, "learning_rate": 0.00018992825138018556, "loss": 1.0888, "step": 8778 }, { "epoch": 0.22542010299765214, "grad_norm": 0.8125, "learning_rate": 0.00018992629879062837, "loss": 1.0451, "step": 8779 }, { "epoch": 0.22544578019357395, "grad_norm": 0.80078125, "learning_rate": 0.0001899243460218562, "loss": 1.0566, "step": 8780 }, { "epoch": 0.22547145738949578, "grad_norm": 0.76953125, "learning_rate": 0.00018992239307387293, "loss": 1.0432, "step": 8781 }, { "epoch": 0.2254971345854176, "grad_norm": 0.83984375, "learning_rate": 0.00018992043994668246, "loss": 1.0575, "step": 8782 }, { "epoch": 0.22552281178133943, "grad_norm": 0.8046875, "learning_rate": 0.0001899184866402887, "loss": 1.0322, "step": 8783 }, { "epoch": 0.22554848897726124, "grad_norm": 0.75390625, "learning_rate": 0.00018991653315469548, "loss": 0.958, "step": 8784 }, { "epoch": 0.22557416617318304, "grad_norm": 0.73828125, "learning_rate": 0.00018991457948990673, "loss": 0.9287, "step": 8785 }, { "epoch": 0.22559984336910488, "grad_norm": 0.7890625, "learning_rate": 0.00018991262564592635, "loss": 1.0076, "step": 8786 }, { "epoch": 0.2256255205650267, "grad_norm": 0.78125, "learning_rate": 0.00018991067162275825, "loss": 1.2062, "step": 8787 }, { "epoch": 0.22565119776094852, "grad_norm": 0.78125, "learning_rate": 0.0001899087174204063, "loss": 1.04, "step": 8788 }, { "epoch": 0.22567687495687033, "grad_norm": 0.8125, "learning_rate": 0.00018990676303887438, "loss": 1.112, "step": 8789 }, { "epoch": 0.22570255215279214, "grad_norm": 0.8203125, "learning_rate": 0.0001899048084781664, "loss": 1.0638, "step": 8790 }, { "epoch": 0.22572822934871398, "grad_norm": 0.75, "learning_rate": 0.00018990285373828627, "loss": 0.9877, "step": 8791 }, { "epoch": 0.22575390654463579, "grad_norm": 0.73828125, "learning_rate": 0.00018990089881923788, "loss": 1.023, "step": 8792 }, { "epoch": 0.22577958374055762, "grad_norm": 0.8125, "learning_rate": 0.0001898989437210251, "loss": 1.0864, "step": 8793 }, { "epoch": 0.22580526093647943, "grad_norm": 0.84375, "learning_rate": 0.00018989698844365188, "loss": 0.8876, "step": 8794 }, { "epoch": 0.22583093813240124, "grad_norm": 0.81640625, "learning_rate": 0.00018989503298712202, "loss": 1.1076, "step": 8795 }, { "epoch": 0.22585661532832307, "grad_norm": 0.76171875, "learning_rate": 0.00018989307735143952, "loss": 0.9807, "step": 8796 }, { "epoch": 0.22588229252424488, "grad_norm": 0.8828125, "learning_rate": 0.00018989112153660825, "loss": 1.025, "step": 8797 }, { "epoch": 0.22590796972016672, "grad_norm": 0.8359375, "learning_rate": 0.00018988916554263206, "loss": 1.1281, "step": 8798 }, { "epoch": 0.22593364691608853, "grad_norm": 0.828125, "learning_rate": 0.0001898872093695149, "loss": 1.1343, "step": 8799 }, { "epoch": 0.22595932411201033, "grad_norm": 0.91796875, "learning_rate": 0.00018988525301726067, "loss": 0.9511, "step": 8800 }, { "epoch": 0.22598500130793217, "grad_norm": 0.7578125, "learning_rate": 0.00018988329648587324, "loss": 1.2331, "step": 8801 }, { "epoch": 0.22601067850385398, "grad_norm": 0.7890625, "learning_rate": 0.0001898813397753565, "loss": 0.9939, "step": 8802 }, { "epoch": 0.22603635569977581, "grad_norm": 0.828125, "learning_rate": 0.00018987938288571437, "loss": 1.0031, "step": 8803 }, { "epoch": 0.22606203289569762, "grad_norm": 0.8515625, "learning_rate": 0.0001898774258169508, "loss": 0.9794, "step": 8804 }, { "epoch": 0.22608771009161943, "grad_norm": 0.7734375, "learning_rate": 0.00018987546856906958, "loss": 1.0883, "step": 8805 }, { "epoch": 0.22611338728754127, "grad_norm": 0.85546875, "learning_rate": 0.00018987351114207467, "loss": 0.9953, "step": 8806 }, { "epoch": 0.22613906448346308, "grad_norm": 0.90234375, "learning_rate": 0.00018987155353597, "loss": 1.0042, "step": 8807 }, { "epoch": 0.2261647416793849, "grad_norm": 0.80078125, "learning_rate": 0.00018986959575075944, "loss": 1.0444, "step": 8808 }, { "epoch": 0.22619041887530672, "grad_norm": 0.8671875, "learning_rate": 0.00018986763778644687, "loss": 1.0826, "step": 8809 }, { "epoch": 0.22621609607122853, "grad_norm": 0.7734375, "learning_rate": 0.00018986567964303627, "loss": 0.9354, "step": 8810 }, { "epoch": 0.22624177326715036, "grad_norm": 0.8203125, "learning_rate": 0.00018986372132053143, "loss": 1.12, "step": 8811 }, { "epoch": 0.22626745046307217, "grad_norm": 0.80078125, "learning_rate": 0.00018986176281893632, "loss": 0.9835, "step": 8812 }, { "epoch": 0.226293127658994, "grad_norm": 0.8203125, "learning_rate": 0.00018985980413825483, "loss": 1.1239, "step": 8813 }, { "epoch": 0.22631880485491582, "grad_norm": 0.72265625, "learning_rate": 0.00018985784527849087, "loss": 0.884, "step": 8814 }, { "epoch": 0.22634448205083763, "grad_norm": 0.79296875, "learning_rate": 0.00018985588623964835, "loss": 1.114, "step": 8815 }, { "epoch": 0.22637015924675946, "grad_norm": 0.81640625, "learning_rate": 0.0001898539270217312, "loss": 1.049, "step": 8816 }, { "epoch": 0.22639583644268127, "grad_norm": 0.78515625, "learning_rate": 0.00018985196762474327, "loss": 1.09, "step": 8817 }, { "epoch": 0.2264215136386031, "grad_norm": 0.78125, "learning_rate": 0.00018985000804868846, "loss": 0.8892, "step": 8818 }, { "epoch": 0.22644719083452491, "grad_norm": 0.77734375, "learning_rate": 0.0001898480482935707, "loss": 1.0261, "step": 8819 }, { "epoch": 0.22647286803044672, "grad_norm": 0.84375, "learning_rate": 0.0001898460883593939, "loss": 1.0447, "step": 8820 }, { "epoch": 0.22649854522636856, "grad_norm": 0.81640625, "learning_rate": 0.00018984412824616197, "loss": 1.1194, "step": 8821 }, { "epoch": 0.22652422242229037, "grad_norm": 0.7734375, "learning_rate": 0.00018984216795387882, "loss": 1.0712, "step": 8822 }, { "epoch": 0.2265498996182122, "grad_norm": 0.83203125, "learning_rate": 0.00018984020748254833, "loss": 0.9954, "step": 8823 }, { "epoch": 0.226575576814134, "grad_norm": 0.8203125, "learning_rate": 0.00018983824683217442, "loss": 1.0679, "step": 8824 }, { "epoch": 0.22660125401005582, "grad_norm": 0.78125, "learning_rate": 0.000189836286002761, "loss": 1.0742, "step": 8825 }, { "epoch": 0.22662693120597766, "grad_norm": 0.7890625, "learning_rate": 0.00018983432499431198, "loss": 1.0728, "step": 8826 }, { "epoch": 0.22665260840189946, "grad_norm": 0.8125, "learning_rate": 0.00018983236380683125, "loss": 1.0125, "step": 8827 }, { "epoch": 0.2266782855978213, "grad_norm": 0.8515625, "learning_rate": 0.00018983040244032276, "loss": 0.9505, "step": 8828 }, { "epoch": 0.2267039627937431, "grad_norm": 0.8046875, "learning_rate": 0.0001898284408947904, "loss": 0.9066, "step": 8829 }, { "epoch": 0.22672963998966492, "grad_norm": 0.8046875, "learning_rate": 0.00018982647917023803, "loss": 1.009, "step": 8830 }, { "epoch": 0.22675531718558675, "grad_norm": 0.90234375, "learning_rate": 0.00018982451726666963, "loss": 0.9769, "step": 8831 }, { "epoch": 0.22678099438150856, "grad_norm": 0.8984375, "learning_rate": 0.00018982255518408908, "loss": 0.9791, "step": 8832 }, { "epoch": 0.2268066715774304, "grad_norm": 0.7421875, "learning_rate": 0.00018982059292250026, "loss": 0.9761, "step": 8833 }, { "epoch": 0.2268323487733522, "grad_norm": 0.77734375, "learning_rate": 0.00018981863048190712, "loss": 1.0265, "step": 8834 }, { "epoch": 0.226858025969274, "grad_norm": 0.8046875, "learning_rate": 0.0001898166678623136, "loss": 1.121, "step": 8835 }, { "epoch": 0.22688370316519585, "grad_norm": 0.953125, "learning_rate": 0.00018981470506372356, "loss": 1.009, "step": 8836 }, { "epoch": 0.22690938036111766, "grad_norm": 0.83984375, "learning_rate": 0.00018981274208614091, "loss": 0.9643, "step": 8837 }, { "epoch": 0.2269350575570395, "grad_norm": 0.8203125, "learning_rate": 0.0001898107789295696, "loss": 1.1627, "step": 8838 }, { "epoch": 0.2269607347529613, "grad_norm": 0.7734375, "learning_rate": 0.00018980881559401352, "loss": 1.0139, "step": 8839 }, { "epoch": 0.2269864119488831, "grad_norm": 0.80078125, "learning_rate": 0.00018980685207947655, "loss": 0.9145, "step": 8840 }, { "epoch": 0.22701208914480495, "grad_norm": 0.85546875, "learning_rate": 0.00018980488838596265, "loss": 1.1105, "step": 8841 }, { "epoch": 0.22703776634072675, "grad_norm": 0.79296875, "learning_rate": 0.00018980292451347571, "loss": 1.0992, "step": 8842 }, { "epoch": 0.2270634435366486, "grad_norm": 0.859375, "learning_rate": 0.00018980096046201968, "loss": 0.973, "step": 8843 }, { "epoch": 0.2270891207325704, "grad_norm": 0.82421875, "learning_rate": 0.00018979899623159844, "loss": 0.9832, "step": 8844 }, { "epoch": 0.2271147979284922, "grad_norm": 0.73046875, "learning_rate": 0.0001897970318222159, "loss": 0.954, "step": 8845 }, { "epoch": 0.22714047512441404, "grad_norm": 0.77734375, "learning_rate": 0.000189795067233876, "loss": 1.1722, "step": 8846 }, { "epoch": 0.22716615232033585, "grad_norm": 0.76171875, "learning_rate": 0.00018979310246658264, "loss": 1.0561, "step": 8847 }, { "epoch": 0.2271918295162577, "grad_norm": 0.86328125, "learning_rate": 0.00018979113752033973, "loss": 1.1306, "step": 8848 }, { "epoch": 0.2272175067121795, "grad_norm": 0.81640625, "learning_rate": 0.0001897891723951512, "loss": 0.9903, "step": 8849 }, { "epoch": 0.2272431839081013, "grad_norm": 0.76953125, "learning_rate": 0.00018978720709102097, "loss": 0.9801, "step": 8850 }, { "epoch": 0.22726886110402314, "grad_norm": 0.8359375, "learning_rate": 0.00018978524160795292, "loss": 0.9881, "step": 8851 }, { "epoch": 0.22729453829994495, "grad_norm": 0.8125, "learning_rate": 0.00018978327594595102, "loss": 1.0035, "step": 8852 }, { "epoch": 0.22732021549586678, "grad_norm": 0.78125, "learning_rate": 0.00018978131010501917, "loss": 0.9113, "step": 8853 }, { "epoch": 0.2273458926917886, "grad_norm": 0.80078125, "learning_rate": 0.00018977934408516122, "loss": 1.0313, "step": 8854 }, { "epoch": 0.2273715698877104, "grad_norm": 0.89453125, "learning_rate": 0.00018977737788638123, "loss": 1.0299, "step": 8855 }, { "epoch": 0.22739724708363224, "grad_norm": 0.75, "learning_rate": 0.00018977541150868297, "loss": 0.8394, "step": 8856 }, { "epoch": 0.22742292427955405, "grad_norm": 0.76171875, "learning_rate": 0.00018977344495207046, "loss": 0.9794, "step": 8857 }, { "epoch": 0.22744860147547588, "grad_norm": 0.72265625, "learning_rate": 0.00018977147821654756, "loss": 0.9581, "step": 8858 }, { "epoch": 0.2274742786713977, "grad_norm": 0.78125, "learning_rate": 0.0001897695113021182, "loss": 1.0183, "step": 8859 }, { "epoch": 0.2274999558673195, "grad_norm": 0.82421875, "learning_rate": 0.00018976754420878633, "loss": 1.051, "step": 8860 }, { "epoch": 0.22752563306324133, "grad_norm": 0.78125, "learning_rate": 0.00018976557693655587, "loss": 0.9713, "step": 8861 }, { "epoch": 0.22755131025916314, "grad_norm": 0.83203125, "learning_rate": 0.0001897636094854307, "loss": 0.9305, "step": 8862 }, { "epoch": 0.22757698745508498, "grad_norm": 0.83203125, "learning_rate": 0.0001897616418554148, "loss": 1.0544, "step": 8863 }, { "epoch": 0.2276026646510068, "grad_norm": 0.98828125, "learning_rate": 0.000189759674046512, "loss": 1.1265, "step": 8864 }, { "epoch": 0.2276283418469286, "grad_norm": 0.81640625, "learning_rate": 0.00018975770605872633, "loss": 1.0623, "step": 8865 }, { "epoch": 0.22765401904285043, "grad_norm": 0.79296875, "learning_rate": 0.00018975573789206164, "loss": 1.0524, "step": 8866 }, { "epoch": 0.22767969623877224, "grad_norm": 0.82421875, "learning_rate": 0.00018975376954652185, "loss": 0.9223, "step": 8867 }, { "epoch": 0.22770537343469407, "grad_norm": 0.8359375, "learning_rate": 0.00018975180102211094, "loss": 1.092, "step": 8868 }, { "epoch": 0.22773105063061588, "grad_norm": 0.80078125, "learning_rate": 0.0001897498323188328, "loss": 0.9844, "step": 8869 }, { "epoch": 0.2277567278265377, "grad_norm": 0.765625, "learning_rate": 0.00018974786343669132, "loss": 0.9109, "step": 8870 }, { "epoch": 0.22778240502245953, "grad_norm": 0.80078125, "learning_rate": 0.00018974589437569046, "loss": 0.9823, "step": 8871 }, { "epoch": 0.22780808221838134, "grad_norm": 0.8203125, "learning_rate": 0.00018974392513583415, "loss": 1.1015, "step": 8872 }, { "epoch": 0.22783375941430317, "grad_norm": 0.80859375, "learning_rate": 0.0001897419557171263, "loss": 1.0732, "step": 8873 }, { "epoch": 0.22785943661022498, "grad_norm": 0.8046875, "learning_rate": 0.00018973998611957087, "loss": 0.9321, "step": 8874 }, { "epoch": 0.2278851138061468, "grad_norm": 0.79296875, "learning_rate": 0.00018973801634317172, "loss": 0.9741, "step": 8875 }, { "epoch": 0.22791079100206862, "grad_norm": 0.77734375, "learning_rate": 0.0001897360463879328, "loss": 0.9637, "step": 8876 }, { "epoch": 0.22793646819799043, "grad_norm": 0.8203125, "learning_rate": 0.00018973407625385807, "loss": 0.9487, "step": 8877 }, { "epoch": 0.22796214539391227, "grad_norm": 0.7734375, "learning_rate": 0.0001897321059409514, "loss": 0.8689, "step": 8878 }, { "epoch": 0.22798782258983408, "grad_norm": 0.8671875, "learning_rate": 0.00018973013544921677, "loss": 0.9884, "step": 8879 }, { "epoch": 0.22801349978575589, "grad_norm": 0.828125, "learning_rate": 0.0001897281647786581, "loss": 1.0183, "step": 8880 }, { "epoch": 0.22803917698167772, "grad_norm": 0.73046875, "learning_rate": 0.00018972619392927927, "loss": 0.9755, "step": 8881 }, { "epoch": 0.22806485417759953, "grad_norm": 0.828125, "learning_rate": 0.00018972422290108426, "loss": 1.0097, "step": 8882 }, { "epoch": 0.22809053137352137, "grad_norm": 0.86328125, "learning_rate": 0.000189722251694077, "loss": 1.1014, "step": 8883 }, { "epoch": 0.22811620856944317, "grad_norm": 0.85546875, "learning_rate": 0.00018972028030826138, "loss": 1.1564, "step": 8884 }, { "epoch": 0.22814188576536498, "grad_norm": 0.82421875, "learning_rate": 0.00018971830874364135, "loss": 1.085, "step": 8885 }, { "epoch": 0.22816756296128682, "grad_norm": 0.86328125, "learning_rate": 0.0001897163370002208, "loss": 1.0596, "step": 8886 }, { "epoch": 0.22819324015720863, "grad_norm": 0.83984375, "learning_rate": 0.00018971436507800372, "loss": 1.0751, "step": 8887 }, { "epoch": 0.22821891735313046, "grad_norm": 0.82421875, "learning_rate": 0.00018971239297699405, "loss": 0.8939, "step": 8888 }, { "epoch": 0.22824459454905227, "grad_norm": 0.76953125, "learning_rate": 0.0001897104206971956, "loss": 0.904, "step": 8889 }, { "epoch": 0.22827027174497408, "grad_norm": 0.79296875, "learning_rate": 0.00018970844823861245, "loss": 1.0657, "step": 8890 }, { "epoch": 0.22829594894089592, "grad_norm": 0.80078125, "learning_rate": 0.00018970647560124845, "loss": 1.0232, "step": 8891 }, { "epoch": 0.22832162613681772, "grad_norm": 0.828125, "learning_rate": 0.00018970450278510753, "loss": 1.0342, "step": 8892 }, { "epoch": 0.22834730333273956, "grad_norm": 0.8046875, "learning_rate": 0.00018970252979019368, "loss": 1.1209, "step": 8893 }, { "epoch": 0.22837298052866137, "grad_norm": 0.80859375, "learning_rate": 0.00018970055661651073, "loss": 0.9712, "step": 8894 }, { "epoch": 0.22839865772458318, "grad_norm": 0.828125, "learning_rate": 0.00018969858326406272, "loss": 1.0546, "step": 8895 }, { "epoch": 0.228424334920505, "grad_norm": 0.80859375, "learning_rate": 0.00018969660973285356, "loss": 1.1077, "step": 8896 }, { "epoch": 0.22845001211642682, "grad_norm": 0.83203125, "learning_rate": 0.0001896946360228871, "loss": 0.932, "step": 8897 }, { "epoch": 0.22847568931234866, "grad_norm": 0.76171875, "learning_rate": 0.00018969266213416733, "loss": 0.9316, "step": 8898 }, { "epoch": 0.22850136650827046, "grad_norm": 0.82421875, "learning_rate": 0.00018969068806669824, "loss": 1.0022, "step": 8899 }, { "epoch": 0.22852704370419227, "grad_norm": 0.85546875, "learning_rate": 0.0001896887138204837, "loss": 1.039, "step": 8900 }, { "epoch": 0.2285527209001141, "grad_norm": 0.73828125, "learning_rate": 0.00018968673939552763, "loss": 0.9729, "step": 8901 }, { "epoch": 0.22857839809603592, "grad_norm": 0.8828125, "learning_rate": 0.000189684764791834, "loss": 1.0312, "step": 8902 }, { "epoch": 0.22860407529195775, "grad_norm": 0.75, "learning_rate": 0.00018968279000940673, "loss": 0.918, "step": 8903 }, { "epoch": 0.22862975248787956, "grad_norm": 0.796875, "learning_rate": 0.00018968081504824977, "loss": 1.1112, "step": 8904 }, { "epoch": 0.22865542968380137, "grad_norm": 0.83984375, "learning_rate": 0.00018967883990836701, "loss": 0.8819, "step": 8905 }, { "epoch": 0.2286811068797232, "grad_norm": 0.8046875, "learning_rate": 0.00018967686458976247, "loss": 0.844, "step": 8906 }, { "epoch": 0.22870678407564501, "grad_norm": 0.828125, "learning_rate": 0.00018967488909244002, "loss": 0.9621, "step": 8907 }, { "epoch": 0.22873246127156685, "grad_norm": 0.82421875, "learning_rate": 0.0001896729134164036, "loss": 1.0605, "step": 8908 }, { "epoch": 0.22875813846748866, "grad_norm": 0.76953125, "learning_rate": 0.0001896709375616572, "loss": 1.044, "step": 8909 }, { "epoch": 0.22878381566341047, "grad_norm": 0.84765625, "learning_rate": 0.0001896689615282047, "loss": 0.9853, "step": 8910 }, { "epoch": 0.2288094928593323, "grad_norm": 0.90234375, "learning_rate": 0.00018966698531605006, "loss": 1.1068, "step": 8911 }, { "epoch": 0.2288351700552541, "grad_norm": 0.7734375, "learning_rate": 0.00018966500892519722, "loss": 1.0383, "step": 8912 }, { "epoch": 0.22886084725117595, "grad_norm": 0.8359375, "learning_rate": 0.00018966303235565013, "loss": 0.9685, "step": 8913 }, { "epoch": 0.22888652444709776, "grad_norm": 0.74609375, "learning_rate": 0.0001896610556074127, "loss": 0.8843, "step": 8914 }, { "epoch": 0.22891220164301956, "grad_norm": 0.80078125, "learning_rate": 0.0001896590786804889, "loss": 0.9646, "step": 8915 }, { "epoch": 0.2289378788389414, "grad_norm": 0.8203125, "learning_rate": 0.00018965710157488262, "loss": 1.0226, "step": 8916 }, { "epoch": 0.2289635560348632, "grad_norm": 0.82421875, "learning_rate": 0.00018965512429059786, "loss": 1.1429, "step": 8917 }, { "epoch": 0.22898923323078504, "grad_norm": 0.7109375, "learning_rate": 0.00018965314682763856, "loss": 0.9742, "step": 8918 }, { "epoch": 0.22901491042670685, "grad_norm": 0.80078125, "learning_rate": 0.0001896511691860086, "loss": 1.0595, "step": 8919 }, { "epoch": 0.22904058762262866, "grad_norm": 0.77734375, "learning_rate": 0.00018964919136571195, "loss": 1.0131, "step": 8920 }, { "epoch": 0.2290662648185505, "grad_norm": 0.7578125, "learning_rate": 0.0001896472133667526, "loss": 1.1305, "step": 8921 }, { "epoch": 0.2290919420144723, "grad_norm": 0.85546875, "learning_rate": 0.00018964523518913442, "loss": 1.0942, "step": 8922 }, { "epoch": 0.22911761921039414, "grad_norm": 0.8203125, "learning_rate": 0.0001896432568328614, "loss": 1.0395, "step": 8923 }, { "epoch": 0.22914329640631595, "grad_norm": 0.85546875, "learning_rate": 0.00018964127829793745, "loss": 1.1178, "step": 8924 }, { "epoch": 0.22916897360223776, "grad_norm": 0.8046875, "learning_rate": 0.00018963929958436656, "loss": 0.9845, "step": 8925 }, { "epoch": 0.2291946507981596, "grad_norm": 0.94921875, "learning_rate": 0.00018963732069215264, "loss": 1.1037, "step": 8926 }, { "epoch": 0.2292203279940814, "grad_norm": 0.8046875, "learning_rate": 0.00018963534162129962, "loss": 1.0408, "step": 8927 }, { "epoch": 0.22924600519000324, "grad_norm": 0.8515625, "learning_rate": 0.00018963336237181144, "loss": 1.2059, "step": 8928 }, { "epoch": 0.22927168238592505, "grad_norm": 0.8359375, "learning_rate": 0.00018963138294369208, "loss": 1.0435, "step": 8929 }, { "epoch": 0.22929735958184685, "grad_norm": 0.90234375, "learning_rate": 0.00018962940333694546, "loss": 0.9674, "step": 8930 }, { "epoch": 0.2293230367777687, "grad_norm": 0.77734375, "learning_rate": 0.00018962742355157556, "loss": 0.9464, "step": 8931 }, { "epoch": 0.2293487139736905, "grad_norm": 0.7734375, "learning_rate": 0.0001896254435875863, "loss": 0.938, "step": 8932 }, { "epoch": 0.22937439116961233, "grad_norm": 0.78125, "learning_rate": 0.00018962346344498162, "loss": 0.912, "step": 8933 }, { "epoch": 0.22940006836553414, "grad_norm": 0.8046875, "learning_rate": 0.00018962148312376547, "loss": 0.952, "step": 8934 }, { "epoch": 0.22942574556145595, "grad_norm": 0.84765625, "learning_rate": 0.00018961950262394182, "loss": 0.9873, "step": 8935 }, { "epoch": 0.2294514227573778, "grad_norm": 0.734375, "learning_rate": 0.00018961752194551456, "loss": 0.8639, "step": 8936 }, { "epoch": 0.2294770999532996, "grad_norm": 0.859375, "learning_rate": 0.0001896155410884877, "loss": 1.0134, "step": 8937 }, { "epoch": 0.22950277714922143, "grad_norm": 0.85546875, "learning_rate": 0.00018961356005286516, "loss": 1.0244, "step": 8938 }, { "epoch": 0.22952845434514324, "grad_norm": 0.80859375, "learning_rate": 0.00018961157883865088, "loss": 1.0612, "step": 8939 }, { "epoch": 0.22955413154106505, "grad_norm": 0.8359375, "learning_rate": 0.0001896095974458488, "loss": 1.0665, "step": 8940 }, { "epoch": 0.22957980873698688, "grad_norm": 0.8125, "learning_rate": 0.00018960761587446295, "loss": 1.0216, "step": 8941 }, { "epoch": 0.2296054859329087, "grad_norm": 0.8125, "learning_rate": 0.00018960563412449716, "loss": 1.0076, "step": 8942 }, { "epoch": 0.22963116312883053, "grad_norm": 0.828125, "learning_rate": 0.00018960365219595544, "loss": 0.8687, "step": 8943 }, { "epoch": 0.22965684032475234, "grad_norm": 0.7734375, "learning_rate": 0.00018960167008884174, "loss": 1.0093, "step": 8944 }, { "epoch": 0.22968251752067415, "grad_norm": 0.78125, "learning_rate": 0.00018959968780316003, "loss": 0.9663, "step": 8945 }, { "epoch": 0.22970819471659598, "grad_norm": 0.8046875, "learning_rate": 0.00018959770533891418, "loss": 1.0005, "step": 8946 }, { "epoch": 0.2297338719125178, "grad_norm": 0.89453125, "learning_rate": 0.00018959572269610826, "loss": 1.0422, "step": 8947 }, { "epoch": 0.22975954910843963, "grad_norm": 0.7734375, "learning_rate": 0.00018959373987474613, "loss": 0.9981, "step": 8948 }, { "epoch": 0.22978522630436143, "grad_norm": 0.76171875, "learning_rate": 0.00018959175687483176, "loss": 1.0827, "step": 8949 }, { "epoch": 0.22981090350028324, "grad_norm": 0.7734375, "learning_rate": 0.00018958977369636911, "loss": 0.9647, "step": 8950 }, { "epoch": 0.22983658069620508, "grad_norm": 0.80078125, "learning_rate": 0.00018958779033936214, "loss": 1.0662, "step": 8951 }, { "epoch": 0.2298622578921269, "grad_norm": 0.76953125, "learning_rate": 0.00018958580680381482, "loss": 0.9272, "step": 8952 }, { "epoch": 0.22988793508804872, "grad_norm": 0.80859375, "learning_rate": 0.00018958382308973106, "loss": 1.0084, "step": 8953 }, { "epoch": 0.22991361228397053, "grad_norm": 0.80859375, "learning_rate": 0.00018958183919711482, "loss": 1.06, "step": 8954 }, { "epoch": 0.22993928947989234, "grad_norm": 0.81640625, "learning_rate": 0.00018957985512597008, "loss": 1.138, "step": 8955 }, { "epoch": 0.22996496667581418, "grad_norm": 0.796875, "learning_rate": 0.00018957787087630078, "loss": 1.0641, "step": 8956 }, { "epoch": 0.22999064387173598, "grad_norm": 0.79296875, "learning_rate": 0.0001895758864481109, "loss": 0.8776, "step": 8957 }, { "epoch": 0.23001632106765782, "grad_norm": 0.8125, "learning_rate": 0.0001895739018414043, "loss": 1.0399, "step": 8958 }, { "epoch": 0.23004199826357963, "grad_norm": 0.796875, "learning_rate": 0.00018957191705618506, "loss": 0.9733, "step": 8959 }, { "epoch": 0.23006767545950144, "grad_norm": 0.76953125, "learning_rate": 0.0001895699320924571, "loss": 1.0985, "step": 8960 }, { "epoch": 0.23009335265542327, "grad_norm": 0.85546875, "learning_rate": 0.0001895679469502243, "loss": 0.9247, "step": 8961 }, { "epoch": 0.23011902985134508, "grad_norm": 0.796875, "learning_rate": 0.0001895659616294907, "loss": 0.988, "step": 8962 }, { "epoch": 0.23014470704726692, "grad_norm": 0.8046875, "learning_rate": 0.00018956397613026021, "loss": 1.1132, "step": 8963 }, { "epoch": 0.23017038424318872, "grad_norm": 0.81640625, "learning_rate": 0.00018956199045253684, "loss": 1.1461, "step": 8964 }, { "epoch": 0.23019606143911053, "grad_norm": 0.7578125, "learning_rate": 0.00018956000459632448, "loss": 1.0227, "step": 8965 }, { "epoch": 0.23022173863503237, "grad_norm": 0.796875, "learning_rate": 0.00018955801856162716, "loss": 1.0508, "step": 8966 }, { "epoch": 0.23024741583095418, "grad_norm": 0.84765625, "learning_rate": 0.00018955603234844877, "loss": 1.0149, "step": 8967 }, { "epoch": 0.230273093026876, "grad_norm": 0.7890625, "learning_rate": 0.00018955404595679328, "loss": 0.9552, "step": 8968 }, { "epoch": 0.23029877022279782, "grad_norm": 0.80078125, "learning_rate": 0.0001895520593866647, "loss": 0.9952, "step": 8969 }, { "epoch": 0.23032444741871963, "grad_norm": 0.78515625, "learning_rate": 0.00018955007263806695, "loss": 0.875, "step": 8970 }, { "epoch": 0.23035012461464147, "grad_norm": 0.8359375, "learning_rate": 0.00018954808571100396, "loss": 1.0563, "step": 8971 }, { "epoch": 0.23037580181056327, "grad_norm": 0.7890625, "learning_rate": 0.00018954609860547978, "loss": 0.9924, "step": 8972 }, { "epoch": 0.2304014790064851, "grad_norm": 0.77734375, "learning_rate": 0.00018954411132149828, "loss": 0.9592, "step": 8973 }, { "epoch": 0.23042715620240692, "grad_norm": 0.80859375, "learning_rate": 0.00018954212385906348, "loss": 1.0597, "step": 8974 }, { "epoch": 0.23045283339832873, "grad_norm": 0.81640625, "learning_rate": 0.00018954013621817926, "loss": 1.0313, "step": 8975 }, { "epoch": 0.23047851059425056, "grad_norm": 0.7734375, "learning_rate": 0.0001895381483988497, "loss": 0.8651, "step": 8976 }, { "epoch": 0.23050418779017237, "grad_norm": 0.82421875, "learning_rate": 0.00018953616040107867, "loss": 1.009, "step": 8977 }, { "epoch": 0.2305298649860942, "grad_norm": 0.7421875, "learning_rate": 0.00018953417222487015, "loss": 0.8906, "step": 8978 }, { "epoch": 0.23055554218201602, "grad_norm": 0.796875, "learning_rate": 0.0001895321838702281, "loss": 1.0251, "step": 8979 }, { "epoch": 0.23058121937793782, "grad_norm": 0.8125, "learning_rate": 0.00018953019533715653, "loss": 0.9143, "step": 8980 }, { "epoch": 0.23060689657385966, "grad_norm": 0.80859375, "learning_rate": 0.00018952820662565935, "loss": 1.1185, "step": 8981 }, { "epoch": 0.23063257376978147, "grad_norm": 0.7109375, "learning_rate": 0.00018952621773574054, "loss": 0.9437, "step": 8982 }, { "epoch": 0.2306582509657033, "grad_norm": 0.8984375, "learning_rate": 0.00018952422866740407, "loss": 0.8499, "step": 8983 }, { "epoch": 0.2306839281616251, "grad_norm": 0.765625, "learning_rate": 0.0001895222394206539, "loss": 0.9095, "step": 8984 }, { "epoch": 0.23070960535754692, "grad_norm": 0.796875, "learning_rate": 0.000189520249995494, "loss": 0.9245, "step": 8985 }, { "epoch": 0.23073528255346876, "grad_norm": 0.79296875, "learning_rate": 0.00018951826039192833, "loss": 1.0767, "step": 8986 }, { "epoch": 0.23076095974939057, "grad_norm": 0.890625, "learning_rate": 0.00018951627060996085, "loss": 1.0851, "step": 8987 }, { "epoch": 0.23078663694531237, "grad_norm": 0.81640625, "learning_rate": 0.00018951428064959552, "loss": 1.0263, "step": 8988 }, { "epoch": 0.2308123141412342, "grad_norm": 0.8203125, "learning_rate": 0.00018951229051083633, "loss": 1.0942, "step": 8989 }, { "epoch": 0.23083799133715602, "grad_norm": 0.84765625, "learning_rate": 0.00018951030019368718, "loss": 1.1074, "step": 8990 }, { "epoch": 0.23086366853307785, "grad_norm": 0.8046875, "learning_rate": 0.00018950830969815218, "loss": 1.1021, "step": 8991 }, { "epoch": 0.23088934572899966, "grad_norm": 0.91796875, "learning_rate": 0.00018950631902423513, "loss": 0.9994, "step": 8992 }, { "epoch": 0.23091502292492147, "grad_norm": 0.81640625, "learning_rate": 0.00018950432817194007, "loss": 0.9022, "step": 8993 }, { "epoch": 0.2309407001208433, "grad_norm": 0.8671875, "learning_rate": 0.00018950233714127099, "loss": 0.9801, "step": 8994 }, { "epoch": 0.23096637731676511, "grad_norm": 0.76171875, "learning_rate": 0.00018950034593223186, "loss": 0.9094, "step": 8995 }, { "epoch": 0.23099205451268695, "grad_norm": 0.8125, "learning_rate": 0.0001894983545448266, "loss": 1.0292, "step": 8996 }, { "epoch": 0.23101773170860876, "grad_norm": 0.83984375, "learning_rate": 0.0001894963629790592, "loss": 1.0691, "step": 8997 }, { "epoch": 0.23104340890453057, "grad_norm": 0.78125, "learning_rate": 0.00018949437123493362, "loss": 1.1004, "step": 8998 }, { "epoch": 0.2310690861004524, "grad_norm": 0.76953125, "learning_rate": 0.00018949237931245388, "loss": 0.9116, "step": 8999 }, { "epoch": 0.2310947632963742, "grad_norm": 0.81640625, "learning_rate": 0.00018949038721162385, "loss": 1.037, "step": 9000 }, { "epoch": 0.2310947632963742, "eval_loss": 1.0149517059326172, "eval_model_preparation_time": 0.0065, "eval_runtime": 405.9124, "eval_samples_per_second": 24.636, "eval_steps_per_second": 0.771, "step": 9000 }, { "epoch": 0.23112044049229605, "grad_norm": 0.8359375, "learning_rate": 0.00018948839493244764, "loss": 1.0099, "step": 9001 }, { "epoch": 0.23114611768821786, "grad_norm": 0.78515625, "learning_rate": 0.0001894864024749291, "loss": 1.1245, "step": 9002 }, { "epoch": 0.23117179488413966, "grad_norm": 0.85546875, "learning_rate": 0.00018948440983907226, "loss": 0.9419, "step": 9003 }, { "epoch": 0.2311974720800615, "grad_norm": 0.84375, "learning_rate": 0.00018948241702488106, "loss": 1.0653, "step": 9004 }, { "epoch": 0.2312231492759833, "grad_norm": 0.78515625, "learning_rate": 0.00018948042403235947, "loss": 0.9153, "step": 9005 }, { "epoch": 0.23124882647190514, "grad_norm": 0.859375, "learning_rate": 0.0001894784308615115, "loss": 1.1629, "step": 9006 }, { "epoch": 0.23127450366782695, "grad_norm": 0.7421875, "learning_rate": 0.0001894764375123411, "loss": 0.9075, "step": 9007 }, { "epoch": 0.23130018086374876, "grad_norm": 0.79296875, "learning_rate": 0.00018947444398485224, "loss": 0.9354, "step": 9008 }, { "epoch": 0.2313258580596706, "grad_norm": 0.8125, "learning_rate": 0.0001894724502790489, "loss": 1.0043, "step": 9009 }, { "epoch": 0.2313515352555924, "grad_norm": 0.796875, "learning_rate": 0.00018947045639493504, "loss": 1.0562, "step": 9010 }, { "epoch": 0.23137721245151424, "grad_norm": 0.7578125, "learning_rate": 0.00018946846233251465, "loss": 0.8771, "step": 9011 }, { "epoch": 0.23140288964743605, "grad_norm": 0.8203125, "learning_rate": 0.0001894664680917917, "loss": 0.9416, "step": 9012 }, { "epoch": 0.23142856684335786, "grad_norm": 1.1171875, "learning_rate": 0.00018946447367277017, "loss": 0.9902, "step": 9013 }, { "epoch": 0.2314542440392797, "grad_norm": 0.828125, "learning_rate": 0.000189462479075454, "loss": 0.9375, "step": 9014 }, { "epoch": 0.2314799212352015, "grad_norm": 0.8359375, "learning_rate": 0.00018946048429984718, "loss": 1.1646, "step": 9015 }, { "epoch": 0.23150559843112334, "grad_norm": 0.8125, "learning_rate": 0.00018945848934595372, "loss": 1.0698, "step": 9016 }, { "epoch": 0.23153127562704515, "grad_norm": 0.75390625, "learning_rate": 0.00018945649421377755, "loss": 1.0787, "step": 9017 }, { "epoch": 0.23155695282296695, "grad_norm": 0.8359375, "learning_rate": 0.00018945449890332269, "loss": 1.0214, "step": 9018 }, { "epoch": 0.2315826300188888, "grad_norm": 0.78125, "learning_rate": 0.00018945250341459308, "loss": 0.9217, "step": 9019 }, { "epoch": 0.2316083072148106, "grad_norm": 0.7890625, "learning_rate": 0.00018945050774759272, "loss": 1.1315, "step": 9020 }, { "epoch": 0.23163398441073244, "grad_norm": 0.8046875, "learning_rate": 0.0001894485119023256, "loss": 1.0839, "step": 9021 }, { "epoch": 0.23165966160665424, "grad_norm": 0.80859375, "learning_rate": 0.00018944651587879564, "loss": 0.8271, "step": 9022 }, { "epoch": 0.23168533880257605, "grad_norm": 0.80078125, "learning_rate": 0.00018944451967700684, "loss": 1.0247, "step": 9023 }, { "epoch": 0.2317110159984979, "grad_norm": 0.765625, "learning_rate": 0.00018944252329696325, "loss": 0.9297, "step": 9024 }, { "epoch": 0.2317366931944197, "grad_norm": 0.85546875, "learning_rate": 0.00018944052673866874, "loss": 1.098, "step": 9025 }, { "epoch": 0.23176237039034153, "grad_norm": 0.85546875, "learning_rate": 0.00018943853000212733, "loss": 1.1096, "step": 9026 }, { "epoch": 0.23178804758626334, "grad_norm": 0.78125, "learning_rate": 0.00018943653308734306, "loss": 0.9113, "step": 9027 }, { "epoch": 0.23181372478218515, "grad_norm": 0.8984375, "learning_rate": 0.00018943453599431983, "loss": 1.0384, "step": 9028 }, { "epoch": 0.23183940197810698, "grad_norm": 0.80859375, "learning_rate": 0.00018943253872306167, "loss": 1.0789, "step": 9029 }, { "epoch": 0.2318650791740288, "grad_norm": 0.8359375, "learning_rate": 0.0001894305412735725, "loss": 0.9516, "step": 9030 }, { "epoch": 0.23189075636995063, "grad_norm": 0.7109375, "learning_rate": 0.00018942854364585636, "loss": 0.946, "step": 9031 }, { "epoch": 0.23191643356587244, "grad_norm": 0.7734375, "learning_rate": 0.00018942654583991724, "loss": 1.0569, "step": 9032 }, { "epoch": 0.23194211076179425, "grad_norm": 0.78125, "learning_rate": 0.00018942454785575902, "loss": 1.015, "step": 9033 }, { "epoch": 0.23196778795771608, "grad_norm": 0.80859375, "learning_rate": 0.00018942254969338582, "loss": 1.0354, "step": 9034 }, { "epoch": 0.2319934651536379, "grad_norm": 0.85546875, "learning_rate": 0.00018942055135280153, "loss": 0.9575, "step": 9035 }, { "epoch": 0.23201914234955973, "grad_norm": 0.83203125, "learning_rate": 0.0001894185528340102, "loss": 1.0972, "step": 9036 }, { "epoch": 0.23204481954548153, "grad_norm": 0.83984375, "learning_rate": 0.00018941655413701568, "loss": 1.1318, "step": 9037 }, { "epoch": 0.23207049674140334, "grad_norm": 0.890625, "learning_rate": 0.0001894145552618221, "loss": 1.1794, "step": 9038 }, { "epoch": 0.23209617393732518, "grad_norm": 0.83984375, "learning_rate": 0.0001894125562084334, "loss": 0.9542, "step": 9039 }, { "epoch": 0.232121851133247, "grad_norm": 0.78515625, "learning_rate": 0.00018941055697685352, "loss": 1.0381, "step": 9040 }, { "epoch": 0.23214752832916882, "grad_norm": 0.79296875, "learning_rate": 0.0001894085575670865, "loss": 1.0017, "step": 9041 }, { "epoch": 0.23217320552509063, "grad_norm": 0.78125, "learning_rate": 0.0001894065579791363, "loss": 1.0282, "step": 9042 }, { "epoch": 0.23219888272101244, "grad_norm": 0.76171875, "learning_rate": 0.00018940455821300688, "loss": 1.0466, "step": 9043 }, { "epoch": 0.23222455991693428, "grad_norm": 0.83984375, "learning_rate": 0.0001894025582687023, "loss": 1.1177, "step": 9044 }, { "epoch": 0.23225023711285608, "grad_norm": 0.83984375, "learning_rate": 0.00018940055814622647, "loss": 1.1699, "step": 9045 }, { "epoch": 0.23227591430877792, "grad_norm": 0.78125, "learning_rate": 0.00018939855784558338, "loss": 0.8717, "step": 9046 }, { "epoch": 0.23230159150469973, "grad_norm": 0.80859375, "learning_rate": 0.0001893965573667771, "loss": 1.1165, "step": 9047 }, { "epoch": 0.23232726870062154, "grad_norm": 0.765625, "learning_rate": 0.00018939455670981148, "loss": 0.9852, "step": 9048 }, { "epoch": 0.23235294589654337, "grad_norm": 0.77734375, "learning_rate": 0.00018939255587469066, "loss": 0.9693, "step": 9049 }, { "epoch": 0.23237862309246518, "grad_norm": 0.80859375, "learning_rate": 0.0001893905548614185, "loss": 1.2353, "step": 9050 }, { "epoch": 0.23240430028838702, "grad_norm": 0.78515625, "learning_rate": 0.00018938855366999903, "loss": 0.9246, "step": 9051 }, { "epoch": 0.23242997748430883, "grad_norm": 0.93359375, "learning_rate": 0.00018938655230043628, "loss": 1.179, "step": 9052 }, { "epoch": 0.23245565468023063, "grad_norm": 0.82421875, "learning_rate": 0.00018938455075273418, "loss": 1.0962, "step": 9053 }, { "epoch": 0.23248133187615247, "grad_norm": 0.8359375, "learning_rate": 0.0001893825490268968, "loss": 0.9298, "step": 9054 }, { "epoch": 0.23250700907207428, "grad_norm": 0.73046875, "learning_rate": 0.000189380547122928, "loss": 0.8799, "step": 9055 }, { "epoch": 0.2325326862679961, "grad_norm": 0.83203125, "learning_rate": 0.00018937854504083186, "loss": 1.0053, "step": 9056 }, { "epoch": 0.23255836346391792, "grad_norm": 0.78515625, "learning_rate": 0.00018937654278061236, "loss": 1.0048, "step": 9057 }, { "epoch": 0.23258404065983973, "grad_norm": 0.8125, "learning_rate": 0.00018937454034227352, "loss": 0.9679, "step": 9058 }, { "epoch": 0.23260971785576157, "grad_norm": 0.8125, "learning_rate": 0.00018937253772581926, "loss": 0.9433, "step": 9059 }, { "epoch": 0.23263539505168337, "grad_norm": 1.171875, "learning_rate": 0.0001893705349312536, "loss": 1.0325, "step": 9060 }, { "epoch": 0.2326610722476052, "grad_norm": 0.765625, "learning_rate": 0.00018936853195858055, "loss": 0.9886, "step": 9061 }, { "epoch": 0.23268674944352702, "grad_norm": 0.9453125, "learning_rate": 0.0001893665288078041, "loss": 1.0582, "step": 9062 }, { "epoch": 0.23271242663944883, "grad_norm": 0.81640625, "learning_rate": 0.0001893645254789282, "loss": 1.0669, "step": 9063 }, { "epoch": 0.23273810383537066, "grad_norm": 0.83984375, "learning_rate": 0.0001893625219719569, "loss": 1.2453, "step": 9064 }, { "epoch": 0.23276378103129247, "grad_norm": 0.74609375, "learning_rate": 0.00018936051828689413, "loss": 0.8969, "step": 9065 }, { "epoch": 0.2327894582272143, "grad_norm": 0.82421875, "learning_rate": 0.00018935851442374398, "loss": 1.0689, "step": 9066 }, { "epoch": 0.23281513542313612, "grad_norm": 0.87890625, "learning_rate": 0.00018935651038251035, "loss": 1.0654, "step": 9067 }, { "epoch": 0.23284081261905792, "grad_norm": 0.82421875, "learning_rate": 0.00018935450616319724, "loss": 0.9979, "step": 9068 }, { "epoch": 0.23286648981497976, "grad_norm": 0.83984375, "learning_rate": 0.0001893525017658087, "loss": 1.1512, "step": 9069 }, { "epoch": 0.23289216701090157, "grad_norm": 0.81640625, "learning_rate": 0.0001893504971903487, "loss": 1.0596, "step": 9070 }, { "epoch": 0.2329178442068234, "grad_norm": 0.8359375, "learning_rate": 0.00018934849243682124, "loss": 0.983, "step": 9071 }, { "epoch": 0.2329435214027452, "grad_norm": 0.84765625, "learning_rate": 0.00018934648750523026, "loss": 0.927, "step": 9072 }, { "epoch": 0.23296919859866702, "grad_norm": 0.80859375, "learning_rate": 0.00018934448239557982, "loss": 1.0241, "step": 9073 }, { "epoch": 0.23299487579458886, "grad_norm": 0.828125, "learning_rate": 0.0001893424771078739, "loss": 1.0281, "step": 9074 }, { "epoch": 0.23302055299051067, "grad_norm": 0.77734375, "learning_rate": 0.0001893404716421165, "loss": 1.0068, "step": 9075 }, { "epoch": 0.2330462301864325, "grad_norm": 0.71875, "learning_rate": 0.0001893384659983116, "loss": 1.0406, "step": 9076 }, { "epoch": 0.2330719073823543, "grad_norm": 0.78515625, "learning_rate": 0.00018933646017646322, "loss": 1.1493, "step": 9077 }, { "epoch": 0.23309758457827612, "grad_norm": 0.77734375, "learning_rate": 0.00018933445417657535, "loss": 1.0388, "step": 9078 }, { "epoch": 0.23312326177419795, "grad_norm": 0.77734375, "learning_rate": 0.00018933244799865194, "loss": 1.1088, "step": 9079 }, { "epoch": 0.23314893897011976, "grad_norm": 0.76953125, "learning_rate": 0.00018933044164269708, "loss": 0.8641, "step": 9080 }, { "epoch": 0.2331746161660416, "grad_norm": 0.86328125, "learning_rate": 0.00018932843510871468, "loss": 0.9911, "step": 9081 }, { "epoch": 0.2332002933619634, "grad_norm": 0.9765625, "learning_rate": 0.0001893264283967088, "loss": 1.061, "step": 9082 }, { "epoch": 0.23322597055788521, "grad_norm": 0.8203125, "learning_rate": 0.00018932442150668344, "loss": 1.009, "step": 9083 }, { "epoch": 0.23325164775380705, "grad_norm": 0.82421875, "learning_rate": 0.00018932241443864257, "loss": 1.0573, "step": 9084 }, { "epoch": 0.23327732494972886, "grad_norm": 0.83984375, "learning_rate": 0.00018932040719259015, "loss": 0.979, "step": 9085 }, { "epoch": 0.2333030021456507, "grad_norm": 0.765625, "learning_rate": 0.00018931839976853024, "loss": 1.0322, "step": 9086 }, { "epoch": 0.2333286793415725, "grad_norm": 0.7734375, "learning_rate": 0.00018931639216646686, "loss": 0.8317, "step": 9087 }, { "epoch": 0.2333543565374943, "grad_norm": 0.80078125, "learning_rate": 0.00018931438438640394, "loss": 0.9786, "step": 9088 }, { "epoch": 0.23338003373341615, "grad_norm": 0.8203125, "learning_rate": 0.00018931237642834555, "loss": 1.0388, "step": 9089 }, { "epoch": 0.23340571092933796, "grad_norm": 0.78515625, "learning_rate": 0.00018931036829229566, "loss": 0.9991, "step": 9090 }, { "epoch": 0.2334313881252598, "grad_norm": 0.83984375, "learning_rate": 0.00018930835997825826, "loss": 1.0794, "step": 9091 }, { "epoch": 0.2334570653211816, "grad_norm": 0.81640625, "learning_rate": 0.0001893063514862374, "loss": 1.0507, "step": 9092 }, { "epoch": 0.2334827425171034, "grad_norm": 0.90234375, "learning_rate": 0.000189304342816237, "loss": 1.0355, "step": 9093 }, { "epoch": 0.23350841971302524, "grad_norm": 0.80078125, "learning_rate": 0.00018930233396826114, "loss": 1.0072, "step": 9094 }, { "epoch": 0.23353409690894705, "grad_norm": 0.91796875, "learning_rate": 0.00018930032494231375, "loss": 1.0465, "step": 9095 }, { "epoch": 0.2335597741048689, "grad_norm": 0.7578125, "learning_rate": 0.00018929831573839893, "loss": 1.0014, "step": 9096 }, { "epoch": 0.2335854513007907, "grad_norm": 0.8984375, "learning_rate": 0.0001892963063565206, "loss": 1.1314, "step": 9097 }, { "epoch": 0.2336111284967125, "grad_norm": 0.75390625, "learning_rate": 0.00018929429679668283, "loss": 0.9466, "step": 9098 }, { "epoch": 0.23363680569263434, "grad_norm": 0.79296875, "learning_rate": 0.00018929228705888957, "loss": 1.0137, "step": 9099 }, { "epoch": 0.23366248288855615, "grad_norm": 0.8203125, "learning_rate": 0.00018929027714314486, "loss": 1.1218, "step": 9100 }, { "epoch": 0.23368816008447799, "grad_norm": 0.7265625, "learning_rate": 0.0001892882670494527, "loss": 1.0059, "step": 9101 }, { "epoch": 0.2337138372803998, "grad_norm": 0.8125, "learning_rate": 0.00018928625677781707, "loss": 1.1151, "step": 9102 }, { "epoch": 0.2337395144763216, "grad_norm": 0.79296875, "learning_rate": 0.000189284246328242, "loss": 1.0842, "step": 9103 }, { "epoch": 0.23376519167224344, "grad_norm": 0.796875, "learning_rate": 0.00018928223570073148, "loss": 0.9083, "step": 9104 }, { "epoch": 0.23379086886816525, "grad_norm": 0.8046875, "learning_rate": 0.00018928022489528957, "loss": 1.0245, "step": 9105 }, { "epoch": 0.23381654606408708, "grad_norm": 0.90625, "learning_rate": 0.00018927821391192019, "loss": 1.0765, "step": 9106 }, { "epoch": 0.2338422232600089, "grad_norm": 0.8515625, "learning_rate": 0.00018927620275062743, "loss": 1.0262, "step": 9107 }, { "epoch": 0.2338679004559307, "grad_norm": 0.796875, "learning_rate": 0.00018927419141141525, "loss": 1.0021, "step": 9108 }, { "epoch": 0.23389357765185254, "grad_norm": 0.8046875, "learning_rate": 0.00018927217989428766, "loss": 1.0392, "step": 9109 }, { "epoch": 0.23391925484777434, "grad_norm": 0.86328125, "learning_rate": 0.00018927016819924867, "loss": 1.2482, "step": 9110 }, { "epoch": 0.23394493204369618, "grad_norm": 0.74609375, "learning_rate": 0.00018926815632630233, "loss": 0.9335, "step": 9111 }, { "epoch": 0.233970609239618, "grad_norm": 0.8515625, "learning_rate": 0.0001892661442754526, "loss": 1.0035, "step": 9112 }, { "epoch": 0.2339962864355398, "grad_norm": 0.82421875, "learning_rate": 0.0001892641320467035, "loss": 1.0231, "step": 9113 }, { "epoch": 0.23402196363146163, "grad_norm": 0.80859375, "learning_rate": 0.00018926211964005905, "loss": 0.9158, "step": 9114 }, { "epoch": 0.23404764082738344, "grad_norm": 0.76953125, "learning_rate": 0.0001892601070555233, "loss": 0.9684, "step": 9115 }, { "epoch": 0.23407331802330528, "grad_norm": 0.7890625, "learning_rate": 0.00018925809429310017, "loss": 1.0638, "step": 9116 }, { "epoch": 0.23409899521922709, "grad_norm": 0.80859375, "learning_rate": 0.00018925608135279376, "loss": 1.1478, "step": 9117 }, { "epoch": 0.2341246724151489, "grad_norm": 0.76953125, "learning_rate": 0.00018925406823460801, "loss": 0.951, "step": 9118 }, { "epoch": 0.23415034961107073, "grad_norm": 0.75390625, "learning_rate": 0.00018925205493854698, "loss": 1.1159, "step": 9119 }, { "epoch": 0.23417602680699254, "grad_norm": 0.83203125, "learning_rate": 0.00018925004146461464, "loss": 1.1091, "step": 9120 }, { "epoch": 0.23420170400291437, "grad_norm": 0.84375, "learning_rate": 0.00018924802781281508, "loss": 1.0106, "step": 9121 }, { "epoch": 0.23422738119883618, "grad_norm": 0.85546875, "learning_rate": 0.00018924601398315224, "loss": 1.0234, "step": 9122 }, { "epoch": 0.234253058394758, "grad_norm": 0.828125, "learning_rate": 0.00018924399997563015, "loss": 1.1076, "step": 9123 }, { "epoch": 0.23427873559067983, "grad_norm": 0.8125, "learning_rate": 0.0001892419857902528, "loss": 0.946, "step": 9124 }, { "epoch": 0.23430441278660163, "grad_norm": 0.73828125, "learning_rate": 0.00018923997142702425, "loss": 0.9599, "step": 9125 }, { "epoch": 0.23433008998252347, "grad_norm": 0.82421875, "learning_rate": 0.00018923795688594852, "loss": 1.0196, "step": 9126 }, { "epoch": 0.23435576717844528, "grad_norm": 0.75, "learning_rate": 0.0001892359421670296, "loss": 0.9724, "step": 9127 }, { "epoch": 0.2343814443743671, "grad_norm": 0.7421875, "learning_rate": 0.0001892339272702715, "loss": 0.9547, "step": 9128 }, { "epoch": 0.23440712157028892, "grad_norm": 0.78515625, "learning_rate": 0.00018923191219567825, "loss": 1.2649, "step": 9129 }, { "epoch": 0.23443279876621073, "grad_norm": 0.7734375, "learning_rate": 0.00018922989694325384, "loss": 1.0187, "step": 9130 }, { "epoch": 0.23445847596213257, "grad_norm": 0.921875, "learning_rate": 0.00018922788151300233, "loss": 1.0711, "step": 9131 }, { "epoch": 0.23448415315805438, "grad_norm": 0.80078125, "learning_rate": 0.00018922586590492768, "loss": 1.0461, "step": 9132 }, { "epoch": 0.23450983035397618, "grad_norm": 0.76953125, "learning_rate": 0.00018922385011903395, "loss": 1.0007, "step": 9133 }, { "epoch": 0.23453550754989802, "grad_norm": 0.8203125, "learning_rate": 0.00018922183415532519, "loss": 1.1344, "step": 9134 }, { "epoch": 0.23456118474581983, "grad_norm": 0.796875, "learning_rate": 0.0001892198180138053, "loss": 1.0929, "step": 9135 }, { "epoch": 0.23458686194174166, "grad_norm": 0.75390625, "learning_rate": 0.00018921780169447842, "loss": 0.874, "step": 9136 }, { "epoch": 0.23461253913766347, "grad_norm": 0.79296875, "learning_rate": 0.00018921578519734853, "loss": 1.0386, "step": 9137 }, { "epoch": 0.23463821633358528, "grad_norm": 0.8046875, "learning_rate": 0.00018921376852241962, "loss": 1.0118, "step": 9138 }, { "epoch": 0.23466389352950712, "grad_norm": 0.859375, "learning_rate": 0.00018921175166969573, "loss": 0.9452, "step": 9139 }, { "epoch": 0.23468957072542893, "grad_norm": 0.765625, "learning_rate": 0.00018920973463918087, "loss": 0.9683, "step": 9140 }, { "epoch": 0.23471524792135076, "grad_norm": 0.8046875, "learning_rate": 0.00018920771743087907, "loss": 0.9716, "step": 9141 }, { "epoch": 0.23474092511727257, "grad_norm": 0.796875, "learning_rate": 0.00018920570004479434, "loss": 1.0921, "step": 9142 }, { "epoch": 0.23476660231319438, "grad_norm": 0.8046875, "learning_rate": 0.00018920368248093072, "loss": 0.9545, "step": 9143 }, { "epoch": 0.23479227950911621, "grad_norm": 0.78515625, "learning_rate": 0.0001892016647392922, "loss": 0.9576, "step": 9144 }, { "epoch": 0.23481795670503802, "grad_norm": 0.77734375, "learning_rate": 0.00018919964681988284, "loss": 1.0393, "step": 9145 }, { "epoch": 0.23484363390095986, "grad_norm": 0.8828125, "learning_rate": 0.00018919762872270665, "loss": 1.0601, "step": 9146 }, { "epoch": 0.23486931109688167, "grad_norm": 0.81640625, "learning_rate": 0.00018919561044776763, "loss": 0.9797, "step": 9147 }, { "epoch": 0.23489498829280347, "grad_norm": 0.80859375, "learning_rate": 0.00018919359199506982, "loss": 0.974, "step": 9148 }, { "epoch": 0.2349206654887253, "grad_norm": 0.6796875, "learning_rate": 0.00018919157336461724, "loss": 0.9121, "step": 9149 }, { "epoch": 0.23494634268464712, "grad_norm": 0.83984375, "learning_rate": 0.0001891895545564139, "loss": 1.1279, "step": 9150 }, { "epoch": 0.23497201988056896, "grad_norm": 0.8203125, "learning_rate": 0.00018918753557046385, "loss": 1.0638, "step": 9151 }, { "epoch": 0.23499769707649076, "grad_norm": 0.7734375, "learning_rate": 0.00018918551640677108, "loss": 1.0835, "step": 9152 }, { "epoch": 0.23502337427241257, "grad_norm": 0.7578125, "learning_rate": 0.00018918349706533962, "loss": 0.9638, "step": 9153 }, { "epoch": 0.2350490514683344, "grad_norm": 0.78515625, "learning_rate": 0.00018918147754617355, "loss": 1.0209, "step": 9154 }, { "epoch": 0.23507472866425622, "grad_norm": 0.8125, "learning_rate": 0.0001891794578492768, "loss": 0.9687, "step": 9155 }, { "epoch": 0.23510040586017805, "grad_norm": 0.8046875, "learning_rate": 0.0001891774379746535, "loss": 0.9238, "step": 9156 }, { "epoch": 0.23512608305609986, "grad_norm": 0.80859375, "learning_rate": 0.00018917541792230758, "loss": 0.9434, "step": 9157 }, { "epoch": 0.23515176025202167, "grad_norm": 0.80078125, "learning_rate": 0.00018917339769224312, "loss": 1.0097, "step": 9158 }, { "epoch": 0.2351774374479435, "grad_norm": 0.8046875, "learning_rate": 0.00018917137728446414, "loss": 0.9705, "step": 9159 }, { "epoch": 0.2352031146438653, "grad_norm": 0.75390625, "learning_rate": 0.00018916935669897466, "loss": 0.8756, "step": 9160 }, { "epoch": 0.23522879183978715, "grad_norm": 0.8046875, "learning_rate": 0.0001891673359357787, "loss": 1.0096, "step": 9161 }, { "epoch": 0.23525446903570896, "grad_norm": 0.77734375, "learning_rate": 0.00018916531499488029, "loss": 0.899, "step": 9162 }, { "epoch": 0.23528014623163077, "grad_norm": 0.78125, "learning_rate": 0.00018916329387628347, "loss": 0.9305, "step": 9163 }, { "epoch": 0.2353058234275526, "grad_norm": 0.8828125, "learning_rate": 0.00018916127257999224, "loss": 1.0492, "step": 9164 }, { "epoch": 0.2353315006234744, "grad_norm": 0.81640625, "learning_rate": 0.00018915925110601066, "loss": 1.0459, "step": 9165 }, { "epoch": 0.23535717781939625, "grad_norm": 0.87109375, "learning_rate": 0.00018915722945434275, "loss": 1.0032, "step": 9166 }, { "epoch": 0.23538285501531805, "grad_norm": 0.85546875, "learning_rate": 0.00018915520762499254, "loss": 1.0531, "step": 9167 }, { "epoch": 0.23540853221123986, "grad_norm": 0.97265625, "learning_rate": 0.00018915318561796404, "loss": 1.0287, "step": 9168 }, { "epoch": 0.2354342094071617, "grad_norm": 0.6953125, "learning_rate": 0.00018915116343326131, "loss": 1.0742, "step": 9169 }, { "epoch": 0.2354598866030835, "grad_norm": 0.82421875, "learning_rate": 0.00018914914107088837, "loss": 1.0659, "step": 9170 }, { "epoch": 0.23548556379900534, "grad_norm": 0.84375, "learning_rate": 0.00018914711853084922, "loss": 1.1855, "step": 9171 }, { "epoch": 0.23551124099492715, "grad_norm": 0.78515625, "learning_rate": 0.00018914509581314794, "loss": 1.0763, "step": 9172 }, { "epoch": 0.23553691819084896, "grad_norm": 0.8125, "learning_rate": 0.0001891430729177885, "loss": 0.9278, "step": 9173 }, { "epoch": 0.2355625953867708, "grad_norm": 0.8046875, "learning_rate": 0.00018914104984477502, "loss": 1.069, "step": 9174 }, { "epoch": 0.2355882725826926, "grad_norm": 0.8046875, "learning_rate": 0.00018913902659411144, "loss": 0.9993, "step": 9175 }, { "epoch": 0.23561394977861444, "grad_norm": 0.80078125, "learning_rate": 0.00018913700316580182, "loss": 1.1086, "step": 9176 }, { "epoch": 0.23563962697453625, "grad_norm": 0.8203125, "learning_rate": 0.00018913497955985026, "loss": 0.9348, "step": 9177 }, { "epoch": 0.23566530417045806, "grad_norm": 0.734375, "learning_rate": 0.00018913295577626069, "loss": 0.9989, "step": 9178 }, { "epoch": 0.2356909813663799, "grad_norm": 0.75390625, "learning_rate": 0.0001891309318150372, "loss": 1.0335, "step": 9179 }, { "epoch": 0.2357166585623017, "grad_norm": 0.8203125, "learning_rate": 0.00018912890767618383, "loss": 0.9858, "step": 9180 }, { "epoch": 0.23574233575822354, "grad_norm": 0.8203125, "learning_rate": 0.00018912688335970458, "loss": 1.0295, "step": 9181 }, { "epoch": 0.23576801295414535, "grad_norm": 0.81640625, "learning_rate": 0.00018912485886560352, "loss": 1.0805, "step": 9182 }, { "epoch": 0.23579369015006715, "grad_norm": 0.8671875, "learning_rate": 0.00018912283419388466, "loss": 0.9137, "step": 9183 }, { "epoch": 0.235819367345989, "grad_norm": 0.82421875, "learning_rate": 0.00018912080934455202, "loss": 1.0166, "step": 9184 }, { "epoch": 0.2358450445419108, "grad_norm": 0.87109375, "learning_rate": 0.0001891187843176097, "loss": 1.1795, "step": 9185 }, { "epoch": 0.23587072173783263, "grad_norm": 0.86328125, "learning_rate": 0.00018911675911306166, "loss": 1.1268, "step": 9186 }, { "epoch": 0.23589639893375444, "grad_norm": 0.7578125, "learning_rate": 0.00018911473373091198, "loss": 0.9681, "step": 9187 }, { "epoch": 0.23592207612967625, "grad_norm": 0.76953125, "learning_rate": 0.00018911270817116468, "loss": 1.0556, "step": 9188 }, { "epoch": 0.2359477533255981, "grad_norm": 0.76953125, "learning_rate": 0.00018911068243382382, "loss": 0.9688, "step": 9189 }, { "epoch": 0.2359734305215199, "grad_norm": 0.77734375, "learning_rate": 0.0001891086565188934, "loss": 1.056, "step": 9190 }, { "epoch": 0.2359991077174417, "grad_norm": 0.8125, "learning_rate": 0.00018910663042637747, "loss": 0.8901, "step": 9191 }, { "epoch": 0.23602478491336354, "grad_norm": 0.83203125, "learning_rate": 0.0001891046041562801, "loss": 0.9718, "step": 9192 }, { "epoch": 0.23605046210928535, "grad_norm": 0.97265625, "learning_rate": 0.00018910257770860528, "loss": 1.2387, "step": 9193 }, { "epoch": 0.23607613930520718, "grad_norm": 0.81640625, "learning_rate": 0.0001891005510833571, "loss": 1.1164, "step": 9194 }, { "epoch": 0.236101816501129, "grad_norm": 0.87109375, "learning_rate": 0.00018909852428053954, "loss": 1.0376, "step": 9195 }, { "epoch": 0.2361274936970508, "grad_norm": 0.8125, "learning_rate": 0.00018909649730015668, "loss": 0.9022, "step": 9196 }, { "epoch": 0.23615317089297264, "grad_norm": 0.74609375, "learning_rate": 0.00018909447014221254, "loss": 0.8181, "step": 9197 }, { "epoch": 0.23617884808889444, "grad_norm": 0.76171875, "learning_rate": 0.00018909244280671116, "loss": 1.126, "step": 9198 }, { "epoch": 0.23620452528481628, "grad_norm": 0.765625, "learning_rate": 0.0001890904152936566, "loss": 1.0953, "step": 9199 }, { "epoch": 0.2362302024807381, "grad_norm": 0.796875, "learning_rate": 0.00018908838760305291, "loss": 1.0254, "step": 9200 }, { "epoch": 0.2362558796766599, "grad_norm": 0.8359375, "learning_rate": 0.0001890863597349041, "loss": 1.0669, "step": 9201 }, { "epoch": 0.23628155687258173, "grad_norm": 0.890625, "learning_rate": 0.00018908433168921422, "loss": 1.0437, "step": 9202 }, { "epoch": 0.23630723406850354, "grad_norm": 0.79296875, "learning_rate": 0.00018908230346598731, "loss": 0.9955, "step": 9203 }, { "epoch": 0.23633291126442538, "grad_norm": 0.83984375, "learning_rate": 0.00018908027506522743, "loss": 0.9885, "step": 9204 }, { "epoch": 0.23635858846034719, "grad_norm": 0.90234375, "learning_rate": 0.0001890782464869386, "loss": 1.0459, "step": 9205 }, { "epoch": 0.236384265656269, "grad_norm": 0.80859375, "learning_rate": 0.00018907621773112484, "loss": 0.8491, "step": 9206 }, { "epoch": 0.23640994285219083, "grad_norm": 0.7578125, "learning_rate": 0.00018907418879779027, "loss": 1.108, "step": 9207 }, { "epoch": 0.23643562004811264, "grad_norm": 0.71875, "learning_rate": 0.00018907215968693887, "loss": 0.9688, "step": 9208 }, { "epoch": 0.23646129724403447, "grad_norm": 0.8046875, "learning_rate": 0.0001890701303985747, "loss": 1.1343, "step": 9209 }, { "epoch": 0.23648697443995628, "grad_norm": 0.85546875, "learning_rate": 0.0001890681009327018, "loss": 1.1018, "step": 9210 }, { "epoch": 0.2365126516358781, "grad_norm": 0.87890625, "learning_rate": 0.00018906607128932424, "loss": 1.0339, "step": 9211 }, { "epoch": 0.23653832883179993, "grad_norm": 0.79296875, "learning_rate": 0.00018906404146844605, "loss": 0.9986, "step": 9212 }, { "epoch": 0.23656400602772173, "grad_norm": 0.8828125, "learning_rate": 0.00018906201147007124, "loss": 0.9528, "step": 9213 }, { "epoch": 0.23658968322364357, "grad_norm": 0.74609375, "learning_rate": 0.0001890599812942039, "loss": 0.9925, "step": 9214 }, { "epoch": 0.23661536041956538, "grad_norm": 0.76953125, "learning_rate": 0.00018905795094084805, "loss": 1.0622, "step": 9215 }, { "epoch": 0.2366410376154872, "grad_norm": 0.79296875, "learning_rate": 0.0001890559204100078, "loss": 1.0652, "step": 9216 }, { "epoch": 0.23666671481140902, "grad_norm": 0.94140625, "learning_rate": 0.0001890538897016871, "loss": 0.9818, "step": 9217 }, { "epoch": 0.23669239200733083, "grad_norm": 0.83984375, "learning_rate": 0.00018905185881589004, "loss": 0.958, "step": 9218 }, { "epoch": 0.23671806920325267, "grad_norm": 0.8359375, "learning_rate": 0.00018904982775262065, "loss": 0.9239, "step": 9219 }, { "epoch": 0.23674374639917448, "grad_norm": 0.8828125, "learning_rate": 0.00018904779651188306, "loss": 0.9679, "step": 9220 }, { "epoch": 0.23676942359509628, "grad_norm": 0.7890625, "learning_rate": 0.00018904576509368122, "loss": 1.0496, "step": 9221 }, { "epoch": 0.23679510079101812, "grad_norm": 0.796875, "learning_rate": 0.0001890437334980192, "loss": 0.9188, "step": 9222 }, { "epoch": 0.23682077798693993, "grad_norm": 0.75390625, "learning_rate": 0.00018904170172490107, "loss": 0.9507, "step": 9223 }, { "epoch": 0.23684645518286176, "grad_norm": 0.78515625, "learning_rate": 0.00018903966977433086, "loss": 1.2657, "step": 9224 }, { "epoch": 0.23687213237878357, "grad_norm": 0.83984375, "learning_rate": 0.00018903763764631265, "loss": 1.1291, "step": 9225 }, { "epoch": 0.23689780957470538, "grad_norm": 0.7578125, "learning_rate": 0.0001890356053408505, "loss": 1.0356, "step": 9226 }, { "epoch": 0.23692348677062722, "grad_norm": 0.79296875, "learning_rate": 0.00018903357285794838, "loss": 0.9167, "step": 9227 }, { "epoch": 0.23694916396654903, "grad_norm": 0.80859375, "learning_rate": 0.00018903154019761043, "loss": 1.0036, "step": 9228 }, { "epoch": 0.23697484116247086, "grad_norm": 0.76953125, "learning_rate": 0.00018902950735984062, "loss": 1.2285, "step": 9229 }, { "epoch": 0.23700051835839267, "grad_norm": 0.78125, "learning_rate": 0.00018902747434464308, "loss": 1.0574, "step": 9230 }, { "epoch": 0.23702619555431448, "grad_norm": 0.90234375, "learning_rate": 0.00018902544115202181, "loss": 1.1563, "step": 9231 }, { "epoch": 0.23705187275023631, "grad_norm": 0.83203125, "learning_rate": 0.0001890234077819809, "loss": 0.9763, "step": 9232 }, { "epoch": 0.23707754994615812, "grad_norm": 0.8203125, "learning_rate": 0.00018902137423452433, "loss": 1.0048, "step": 9233 }, { "epoch": 0.23710322714207996, "grad_norm": 0.8828125, "learning_rate": 0.00018901934050965624, "loss": 1.0788, "step": 9234 }, { "epoch": 0.23712890433800177, "grad_norm": 0.81640625, "learning_rate": 0.00018901730660738063, "loss": 0.9987, "step": 9235 }, { "epoch": 0.23715458153392358, "grad_norm": 0.80078125, "learning_rate": 0.00018901527252770158, "loss": 1.0096, "step": 9236 }, { "epoch": 0.2371802587298454, "grad_norm": 0.75, "learning_rate": 0.00018901323827062315, "loss": 1.0109, "step": 9237 }, { "epoch": 0.23720593592576722, "grad_norm": 0.8828125, "learning_rate": 0.00018901120383614935, "loss": 0.9849, "step": 9238 }, { "epoch": 0.23723161312168906, "grad_norm": 0.8515625, "learning_rate": 0.00018900916922428426, "loss": 1.1135, "step": 9239 }, { "epoch": 0.23725729031761086, "grad_norm": 0.8359375, "learning_rate": 0.00018900713443503194, "loss": 1.0277, "step": 9240 }, { "epoch": 0.23728296751353267, "grad_norm": 0.765625, "learning_rate": 0.00018900509946839648, "loss": 1.0167, "step": 9241 }, { "epoch": 0.2373086447094545, "grad_norm": 0.87109375, "learning_rate": 0.00018900306432438185, "loss": 0.8845, "step": 9242 }, { "epoch": 0.23733432190537632, "grad_norm": 0.7734375, "learning_rate": 0.00018900102900299215, "loss": 0.9847, "step": 9243 }, { "epoch": 0.23735999910129815, "grad_norm": 0.78515625, "learning_rate": 0.00018899899350423147, "loss": 0.9815, "step": 9244 }, { "epoch": 0.23738567629721996, "grad_norm": 0.7578125, "learning_rate": 0.0001889969578281038, "loss": 0.9686, "step": 9245 }, { "epoch": 0.23741135349314177, "grad_norm": 0.8125, "learning_rate": 0.00018899492197461327, "loss": 1.1122, "step": 9246 }, { "epoch": 0.2374370306890636, "grad_norm": 0.8359375, "learning_rate": 0.00018899288594376385, "loss": 1.0646, "step": 9247 }, { "epoch": 0.2374627078849854, "grad_norm": 0.85546875, "learning_rate": 0.00018899084973555969, "loss": 1.1195, "step": 9248 }, { "epoch": 0.23748838508090725, "grad_norm": 0.8125, "learning_rate": 0.00018898881335000477, "loss": 1.0596, "step": 9249 }, { "epoch": 0.23751406227682906, "grad_norm": 0.83203125, "learning_rate": 0.0001889867767871032, "loss": 1.2657, "step": 9250 }, { "epoch": 0.23753973947275087, "grad_norm": 0.8828125, "learning_rate": 0.00018898474004685903, "loss": 1.124, "step": 9251 }, { "epoch": 0.2375654166686727, "grad_norm": 0.77734375, "learning_rate": 0.0001889827031292763, "loss": 1.0377, "step": 9252 }, { "epoch": 0.2375910938645945, "grad_norm": 0.78125, "learning_rate": 0.00018898066603435912, "loss": 1.1365, "step": 9253 }, { "epoch": 0.23761677106051635, "grad_norm": 0.78515625, "learning_rate": 0.00018897862876211146, "loss": 1.0661, "step": 9254 }, { "epoch": 0.23764244825643815, "grad_norm": 0.84765625, "learning_rate": 0.00018897659131253747, "loss": 1.1316, "step": 9255 }, { "epoch": 0.23766812545235996, "grad_norm": 0.7890625, "learning_rate": 0.00018897455368564116, "loss": 0.9907, "step": 9256 }, { "epoch": 0.2376938026482818, "grad_norm": 0.765625, "learning_rate": 0.00018897251588142656, "loss": 1.0617, "step": 9257 }, { "epoch": 0.2377194798442036, "grad_norm": 0.79296875, "learning_rate": 0.00018897047789989783, "loss": 1.0994, "step": 9258 }, { "epoch": 0.23774515704012544, "grad_norm": 0.8125, "learning_rate": 0.00018896843974105894, "loss": 1.0055, "step": 9259 }, { "epoch": 0.23777083423604725, "grad_norm": 0.74609375, "learning_rate": 0.00018896640140491397, "loss": 0.9713, "step": 9260 }, { "epoch": 0.23779651143196906, "grad_norm": 0.76171875, "learning_rate": 0.00018896436289146703, "loss": 0.9954, "step": 9261 }, { "epoch": 0.2378221886278909, "grad_norm": 0.8359375, "learning_rate": 0.00018896232420072213, "loss": 0.9931, "step": 9262 }, { "epoch": 0.2378478658238127, "grad_norm": 0.81640625, "learning_rate": 0.00018896028533268338, "loss": 1.093, "step": 9263 }, { "epoch": 0.23787354301973454, "grad_norm": 0.84765625, "learning_rate": 0.0001889582462873548, "loss": 0.9002, "step": 9264 }, { "epoch": 0.23789922021565635, "grad_norm": 0.8203125, "learning_rate": 0.00018895620706474046, "loss": 0.9818, "step": 9265 }, { "epoch": 0.23792489741157816, "grad_norm": 1.125, "learning_rate": 0.00018895416766484446, "loss": 1.0889, "step": 9266 }, { "epoch": 0.2379505746075, "grad_norm": 0.85546875, "learning_rate": 0.00018895212808767083, "loss": 1.1292, "step": 9267 }, { "epoch": 0.2379762518034218, "grad_norm": 0.796875, "learning_rate": 0.00018895008833322364, "loss": 0.8482, "step": 9268 }, { "epoch": 0.23800192899934364, "grad_norm": 0.82421875, "learning_rate": 0.00018894804840150699, "loss": 0.9185, "step": 9269 }, { "epoch": 0.23802760619526545, "grad_norm": 0.74609375, "learning_rate": 0.00018894600829252487, "loss": 0.8279, "step": 9270 }, { "epoch": 0.23805328339118725, "grad_norm": 0.80859375, "learning_rate": 0.0001889439680062814, "loss": 0.9387, "step": 9271 }, { "epoch": 0.2380789605871091, "grad_norm": 0.8046875, "learning_rate": 0.00018894192754278065, "loss": 1.1608, "step": 9272 }, { "epoch": 0.2381046377830309, "grad_norm": 0.8359375, "learning_rate": 0.00018893988690202665, "loss": 1.1047, "step": 9273 }, { "epoch": 0.23813031497895273, "grad_norm": 0.7578125, "learning_rate": 0.0001889378460840235, "loss": 0.9995, "step": 9274 }, { "epoch": 0.23815599217487454, "grad_norm": 0.76171875, "learning_rate": 0.00018893580508877526, "loss": 1.0332, "step": 9275 }, { "epoch": 0.23818166937079635, "grad_norm": 0.703125, "learning_rate": 0.00018893376391628598, "loss": 0.963, "step": 9276 }, { "epoch": 0.2382073465667182, "grad_norm": 0.8203125, "learning_rate": 0.00018893172256655976, "loss": 0.9475, "step": 9277 }, { "epoch": 0.23823302376264, "grad_norm": 0.78515625, "learning_rate": 0.00018892968103960067, "loss": 0.9831, "step": 9278 }, { "epoch": 0.23825870095856183, "grad_norm": 0.73828125, "learning_rate": 0.00018892763933541274, "loss": 0.8885, "step": 9279 }, { "epoch": 0.23828437815448364, "grad_norm": 0.78515625, "learning_rate": 0.00018892559745400004, "loss": 0.9296, "step": 9280 }, { "epoch": 0.23831005535040545, "grad_norm": 0.796875, "learning_rate": 0.00018892355539536665, "loss": 1.0263, "step": 9281 }, { "epoch": 0.23833573254632728, "grad_norm": 0.76953125, "learning_rate": 0.00018892151315951666, "loss": 0.9934, "step": 9282 }, { "epoch": 0.2383614097422491, "grad_norm": 0.8046875, "learning_rate": 0.00018891947074645414, "loss": 1.1325, "step": 9283 }, { "epoch": 0.23838708693817093, "grad_norm": 0.84375, "learning_rate": 0.00018891742815618315, "loss": 1.1128, "step": 9284 }, { "epoch": 0.23841276413409274, "grad_norm": 0.890625, "learning_rate": 0.00018891538538870774, "loss": 0.9993, "step": 9285 }, { "epoch": 0.23843844133001454, "grad_norm": 0.81640625, "learning_rate": 0.000188913342444032, "loss": 1.0806, "step": 9286 }, { "epoch": 0.23846411852593638, "grad_norm": 0.73828125, "learning_rate": 0.00018891129932216002, "loss": 1.0098, "step": 9287 }, { "epoch": 0.2384897957218582, "grad_norm": 0.8203125, "learning_rate": 0.00018890925602309583, "loss": 0.9958, "step": 9288 }, { "epoch": 0.23851547291778002, "grad_norm": 0.79296875, "learning_rate": 0.00018890721254684353, "loss": 0.9729, "step": 9289 }, { "epoch": 0.23854115011370183, "grad_norm": 0.74609375, "learning_rate": 0.0001889051688934072, "loss": 1.1494, "step": 9290 }, { "epoch": 0.23856682730962364, "grad_norm": 0.69921875, "learning_rate": 0.00018890312506279086, "loss": 0.8386, "step": 9291 }, { "epoch": 0.23859250450554548, "grad_norm": 0.72265625, "learning_rate": 0.00018890108105499865, "loss": 0.8825, "step": 9292 }, { "epoch": 0.23861818170146729, "grad_norm": 0.7734375, "learning_rate": 0.0001888990368700346, "loss": 1.0286, "step": 9293 }, { "epoch": 0.23864385889738912, "grad_norm": 0.81640625, "learning_rate": 0.00018889699250790282, "loss": 1.0807, "step": 9294 }, { "epoch": 0.23866953609331093, "grad_norm": 0.734375, "learning_rate": 0.00018889494796860736, "loss": 0.9537, "step": 9295 }, { "epoch": 0.23869521328923274, "grad_norm": 0.7578125, "learning_rate": 0.0001888929032521523, "loss": 0.9846, "step": 9296 }, { "epoch": 0.23872089048515457, "grad_norm": 1.0703125, "learning_rate": 0.00018889085835854173, "loss": 0.9439, "step": 9297 }, { "epoch": 0.23874656768107638, "grad_norm": 0.84375, "learning_rate": 0.00018888881328777966, "loss": 0.8993, "step": 9298 }, { "epoch": 0.23877224487699822, "grad_norm": 0.765625, "learning_rate": 0.00018888676803987024, "loss": 1.0136, "step": 9299 }, { "epoch": 0.23879792207292003, "grad_norm": 0.83984375, "learning_rate": 0.00018888472261481754, "loss": 1.0176, "step": 9300 }, { "epoch": 0.23882359926884184, "grad_norm": 0.828125, "learning_rate": 0.0001888826770126256, "loss": 1.1274, "step": 9301 }, { "epoch": 0.23884927646476367, "grad_norm": 0.875, "learning_rate": 0.0001888806312332985, "loss": 0.9408, "step": 9302 }, { "epoch": 0.23887495366068548, "grad_norm": 0.7734375, "learning_rate": 0.00018887858527684036, "loss": 0.9448, "step": 9303 }, { "epoch": 0.23890063085660732, "grad_norm": 0.84765625, "learning_rate": 0.0001888765391432552, "loss": 0.9962, "step": 9304 }, { "epoch": 0.23892630805252912, "grad_norm": 0.8046875, "learning_rate": 0.00018887449283254713, "loss": 1.0236, "step": 9305 }, { "epoch": 0.23895198524845093, "grad_norm": 0.83984375, "learning_rate": 0.00018887244634472022, "loss": 0.9908, "step": 9306 }, { "epoch": 0.23897766244437277, "grad_norm": 0.8203125, "learning_rate": 0.00018887039967977858, "loss": 1.2155, "step": 9307 }, { "epoch": 0.23900333964029458, "grad_norm": 0.80859375, "learning_rate": 0.00018886835283772623, "loss": 1.0643, "step": 9308 }, { "epoch": 0.2390290168362164, "grad_norm": 0.92578125, "learning_rate": 0.0001888663058185673, "loss": 1.1546, "step": 9309 }, { "epoch": 0.23905469403213822, "grad_norm": 0.828125, "learning_rate": 0.00018886425862230585, "loss": 1.0748, "step": 9310 }, { "epoch": 0.23908037122806003, "grad_norm": 0.8515625, "learning_rate": 0.00018886221124894594, "loss": 1.102, "step": 9311 }, { "epoch": 0.23910604842398187, "grad_norm": 0.765625, "learning_rate": 0.0001888601636984917, "loss": 0.95, "step": 9312 }, { "epoch": 0.23913172561990367, "grad_norm": 0.74609375, "learning_rate": 0.00018885811597094717, "loss": 0.9048, "step": 9313 }, { "epoch": 0.2391574028158255, "grad_norm": 0.91015625, "learning_rate": 0.0001888560680663164, "loss": 0.9919, "step": 9314 }, { "epoch": 0.23918308001174732, "grad_norm": 0.75, "learning_rate": 0.00018885401998460356, "loss": 0.9833, "step": 9315 }, { "epoch": 0.23920875720766913, "grad_norm": 0.84375, "learning_rate": 0.0001888519717258127, "loss": 1.0283, "step": 9316 }, { "epoch": 0.23923443440359096, "grad_norm": 0.7578125, "learning_rate": 0.00018884992328994783, "loss": 0.8027, "step": 9317 }, { "epoch": 0.23926011159951277, "grad_norm": 0.87890625, "learning_rate": 0.00018884787467701313, "loss": 1.0923, "step": 9318 }, { "epoch": 0.2392857887954346, "grad_norm": 0.7265625, "learning_rate": 0.00018884582588701263, "loss": 0.9792, "step": 9319 }, { "epoch": 0.23931146599135641, "grad_norm": 0.8046875, "learning_rate": 0.00018884377691995042, "loss": 1.0446, "step": 9320 }, { "epoch": 0.23933714318727822, "grad_norm": 0.86328125, "learning_rate": 0.0001888417277758306, "loss": 1.0918, "step": 9321 }, { "epoch": 0.23936282038320006, "grad_norm": 0.74609375, "learning_rate": 0.00018883967845465725, "loss": 0.8376, "step": 9322 }, { "epoch": 0.23938849757912187, "grad_norm": 0.7734375, "learning_rate": 0.00018883762895643443, "loss": 1.1056, "step": 9323 }, { "epoch": 0.2394141747750437, "grad_norm": 0.84765625, "learning_rate": 0.0001888355792811662, "loss": 0.9954, "step": 9324 }, { "epoch": 0.2394398519709655, "grad_norm": 0.796875, "learning_rate": 0.00018883352942885675, "loss": 0.907, "step": 9325 }, { "epoch": 0.23946552916688732, "grad_norm": 0.79296875, "learning_rate": 0.00018883147939951006, "loss": 1.009, "step": 9326 }, { "epoch": 0.23949120636280916, "grad_norm": 0.73828125, "learning_rate": 0.00018882942919313029, "loss": 0.941, "step": 9327 }, { "epoch": 0.23951688355873096, "grad_norm": 0.82421875, "learning_rate": 0.00018882737880972146, "loss": 1.0045, "step": 9328 }, { "epoch": 0.2395425607546528, "grad_norm": 0.87890625, "learning_rate": 0.0001888253282492877, "loss": 0.9139, "step": 9329 }, { "epoch": 0.2395682379505746, "grad_norm": 0.8046875, "learning_rate": 0.00018882327751183308, "loss": 0.9847, "step": 9330 }, { "epoch": 0.23959391514649642, "grad_norm": 0.84765625, "learning_rate": 0.00018882122659736168, "loss": 1.1287, "step": 9331 }, { "epoch": 0.23961959234241825, "grad_norm": 0.77734375, "learning_rate": 0.0001888191755058776, "loss": 0.9854, "step": 9332 }, { "epoch": 0.23964526953834006, "grad_norm": 0.78125, "learning_rate": 0.00018881712423738498, "loss": 0.767, "step": 9333 }, { "epoch": 0.2396709467342619, "grad_norm": 0.77734375, "learning_rate": 0.00018881507279188778, "loss": 1.0056, "step": 9334 }, { "epoch": 0.2396966239301837, "grad_norm": 0.8359375, "learning_rate": 0.0001888130211693902, "loss": 1.0078, "step": 9335 }, { "epoch": 0.2397223011261055, "grad_norm": 0.81640625, "learning_rate": 0.00018881096936989631, "loss": 1.0332, "step": 9336 }, { "epoch": 0.23974797832202735, "grad_norm": 0.80078125, "learning_rate": 0.00018880891739341012, "loss": 1.0761, "step": 9337 }, { "epoch": 0.23977365551794916, "grad_norm": 0.74609375, "learning_rate": 0.00018880686523993585, "loss": 0.8544, "step": 9338 }, { "epoch": 0.239799332713871, "grad_norm": 0.8984375, "learning_rate": 0.0001888048129094775, "loss": 1.0648, "step": 9339 }, { "epoch": 0.2398250099097928, "grad_norm": 0.79296875, "learning_rate": 0.00018880276040203915, "loss": 1.034, "step": 9340 }, { "epoch": 0.2398506871057146, "grad_norm": 0.77734375, "learning_rate": 0.00018880070771762494, "loss": 1.0626, "step": 9341 }, { "epoch": 0.23987636430163645, "grad_norm": 0.8203125, "learning_rate": 0.00018879865485623893, "loss": 1.0955, "step": 9342 }, { "epoch": 0.23990204149755825, "grad_norm": 0.8046875, "learning_rate": 0.00018879660181788524, "loss": 0.9552, "step": 9343 }, { "epoch": 0.2399277186934801, "grad_norm": 0.80859375, "learning_rate": 0.0001887945486025679, "loss": 1.1059, "step": 9344 }, { "epoch": 0.2399533958894019, "grad_norm": 0.79296875, "learning_rate": 0.00018879249521029109, "loss": 0.9832, "step": 9345 }, { "epoch": 0.2399790730853237, "grad_norm": 0.83203125, "learning_rate": 0.00018879044164105886, "loss": 0.9698, "step": 9346 }, { "epoch": 0.24000475028124554, "grad_norm": 0.8359375, "learning_rate": 0.0001887883878948753, "loss": 1.014, "step": 9347 }, { "epoch": 0.24003042747716735, "grad_norm": 1.0078125, "learning_rate": 0.00018878633397174447, "loss": 0.9985, "step": 9348 }, { "epoch": 0.2400561046730892, "grad_norm": 0.8515625, "learning_rate": 0.00018878427987167052, "loss": 1.0954, "step": 9349 }, { "epoch": 0.240081781869011, "grad_norm": 0.7734375, "learning_rate": 0.0001887822255946575, "loss": 1.1052, "step": 9350 }, { "epoch": 0.2401074590649328, "grad_norm": 0.828125, "learning_rate": 0.00018878017114070956, "loss": 1.1713, "step": 9351 }, { "epoch": 0.24013313626085464, "grad_norm": 0.8515625, "learning_rate": 0.00018877811650983071, "loss": 1.0602, "step": 9352 }, { "epoch": 0.24015881345677645, "grad_norm": 0.75, "learning_rate": 0.00018877606170202515, "loss": 1.0109, "step": 9353 }, { "epoch": 0.24018449065269828, "grad_norm": 0.75390625, "learning_rate": 0.0001887740067172969, "loss": 0.988, "step": 9354 }, { "epoch": 0.2402101678486201, "grad_norm": 0.73046875, "learning_rate": 0.00018877195155565004, "loss": 0.8806, "step": 9355 }, { "epoch": 0.2402358450445419, "grad_norm": 0.7734375, "learning_rate": 0.00018876989621708875, "loss": 1.0338, "step": 9356 }, { "epoch": 0.24026152224046374, "grad_norm": 0.78515625, "learning_rate": 0.00018876784070161705, "loss": 1.002, "step": 9357 }, { "epoch": 0.24028719943638555, "grad_norm": 0.71484375, "learning_rate": 0.00018876578500923904, "loss": 0.8867, "step": 9358 }, { "epoch": 0.24031287663230738, "grad_norm": 0.796875, "learning_rate": 0.00018876372913995883, "loss": 0.886, "step": 9359 }, { "epoch": 0.2403385538282292, "grad_norm": 0.80078125, "learning_rate": 0.0001887616730937806, "loss": 0.9484, "step": 9360 }, { "epoch": 0.240364231024151, "grad_norm": 0.7890625, "learning_rate": 0.00018875961687070828, "loss": 1.0633, "step": 9361 }, { "epoch": 0.24038990822007283, "grad_norm": 0.82421875, "learning_rate": 0.00018875756047074613, "loss": 1.0645, "step": 9362 }, { "epoch": 0.24041558541599464, "grad_norm": 0.8203125, "learning_rate": 0.00018875550389389816, "loss": 1.1507, "step": 9363 }, { "epoch": 0.24044126261191648, "grad_norm": 0.73046875, "learning_rate": 0.0001887534471401685, "loss": 1.028, "step": 9364 }, { "epoch": 0.2404669398078383, "grad_norm": 0.8125, "learning_rate": 0.00018875139020956122, "loss": 0.9308, "step": 9365 }, { "epoch": 0.2404926170037601, "grad_norm": 0.765625, "learning_rate": 0.00018874933310208042, "loss": 1.0192, "step": 9366 }, { "epoch": 0.24051829419968193, "grad_norm": 0.828125, "learning_rate": 0.00018874727581773024, "loss": 1.0242, "step": 9367 }, { "epoch": 0.24054397139560374, "grad_norm": 1.1328125, "learning_rate": 0.00018874521835651476, "loss": 1.0091, "step": 9368 }, { "epoch": 0.24056964859152558, "grad_norm": 0.8046875, "learning_rate": 0.00018874316071843805, "loss": 1.1372, "step": 9369 }, { "epoch": 0.24059532578744738, "grad_norm": 0.82421875, "learning_rate": 0.00018874110290350427, "loss": 0.9856, "step": 9370 }, { "epoch": 0.2406210029833692, "grad_norm": 0.76171875, "learning_rate": 0.00018873904491171746, "loss": 1.0139, "step": 9371 }, { "epoch": 0.24064668017929103, "grad_norm": 0.75390625, "learning_rate": 0.00018873698674308175, "loss": 0.9616, "step": 9372 }, { "epoch": 0.24067235737521284, "grad_norm": 0.84375, "learning_rate": 0.00018873492839760125, "loss": 1.1068, "step": 9373 }, { "epoch": 0.24069803457113467, "grad_norm": 0.8125, "learning_rate": 0.00018873286987528006, "loss": 0.9288, "step": 9374 }, { "epoch": 0.24072371176705648, "grad_norm": 0.76953125, "learning_rate": 0.00018873081117612225, "loss": 0.9414, "step": 9375 }, { "epoch": 0.2407493889629783, "grad_norm": 0.77734375, "learning_rate": 0.00018872875230013198, "loss": 1.046, "step": 9376 }, { "epoch": 0.24077506615890013, "grad_norm": 0.8203125, "learning_rate": 0.00018872669324731332, "loss": 0.9854, "step": 9377 }, { "epoch": 0.24080074335482193, "grad_norm": 0.80859375, "learning_rate": 0.00018872463401767035, "loss": 1.1558, "step": 9378 }, { "epoch": 0.24082642055074377, "grad_norm": 0.79296875, "learning_rate": 0.00018872257461120725, "loss": 1.0547, "step": 9379 }, { "epoch": 0.24085209774666558, "grad_norm": 0.83203125, "learning_rate": 0.000188720515027928, "loss": 0.9967, "step": 9380 }, { "epoch": 0.24087777494258739, "grad_norm": 0.7421875, "learning_rate": 0.00018871845526783685, "loss": 1.1109, "step": 9381 }, { "epoch": 0.24090345213850922, "grad_norm": 0.78125, "learning_rate": 0.00018871639533093779, "loss": 0.968, "step": 9382 }, { "epoch": 0.24092912933443103, "grad_norm": 0.83203125, "learning_rate": 0.00018871433521723495, "loss": 1.0706, "step": 9383 }, { "epoch": 0.24095480653035287, "grad_norm": 0.7734375, "learning_rate": 0.00018871227492673248, "loss": 1.0569, "step": 9384 }, { "epoch": 0.24098048372627467, "grad_norm": 0.79296875, "learning_rate": 0.00018871021445943445, "loss": 0.962, "step": 9385 }, { "epoch": 0.24100616092219648, "grad_norm": 0.77734375, "learning_rate": 0.000188708153815345, "loss": 1.0213, "step": 9386 }, { "epoch": 0.24103183811811832, "grad_norm": 0.88671875, "learning_rate": 0.0001887060929944682, "loss": 1.1318, "step": 9387 }, { "epoch": 0.24105751531404013, "grad_norm": 0.76953125, "learning_rate": 0.00018870403199680819, "loss": 1.0379, "step": 9388 }, { "epoch": 0.24108319250996196, "grad_norm": 0.79296875, "learning_rate": 0.000188701970822369, "loss": 1.094, "step": 9389 }, { "epoch": 0.24110886970588377, "grad_norm": 0.7265625, "learning_rate": 0.00018869990947115484, "loss": 0.8083, "step": 9390 }, { "epoch": 0.24113454690180558, "grad_norm": 0.76953125, "learning_rate": 0.00018869784794316977, "loss": 0.9981, "step": 9391 }, { "epoch": 0.24116022409772742, "grad_norm": 0.859375, "learning_rate": 0.00018869578623841786, "loss": 1.0057, "step": 9392 }, { "epoch": 0.24118590129364922, "grad_norm": 0.79296875, "learning_rate": 0.00018869372435690332, "loss": 1.0297, "step": 9393 }, { "epoch": 0.24121157848957106, "grad_norm": 0.81640625, "learning_rate": 0.00018869166229863016, "loss": 0.9286, "step": 9394 }, { "epoch": 0.24123725568549287, "grad_norm": 0.8046875, "learning_rate": 0.00018868960006360254, "loss": 0.9493, "step": 9395 }, { "epoch": 0.24126293288141468, "grad_norm": 0.80859375, "learning_rate": 0.00018868753765182456, "loss": 0.9392, "step": 9396 }, { "epoch": 0.2412886100773365, "grad_norm": 0.83984375, "learning_rate": 0.0001886854750633003, "loss": 0.9648, "step": 9397 }, { "epoch": 0.24131428727325832, "grad_norm": 0.78515625, "learning_rate": 0.0001886834122980339, "loss": 1.0127, "step": 9398 }, { "epoch": 0.24133996446918013, "grad_norm": 0.8203125, "learning_rate": 0.00018868134935602954, "loss": 1.0169, "step": 9399 }, { "epoch": 0.24136564166510197, "grad_norm": 0.7734375, "learning_rate": 0.00018867928623729122, "loss": 0.9175, "step": 9400 }, { "epoch": 0.24139131886102377, "grad_norm": 0.80078125, "learning_rate": 0.0001886772229418231, "loss": 0.9377, "step": 9401 }, { "epoch": 0.2414169960569456, "grad_norm": 0.7890625, "learning_rate": 0.00018867515946962925, "loss": 1.0145, "step": 9402 }, { "epoch": 0.24144267325286742, "grad_norm": 0.72265625, "learning_rate": 0.00018867309582071385, "loss": 0.9796, "step": 9403 }, { "epoch": 0.24146835044878923, "grad_norm": 0.828125, "learning_rate": 0.00018867103199508097, "loss": 1.0189, "step": 9404 }, { "epoch": 0.24149402764471106, "grad_norm": 0.81640625, "learning_rate": 0.00018866896799273473, "loss": 0.9762, "step": 9405 }, { "epoch": 0.24151970484063287, "grad_norm": 0.7734375, "learning_rate": 0.00018866690381367924, "loss": 1.0739, "step": 9406 }, { "epoch": 0.2415453820365547, "grad_norm": 0.83984375, "learning_rate": 0.00018866483945791863, "loss": 1.0519, "step": 9407 }, { "epoch": 0.24157105923247651, "grad_norm": 0.84765625, "learning_rate": 0.00018866277492545698, "loss": 1.053, "step": 9408 }, { "epoch": 0.24159673642839832, "grad_norm": 0.84765625, "learning_rate": 0.00018866071021629848, "loss": 1.0873, "step": 9409 }, { "epoch": 0.24162241362432016, "grad_norm": 0.76953125, "learning_rate": 0.00018865864533044717, "loss": 0.9571, "step": 9410 }, { "epoch": 0.24164809082024197, "grad_norm": 0.73046875, "learning_rate": 0.00018865658026790717, "loss": 0.942, "step": 9411 }, { "epoch": 0.2416737680161638, "grad_norm": 0.78515625, "learning_rate": 0.00018865451502868264, "loss": 1.0322, "step": 9412 }, { "epoch": 0.2416994452120856, "grad_norm": 0.77734375, "learning_rate": 0.00018865244961277763, "loss": 1.0052, "step": 9413 }, { "epoch": 0.24172512240800742, "grad_norm": 0.80078125, "learning_rate": 0.00018865038402019634, "loss": 1.1064, "step": 9414 }, { "epoch": 0.24175079960392926, "grad_norm": 0.84375, "learning_rate": 0.00018864831825094282, "loss": 1.1289, "step": 9415 }, { "epoch": 0.24177647679985106, "grad_norm": 0.8125, "learning_rate": 0.00018864625230502122, "loss": 0.9675, "step": 9416 }, { "epoch": 0.2418021539957729, "grad_norm": 0.765625, "learning_rate": 0.00018864418618243564, "loss": 0.872, "step": 9417 }, { "epoch": 0.2418278311916947, "grad_norm": 0.80078125, "learning_rate": 0.0001886421198831902, "loss": 1.1273, "step": 9418 }, { "epoch": 0.24185350838761652, "grad_norm": 0.77734375, "learning_rate": 0.00018864005340728902, "loss": 0.7797, "step": 9419 }, { "epoch": 0.24187918558353835, "grad_norm": 0.7421875, "learning_rate": 0.00018863798675473623, "loss": 1.0361, "step": 9420 }, { "epoch": 0.24190486277946016, "grad_norm": 0.87109375, "learning_rate": 0.00018863591992553596, "loss": 0.9376, "step": 9421 }, { "epoch": 0.241930539975382, "grad_norm": 0.75, "learning_rate": 0.00018863385291969227, "loss": 1.0906, "step": 9422 }, { "epoch": 0.2419562171713038, "grad_norm": 0.8984375, "learning_rate": 0.00018863178573720935, "loss": 1.0808, "step": 9423 }, { "epoch": 0.24198189436722561, "grad_norm": 0.80859375, "learning_rate": 0.00018862971837809125, "loss": 0.9898, "step": 9424 }, { "epoch": 0.24200757156314745, "grad_norm": 0.8046875, "learning_rate": 0.00018862765084234212, "loss": 0.9491, "step": 9425 }, { "epoch": 0.24203324875906926, "grad_norm": 0.87890625, "learning_rate": 0.00018862558312996615, "loss": 1.1715, "step": 9426 }, { "epoch": 0.2420589259549911, "grad_norm": 0.87890625, "learning_rate": 0.00018862351524096736, "loss": 1.0584, "step": 9427 }, { "epoch": 0.2420846031509129, "grad_norm": 0.82421875, "learning_rate": 0.00018862144717534987, "loss": 1.0904, "step": 9428 }, { "epoch": 0.2421102803468347, "grad_norm": 0.84375, "learning_rate": 0.0001886193789331179, "loss": 0.9628, "step": 9429 }, { "epoch": 0.24213595754275655, "grad_norm": 1.109375, "learning_rate": 0.00018861731051427548, "loss": 0.9215, "step": 9430 }, { "epoch": 0.24216163473867836, "grad_norm": 0.83203125, "learning_rate": 0.00018861524191882682, "loss": 1.024, "step": 9431 }, { "epoch": 0.2421873119346002, "grad_norm": 0.8125, "learning_rate": 0.00018861317314677593, "loss": 0.9496, "step": 9432 }, { "epoch": 0.242212989130522, "grad_norm": 0.80078125, "learning_rate": 0.00018861110419812703, "loss": 0.9286, "step": 9433 }, { "epoch": 0.2422386663264438, "grad_norm": 0.828125, "learning_rate": 0.00018860903507288417, "loss": 1.0885, "step": 9434 }, { "epoch": 0.24226434352236564, "grad_norm": 0.7578125, "learning_rate": 0.0001886069657710515, "loss": 1.1182, "step": 9435 }, { "epoch": 0.24229002071828745, "grad_norm": 0.890625, "learning_rate": 0.00018860489629263317, "loss": 1.0342, "step": 9436 }, { "epoch": 0.2423156979142093, "grad_norm": 0.81640625, "learning_rate": 0.00018860282663763333, "loss": 1.0089, "step": 9437 }, { "epoch": 0.2423413751101311, "grad_norm": 0.81640625, "learning_rate": 0.00018860075680605598, "loss": 0.9159, "step": 9438 }, { "epoch": 0.2423670523060529, "grad_norm": 0.8828125, "learning_rate": 0.00018859868679790536, "loss": 1.1098, "step": 9439 }, { "epoch": 0.24239272950197474, "grad_norm": 0.796875, "learning_rate": 0.00018859661661318558, "loss": 0.9295, "step": 9440 }, { "epoch": 0.24241840669789655, "grad_norm": 0.8125, "learning_rate": 0.00018859454625190072, "loss": 1.0449, "step": 9441 }, { "epoch": 0.24244408389381839, "grad_norm": 0.77734375, "learning_rate": 0.0001885924757140549, "loss": 1.0559, "step": 9442 }, { "epoch": 0.2424697610897402, "grad_norm": 0.76171875, "learning_rate": 0.00018859040499965234, "loss": 0.9882, "step": 9443 }, { "epoch": 0.242495438285662, "grad_norm": 0.7890625, "learning_rate": 0.0001885883341086971, "loss": 0.9447, "step": 9444 }, { "epoch": 0.24252111548158384, "grad_norm": 0.78125, "learning_rate": 0.0001885862630411933, "loss": 1.0498, "step": 9445 }, { "epoch": 0.24254679267750565, "grad_norm": 0.92578125, "learning_rate": 0.00018858419179714506, "loss": 0.9532, "step": 9446 }, { "epoch": 0.24257246987342748, "grad_norm": 1.0234375, "learning_rate": 0.00018858212037655653, "loss": 0.8756, "step": 9447 }, { "epoch": 0.2425981470693493, "grad_norm": 0.7734375, "learning_rate": 0.00018858004877943184, "loss": 1.0315, "step": 9448 }, { "epoch": 0.2426238242652711, "grad_norm": 0.73828125, "learning_rate": 0.00018857797700577513, "loss": 1.0465, "step": 9449 }, { "epoch": 0.24264950146119293, "grad_norm": 0.8203125, "learning_rate": 0.00018857590505559052, "loss": 1.1995, "step": 9450 }, { "epoch": 0.24267517865711474, "grad_norm": 0.81640625, "learning_rate": 0.0001885738329288821, "loss": 1.005, "step": 9451 }, { "epoch": 0.24270085585303658, "grad_norm": 0.73046875, "learning_rate": 0.00018857176062565405, "loss": 0.9126, "step": 9452 }, { "epoch": 0.2427265330489584, "grad_norm": 0.8359375, "learning_rate": 0.00018856968814591048, "loss": 0.9564, "step": 9453 }, { "epoch": 0.2427522102448802, "grad_norm": 0.8515625, "learning_rate": 0.00018856761548965552, "loss": 1.0906, "step": 9454 }, { "epoch": 0.24277788744080203, "grad_norm": 0.765625, "learning_rate": 0.0001885655426568933, "loss": 1.0288, "step": 9455 }, { "epoch": 0.24280356463672384, "grad_norm": 0.859375, "learning_rate": 0.00018856346964762796, "loss": 1.0326, "step": 9456 }, { "epoch": 0.24282924183264568, "grad_norm": 0.86328125, "learning_rate": 0.00018856139646186365, "loss": 1.2075, "step": 9457 }, { "epoch": 0.24285491902856748, "grad_norm": 1.84375, "learning_rate": 0.00018855932309960443, "loss": 1.067, "step": 9458 }, { "epoch": 0.2428805962244893, "grad_norm": 0.79296875, "learning_rate": 0.0001885572495608545, "loss": 1.0281, "step": 9459 }, { "epoch": 0.24290627342041113, "grad_norm": 0.796875, "learning_rate": 0.00018855517584561796, "loss": 1.0084, "step": 9460 }, { "epoch": 0.24293195061633294, "grad_norm": 0.75, "learning_rate": 0.00018855310195389896, "loss": 0.9891, "step": 9461 }, { "epoch": 0.24295762781225477, "grad_norm": 0.7890625, "learning_rate": 0.00018855102788570163, "loss": 0.9789, "step": 9462 }, { "epoch": 0.24298330500817658, "grad_norm": 0.80859375, "learning_rate": 0.0001885489536410301, "loss": 1.178, "step": 9463 }, { "epoch": 0.2430089822040984, "grad_norm": 0.77734375, "learning_rate": 0.0001885468792198885, "loss": 0.9286, "step": 9464 }, { "epoch": 0.24303465940002023, "grad_norm": 0.79296875, "learning_rate": 0.00018854480462228098, "loss": 0.9269, "step": 9465 }, { "epoch": 0.24306033659594203, "grad_norm": 0.7421875, "learning_rate": 0.00018854272984821164, "loss": 0.9741, "step": 9466 }, { "epoch": 0.24308601379186387, "grad_norm": 0.7421875, "learning_rate": 0.00018854065489768467, "loss": 0.9835, "step": 9467 }, { "epoch": 0.24311169098778568, "grad_norm": 0.7734375, "learning_rate": 0.00018853857977070416, "loss": 0.9782, "step": 9468 }, { "epoch": 0.2431373681837075, "grad_norm": 0.80859375, "learning_rate": 0.00018853650446727424, "loss": 0.9207, "step": 9469 }, { "epoch": 0.24316304537962932, "grad_norm": 0.78125, "learning_rate": 0.0001885344289873991, "loss": 1.0168, "step": 9470 }, { "epoch": 0.24318872257555113, "grad_norm": 0.765625, "learning_rate": 0.0001885323533310828, "loss": 1.0944, "step": 9471 }, { "epoch": 0.24321439977147297, "grad_norm": 0.87109375, "learning_rate": 0.00018853027749832953, "loss": 1.0695, "step": 9472 }, { "epoch": 0.24324007696739477, "grad_norm": 0.890625, "learning_rate": 0.0001885282014891434, "loss": 1.0614, "step": 9473 }, { "epoch": 0.24326575416331658, "grad_norm": 0.78125, "learning_rate": 0.0001885261253035286, "loss": 0.8604, "step": 9474 }, { "epoch": 0.24329143135923842, "grad_norm": 0.8359375, "learning_rate": 0.0001885240489414892, "loss": 1.0347, "step": 9475 }, { "epoch": 0.24331710855516023, "grad_norm": 1.109375, "learning_rate": 0.0001885219724030294, "loss": 0.9902, "step": 9476 }, { "epoch": 0.24334278575108206, "grad_norm": 0.87890625, "learning_rate": 0.00018851989568815326, "loss": 1.0674, "step": 9477 }, { "epoch": 0.24336846294700387, "grad_norm": 0.828125, "learning_rate": 0.000188517818796865, "loss": 0.9055, "step": 9478 }, { "epoch": 0.24339414014292568, "grad_norm": 0.8203125, "learning_rate": 0.0001885157417291687, "loss": 0.922, "step": 9479 }, { "epoch": 0.24341981733884752, "grad_norm": 0.7890625, "learning_rate": 0.00018851366448506852, "loss": 0.9334, "step": 9480 }, { "epoch": 0.24344549453476932, "grad_norm": 0.80859375, "learning_rate": 0.00018851158706456863, "loss": 1.0811, "step": 9481 }, { "epoch": 0.24347117173069116, "grad_norm": 0.82421875, "learning_rate": 0.0001885095094676731, "loss": 1.1144, "step": 9482 }, { "epoch": 0.24349684892661297, "grad_norm": 0.77734375, "learning_rate": 0.00018850743169438613, "loss": 0.8864, "step": 9483 }, { "epoch": 0.24352252612253478, "grad_norm": 0.74609375, "learning_rate": 0.00018850535374471185, "loss": 1.0016, "step": 9484 }, { "epoch": 0.2435482033184566, "grad_norm": 0.8359375, "learning_rate": 0.00018850327561865439, "loss": 1.0979, "step": 9485 }, { "epoch": 0.24357388051437842, "grad_norm": 0.796875, "learning_rate": 0.0001885011973162179, "loss": 1.0776, "step": 9486 }, { "epoch": 0.24359955771030026, "grad_norm": 0.796875, "learning_rate": 0.0001884991188374065, "loss": 1.0036, "step": 9487 }, { "epoch": 0.24362523490622207, "grad_norm": 0.77734375, "learning_rate": 0.00018849704018222442, "loss": 1.0467, "step": 9488 }, { "epoch": 0.24365091210214387, "grad_norm": 0.8125, "learning_rate": 0.00018849496135067565, "loss": 0.9322, "step": 9489 }, { "epoch": 0.2436765892980657, "grad_norm": 1.4296875, "learning_rate": 0.00018849288234276446, "loss": 1.1119, "step": 9490 }, { "epoch": 0.24370226649398752, "grad_norm": 0.8359375, "learning_rate": 0.00018849080315849493, "loss": 1.0513, "step": 9491 }, { "epoch": 0.24372794368990935, "grad_norm": 0.79296875, "learning_rate": 0.00018848872379787125, "loss": 0.9559, "step": 9492 }, { "epoch": 0.24375362088583116, "grad_norm": 0.82421875, "learning_rate": 0.0001884866442608975, "loss": 1.0339, "step": 9493 }, { "epoch": 0.24377929808175297, "grad_norm": 0.76171875, "learning_rate": 0.00018848456454757787, "loss": 0.9733, "step": 9494 }, { "epoch": 0.2438049752776748, "grad_norm": 0.76171875, "learning_rate": 0.00018848248465791652, "loss": 0.9161, "step": 9495 }, { "epoch": 0.24383065247359662, "grad_norm": 0.796875, "learning_rate": 0.00018848040459191754, "loss": 1.0153, "step": 9496 }, { "epoch": 0.24385632966951845, "grad_norm": 0.796875, "learning_rate": 0.00018847832434958512, "loss": 0.9377, "step": 9497 }, { "epoch": 0.24388200686544026, "grad_norm": 0.75390625, "learning_rate": 0.00018847624393092337, "loss": 1.1293, "step": 9498 }, { "epoch": 0.24390768406136207, "grad_norm": 0.76953125, "learning_rate": 0.00018847416333593652, "loss": 1.0235, "step": 9499 }, { "epoch": 0.2439333612572839, "grad_norm": 0.75390625, "learning_rate": 0.00018847208256462858, "loss": 1.0057, "step": 9500 }, { "epoch": 0.2439590384532057, "grad_norm": 0.7734375, "learning_rate": 0.0001884700016170038, "loss": 0.9671, "step": 9501 }, { "epoch": 0.24398471564912755, "grad_norm": 0.71484375, "learning_rate": 0.0001884679204930663, "loss": 0.9788, "step": 9502 }, { "epoch": 0.24401039284504936, "grad_norm": 0.78515625, "learning_rate": 0.00018846583919282023, "loss": 1.0628, "step": 9503 }, { "epoch": 0.24403607004097116, "grad_norm": 0.828125, "learning_rate": 0.00018846375771626973, "loss": 0.9403, "step": 9504 }, { "epoch": 0.244061747236893, "grad_norm": 0.73828125, "learning_rate": 0.00018846167606341896, "loss": 0.832, "step": 9505 }, { "epoch": 0.2440874244328148, "grad_norm": 0.9140625, "learning_rate": 0.00018845959423427204, "loss": 1.0879, "step": 9506 }, { "epoch": 0.24411310162873665, "grad_norm": 0.78125, "learning_rate": 0.00018845751222883316, "loss": 0.9605, "step": 9507 }, { "epoch": 0.24413877882465845, "grad_norm": 0.76171875, "learning_rate": 0.00018845543004710646, "loss": 0.9764, "step": 9508 }, { "epoch": 0.24416445602058026, "grad_norm": 0.80078125, "learning_rate": 0.00018845334768909604, "loss": 1.0146, "step": 9509 }, { "epoch": 0.2441901332165021, "grad_norm": 0.828125, "learning_rate": 0.0001884512651548061, "loss": 0.8625, "step": 9510 }, { "epoch": 0.2442158104124239, "grad_norm": 0.85546875, "learning_rate": 0.0001884491824442408, "loss": 0.9349, "step": 9511 }, { "epoch": 0.24424148760834574, "grad_norm": 0.765625, "learning_rate": 0.00018844709955740424, "loss": 1.0639, "step": 9512 }, { "epoch": 0.24426716480426755, "grad_norm": 0.78515625, "learning_rate": 0.0001884450164943006, "loss": 1.0552, "step": 9513 }, { "epoch": 0.24429284200018936, "grad_norm": 0.74609375, "learning_rate": 0.00018844293325493406, "loss": 1.034, "step": 9514 }, { "epoch": 0.2443185191961112, "grad_norm": 0.78515625, "learning_rate": 0.00018844084983930874, "loss": 0.9436, "step": 9515 }, { "epoch": 0.244344196392033, "grad_norm": 0.78515625, "learning_rate": 0.0001884387662474288, "loss": 0.9627, "step": 9516 }, { "epoch": 0.24436987358795484, "grad_norm": 0.87109375, "learning_rate": 0.00018843668247929836, "loss": 1.0758, "step": 9517 }, { "epoch": 0.24439555078387665, "grad_norm": 0.75390625, "learning_rate": 0.0001884345985349216, "loss": 0.9444, "step": 9518 }, { "epoch": 0.24442122797979846, "grad_norm": 0.796875, "learning_rate": 0.0001884325144143027, "loss": 0.9182, "step": 9519 }, { "epoch": 0.2444469051757203, "grad_norm": 0.79296875, "learning_rate": 0.00018843043011744577, "loss": 0.8372, "step": 9520 }, { "epoch": 0.2444725823716421, "grad_norm": 0.80859375, "learning_rate": 0.00018842834564435497, "loss": 0.9739, "step": 9521 }, { "epoch": 0.24449825956756394, "grad_norm": 0.73828125, "learning_rate": 0.00018842626099503448, "loss": 1.0808, "step": 9522 }, { "epoch": 0.24452393676348574, "grad_norm": 0.87890625, "learning_rate": 0.00018842417616948845, "loss": 0.9563, "step": 9523 }, { "epoch": 0.24454961395940755, "grad_norm": 0.8515625, "learning_rate": 0.000188422091167721, "loss": 1.057, "step": 9524 }, { "epoch": 0.2445752911553294, "grad_norm": 0.734375, "learning_rate": 0.0001884200059897363, "loss": 1.0187, "step": 9525 }, { "epoch": 0.2446009683512512, "grad_norm": 0.8359375, "learning_rate": 0.00018841792063553853, "loss": 0.9903, "step": 9526 }, { "epoch": 0.24462664554717303, "grad_norm": 0.87890625, "learning_rate": 0.00018841583510513185, "loss": 1.0322, "step": 9527 }, { "epoch": 0.24465232274309484, "grad_norm": 0.7421875, "learning_rate": 0.00018841374939852038, "loss": 0.984, "step": 9528 }, { "epoch": 0.24467799993901665, "grad_norm": 0.80859375, "learning_rate": 0.00018841166351570831, "loss": 1.0204, "step": 9529 }, { "epoch": 0.24470367713493849, "grad_norm": 0.85546875, "learning_rate": 0.00018840957745669976, "loss": 0.955, "step": 9530 }, { "epoch": 0.2447293543308603, "grad_norm": 0.8828125, "learning_rate": 0.00018840749122149892, "loss": 0.9338, "step": 9531 }, { "epoch": 0.24475503152678213, "grad_norm": 0.77734375, "learning_rate": 0.00018840540481010994, "loss": 1.0459, "step": 9532 }, { "epoch": 0.24478070872270394, "grad_norm": 0.81640625, "learning_rate": 0.00018840331822253696, "loss": 0.9875, "step": 9533 }, { "epoch": 0.24480638591862575, "grad_norm": 0.74609375, "learning_rate": 0.00018840123145878415, "loss": 0.8806, "step": 9534 }, { "epoch": 0.24483206311454758, "grad_norm": 0.74609375, "learning_rate": 0.0001883991445188557, "loss": 0.8145, "step": 9535 }, { "epoch": 0.2448577403104694, "grad_norm": 0.81640625, "learning_rate": 0.0001883970574027557, "loss": 1.0263, "step": 9536 }, { "epoch": 0.24488341750639123, "grad_norm": 0.796875, "learning_rate": 0.0001883949701104884, "loss": 1.0082, "step": 9537 }, { "epoch": 0.24490909470231303, "grad_norm": 0.796875, "learning_rate": 0.00018839288264205787, "loss": 0.9658, "step": 9538 }, { "epoch": 0.24493477189823484, "grad_norm": 0.83203125, "learning_rate": 0.00018839079499746828, "loss": 1.0597, "step": 9539 }, { "epoch": 0.24496044909415668, "grad_norm": 0.81640625, "learning_rate": 0.0001883887071767239, "loss": 1.0734, "step": 9540 }, { "epoch": 0.2449861262900785, "grad_norm": 0.72265625, "learning_rate": 0.00018838661917982874, "loss": 0.9398, "step": 9541 }, { "epoch": 0.24501180348600032, "grad_norm": 0.734375, "learning_rate": 0.00018838453100678706, "loss": 1.1433, "step": 9542 }, { "epoch": 0.24503748068192213, "grad_norm": 0.82421875, "learning_rate": 0.000188382442657603, "loss": 1.0244, "step": 9543 }, { "epoch": 0.24506315787784394, "grad_norm": 0.8671875, "learning_rate": 0.0001883803541322807, "loss": 1.0399, "step": 9544 }, { "epoch": 0.24508883507376578, "grad_norm": 0.8125, "learning_rate": 0.00018837826543082432, "loss": 1.0493, "step": 9545 }, { "epoch": 0.24511451226968758, "grad_norm": 0.8984375, "learning_rate": 0.0001883761765532381, "loss": 1.0917, "step": 9546 }, { "epoch": 0.24514018946560942, "grad_norm": 0.8359375, "learning_rate": 0.0001883740874995261, "loss": 1.0337, "step": 9547 }, { "epoch": 0.24516586666153123, "grad_norm": 0.78125, "learning_rate": 0.00018837199826969255, "loss": 1.0357, "step": 9548 }, { "epoch": 0.24519154385745304, "grad_norm": 0.8359375, "learning_rate": 0.00018836990886374156, "loss": 0.9993, "step": 9549 }, { "epoch": 0.24521722105337487, "grad_norm": 0.734375, "learning_rate": 0.00018836781928167735, "loss": 1.0003, "step": 9550 }, { "epoch": 0.24524289824929668, "grad_norm": 0.8046875, "learning_rate": 0.00018836572952350404, "loss": 0.9826, "step": 9551 }, { "epoch": 0.24526857544521852, "grad_norm": 0.81640625, "learning_rate": 0.00018836363958922583, "loss": 1.0169, "step": 9552 }, { "epoch": 0.24529425264114033, "grad_norm": 0.828125, "learning_rate": 0.00018836154947884684, "loss": 1.0757, "step": 9553 }, { "epoch": 0.24531992983706213, "grad_norm": 0.828125, "learning_rate": 0.00018835945919237128, "loss": 0.9474, "step": 9554 }, { "epoch": 0.24534560703298397, "grad_norm": 0.796875, "learning_rate": 0.0001883573687298033, "loss": 1.0411, "step": 9555 }, { "epoch": 0.24537128422890578, "grad_norm": 0.796875, "learning_rate": 0.00018835527809114708, "loss": 0.9385, "step": 9556 }, { "epoch": 0.24539696142482761, "grad_norm": 0.796875, "learning_rate": 0.00018835318727640678, "loss": 0.8448, "step": 9557 }, { "epoch": 0.24542263862074942, "grad_norm": 0.78125, "learning_rate": 0.0001883510962855865, "loss": 0.8638, "step": 9558 }, { "epoch": 0.24544831581667123, "grad_norm": 0.83984375, "learning_rate": 0.0001883490051186905, "loss": 0.891, "step": 9559 }, { "epoch": 0.24547399301259307, "grad_norm": 0.87109375, "learning_rate": 0.00018834691377572296, "loss": 0.9211, "step": 9560 }, { "epoch": 0.24549967020851488, "grad_norm": 0.828125, "learning_rate": 0.00018834482225668793, "loss": 1.132, "step": 9561 }, { "epoch": 0.2455253474044367, "grad_norm": 0.7890625, "learning_rate": 0.0001883427305615897, "loss": 0.9284, "step": 9562 }, { "epoch": 0.24555102460035852, "grad_norm": 0.82421875, "learning_rate": 0.00018834063869043236, "loss": 1.0216, "step": 9563 }, { "epoch": 0.24557670179628033, "grad_norm": 0.8203125, "learning_rate": 0.0001883385466432201, "loss": 1.0679, "step": 9564 }, { "epoch": 0.24560237899220216, "grad_norm": 0.859375, "learning_rate": 0.00018833645441995712, "loss": 0.9697, "step": 9565 }, { "epoch": 0.24562805618812397, "grad_norm": 0.8828125, "learning_rate": 0.00018833436202064755, "loss": 1.05, "step": 9566 }, { "epoch": 0.2456537333840458, "grad_norm": 0.796875, "learning_rate": 0.0001883322694452956, "loss": 1.0062, "step": 9567 }, { "epoch": 0.24567941057996762, "grad_norm": 0.8125, "learning_rate": 0.0001883301766939054, "loss": 0.8825, "step": 9568 }, { "epoch": 0.24570508777588942, "grad_norm": 0.859375, "learning_rate": 0.00018832808376648111, "loss": 0.9553, "step": 9569 }, { "epoch": 0.24573076497181126, "grad_norm": 0.78515625, "learning_rate": 0.00018832599066302694, "loss": 1.0198, "step": 9570 }, { "epoch": 0.24575644216773307, "grad_norm": 0.80859375, "learning_rate": 0.00018832389738354706, "loss": 1.0995, "step": 9571 }, { "epoch": 0.2457821193636549, "grad_norm": 0.80078125, "learning_rate": 0.00018832180392804564, "loss": 0.9737, "step": 9572 }, { "epoch": 0.2458077965595767, "grad_norm": 0.8046875, "learning_rate": 0.00018831971029652683, "loss": 0.9866, "step": 9573 }, { "epoch": 0.24583347375549852, "grad_norm": 0.77734375, "learning_rate": 0.00018831761648899483, "loss": 1.0258, "step": 9574 }, { "epoch": 0.24585915095142036, "grad_norm": 0.71875, "learning_rate": 0.00018831552250545377, "loss": 0.8848, "step": 9575 }, { "epoch": 0.24588482814734217, "grad_norm": 0.84375, "learning_rate": 0.00018831342834590785, "loss": 0.9427, "step": 9576 }, { "epoch": 0.245910505343264, "grad_norm": 0.8671875, "learning_rate": 0.0001883113340103613, "loss": 1.0799, "step": 9577 }, { "epoch": 0.2459361825391858, "grad_norm": 0.828125, "learning_rate": 0.0001883092394988182, "loss": 1.0394, "step": 9578 }, { "epoch": 0.24596185973510762, "grad_norm": 0.8125, "learning_rate": 0.00018830714481128275, "loss": 1.1274, "step": 9579 }, { "epoch": 0.24598753693102945, "grad_norm": 0.78515625, "learning_rate": 0.00018830504994775914, "loss": 1.0121, "step": 9580 }, { "epoch": 0.24601321412695126, "grad_norm": 0.7890625, "learning_rate": 0.00018830295490825157, "loss": 0.9697, "step": 9581 }, { "epoch": 0.2460388913228731, "grad_norm": 0.83984375, "learning_rate": 0.00018830085969276416, "loss": 1.0831, "step": 9582 }, { "epoch": 0.2460645685187949, "grad_norm": 0.78515625, "learning_rate": 0.00018829876430130114, "loss": 1.0535, "step": 9583 }, { "epoch": 0.24609024571471672, "grad_norm": 1.0, "learning_rate": 0.0001882966687338666, "loss": 0.9398, "step": 9584 }, { "epoch": 0.24611592291063855, "grad_norm": 0.80078125, "learning_rate": 0.00018829457299046485, "loss": 0.8942, "step": 9585 }, { "epoch": 0.24614160010656036, "grad_norm": 0.7421875, "learning_rate": 0.00018829247707109997, "loss": 1.1011, "step": 9586 }, { "epoch": 0.2461672773024822, "grad_norm": 0.7734375, "learning_rate": 0.00018829038097577613, "loss": 1.0637, "step": 9587 }, { "epoch": 0.246192954498404, "grad_norm": 0.84765625, "learning_rate": 0.00018828828470449755, "loss": 0.9941, "step": 9588 }, { "epoch": 0.2462186316943258, "grad_norm": 0.85546875, "learning_rate": 0.0001882861882572684, "loss": 1.0607, "step": 9589 }, { "epoch": 0.24624430889024765, "grad_norm": 0.8203125, "learning_rate": 0.00018828409163409285, "loss": 1.0413, "step": 9590 }, { "epoch": 0.24626998608616946, "grad_norm": 0.828125, "learning_rate": 0.00018828199483497508, "loss": 1.1257, "step": 9591 }, { "epoch": 0.2462956632820913, "grad_norm": 0.7734375, "learning_rate": 0.00018827989785991927, "loss": 0.9085, "step": 9592 }, { "epoch": 0.2463213404780131, "grad_norm": 0.921875, "learning_rate": 0.0001882778007089296, "loss": 1.0211, "step": 9593 }, { "epoch": 0.2463470176739349, "grad_norm": 0.80078125, "learning_rate": 0.00018827570338201022, "loss": 0.9855, "step": 9594 }, { "epoch": 0.24637269486985675, "grad_norm": 0.8515625, "learning_rate": 0.00018827360587916537, "loss": 1.0326, "step": 9595 }, { "epoch": 0.24639837206577855, "grad_norm": 0.7578125, "learning_rate": 0.0001882715082003992, "loss": 0.9473, "step": 9596 }, { "epoch": 0.2464240492617004, "grad_norm": 0.75, "learning_rate": 0.00018826941034571584, "loss": 0.9457, "step": 9597 }, { "epoch": 0.2464497264576222, "grad_norm": 0.7734375, "learning_rate": 0.00018826731231511956, "loss": 0.8876, "step": 9598 }, { "epoch": 0.246475403653544, "grad_norm": 0.7578125, "learning_rate": 0.00018826521410861448, "loss": 0.9988, "step": 9599 }, { "epoch": 0.24650108084946584, "grad_norm": 0.84375, "learning_rate": 0.00018826311572620483, "loss": 1.1444, "step": 9600 }, { "epoch": 0.24652675804538765, "grad_norm": 0.88671875, "learning_rate": 0.00018826101716789474, "loss": 1.07, "step": 9601 }, { "epoch": 0.2465524352413095, "grad_norm": 0.9609375, "learning_rate": 0.00018825891843368843, "loss": 0.954, "step": 9602 }, { "epoch": 0.2465781124372313, "grad_norm": 0.953125, "learning_rate": 0.00018825681952359006, "loss": 0.9842, "step": 9603 }, { "epoch": 0.2466037896331531, "grad_norm": 0.8359375, "learning_rate": 0.0001882547204376038, "loss": 1.0364, "step": 9604 }, { "epoch": 0.24662946682907494, "grad_norm": 0.87890625, "learning_rate": 0.00018825262117573387, "loss": 1.0964, "step": 9605 }, { "epoch": 0.24665514402499675, "grad_norm": 0.796875, "learning_rate": 0.00018825052173798446, "loss": 1.1558, "step": 9606 }, { "epoch": 0.24668082122091856, "grad_norm": 0.7734375, "learning_rate": 0.0001882484221243597, "loss": 0.9461, "step": 9607 }, { "epoch": 0.2467064984168404, "grad_norm": 0.8125, "learning_rate": 0.00018824632233486384, "loss": 1.0786, "step": 9608 }, { "epoch": 0.2467321756127622, "grad_norm": 0.796875, "learning_rate": 0.00018824422236950102, "loss": 1.1279, "step": 9609 }, { "epoch": 0.24675785280868404, "grad_norm": 0.8359375, "learning_rate": 0.0001882421222282754, "loss": 1.0431, "step": 9610 }, { "epoch": 0.24678353000460584, "grad_norm": 0.7109375, "learning_rate": 0.00018824002191119123, "loss": 0.8601, "step": 9611 }, { "epoch": 0.24680920720052765, "grad_norm": 0.78515625, "learning_rate": 0.00018823792141825266, "loss": 0.9481, "step": 9612 }, { "epoch": 0.2468348843964495, "grad_norm": 0.82421875, "learning_rate": 0.00018823582074946392, "loss": 1.0162, "step": 9613 }, { "epoch": 0.2468605615923713, "grad_norm": 0.796875, "learning_rate": 0.0001882337199048291, "loss": 1.0805, "step": 9614 }, { "epoch": 0.24688623878829313, "grad_norm": 0.828125, "learning_rate": 0.00018823161888435248, "loss": 1.0955, "step": 9615 }, { "epoch": 0.24691191598421494, "grad_norm": 0.83203125, "learning_rate": 0.00018822951768803823, "loss": 1.0461, "step": 9616 }, { "epoch": 0.24693759318013675, "grad_norm": 0.8671875, "learning_rate": 0.0001882274163158905, "loss": 0.9451, "step": 9617 }, { "epoch": 0.24696327037605859, "grad_norm": 0.8125, "learning_rate": 0.00018822531476791348, "loss": 0.9878, "step": 9618 }, { "epoch": 0.2469889475719804, "grad_norm": 0.78515625, "learning_rate": 0.0001882232130441114, "loss": 0.8506, "step": 9619 }, { "epoch": 0.24701462476790223, "grad_norm": 0.83984375, "learning_rate": 0.00018822111114448844, "loss": 0.9549, "step": 9620 }, { "epoch": 0.24704030196382404, "grad_norm": 0.80859375, "learning_rate": 0.00018821900906904873, "loss": 0.939, "step": 9621 }, { "epoch": 0.24706597915974585, "grad_norm": 0.8046875, "learning_rate": 0.00018821690681779653, "loss": 1.0325, "step": 9622 }, { "epoch": 0.24709165635566768, "grad_norm": 0.875, "learning_rate": 0.000188214804390736, "loss": 1.0175, "step": 9623 }, { "epoch": 0.2471173335515895, "grad_norm": 0.87109375, "learning_rate": 0.00018821270178787133, "loss": 1.1274, "step": 9624 }, { "epoch": 0.24714301074751133, "grad_norm": 0.9375, "learning_rate": 0.00018821059900920673, "loss": 0.9652, "step": 9625 }, { "epoch": 0.24716868794343314, "grad_norm": 0.76171875, "learning_rate": 0.00018820849605474636, "loss": 1.0885, "step": 9626 }, { "epoch": 0.24719436513935494, "grad_norm": 0.8359375, "learning_rate": 0.00018820639292449443, "loss": 1.1086, "step": 9627 }, { "epoch": 0.24722004233527678, "grad_norm": 0.8046875, "learning_rate": 0.00018820428961845514, "loss": 0.9653, "step": 9628 }, { "epoch": 0.2472457195311986, "grad_norm": 0.78515625, "learning_rate": 0.00018820218613663264, "loss": 0.9493, "step": 9629 }, { "epoch": 0.24727139672712042, "grad_norm": 0.8046875, "learning_rate": 0.0001882000824790312, "loss": 1.0332, "step": 9630 }, { "epoch": 0.24729707392304223, "grad_norm": 0.7578125, "learning_rate": 0.00018819797864565489, "loss": 0.9475, "step": 9631 }, { "epoch": 0.24732275111896404, "grad_norm": 0.7734375, "learning_rate": 0.00018819587463650802, "loss": 0.947, "step": 9632 }, { "epoch": 0.24734842831488588, "grad_norm": 0.875, "learning_rate": 0.0001881937704515947, "loss": 0.9714, "step": 9633 }, { "epoch": 0.24737410551080768, "grad_norm": 0.8046875, "learning_rate": 0.00018819166609091923, "loss": 1.0701, "step": 9634 }, { "epoch": 0.24739978270672952, "grad_norm": 0.7421875, "learning_rate": 0.00018818956155448568, "loss": 0.8772, "step": 9635 }, { "epoch": 0.24742545990265133, "grad_norm": 0.7578125, "learning_rate": 0.00018818745684229832, "loss": 0.9685, "step": 9636 }, { "epoch": 0.24745113709857314, "grad_norm": 0.734375, "learning_rate": 0.0001881853519543613, "loss": 0.9557, "step": 9637 }, { "epoch": 0.24747681429449497, "grad_norm": 0.8203125, "learning_rate": 0.00018818324689067888, "loss": 1.1557, "step": 9638 }, { "epoch": 0.24750249149041678, "grad_norm": 0.84375, "learning_rate": 0.0001881811416512552, "loss": 1.122, "step": 9639 }, { "epoch": 0.24752816868633862, "grad_norm": 0.8203125, "learning_rate": 0.00018817903623609446, "loss": 1.0058, "step": 9640 }, { "epoch": 0.24755384588226043, "grad_norm": 0.796875, "learning_rate": 0.00018817693064520087, "loss": 1.0096, "step": 9641 }, { "epoch": 0.24757952307818223, "grad_norm": 0.73828125, "learning_rate": 0.00018817482487857863, "loss": 0.8591, "step": 9642 }, { "epoch": 0.24760520027410407, "grad_norm": 0.81640625, "learning_rate": 0.00018817271893623193, "loss": 0.9895, "step": 9643 }, { "epoch": 0.24763087747002588, "grad_norm": 0.80078125, "learning_rate": 0.00018817061281816496, "loss": 1.0174, "step": 9644 }, { "epoch": 0.24765655466594771, "grad_norm": 0.80859375, "learning_rate": 0.0001881685065243819, "loss": 1.0823, "step": 9645 }, { "epoch": 0.24768223186186952, "grad_norm": 0.796875, "learning_rate": 0.000188166400054887, "loss": 0.9735, "step": 9646 }, { "epoch": 0.24770790905779133, "grad_norm": 0.77734375, "learning_rate": 0.00018816429340968443, "loss": 0.9562, "step": 9647 }, { "epoch": 0.24773358625371317, "grad_norm": 0.75, "learning_rate": 0.00018816218658877838, "loss": 1.054, "step": 9648 }, { "epoch": 0.24775926344963498, "grad_norm": 0.84375, "learning_rate": 0.00018816007959217306, "loss": 0.9954, "step": 9649 }, { "epoch": 0.2477849406455568, "grad_norm": 0.87109375, "learning_rate": 0.00018815797241987265, "loss": 1.049, "step": 9650 }, { "epoch": 0.24781061784147862, "grad_norm": 0.76171875, "learning_rate": 0.00018815586507188136, "loss": 0.9167, "step": 9651 }, { "epoch": 0.24783629503740043, "grad_norm": 0.7578125, "learning_rate": 0.00018815375754820343, "loss": 0.8948, "step": 9652 }, { "epoch": 0.24786197223332226, "grad_norm": 0.7734375, "learning_rate": 0.00018815164984884299, "loss": 0.9098, "step": 9653 }, { "epoch": 0.24788764942924407, "grad_norm": 0.74609375, "learning_rate": 0.00018814954197380427, "loss": 1.091, "step": 9654 }, { "epoch": 0.2479133266251659, "grad_norm": 0.81640625, "learning_rate": 0.00018814743392309152, "loss": 1.0604, "step": 9655 }, { "epoch": 0.24793900382108772, "grad_norm": 0.80078125, "learning_rate": 0.00018814532569670886, "loss": 1.0216, "step": 9656 }, { "epoch": 0.24796468101700953, "grad_norm": 0.90625, "learning_rate": 0.00018814321729466053, "loss": 1.0278, "step": 9657 }, { "epoch": 0.24799035821293136, "grad_norm": 0.96875, "learning_rate": 0.0001881411087169507, "loss": 0.9357, "step": 9658 }, { "epoch": 0.24801603540885317, "grad_norm": 0.85546875, "learning_rate": 0.0001881389999635837, "loss": 1.0753, "step": 9659 }, { "epoch": 0.248041712604775, "grad_norm": 0.74609375, "learning_rate": 0.00018813689103456356, "loss": 0.8616, "step": 9660 }, { "epoch": 0.2480673898006968, "grad_norm": 0.78125, "learning_rate": 0.00018813478192989454, "loss": 0.9921, "step": 9661 }, { "epoch": 0.24809306699661862, "grad_norm": 0.828125, "learning_rate": 0.0001881326726495809, "loss": 0.9819, "step": 9662 }, { "epoch": 0.24811874419254046, "grad_norm": 0.81640625, "learning_rate": 0.0001881305631936268, "loss": 1.0172, "step": 9663 }, { "epoch": 0.24814442138846227, "grad_norm": 0.80859375, "learning_rate": 0.00018812845356203645, "loss": 0.9634, "step": 9664 }, { "epoch": 0.2481700985843841, "grad_norm": 0.75390625, "learning_rate": 0.00018812634375481405, "loss": 1.1094, "step": 9665 }, { "epoch": 0.2481957757803059, "grad_norm": 0.80859375, "learning_rate": 0.0001881242337719638, "loss": 1.0173, "step": 9666 }, { "epoch": 0.24822145297622772, "grad_norm": 0.75, "learning_rate": 0.0001881221236134899, "loss": 1.0328, "step": 9667 }, { "epoch": 0.24824713017214955, "grad_norm": 0.82421875, "learning_rate": 0.00018812001327939658, "loss": 0.9158, "step": 9668 }, { "epoch": 0.24827280736807136, "grad_norm": 0.80859375, "learning_rate": 0.00018811790276968807, "loss": 1.0145, "step": 9669 }, { "epoch": 0.2482984845639932, "grad_norm": 0.8203125, "learning_rate": 0.00018811579208436848, "loss": 1.1827, "step": 9670 }, { "epoch": 0.248324161759915, "grad_norm": 0.75, "learning_rate": 0.0001881136812234421, "loss": 0.9668, "step": 9671 }, { "epoch": 0.24834983895583682, "grad_norm": 0.76953125, "learning_rate": 0.00018811157018691312, "loss": 0.9346, "step": 9672 }, { "epoch": 0.24837551615175865, "grad_norm": 0.7734375, "learning_rate": 0.00018810945897478575, "loss": 1.036, "step": 9673 }, { "epoch": 0.24840119334768046, "grad_norm": 0.859375, "learning_rate": 0.00018810734758706413, "loss": 0.931, "step": 9674 }, { "epoch": 0.2484268705436023, "grad_norm": 0.80859375, "learning_rate": 0.00018810523602375261, "loss": 0.8756, "step": 9675 }, { "epoch": 0.2484525477395241, "grad_norm": 0.765625, "learning_rate": 0.00018810312428485526, "loss": 1.1018, "step": 9676 }, { "epoch": 0.2484782249354459, "grad_norm": 0.8125, "learning_rate": 0.00018810101237037635, "loss": 1.0338, "step": 9677 }, { "epoch": 0.24850390213136775, "grad_norm": 0.84375, "learning_rate": 0.0001880989002803201, "loss": 1.0081, "step": 9678 }, { "epoch": 0.24852957932728956, "grad_norm": 0.8828125, "learning_rate": 0.00018809678801469067, "loss": 1.0408, "step": 9679 }, { "epoch": 0.2485552565232114, "grad_norm": 0.89453125, "learning_rate": 0.0001880946755734923, "loss": 1.1428, "step": 9680 }, { "epoch": 0.2485809337191332, "grad_norm": 0.84375, "learning_rate": 0.00018809256295672922, "loss": 1.1286, "step": 9681 }, { "epoch": 0.248606610915055, "grad_norm": 0.89453125, "learning_rate": 0.00018809045016440559, "loss": 1.1423, "step": 9682 }, { "epoch": 0.24863228811097685, "grad_norm": 0.80078125, "learning_rate": 0.00018808833719652569, "loss": 0.9347, "step": 9683 }, { "epoch": 0.24865796530689865, "grad_norm": 0.859375, "learning_rate": 0.00018808622405309367, "loss": 1.1336, "step": 9684 }, { "epoch": 0.2486836425028205, "grad_norm": 0.8203125, "learning_rate": 0.00018808411073411376, "loss": 0.9477, "step": 9685 }, { "epoch": 0.2487093196987423, "grad_norm": 0.89453125, "learning_rate": 0.00018808199723959016, "loss": 1.1103, "step": 9686 }, { "epoch": 0.2487349968946641, "grad_norm": 0.79296875, "learning_rate": 0.00018807988356952713, "loss": 1.0947, "step": 9687 }, { "epoch": 0.24876067409058594, "grad_norm": 0.75390625, "learning_rate": 0.00018807776972392884, "loss": 0.9712, "step": 9688 }, { "epoch": 0.24878635128650775, "grad_norm": 0.7890625, "learning_rate": 0.0001880756557027995, "loss": 0.9829, "step": 9689 }, { "epoch": 0.2488120284824296, "grad_norm": 0.8046875, "learning_rate": 0.0001880735415061433, "loss": 1.0289, "step": 9690 }, { "epoch": 0.2488377056783514, "grad_norm": 0.828125, "learning_rate": 0.00018807142713396453, "loss": 1.2071, "step": 9691 }, { "epoch": 0.2488633828742732, "grad_norm": 0.828125, "learning_rate": 0.00018806931258626735, "loss": 1.0226, "step": 9692 }, { "epoch": 0.24888906007019504, "grad_norm": 0.74609375, "learning_rate": 0.000188067197863056, "loss": 1.0458, "step": 9693 }, { "epoch": 0.24891473726611685, "grad_norm": 0.6875, "learning_rate": 0.00018806508296433468, "loss": 0.9462, "step": 9694 }, { "epoch": 0.24894041446203868, "grad_norm": 0.7734375, "learning_rate": 0.00018806296789010757, "loss": 0.9249, "step": 9695 }, { "epoch": 0.2489660916579605, "grad_norm": 0.7265625, "learning_rate": 0.00018806085264037897, "loss": 0.9201, "step": 9696 }, { "epoch": 0.2489917688538823, "grad_norm": 0.83984375, "learning_rate": 0.000188058737215153, "loss": 1.0811, "step": 9697 }, { "epoch": 0.24901744604980414, "grad_norm": 0.75, "learning_rate": 0.00018805662161443395, "loss": 1.0286, "step": 9698 }, { "epoch": 0.24904312324572594, "grad_norm": 0.7734375, "learning_rate": 0.000188054505838226, "loss": 0.9939, "step": 9699 }, { "epoch": 0.24906880044164778, "grad_norm": 0.86328125, "learning_rate": 0.00018805238988653334, "loss": 1.0701, "step": 9700 }, { "epoch": 0.2490944776375696, "grad_norm": 0.828125, "learning_rate": 0.00018805027375936025, "loss": 0.8844, "step": 9701 }, { "epoch": 0.2491201548334914, "grad_norm": 0.77734375, "learning_rate": 0.00018804815745671096, "loss": 0.9426, "step": 9702 }, { "epoch": 0.24914583202941323, "grad_norm": 0.7578125, "learning_rate": 0.0001880460409785896, "loss": 0.9583, "step": 9703 }, { "epoch": 0.24917150922533504, "grad_norm": 0.84375, "learning_rate": 0.00018804392432500043, "loss": 1.0701, "step": 9704 }, { "epoch": 0.24919718642125688, "grad_norm": 0.7734375, "learning_rate": 0.00018804180749594767, "loss": 1.0606, "step": 9705 }, { "epoch": 0.24922286361717869, "grad_norm": 0.82421875, "learning_rate": 0.00018803969049143557, "loss": 1.0024, "step": 9706 }, { "epoch": 0.2492485408131005, "grad_norm": 0.80078125, "learning_rate": 0.00018803757331146826, "loss": 1.1159, "step": 9707 }, { "epoch": 0.24927421800902233, "grad_norm": 0.75, "learning_rate": 0.00018803545595605007, "loss": 0.8478, "step": 9708 }, { "epoch": 0.24929989520494414, "grad_norm": 0.8984375, "learning_rate": 0.00018803333842518517, "loss": 0.9984, "step": 9709 }, { "epoch": 0.24932557240086597, "grad_norm": 0.87890625, "learning_rate": 0.0001880312207188778, "loss": 0.9986, "step": 9710 }, { "epoch": 0.24935124959678778, "grad_norm": 0.828125, "learning_rate": 0.0001880291028371321, "loss": 1.1408, "step": 9711 }, { "epoch": 0.2493769267927096, "grad_norm": 0.765625, "learning_rate": 0.00018802698477995237, "loss": 1.0782, "step": 9712 }, { "epoch": 0.24940260398863143, "grad_norm": 0.7421875, "learning_rate": 0.00018802486654734282, "loss": 1.058, "step": 9713 }, { "epoch": 0.24942828118455324, "grad_norm": 0.8359375, "learning_rate": 0.00018802274813930765, "loss": 0.9473, "step": 9714 }, { "epoch": 0.24945395838047507, "grad_norm": 0.953125, "learning_rate": 0.0001880206295558511, "loss": 1.0361, "step": 9715 }, { "epoch": 0.24947963557639688, "grad_norm": 0.7578125, "learning_rate": 0.0001880185107969774, "loss": 0.9757, "step": 9716 }, { "epoch": 0.2495053127723187, "grad_norm": 0.73828125, "learning_rate": 0.00018801639186269073, "loss": 0.9407, "step": 9717 }, { "epoch": 0.24953098996824052, "grad_norm": 0.90234375, "learning_rate": 0.00018801427275299537, "loss": 1.2329, "step": 9718 }, { "epoch": 0.24955666716416233, "grad_norm": 0.8046875, "learning_rate": 0.0001880121534678955, "loss": 0.9391, "step": 9719 }, { "epoch": 0.24958234436008417, "grad_norm": 0.734375, "learning_rate": 0.00018801003400739535, "loss": 1.0891, "step": 9720 }, { "epoch": 0.24960802155600598, "grad_norm": 0.8515625, "learning_rate": 0.0001880079143714992, "loss": 0.8781, "step": 9721 }, { "epoch": 0.24963369875192779, "grad_norm": 0.796875, "learning_rate": 0.00018800579456021118, "loss": 1.1202, "step": 9722 }, { "epoch": 0.24965937594784962, "grad_norm": 0.80078125, "learning_rate": 0.00018800367457353555, "loss": 0.9993, "step": 9723 }, { "epoch": 0.24968505314377143, "grad_norm": 0.76171875, "learning_rate": 0.00018800155441147656, "loss": 1.0889, "step": 9724 }, { "epoch": 0.24971073033969327, "grad_norm": 0.80078125, "learning_rate": 0.00018799943407403843, "loss": 0.9625, "step": 9725 }, { "epoch": 0.24973640753561507, "grad_norm": 0.82421875, "learning_rate": 0.00018799731356122537, "loss": 1.135, "step": 9726 }, { "epoch": 0.24976208473153688, "grad_norm": 0.796875, "learning_rate": 0.00018799519287304164, "loss": 1.1588, "step": 9727 }, { "epoch": 0.24978776192745872, "grad_norm": 0.8671875, "learning_rate": 0.0001879930720094914, "loss": 1.087, "step": 9728 }, { "epoch": 0.24981343912338053, "grad_norm": 0.7890625, "learning_rate": 0.00018799095097057892, "loss": 1.0623, "step": 9729 }, { "epoch": 0.24983911631930236, "grad_norm": 0.859375, "learning_rate": 0.00018798882975630844, "loss": 1.1, "step": 9730 }, { "epoch": 0.24986479351522417, "grad_norm": 0.76171875, "learning_rate": 0.00018798670836668417, "loss": 0.9448, "step": 9731 }, { "epoch": 0.24989047071114598, "grad_norm": 0.7890625, "learning_rate": 0.00018798458680171034, "loss": 1.0154, "step": 9732 }, { "epoch": 0.24991614790706781, "grad_norm": 0.82421875, "learning_rate": 0.00018798246506139115, "loss": 1.0345, "step": 9733 }, { "epoch": 0.24994182510298962, "grad_norm": 0.8359375, "learning_rate": 0.0001879803431457309, "loss": 0.9713, "step": 9734 }, { "epoch": 0.24996750229891146, "grad_norm": 0.81640625, "learning_rate": 0.00018797822105473373, "loss": 1.031, "step": 9735 }, { "epoch": 0.24999317949483327, "grad_norm": 0.7734375, "learning_rate": 0.0001879760987884039, "loss": 1.1076, "step": 9736 }, { "epoch": 0.2500188566907551, "grad_norm": 0.8203125, "learning_rate": 0.00018797397634674572, "loss": 1.0293, "step": 9737 }, { "epoch": 0.2500445338866769, "grad_norm": 0.8125, "learning_rate": 0.0001879718537297633, "loss": 0.9958, "step": 9738 }, { "epoch": 0.25007021108259875, "grad_norm": 0.7734375, "learning_rate": 0.00018796973093746094, "loss": 0.956, "step": 9739 }, { "epoch": 0.25009588827852053, "grad_norm": 0.75390625, "learning_rate": 0.00018796760796984286, "loss": 1.0231, "step": 9740 }, { "epoch": 0.25012156547444236, "grad_norm": 0.796875, "learning_rate": 0.00018796548482691327, "loss": 1.2034, "step": 9741 }, { "epoch": 0.2501472426703642, "grad_norm": 0.78125, "learning_rate": 0.0001879633615086764, "loss": 0.9597, "step": 9742 }, { "epoch": 0.250172919866286, "grad_norm": 0.9296875, "learning_rate": 0.00018796123801513652, "loss": 0.9346, "step": 9743 }, { "epoch": 0.2501985970622078, "grad_norm": 0.7890625, "learning_rate": 0.00018795911434629786, "loss": 0.8685, "step": 9744 }, { "epoch": 0.25022427425812965, "grad_norm": 0.75390625, "learning_rate": 0.00018795699050216458, "loss": 0.8863, "step": 9745 }, { "epoch": 0.25024995145405143, "grad_norm": 0.79296875, "learning_rate": 0.000187954866482741, "loss": 1.0397, "step": 9746 }, { "epoch": 0.25027562864997327, "grad_norm": 0.82421875, "learning_rate": 0.00018795274228803125, "loss": 0.9008, "step": 9747 }, { "epoch": 0.2503013058458951, "grad_norm": 0.87109375, "learning_rate": 0.0001879506179180397, "loss": 0.991, "step": 9748 }, { "epoch": 0.25032698304181694, "grad_norm": 0.83203125, "learning_rate": 0.0001879484933727705, "loss": 1.2055, "step": 9749 }, { "epoch": 0.2503526602377387, "grad_norm": 0.84765625, "learning_rate": 0.0001879463686522279, "loss": 0.9763, "step": 9750 }, { "epoch": 0.25037833743366056, "grad_norm": 0.82421875, "learning_rate": 0.0001879442437564161, "loss": 0.9438, "step": 9751 }, { "epoch": 0.2504040146295824, "grad_norm": 0.7890625, "learning_rate": 0.00018794211868533939, "loss": 1.0117, "step": 9752 }, { "epoch": 0.2504296918255042, "grad_norm": 0.7578125, "learning_rate": 0.000187939993439002, "loss": 1.0377, "step": 9753 }, { "epoch": 0.250455369021426, "grad_norm": 0.75, "learning_rate": 0.00018793786801740811, "loss": 0.8117, "step": 9754 }, { "epoch": 0.25048104621734785, "grad_norm": 0.8203125, "learning_rate": 0.000187935742420562, "loss": 1.1357, "step": 9755 }, { "epoch": 0.2505067234132696, "grad_norm": 0.83984375, "learning_rate": 0.0001879336166484679, "loss": 1.013, "step": 9756 }, { "epoch": 0.25053240060919146, "grad_norm": 0.84375, "learning_rate": 0.00018793149070113008, "loss": 0.9918, "step": 9757 }, { "epoch": 0.2505580778051133, "grad_norm": 0.78515625, "learning_rate": 0.00018792936457855271, "loss": 0.8833, "step": 9758 }, { "epoch": 0.25058375500103514, "grad_norm": 0.73828125, "learning_rate": 0.00018792723828074003, "loss": 0.987, "step": 9759 }, { "epoch": 0.2506094321969569, "grad_norm": 0.82421875, "learning_rate": 0.00018792511180769635, "loss": 1.0255, "step": 9760 }, { "epoch": 0.25063510939287875, "grad_norm": 0.8125, "learning_rate": 0.00018792298515942586, "loss": 0.9939, "step": 9761 }, { "epoch": 0.2506607865888006, "grad_norm": 0.734375, "learning_rate": 0.00018792085833593277, "loss": 1.0954, "step": 9762 }, { "epoch": 0.25068646378472237, "grad_norm": 0.8515625, "learning_rate": 0.00018791873133722139, "loss": 1.063, "step": 9763 }, { "epoch": 0.2507121409806442, "grad_norm": 0.82421875, "learning_rate": 0.0001879166041632959, "loss": 1.079, "step": 9764 }, { "epoch": 0.25073781817656604, "grad_norm": 0.75390625, "learning_rate": 0.00018791447681416057, "loss": 0.9969, "step": 9765 }, { "epoch": 0.2507634953724878, "grad_norm": 0.9296875, "learning_rate": 0.00018791234928981966, "loss": 1.0533, "step": 9766 }, { "epoch": 0.25078917256840966, "grad_norm": 0.7890625, "learning_rate": 0.0001879102215902773, "loss": 1.1205, "step": 9767 }, { "epoch": 0.2508148497643315, "grad_norm": 0.80078125, "learning_rate": 0.00018790809371553788, "loss": 0.9953, "step": 9768 }, { "epoch": 0.25084052696025333, "grad_norm": 0.80859375, "learning_rate": 0.00018790596566560551, "loss": 0.9519, "step": 9769 }, { "epoch": 0.2508662041561751, "grad_norm": 0.85546875, "learning_rate": 0.00018790383744048453, "loss": 1.0314, "step": 9770 }, { "epoch": 0.25089188135209695, "grad_norm": 1.03125, "learning_rate": 0.00018790170904017917, "loss": 0.8834, "step": 9771 }, { "epoch": 0.2509175585480188, "grad_norm": 0.84375, "learning_rate": 0.00018789958046469362, "loss": 1.1177, "step": 9772 }, { "epoch": 0.25094323574394056, "grad_norm": 0.80078125, "learning_rate": 0.0001878974517140321, "loss": 1.0656, "step": 9773 }, { "epoch": 0.2509689129398624, "grad_norm": 0.7890625, "learning_rate": 0.00018789532278819894, "loss": 1.0262, "step": 9774 }, { "epoch": 0.25099459013578423, "grad_norm": 0.87890625, "learning_rate": 0.00018789319368719834, "loss": 1.0092, "step": 9775 }, { "epoch": 0.251020267331706, "grad_norm": 0.78515625, "learning_rate": 0.00018789106441103454, "loss": 0.9744, "step": 9776 }, { "epoch": 0.25104594452762785, "grad_norm": 0.77734375, "learning_rate": 0.0001878889349597118, "loss": 0.9474, "step": 9777 }, { "epoch": 0.2510716217235497, "grad_norm": 0.83984375, "learning_rate": 0.00018788680533323433, "loss": 1.0383, "step": 9778 }, { "epoch": 0.2510972989194715, "grad_norm": 0.8515625, "learning_rate": 0.0001878846755316064, "loss": 1.0744, "step": 9779 }, { "epoch": 0.2511229761153933, "grad_norm": 0.8359375, "learning_rate": 0.00018788254555483227, "loss": 1.1897, "step": 9780 }, { "epoch": 0.25114865331131514, "grad_norm": 0.84375, "learning_rate": 0.00018788041540291614, "loss": 1.1429, "step": 9781 }, { "epoch": 0.251174330507237, "grad_norm": 0.85546875, "learning_rate": 0.0001878782850758623, "loss": 1.0292, "step": 9782 }, { "epoch": 0.25120000770315876, "grad_norm": 0.78515625, "learning_rate": 0.00018787615457367495, "loss": 0.877, "step": 9783 }, { "epoch": 0.2512256848990806, "grad_norm": 0.78515625, "learning_rate": 0.00018787402389635837, "loss": 1.0546, "step": 9784 }, { "epoch": 0.25125136209500243, "grad_norm": 0.734375, "learning_rate": 0.0001878718930439168, "loss": 0.958, "step": 9785 }, { "epoch": 0.2512770392909242, "grad_norm": 0.8125, "learning_rate": 0.00018786976201635452, "loss": 1.1842, "step": 9786 }, { "epoch": 0.25130271648684605, "grad_norm": 0.8359375, "learning_rate": 0.00018786763081367572, "loss": 1.1295, "step": 9787 }, { "epoch": 0.2513283936827679, "grad_norm": 0.8359375, "learning_rate": 0.00018786549943588466, "loss": 1.0747, "step": 9788 }, { "epoch": 0.2513540708786897, "grad_norm": 0.8203125, "learning_rate": 0.00018786336788298558, "loss": 0.9296, "step": 9789 }, { "epoch": 0.2513797480746115, "grad_norm": 0.78125, "learning_rate": 0.00018786123615498277, "loss": 1.0313, "step": 9790 }, { "epoch": 0.25140542527053333, "grad_norm": 0.828125, "learning_rate": 0.00018785910425188045, "loss": 0.9914, "step": 9791 }, { "epoch": 0.25143110246645517, "grad_norm": 0.76171875, "learning_rate": 0.0001878569721736829, "loss": 0.9671, "step": 9792 }, { "epoch": 0.25145677966237695, "grad_norm": 2.359375, "learning_rate": 0.00018785483992039427, "loss": 0.9881, "step": 9793 }, { "epoch": 0.2514824568582988, "grad_norm": 0.8515625, "learning_rate": 0.00018785270749201893, "loss": 0.975, "step": 9794 }, { "epoch": 0.2515081340542206, "grad_norm": 0.8671875, "learning_rate": 0.00018785057488856106, "loss": 1.1925, "step": 9795 }, { "epoch": 0.2515338112501424, "grad_norm": 0.8203125, "learning_rate": 0.00018784844211002492, "loss": 1.0462, "step": 9796 }, { "epoch": 0.25155948844606424, "grad_norm": 0.79296875, "learning_rate": 0.0001878463091564148, "loss": 0.9663, "step": 9797 }, { "epoch": 0.2515851656419861, "grad_norm": 0.8359375, "learning_rate": 0.00018784417602773492, "loss": 0.9284, "step": 9798 }, { "epoch": 0.2516108428379079, "grad_norm": 0.75, "learning_rate": 0.00018784204272398952, "loss": 1.0372, "step": 9799 }, { "epoch": 0.2516365200338297, "grad_norm": 0.80078125, "learning_rate": 0.00018783990924518284, "loss": 0.9461, "step": 9800 }, { "epoch": 0.2516621972297515, "grad_norm": 0.78515625, "learning_rate": 0.0001878377755913192, "loss": 0.991, "step": 9801 }, { "epoch": 0.25168787442567336, "grad_norm": 0.82421875, "learning_rate": 0.00018783564176240276, "loss": 0.9688, "step": 9802 }, { "epoch": 0.25171355162159514, "grad_norm": 0.796875, "learning_rate": 0.00018783350775843788, "loss": 1.0847, "step": 9803 }, { "epoch": 0.251739228817517, "grad_norm": 0.77734375, "learning_rate": 0.00018783137357942872, "loss": 1.1411, "step": 9804 }, { "epoch": 0.2517649060134388, "grad_norm": 0.78125, "learning_rate": 0.00018782923922537953, "loss": 1.1024, "step": 9805 }, { "epoch": 0.2517905832093606, "grad_norm": 0.77734375, "learning_rate": 0.00018782710469629466, "loss": 0.9315, "step": 9806 }, { "epoch": 0.25181626040528243, "grad_norm": 0.765625, "learning_rate": 0.00018782496999217828, "loss": 0.9342, "step": 9807 }, { "epoch": 0.25184193760120427, "grad_norm": 0.76171875, "learning_rate": 0.00018782283511303467, "loss": 0.9844, "step": 9808 }, { "epoch": 0.2518676147971261, "grad_norm": 0.75390625, "learning_rate": 0.00018782070005886808, "loss": 1.0318, "step": 9809 }, { "epoch": 0.2518932919930479, "grad_norm": 0.83984375, "learning_rate": 0.00018781856482968275, "loss": 0.9367, "step": 9810 }, { "epoch": 0.2519189691889697, "grad_norm": 0.82421875, "learning_rate": 0.00018781642942548299, "loss": 1.0682, "step": 9811 }, { "epoch": 0.25194464638489156, "grad_norm": 0.765625, "learning_rate": 0.00018781429384627299, "loss": 0.8754, "step": 9812 }, { "epoch": 0.25197032358081334, "grad_norm": 0.73828125, "learning_rate": 0.00018781215809205706, "loss": 0.9933, "step": 9813 }, { "epoch": 0.2519960007767352, "grad_norm": 0.89453125, "learning_rate": 0.0001878100221628394, "loss": 1.0704, "step": 9814 }, { "epoch": 0.252021677972657, "grad_norm": 1.0390625, "learning_rate": 0.00018780788605862433, "loss": 0.9193, "step": 9815 }, { "epoch": 0.2520473551685788, "grad_norm": 0.89453125, "learning_rate": 0.00018780574977941603, "loss": 1.0412, "step": 9816 }, { "epoch": 0.2520730323645006, "grad_norm": 0.83203125, "learning_rate": 0.00018780361332521882, "loss": 0.9685, "step": 9817 }, { "epoch": 0.25209870956042246, "grad_norm": 0.74609375, "learning_rate": 0.00018780147669603697, "loss": 1.0938, "step": 9818 }, { "epoch": 0.2521243867563443, "grad_norm": 0.78125, "learning_rate": 0.00018779933989187468, "loss": 0.974, "step": 9819 }, { "epoch": 0.2521500639522661, "grad_norm": 0.83984375, "learning_rate": 0.00018779720291273625, "loss": 0.9517, "step": 9820 }, { "epoch": 0.2521757411481879, "grad_norm": 0.828125, "learning_rate": 0.0001877950657586259, "loss": 1.0417, "step": 9821 }, { "epoch": 0.25220141834410975, "grad_norm": 0.84765625, "learning_rate": 0.00018779292842954797, "loss": 1.0586, "step": 9822 }, { "epoch": 0.25222709554003153, "grad_norm": 0.87890625, "learning_rate": 0.0001877907909255066, "loss": 1.1472, "step": 9823 }, { "epoch": 0.25225277273595337, "grad_norm": 0.7734375, "learning_rate": 0.00018778865324650616, "loss": 0.8691, "step": 9824 }, { "epoch": 0.2522784499318752, "grad_norm": 0.82421875, "learning_rate": 0.00018778651539255083, "loss": 1.0702, "step": 9825 }, { "epoch": 0.252304127127797, "grad_norm": 0.81640625, "learning_rate": 0.00018778437736364492, "loss": 1.0675, "step": 9826 }, { "epoch": 0.2523298043237188, "grad_norm": 0.83984375, "learning_rate": 0.00018778223915979266, "loss": 1.1334, "step": 9827 }, { "epoch": 0.25235548151964066, "grad_norm": 0.78125, "learning_rate": 0.00018778010078099837, "loss": 1.1458, "step": 9828 }, { "epoch": 0.2523811587155625, "grad_norm": 0.80859375, "learning_rate": 0.00018777796222726624, "loss": 1.0475, "step": 9829 }, { "epoch": 0.2524068359114843, "grad_norm": 0.69140625, "learning_rate": 0.00018777582349860055, "loss": 0.8863, "step": 9830 }, { "epoch": 0.2524325131074061, "grad_norm": 0.76953125, "learning_rate": 0.0001877736845950056, "loss": 0.8817, "step": 9831 }, { "epoch": 0.25245819030332795, "grad_norm": 0.890625, "learning_rate": 0.0001877715455164856, "loss": 1.0075, "step": 9832 }, { "epoch": 0.2524838674992497, "grad_norm": 0.79296875, "learning_rate": 0.00018776940626304484, "loss": 0.9234, "step": 9833 }, { "epoch": 0.25250954469517156, "grad_norm": 0.90234375, "learning_rate": 0.00018776726683468757, "loss": 0.8776, "step": 9834 }, { "epoch": 0.2525352218910934, "grad_norm": 0.7890625, "learning_rate": 0.0001877651272314181, "loss": 1.0011, "step": 9835 }, { "epoch": 0.2525608990870152, "grad_norm": 0.7421875, "learning_rate": 0.00018776298745324063, "loss": 1.0996, "step": 9836 }, { "epoch": 0.252586576282937, "grad_norm": 0.76171875, "learning_rate": 0.00018776084750015947, "loss": 0.9409, "step": 9837 }, { "epoch": 0.25261225347885885, "grad_norm": 0.734375, "learning_rate": 0.0001877587073721789, "loss": 0.98, "step": 9838 }, { "epoch": 0.2526379306747807, "grad_norm": 0.87890625, "learning_rate": 0.00018775656706930308, "loss": 1.0237, "step": 9839 }, { "epoch": 0.25266360787070247, "grad_norm": 0.80078125, "learning_rate": 0.0001877544265915364, "loss": 1.0552, "step": 9840 }, { "epoch": 0.2526892850666243, "grad_norm": 0.73828125, "learning_rate": 0.00018775228593888305, "loss": 0.915, "step": 9841 }, { "epoch": 0.25271496226254614, "grad_norm": 0.73828125, "learning_rate": 0.00018775014511134735, "loss": 0.9096, "step": 9842 }, { "epoch": 0.2527406394584679, "grad_norm": 0.84375, "learning_rate": 0.0001877480041089335, "loss": 1.0984, "step": 9843 }, { "epoch": 0.25276631665438976, "grad_norm": 0.85546875, "learning_rate": 0.00018774586293164586, "loss": 0.9835, "step": 9844 }, { "epoch": 0.2527919938503116, "grad_norm": 0.828125, "learning_rate": 0.00018774372157948855, "loss": 1.1745, "step": 9845 }, { "epoch": 0.25281767104623337, "grad_norm": 0.89453125, "learning_rate": 0.000187741580052466, "loss": 1.0247, "step": 9846 }, { "epoch": 0.2528433482421552, "grad_norm": 0.8671875, "learning_rate": 0.0001877394383505824, "loss": 0.9888, "step": 9847 }, { "epoch": 0.25286902543807704, "grad_norm": 0.7265625, "learning_rate": 0.00018773729647384203, "loss": 0.9757, "step": 9848 }, { "epoch": 0.2528947026339989, "grad_norm": 0.81640625, "learning_rate": 0.00018773515442224912, "loss": 0.9769, "step": 9849 }, { "epoch": 0.25292037982992066, "grad_norm": 0.71484375, "learning_rate": 0.000187733012195808, "loss": 0.9589, "step": 9850 }, { "epoch": 0.2529460570258425, "grad_norm": 0.77734375, "learning_rate": 0.0001877308697945229, "loss": 1.005, "step": 9851 }, { "epoch": 0.25297173422176433, "grad_norm": 0.74609375, "learning_rate": 0.0001877287272183981, "loss": 0.9539, "step": 9852 }, { "epoch": 0.2529974114176861, "grad_norm": 0.73828125, "learning_rate": 0.00018772658446743787, "loss": 0.8123, "step": 9853 }, { "epoch": 0.25302308861360795, "grad_norm": 0.8203125, "learning_rate": 0.00018772444154164651, "loss": 0.935, "step": 9854 }, { "epoch": 0.2530487658095298, "grad_norm": 0.8984375, "learning_rate": 0.0001877222984410282, "loss": 1.1523, "step": 9855 }, { "epoch": 0.25307444300545157, "grad_norm": 0.78125, "learning_rate": 0.00018772015516558735, "loss": 0.9523, "step": 9856 }, { "epoch": 0.2531001202013734, "grad_norm": 0.8125, "learning_rate": 0.0001877180117153281, "loss": 1.0391, "step": 9857 }, { "epoch": 0.25312579739729524, "grad_norm": 0.77734375, "learning_rate": 0.0001877158680902548, "loss": 0.9957, "step": 9858 }, { "epoch": 0.2531514745932171, "grad_norm": 0.77734375, "learning_rate": 0.00018771372429037165, "loss": 1.0054, "step": 9859 }, { "epoch": 0.25317715178913885, "grad_norm": 0.859375, "learning_rate": 0.00018771158031568303, "loss": 0.9878, "step": 9860 }, { "epoch": 0.2532028289850607, "grad_norm": 0.72265625, "learning_rate": 0.00018770943616619313, "loss": 0.8368, "step": 9861 }, { "epoch": 0.2532285061809825, "grad_norm": 0.734375, "learning_rate": 0.00018770729184190626, "loss": 0.9643, "step": 9862 }, { "epoch": 0.2532541833769043, "grad_norm": 0.80078125, "learning_rate": 0.00018770514734282667, "loss": 1.0328, "step": 9863 }, { "epoch": 0.25327986057282614, "grad_norm": 0.96875, "learning_rate": 0.00018770300266895864, "loss": 0.9043, "step": 9864 }, { "epoch": 0.253305537768748, "grad_norm": 0.83984375, "learning_rate": 0.00018770085782030647, "loss": 0.9353, "step": 9865 }, { "epoch": 0.25333121496466976, "grad_norm": 0.78515625, "learning_rate": 0.0001876987127968744, "loss": 0.9206, "step": 9866 }, { "epoch": 0.2533568921605916, "grad_norm": 0.79296875, "learning_rate": 0.0001876965675986667, "loss": 0.9717, "step": 9867 }, { "epoch": 0.25338256935651343, "grad_norm": 0.765625, "learning_rate": 0.00018769442222568765, "loss": 0.9254, "step": 9868 }, { "epoch": 0.25340824655243527, "grad_norm": 0.8359375, "learning_rate": 0.00018769227667794156, "loss": 0.9864, "step": 9869 }, { "epoch": 0.25343392374835705, "grad_norm": 0.83984375, "learning_rate": 0.00018769013095543268, "loss": 1.0602, "step": 9870 }, { "epoch": 0.2534596009442789, "grad_norm": 0.78515625, "learning_rate": 0.00018768798505816532, "loss": 1.0241, "step": 9871 }, { "epoch": 0.2534852781402007, "grad_norm": 0.83984375, "learning_rate": 0.0001876858389861437, "loss": 0.9009, "step": 9872 }, { "epoch": 0.2535109553361225, "grad_norm": 0.9609375, "learning_rate": 0.00018768369273937212, "loss": 0.9365, "step": 9873 }, { "epoch": 0.25353663253204434, "grad_norm": 0.79296875, "learning_rate": 0.00018768154631785487, "loss": 1.0717, "step": 9874 }, { "epoch": 0.2535623097279662, "grad_norm": 0.76171875, "learning_rate": 0.0001876793997215962, "loss": 1.1647, "step": 9875 }, { "epoch": 0.25358798692388795, "grad_norm": 0.78515625, "learning_rate": 0.00018767725295060043, "loss": 1.0488, "step": 9876 }, { "epoch": 0.2536136641198098, "grad_norm": 0.82421875, "learning_rate": 0.0001876751060048718, "loss": 1.0268, "step": 9877 }, { "epoch": 0.2536393413157316, "grad_norm": 0.8125, "learning_rate": 0.0001876729588844146, "loss": 1.0173, "step": 9878 }, { "epoch": 0.25366501851165346, "grad_norm": 0.7421875, "learning_rate": 0.00018767081158923315, "loss": 0.969, "step": 9879 }, { "epoch": 0.25369069570757524, "grad_norm": 0.77734375, "learning_rate": 0.00018766866411933165, "loss": 0.8093, "step": 9880 }, { "epoch": 0.2537163729034971, "grad_norm": 0.8203125, "learning_rate": 0.00018766651647471444, "loss": 1.1007, "step": 9881 }, { "epoch": 0.2537420500994189, "grad_norm": 0.82421875, "learning_rate": 0.0001876643686553858, "loss": 0.9886, "step": 9882 }, { "epoch": 0.2537677272953407, "grad_norm": 0.796875, "learning_rate": 0.00018766222066134996, "loss": 0.915, "step": 9883 }, { "epoch": 0.25379340449126253, "grad_norm": 0.74609375, "learning_rate": 0.00018766007249261123, "loss": 0.8681, "step": 9884 }, { "epoch": 0.25381908168718437, "grad_norm": 0.79296875, "learning_rate": 0.0001876579241491739, "loss": 0.9416, "step": 9885 }, { "epoch": 0.25384475888310615, "grad_norm": 0.77734375, "learning_rate": 0.00018765577563104225, "loss": 0.9391, "step": 9886 }, { "epoch": 0.253870436079028, "grad_norm": 0.78515625, "learning_rate": 0.00018765362693822058, "loss": 1.0944, "step": 9887 }, { "epoch": 0.2538961132749498, "grad_norm": 0.80859375, "learning_rate": 0.0001876514780707131, "loss": 1.0772, "step": 9888 }, { "epoch": 0.25392179047087166, "grad_norm": 0.75, "learning_rate": 0.00018764932902852417, "loss": 0.9547, "step": 9889 }, { "epoch": 0.25394746766679344, "grad_norm": 0.81640625, "learning_rate": 0.00018764717981165804, "loss": 1.0782, "step": 9890 }, { "epoch": 0.25397314486271527, "grad_norm": 0.94140625, "learning_rate": 0.00018764503042011902, "loss": 1.1597, "step": 9891 }, { "epoch": 0.2539988220586371, "grad_norm": 0.7890625, "learning_rate": 0.00018764288085391134, "loss": 0.8958, "step": 9892 }, { "epoch": 0.2540244992545589, "grad_norm": 0.82421875, "learning_rate": 0.00018764073111303932, "loss": 1.0548, "step": 9893 }, { "epoch": 0.2540501764504807, "grad_norm": 0.78125, "learning_rate": 0.00018763858119750726, "loss": 0.9301, "step": 9894 }, { "epoch": 0.25407585364640256, "grad_norm": 1.578125, "learning_rate": 0.00018763643110731942, "loss": 0.9808, "step": 9895 }, { "epoch": 0.25410153084232434, "grad_norm": 0.828125, "learning_rate": 0.00018763428084248008, "loss": 0.8621, "step": 9896 }, { "epoch": 0.2541272080382462, "grad_norm": 0.78515625, "learning_rate": 0.0001876321304029935, "loss": 1.0712, "step": 9897 }, { "epoch": 0.254152885234168, "grad_norm": 0.7890625, "learning_rate": 0.00018762997978886406, "loss": 0.8851, "step": 9898 }, { "epoch": 0.25417856243008985, "grad_norm": 0.83203125, "learning_rate": 0.00018762782900009594, "loss": 1.006, "step": 9899 }, { "epoch": 0.25420423962601163, "grad_norm": 0.80859375, "learning_rate": 0.0001876256780366935, "loss": 1.2321, "step": 9900 }, { "epoch": 0.25422991682193347, "grad_norm": 0.87109375, "learning_rate": 0.00018762352689866098, "loss": 0.9579, "step": 9901 }, { "epoch": 0.2542555940178553, "grad_norm": 0.95703125, "learning_rate": 0.0001876213755860027, "loss": 1.0082, "step": 9902 }, { "epoch": 0.2542812712137771, "grad_norm": 0.8203125, "learning_rate": 0.00018761922409872296, "loss": 0.9515, "step": 9903 }, { "epoch": 0.2543069484096989, "grad_norm": 0.81640625, "learning_rate": 0.000187617072436826, "loss": 1.1683, "step": 9904 }, { "epoch": 0.25433262560562075, "grad_norm": 0.828125, "learning_rate": 0.0001876149206003161, "loss": 0.9052, "step": 9905 }, { "epoch": 0.25435830280154254, "grad_norm": 0.8203125, "learning_rate": 0.00018761276858919758, "loss": 0.9515, "step": 9906 }, { "epoch": 0.25438397999746437, "grad_norm": 0.84375, "learning_rate": 0.00018761061640347476, "loss": 0.9692, "step": 9907 }, { "epoch": 0.2544096571933862, "grad_norm": 0.85546875, "learning_rate": 0.00018760846404315186, "loss": 1.1316, "step": 9908 }, { "epoch": 0.25443533438930804, "grad_norm": 0.79296875, "learning_rate": 0.00018760631150823324, "loss": 1.1624, "step": 9909 }, { "epoch": 0.2544610115852298, "grad_norm": 0.75390625, "learning_rate": 0.00018760415879872313, "loss": 0.9006, "step": 9910 }, { "epoch": 0.25448668878115166, "grad_norm": 0.7890625, "learning_rate": 0.00018760200591462587, "loss": 0.9682, "step": 9911 }, { "epoch": 0.2545123659770735, "grad_norm": 0.8203125, "learning_rate": 0.0001875998528559457, "loss": 0.9904, "step": 9912 }, { "epoch": 0.2545380431729953, "grad_norm": 0.84765625, "learning_rate": 0.00018759769962268695, "loss": 0.8222, "step": 9913 }, { "epoch": 0.2545637203689171, "grad_norm": 0.86328125, "learning_rate": 0.0001875955462148539, "loss": 1.1323, "step": 9914 }, { "epoch": 0.25458939756483895, "grad_norm": 0.78125, "learning_rate": 0.0001875933926324508, "loss": 1.0742, "step": 9915 }, { "epoch": 0.25461507476076073, "grad_norm": 0.7734375, "learning_rate": 0.00018759123887548203, "loss": 0.901, "step": 9916 }, { "epoch": 0.25464075195668257, "grad_norm": 0.79296875, "learning_rate": 0.0001875890849439518, "loss": 1.1151, "step": 9917 }, { "epoch": 0.2546664291526044, "grad_norm": 0.84375, "learning_rate": 0.00018758693083786447, "loss": 1.0698, "step": 9918 }, { "epoch": 0.2546921063485262, "grad_norm": 0.8125, "learning_rate": 0.00018758477655722426, "loss": 0.9888, "step": 9919 }, { "epoch": 0.254717783544448, "grad_norm": 0.703125, "learning_rate": 0.00018758262210203552, "loss": 0.9672, "step": 9920 }, { "epoch": 0.25474346074036985, "grad_norm": 0.73828125, "learning_rate": 0.0001875804674723025, "loss": 0.872, "step": 9921 }, { "epoch": 0.2547691379362917, "grad_norm": 0.859375, "learning_rate": 0.00018757831266802958, "loss": 1.0375, "step": 9922 }, { "epoch": 0.25479481513221347, "grad_norm": 0.77734375, "learning_rate": 0.00018757615768922096, "loss": 0.9272, "step": 9923 }, { "epoch": 0.2548204923281353, "grad_norm": 0.76171875, "learning_rate": 0.00018757400253588096, "loss": 0.9663, "step": 9924 }, { "epoch": 0.25484616952405714, "grad_norm": 0.7890625, "learning_rate": 0.0001875718472080139, "loss": 0.9355, "step": 9925 }, { "epoch": 0.2548718467199789, "grad_norm": 0.81640625, "learning_rate": 0.00018756969170562405, "loss": 0.9407, "step": 9926 }, { "epoch": 0.25489752391590076, "grad_norm": 0.84765625, "learning_rate": 0.00018756753602871572, "loss": 1.0173, "step": 9927 }, { "epoch": 0.2549232011118226, "grad_norm": 0.8203125, "learning_rate": 0.00018756538017729318, "loss": 0.9812, "step": 9928 }, { "epoch": 0.2549488783077444, "grad_norm": 0.7734375, "learning_rate": 0.00018756322415136077, "loss": 1.0395, "step": 9929 }, { "epoch": 0.2549745555036662, "grad_norm": 0.8125, "learning_rate": 0.00018756106795092276, "loss": 1.0441, "step": 9930 }, { "epoch": 0.25500023269958805, "grad_norm": 0.78125, "learning_rate": 0.00018755891157598345, "loss": 0.9681, "step": 9931 }, { "epoch": 0.2550259098955099, "grad_norm": 0.84765625, "learning_rate": 0.00018755675502654713, "loss": 0.9811, "step": 9932 }, { "epoch": 0.25505158709143166, "grad_norm": 0.84765625, "learning_rate": 0.0001875545983026181, "loss": 1.0872, "step": 9933 }, { "epoch": 0.2550772642873535, "grad_norm": 0.76171875, "learning_rate": 0.00018755244140420066, "loss": 0.9785, "step": 9934 }, { "epoch": 0.25510294148327534, "grad_norm": 0.80859375, "learning_rate": 0.00018755028433129915, "loss": 0.9667, "step": 9935 }, { "epoch": 0.2551286186791971, "grad_norm": 0.796875, "learning_rate": 0.0001875481270839178, "loss": 0.9659, "step": 9936 }, { "epoch": 0.25515429587511895, "grad_norm": 0.80078125, "learning_rate": 0.00018754596966206094, "loss": 1.0311, "step": 9937 }, { "epoch": 0.2551799730710408, "grad_norm": 0.796875, "learning_rate": 0.0001875438120657329, "loss": 0.8992, "step": 9938 }, { "epoch": 0.25520565026696257, "grad_norm": 0.89453125, "learning_rate": 0.00018754165429493792, "loss": 0.9216, "step": 9939 }, { "epoch": 0.2552313274628844, "grad_norm": 0.82421875, "learning_rate": 0.00018753949634968035, "loss": 1.0069, "step": 9940 }, { "epoch": 0.25525700465880624, "grad_norm": 0.80859375, "learning_rate": 0.00018753733822996445, "loss": 1.0395, "step": 9941 }, { "epoch": 0.2552826818547281, "grad_norm": 0.765625, "learning_rate": 0.00018753517993579458, "loss": 1.0566, "step": 9942 }, { "epoch": 0.25530835905064986, "grad_norm": 0.8828125, "learning_rate": 0.00018753302146717497, "loss": 0.9274, "step": 9943 }, { "epoch": 0.2553340362465717, "grad_norm": 0.828125, "learning_rate": 0.00018753086282410997, "loss": 1.0123, "step": 9944 }, { "epoch": 0.25535971344249353, "grad_norm": 0.70703125, "learning_rate": 0.00018752870400660387, "loss": 1.0241, "step": 9945 }, { "epoch": 0.2553853906384153, "grad_norm": 0.75390625, "learning_rate": 0.00018752654501466096, "loss": 0.8293, "step": 9946 }, { "epoch": 0.25541106783433715, "grad_norm": 0.72265625, "learning_rate": 0.00018752438584828553, "loss": 0.9529, "step": 9947 }, { "epoch": 0.255436745030259, "grad_norm": 0.890625, "learning_rate": 0.00018752222650748196, "loss": 1.053, "step": 9948 }, { "epoch": 0.25546242222618076, "grad_norm": 1.1796875, "learning_rate": 0.00018752006699225446, "loss": 0.9674, "step": 9949 }, { "epoch": 0.2554880994221026, "grad_norm": 0.83203125, "learning_rate": 0.00018751790730260737, "loss": 0.9297, "step": 9950 }, { "epoch": 0.25551377661802444, "grad_norm": 0.82421875, "learning_rate": 0.000187515747438545, "loss": 1.0508, "step": 9951 }, { "epoch": 0.25553945381394627, "grad_norm": 0.828125, "learning_rate": 0.00018751358740007166, "loss": 0.9927, "step": 9952 }, { "epoch": 0.25556513100986805, "grad_norm": 0.80078125, "learning_rate": 0.00018751142718719164, "loss": 1.0065, "step": 9953 }, { "epoch": 0.2555908082057899, "grad_norm": 0.7890625, "learning_rate": 0.00018750926679990924, "loss": 0.7768, "step": 9954 }, { "epoch": 0.2556164854017117, "grad_norm": 0.75, "learning_rate": 0.0001875071062382288, "loss": 0.8534, "step": 9955 }, { "epoch": 0.2556421625976335, "grad_norm": 0.89453125, "learning_rate": 0.00018750494550215458, "loss": 1.0102, "step": 9956 }, { "epoch": 0.25566783979355534, "grad_norm": 0.71875, "learning_rate": 0.00018750278459169091, "loss": 1.0866, "step": 9957 }, { "epoch": 0.2556935169894772, "grad_norm": 0.828125, "learning_rate": 0.0001875006235068421, "loss": 0.9537, "step": 9958 }, { "epoch": 0.25571919418539896, "grad_norm": 0.8046875, "learning_rate": 0.00018749846224761245, "loss": 1.1054, "step": 9959 }, { "epoch": 0.2557448713813208, "grad_norm": 0.78125, "learning_rate": 0.00018749630081400627, "loss": 0.924, "step": 9960 }, { "epoch": 0.25577054857724263, "grad_norm": 0.81640625, "learning_rate": 0.00018749413920602788, "loss": 1.0395, "step": 9961 }, { "epoch": 0.25579622577316447, "grad_norm": 0.83984375, "learning_rate": 0.00018749197742368154, "loss": 1.155, "step": 9962 }, { "epoch": 0.25582190296908625, "grad_norm": 0.77734375, "learning_rate": 0.0001874898154669716, "loss": 1.0329, "step": 9963 }, { "epoch": 0.2558475801650081, "grad_norm": 0.7734375, "learning_rate": 0.0001874876533359024, "loss": 0.9537, "step": 9964 }, { "epoch": 0.2558732573609299, "grad_norm": 0.76953125, "learning_rate": 0.0001874854910304782, "loss": 1.0032, "step": 9965 }, { "epoch": 0.2558989345568517, "grad_norm": 0.8515625, "learning_rate": 0.0001874833285507033, "loss": 1.003, "step": 9966 }, { "epoch": 0.25592461175277353, "grad_norm": 0.9296875, "learning_rate": 0.000187481165896582, "loss": 0.9786, "step": 9967 }, { "epoch": 0.25595028894869537, "grad_norm": 0.82421875, "learning_rate": 0.0001874790030681187, "loss": 0.8942, "step": 9968 }, { "epoch": 0.25597596614461715, "grad_norm": 0.74609375, "learning_rate": 0.0001874768400653176, "loss": 1.0066, "step": 9969 }, { "epoch": 0.256001643340539, "grad_norm": 0.7890625, "learning_rate": 0.00018747467688818307, "loss": 1.0365, "step": 9970 }, { "epoch": 0.2560273205364608, "grad_norm": 0.7890625, "learning_rate": 0.00018747251353671944, "loss": 0.956, "step": 9971 }, { "epoch": 0.25605299773238266, "grad_norm": 0.76953125, "learning_rate": 0.00018747035001093096, "loss": 1.0066, "step": 9972 }, { "epoch": 0.25607867492830444, "grad_norm": 0.8125, "learning_rate": 0.00018746818631082198, "loss": 1.0356, "step": 9973 }, { "epoch": 0.2561043521242263, "grad_norm": 0.78125, "learning_rate": 0.00018746602243639683, "loss": 0.927, "step": 9974 }, { "epoch": 0.2561300293201481, "grad_norm": 0.76171875, "learning_rate": 0.00018746385838765977, "loss": 1.047, "step": 9975 }, { "epoch": 0.2561557065160699, "grad_norm": 0.79296875, "learning_rate": 0.00018746169416461515, "loss": 1.116, "step": 9976 }, { "epoch": 0.25618138371199173, "grad_norm": 0.81640625, "learning_rate": 0.00018745952976726725, "loss": 1.0588, "step": 9977 }, { "epoch": 0.25620706090791356, "grad_norm": 0.7578125, "learning_rate": 0.00018745736519562044, "loss": 0.8431, "step": 9978 }, { "epoch": 0.25623273810383534, "grad_norm": 0.76171875, "learning_rate": 0.000187455200449679, "loss": 1.0406, "step": 9979 }, { "epoch": 0.2562584152997572, "grad_norm": 0.8125, "learning_rate": 0.00018745303552944722, "loss": 0.9517, "step": 9980 }, { "epoch": 0.256284092495679, "grad_norm": 0.80859375, "learning_rate": 0.00018745087043492946, "loss": 0.9605, "step": 9981 }, { "epoch": 0.25630976969160085, "grad_norm": 0.8203125, "learning_rate": 0.00018744870516613, "loss": 1.0182, "step": 9982 }, { "epoch": 0.25633544688752263, "grad_norm": 0.81640625, "learning_rate": 0.0001874465397230532, "loss": 1.0156, "step": 9983 }, { "epoch": 0.25636112408344447, "grad_norm": 0.7890625, "learning_rate": 0.0001874443741057033, "loss": 1.0109, "step": 9984 }, { "epoch": 0.2563868012793663, "grad_norm": 0.8203125, "learning_rate": 0.0001874422083140847, "loss": 1.0115, "step": 9985 }, { "epoch": 0.2564124784752881, "grad_norm": 0.796875, "learning_rate": 0.00018744004234820167, "loss": 0.987, "step": 9986 }, { "epoch": 0.2564381556712099, "grad_norm": 0.7578125, "learning_rate": 0.00018743787620805852, "loss": 1.002, "step": 9987 }, { "epoch": 0.25646383286713176, "grad_norm": 0.765625, "learning_rate": 0.0001874357098936596, "loss": 0.9971, "step": 9988 }, { "epoch": 0.25648951006305354, "grad_norm": 0.76171875, "learning_rate": 0.00018743354340500917, "loss": 0.8936, "step": 9989 }, { "epoch": 0.2565151872589754, "grad_norm": 0.75, "learning_rate": 0.00018743137674211163, "loss": 1.0586, "step": 9990 }, { "epoch": 0.2565408644548972, "grad_norm": 0.7734375, "learning_rate": 0.00018742920990497125, "loss": 1.059, "step": 9991 }, { "epoch": 0.25656654165081905, "grad_norm": 0.79296875, "learning_rate": 0.00018742704289359234, "loss": 0.9939, "step": 9992 }, { "epoch": 0.2565922188467408, "grad_norm": 1.0625, "learning_rate": 0.00018742487570797922, "loss": 0.9403, "step": 9993 }, { "epoch": 0.25661789604266266, "grad_norm": 0.7734375, "learning_rate": 0.00018742270834813622, "loss": 0.9578, "step": 9994 }, { "epoch": 0.2566435732385845, "grad_norm": 0.796875, "learning_rate": 0.00018742054081406767, "loss": 1.1064, "step": 9995 }, { "epoch": 0.2566692504345063, "grad_norm": 0.78515625, "learning_rate": 0.00018741837310577787, "loss": 1.0137, "step": 9996 }, { "epoch": 0.2566949276304281, "grad_norm": 0.76171875, "learning_rate": 0.00018741620522327117, "loss": 0.9761, "step": 9997 }, { "epoch": 0.25672060482634995, "grad_norm": 0.984375, "learning_rate": 0.00018741403716655185, "loss": 1.0893, "step": 9998 }, { "epoch": 0.25674628202227173, "grad_norm": 0.79296875, "learning_rate": 0.00018741186893562426, "loss": 0.9156, "step": 9999 }, { "epoch": 0.25677195921819357, "grad_norm": 0.81640625, "learning_rate": 0.00018740970053049273, "loss": 1.0679, "step": 10000 }, { "epoch": 0.25677195921819357, "eval_loss": 0.9963493943214417, "eval_model_preparation_time": 0.0065, "eval_runtime": 404.1465, "eval_samples_per_second": 24.744, "eval_steps_per_second": 0.774, "step": 10000 }, { "epoch": 0.2567976364141154, "grad_norm": 0.765625, "learning_rate": 0.0001874075319511615, "loss": 0.9687, "step": 10001 }, { "epoch": 0.25682331361003724, "grad_norm": 0.8046875, "learning_rate": 0.000187405363197635, "loss": 0.9475, "step": 10002 }, { "epoch": 0.256848990805959, "grad_norm": 0.8671875, "learning_rate": 0.0001874031942699175, "loss": 1.1426, "step": 10003 }, { "epoch": 0.25687466800188086, "grad_norm": 0.7890625, "learning_rate": 0.00018740102516801333, "loss": 1.0333, "step": 10004 }, { "epoch": 0.2569003451978027, "grad_norm": 0.81640625, "learning_rate": 0.0001873988558919268, "loss": 0.942, "step": 10005 }, { "epoch": 0.2569260223937245, "grad_norm": 0.80859375, "learning_rate": 0.00018739668644166227, "loss": 0.9549, "step": 10006 }, { "epoch": 0.2569516995896463, "grad_norm": 0.8203125, "learning_rate": 0.00018739451681722401, "loss": 0.8684, "step": 10007 }, { "epoch": 0.25697737678556815, "grad_norm": 0.77734375, "learning_rate": 0.00018739234701861638, "loss": 1.0622, "step": 10008 }, { "epoch": 0.2570030539814899, "grad_norm": 0.81640625, "learning_rate": 0.00018739017704584373, "loss": 1.0993, "step": 10009 }, { "epoch": 0.25702873117741176, "grad_norm": 0.77734375, "learning_rate": 0.00018738800689891032, "loss": 1.1425, "step": 10010 }, { "epoch": 0.2570544083733336, "grad_norm": 0.79296875, "learning_rate": 0.00018738583657782048, "loss": 0.9757, "step": 10011 }, { "epoch": 0.25708008556925543, "grad_norm": 1.46875, "learning_rate": 0.0001873836660825786, "loss": 0.8994, "step": 10012 }, { "epoch": 0.2571057627651772, "grad_norm": 0.76171875, "learning_rate": 0.00018738149541318896, "loss": 0.9585, "step": 10013 }, { "epoch": 0.25713143996109905, "grad_norm": 0.81640625, "learning_rate": 0.0001873793245696559, "loss": 1.109, "step": 10014 }, { "epoch": 0.2571571171570209, "grad_norm": 0.828125, "learning_rate": 0.00018737715355198373, "loss": 0.8697, "step": 10015 }, { "epoch": 0.25718279435294267, "grad_norm": 0.7421875, "learning_rate": 0.0001873749823601768, "loss": 1.0541, "step": 10016 }, { "epoch": 0.2572084715488645, "grad_norm": 0.82421875, "learning_rate": 0.0001873728109942394, "loss": 1.1324, "step": 10017 }, { "epoch": 0.25723414874478634, "grad_norm": 0.78515625, "learning_rate": 0.00018737063945417588, "loss": 0.9837, "step": 10018 }, { "epoch": 0.2572598259407081, "grad_norm": 0.828125, "learning_rate": 0.0001873684677399906, "loss": 1.2442, "step": 10019 }, { "epoch": 0.25728550313662996, "grad_norm": 0.90234375, "learning_rate": 0.0001873662958516878, "loss": 0.9612, "step": 10020 }, { "epoch": 0.2573111803325518, "grad_norm": 0.74609375, "learning_rate": 0.00018736412378927191, "loss": 1.0175, "step": 10021 }, { "epoch": 0.25733685752847363, "grad_norm": 0.90234375, "learning_rate": 0.0001873619515527472, "loss": 1.1272, "step": 10022 }, { "epoch": 0.2573625347243954, "grad_norm": 0.7734375, "learning_rate": 0.000187359779142118, "loss": 0.9439, "step": 10023 }, { "epoch": 0.25738821192031724, "grad_norm": 0.83984375, "learning_rate": 0.00018735760655738868, "loss": 1.1254, "step": 10024 }, { "epoch": 0.2574138891162391, "grad_norm": 0.8203125, "learning_rate": 0.0001873554337985635, "loss": 0.9528, "step": 10025 }, { "epoch": 0.25743956631216086, "grad_norm": 0.75390625, "learning_rate": 0.00018735326086564687, "loss": 0.982, "step": 10026 }, { "epoch": 0.2574652435080827, "grad_norm": 0.79296875, "learning_rate": 0.00018735108775864306, "loss": 1.0036, "step": 10027 }, { "epoch": 0.25749092070400453, "grad_norm": 0.7890625, "learning_rate": 0.00018734891447755642, "loss": 1.0245, "step": 10028 }, { "epoch": 0.2575165978999263, "grad_norm": 0.8515625, "learning_rate": 0.00018734674102239128, "loss": 1.0054, "step": 10029 }, { "epoch": 0.25754227509584815, "grad_norm": 0.82421875, "learning_rate": 0.000187344567393152, "loss": 1.2033, "step": 10030 }, { "epoch": 0.25756795229177, "grad_norm": 0.734375, "learning_rate": 0.00018734239358984288, "loss": 1.0215, "step": 10031 }, { "epoch": 0.2575936294876918, "grad_norm": 0.875, "learning_rate": 0.00018734021961246824, "loss": 0.9564, "step": 10032 }, { "epoch": 0.2576193066836136, "grad_norm": 0.796875, "learning_rate": 0.00018733804546103245, "loss": 0.8603, "step": 10033 }, { "epoch": 0.25764498387953544, "grad_norm": 0.80078125, "learning_rate": 0.00018733587113553982, "loss": 0.9886, "step": 10034 }, { "epoch": 0.2576706610754573, "grad_norm": 0.796875, "learning_rate": 0.0001873336966359947, "loss": 0.9741, "step": 10035 }, { "epoch": 0.25769633827137906, "grad_norm": 0.78515625, "learning_rate": 0.00018733152196240141, "loss": 0.996, "step": 10036 }, { "epoch": 0.2577220154673009, "grad_norm": 0.82421875, "learning_rate": 0.00018732934711476425, "loss": 1.1496, "step": 10037 }, { "epoch": 0.2577476926632227, "grad_norm": 0.90625, "learning_rate": 0.00018732717209308763, "loss": 0.9892, "step": 10038 }, { "epoch": 0.2577733698591445, "grad_norm": 0.73046875, "learning_rate": 0.00018732499689737582, "loss": 0.8424, "step": 10039 }, { "epoch": 0.25779904705506634, "grad_norm": 0.8125, "learning_rate": 0.0001873228215276332, "loss": 0.9869, "step": 10040 }, { "epoch": 0.2578247242509882, "grad_norm": 0.7734375, "learning_rate": 0.00018732064598386405, "loss": 0.909, "step": 10041 }, { "epoch": 0.25785040144691, "grad_norm": 0.84765625, "learning_rate": 0.00018731847026607276, "loss": 1.1438, "step": 10042 }, { "epoch": 0.2578760786428318, "grad_norm": 0.83203125, "learning_rate": 0.00018731629437426366, "loss": 1.0595, "step": 10043 }, { "epoch": 0.25790175583875363, "grad_norm": 0.8046875, "learning_rate": 0.00018731411830844106, "loss": 1.0015, "step": 10044 }, { "epoch": 0.25792743303467547, "grad_norm": 0.78515625, "learning_rate": 0.0001873119420686093, "loss": 0.9198, "step": 10045 }, { "epoch": 0.25795311023059725, "grad_norm": 0.7734375, "learning_rate": 0.00018730976565477274, "loss": 1.0633, "step": 10046 }, { "epoch": 0.2579787874265191, "grad_norm": 0.7734375, "learning_rate": 0.00018730758906693572, "loss": 0.9562, "step": 10047 }, { "epoch": 0.2580044646224409, "grad_norm": 0.7421875, "learning_rate": 0.00018730541230510252, "loss": 1.0014, "step": 10048 }, { "epoch": 0.2580301418183627, "grad_norm": 0.890625, "learning_rate": 0.00018730323536927755, "loss": 1.1518, "step": 10049 }, { "epoch": 0.25805581901428454, "grad_norm": 0.77734375, "learning_rate": 0.0001873010582594651, "loss": 0.9861, "step": 10050 }, { "epoch": 0.2580814962102064, "grad_norm": 0.82421875, "learning_rate": 0.00018729888097566955, "loss": 1.0085, "step": 10051 }, { "epoch": 0.2581071734061282, "grad_norm": 0.7890625, "learning_rate": 0.0001872967035178952, "loss": 0.9307, "step": 10052 }, { "epoch": 0.25813285060205, "grad_norm": 0.87109375, "learning_rate": 0.0001872945258861464, "loss": 1.0001, "step": 10053 }, { "epoch": 0.2581585277979718, "grad_norm": 0.73828125, "learning_rate": 0.00018729234808042747, "loss": 1.0081, "step": 10054 }, { "epoch": 0.25818420499389366, "grad_norm": 0.83203125, "learning_rate": 0.00018729017010074282, "loss": 1.0503, "step": 10055 }, { "epoch": 0.25820988218981544, "grad_norm": 0.7734375, "learning_rate": 0.00018728799194709674, "loss": 1.1255, "step": 10056 }, { "epoch": 0.2582355593857373, "grad_norm": 0.81640625, "learning_rate": 0.0001872858136194936, "loss": 1.1203, "step": 10057 }, { "epoch": 0.2582612365816591, "grad_norm": 0.82421875, "learning_rate": 0.00018728363511793768, "loss": 1.0681, "step": 10058 }, { "epoch": 0.2582869137775809, "grad_norm": 0.76953125, "learning_rate": 0.00018728145644243335, "loss": 0.9302, "step": 10059 }, { "epoch": 0.25831259097350273, "grad_norm": 0.78515625, "learning_rate": 0.00018727927759298497, "loss": 0.967, "step": 10060 }, { "epoch": 0.25833826816942457, "grad_norm": 0.83984375, "learning_rate": 0.0001872770985695969, "loss": 0.9148, "step": 10061 }, { "epoch": 0.2583639453653464, "grad_norm": 0.75, "learning_rate": 0.0001872749193722734, "loss": 0.9673, "step": 10062 }, { "epoch": 0.2583896225612682, "grad_norm": 0.8046875, "learning_rate": 0.00018727274000101892, "loss": 1.0621, "step": 10063 }, { "epoch": 0.25841529975719, "grad_norm": 0.81640625, "learning_rate": 0.00018727056045583776, "loss": 1.0697, "step": 10064 }, { "epoch": 0.25844097695311186, "grad_norm": 1.0234375, "learning_rate": 0.00018726838073673422, "loss": 1.1381, "step": 10065 }, { "epoch": 0.25846665414903364, "grad_norm": 0.77734375, "learning_rate": 0.0001872662008437127, "loss": 0.9198, "step": 10066 }, { "epoch": 0.2584923313449555, "grad_norm": 0.83203125, "learning_rate": 0.00018726402077677754, "loss": 1.0847, "step": 10067 }, { "epoch": 0.2585180085408773, "grad_norm": 0.71875, "learning_rate": 0.00018726184053593302, "loss": 0.9037, "step": 10068 }, { "epoch": 0.2585436857367991, "grad_norm": 0.84765625, "learning_rate": 0.00018725966012118358, "loss": 1.069, "step": 10069 }, { "epoch": 0.2585693629327209, "grad_norm": 0.80859375, "learning_rate": 0.00018725747953253353, "loss": 0.9799, "step": 10070 }, { "epoch": 0.25859504012864276, "grad_norm": 0.7734375, "learning_rate": 0.00018725529876998715, "loss": 0.9867, "step": 10071 }, { "epoch": 0.2586207173245646, "grad_norm": 0.78125, "learning_rate": 0.0001872531178335489, "loss": 1.0316, "step": 10072 }, { "epoch": 0.2586463945204864, "grad_norm": 0.77734375, "learning_rate": 0.00018725093672322302, "loss": 0.8281, "step": 10073 }, { "epoch": 0.2586720717164082, "grad_norm": 0.78515625, "learning_rate": 0.00018724875543901394, "loss": 1.0008, "step": 10074 }, { "epoch": 0.25869774891233005, "grad_norm": 0.74609375, "learning_rate": 0.00018724657398092593, "loss": 0.946, "step": 10075 }, { "epoch": 0.25872342610825183, "grad_norm": 0.75, "learning_rate": 0.00018724439234896342, "loss": 1.0707, "step": 10076 }, { "epoch": 0.25874910330417367, "grad_norm": 0.87109375, "learning_rate": 0.00018724221054313068, "loss": 0.894, "step": 10077 }, { "epoch": 0.2587747805000955, "grad_norm": 0.828125, "learning_rate": 0.0001872400285634321, "loss": 1.1928, "step": 10078 }, { "epoch": 0.2588004576960173, "grad_norm": 0.78125, "learning_rate": 0.00018723784640987208, "loss": 0.9223, "step": 10079 }, { "epoch": 0.2588261348919391, "grad_norm": 0.8125, "learning_rate": 0.00018723566408245486, "loss": 1.0471, "step": 10080 }, { "epoch": 0.25885181208786096, "grad_norm": 0.8125, "learning_rate": 0.00018723348158118484, "loss": 1.0956, "step": 10081 }, { "epoch": 0.2588774892837828, "grad_norm": 0.79296875, "learning_rate": 0.0001872312989060664, "loss": 1.0814, "step": 10082 }, { "epoch": 0.25890316647970457, "grad_norm": 0.79296875, "learning_rate": 0.00018722911605710386, "loss": 0.8467, "step": 10083 }, { "epoch": 0.2589288436756264, "grad_norm": 0.7890625, "learning_rate": 0.00018722693303430154, "loss": 0.9253, "step": 10084 }, { "epoch": 0.25895452087154824, "grad_norm": 2.109375, "learning_rate": 0.00018722474983766383, "loss": 1.1037, "step": 10085 }, { "epoch": 0.25898019806747, "grad_norm": 0.8515625, "learning_rate": 0.00018722256646719508, "loss": 1.0695, "step": 10086 }, { "epoch": 0.25900587526339186, "grad_norm": 0.75390625, "learning_rate": 0.00018722038292289962, "loss": 1.0286, "step": 10087 }, { "epoch": 0.2590315524593137, "grad_norm": 0.78125, "learning_rate": 0.00018721819920478183, "loss": 1.0373, "step": 10088 }, { "epoch": 0.2590572296552355, "grad_norm": 0.796875, "learning_rate": 0.00018721601531284605, "loss": 1.0749, "step": 10089 }, { "epoch": 0.2590829068511573, "grad_norm": 0.83984375, "learning_rate": 0.0001872138312470966, "loss": 1.0037, "step": 10090 }, { "epoch": 0.25910858404707915, "grad_norm": 0.76171875, "learning_rate": 0.0001872116470075379, "loss": 1.0117, "step": 10091 }, { "epoch": 0.259134261243001, "grad_norm": 0.74609375, "learning_rate": 0.00018720946259417426, "loss": 0.9216, "step": 10092 }, { "epoch": 0.25915993843892277, "grad_norm": 0.86328125, "learning_rate": 0.00018720727800701004, "loss": 0.9313, "step": 10093 }, { "epoch": 0.2591856156348446, "grad_norm": 0.9375, "learning_rate": 0.00018720509324604957, "loss": 0.9199, "step": 10094 }, { "epoch": 0.25921129283076644, "grad_norm": 0.78515625, "learning_rate": 0.00018720290831129724, "loss": 0.9363, "step": 10095 }, { "epoch": 0.2592369700266882, "grad_norm": 0.7734375, "learning_rate": 0.00018720072320275742, "loss": 0.9229, "step": 10096 }, { "epoch": 0.25926264722261005, "grad_norm": 0.8515625, "learning_rate": 0.0001871985379204344, "loss": 0.9943, "step": 10097 }, { "epoch": 0.2592883244185319, "grad_norm": 0.86328125, "learning_rate": 0.00018719635246433258, "loss": 1.1223, "step": 10098 }, { "epoch": 0.25931400161445367, "grad_norm": 1.5546875, "learning_rate": 0.0001871941668344563, "loss": 1.1309, "step": 10099 }, { "epoch": 0.2593396788103755, "grad_norm": 0.8046875, "learning_rate": 0.0001871919810308099, "loss": 1.0272, "step": 10100 }, { "epoch": 0.25936535600629734, "grad_norm": 0.84765625, "learning_rate": 0.0001871897950533978, "loss": 0.9455, "step": 10101 }, { "epoch": 0.2593910332022192, "grad_norm": 0.84765625, "learning_rate": 0.00018718760890222428, "loss": 1.0813, "step": 10102 }, { "epoch": 0.25941671039814096, "grad_norm": 0.7734375, "learning_rate": 0.00018718542257729377, "loss": 0.86, "step": 10103 }, { "epoch": 0.2594423875940628, "grad_norm": 0.79296875, "learning_rate": 0.00018718323607861054, "loss": 0.9876, "step": 10104 }, { "epoch": 0.25946806478998463, "grad_norm": 0.88671875, "learning_rate": 0.000187181049406179, "loss": 0.9263, "step": 10105 }, { "epoch": 0.2594937419859064, "grad_norm": 0.8125, "learning_rate": 0.00018717886256000357, "loss": 1.0546, "step": 10106 }, { "epoch": 0.25951941918182825, "grad_norm": 0.82421875, "learning_rate": 0.0001871766755400885, "loss": 0.9972, "step": 10107 }, { "epoch": 0.2595450963777501, "grad_norm": 0.8515625, "learning_rate": 0.00018717448834643818, "loss": 1.037, "step": 10108 }, { "epoch": 0.25957077357367186, "grad_norm": 0.76953125, "learning_rate": 0.00018717230097905702, "loss": 1.096, "step": 10109 }, { "epoch": 0.2595964507695937, "grad_norm": 0.85546875, "learning_rate": 0.00018717011343794929, "loss": 0.9268, "step": 10110 }, { "epoch": 0.25962212796551554, "grad_norm": 0.76171875, "learning_rate": 0.00018716792572311938, "loss": 1.0102, "step": 10111 }, { "epoch": 0.2596478051614374, "grad_norm": 0.8671875, "learning_rate": 0.0001871657378345717, "loss": 1.1562, "step": 10112 }, { "epoch": 0.25967348235735915, "grad_norm": 0.7578125, "learning_rate": 0.00018716354977231059, "loss": 0.8777, "step": 10113 }, { "epoch": 0.259699159553281, "grad_norm": 0.87109375, "learning_rate": 0.0001871613615363404, "loss": 1.0559, "step": 10114 }, { "epoch": 0.2597248367492028, "grad_norm": 0.77734375, "learning_rate": 0.00018715917312666546, "loss": 0.9847, "step": 10115 }, { "epoch": 0.2597505139451246, "grad_norm": 0.8125, "learning_rate": 0.0001871569845432902, "loss": 0.9835, "step": 10116 }, { "epoch": 0.25977619114104644, "grad_norm": 0.796875, "learning_rate": 0.0001871547957862189, "loss": 1.049, "step": 10117 }, { "epoch": 0.2598018683369683, "grad_norm": 0.921875, "learning_rate": 0.000187152606855456, "loss": 1.1085, "step": 10118 }, { "epoch": 0.25982754553289006, "grad_norm": 0.82421875, "learning_rate": 0.00018715041775100582, "loss": 1.0696, "step": 10119 }, { "epoch": 0.2598532227288119, "grad_norm": 0.79296875, "learning_rate": 0.00018714822847287272, "loss": 0.9334, "step": 10120 }, { "epoch": 0.25987889992473373, "grad_norm": 0.7578125, "learning_rate": 0.00018714603902106105, "loss": 1.0518, "step": 10121 }, { "epoch": 0.2599045771206555, "grad_norm": 0.84765625, "learning_rate": 0.00018714384939557524, "loss": 1.0415, "step": 10122 }, { "epoch": 0.25993025431657735, "grad_norm": 0.76953125, "learning_rate": 0.00018714165959641957, "loss": 1.0026, "step": 10123 }, { "epoch": 0.2599559315124992, "grad_norm": 0.8125, "learning_rate": 0.00018713946962359848, "loss": 1.0183, "step": 10124 }, { "epoch": 0.259981608708421, "grad_norm": 0.8671875, "learning_rate": 0.0001871372794771163, "loss": 0.9128, "step": 10125 }, { "epoch": 0.2600072859043428, "grad_norm": 0.7265625, "learning_rate": 0.00018713508915697738, "loss": 0.9514, "step": 10126 }, { "epoch": 0.26003296310026464, "grad_norm": 0.796875, "learning_rate": 0.00018713289866318609, "loss": 1.0121, "step": 10127 }, { "epoch": 0.26005864029618647, "grad_norm": 0.79296875, "learning_rate": 0.00018713070799574678, "loss": 0.9471, "step": 10128 }, { "epoch": 0.26008431749210825, "grad_norm": 0.7890625, "learning_rate": 0.00018712851715466387, "loss": 1.0331, "step": 10129 }, { "epoch": 0.2601099946880301, "grad_norm": 0.90625, "learning_rate": 0.0001871263261399417, "loss": 1.007, "step": 10130 }, { "epoch": 0.2601356718839519, "grad_norm": 0.79296875, "learning_rate": 0.00018712413495158462, "loss": 1.1366, "step": 10131 }, { "epoch": 0.2601613490798737, "grad_norm": 0.8125, "learning_rate": 0.00018712194358959703, "loss": 0.9593, "step": 10132 }, { "epoch": 0.26018702627579554, "grad_norm": 0.796875, "learning_rate": 0.00018711975205398327, "loss": 0.9765, "step": 10133 }, { "epoch": 0.2602127034717174, "grad_norm": 0.7578125, "learning_rate": 0.0001871175603447477, "loss": 0.9931, "step": 10134 }, { "epoch": 0.2602383806676392, "grad_norm": 0.75, "learning_rate": 0.0001871153684618947, "loss": 0.8979, "step": 10135 }, { "epoch": 0.260264057863561, "grad_norm": 0.796875, "learning_rate": 0.00018711317640542866, "loss": 1.0543, "step": 10136 }, { "epoch": 0.26028973505948283, "grad_norm": 0.7421875, "learning_rate": 0.0001871109841753539, "loss": 0.9954, "step": 10137 }, { "epoch": 0.26031541225540467, "grad_norm": 0.7734375, "learning_rate": 0.00018710879177167482, "loss": 1.034, "step": 10138 }, { "epoch": 0.26034108945132645, "grad_norm": 0.83203125, "learning_rate": 0.0001871065991943958, "loss": 1.0288, "step": 10139 }, { "epoch": 0.2603667666472483, "grad_norm": 0.78125, "learning_rate": 0.00018710440644352118, "loss": 1.0533, "step": 10140 }, { "epoch": 0.2603924438431701, "grad_norm": 0.76171875, "learning_rate": 0.00018710221351905538, "loss": 0.9398, "step": 10141 }, { "epoch": 0.2604181210390919, "grad_norm": 0.796875, "learning_rate": 0.00018710002042100272, "loss": 0.9975, "step": 10142 }, { "epoch": 0.26044379823501373, "grad_norm": 0.734375, "learning_rate": 0.00018709782714936756, "loss": 0.8484, "step": 10143 }, { "epoch": 0.26046947543093557, "grad_norm": 0.78515625, "learning_rate": 0.00018709563370415433, "loss": 1.0459, "step": 10144 }, { "epoch": 0.2604951526268574, "grad_norm": 0.77734375, "learning_rate": 0.00018709344008536736, "loss": 1.0838, "step": 10145 }, { "epoch": 0.2605208298227792, "grad_norm": 0.828125, "learning_rate": 0.00018709124629301104, "loss": 1.0396, "step": 10146 }, { "epoch": 0.260546507018701, "grad_norm": 0.80078125, "learning_rate": 0.00018708905232708972, "loss": 1.0194, "step": 10147 }, { "epoch": 0.26057218421462286, "grad_norm": 0.78515625, "learning_rate": 0.00018708685818760777, "loss": 0.9622, "step": 10148 }, { "epoch": 0.26059786141054464, "grad_norm": 0.8671875, "learning_rate": 0.00018708466387456962, "loss": 1.165, "step": 10149 }, { "epoch": 0.2606235386064665, "grad_norm": 0.83984375, "learning_rate": 0.00018708246938797958, "loss": 0.9968, "step": 10150 }, { "epoch": 0.2606492158023883, "grad_norm": 0.76171875, "learning_rate": 0.00018708027472784204, "loss": 0.8674, "step": 10151 }, { "epoch": 0.2606748929983101, "grad_norm": 0.95703125, "learning_rate": 0.00018707807989416138, "loss": 1.0623, "step": 10152 }, { "epoch": 0.26070057019423193, "grad_norm": 0.921875, "learning_rate": 0.00018707588488694197, "loss": 1.0361, "step": 10153 }, { "epoch": 0.26072624739015376, "grad_norm": 0.7890625, "learning_rate": 0.0001870736897061882, "loss": 0.9858, "step": 10154 }, { "epoch": 0.2607519245860756, "grad_norm": 0.84765625, "learning_rate": 0.00018707149435190444, "loss": 1.1347, "step": 10155 }, { "epoch": 0.2607776017819974, "grad_norm": 0.83984375, "learning_rate": 0.00018706929882409502, "loss": 1.0776, "step": 10156 }, { "epoch": 0.2608032789779192, "grad_norm": 0.9140625, "learning_rate": 0.00018706710312276439, "loss": 0.9518, "step": 10157 }, { "epoch": 0.26082895617384105, "grad_norm": 1.1484375, "learning_rate": 0.00018706490724791684, "loss": 1.0613, "step": 10158 }, { "epoch": 0.26085463336976283, "grad_norm": 0.76953125, "learning_rate": 0.00018706271119955684, "loss": 0.9301, "step": 10159 }, { "epoch": 0.26088031056568467, "grad_norm": 0.78125, "learning_rate": 0.0001870605149776887, "loss": 0.986, "step": 10160 }, { "epoch": 0.2609059877616065, "grad_norm": 0.7890625, "learning_rate": 0.00018705831858231684, "loss": 0.9258, "step": 10161 }, { "epoch": 0.2609316649575283, "grad_norm": 0.796875, "learning_rate": 0.0001870561220134456, "loss": 1.0179, "step": 10162 }, { "epoch": 0.2609573421534501, "grad_norm": 0.71484375, "learning_rate": 0.00018705392527107936, "loss": 0.8665, "step": 10163 }, { "epoch": 0.26098301934937196, "grad_norm": 0.82421875, "learning_rate": 0.00018705172835522252, "loss": 0.925, "step": 10164 }, { "epoch": 0.2610086965452938, "grad_norm": 0.7578125, "learning_rate": 0.00018704953126587946, "loss": 0.8884, "step": 10165 }, { "epoch": 0.2610343737412156, "grad_norm": 0.78515625, "learning_rate": 0.00018704733400305452, "loss": 0.9949, "step": 10166 }, { "epoch": 0.2610600509371374, "grad_norm": 0.80859375, "learning_rate": 0.00018704513656675212, "loss": 0.9564, "step": 10167 }, { "epoch": 0.26108572813305925, "grad_norm": 0.86328125, "learning_rate": 0.00018704293895697665, "loss": 0.9157, "step": 10168 }, { "epoch": 0.261111405328981, "grad_norm": 0.86328125, "learning_rate": 0.00018704074117373244, "loss": 1.0375, "step": 10169 }, { "epoch": 0.26113708252490286, "grad_norm": 0.84375, "learning_rate": 0.0001870385432170239, "loss": 1.1432, "step": 10170 }, { "epoch": 0.2611627597208247, "grad_norm": 0.78515625, "learning_rate": 0.0001870363450868554, "loss": 0.8966, "step": 10171 }, { "epoch": 0.2611884369167465, "grad_norm": 0.8203125, "learning_rate": 0.00018703414678323133, "loss": 0.9213, "step": 10172 }, { "epoch": 0.2612141141126683, "grad_norm": 0.8671875, "learning_rate": 0.0001870319483061561, "loss": 1.0508, "step": 10173 }, { "epoch": 0.26123979130859015, "grad_norm": 0.8359375, "learning_rate": 0.000187029749655634, "loss": 1.2016, "step": 10174 }, { "epoch": 0.261265468504512, "grad_norm": 0.8359375, "learning_rate": 0.00018702755083166949, "loss": 1.0527, "step": 10175 }, { "epoch": 0.26129114570043377, "grad_norm": 0.8515625, "learning_rate": 0.00018702535183426697, "loss": 0.9928, "step": 10176 }, { "epoch": 0.2613168228963556, "grad_norm": 0.78125, "learning_rate": 0.00018702315266343073, "loss": 0.9801, "step": 10177 }, { "epoch": 0.26134250009227744, "grad_norm": 0.78515625, "learning_rate": 0.00018702095331916524, "loss": 1.0103, "step": 10178 }, { "epoch": 0.2613681772881992, "grad_norm": 0.78125, "learning_rate": 0.00018701875380147486, "loss": 1.0281, "step": 10179 }, { "epoch": 0.26139385448412106, "grad_norm": 0.75390625, "learning_rate": 0.00018701655411036393, "loss": 0.9062, "step": 10180 }, { "epoch": 0.2614195316800429, "grad_norm": 0.828125, "learning_rate": 0.0001870143542458369, "loss": 0.9636, "step": 10181 }, { "epoch": 0.2614452088759647, "grad_norm": 0.76953125, "learning_rate": 0.0001870121542078981, "loss": 1.1048, "step": 10182 }, { "epoch": 0.2614708860718865, "grad_norm": 0.86328125, "learning_rate": 0.00018700995399655192, "loss": 0.858, "step": 10183 }, { "epoch": 0.26149656326780835, "grad_norm": 0.81640625, "learning_rate": 0.0001870077536118028, "loss": 0.9771, "step": 10184 }, { "epoch": 0.2615222404637302, "grad_norm": 0.75390625, "learning_rate": 0.00018700555305365508, "loss": 0.9751, "step": 10185 }, { "epoch": 0.26154791765965196, "grad_norm": 0.8984375, "learning_rate": 0.00018700335232211315, "loss": 0.9652, "step": 10186 }, { "epoch": 0.2615735948555738, "grad_norm": 0.8046875, "learning_rate": 0.00018700115141718137, "loss": 0.915, "step": 10187 }, { "epoch": 0.26159927205149563, "grad_norm": 0.81640625, "learning_rate": 0.00018699895033886418, "loss": 0.9169, "step": 10188 }, { "epoch": 0.2616249492474174, "grad_norm": 0.7890625, "learning_rate": 0.00018699674908716592, "loss": 1.059, "step": 10189 }, { "epoch": 0.26165062644333925, "grad_norm": 0.8828125, "learning_rate": 0.00018699454766209103, "loss": 1.096, "step": 10190 }, { "epoch": 0.2616763036392611, "grad_norm": 0.80859375, "learning_rate": 0.00018699234606364383, "loss": 1.0521, "step": 10191 }, { "epoch": 0.26170198083518287, "grad_norm": 0.80859375, "learning_rate": 0.0001869901442918288, "loss": 1.0881, "step": 10192 }, { "epoch": 0.2617276580311047, "grad_norm": 0.73828125, "learning_rate": 0.0001869879423466502, "loss": 0.9756, "step": 10193 }, { "epoch": 0.26175333522702654, "grad_norm": 0.796875, "learning_rate": 0.00018698574022811253, "loss": 0.9715, "step": 10194 }, { "epoch": 0.2617790124229484, "grad_norm": 0.76953125, "learning_rate": 0.00018698353793622014, "loss": 0.9934, "step": 10195 }, { "epoch": 0.26180468961887016, "grad_norm": 0.77734375, "learning_rate": 0.00018698133547097738, "loss": 1.1265, "step": 10196 }, { "epoch": 0.261830366814792, "grad_norm": 0.7578125, "learning_rate": 0.0001869791328323887, "loss": 0.9302, "step": 10197 }, { "epoch": 0.26185604401071383, "grad_norm": 0.81640625, "learning_rate": 0.00018697693002045845, "loss": 0.9759, "step": 10198 }, { "epoch": 0.2618817212066356, "grad_norm": 0.796875, "learning_rate": 0.00018697472703519105, "loss": 1.1166, "step": 10199 }, { "epoch": 0.26190739840255745, "grad_norm": 0.82421875, "learning_rate": 0.00018697252387659088, "loss": 0.9506, "step": 10200 }, { "epoch": 0.2619330755984793, "grad_norm": 0.82421875, "learning_rate": 0.00018697032054466233, "loss": 0.9942, "step": 10201 }, { "epoch": 0.26195875279440106, "grad_norm": 0.8125, "learning_rate": 0.00018696811703940977, "loss": 1.0939, "step": 10202 }, { "epoch": 0.2619844299903229, "grad_norm": 0.8203125, "learning_rate": 0.00018696591336083763, "loss": 0.9215, "step": 10203 }, { "epoch": 0.26201010718624473, "grad_norm": 0.79296875, "learning_rate": 0.00018696370950895027, "loss": 0.9768, "step": 10204 }, { "epoch": 0.26203578438216657, "grad_norm": 0.76953125, "learning_rate": 0.00018696150548375208, "loss": 0.9092, "step": 10205 }, { "epoch": 0.26206146157808835, "grad_norm": 0.78125, "learning_rate": 0.00018695930128524748, "loss": 0.7593, "step": 10206 }, { "epoch": 0.2620871387740102, "grad_norm": 0.77734375, "learning_rate": 0.00018695709691344083, "loss": 0.9732, "step": 10207 }, { "epoch": 0.262112815969932, "grad_norm": 0.8125, "learning_rate": 0.00018695489236833653, "loss": 0.8607, "step": 10208 }, { "epoch": 0.2621384931658538, "grad_norm": 0.796875, "learning_rate": 0.00018695268764993902, "loss": 0.8646, "step": 10209 }, { "epoch": 0.26216417036177564, "grad_norm": 0.90625, "learning_rate": 0.00018695048275825263, "loss": 1.0494, "step": 10210 }, { "epoch": 0.2621898475576975, "grad_norm": 0.7734375, "learning_rate": 0.0001869482776932818, "loss": 0.9864, "step": 10211 }, { "epoch": 0.26221552475361926, "grad_norm": 0.7890625, "learning_rate": 0.00018694607245503092, "loss": 1.1509, "step": 10212 }, { "epoch": 0.2622412019495411, "grad_norm": 0.81640625, "learning_rate": 0.00018694386704350434, "loss": 1.0792, "step": 10213 }, { "epoch": 0.26226687914546293, "grad_norm": 0.87109375, "learning_rate": 0.0001869416614587065, "loss": 1.0878, "step": 10214 }, { "epoch": 0.26229255634138476, "grad_norm": 0.74609375, "learning_rate": 0.00018693945570064178, "loss": 1.0246, "step": 10215 }, { "epoch": 0.26231823353730654, "grad_norm": 0.8203125, "learning_rate": 0.00018693724976931458, "loss": 0.9731, "step": 10216 }, { "epoch": 0.2623439107332284, "grad_norm": 0.88671875, "learning_rate": 0.0001869350436647293, "loss": 0.8709, "step": 10217 }, { "epoch": 0.2623695879291502, "grad_norm": 0.73046875, "learning_rate": 0.0001869328373868903, "loss": 0.9736, "step": 10218 }, { "epoch": 0.262395265125072, "grad_norm": 0.84375, "learning_rate": 0.00018693063093580206, "loss": 0.9708, "step": 10219 }, { "epoch": 0.26242094232099383, "grad_norm": 0.76953125, "learning_rate": 0.00018692842431146888, "loss": 0.8672, "step": 10220 }, { "epoch": 0.26244661951691567, "grad_norm": 0.8828125, "learning_rate": 0.00018692621751389523, "loss": 1.0683, "step": 10221 }, { "epoch": 0.26247229671283745, "grad_norm": 0.84765625, "learning_rate": 0.00018692401054308546, "loss": 1.0864, "step": 10222 }, { "epoch": 0.2624979739087593, "grad_norm": 0.8046875, "learning_rate": 0.00018692180339904398, "loss": 1.0739, "step": 10223 }, { "epoch": 0.2625236511046811, "grad_norm": 0.75, "learning_rate": 0.00018691959608177524, "loss": 1.0256, "step": 10224 }, { "epoch": 0.26254932830060296, "grad_norm": 0.76171875, "learning_rate": 0.00018691738859128357, "loss": 1.0748, "step": 10225 }, { "epoch": 0.26257500549652474, "grad_norm": 0.73046875, "learning_rate": 0.00018691518092757341, "loss": 0.9652, "step": 10226 }, { "epoch": 0.2626006826924466, "grad_norm": 0.81640625, "learning_rate": 0.0001869129730906491, "loss": 0.9765, "step": 10227 }, { "epoch": 0.2626263598883684, "grad_norm": 0.828125, "learning_rate": 0.00018691076508051512, "loss": 0.9536, "step": 10228 }, { "epoch": 0.2626520370842902, "grad_norm": 0.796875, "learning_rate": 0.00018690855689717587, "loss": 1.0733, "step": 10229 }, { "epoch": 0.262677714280212, "grad_norm": 0.80078125, "learning_rate": 0.00018690634854063567, "loss": 1.0056, "step": 10230 }, { "epoch": 0.26270339147613386, "grad_norm": 0.78515625, "learning_rate": 0.00018690414001089898, "loss": 1.0134, "step": 10231 }, { "epoch": 0.26272906867205564, "grad_norm": 0.75390625, "learning_rate": 0.00018690193130797017, "loss": 0.9915, "step": 10232 }, { "epoch": 0.2627547458679775, "grad_norm": 0.875, "learning_rate": 0.0001868997224318537, "loss": 1.085, "step": 10233 }, { "epoch": 0.2627804230638993, "grad_norm": 0.8203125, "learning_rate": 0.0001868975133825539, "loss": 0.9727, "step": 10234 }, { "epoch": 0.26280610025982115, "grad_norm": 0.7578125, "learning_rate": 0.0001868953041600752, "loss": 0.9247, "step": 10235 }, { "epoch": 0.26283177745574293, "grad_norm": 1.0, "learning_rate": 0.00018689309476442202, "loss": 0.9167, "step": 10236 }, { "epoch": 0.26285745465166477, "grad_norm": 0.83984375, "learning_rate": 0.00018689088519559878, "loss": 0.9808, "step": 10237 }, { "epoch": 0.2628831318475866, "grad_norm": 0.796875, "learning_rate": 0.00018688867545360984, "loss": 0.938, "step": 10238 }, { "epoch": 0.2629088090435084, "grad_norm": 0.78125, "learning_rate": 0.00018688646553845957, "loss": 0.9928, "step": 10239 }, { "epoch": 0.2629344862394302, "grad_norm": 0.77734375, "learning_rate": 0.0001868842554501525, "loss": 1.19, "step": 10240 }, { "epoch": 0.26296016343535206, "grad_norm": 0.76171875, "learning_rate": 0.00018688204518869293, "loss": 0.9418, "step": 10241 }, { "epoch": 0.26298584063127384, "grad_norm": 0.73828125, "learning_rate": 0.00018687983475408526, "loss": 0.8492, "step": 10242 }, { "epoch": 0.2630115178271957, "grad_norm": 0.84375, "learning_rate": 0.00018687762414633396, "loss": 1.0139, "step": 10243 }, { "epoch": 0.2630371950231175, "grad_norm": 0.734375, "learning_rate": 0.00018687541336544339, "loss": 0.9225, "step": 10244 }, { "epoch": 0.26306287221903935, "grad_norm": 0.76953125, "learning_rate": 0.00018687320241141797, "loss": 1.0887, "step": 10245 }, { "epoch": 0.2630885494149611, "grad_norm": 0.7734375, "learning_rate": 0.0001868709912842621, "loss": 0.8794, "step": 10246 }, { "epoch": 0.26311422661088296, "grad_norm": 0.77734375, "learning_rate": 0.0001868687799839802, "loss": 1.0378, "step": 10247 }, { "epoch": 0.2631399038068048, "grad_norm": 0.75, "learning_rate": 0.00018686656851057664, "loss": 0.9079, "step": 10248 }, { "epoch": 0.2631655810027266, "grad_norm": 0.82421875, "learning_rate": 0.00018686435686405588, "loss": 0.9394, "step": 10249 }, { "epoch": 0.2631912581986484, "grad_norm": 0.8828125, "learning_rate": 0.0001868621450444223, "loss": 1.0738, "step": 10250 }, { "epoch": 0.26321693539457025, "grad_norm": 0.8125, "learning_rate": 0.00018685993305168032, "loss": 0.8439, "step": 10251 }, { "epoch": 0.26324261259049203, "grad_norm": 0.74609375, "learning_rate": 0.00018685772088583433, "loss": 0.9926, "step": 10252 }, { "epoch": 0.26326828978641387, "grad_norm": 0.82421875, "learning_rate": 0.00018685550854688875, "loss": 1.0865, "step": 10253 }, { "epoch": 0.2632939669823357, "grad_norm": 0.7734375, "learning_rate": 0.000186853296034848, "loss": 1.0572, "step": 10254 }, { "epoch": 0.26331964417825754, "grad_norm": 0.76953125, "learning_rate": 0.00018685108334971646, "loss": 1.0213, "step": 10255 }, { "epoch": 0.2633453213741793, "grad_norm": 0.76171875, "learning_rate": 0.00018684887049149858, "loss": 0.9355, "step": 10256 }, { "epoch": 0.26337099857010116, "grad_norm": 0.734375, "learning_rate": 0.0001868466574601987, "loss": 0.8608, "step": 10257 }, { "epoch": 0.263396675766023, "grad_norm": 0.80859375, "learning_rate": 0.00018684444425582131, "loss": 1.0487, "step": 10258 }, { "epoch": 0.2634223529619448, "grad_norm": 0.7734375, "learning_rate": 0.00018684223087837078, "loss": 1.0061, "step": 10259 }, { "epoch": 0.2634480301578666, "grad_norm": 0.76171875, "learning_rate": 0.00018684001732785155, "loss": 1.0844, "step": 10260 }, { "epoch": 0.26347370735378844, "grad_norm": 0.859375, "learning_rate": 0.00018683780360426798, "loss": 1.1088, "step": 10261 }, { "epoch": 0.2634993845497102, "grad_norm": 0.84765625, "learning_rate": 0.00018683558970762452, "loss": 1.0599, "step": 10262 }, { "epoch": 0.26352506174563206, "grad_norm": 0.7734375, "learning_rate": 0.00018683337563792557, "loss": 0.9313, "step": 10263 }, { "epoch": 0.2635507389415539, "grad_norm": 0.828125, "learning_rate": 0.00018683116139517556, "loss": 0.9678, "step": 10264 }, { "epoch": 0.26357641613747573, "grad_norm": 0.80078125, "learning_rate": 0.00018682894697937886, "loss": 0.9422, "step": 10265 }, { "epoch": 0.2636020933333975, "grad_norm": 0.7734375, "learning_rate": 0.00018682673239053994, "loss": 0.9508, "step": 10266 }, { "epoch": 0.26362777052931935, "grad_norm": 0.7578125, "learning_rate": 0.00018682451762866314, "loss": 0.9511, "step": 10267 }, { "epoch": 0.2636534477252412, "grad_norm": 0.77734375, "learning_rate": 0.000186822302693753, "loss": 0.954, "step": 10268 }, { "epoch": 0.26367912492116297, "grad_norm": 0.76953125, "learning_rate": 0.0001868200875858138, "loss": 0.9721, "step": 10269 }, { "epoch": 0.2637048021170848, "grad_norm": 0.86328125, "learning_rate": 0.00018681787230485003, "loss": 1.2228, "step": 10270 }, { "epoch": 0.26373047931300664, "grad_norm": 0.83984375, "learning_rate": 0.00018681565685086605, "loss": 1.1907, "step": 10271 }, { "epoch": 0.2637561565089284, "grad_norm": 0.765625, "learning_rate": 0.00018681344122386634, "loss": 1.0486, "step": 10272 }, { "epoch": 0.26378183370485025, "grad_norm": 0.78515625, "learning_rate": 0.00018681122542385525, "loss": 1.0143, "step": 10273 }, { "epoch": 0.2638075109007721, "grad_norm": 0.875, "learning_rate": 0.00018680900945083727, "loss": 1.0936, "step": 10274 }, { "epoch": 0.2638331880966939, "grad_norm": 0.73828125, "learning_rate": 0.00018680679330481675, "loss": 0.9684, "step": 10275 }, { "epoch": 0.2638588652926157, "grad_norm": 0.73828125, "learning_rate": 0.0001868045769857981, "loss": 1.0206, "step": 10276 }, { "epoch": 0.26388454248853754, "grad_norm": 0.8515625, "learning_rate": 0.00018680236049378583, "loss": 1.1426, "step": 10277 }, { "epoch": 0.2639102196844594, "grad_norm": 0.9140625, "learning_rate": 0.00018680014382878426, "loss": 0.9942, "step": 10278 }, { "epoch": 0.26393589688038116, "grad_norm": 0.76953125, "learning_rate": 0.0001867979269907979, "loss": 0.9493, "step": 10279 }, { "epoch": 0.263961574076303, "grad_norm": 0.84765625, "learning_rate": 0.00018679570997983102, "loss": 0.935, "step": 10280 }, { "epoch": 0.26398725127222483, "grad_norm": 0.95703125, "learning_rate": 0.0001867934927958882, "loss": 0.9184, "step": 10281 }, { "epoch": 0.2640129284681466, "grad_norm": 0.8046875, "learning_rate": 0.00018679127543897377, "loss": 1.0576, "step": 10282 }, { "epoch": 0.26403860566406845, "grad_norm": 0.828125, "learning_rate": 0.00018678905790909216, "loss": 0.9874, "step": 10283 }, { "epoch": 0.2640642828599903, "grad_norm": 0.77734375, "learning_rate": 0.00018678684020624782, "loss": 0.9272, "step": 10284 }, { "epoch": 0.2640899600559121, "grad_norm": 0.77734375, "learning_rate": 0.0001867846223304451, "loss": 0.989, "step": 10285 }, { "epoch": 0.2641156372518339, "grad_norm": 0.75, "learning_rate": 0.0001867824042816885, "loss": 0.9092, "step": 10286 }, { "epoch": 0.26414131444775574, "grad_norm": 0.87109375, "learning_rate": 0.0001867801860599824, "loss": 1.0112, "step": 10287 }, { "epoch": 0.2641669916436776, "grad_norm": 0.87890625, "learning_rate": 0.00018677796766533125, "loss": 0.9677, "step": 10288 }, { "epoch": 0.26419266883959935, "grad_norm": 0.84375, "learning_rate": 0.00018677574909773943, "loss": 1.1486, "step": 10289 }, { "epoch": 0.2642183460355212, "grad_norm": 0.81640625, "learning_rate": 0.00018677353035721138, "loss": 1.0782, "step": 10290 }, { "epoch": 0.264244023231443, "grad_norm": 0.859375, "learning_rate": 0.00018677131144375153, "loss": 1.0181, "step": 10291 }, { "epoch": 0.2642697004273648, "grad_norm": 0.8828125, "learning_rate": 0.00018676909235736426, "loss": 0.9491, "step": 10292 }, { "epoch": 0.26429537762328664, "grad_norm": 0.78125, "learning_rate": 0.00018676687309805408, "loss": 0.9909, "step": 10293 }, { "epoch": 0.2643210548192085, "grad_norm": 0.85546875, "learning_rate": 0.00018676465366582531, "loss": 1.0187, "step": 10294 }, { "epoch": 0.2643467320151303, "grad_norm": 0.8125, "learning_rate": 0.00018676243406068246, "loss": 0.9903, "step": 10295 }, { "epoch": 0.2643724092110521, "grad_norm": 0.8984375, "learning_rate": 0.0001867602142826299, "loss": 1.0373, "step": 10296 }, { "epoch": 0.26439808640697393, "grad_norm": 0.8125, "learning_rate": 0.00018675799433167208, "loss": 0.9514, "step": 10297 }, { "epoch": 0.26442376360289577, "grad_norm": 0.7421875, "learning_rate": 0.0001867557742078134, "loss": 1.0147, "step": 10298 }, { "epoch": 0.26444944079881755, "grad_norm": 0.7890625, "learning_rate": 0.0001867535539110583, "loss": 0.9401, "step": 10299 }, { "epoch": 0.2644751179947394, "grad_norm": 0.85546875, "learning_rate": 0.00018675133344141123, "loss": 0.8865, "step": 10300 }, { "epoch": 0.2645007951906612, "grad_norm": 0.76171875, "learning_rate": 0.00018674911279887657, "loss": 0.952, "step": 10301 }, { "epoch": 0.264526472386583, "grad_norm": 0.84765625, "learning_rate": 0.00018674689198345875, "loss": 0.9484, "step": 10302 }, { "epoch": 0.26455214958250484, "grad_norm": 0.77734375, "learning_rate": 0.0001867446709951622, "loss": 0.9358, "step": 10303 }, { "epoch": 0.2645778267784267, "grad_norm": 0.83984375, "learning_rate": 0.0001867424498339914, "loss": 0.8557, "step": 10304 }, { "epoch": 0.2646035039743485, "grad_norm": 0.8828125, "learning_rate": 0.0001867402284999507, "loss": 1.2047, "step": 10305 }, { "epoch": 0.2646291811702703, "grad_norm": 0.8125, "learning_rate": 0.00018673800699304458, "loss": 1.0, "step": 10306 }, { "epoch": 0.2646548583661921, "grad_norm": 1.015625, "learning_rate": 0.00018673578531327745, "loss": 1.0594, "step": 10307 }, { "epoch": 0.26468053556211396, "grad_norm": 0.8203125, "learning_rate": 0.00018673356346065372, "loss": 1.1531, "step": 10308 }, { "epoch": 0.26470621275803574, "grad_norm": 0.8125, "learning_rate": 0.00018673134143517783, "loss": 0.9904, "step": 10309 }, { "epoch": 0.2647318899539576, "grad_norm": 0.7734375, "learning_rate": 0.00018672911923685423, "loss": 1.0375, "step": 10310 }, { "epoch": 0.2647575671498794, "grad_norm": 0.8046875, "learning_rate": 0.0001867268968656873, "loss": 0.9171, "step": 10311 }, { "epoch": 0.2647832443458012, "grad_norm": 0.73828125, "learning_rate": 0.0001867246743216815, "loss": 0.9854, "step": 10312 }, { "epoch": 0.26480892154172303, "grad_norm": 0.7734375, "learning_rate": 0.00018672245160484128, "loss": 0.9527, "step": 10313 }, { "epoch": 0.26483459873764487, "grad_norm": 0.84765625, "learning_rate": 0.00018672022871517104, "loss": 0.9315, "step": 10314 }, { "epoch": 0.2648602759335667, "grad_norm": 0.8125, "learning_rate": 0.0001867180056526752, "loss": 1.0041, "step": 10315 }, { "epoch": 0.2648859531294885, "grad_norm": 0.78515625, "learning_rate": 0.00018671578241735826, "loss": 1.0672, "step": 10316 }, { "epoch": 0.2649116303254103, "grad_norm": 0.76953125, "learning_rate": 0.00018671355900922457, "loss": 0.9666, "step": 10317 }, { "epoch": 0.26493730752133215, "grad_norm": 0.8046875, "learning_rate": 0.00018671133542827857, "loss": 0.8516, "step": 10318 }, { "epoch": 0.26496298471725394, "grad_norm": 0.8203125, "learning_rate": 0.0001867091116745247, "loss": 1.02, "step": 10319 }, { "epoch": 0.26498866191317577, "grad_norm": 0.7734375, "learning_rate": 0.00018670688774796745, "loss": 0.8623, "step": 10320 }, { "epoch": 0.2650143391090976, "grad_norm": 0.8203125, "learning_rate": 0.00018670466364861116, "loss": 0.9492, "step": 10321 }, { "epoch": 0.2650400163050194, "grad_norm": 0.81640625, "learning_rate": 0.00018670243937646033, "loss": 0.9733, "step": 10322 }, { "epoch": 0.2650656935009412, "grad_norm": 0.984375, "learning_rate": 0.0001867002149315194, "loss": 1.0687, "step": 10323 }, { "epoch": 0.26509137069686306, "grad_norm": 0.734375, "learning_rate": 0.00018669799031379274, "loss": 0.8807, "step": 10324 }, { "epoch": 0.2651170478927849, "grad_norm": 0.8046875, "learning_rate": 0.00018669576552328483, "loss": 0.9453, "step": 10325 }, { "epoch": 0.2651427250887067, "grad_norm": 0.84765625, "learning_rate": 0.00018669354056000006, "loss": 0.877, "step": 10326 }, { "epoch": 0.2651684022846285, "grad_norm": 0.828125, "learning_rate": 0.0001866913154239429, "loss": 0.871, "step": 10327 }, { "epoch": 0.26519407948055035, "grad_norm": 0.8359375, "learning_rate": 0.00018668909011511782, "loss": 1.0253, "step": 10328 }, { "epoch": 0.26521975667647213, "grad_norm": 0.9140625, "learning_rate": 0.00018668686463352917, "loss": 0.9879, "step": 10329 }, { "epoch": 0.26524543387239397, "grad_norm": 0.7734375, "learning_rate": 0.00018668463897918147, "loss": 1.0328, "step": 10330 }, { "epoch": 0.2652711110683158, "grad_norm": 0.81640625, "learning_rate": 0.0001866824131520791, "loss": 1.0939, "step": 10331 }, { "epoch": 0.2652967882642376, "grad_norm": 0.76171875, "learning_rate": 0.0001866801871522265, "loss": 0.9969, "step": 10332 }, { "epoch": 0.2653224654601594, "grad_norm": 0.70703125, "learning_rate": 0.0001866779609796281, "loss": 0.8569, "step": 10333 }, { "epoch": 0.26534814265608125, "grad_norm": 0.78515625, "learning_rate": 0.00018667573463428838, "loss": 0.9193, "step": 10334 }, { "epoch": 0.26537381985200303, "grad_norm": 0.80859375, "learning_rate": 0.00018667350811621172, "loss": 0.916, "step": 10335 }, { "epoch": 0.26539949704792487, "grad_norm": 0.76171875, "learning_rate": 0.0001866712814254026, "loss": 0.9452, "step": 10336 }, { "epoch": 0.2654251742438467, "grad_norm": 0.7421875, "learning_rate": 0.0001866690545618655, "loss": 0.999, "step": 10337 }, { "epoch": 0.26545085143976854, "grad_norm": 0.73828125, "learning_rate": 0.00018666682752560475, "loss": 0.8813, "step": 10338 }, { "epoch": 0.2654765286356903, "grad_norm": 0.79296875, "learning_rate": 0.00018666460031662485, "loss": 1.1299, "step": 10339 }, { "epoch": 0.26550220583161216, "grad_norm": 0.703125, "learning_rate": 0.00018666237293493023, "loss": 0.9443, "step": 10340 }, { "epoch": 0.265527883027534, "grad_norm": 0.828125, "learning_rate": 0.00018666014538052533, "loss": 1.0757, "step": 10341 }, { "epoch": 0.2655535602234558, "grad_norm": 0.828125, "learning_rate": 0.00018665791765341459, "loss": 1.0687, "step": 10342 }, { "epoch": 0.2655792374193776, "grad_norm": 0.828125, "learning_rate": 0.00018665568975360244, "loss": 1.1108, "step": 10343 }, { "epoch": 0.26560491461529945, "grad_norm": 1.140625, "learning_rate": 0.00018665346168109332, "loss": 0.8763, "step": 10344 }, { "epoch": 0.26563059181122123, "grad_norm": 0.7734375, "learning_rate": 0.0001866512334358917, "loss": 1.1192, "step": 10345 }, { "epoch": 0.26565626900714306, "grad_norm": 0.77734375, "learning_rate": 0.00018664900501800199, "loss": 1.0315, "step": 10346 }, { "epoch": 0.2656819462030649, "grad_norm": 0.8515625, "learning_rate": 0.00018664677642742865, "loss": 0.9702, "step": 10347 }, { "epoch": 0.26570762339898674, "grad_norm": 0.75390625, "learning_rate": 0.0001866445476641761, "loss": 0.968, "step": 10348 }, { "epoch": 0.2657333005949085, "grad_norm": 0.82421875, "learning_rate": 0.0001866423187282488, "loss": 0.8975, "step": 10349 }, { "epoch": 0.26575897779083035, "grad_norm": 0.796875, "learning_rate": 0.00018664008961965118, "loss": 0.8028, "step": 10350 }, { "epoch": 0.2657846549867522, "grad_norm": 0.71875, "learning_rate": 0.00018663786033838769, "loss": 0.9954, "step": 10351 }, { "epoch": 0.26581033218267397, "grad_norm": 0.81640625, "learning_rate": 0.00018663563088446274, "loss": 0.9059, "step": 10352 }, { "epoch": 0.2658360093785958, "grad_norm": 0.875, "learning_rate": 0.00018663340125788086, "loss": 0.9697, "step": 10353 }, { "epoch": 0.26586168657451764, "grad_norm": 0.7890625, "learning_rate": 0.0001866311714586464, "loss": 1.2038, "step": 10354 }, { "epoch": 0.2658873637704394, "grad_norm": 0.796875, "learning_rate": 0.00018662894148676384, "loss": 0.9021, "step": 10355 }, { "epoch": 0.26591304096636126, "grad_norm": 0.8515625, "learning_rate": 0.00018662671134223765, "loss": 1.3137, "step": 10356 }, { "epoch": 0.2659387181622831, "grad_norm": 0.75, "learning_rate": 0.0001866244810250722, "loss": 1.0229, "step": 10357 }, { "epoch": 0.26596439535820493, "grad_norm": 0.7578125, "learning_rate": 0.00018662225053527203, "loss": 1.1868, "step": 10358 }, { "epoch": 0.2659900725541267, "grad_norm": 0.84765625, "learning_rate": 0.00018662001987284152, "loss": 0.9539, "step": 10359 }, { "epoch": 0.26601574975004855, "grad_norm": 0.7890625, "learning_rate": 0.00018661778903778515, "loss": 1.1288, "step": 10360 }, { "epoch": 0.2660414269459704, "grad_norm": 0.80078125, "learning_rate": 0.00018661555803010734, "loss": 1.0637, "step": 10361 }, { "epoch": 0.26606710414189216, "grad_norm": 0.8203125, "learning_rate": 0.00018661332684981257, "loss": 1.0097, "step": 10362 }, { "epoch": 0.266092781337814, "grad_norm": 0.859375, "learning_rate": 0.00018661109549690524, "loss": 0.8789, "step": 10363 }, { "epoch": 0.26611845853373584, "grad_norm": 0.76953125, "learning_rate": 0.0001866088639713898, "loss": 0.8834, "step": 10364 }, { "epoch": 0.2661441357296576, "grad_norm": 0.83984375, "learning_rate": 0.00018660663227327074, "loss": 0.9465, "step": 10365 }, { "epoch": 0.26616981292557945, "grad_norm": 0.7265625, "learning_rate": 0.0001866044004025525, "loss": 0.9468, "step": 10366 }, { "epoch": 0.2661954901215013, "grad_norm": 0.77734375, "learning_rate": 0.00018660216835923946, "loss": 0.987, "step": 10367 }, { "epoch": 0.2662211673174231, "grad_norm": 0.765625, "learning_rate": 0.00018659993614333618, "loss": 0.8926, "step": 10368 }, { "epoch": 0.2662468445133449, "grad_norm": 0.8203125, "learning_rate": 0.00018659770375484703, "loss": 1.019, "step": 10369 }, { "epoch": 0.26627252170926674, "grad_norm": 0.75, "learning_rate": 0.00018659547119377647, "loss": 0.9418, "step": 10370 }, { "epoch": 0.2662981989051886, "grad_norm": 0.70703125, "learning_rate": 0.00018659323846012898, "loss": 1.0161, "step": 10371 }, { "epoch": 0.26632387610111036, "grad_norm": 0.84375, "learning_rate": 0.00018659100555390895, "loss": 0.9742, "step": 10372 }, { "epoch": 0.2663495532970322, "grad_norm": 0.8515625, "learning_rate": 0.0001865887724751209, "loss": 1.0545, "step": 10373 }, { "epoch": 0.26637523049295403, "grad_norm": 0.79296875, "learning_rate": 0.00018658653922376927, "loss": 1.0493, "step": 10374 }, { "epoch": 0.2664009076888758, "grad_norm": 0.8046875, "learning_rate": 0.00018658430579985842, "loss": 0.8235, "step": 10375 }, { "epoch": 0.26642658488479765, "grad_norm": 0.74609375, "learning_rate": 0.0001865820722033929, "loss": 1.0592, "step": 10376 }, { "epoch": 0.2664522620807195, "grad_norm": 0.76953125, "learning_rate": 0.00018657983843437713, "loss": 0.9991, "step": 10377 }, { "epoch": 0.2664779392766413, "grad_norm": 0.8203125, "learning_rate": 0.00018657760449281558, "loss": 0.9085, "step": 10378 }, { "epoch": 0.2665036164725631, "grad_norm": 0.828125, "learning_rate": 0.0001865753703787127, "loss": 0.8819, "step": 10379 }, { "epoch": 0.26652929366848493, "grad_norm": 0.734375, "learning_rate": 0.00018657313609207288, "loss": 0.9924, "step": 10380 }, { "epoch": 0.26655497086440677, "grad_norm": 0.84375, "learning_rate": 0.00018657090163290066, "loss": 1.063, "step": 10381 }, { "epoch": 0.26658064806032855, "grad_norm": 0.79296875, "learning_rate": 0.00018656866700120044, "loss": 1.1016, "step": 10382 }, { "epoch": 0.2666063252562504, "grad_norm": 0.80078125, "learning_rate": 0.00018656643219697668, "loss": 1.041, "step": 10383 }, { "epoch": 0.2666320024521722, "grad_norm": 0.8125, "learning_rate": 0.00018656419722023384, "loss": 0.94, "step": 10384 }, { "epoch": 0.266657679648094, "grad_norm": 0.81640625, "learning_rate": 0.00018656196207097637, "loss": 0.9422, "step": 10385 }, { "epoch": 0.26668335684401584, "grad_norm": 0.87109375, "learning_rate": 0.00018655972674920873, "loss": 1.0046, "step": 10386 }, { "epoch": 0.2667090340399377, "grad_norm": 0.796875, "learning_rate": 0.00018655749125493537, "loss": 0.9362, "step": 10387 }, { "epoch": 0.2667347112358595, "grad_norm": 0.76953125, "learning_rate": 0.00018655525558816075, "loss": 0.8668, "step": 10388 }, { "epoch": 0.2667603884317813, "grad_norm": 0.76171875, "learning_rate": 0.00018655301974888935, "loss": 0.9387, "step": 10389 }, { "epoch": 0.26678606562770313, "grad_norm": 0.8515625, "learning_rate": 0.00018655078373712558, "loss": 0.9882, "step": 10390 }, { "epoch": 0.26681174282362496, "grad_norm": 0.91015625, "learning_rate": 0.00018654854755287392, "loss": 0.9565, "step": 10391 }, { "epoch": 0.26683742001954674, "grad_norm": 0.8671875, "learning_rate": 0.00018654631119613882, "loss": 1.0274, "step": 10392 }, { "epoch": 0.2668630972154686, "grad_norm": 0.79296875, "learning_rate": 0.00018654407466692474, "loss": 0.8801, "step": 10393 }, { "epoch": 0.2668887744113904, "grad_norm": 0.73046875, "learning_rate": 0.00018654183796523613, "loss": 0.9118, "step": 10394 }, { "epoch": 0.2669144516073122, "grad_norm": 0.83203125, "learning_rate": 0.00018653960109107746, "loss": 1.1671, "step": 10395 }, { "epoch": 0.26694012880323403, "grad_norm": 0.796875, "learning_rate": 0.00018653736404445316, "loss": 1.0132, "step": 10396 }, { "epoch": 0.26696580599915587, "grad_norm": 0.80859375, "learning_rate": 0.00018653512682536777, "loss": 1.0172, "step": 10397 }, { "epoch": 0.2669914831950777, "grad_norm": 0.84765625, "learning_rate": 0.00018653288943382563, "loss": 1.0819, "step": 10398 }, { "epoch": 0.2670171603909995, "grad_norm": 0.7890625, "learning_rate": 0.00018653065186983127, "loss": 0.9197, "step": 10399 }, { "epoch": 0.2670428375869213, "grad_norm": 0.8984375, "learning_rate": 0.00018652841413338916, "loss": 0.9104, "step": 10400 }, { "epoch": 0.26706851478284316, "grad_norm": 0.78515625, "learning_rate": 0.00018652617622450372, "loss": 0.8631, "step": 10401 }, { "epoch": 0.26709419197876494, "grad_norm": 0.86328125, "learning_rate": 0.00018652393814317942, "loss": 1.0322, "step": 10402 }, { "epoch": 0.2671198691746868, "grad_norm": 0.79296875, "learning_rate": 0.00018652169988942074, "loss": 1.0739, "step": 10403 }, { "epoch": 0.2671455463706086, "grad_norm": 0.80078125, "learning_rate": 0.00018651946146323214, "loss": 1.0206, "step": 10404 }, { "epoch": 0.2671712235665304, "grad_norm": 0.81640625, "learning_rate": 0.00018651722286461804, "loss": 0.833, "step": 10405 }, { "epoch": 0.2671969007624522, "grad_norm": 0.83203125, "learning_rate": 0.00018651498409358295, "loss": 1.1824, "step": 10406 }, { "epoch": 0.26722257795837406, "grad_norm": 0.76171875, "learning_rate": 0.0001865127451501313, "loss": 1.0499, "step": 10407 }, { "epoch": 0.2672482551542959, "grad_norm": 0.80859375, "learning_rate": 0.00018651050603426755, "loss": 1.0207, "step": 10408 }, { "epoch": 0.2672739323502177, "grad_norm": 0.765625, "learning_rate": 0.00018650826674599622, "loss": 0.8732, "step": 10409 }, { "epoch": 0.2672996095461395, "grad_norm": 0.78515625, "learning_rate": 0.0001865060272853217, "loss": 1.0025, "step": 10410 }, { "epoch": 0.26732528674206135, "grad_norm": 0.7421875, "learning_rate": 0.00018650378765224846, "loss": 0.9546, "step": 10411 }, { "epoch": 0.26735096393798313, "grad_norm": 0.76953125, "learning_rate": 0.000186501547846781, "loss": 0.9854, "step": 10412 }, { "epoch": 0.26737664113390497, "grad_norm": 0.8046875, "learning_rate": 0.0001864993078689238, "loss": 1.1292, "step": 10413 }, { "epoch": 0.2674023183298268, "grad_norm": 0.9375, "learning_rate": 0.00018649706771868126, "loss": 0.949, "step": 10414 }, { "epoch": 0.2674279955257486, "grad_norm": 0.86328125, "learning_rate": 0.0001864948273960579, "loss": 0.9129, "step": 10415 }, { "epoch": 0.2674536727216704, "grad_norm": 0.859375, "learning_rate": 0.00018649258690105815, "loss": 0.9851, "step": 10416 }, { "epoch": 0.26747934991759226, "grad_norm": 0.77734375, "learning_rate": 0.00018649034623368648, "loss": 1.0269, "step": 10417 }, { "epoch": 0.2675050271135141, "grad_norm": 0.78515625, "learning_rate": 0.00018648810539394737, "loss": 0.9897, "step": 10418 }, { "epoch": 0.2675307043094359, "grad_norm": 0.81640625, "learning_rate": 0.00018648586438184528, "loss": 1.0244, "step": 10419 }, { "epoch": 0.2675563815053577, "grad_norm": 0.80078125, "learning_rate": 0.00018648362319738468, "loss": 0.8935, "step": 10420 }, { "epoch": 0.26758205870127955, "grad_norm": 0.81640625, "learning_rate": 0.00018648138184057, "loss": 1.0598, "step": 10421 }, { "epoch": 0.2676077358972013, "grad_norm": 0.7421875, "learning_rate": 0.00018647914031140575, "loss": 0.8743, "step": 10422 }, { "epoch": 0.26763341309312316, "grad_norm": 0.8359375, "learning_rate": 0.0001864768986098964, "loss": 0.9193, "step": 10423 }, { "epoch": 0.267659090289045, "grad_norm": 0.78125, "learning_rate": 0.00018647465673604644, "loss": 0.9894, "step": 10424 }, { "epoch": 0.2676847674849668, "grad_norm": 0.8671875, "learning_rate": 0.00018647241468986025, "loss": 1.0675, "step": 10425 }, { "epoch": 0.2677104446808886, "grad_norm": 0.7734375, "learning_rate": 0.00018647017247134235, "loss": 0.9448, "step": 10426 }, { "epoch": 0.26773612187681045, "grad_norm": 0.78515625, "learning_rate": 0.0001864679300804972, "loss": 0.8631, "step": 10427 }, { "epoch": 0.2677617990727323, "grad_norm": 0.859375, "learning_rate": 0.0001864656875173293, "loss": 0.9418, "step": 10428 }, { "epoch": 0.26778747626865407, "grad_norm": 0.83984375, "learning_rate": 0.0001864634447818431, "loss": 1.1961, "step": 10429 }, { "epoch": 0.2678131534645759, "grad_norm": 0.8203125, "learning_rate": 0.00018646120187404302, "loss": 0.9346, "step": 10430 }, { "epoch": 0.26783883066049774, "grad_norm": 0.82421875, "learning_rate": 0.00018645895879393365, "loss": 1.0646, "step": 10431 }, { "epoch": 0.2678645078564195, "grad_norm": 0.81640625, "learning_rate": 0.00018645671554151934, "loss": 0.9869, "step": 10432 }, { "epoch": 0.26789018505234136, "grad_norm": 0.796875, "learning_rate": 0.00018645447211680457, "loss": 0.8882, "step": 10433 }, { "epoch": 0.2679158622482632, "grad_norm": 0.765625, "learning_rate": 0.0001864522285197939, "loss": 0.9501, "step": 10434 }, { "epoch": 0.267941539444185, "grad_norm": 1.0, "learning_rate": 0.00018644998475049173, "loss": 1.1064, "step": 10435 }, { "epoch": 0.2679672166401068, "grad_norm": 0.859375, "learning_rate": 0.00018644774080890254, "loss": 1.0861, "step": 10436 }, { "epoch": 0.26799289383602865, "grad_norm": 0.8515625, "learning_rate": 0.00018644549669503082, "loss": 0.9737, "step": 10437 }, { "epoch": 0.2680185710319505, "grad_norm": 0.828125, "learning_rate": 0.00018644325240888104, "loss": 1.0487, "step": 10438 }, { "epoch": 0.26804424822787226, "grad_norm": 0.73828125, "learning_rate": 0.00018644100795045767, "loss": 0.8533, "step": 10439 }, { "epoch": 0.2680699254237941, "grad_norm": 0.984375, "learning_rate": 0.00018643876331976515, "loss": 0.9641, "step": 10440 }, { "epoch": 0.26809560261971593, "grad_norm": 0.765625, "learning_rate": 0.00018643651851680802, "loss": 1.0191, "step": 10441 }, { "epoch": 0.2681212798156377, "grad_norm": 0.78125, "learning_rate": 0.0001864342735415907, "loss": 1.0792, "step": 10442 }, { "epoch": 0.26814695701155955, "grad_norm": 0.76171875, "learning_rate": 0.00018643202839411767, "loss": 0.973, "step": 10443 }, { "epoch": 0.2681726342074814, "grad_norm": 0.75390625, "learning_rate": 0.00018642978307439341, "loss": 0.959, "step": 10444 }, { "epoch": 0.26819831140340317, "grad_norm": 0.8359375, "learning_rate": 0.00018642753758242245, "loss": 1.0231, "step": 10445 }, { "epoch": 0.268223988599325, "grad_norm": 0.7109375, "learning_rate": 0.00018642529191820913, "loss": 0.9892, "step": 10446 }, { "epoch": 0.26824966579524684, "grad_norm": 0.8125, "learning_rate": 0.00018642304608175808, "loss": 0.8974, "step": 10447 }, { "epoch": 0.2682753429911687, "grad_norm": 0.77734375, "learning_rate": 0.0001864208000730737, "loss": 1.1228, "step": 10448 }, { "epoch": 0.26830102018709046, "grad_norm": 0.8359375, "learning_rate": 0.00018641855389216045, "loss": 1.0225, "step": 10449 }, { "epoch": 0.2683266973830123, "grad_norm": 0.7890625, "learning_rate": 0.00018641630753902285, "loss": 1.0646, "step": 10450 }, { "epoch": 0.2683523745789341, "grad_norm": 0.859375, "learning_rate": 0.00018641406101366532, "loss": 0.9531, "step": 10451 }, { "epoch": 0.2683780517748559, "grad_norm": 0.82421875, "learning_rate": 0.0001864118143160924, "loss": 1.0988, "step": 10452 }, { "epoch": 0.26840372897077774, "grad_norm": 0.80078125, "learning_rate": 0.00018640956744630854, "loss": 0.9388, "step": 10453 }, { "epoch": 0.2684294061666996, "grad_norm": 0.76953125, "learning_rate": 0.00018640732040431818, "loss": 0.9322, "step": 10454 }, { "epoch": 0.26845508336262136, "grad_norm": 0.84765625, "learning_rate": 0.0001864050731901259, "loss": 1.1541, "step": 10455 }, { "epoch": 0.2684807605585432, "grad_norm": 0.796875, "learning_rate": 0.00018640282580373606, "loss": 0.9139, "step": 10456 }, { "epoch": 0.26850643775446503, "grad_norm": 1.140625, "learning_rate": 0.0001864005782451532, "loss": 1.0662, "step": 10457 }, { "epoch": 0.26853211495038687, "grad_norm": 0.80078125, "learning_rate": 0.00018639833051438181, "loss": 0.9835, "step": 10458 }, { "epoch": 0.26855779214630865, "grad_norm": 0.8828125, "learning_rate": 0.00018639608261142635, "loss": 0.9334, "step": 10459 }, { "epoch": 0.2685834693422305, "grad_norm": 0.734375, "learning_rate": 0.00018639383453629128, "loss": 1.105, "step": 10460 }, { "epoch": 0.2686091465381523, "grad_norm": 0.77734375, "learning_rate": 0.00018639158628898112, "loss": 0.9117, "step": 10461 }, { "epoch": 0.2686348237340741, "grad_norm": 0.75, "learning_rate": 0.00018638933786950036, "loss": 0.8424, "step": 10462 }, { "epoch": 0.26866050092999594, "grad_norm": 0.859375, "learning_rate": 0.0001863870892778534, "loss": 1.0764, "step": 10463 }, { "epoch": 0.2686861781259178, "grad_norm": 1.0703125, "learning_rate": 0.00018638484051404478, "loss": 0.9123, "step": 10464 }, { "epoch": 0.26871185532183955, "grad_norm": 0.859375, "learning_rate": 0.00018638259157807902, "loss": 0.986, "step": 10465 }, { "epoch": 0.2687375325177614, "grad_norm": 0.8203125, "learning_rate": 0.00018638034246996053, "loss": 0.9494, "step": 10466 }, { "epoch": 0.2687632097136832, "grad_norm": 0.8125, "learning_rate": 0.00018637809318969382, "loss": 1.0561, "step": 10467 }, { "epoch": 0.26878888690960506, "grad_norm": 0.984375, "learning_rate": 0.00018637584373728337, "loss": 1.0513, "step": 10468 }, { "epoch": 0.26881456410552684, "grad_norm": 0.88671875, "learning_rate": 0.00018637359411273369, "loss": 1.2148, "step": 10469 }, { "epoch": 0.2688402413014487, "grad_norm": 0.8203125, "learning_rate": 0.00018637134431604918, "loss": 0.9322, "step": 10470 }, { "epoch": 0.2688659184973705, "grad_norm": 0.83984375, "learning_rate": 0.00018636909434723443, "loss": 1.0677, "step": 10471 }, { "epoch": 0.2688915956932923, "grad_norm": 0.8515625, "learning_rate": 0.00018636684420629388, "loss": 1.0475, "step": 10472 }, { "epoch": 0.26891727288921413, "grad_norm": 0.78515625, "learning_rate": 0.000186364593893232, "loss": 0.8947, "step": 10473 }, { "epoch": 0.26894295008513597, "grad_norm": 0.76171875, "learning_rate": 0.0001863623434080533, "loss": 0.9005, "step": 10474 }, { "epoch": 0.26896862728105775, "grad_norm": 0.828125, "learning_rate": 0.00018636009275076223, "loss": 0.962, "step": 10475 }, { "epoch": 0.2689943044769796, "grad_norm": 0.87109375, "learning_rate": 0.00018635784192136328, "loss": 1.0, "step": 10476 }, { "epoch": 0.2690199816729014, "grad_norm": 0.8671875, "learning_rate": 0.00018635559091986098, "loss": 1.0113, "step": 10477 }, { "epoch": 0.26904565886882326, "grad_norm": 0.7578125, "learning_rate": 0.0001863533397462598, "loss": 1.0137, "step": 10478 }, { "epoch": 0.26907133606474504, "grad_norm": 0.76953125, "learning_rate": 0.00018635108840056418, "loss": 1.0323, "step": 10479 }, { "epoch": 0.2690970132606669, "grad_norm": 0.8828125, "learning_rate": 0.00018634883688277865, "loss": 1.0002, "step": 10480 }, { "epoch": 0.2691226904565887, "grad_norm": 0.7578125, "learning_rate": 0.0001863465851929077, "loss": 0.9538, "step": 10481 }, { "epoch": 0.2691483676525105, "grad_norm": 0.765625, "learning_rate": 0.0001863443333309558, "loss": 0.9575, "step": 10482 }, { "epoch": 0.2691740448484323, "grad_norm": 0.8671875, "learning_rate": 0.00018634208129692743, "loss": 1.0415, "step": 10483 }, { "epoch": 0.26919972204435416, "grad_norm": 0.8671875, "learning_rate": 0.00018633982909082713, "loss": 1.0429, "step": 10484 }, { "epoch": 0.26922539924027594, "grad_norm": 0.859375, "learning_rate": 0.0001863375767126593, "loss": 1.0381, "step": 10485 }, { "epoch": 0.2692510764361978, "grad_norm": 0.8125, "learning_rate": 0.00018633532416242852, "loss": 1.0513, "step": 10486 }, { "epoch": 0.2692767536321196, "grad_norm": 0.8359375, "learning_rate": 0.00018633307144013924, "loss": 1.042, "step": 10487 }, { "epoch": 0.26930243082804145, "grad_norm": 0.8125, "learning_rate": 0.0001863308185457959, "loss": 1.001, "step": 10488 }, { "epoch": 0.26932810802396323, "grad_norm": 0.87109375, "learning_rate": 0.00018632856547940306, "loss": 0.9728, "step": 10489 }, { "epoch": 0.26935378521988507, "grad_norm": 0.796875, "learning_rate": 0.00018632631224096518, "loss": 0.9356, "step": 10490 }, { "epoch": 0.2693794624158069, "grad_norm": 0.80078125, "learning_rate": 0.0001863240588304868, "loss": 0.985, "step": 10491 }, { "epoch": 0.2694051396117287, "grad_norm": 0.765625, "learning_rate": 0.0001863218052479723, "loss": 1.0473, "step": 10492 }, { "epoch": 0.2694308168076505, "grad_norm": 0.87109375, "learning_rate": 0.00018631955149342631, "loss": 1.1029, "step": 10493 }, { "epoch": 0.26945649400357236, "grad_norm": 0.80859375, "learning_rate": 0.00018631729756685318, "loss": 1.0651, "step": 10494 }, { "epoch": 0.26948217119949414, "grad_norm": 0.7734375, "learning_rate": 0.00018631504346825755, "loss": 1.1154, "step": 10495 }, { "epoch": 0.26950784839541597, "grad_norm": 0.81640625, "learning_rate": 0.00018631278919764376, "loss": 1.0002, "step": 10496 }, { "epoch": 0.2695335255913378, "grad_norm": 0.80078125, "learning_rate": 0.00018631053475501644, "loss": 1.0674, "step": 10497 }, { "epoch": 0.26955920278725964, "grad_norm": 0.76953125, "learning_rate": 0.00018630828014038, "loss": 1.0004, "step": 10498 }, { "epoch": 0.2695848799831814, "grad_norm": 0.96484375, "learning_rate": 0.00018630602535373893, "loss": 1.0548, "step": 10499 }, { "epoch": 0.26961055717910326, "grad_norm": 0.79296875, "learning_rate": 0.00018630377039509775, "loss": 1.0529, "step": 10500 }, { "epoch": 0.2696362343750251, "grad_norm": 0.765625, "learning_rate": 0.00018630151526446097, "loss": 0.9076, "step": 10501 }, { "epoch": 0.2696619115709469, "grad_norm": 0.8125, "learning_rate": 0.00018629925996183308, "loss": 1.1129, "step": 10502 }, { "epoch": 0.2696875887668687, "grad_norm": 0.7421875, "learning_rate": 0.00018629700448721855, "loss": 1.0236, "step": 10503 }, { "epoch": 0.26971326596279055, "grad_norm": 0.83984375, "learning_rate": 0.00018629474884062188, "loss": 0.9502, "step": 10504 }, { "epoch": 0.26973894315871233, "grad_norm": 0.7734375, "learning_rate": 0.00018629249302204754, "loss": 0.9683, "step": 10505 }, { "epoch": 0.26976462035463417, "grad_norm": 0.84765625, "learning_rate": 0.0001862902370315001, "loss": 0.8711, "step": 10506 }, { "epoch": 0.269790297550556, "grad_norm": 0.875, "learning_rate": 0.000186287980868984, "loss": 1.0186, "step": 10507 }, { "epoch": 0.26981597474647784, "grad_norm": 0.8984375, "learning_rate": 0.00018628572453450372, "loss": 1.1041, "step": 10508 }, { "epoch": 0.2698416519423996, "grad_norm": 0.7890625, "learning_rate": 0.00018628346802806382, "loss": 0.9372, "step": 10509 }, { "epoch": 0.26986732913832145, "grad_norm": 0.91796875, "learning_rate": 0.00018628121134966875, "loss": 0.9755, "step": 10510 }, { "epoch": 0.2698930063342433, "grad_norm": 0.875, "learning_rate": 0.00018627895449932304, "loss": 1.0136, "step": 10511 }, { "epoch": 0.26991868353016507, "grad_norm": 0.8515625, "learning_rate": 0.00018627669747703113, "loss": 0.9035, "step": 10512 }, { "epoch": 0.2699443607260869, "grad_norm": 0.78125, "learning_rate": 0.00018627444028279758, "loss": 0.9336, "step": 10513 }, { "epoch": 0.26997003792200874, "grad_norm": 0.90234375, "learning_rate": 0.00018627218291662688, "loss": 1.0172, "step": 10514 }, { "epoch": 0.2699957151179305, "grad_norm": 0.86328125, "learning_rate": 0.00018626992537852348, "loss": 1.076, "step": 10515 }, { "epoch": 0.27002139231385236, "grad_norm": 0.890625, "learning_rate": 0.00018626766766849192, "loss": 1.0563, "step": 10516 }, { "epoch": 0.2700470695097742, "grad_norm": 0.80078125, "learning_rate": 0.00018626540978653667, "loss": 0.9583, "step": 10517 }, { "epoch": 0.27007274670569603, "grad_norm": 0.78125, "learning_rate": 0.00018626315173266228, "loss": 1.0342, "step": 10518 }, { "epoch": 0.2700984239016178, "grad_norm": 1.046875, "learning_rate": 0.00018626089350687322, "loss": 0.9605, "step": 10519 }, { "epoch": 0.27012410109753965, "grad_norm": 0.80078125, "learning_rate": 0.00018625863510917398, "loss": 1.1222, "step": 10520 }, { "epoch": 0.2701497782934615, "grad_norm": 0.7734375, "learning_rate": 0.00018625637653956907, "loss": 1.0528, "step": 10521 }, { "epoch": 0.27017545548938326, "grad_norm": 0.73828125, "learning_rate": 0.000186254117798063, "loss": 0.9442, "step": 10522 }, { "epoch": 0.2702011326853051, "grad_norm": 0.78125, "learning_rate": 0.00018625185888466027, "loss": 0.9203, "step": 10523 }, { "epoch": 0.27022680988122694, "grad_norm": 0.86328125, "learning_rate": 0.00018624959979936538, "loss": 0.985, "step": 10524 }, { "epoch": 0.2702524870771487, "grad_norm": 0.84765625, "learning_rate": 0.0001862473405421828, "loss": 0.9889, "step": 10525 }, { "epoch": 0.27027816427307055, "grad_norm": 0.875, "learning_rate": 0.0001862450811131171, "loss": 1.1263, "step": 10526 }, { "epoch": 0.2703038414689924, "grad_norm": 0.7265625, "learning_rate": 0.00018624282151217272, "loss": 1.0249, "step": 10527 }, { "epoch": 0.2703295186649142, "grad_norm": 0.7578125, "learning_rate": 0.0001862405617393542, "loss": 1.0828, "step": 10528 }, { "epoch": 0.270355195860836, "grad_norm": 0.890625, "learning_rate": 0.00018623830179466603, "loss": 1.0246, "step": 10529 }, { "epoch": 0.27038087305675784, "grad_norm": 0.83203125, "learning_rate": 0.00018623604167811267, "loss": 0.936, "step": 10530 }, { "epoch": 0.2704065502526797, "grad_norm": 0.8203125, "learning_rate": 0.00018623378138969874, "loss": 0.8799, "step": 10531 }, { "epoch": 0.27043222744860146, "grad_norm": 0.83984375, "learning_rate": 0.0001862315209294286, "loss": 0.9938, "step": 10532 }, { "epoch": 0.2704579046445233, "grad_norm": 0.8671875, "learning_rate": 0.0001862292602973069, "loss": 1.0843, "step": 10533 }, { "epoch": 0.27048358184044513, "grad_norm": 0.81640625, "learning_rate": 0.000186226999493338, "loss": 0.9494, "step": 10534 }, { "epoch": 0.2705092590363669, "grad_norm": 0.87890625, "learning_rate": 0.00018622473851752656, "loss": 1.0061, "step": 10535 }, { "epoch": 0.27053493623228875, "grad_norm": 0.83203125, "learning_rate": 0.00018622247736987694, "loss": 0.9579, "step": 10536 }, { "epoch": 0.2705606134282106, "grad_norm": 0.97265625, "learning_rate": 0.00018622021605039374, "loss": 0.9403, "step": 10537 }, { "epoch": 0.27058629062413236, "grad_norm": 0.74609375, "learning_rate": 0.00018621795455908143, "loss": 0.9371, "step": 10538 }, { "epoch": 0.2706119678200542, "grad_norm": 0.75390625, "learning_rate": 0.00018621569289594452, "loss": 0.9616, "step": 10539 }, { "epoch": 0.27063764501597604, "grad_norm": 0.78125, "learning_rate": 0.00018621343106098754, "loss": 1.0926, "step": 10540 }, { "epoch": 0.27066332221189787, "grad_norm": 0.80859375, "learning_rate": 0.00018621116905421497, "loss": 0.9467, "step": 10541 }, { "epoch": 0.27068899940781965, "grad_norm": 0.765625, "learning_rate": 0.00018620890687563133, "loss": 0.9836, "step": 10542 }, { "epoch": 0.2707146766037415, "grad_norm": 0.89453125, "learning_rate": 0.0001862066445252411, "loss": 0.9289, "step": 10543 }, { "epoch": 0.2707403537996633, "grad_norm": 0.8515625, "learning_rate": 0.00018620438200304883, "loss": 0.9259, "step": 10544 }, { "epoch": 0.2707660309955851, "grad_norm": 0.87109375, "learning_rate": 0.00018620211930905903, "loss": 0.9719, "step": 10545 }, { "epoch": 0.27079170819150694, "grad_norm": 0.859375, "learning_rate": 0.00018619985644327617, "loss": 1.1333, "step": 10546 }, { "epoch": 0.2708173853874288, "grad_norm": 0.77734375, "learning_rate": 0.0001861975934057048, "loss": 0.8516, "step": 10547 }, { "epoch": 0.27084306258335056, "grad_norm": 0.76953125, "learning_rate": 0.0001861953301963494, "loss": 0.8866, "step": 10548 }, { "epoch": 0.2708687397792724, "grad_norm": 0.7578125, "learning_rate": 0.00018619306681521452, "loss": 1.069, "step": 10549 }, { "epoch": 0.27089441697519423, "grad_norm": 0.76953125, "learning_rate": 0.00018619080326230463, "loss": 0.912, "step": 10550 }, { "epoch": 0.27092009417111607, "grad_norm": 0.93359375, "learning_rate": 0.00018618853953762424, "loss": 1.0061, "step": 10551 }, { "epoch": 0.27094577136703785, "grad_norm": 0.82421875, "learning_rate": 0.00018618627564117787, "loss": 1.035, "step": 10552 }, { "epoch": 0.2709714485629597, "grad_norm": 0.8359375, "learning_rate": 0.00018618401157297006, "loss": 1.0552, "step": 10553 }, { "epoch": 0.2709971257588815, "grad_norm": 0.8359375, "learning_rate": 0.0001861817473330053, "loss": 1.0874, "step": 10554 }, { "epoch": 0.2710228029548033, "grad_norm": 0.734375, "learning_rate": 0.00018617948292128808, "loss": 0.9146, "step": 10555 }, { "epoch": 0.27104848015072514, "grad_norm": 0.84765625, "learning_rate": 0.000186177218337823, "loss": 0.9943, "step": 10556 }, { "epoch": 0.27107415734664697, "grad_norm": 0.85546875, "learning_rate": 0.0001861749535826144, "loss": 0.9925, "step": 10557 }, { "epoch": 0.27109983454256875, "grad_norm": 0.80859375, "learning_rate": 0.00018617268865566696, "loss": 1.1009, "step": 10558 }, { "epoch": 0.2711255117384906, "grad_norm": 0.7734375, "learning_rate": 0.00018617042355698515, "loss": 0.8938, "step": 10559 }, { "epoch": 0.2711511889344124, "grad_norm": 0.84765625, "learning_rate": 0.00018616815828657345, "loss": 1.1855, "step": 10560 }, { "epoch": 0.27117686613033426, "grad_norm": 0.87890625, "learning_rate": 0.0001861658928444364, "loss": 1.0285, "step": 10561 }, { "epoch": 0.27120254332625604, "grad_norm": 0.8046875, "learning_rate": 0.0001861636272305785, "loss": 0.9566, "step": 10562 }, { "epoch": 0.2712282205221779, "grad_norm": 0.83203125, "learning_rate": 0.00018616136144500428, "loss": 1.0528, "step": 10563 }, { "epoch": 0.2712538977180997, "grad_norm": 0.8203125, "learning_rate": 0.00018615909548771827, "loss": 1.0271, "step": 10564 }, { "epoch": 0.2712795749140215, "grad_norm": 0.73046875, "learning_rate": 0.00018615682935872494, "loss": 0.954, "step": 10565 }, { "epoch": 0.27130525210994333, "grad_norm": 0.8984375, "learning_rate": 0.00018615456305802884, "loss": 1.02, "step": 10566 }, { "epoch": 0.27133092930586517, "grad_norm": 0.80078125, "learning_rate": 0.00018615229658563447, "loss": 0.865, "step": 10567 }, { "epoch": 0.27135660650178695, "grad_norm": 0.80078125, "learning_rate": 0.00018615002994154637, "loss": 0.9366, "step": 10568 }, { "epoch": 0.2713822836977088, "grad_norm": 0.86328125, "learning_rate": 0.00018614776312576902, "loss": 1.0229, "step": 10569 }, { "epoch": 0.2714079608936306, "grad_norm": 0.7578125, "learning_rate": 0.00018614549613830698, "loss": 0.9426, "step": 10570 }, { "epoch": 0.27143363808955245, "grad_norm": 0.86328125, "learning_rate": 0.00018614322897916475, "loss": 1.0366, "step": 10571 }, { "epoch": 0.27145931528547423, "grad_norm": 0.796875, "learning_rate": 0.0001861409616483468, "loss": 0.9903, "step": 10572 }, { "epoch": 0.27148499248139607, "grad_norm": 0.875, "learning_rate": 0.00018613869414585774, "loss": 1.0814, "step": 10573 }, { "epoch": 0.2715106696773179, "grad_norm": 0.79296875, "learning_rate": 0.00018613642647170205, "loss": 1.1841, "step": 10574 }, { "epoch": 0.2715363468732397, "grad_norm": 0.8203125, "learning_rate": 0.00018613415862588423, "loss": 0.9165, "step": 10575 }, { "epoch": 0.2715620240691615, "grad_norm": 0.77734375, "learning_rate": 0.00018613189060840882, "loss": 1.015, "step": 10576 }, { "epoch": 0.27158770126508336, "grad_norm": 0.9609375, "learning_rate": 0.00018612962241928028, "loss": 0.9397, "step": 10577 }, { "epoch": 0.27161337846100514, "grad_norm": 1.03125, "learning_rate": 0.00018612735405850326, "loss": 1.0377, "step": 10578 }, { "epoch": 0.271639055656927, "grad_norm": 0.81640625, "learning_rate": 0.00018612508552608213, "loss": 0.9611, "step": 10579 }, { "epoch": 0.2716647328528488, "grad_norm": 0.8046875, "learning_rate": 0.00018612281682202155, "loss": 1.0037, "step": 10580 }, { "epoch": 0.27169041004877065, "grad_norm": 0.81640625, "learning_rate": 0.00018612054794632594, "loss": 0.8976, "step": 10581 }, { "epoch": 0.27171608724469243, "grad_norm": 0.7109375, "learning_rate": 0.00018611827889899985, "loss": 0.9708, "step": 10582 }, { "epoch": 0.27174176444061426, "grad_norm": 0.7578125, "learning_rate": 0.00018611600968004784, "loss": 0.9257, "step": 10583 }, { "epoch": 0.2717674416365361, "grad_norm": 0.82421875, "learning_rate": 0.0001861137402894744, "loss": 1.063, "step": 10584 }, { "epoch": 0.2717931188324579, "grad_norm": 0.7890625, "learning_rate": 0.000186111470727284, "loss": 0.9969, "step": 10585 }, { "epoch": 0.2718187960283797, "grad_norm": 0.8671875, "learning_rate": 0.00018610920099348127, "loss": 1.0325, "step": 10586 }, { "epoch": 0.27184447322430155, "grad_norm": 0.75390625, "learning_rate": 0.00018610693108807067, "loss": 0.9848, "step": 10587 }, { "epoch": 0.27187015042022333, "grad_norm": 0.828125, "learning_rate": 0.00018610466101105674, "loss": 0.9896, "step": 10588 }, { "epoch": 0.27189582761614517, "grad_norm": 0.7734375, "learning_rate": 0.000186102390762444, "loss": 1.0338, "step": 10589 }, { "epoch": 0.271921504812067, "grad_norm": 1.3046875, "learning_rate": 0.00018610012034223696, "loss": 0.9677, "step": 10590 }, { "epoch": 0.27194718200798884, "grad_norm": 0.73828125, "learning_rate": 0.00018609784975044016, "loss": 0.9004, "step": 10591 }, { "epoch": 0.2719728592039106, "grad_norm": 0.84375, "learning_rate": 0.0001860955789870581, "loss": 1.025, "step": 10592 }, { "epoch": 0.27199853639983246, "grad_norm": 0.80078125, "learning_rate": 0.00018609330805209538, "loss": 0.9913, "step": 10593 }, { "epoch": 0.2720242135957543, "grad_norm": 0.8515625, "learning_rate": 0.00018609103694555644, "loss": 1.0029, "step": 10594 }, { "epoch": 0.2720498907916761, "grad_norm": 0.7890625, "learning_rate": 0.00018608876566744584, "loss": 0.9708, "step": 10595 }, { "epoch": 0.2720755679875979, "grad_norm": 0.9765625, "learning_rate": 0.0001860864942177681, "loss": 0.9865, "step": 10596 }, { "epoch": 0.27210124518351975, "grad_norm": 0.8359375, "learning_rate": 0.00018608422259652776, "loss": 1.0031, "step": 10597 }, { "epoch": 0.2721269223794415, "grad_norm": 0.72265625, "learning_rate": 0.00018608195080372934, "loss": 1.0041, "step": 10598 }, { "epoch": 0.27215259957536336, "grad_norm": 0.7265625, "learning_rate": 0.00018607967883937742, "loss": 1.1264, "step": 10599 }, { "epoch": 0.2721782767712852, "grad_norm": 0.8359375, "learning_rate": 0.00018607740670347639, "loss": 1.0963, "step": 10600 }, { "epoch": 0.27220395396720704, "grad_norm": 0.77734375, "learning_rate": 0.0001860751343960309, "loss": 0.949, "step": 10601 }, { "epoch": 0.2722296311631288, "grad_norm": 0.796875, "learning_rate": 0.00018607286191704546, "loss": 0.9181, "step": 10602 }, { "epoch": 0.27225530835905065, "grad_norm": 0.8359375, "learning_rate": 0.00018607058926652456, "loss": 1.1231, "step": 10603 }, { "epoch": 0.2722809855549725, "grad_norm": 0.87890625, "learning_rate": 0.00018606831644447277, "loss": 0.9262, "step": 10604 }, { "epoch": 0.27230666275089427, "grad_norm": 0.79296875, "learning_rate": 0.00018606604345089455, "loss": 1.0687, "step": 10605 }, { "epoch": 0.2723323399468161, "grad_norm": 0.79296875, "learning_rate": 0.00018606377028579452, "loss": 1.0452, "step": 10606 }, { "epoch": 0.27235801714273794, "grad_norm": 0.81640625, "learning_rate": 0.00018606149694917718, "loss": 0.8843, "step": 10607 }, { "epoch": 0.2723836943386597, "grad_norm": 0.91015625, "learning_rate": 0.00018605922344104703, "loss": 1.1044, "step": 10608 }, { "epoch": 0.27240937153458156, "grad_norm": 0.7578125, "learning_rate": 0.00018605694976140863, "loss": 0.8817, "step": 10609 }, { "epoch": 0.2724350487305034, "grad_norm": 0.82421875, "learning_rate": 0.0001860546759102665, "loss": 1.0073, "step": 10610 }, { "epoch": 0.27246072592642523, "grad_norm": 0.7578125, "learning_rate": 0.00018605240188762515, "loss": 1.0127, "step": 10611 }, { "epoch": 0.272486403122347, "grad_norm": 0.70703125, "learning_rate": 0.00018605012769348916, "loss": 0.924, "step": 10612 }, { "epoch": 0.27251208031826885, "grad_norm": 0.80859375, "learning_rate": 0.00018604785332786307, "loss": 0.8813, "step": 10613 }, { "epoch": 0.2725377575141907, "grad_norm": 0.84765625, "learning_rate": 0.00018604557879075135, "loss": 0.8848, "step": 10614 }, { "epoch": 0.27256343471011246, "grad_norm": 0.83203125, "learning_rate": 0.00018604330408215856, "loss": 1.1731, "step": 10615 }, { "epoch": 0.2725891119060343, "grad_norm": 0.8203125, "learning_rate": 0.00018604102920208922, "loss": 1.1229, "step": 10616 }, { "epoch": 0.27261478910195613, "grad_norm": 0.75, "learning_rate": 0.0001860387541505479, "loss": 0.8663, "step": 10617 }, { "epoch": 0.2726404662978779, "grad_norm": 0.7890625, "learning_rate": 0.00018603647892753914, "loss": 1.0195, "step": 10618 }, { "epoch": 0.27266614349379975, "grad_norm": 0.87109375, "learning_rate": 0.00018603420353306742, "loss": 1.052, "step": 10619 }, { "epoch": 0.2726918206897216, "grad_norm": 0.859375, "learning_rate": 0.00018603192796713732, "loss": 1.0329, "step": 10620 }, { "epoch": 0.2727174978856434, "grad_norm": 0.79296875, "learning_rate": 0.00018602965222975332, "loss": 0.9322, "step": 10621 }, { "epoch": 0.2727431750815652, "grad_norm": 0.80859375, "learning_rate": 0.00018602737632092002, "loss": 0.8767, "step": 10622 }, { "epoch": 0.27276885227748704, "grad_norm": 0.7890625, "learning_rate": 0.00018602510024064194, "loss": 0.9225, "step": 10623 }, { "epoch": 0.2727945294734089, "grad_norm": 0.9375, "learning_rate": 0.00018602282398892358, "loss": 1.0584, "step": 10624 }, { "epoch": 0.27282020666933066, "grad_norm": 0.75, "learning_rate": 0.00018602054756576954, "loss": 0.9465, "step": 10625 }, { "epoch": 0.2728458838652525, "grad_norm": 0.7578125, "learning_rate": 0.00018601827097118433, "loss": 1.0084, "step": 10626 }, { "epoch": 0.27287156106117433, "grad_norm": 0.80078125, "learning_rate": 0.0001860159942051724, "loss": 0.9536, "step": 10627 }, { "epoch": 0.2728972382570961, "grad_norm": 0.8671875, "learning_rate": 0.00018601371726773843, "loss": 1.041, "step": 10628 }, { "epoch": 0.27292291545301794, "grad_norm": 0.83203125, "learning_rate": 0.00018601144015888688, "loss": 1.1647, "step": 10629 }, { "epoch": 0.2729485926489398, "grad_norm": 0.75390625, "learning_rate": 0.00018600916287862227, "loss": 1.0395, "step": 10630 }, { "epoch": 0.2729742698448616, "grad_norm": 0.8046875, "learning_rate": 0.00018600688542694916, "loss": 0.9122, "step": 10631 }, { "epoch": 0.2729999470407834, "grad_norm": 0.8359375, "learning_rate": 0.00018600460780387215, "loss": 1.0317, "step": 10632 }, { "epoch": 0.27302562423670523, "grad_norm": 0.82421875, "learning_rate": 0.00018600233000939568, "loss": 0.9856, "step": 10633 }, { "epoch": 0.27305130143262707, "grad_norm": 0.87109375, "learning_rate": 0.00018600005204352438, "loss": 0.9879, "step": 10634 }, { "epoch": 0.27307697862854885, "grad_norm": 0.76171875, "learning_rate": 0.0001859977739062627, "loss": 1.1017, "step": 10635 }, { "epoch": 0.2731026558244707, "grad_norm": 0.796875, "learning_rate": 0.00018599549559761527, "loss": 0.8845, "step": 10636 }, { "epoch": 0.2731283330203925, "grad_norm": 0.84375, "learning_rate": 0.00018599321711758652, "loss": 0.9312, "step": 10637 }, { "epoch": 0.2731540102163143, "grad_norm": 0.75, "learning_rate": 0.0001859909384661811, "loss": 1.0307, "step": 10638 }, { "epoch": 0.27317968741223614, "grad_norm": 0.796875, "learning_rate": 0.0001859886596434035, "loss": 0.9418, "step": 10639 }, { "epoch": 0.273205364608158, "grad_norm": 0.80078125, "learning_rate": 0.00018598638064925828, "loss": 1.0632, "step": 10640 }, { "epoch": 0.2732310418040798, "grad_norm": 0.8984375, "learning_rate": 0.00018598410148374997, "loss": 1.0369, "step": 10641 }, { "epoch": 0.2732567190000016, "grad_norm": 0.8125, "learning_rate": 0.0001859818221468831, "loss": 0.9858, "step": 10642 }, { "epoch": 0.2732823961959234, "grad_norm": 0.77734375, "learning_rate": 0.0001859795426386622, "loss": 0.9313, "step": 10643 }, { "epoch": 0.27330807339184526, "grad_norm": 0.76953125, "learning_rate": 0.00018597726295909188, "loss": 0.8792, "step": 10644 }, { "epoch": 0.27333375058776704, "grad_norm": 0.89453125, "learning_rate": 0.00018597498310817662, "loss": 0.9844, "step": 10645 }, { "epoch": 0.2733594277836889, "grad_norm": 0.7890625, "learning_rate": 0.000185972703085921, "loss": 0.9797, "step": 10646 }, { "epoch": 0.2733851049796107, "grad_norm": 0.7734375, "learning_rate": 0.00018597042289232954, "loss": 0.8343, "step": 10647 }, { "epoch": 0.2734107821755325, "grad_norm": 0.76953125, "learning_rate": 0.00018596814252740677, "loss": 0.9364, "step": 10648 }, { "epoch": 0.27343645937145433, "grad_norm": 0.765625, "learning_rate": 0.00018596586199115728, "loss": 0.9786, "step": 10649 }, { "epoch": 0.27346213656737617, "grad_norm": 0.79296875, "learning_rate": 0.0001859635812835856, "loss": 1.0024, "step": 10650 }, { "epoch": 0.273487813763298, "grad_norm": 1.109375, "learning_rate": 0.00018596130040469627, "loss": 0.9724, "step": 10651 }, { "epoch": 0.2735134909592198, "grad_norm": 0.80859375, "learning_rate": 0.00018595901935449384, "loss": 1.1362, "step": 10652 }, { "epoch": 0.2735391681551416, "grad_norm": 0.8203125, "learning_rate": 0.0001859567381329828, "loss": 0.9428, "step": 10653 }, { "epoch": 0.27356484535106346, "grad_norm": 0.765625, "learning_rate": 0.00018595445674016777, "loss": 0.9739, "step": 10654 }, { "epoch": 0.27359052254698524, "grad_norm": 0.78125, "learning_rate": 0.0001859521751760533, "loss": 0.9689, "step": 10655 }, { "epoch": 0.2736161997429071, "grad_norm": 0.88671875, "learning_rate": 0.00018594989344064387, "loss": 1.085, "step": 10656 }, { "epoch": 0.2736418769388289, "grad_norm": 0.953125, "learning_rate": 0.00018594761153394412, "loss": 0.9934, "step": 10657 }, { "epoch": 0.2736675541347507, "grad_norm": 0.83984375, "learning_rate": 0.0001859453294559585, "loss": 0.9974, "step": 10658 }, { "epoch": 0.2736932313306725, "grad_norm": 0.828125, "learning_rate": 0.0001859430472066916, "loss": 1.1187, "step": 10659 }, { "epoch": 0.27371890852659436, "grad_norm": 0.83984375, "learning_rate": 0.00018594076478614796, "loss": 0.9918, "step": 10660 }, { "epoch": 0.2737445857225162, "grad_norm": 0.82421875, "learning_rate": 0.00018593848219433217, "loss": 1.0928, "step": 10661 }, { "epoch": 0.273770262918438, "grad_norm": 0.875, "learning_rate": 0.00018593619943124874, "loss": 0.9062, "step": 10662 }, { "epoch": 0.2737959401143598, "grad_norm": 0.88671875, "learning_rate": 0.00018593391649690224, "loss": 1.0032, "step": 10663 }, { "epoch": 0.27382161731028165, "grad_norm": 0.796875, "learning_rate": 0.00018593163339129717, "loss": 0.9991, "step": 10664 }, { "epoch": 0.27384729450620343, "grad_norm": 0.78515625, "learning_rate": 0.00018592935011443816, "loss": 0.9397, "step": 10665 }, { "epoch": 0.27387297170212527, "grad_norm": 0.97265625, "learning_rate": 0.00018592706666632967, "loss": 1.115, "step": 10666 }, { "epoch": 0.2738986488980471, "grad_norm": 0.7890625, "learning_rate": 0.00018592478304697632, "loss": 0.95, "step": 10667 }, { "epoch": 0.2739243260939689, "grad_norm": 0.83984375, "learning_rate": 0.00018592249925638264, "loss": 0.9511, "step": 10668 }, { "epoch": 0.2739500032898907, "grad_norm": 0.85546875, "learning_rate": 0.00018592021529455322, "loss": 0.932, "step": 10669 }, { "epoch": 0.27397568048581256, "grad_norm": 0.875, "learning_rate": 0.00018591793116149252, "loss": 1.1406, "step": 10670 }, { "epoch": 0.2740013576817344, "grad_norm": 0.765625, "learning_rate": 0.00018591564685720515, "loss": 0.9678, "step": 10671 }, { "epoch": 0.2740270348776562, "grad_norm": 0.859375, "learning_rate": 0.00018591336238169565, "loss": 1.1147, "step": 10672 }, { "epoch": 0.274052712073578, "grad_norm": 1.046875, "learning_rate": 0.0001859110777349686, "loss": 1.0193, "step": 10673 }, { "epoch": 0.27407838926949984, "grad_norm": 0.73046875, "learning_rate": 0.00018590879291702854, "loss": 0.8972, "step": 10674 }, { "epoch": 0.2741040664654216, "grad_norm": 0.83984375, "learning_rate": 0.00018590650792788, "loss": 1.0956, "step": 10675 }, { "epoch": 0.27412974366134346, "grad_norm": 0.78125, "learning_rate": 0.00018590422276752752, "loss": 0.9719, "step": 10676 }, { "epoch": 0.2741554208572653, "grad_norm": 0.953125, "learning_rate": 0.00018590193743597575, "loss": 0.9537, "step": 10677 }, { "epoch": 0.2741810980531871, "grad_norm": 0.7578125, "learning_rate": 0.00018589965193322914, "loss": 1.0937, "step": 10678 }, { "epoch": 0.2742067752491089, "grad_norm": 0.8359375, "learning_rate": 0.00018589736625929226, "loss": 0.9696, "step": 10679 }, { "epoch": 0.27423245244503075, "grad_norm": 0.80859375, "learning_rate": 0.00018589508041416973, "loss": 1.0213, "step": 10680 }, { "epoch": 0.2742581296409526, "grad_norm": 0.78125, "learning_rate": 0.00018589279439786607, "loss": 0.956, "step": 10681 }, { "epoch": 0.27428380683687437, "grad_norm": 0.8515625, "learning_rate": 0.0001858905082103858, "loss": 0.9559, "step": 10682 }, { "epoch": 0.2743094840327962, "grad_norm": 0.8671875, "learning_rate": 0.00018588822185173348, "loss": 0.8642, "step": 10683 }, { "epoch": 0.27433516122871804, "grad_norm": 0.9296875, "learning_rate": 0.00018588593532191375, "loss": 1.0092, "step": 10684 }, { "epoch": 0.2743608384246398, "grad_norm": 0.734375, "learning_rate": 0.00018588364862093108, "loss": 0.9441, "step": 10685 }, { "epoch": 0.27438651562056166, "grad_norm": 0.80078125, "learning_rate": 0.00018588136174879004, "loss": 1.0249, "step": 10686 }, { "epoch": 0.2744121928164835, "grad_norm": 0.81640625, "learning_rate": 0.00018587907470549525, "loss": 0.956, "step": 10687 }, { "epoch": 0.27443787001240527, "grad_norm": 0.82421875, "learning_rate": 0.0001858767874910512, "loss": 0.9365, "step": 10688 }, { "epoch": 0.2744635472083271, "grad_norm": 1.109375, "learning_rate": 0.00018587450010546246, "loss": 0.9945, "step": 10689 }, { "epoch": 0.27448922440424894, "grad_norm": 0.98828125, "learning_rate": 0.0001858722125487336, "loss": 1.0871, "step": 10690 }, { "epoch": 0.2745149016001708, "grad_norm": 0.80078125, "learning_rate": 0.00018586992482086918, "loss": 0.9897, "step": 10691 }, { "epoch": 0.27454057879609256, "grad_norm": 0.82421875, "learning_rate": 0.00018586763692187374, "loss": 1.0308, "step": 10692 }, { "epoch": 0.2745662559920144, "grad_norm": 0.8515625, "learning_rate": 0.00018586534885175189, "loss": 1.2169, "step": 10693 }, { "epoch": 0.27459193318793623, "grad_norm": 0.859375, "learning_rate": 0.00018586306061050813, "loss": 0.9477, "step": 10694 }, { "epoch": 0.274617610383858, "grad_norm": 0.8203125, "learning_rate": 0.00018586077219814707, "loss": 0.9652, "step": 10695 }, { "epoch": 0.27464328757977985, "grad_norm": 0.83203125, "learning_rate": 0.00018585848361467322, "loss": 1.0439, "step": 10696 }, { "epoch": 0.2746689647757017, "grad_norm": 0.8515625, "learning_rate": 0.00018585619486009117, "loss": 1.007, "step": 10697 }, { "epoch": 0.27469464197162347, "grad_norm": 0.83203125, "learning_rate": 0.0001858539059344055, "loss": 0.965, "step": 10698 }, { "epoch": 0.2747203191675453, "grad_norm": 0.85546875, "learning_rate": 0.00018585161683762076, "loss": 0.9832, "step": 10699 }, { "epoch": 0.27474599636346714, "grad_norm": 0.8359375, "learning_rate": 0.00018584932756974146, "loss": 1.0119, "step": 10700 }, { "epoch": 0.274771673559389, "grad_norm": 0.8203125, "learning_rate": 0.00018584703813077225, "loss": 1.1297, "step": 10701 }, { "epoch": 0.27479735075531075, "grad_norm": 0.7890625, "learning_rate": 0.0001858447485207176, "loss": 0.8678, "step": 10702 }, { "epoch": 0.2748230279512326, "grad_norm": 0.88671875, "learning_rate": 0.00018584245873958216, "loss": 1.0569, "step": 10703 }, { "epoch": 0.2748487051471544, "grad_norm": 0.9296875, "learning_rate": 0.00018584016878737042, "loss": 1.0398, "step": 10704 }, { "epoch": 0.2748743823430762, "grad_norm": 0.8515625, "learning_rate": 0.000185837878664087, "loss": 0.8975, "step": 10705 }, { "epoch": 0.27490005953899804, "grad_norm": 0.8046875, "learning_rate": 0.00018583558836973645, "loss": 0.9091, "step": 10706 }, { "epoch": 0.2749257367349199, "grad_norm": 0.8984375, "learning_rate": 0.0001858332979043233, "loss": 0.8707, "step": 10707 }, { "epoch": 0.27495141393084166, "grad_norm": 0.71875, "learning_rate": 0.00018583100726785216, "loss": 0.8482, "step": 10708 }, { "epoch": 0.2749770911267635, "grad_norm": 0.80078125, "learning_rate": 0.00018582871646032759, "loss": 1.0397, "step": 10709 }, { "epoch": 0.27500276832268533, "grad_norm": 0.734375, "learning_rate": 0.0001858264254817541, "loss": 1.0126, "step": 10710 }, { "epoch": 0.27502844551860717, "grad_norm": 0.93359375, "learning_rate": 0.00018582413433213633, "loss": 0.9044, "step": 10711 }, { "epoch": 0.27505412271452895, "grad_norm": 0.82421875, "learning_rate": 0.0001858218430114788, "loss": 0.9375, "step": 10712 }, { "epoch": 0.2750797999104508, "grad_norm": 0.88671875, "learning_rate": 0.00018581955151978608, "loss": 0.9644, "step": 10713 }, { "epoch": 0.2751054771063726, "grad_norm": 0.8515625, "learning_rate": 0.00018581725985706275, "loss": 0.943, "step": 10714 }, { "epoch": 0.2751311543022944, "grad_norm": 0.7734375, "learning_rate": 0.00018581496802331338, "loss": 1.0584, "step": 10715 }, { "epoch": 0.27515683149821624, "grad_norm": 0.84375, "learning_rate": 0.00018581267601854254, "loss": 1.0293, "step": 10716 }, { "epoch": 0.2751825086941381, "grad_norm": 0.8125, "learning_rate": 0.00018581038384275477, "loss": 1.0487, "step": 10717 }, { "epoch": 0.27520818589005985, "grad_norm": 0.8828125, "learning_rate": 0.00018580809149595467, "loss": 0.9949, "step": 10718 }, { "epoch": 0.2752338630859817, "grad_norm": 0.80859375, "learning_rate": 0.00018580579897814676, "loss": 1.0006, "step": 10719 }, { "epoch": 0.2752595402819035, "grad_norm": 0.8203125, "learning_rate": 0.0001858035062893357, "loss": 1.0099, "step": 10720 }, { "epoch": 0.27528521747782536, "grad_norm": 0.73046875, "learning_rate": 0.00018580121342952596, "loss": 0.8932, "step": 10721 }, { "epoch": 0.27531089467374714, "grad_norm": 0.74609375, "learning_rate": 0.00018579892039872217, "loss": 0.9063, "step": 10722 }, { "epoch": 0.275336571869669, "grad_norm": 0.78515625, "learning_rate": 0.00018579662719692887, "loss": 0.9672, "step": 10723 }, { "epoch": 0.2753622490655908, "grad_norm": 0.8125, "learning_rate": 0.00018579433382415064, "loss": 0.9411, "step": 10724 }, { "epoch": 0.2753879262615126, "grad_norm": 0.86328125, "learning_rate": 0.00018579204028039209, "loss": 0.9837, "step": 10725 }, { "epoch": 0.27541360345743443, "grad_norm": 0.8515625, "learning_rate": 0.00018578974656565772, "loss": 1.0687, "step": 10726 }, { "epoch": 0.27543928065335627, "grad_norm": 0.81640625, "learning_rate": 0.00018578745267995212, "loss": 1.0017, "step": 10727 }, { "epoch": 0.27546495784927805, "grad_norm": 0.8125, "learning_rate": 0.0001857851586232799, "loss": 1.0917, "step": 10728 }, { "epoch": 0.2754906350451999, "grad_norm": 0.83984375, "learning_rate": 0.00018578286439564561, "loss": 0.8999, "step": 10729 }, { "epoch": 0.2755163122411217, "grad_norm": 0.84375, "learning_rate": 0.0001857805699970538, "loss": 1.1274, "step": 10730 }, { "epoch": 0.27554198943704356, "grad_norm": 1.0859375, "learning_rate": 0.00018577827542750908, "loss": 0.9754, "step": 10731 }, { "epoch": 0.27556766663296534, "grad_norm": 0.96484375, "learning_rate": 0.000185775980687016, "loss": 1.113, "step": 10732 }, { "epoch": 0.27559334382888717, "grad_norm": 0.78125, "learning_rate": 0.00018577368577557914, "loss": 1.0135, "step": 10733 }, { "epoch": 0.275619021024809, "grad_norm": 0.828125, "learning_rate": 0.0001857713906932031, "loss": 0.9243, "step": 10734 }, { "epoch": 0.2756446982207308, "grad_norm": 0.71875, "learning_rate": 0.0001857690954398924, "loss": 1.0522, "step": 10735 }, { "epoch": 0.2756703754166526, "grad_norm": 0.76171875, "learning_rate": 0.00018576680001565162, "loss": 0.8699, "step": 10736 }, { "epoch": 0.27569605261257446, "grad_norm": 0.78125, "learning_rate": 0.00018576450442048538, "loss": 0.926, "step": 10737 }, { "epoch": 0.27572172980849624, "grad_norm": 0.875, "learning_rate": 0.00018576220865439822, "loss": 0.9096, "step": 10738 }, { "epoch": 0.2757474070044181, "grad_norm": 0.78125, "learning_rate": 0.0001857599127173947, "loss": 1.0549, "step": 10739 }, { "epoch": 0.2757730842003399, "grad_norm": 0.82421875, "learning_rate": 0.00018575761660947948, "loss": 0.9338, "step": 10740 }, { "epoch": 0.2757987613962617, "grad_norm": 1.1171875, "learning_rate": 0.00018575532033065703, "loss": 1.0175, "step": 10741 }, { "epoch": 0.27582443859218353, "grad_norm": 0.828125, "learning_rate": 0.00018575302388093202, "loss": 0.9873, "step": 10742 }, { "epoch": 0.27585011578810537, "grad_norm": 0.82421875, "learning_rate": 0.0001857507272603089, "loss": 1.0589, "step": 10743 }, { "epoch": 0.2758757929840272, "grad_norm": 0.87109375, "learning_rate": 0.00018574843046879238, "loss": 0.973, "step": 10744 }, { "epoch": 0.275901470179949, "grad_norm": 0.79296875, "learning_rate": 0.000185746133506387, "loss": 0.9433, "step": 10745 }, { "epoch": 0.2759271473758708, "grad_norm": 0.73828125, "learning_rate": 0.00018574383637309729, "loss": 0.9033, "step": 10746 }, { "epoch": 0.27595282457179265, "grad_norm": 0.87109375, "learning_rate": 0.00018574153906892788, "loss": 1.0878, "step": 10747 }, { "epoch": 0.27597850176771443, "grad_norm": 0.87109375, "learning_rate": 0.0001857392415938833, "loss": 1.0594, "step": 10748 }, { "epoch": 0.27600417896363627, "grad_norm": 0.7578125, "learning_rate": 0.0001857369439479682, "loss": 0.9807, "step": 10749 }, { "epoch": 0.2760298561595581, "grad_norm": 0.796875, "learning_rate": 0.00018573464613118708, "loss": 1.0027, "step": 10750 }, { "epoch": 0.2760555333554799, "grad_norm": 0.84375, "learning_rate": 0.00018573234814354454, "loss": 1.0376, "step": 10751 }, { "epoch": 0.2760812105514017, "grad_norm": 0.8203125, "learning_rate": 0.0001857300499850452, "loss": 1.0475, "step": 10752 }, { "epoch": 0.27610688774732356, "grad_norm": 0.875, "learning_rate": 0.00018572775165569364, "loss": 1.107, "step": 10753 }, { "epoch": 0.2761325649432454, "grad_norm": 0.80859375, "learning_rate": 0.00018572545315549433, "loss": 1.0404, "step": 10754 }, { "epoch": 0.2761582421391672, "grad_norm": 0.7890625, "learning_rate": 0.000185723154484452, "loss": 0.9427, "step": 10755 }, { "epoch": 0.276183919335089, "grad_norm": 0.84375, "learning_rate": 0.00018572085564257118, "loss": 0.9711, "step": 10756 }, { "epoch": 0.27620959653101085, "grad_norm": 0.8671875, "learning_rate": 0.0001857185566298564, "loss": 1.1009, "step": 10757 }, { "epoch": 0.27623527372693263, "grad_norm": 0.875, "learning_rate": 0.0001857162574463123, "loss": 1.091, "step": 10758 }, { "epoch": 0.27626095092285446, "grad_norm": 0.75390625, "learning_rate": 0.00018571395809194346, "loss": 0.9804, "step": 10759 }, { "epoch": 0.2762866281187763, "grad_norm": 0.859375, "learning_rate": 0.00018571165856675438, "loss": 0.905, "step": 10760 }, { "epoch": 0.2763123053146981, "grad_norm": 0.8359375, "learning_rate": 0.0001857093588707498, "loss": 0.9262, "step": 10761 }, { "epoch": 0.2763379825106199, "grad_norm": 0.8828125, "learning_rate": 0.00018570705900393413, "loss": 1.1186, "step": 10762 }, { "epoch": 0.27636365970654175, "grad_norm": 0.765625, "learning_rate": 0.00018570475896631205, "loss": 1.1145, "step": 10763 }, { "epoch": 0.2763893369024636, "grad_norm": 0.81640625, "learning_rate": 0.00018570245875788814, "loss": 0.9152, "step": 10764 }, { "epoch": 0.27641501409838537, "grad_norm": 0.859375, "learning_rate": 0.00018570015837866697, "loss": 1.0191, "step": 10765 }, { "epoch": 0.2764406912943072, "grad_norm": 0.83984375, "learning_rate": 0.00018569785782865313, "loss": 0.9968, "step": 10766 }, { "epoch": 0.27646636849022904, "grad_norm": 0.91015625, "learning_rate": 0.0001856955571078512, "loss": 1.054, "step": 10767 }, { "epoch": 0.2764920456861508, "grad_norm": 2.75, "learning_rate": 0.00018569325621626575, "loss": 0.9875, "step": 10768 }, { "epoch": 0.27651772288207266, "grad_norm": 0.8359375, "learning_rate": 0.0001856909551539014, "loss": 1.0103, "step": 10769 }, { "epoch": 0.2765434000779945, "grad_norm": 0.81640625, "learning_rate": 0.0001856886539207627, "loss": 1.0049, "step": 10770 }, { "epoch": 0.2765690772739163, "grad_norm": 0.87109375, "learning_rate": 0.00018568635251685426, "loss": 1.1602, "step": 10771 }, { "epoch": 0.2765947544698381, "grad_norm": 0.859375, "learning_rate": 0.00018568405094218066, "loss": 0.9811, "step": 10772 }, { "epoch": 0.27662043166575995, "grad_norm": 0.828125, "learning_rate": 0.00018568174919674648, "loss": 1.0764, "step": 10773 }, { "epoch": 0.2766461088616818, "grad_norm": 0.8671875, "learning_rate": 0.00018567944728055632, "loss": 1.0228, "step": 10774 }, { "epoch": 0.27667178605760356, "grad_norm": 0.75390625, "learning_rate": 0.00018567714519361476, "loss": 0.8988, "step": 10775 }, { "epoch": 0.2766974632535254, "grad_norm": 0.77734375, "learning_rate": 0.0001856748429359264, "loss": 0.9723, "step": 10776 }, { "epoch": 0.27672314044944724, "grad_norm": 0.796875, "learning_rate": 0.0001856725405074958, "loss": 0.9138, "step": 10777 }, { "epoch": 0.276748817645369, "grad_norm": 0.78125, "learning_rate": 0.0001856702379083276, "loss": 1.0968, "step": 10778 }, { "epoch": 0.27677449484129085, "grad_norm": 0.79296875, "learning_rate": 0.00018566793513842629, "loss": 1.0268, "step": 10779 }, { "epoch": 0.2768001720372127, "grad_norm": 0.76953125, "learning_rate": 0.0001856656321977966, "loss": 0.903, "step": 10780 }, { "epoch": 0.27682584923313447, "grad_norm": 0.83984375, "learning_rate": 0.00018566332908644297, "loss": 0.9784, "step": 10781 }, { "epoch": 0.2768515264290563, "grad_norm": 0.8984375, "learning_rate": 0.0001856610258043701, "loss": 1.0529, "step": 10782 }, { "epoch": 0.27687720362497814, "grad_norm": 0.8828125, "learning_rate": 0.00018565872235158252, "loss": 1.0095, "step": 10783 }, { "epoch": 0.2769028808209, "grad_norm": 0.7890625, "learning_rate": 0.0001856564187280849, "loss": 0.9797, "step": 10784 }, { "epoch": 0.27692855801682176, "grad_norm": 0.7734375, "learning_rate": 0.00018565411493388172, "loss": 1.0455, "step": 10785 }, { "epoch": 0.2769542352127436, "grad_norm": 0.8203125, "learning_rate": 0.00018565181096897763, "loss": 0.9673, "step": 10786 }, { "epoch": 0.27697991240866543, "grad_norm": 1.1796875, "learning_rate": 0.00018564950683337724, "loss": 1.0129, "step": 10787 }, { "epoch": 0.2770055896045872, "grad_norm": 0.765625, "learning_rate": 0.00018564720252708513, "loss": 0.7906, "step": 10788 }, { "epoch": 0.27703126680050905, "grad_norm": 0.83984375, "learning_rate": 0.00018564489805010585, "loss": 1.0294, "step": 10789 }, { "epoch": 0.2770569439964309, "grad_norm": 0.81640625, "learning_rate": 0.00018564259340244404, "loss": 0.917, "step": 10790 }, { "epoch": 0.27708262119235266, "grad_norm": 0.88671875, "learning_rate": 0.00018564028858410428, "loss": 1.0261, "step": 10791 }, { "epoch": 0.2771082983882745, "grad_norm": 0.765625, "learning_rate": 0.00018563798359509114, "loss": 0.8932, "step": 10792 }, { "epoch": 0.27713397558419633, "grad_norm": 0.85546875, "learning_rate": 0.00018563567843540925, "loss": 1.0075, "step": 10793 }, { "epoch": 0.27715965278011817, "grad_norm": 0.8515625, "learning_rate": 0.0001856333731050632, "loss": 0.9166, "step": 10794 }, { "epoch": 0.27718532997603995, "grad_norm": 1.7890625, "learning_rate": 0.00018563106760405756, "loss": 0.8886, "step": 10795 }, { "epoch": 0.2772110071719618, "grad_norm": 0.86328125, "learning_rate": 0.00018562876193239694, "loss": 1.0612, "step": 10796 }, { "epoch": 0.2772366843678836, "grad_norm": 0.8671875, "learning_rate": 0.00018562645609008593, "loss": 1.0051, "step": 10797 }, { "epoch": 0.2772623615638054, "grad_norm": 1.515625, "learning_rate": 0.00018562415007712915, "loss": 1.0929, "step": 10798 }, { "epoch": 0.27728803875972724, "grad_norm": 0.77734375, "learning_rate": 0.0001856218438935311, "loss": 0.9516, "step": 10799 }, { "epoch": 0.2773137159556491, "grad_norm": 0.82421875, "learning_rate": 0.00018561953753929653, "loss": 1.0034, "step": 10800 }, { "epoch": 0.27733939315157086, "grad_norm": 0.8125, "learning_rate": 0.00018561723101442992, "loss": 0.9132, "step": 10801 }, { "epoch": 0.2773650703474927, "grad_norm": 0.81640625, "learning_rate": 0.00018561492431893591, "loss": 0.9725, "step": 10802 }, { "epoch": 0.27739074754341453, "grad_norm": 0.75390625, "learning_rate": 0.00018561261745281908, "loss": 1.0254, "step": 10803 }, { "epoch": 0.27741642473933636, "grad_norm": 0.8125, "learning_rate": 0.00018561031041608407, "loss": 0.9094, "step": 10804 }, { "epoch": 0.27744210193525815, "grad_norm": 1.0, "learning_rate": 0.00018560800320873542, "loss": 0.8998, "step": 10805 }, { "epoch": 0.27746777913118, "grad_norm": 0.74609375, "learning_rate": 0.00018560569583077775, "loss": 0.9021, "step": 10806 }, { "epoch": 0.2774934563271018, "grad_norm": 0.83984375, "learning_rate": 0.00018560338828221566, "loss": 0.9601, "step": 10807 }, { "epoch": 0.2775191335230236, "grad_norm": 0.859375, "learning_rate": 0.00018560108056305373, "loss": 1.1385, "step": 10808 }, { "epoch": 0.27754481071894543, "grad_norm": 0.78515625, "learning_rate": 0.0001855987726732966, "loss": 0.8977, "step": 10809 }, { "epoch": 0.27757048791486727, "grad_norm": 0.71875, "learning_rate": 0.0001855964646129489, "loss": 0.9267, "step": 10810 }, { "epoch": 0.27759616511078905, "grad_norm": 0.7734375, "learning_rate": 0.00018559415638201508, "loss": 0.9577, "step": 10811 }, { "epoch": 0.2776218423067109, "grad_norm": 0.84375, "learning_rate": 0.0001855918479804999, "loss": 1.0114, "step": 10812 }, { "epoch": 0.2776475195026327, "grad_norm": 0.765625, "learning_rate": 0.0001855895394084079, "loss": 0.9694, "step": 10813 }, { "epoch": 0.27767319669855456, "grad_norm": 0.85546875, "learning_rate": 0.00018558723066574366, "loss": 1.049, "step": 10814 }, { "epoch": 0.27769887389447634, "grad_norm": 0.7421875, "learning_rate": 0.0001855849217525118, "loss": 0.8911, "step": 10815 }, { "epoch": 0.2777245510903982, "grad_norm": 0.828125, "learning_rate": 0.00018558261266871692, "loss": 0.9811, "step": 10816 }, { "epoch": 0.27775022828632, "grad_norm": 0.88671875, "learning_rate": 0.00018558030341436364, "loss": 0.8929, "step": 10817 }, { "epoch": 0.2777759054822418, "grad_norm": 0.83984375, "learning_rate": 0.00018557799398945653, "loss": 1.0016, "step": 10818 }, { "epoch": 0.2778015826781636, "grad_norm": 0.78515625, "learning_rate": 0.0001855756843940002, "loss": 0.9342, "step": 10819 }, { "epoch": 0.27782725987408546, "grad_norm": 0.82421875, "learning_rate": 0.0001855733746279993, "loss": 1.0364, "step": 10820 }, { "epoch": 0.27785293707000724, "grad_norm": 0.84375, "learning_rate": 0.00018557106469145834, "loss": 0.9854, "step": 10821 }, { "epoch": 0.2778786142659291, "grad_norm": 0.87109375, "learning_rate": 0.000185568754584382, "loss": 1.0178, "step": 10822 }, { "epoch": 0.2779042914618509, "grad_norm": 0.74609375, "learning_rate": 0.00018556644430677488, "loss": 0.9167, "step": 10823 }, { "epoch": 0.27792996865777275, "grad_norm": 0.94140625, "learning_rate": 0.00018556413385864155, "loss": 0.8885, "step": 10824 }, { "epoch": 0.27795564585369453, "grad_norm": 0.84765625, "learning_rate": 0.00018556182323998662, "loss": 1.0538, "step": 10825 }, { "epoch": 0.27798132304961637, "grad_norm": 0.796875, "learning_rate": 0.00018555951245081476, "loss": 0.9841, "step": 10826 }, { "epoch": 0.2780070002455382, "grad_norm": 0.7734375, "learning_rate": 0.00018555720149113046, "loss": 0.9498, "step": 10827 }, { "epoch": 0.27803267744146, "grad_norm": 0.81640625, "learning_rate": 0.0001855548903609384, "loss": 1.0964, "step": 10828 }, { "epoch": 0.2780583546373818, "grad_norm": 0.74609375, "learning_rate": 0.00018555257906024316, "loss": 0.9621, "step": 10829 }, { "epoch": 0.27808403183330366, "grad_norm": 0.72265625, "learning_rate": 0.0001855502675890494, "loss": 1.0261, "step": 10830 }, { "epoch": 0.27810970902922544, "grad_norm": 0.7734375, "learning_rate": 0.00018554795594736165, "loss": 0.9865, "step": 10831 }, { "epoch": 0.2781353862251473, "grad_norm": 0.7734375, "learning_rate": 0.00018554564413518455, "loss": 1.0326, "step": 10832 }, { "epoch": 0.2781610634210691, "grad_norm": 0.87890625, "learning_rate": 0.00018554333215252273, "loss": 0.9759, "step": 10833 }, { "epoch": 0.27818674061699095, "grad_norm": 0.87109375, "learning_rate": 0.00018554101999938076, "loss": 0.8866, "step": 10834 }, { "epoch": 0.2782124178129127, "grad_norm": 1.0703125, "learning_rate": 0.00018553870767576327, "loss": 1.1801, "step": 10835 }, { "epoch": 0.27823809500883456, "grad_norm": 0.734375, "learning_rate": 0.00018553639518167488, "loss": 0.9094, "step": 10836 }, { "epoch": 0.2782637722047564, "grad_norm": 1.2109375, "learning_rate": 0.00018553408251712015, "loss": 0.8422, "step": 10837 }, { "epoch": 0.2782894494006782, "grad_norm": 0.78515625, "learning_rate": 0.00018553176968210373, "loss": 1.1181, "step": 10838 }, { "epoch": 0.2783151265966, "grad_norm": 0.77734375, "learning_rate": 0.00018552945667663022, "loss": 0.9033, "step": 10839 }, { "epoch": 0.27834080379252185, "grad_norm": 0.74609375, "learning_rate": 0.0001855271435007042, "loss": 0.96, "step": 10840 }, { "epoch": 0.27836648098844363, "grad_norm": 0.78515625, "learning_rate": 0.00018552483015433035, "loss": 0.9149, "step": 10841 }, { "epoch": 0.27839215818436547, "grad_norm": 0.79296875, "learning_rate": 0.00018552251663751324, "loss": 1.0402, "step": 10842 }, { "epoch": 0.2784178353802873, "grad_norm": 0.8046875, "learning_rate": 0.00018552020295025746, "loss": 0.9231, "step": 10843 }, { "epoch": 0.27844351257620914, "grad_norm": 0.91015625, "learning_rate": 0.00018551788909256764, "loss": 0.9863, "step": 10844 }, { "epoch": 0.2784691897721309, "grad_norm": 0.8125, "learning_rate": 0.0001855155750644484, "loss": 1.0443, "step": 10845 }, { "epoch": 0.27849486696805276, "grad_norm": 0.8203125, "learning_rate": 0.00018551326086590434, "loss": 1.0536, "step": 10846 }, { "epoch": 0.2785205441639746, "grad_norm": 0.75390625, "learning_rate": 0.0001855109464969401, "loss": 1.0906, "step": 10847 }, { "epoch": 0.2785462213598964, "grad_norm": 0.82421875, "learning_rate": 0.00018550863195756023, "loss": 1.1944, "step": 10848 }, { "epoch": 0.2785718985558182, "grad_norm": 1.3671875, "learning_rate": 0.0001855063172477694, "loss": 1.0273, "step": 10849 }, { "epoch": 0.27859757575174005, "grad_norm": 0.8125, "learning_rate": 0.00018550400236757222, "loss": 0.9871, "step": 10850 }, { "epoch": 0.2786232529476618, "grad_norm": 0.74609375, "learning_rate": 0.00018550168731697327, "loss": 0.9767, "step": 10851 }, { "epoch": 0.27864893014358366, "grad_norm": 0.8984375, "learning_rate": 0.00018549937209597715, "loss": 0.9834, "step": 10852 }, { "epoch": 0.2786746073395055, "grad_norm": 0.8515625, "learning_rate": 0.00018549705670458852, "loss": 0.8732, "step": 10853 }, { "epoch": 0.27870028453542733, "grad_norm": 0.77734375, "learning_rate": 0.000185494741142812, "loss": 0.8825, "step": 10854 }, { "epoch": 0.2787259617313491, "grad_norm": 0.81640625, "learning_rate": 0.00018549242541065217, "loss": 0.9189, "step": 10855 }, { "epoch": 0.27875163892727095, "grad_norm": 0.80078125, "learning_rate": 0.00018549010950811366, "loss": 0.8352, "step": 10856 }, { "epoch": 0.2787773161231928, "grad_norm": 0.76953125, "learning_rate": 0.0001854877934352011, "loss": 1.0117, "step": 10857 }, { "epoch": 0.27880299331911457, "grad_norm": 0.76171875, "learning_rate": 0.00018548547719191905, "loss": 1.0125, "step": 10858 }, { "epoch": 0.2788286705150364, "grad_norm": 0.79296875, "learning_rate": 0.00018548316077827222, "loss": 0.8874, "step": 10859 }, { "epoch": 0.27885434771095824, "grad_norm": 0.82421875, "learning_rate": 0.00018548084419426513, "loss": 1.0999, "step": 10860 }, { "epoch": 0.27888002490688, "grad_norm": 0.83203125, "learning_rate": 0.00018547852743990245, "loss": 0.854, "step": 10861 }, { "epoch": 0.27890570210280186, "grad_norm": 0.765625, "learning_rate": 0.00018547621051518882, "loss": 0.9697, "step": 10862 }, { "epoch": 0.2789313792987237, "grad_norm": 0.75, "learning_rate": 0.00018547389342012878, "loss": 1.1409, "step": 10863 }, { "epoch": 0.27895705649464553, "grad_norm": 0.79296875, "learning_rate": 0.000185471576154727, "loss": 0.9644, "step": 10864 }, { "epoch": 0.2789827336905673, "grad_norm": 0.72265625, "learning_rate": 0.00018546925871898808, "loss": 0.9183, "step": 10865 }, { "epoch": 0.27900841088648914, "grad_norm": 0.82421875, "learning_rate": 0.00018546694111291667, "loss": 0.9674, "step": 10866 }, { "epoch": 0.279034088082411, "grad_norm": 0.79296875, "learning_rate": 0.00018546462333651737, "loss": 0.971, "step": 10867 }, { "epoch": 0.27905976527833276, "grad_norm": 0.796875, "learning_rate": 0.00018546230538979476, "loss": 1.0058, "step": 10868 }, { "epoch": 0.2790854424742546, "grad_norm": 0.82421875, "learning_rate": 0.00018545998727275353, "loss": 0.9668, "step": 10869 }, { "epoch": 0.27911111967017643, "grad_norm": 0.83203125, "learning_rate": 0.00018545766898539826, "loss": 0.9885, "step": 10870 }, { "epoch": 0.2791367968660982, "grad_norm": 0.85546875, "learning_rate": 0.00018545535052773358, "loss": 0.9227, "step": 10871 }, { "epoch": 0.27916247406202005, "grad_norm": 0.75390625, "learning_rate": 0.00018545303189976408, "loss": 0.9184, "step": 10872 }, { "epoch": 0.2791881512579419, "grad_norm": 0.8125, "learning_rate": 0.00018545071310149442, "loss": 1.0235, "step": 10873 }, { "epoch": 0.2792138284538637, "grad_norm": 0.80078125, "learning_rate": 0.0001854483941329292, "loss": 0.9454, "step": 10874 }, { "epoch": 0.2792395056497855, "grad_norm": 0.8125, "learning_rate": 0.00018544607499407304, "loss": 1.1094, "step": 10875 }, { "epoch": 0.27926518284570734, "grad_norm": 0.89453125, "learning_rate": 0.00018544375568493063, "loss": 0.924, "step": 10876 }, { "epoch": 0.2792908600416292, "grad_norm": 0.76171875, "learning_rate": 0.00018544143620550648, "loss": 1.0384, "step": 10877 }, { "epoch": 0.27931653723755095, "grad_norm": 0.80078125, "learning_rate": 0.00018543911655580526, "loss": 0.7778, "step": 10878 }, { "epoch": 0.2793422144334728, "grad_norm": 0.74609375, "learning_rate": 0.0001854367967358316, "loss": 0.9885, "step": 10879 }, { "epoch": 0.2793678916293946, "grad_norm": 0.7265625, "learning_rate": 0.00018543447674559016, "loss": 0.9155, "step": 10880 }, { "epoch": 0.2793935688253164, "grad_norm": 0.796875, "learning_rate": 0.00018543215658508547, "loss": 0.8482, "step": 10881 }, { "epoch": 0.27941924602123824, "grad_norm": 0.84765625, "learning_rate": 0.00018542983625432228, "loss": 1.0502, "step": 10882 }, { "epoch": 0.2794449232171601, "grad_norm": 0.796875, "learning_rate": 0.0001854275157533051, "loss": 1.0102, "step": 10883 }, { "epoch": 0.2794706004130819, "grad_norm": 0.74609375, "learning_rate": 0.0001854251950820386, "loss": 0.8012, "step": 10884 }, { "epoch": 0.2794962776090037, "grad_norm": 0.80078125, "learning_rate": 0.0001854228742405274, "loss": 0.9358, "step": 10885 }, { "epoch": 0.27952195480492553, "grad_norm": 0.8203125, "learning_rate": 0.00018542055322877613, "loss": 0.9791, "step": 10886 }, { "epoch": 0.27954763200084737, "grad_norm": 0.83984375, "learning_rate": 0.0001854182320467894, "loss": 1.0231, "step": 10887 }, { "epoch": 0.27957330919676915, "grad_norm": 0.796875, "learning_rate": 0.00018541591069457186, "loss": 0.9264, "step": 10888 }, { "epoch": 0.279598986392691, "grad_norm": 0.78515625, "learning_rate": 0.00018541358917212815, "loss": 0.9107, "step": 10889 }, { "epoch": 0.2796246635886128, "grad_norm": 0.7890625, "learning_rate": 0.00018541126747946286, "loss": 0.9462, "step": 10890 }, { "epoch": 0.2796503407845346, "grad_norm": 0.76171875, "learning_rate": 0.00018540894561658061, "loss": 0.8338, "step": 10891 }, { "epoch": 0.27967601798045644, "grad_norm": 0.87109375, "learning_rate": 0.00018540662358348606, "loss": 1.1243, "step": 10892 }, { "epoch": 0.2797016951763783, "grad_norm": 0.7734375, "learning_rate": 0.00018540430138018382, "loss": 0.9127, "step": 10893 }, { "epoch": 0.2797273723723001, "grad_norm": 0.87890625, "learning_rate": 0.0001854019790066785, "loss": 1.0809, "step": 10894 }, { "epoch": 0.2797530495682219, "grad_norm": 0.7890625, "learning_rate": 0.0001853996564629748, "loss": 0.8076, "step": 10895 }, { "epoch": 0.2797787267641437, "grad_norm": 0.7890625, "learning_rate": 0.00018539733374907727, "loss": 0.9725, "step": 10896 }, { "epoch": 0.27980440396006556, "grad_norm": 0.76171875, "learning_rate": 0.00018539501086499056, "loss": 0.9883, "step": 10897 }, { "epoch": 0.27983008115598734, "grad_norm": 0.80859375, "learning_rate": 0.0001853926878107193, "loss": 0.9574, "step": 10898 }, { "epoch": 0.2798557583519092, "grad_norm": 0.78125, "learning_rate": 0.00018539036458626817, "loss": 0.9473, "step": 10899 }, { "epoch": 0.279881435547831, "grad_norm": 0.87109375, "learning_rate": 0.0001853880411916417, "loss": 0.9216, "step": 10900 }, { "epoch": 0.2799071127437528, "grad_norm": 0.78515625, "learning_rate": 0.00018538571762684463, "loss": 0.8761, "step": 10901 }, { "epoch": 0.27993278993967463, "grad_norm": 0.85546875, "learning_rate": 0.0001853833938918815, "loss": 1.0367, "step": 10902 }, { "epoch": 0.27995846713559647, "grad_norm": 0.703125, "learning_rate": 0.000185381069986757, "loss": 0.9185, "step": 10903 }, { "epoch": 0.2799841443315183, "grad_norm": 0.87890625, "learning_rate": 0.00018537874591147575, "loss": 1.0027, "step": 10904 }, { "epoch": 0.2800098215274401, "grad_norm": 1.2109375, "learning_rate": 0.00018537642166604234, "loss": 1.1006, "step": 10905 }, { "epoch": 0.2800354987233619, "grad_norm": 0.77734375, "learning_rate": 0.00018537409725046146, "loss": 1.0527, "step": 10906 }, { "epoch": 0.28006117591928376, "grad_norm": 0.8515625, "learning_rate": 0.0001853717726647377, "loss": 1.0475, "step": 10907 }, { "epoch": 0.28008685311520554, "grad_norm": 0.796875, "learning_rate": 0.0001853694479088757, "loss": 0.9604, "step": 10908 }, { "epoch": 0.2801125303111274, "grad_norm": 0.828125, "learning_rate": 0.00018536712298288012, "loss": 0.9859, "step": 10909 }, { "epoch": 0.2801382075070492, "grad_norm": 0.8515625, "learning_rate": 0.0001853647978867556, "loss": 0.9703, "step": 10910 }, { "epoch": 0.280163884702971, "grad_norm": 0.83203125, "learning_rate": 0.0001853624726205067, "loss": 0.9791, "step": 10911 }, { "epoch": 0.2801895618988928, "grad_norm": 0.8203125, "learning_rate": 0.00018536014718413814, "loss": 0.8956, "step": 10912 }, { "epoch": 0.28021523909481466, "grad_norm": 0.8828125, "learning_rate": 0.0001853578215776545, "loss": 0.9946, "step": 10913 }, { "epoch": 0.2802409162907365, "grad_norm": 0.80859375, "learning_rate": 0.00018535549580106042, "loss": 0.9409, "step": 10914 }, { "epoch": 0.2802665934866583, "grad_norm": 0.734375, "learning_rate": 0.00018535316985436056, "loss": 0.905, "step": 10915 }, { "epoch": 0.2802922706825801, "grad_norm": 0.6953125, "learning_rate": 0.00018535084373755953, "loss": 0.9787, "step": 10916 }, { "epoch": 0.28031794787850195, "grad_norm": 0.79296875, "learning_rate": 0.000185348517450662, "loss": 0.9368, "step": 10917 }, { "epoch": 0.28034362507442373, "grad_norm": 0.80078125, "learning_rate": 0.00018534619099367256, "loss": 0.9839, "step": 10918 }, { "epoch": 0.28036930227034557, "grad_norm": 0.7421875, "learning_rate": 0.0001853438643665959, "loss": 0.8143, "step": 10919 }, { "epoch": 0.2803949794662674, "grad_norm": 0.76953125, "learning_rate": 0.00018534153756943658, "loss": 1.0033, "step": 10920 }, { "epoch": 0.2804206566621892, "grad_norm": 0.76171875, "learning_rate": 0.00018533921060219933, "loss": 0.9602, "step": 10921 }, { "epoch": 0.280446333858111, "grad_norm": 0.9140625, "learning_rate": 0.00018533688346488873, "loss": 0.9138, "step": 10922 }, { "epoch": 0.28047201105403285, "grad_norm": 0.81640625, "learning_rate": 0.00018533455615750942, "loss": 0.9169, "step": 10923 }, { "epoch": 0.2804976882499547, "grad_norm": 0.78515625, "learning_rate": 0.00018533222868006606, "loss": 0.9498, "step": 10924 }, { "epoch": 0.28052336544587647, "grad_norm": 0.8125, "learning_rate": 0.00018532990103256326, "loss": 0.8365, "step": 10925 }, { "epoch": 0.2805490426417983, "grad_norm": 0.8046875, "learning_rate": 0.0001853275732150057, "loss": 1.0214, "step": 10926 }, { "epoch": 0.28057471983772014, "grad_norm": 0.80078125, "learning_rate": 0.00018532524522739798, "loss": 1.0398, "step": 10927 }, { "epoch": 0.2806003970336419, "grad_norm": 0.8671875, "learning_rate": 0.00018532291706974477, "loss": 1.124, "step": 10928 }, { "epoch": 0.28062607422956376, "grad_norm": 0.75, "learning_rate": 0.00018532058874205066, "loss": 0.9768, "step": 10929 }, { "epoch": 0.2806517514254856, "grad_norm": 0.8125, "learning_rate": 0.00018531826024432036, "loss": 1.0735, "step": 10930 }, { "epoch": 0.2806774286214074, "grad_norm": 0.75, "learning_rate": 0.00018531593157655843, "loss": 0.9077, "step": 10931 }, { "epoch": 0.2807031058173292, "grad_norm": 0.9765625, "learning_rate": 0.00018531360273876959, "loss": 0.9717, "step": 10932 }, { "epoch": 0.28072878301325105, "grad_norm": 0.828125, "learning_rate": 0.00018531127373095843, "loss": 1.0236, "step": 10933 }, { "epoch": 0.2807544602091729, "grad_norm": 0.8046875, "learning_rate": 0.0001853089445531296, "loss": 0.9572, "step": 10934 }, { "epoch": 0.28078013740509467, "grad_norm": 0.88671875, "learning_rate": 0.0001853066152052878, "loss": 1.1016, "step": 10935 }, { "epoch": 0.2808058146010165, "grad_norm": 0.72265625, "learning_rate": 0.0001853042856874376, "loss": 0.8618, "step": 10936 }, { "epoch": 0.28083149179693834, "grad_norm": 0.83984375, "learning_rate": 0.0001853019559995836, "loss": 1.0328, "step": 10937 }, { "epoch": 0.2808571689928601, "grad_norm": 0.88671875, "learning_rate": 0.00018529962614173058, "loss": 0.9229, "step": 10938 }, { "epoch": 0.28088284618878195, "grad_norm": 0.80859375, "learning_rate": 0.00018529729611388307, "loss": 1.0235, "step": 10939 }, { "epoch": 0.2809085233847038, "grad_norm": 0.83203125, "learning_rate": 0.0001852949659160458, "loss": 0.9333, "step": 10940 }, { "epoch": 0.28093420058062557, "grad_norm": 1.046875, "learning_rate": 0.0001852926355482233, "loss": 0.9072, "step": 10941 }, { "epoch": 0.2809598777765474, "grad_norm": 1.1796875, "learning_rate": 0.00018529030501042036, "loss": 1.0616, "step": 10942 }, { "epoch": 0.28098555497246924, "grad_norm": 0.76171875, "learning_rate": 0.00018528797430264148, "loss": 0.9377, "step": 10943 }, { "epoch": 0.2810112321683911, "grad_norm": 0.80859375, "learning_rate": 0.0001852856434248914, "loss": 1.0318, "step": 10944 }, { "epoch": 0.28103690936431286, "grad_norm": 0.7578125, "learning_rate": 0.00018528331237717473, "loss": 0.8276, "step": 10945 }, { "epoch": 0.2810625865602347, "grad_norm": 0.7734375, "learning_rate": 0.0001852809811594961, "loss": 0.9135, "step": 10946 }, { "epoch": 0.28108826375615653, "grad_norm": 0.796875, "learning_rate": 0.00018527864977186024, "loss": 1.0481, "step": 10947 }, { "epoch": 0.2811139409520783, "grad_norm": 0.85546875, "learning_rate": 0.0001852763182142717, "loss": 0.966, "step": 10948 }, { "epoch": 0.28113961814800015, "grad_norm": 0.7421875, "learning_rate": 0.00018527398648673514, "loss": 0.9392, "step": 10949 }, { "epoch": 0.281165295343922, "grad_norm": 0.76171875, "learning_rate": 0.00018527165458925526, "loss": 0.8943, "step": 10950 }, { "epoch": 0.28119097253984376, "grad_norm": 0.88671875, "learning_rate": 0.00018526932252183666, "loss": 1.1684, "step": 10951 }, { "epoch": 0.2812166497357656, "grad_norm": 0.76953125, "learning_rate": 0.000185266990284484, "loss": 0.9568, "step": 10952 }, { "epoch": 0.28124232693168744, "grad_norm": 0.94140625, "learning_rate": 0.00018526465787720194, "loss": 1.0675, "step": 10953 }, { "epoch": 0.2812680041276092, "grad_norm": 0.80078125, "learning_rate": 0.00018526232529999511, "loss": 0.8694, "step": 10954 }, { "epoch": 0.28129368132353105, "grad_norm": 0.80078125, "learning_rate": 0.00018525999255286817, "loss": 1.0753, "step": 10955 }, { "epoch": 0.2813193585194529, "grad_norm": 0.8515625, "learning_rate": 0.00018525765963582576, "loss": 1.0978, "step": 10956 }, { "epoch": 0.2813450357153747, "grad_norm": 0.8203125, "learning_rate": 0.00018525532654887255, "loss": 0.9819, "step": 10957 }, { "epoch": 0.2813707129112965, "grad_norm": 0.8515625, "learning_rate": 0.00018525299329201316, "loss": 1.0062, "step": 10958 }, { "epoch": 0.28139639010721834, "grad_norm": 0.74609375, "learning_rate": 0.0001852506598652523, "loss": 0.8367, "step": 10959 }, { "epoch": 0.2814220673031402, "grad_norm": 0.84765625, "learning_rate": 0.0001852483262685945, "loss": 1.028, "step": 10960 }, { "epoch": 0.28144774449906196, "grad_norm": 0.7890625, "learning_rate": 0.00018524599250204453, "loss": 1.0111, "step": 10961 }, { "epoch": 0.2814734216949838, "grad_norm": 0.87890625, "learning_rate": 0.000185243658565607, "loss": 1.0497, "step": 10962 }, { "epoch": 0.28149909889090563, "grad_norm": 0.83203125, "learning_rate": 0.00018524132445928656, "loss": 1.001, "step": 10963 }, { "epoch": 0.2815247760868274, "grad_norm": 0.79296875, "learning_rate": 0.00018523899018308784, "loss": 1.1348, "step": 10964 }, { "epoch": 0.28155045328274925, "grad_norm": 0.77734375, "learning_rate": 0.0001852366557370155, "loss": 0.9298, "step": 10965 }, { "epoch": 0.2815761304786711, "grad_norm": 0.88671875, "learning_rate": 0.00018523432112107423, "loss": 1.0757, "step": 10966 }, { "epoch": 0.2816018076745929, "grad_norm": 0.79296875, "learning_rate": 0.00018523198633526867, "loss": 0.9636, "step": 10967 }, { "epoch": 0.2816274848705147, "grad_norm": 0.83984375, "learning_rate": 0.00018522965137960344, "loss": 1.0647, "step": 10968 }, { "epoch": 0.28165316206643654, "grad_norm": 0.765625, "learning_rate": 0.00018522731625408322, "loss": 0.8982, "step": 10969 }, { "epoch": 0.28167883926235837, "grad_norm": 1.0703125, "learning_rate": 0.00018522498095871264, "loss": 0.9929, "step": 10970 }, { "epoch": 0.28170451645828015, "grad_norm": 0.99609375, "learning_rate": 0.0001852226454934964, "loss": 1.0305, "step": 10971 }, { "epoch": 0.281730193654202, "grad_norm": 0.79296875, "learning_rate": 0.0001852203098584391, "loss": 1.0324, "step": 10972 }, { "epoch": 0.2817558708501238, "grad_norm": 0.796875, "learning_rate": 0.00018521797405354545, "loss": 0.9399, "step": 10973 }, { "epoch": 0.2817815480460456, "grad_norm": 0.78125, "learning_rate": 0.00018521563807882005, "loss": 0.9737, "step": 10974 }, { "epoch": 0.28180722524196744, "grad_norm": 0.8046875, "learning_rate": 0.0001852133019342676, "loss": 1.034, "step": 10975 }, { "epoch": 0.2818329024378893, "grad_norm": 0.77734375, "learning_rate": 0.00018521096561989272, "loss": 0.9069, "step": 10976 }, { "epoch": 0.2818585796338111, "grad_norm": 0.984375, "learning_rate": 0.00018520862913570007, "loss": 1.006, "step": 10977 }, { "epoch": 0.2818842568297329, "grad_norm": 0.84765625, "learning_rate": 0.00018520629248169434, "loss": 0.9815, "step": 10978 }, { "epoch": 0.28190993402565473, "grad_norm": 0.76953125, "learning_rate": 0.0001852039556578802, "loss": 0.954, "step": 10979 }, { "epoch": 0.28193561122157657, "grad_norm": 0.84765625, "learning_rate": 0.0001852016186642622, "loss": 0.9363, "step": 10980 }, { "epoch": 0.28196128841749835, "grad_norm": 0.8125, "learning_rate": 0.0001851992815008451, "loss": 0.9475, "step": 10981 }, { "epoch": 0.2819869656134202, "grad_norm": 0.87109375, "learning_rate": 0.00018519694416763353, "loss": 1.1088, "step": 10982 }, { "epoch": 0.282012642809342, "grad_norm": 0.79296875, "learning_rate": 0.00018519460666463217, "loss": 1.1135, "step": 10983 }, { "epoch": 0.2820383200052638, "grad_norm": 0.99609375, "learning_rate": 0.00018519226899184563, "loss": 0.9965, "step": 10984 }, { "epoch": 0.28206399720118563, "grad_norm": 0.83203125, "learning_rate": 0.00018518993114927858, "loss": 0.8855, "step": 10985 }, { "epoch": 0.28208967439710747, "grad_norm": 0.73828125, "learning_rate": 0.0001851875931369357, "loss": 0.9784, "step": 10986 }, { "epoch": 0.2821153515930293, "grad_norm": 0.78515625, "learning_rate": 0.0001851852549548217, "loss": 0.9724, "step": 10987 }, { "epoch": 0.2821410287889511, "grad_norm": 0.8515625, "learning_rate": 0.0001851829166029411, "loss": 0.879, "step": 10988 }, { "epoch": 0.2821667059848729, "grad_norm": 0.74609375, "learning_rate": 0.0001851805780812987, "loss": 0.8893, "step": 10989 }, { "epoch": 0.28219238318079476, "grad_norm": 0.73828125, "learning_rate": 0.00018517823938989905, "loss": 0.9413, "step": 10990 }, { "epoch": 0.28221806037671654, "grad_norm": 0.703125, "learning_rate": 0.0001851759005287469, "loss": 0.8968, "step": 10991 }, { "epoch": 0.2822437375726384, "grad_norm": 0.796875, "learning_rate": 0.00018517356149784687, "loss": 0.9355, "step": 10992 }, { "epoch": 0.2822694147685602, "grad_norm": 0.8671875, "learning_rate": 0.0001851712222972036, "loss": 0.8836, "step": 10993 }, { "epoch": 0.282295091964482, "grad_norm": 1.0078125, "learning_rate": 0.00018516888292682178, "loss": 1.066, "step": 10994 }, { "epoch": 0.28232076916040383, "grad_norm": 0.7890625, "learning_rate": 0.0001851665433867061, "loss": 0.8868, "step": 10995 }, { "epoch": 0.28234644635632566, "grad_norm": 0.796875, "learning_rate": 0.0001851642036768612, "loss": 1.0151, "step": 10996 }, { "epoch": 0.2823721235522475, "grad_norm": 0.73046875, "learning_rate": 0.0001851618637972917, "loss": 1.0137, "step": 10997 }, { "epoch": 0.2823978007481693, "grad_norm": 0.7265625, "learning_rate": 0.0001851595237480023, "loss": 0.9635, "step": 10998 }, { "epoch": 0.2824234779440911, "grad_norm": 0.73828125, "learning_rate": 0.00018515718352899772, "loss": 1.0394, "step": 10999 }, { "epoch": 0.28244915514001295, "grad_norm": 0.8203125, "learning_rate": 0.00018515484314028248, "loss": 0.9893, "step": 11000 }, { "epoch": 0.28244915514001295, "eval_loss": 0.9871212840080261, "eval_model_preparation_time": 0.0065, "eval_runtime": 408.083, "eval_samples_per_second": 24.505, "eval_steps_per_second": 0.767, "step": 11000 }, { "epoch": 0.28247483233593473, "grad_norm": 0.82421875, "learning_rate": 0.0001851525025818614, "loss": 1.0285, "step": 11001 }, { "epoch": 0.28250050953185657, "grad_norm": 0.78125, "learning_rate": 0.00018515016185373902, "loss": 0.888, "step": 11002 }, { "epoch": 0.2825261867277784, "grad_norm": 0.71875, "learning_rate": 0.0001851478209559201, "loss": 0.8644, "step": 11003 }, { "epoch": 0.2825518639237002, "grad_norm": 0.7890625, "learning_rate": 0.00018514547988840923, "loss": 1.0262, "step": 11004 }, { "epoch": 0.282577541119622, "grad_norm": 0.8046875, "learning_rate": 0.00018514313865121114, "loss": 1.0273, "step": 11005 }, { "epoch": 0.28260321831554386, "grad_norm": 0.8125, "learning_rate": 0.00018514079724433045, "loss": 1.0615, "step": 11006 }, { "epoch": 0.2826288955114657, "grad_norm": 0.7890625, "learning_rate": 0.00018513845566777183, "loss": 0.9902, "step": 11007 }, { "epoch": 0.2826545727073875, "grad_norm": 0.80859375, "learning_rate": 0.00018513611392153998, "loss": 0.9625, "step": 11008 }, { "epoch": 0.2826802499033093, "grad_norm": 0.90234375, "learning_rate": 0.00018513377200563954, "loss": 1.0393, "step": 11009 }, { "epoch": 0.28270592709923115, "grad_norm": 0.83203125, "learning_rate": 0.00018513142992007518, "loss": 1.0707, "step": 11010 }, { "epoch": 0.2827316042951529, "grad_norm": 0.80078125, "learning_rate": 0.00018512908766485158, "loss": 0.9859, "step": 11011 }, { "epoch": 0.28275728149107476, "grad_norm": 0.7734375, "learning_rate": 0.00018512674523997339, "loss": 1.078, "step": 11012 }, { "epoch": 0.2827829586869966, "grad_norm": 0.78515625, "learning_rate": 0.00018512440264544528, "loss": 1.0955, "step": 11013 }, { "epoch": 0.2828086358829184, "grad_norm": 0.71875, "learning_rate": 0.00018512205988127192, "loss": 0.8632, "step": 11014 }, { "epoch": 0.2828343130788402, "grad_norm": 0.7578125, "learning_rate": 0.00018511971694745797, "loss": 0.9825, "step": 11015 }, { "epoch": 0.28285999027476205, "grad_norm": 0.77734375, "learning_rate": 0.00018511737384400813, "loss": 0.8986, "step": 11016 }, { "epoch": 0.2828856674706839, "grad_norm": 0.74609375, "learning_rate": 0.0001851150305709271, "loss": 1.0257, "step": 11017 }, { "epoch": 0.28291134466660567, "grad_norm": 0.8046875, "learning_rate": 0.00018511268712821942, "loss": 0.9358, "step": 11018 }, { "epoch": 0.2829370218625275, "grad_norm": 0.71484375, "learning_rate": 0.00018511034351588988, "loss": 1.0402, "step": 11019 }, { "epoch": 0.28296269905844934, "grad_norm": 0.77734375, "learning_rate": 0.00018510799973394315, "loss": 1.1053, "step": 11020 }, { "epoch": 0.2829883762543711, "grad_norm": 0.78125, "learning_rate": 0.00018510565578238382, "loss": 1.0536, "step": 11021 }, { "epoch": 0.28301405345029296, "grad_norm": 0.75, "learning_rate": 0.00018510331166121663, "loss": 0.9573, "step": 11022 }, { "epoch": 0.2830397306462148, "grad_norm": 0.8828125, "learning_rate": 0.0001851009673704462, "loss": 1.0108, "step": 11023 }, { "epoch": 0.2830654078421366, "grad_norm": 0.79296875, "learning_rate": 0.00018509862291007725, "loss": 0.9993, "step": 11024 }, { "epoch": 0.2830910850380584, "grad_norm": 0.7578125, "learning_rate": 0.0001850962782801144, "loss": 0.9745, "step": 11025 }, { "epoch": 0.28311676223398025, "grad_norm": 0.80078125, "learning_rate": 0.00018509393348056238, "loss": 0.9195, "step": 11026 }, { "epoch": 0.2831424394299021, "grad_norm": 0.80078125, "learning_rate": 0.00018509158851142584, "loss": 0.9071, "step": 11027 }, { "epoch": 0.28316811662582386, "grad_norm": 0.80859375, "learning_rate": 0.00018508924337270945, "loss": 0.976, "step": 11028 }, { "epoch": 0.2831937938217457, "grad_norm": 0.78515625, "learning_rate": 0.00018508689806441789, "loss": 1.0701, "step": 11029 }, { "epoch": 0.28321947101766753, "grad_norm": 0.8046875, "learning_rate": 0.00018508455258655582, "loss": 0.8828, "step": 11030 }, { "epoch": 0.2832451482135893, "grad_norm": 1.21875, "learning_rate": 0.0001850822069391279, "loss": 1.0164, "step": 11031 }, { "epoch": 0.28327082540951115, "grad_norm": 0.78515625, "learning_rate": 0.00018507986112213884, "loss": 0.9872, "step": 11032 }, { "epoch": 0.283296502605433, "grad_norm": 0.796875, "learning_rate": 0.00018507751513559332, "loss": 0.9029, "step": 11033 }, { "epoch": 0.28332217980135477, "grad_norm": 0.8515625, "learning_rate": 0.000185075168979496, "loss": 0.957, "step": 11034 }, { "epoch": 0.2833478569972766, "grad_norm": 0.84375, "learning_rate": 0.00018507282265385153, "loss": 0.9484, "step": 11035 }, { "epoch": 0.28337353419319844, "grad_norm": 0.7421875, "learning_rate": 0.00018507047615866462, "loss": 0.932, "step": 11036 }, { "epoch": 0.2833992113891203, "grad_norm": 0.83203125, "learning_rate": 0.00018506812949393994, "loss": 0.9728, "step": 11037 }, { "epoch": 0.28342488858504206, "grad_norm": 0.83984375, "learning_rate": 0.00018506578265968214, "loss": 0.9813, "step": 11038 }, { "epoch": 0.2834505657809639, "grad_norm": 0.65625, "learning_rate": 0.00018506343565589595, "loss": 0.8701, "step": 11039 }, { "epoch": 0.28347624297688573, "grad_norm": 0.9609375, "learning_rate": 0.00018506108848258599, "loss": 1.0239, "step": 11040 }, { "epoch": 0.2835019201728075, "grad_norm": 0.8046875, "learning_rate": 0.00018505874113975698, "loss": 0.9645, "step": 11041 }, { "epoch": 0.28352759736872934, "grad_norm": 0.91796875, "learning_rate": 0.00018505639362741358, "loss": 0.9968, "step": 11042 }, { "epoch": 0.2835532745646512, "grad_norm": 0.7578125, "learning_rate": 0.0001850540459455605, "loss": 0.9187, "step": 11043 }, { "epoch": 0.28357895176057296, "grad_norm": 0.7890625, "learning_rate": 0.00018505169809420233, "loss": 0.9163, "step": 11044 }, { "epoch": 0.2836046289564948, "grad_norm": 0.75390625, "learning_rate": 0.00018504935007334383, "loss": 0.9143, "step": 11045 }, { "epoch": 0.28363030615241663, "grad_norm": 0.8984375, "learning_rate": 0.00018504700188298967, "loss": 0.9153, "step": 11046 }, { "epoch": 0.28365598334833847, "grad_norm": 0.76953125, "learning_rate": 0.0001850446535231445, "loss": 0.9854, "step": 11047 }, { "epoch": 0.28368166054426025, "grad_norm": 0.76953125, "learning_rate": 0.00018504230499381304, "loss": 0.8207, "step": 11048 }, { "epoch": 0.2837073377401821, "grad_norm": 0.81640625, "learning_rate": 0.00018503995629499994, "loss": 1.1063, "step": 11049 }, { "epoch": 0.2837330149361039, "grad_norm": 0.75390625, "learning_rate": 0.00018503760742670988, "loss": 0.8579, "step": 11050 }, { "epoch": 0.2837586921320257, "grad_norm": 0.78515625, "learning_rate": 0.00018503525838894756, "loss": 0.8897, "step": 11051 }, { "epoch": 0.28378436932794754, "grad_norm": 0.89453125, "learning_rate": 0.00018503290918171766, "loss": 0.9122, "step": 11052 }, { "epoch": 0.2838100465238694, "grad_norm": 0.74609375, "learning_rate": 0.00018503055980502483, "loss": 0.8441, "step": 11053 }, { "epoch": 0.28383572371979116, "grad_norm": 0.8828125, "learning_rate": 0.0001850282102588738, "loss": 1.0557, "step": 11054 }, { "epoch": 0.283861400915713, "grad_norm": 0.88671875, "learning_rate": 0.00018502586054326917, "loss": 1.015, "step": 11055 }, { "epoch": 0.2838870781116348, "grad_norm": 0.74609375, "learning_rate": 0.00018502351065821575, "loss": 1.0172, "step": 11056 }, { "epoch": 0.28391275530755666, "grad_norm": 0.7734375, "learning_rate": 0.00018502116060371813, "loss": 0.8735, "step": 11057 }, { "epoch": 0.28393843250347844, "grad_norm": 0.8671875, "learning_rate": 0.000185018810379781, "loss": 1.0216, "step": 11058 }, { "epoch": 0.2839641096994003, "grad_norm": 0.8359375, "learning_rate": 0.00018501645998640908, "loss": 0.938, "step": 11059 }, { "epoch": 0.2839897868953221, "grad_norm": 0.82421875, "learning_rate": 0.00018501410942360703, "loss": 1.0765, "step": 11060 }, { "epoch": 0.2840154640912439, "grad_norm": 0.8203125, "learning_rate": 0.00018501175869137953, "loss": 1.0425, "step": 11061 }, { "epoch": 0.28404114128716573, "grad_norm": 0.9140625, "learning_rate": 0.0001850094077897313, "loss": 0.9423, "step": 11062 }, { "epoch": 0.28406681848308757, "grad_norm": 0.7265625, "learning_rate": 0.00018500705671866695, "loss": 0.8829, "step": 11063 }, { "epoch": 0.28409249567900935, "grad_norm": 0.83984375, "learning_rate": 0.00018500470547819125, "loss": 1.011, "step": 11064 }, { "epoch": 0.2841181728749312, "grad_norm": 0.8359375, "learning_rate": 0.00018500235406830885, "loss": 0.9631, "step": 11065 }, { "epoch": 0.284143850070853, "grad_norm": 0.8125, "learning_rate": 0.00018500000248902442, "loss": 0.8873, "step": 11066 }, { "epoch": 0.28416952726677486, "grad_norm": 0.7578125, "learning_rate": 0.00018499765074034267, "loss": 0.9266, "step": 11067 }, { "epoch": 0.28419520446269664, "grad_norm": 0.73828125, "learning_rate": 0.00018499529882226828, "loss": 0.8926, "step": 11068 }, { "epoch": 0.2842208816586185, "grad_norm": 0.7890625, "learning_rate": 0.00018499294673480598, "loss": 1.1132, "step": 11069 }, { "epoch": 0.2842465588545403, "grad_norm": 0.76953125, "learning_rate": 0.00018499059447796038, "loss": 0.9794, "step": 11070 }, { "epoch": 0.2842722360504621, "grad_norm": 0.77734375, "learning_rate": 0.00018498824205173618, "loss": 1.0286, "step": 11071 }, { "epoch": 0.2842979132463839, "grad_norm": 0.765625, "learning_rate": 0.00018498588945613811, "loss": 0.9564, "step": 11072 }, { "epoch": 0.28432359044230576, "grad_norm": 0.7734375, "learning_rate": 0.00018498353669117084, "loss": 1.0071, "step": 11073 }, { "epoch": 0.28434926763822754, "grad_norm": 0.7734375, "learning_rate": 0.00018498118375683906, "loss": 0.8916, "step": 11074 }, { "epoch": 0.2843749448341494, "grad_norm": 0.796875, "learning_rate": 0.00018497883065314745, "loss": 0.9372, "step": 11075 }, { "epoch": 0.2844006220300712, "grad_norm": 0.83203125, "learning_rate": 0.00018497647738010073, "loss": 1.0056, "step": 11076 }, { "epoch": 0.28442629922599305, "grad_norm": 0.82421875, "learning_rate": 0.00018497412393770355, "loss": 1.0038, "step": 11077 }, { "epoch": 0.28445197642191483, "grad_norm": 0.82421875, "learning_rate": 0.00018497177032596057, "loss": 1.032, "step": 11078 }, { "epoch": 0.28447765361783667, "grad_norm": 0.78125, "learning_rate": 0.00018496941654487658, "loss": 1.0595, "step": 11079 }, { "epoch": 0.2845033308137585, "grad_norm": 0.78125, "learning_rate": 0.00018496706259445622, "loss": 1.0004, "step": 11080 }, { "epoch": 0.2845290080096803, "grad_norm": 0.73046875, "learning_rate": 0.00018496470847470417, "loss": 1.0502, "step": 11081 }, { "epoch": 0.2845546852056021, "grad_norm": 0.8203125, "learning_rate": 0.00018496235418562513, "loss": 0.9811, "step": 11082 }, { "epoch": 0.28458036240152396, "grad_norm": 0.78515625, "learning_rate": 0.0001849599997272238, "loss": 0.8694, "step": 11083 }, { "epoch": 0.28460603959744574, "grad_norm": 0.71484375, "learning_rate": 0.00018495764509950485, "loss": 0.8764, "step": 11084 }, { "epoch": 0.2846317167933676, "grad_norm": 0.80859375, "learning_rate": 0.00018495529030247297, "loss": 0.9594, "step": 11085 }, { "epoch": 0.2846573939892894, "grad_norm": 0.8515625, "learning_rate": 0.00018495293533613293, "loss": 0.9906, "step": 11086 }, { "epoch": 0.28468307118521124, "grad_norm": 0.8359375, "learning_rate": 0.00018495058020048932, "loss": 1.1128, "step": 11087 }, { "epoch": 0.284708748381133, "grad_norm": 0.75390625, "learning_rate": 0.0001849482248955469, "loss": 1.0004, "step": 11088 }, { "epoch": 0.28473442557705486, "grad_norm": 0.796875, "learning_rate": 0.0001849458694213103, "loss": 1.1046, "step": 11089 }, { "epoch": 0.2847601027729767, "grad_norm": 0.80859375, "learning_rate": 0.00018494351377778428, "loss": 1.0283, "step": 11090 }, { "epoch": 0.2847857799688985, "grad_norm": 0.8984375, "learning_rate": 0.00018494115796497353, "loss": 1.1147, "step": 11091 }, { "epoch": 0.2848114571648203, "grad_norm": 0.74609375, "learning_rate": 0.0001849388019828827, "loss": 0.8648, "step": 11092 }, { "epoch": 0.28483713436074215, "grad_norm": 0.8203125, "learning_rate": 0.00018493644583151653, "loss": 1.1409, "step": 11093 }, { "epoch": 0.28486281155666393, "grad_norm": 0.8125, "learning_rate": 0.00018493408951087968, "loss": 0.9487, "step": 11094 }, { "epoch": 0.28488848875258577, "grad_norm": 0.71484375, "learning_rate": 0.00018493173302097687, "loss": 0.8628, "step": 11095 }, { "epoch": 0.2849141659485076, "grad_norm": 0.75390625, "learning_rate": 0.00018492937636181277, "loss": 1.0118, "step": 11096 }, { "epoch": 0.28493984314442944, "grad_norm": 0.7265625, "learning_rate": 0.00018492701953339212, "loss": 0.9366, "step": 11097 }, { "epoch": 0.2849655203403512, "grad_norm": 0.8046875, "learning_rate": 0.00018492466253571956, "loss": 0.9999, "step": 11098 }, { "epoch": 0.28499119753627306, "grad_norm": 0.76953125, "learning_rate": 0.00018492230536879984, "loss": 0.9218, "step": 11099 }, { "epoch": 0.2850168747321949, "grad_norm": 0.78515625, "learning_rate": 0.00018491994803263763, "loss": 0.9106, "step": 11100 }, { "epoch": 0.28504255192811667, "grad_norm": 0.75, "learning_rate": 0.00018491759052723764, "loss": 0.9124, "step": 11101 }, { "epoch": 0.2850682291240385, "grad_norm": 0.80078125, "learning_rate": 0.00018491523285260456, "loss": 0.9439, "step": 11102 }, { "epoch": 0.28509390631996034, "grad_norm": 0.78515625, "learning_rate": 0.00018491287500874308, "loss": 1.0362, "step": 11103 }, { "epoch": 0.2851195835158821, "grad_norm": 0.8359375, "learning_rate": 0.00018491051699565794, "loss": 0.8797, "step": 11104 }, { "epoch": 0.28514526071180396, "grad_norm": 0.89453125, "learning_rate": 0.00018490815881335378, "loss": 0.915, "step": 11105 }, { "epoch": 0.2851709379077258, "grad_norm": 0.8359375, "learning_rate": 0.00018490580046183534, "loss": 0.9279, "step": 11106 }, { "epoch": 0.28519661510364763, "grad_norm": 0.8203125, "learning_rate": 0.00018490344194110731, "loss": 0.9949, "step": 11107 }, { "epoch": 0.2852222922995694, "grad_norm": 0.8828125, "learning_rate": 0.00018490108325117438, "loss": 0.9633, "step": 11108 }, { "epoch": 0.28524796949549125, "grad_norm": 0.796875, "learning_rate": 0.0001848987243920413, "loss": 1.0671, "step": 11109 }, { "epoch": 0.2852736466914131, "grad_norm": 0.78515625, "learning_rate": 0.00018489636536371269, "loss": 1.0018, "step": 11110 }, { "epoch": 0.28529932388733487, "grad_norm": 0.75390625, "learning_rate": 0.0001848940061661933, "loss": 1.0117, "step": 11111 }, { "epoch": 0.2853250010832567, "grad_norm": 0.72265625, "learning_rate": 0.00018489164679948782, "loss": 1.0017, "step": 11112 }, { "epoch": 0.28535067827917854, "grad_norm": 0.80078125, "learning_rate": 0.00018488928726360097, "loss": 0.9443, "step": 11113 }, { "epoch": 0.2853763554751003, "grad_norm": 0.734375, "learning_rate": 0.00018488692755853743, "loss": 0.8385, "step": 11114 }, { "epoch": 0.28540203267102215, "grad_norm": 0.84375, "learning_rate": 0.00018488456768430192, "loss": 1.0293, "step": 11115 }, { "epoch": 0.285427709866944, "grad_norm": 0.83984375, "learning_rate": 0.00018488220764089914, "loss": 0.9184, "step": 11116 }, { "epoch": 0.2854533870628658, "grad_norm": 0.765625, "learning_rate": 0.00018487984742833374, "loss": 0.9589, "step": 11117 }, { "epoch": 0.2854790642587876, "grad_norm": 0.7421875, "learning_rate": 0.00018487748704661052, "loss": 0.9064, "step": 11118 }, { "epoch": 0.28550474145470944, "grad_norm": 0.796875, "learning_rate": 0.00018487512649573414, "loss": 0.9461, "step": 11119 }, { "epoch": 0.2855304186506313, "grad_norm": 1.2734375, "learning_rate": 0.00018487276577570927, "loss": 0.9069, "step": 11120 }, { "epoch": 0.28555609584655306, "grad_norm": 0.81640625, "learning_rate": 0.00018487040488654067, "loss": 0.9591, "step": 11121 }, { "epoch": 0.2855817730424749, "grad_norm": 0.859375, "learning_rate": 0.000184868043828233, "loss": 1.1077, "step": 11122 }, { "epoch": 0.28560745023839673, "grad_norm": 0.78515625, "learning_rate": 0.00018486568260079098, "loss": 0.9468, "step": 11123 }, { "epoch": 0.2856331274343185, "grad_norm": 0.8046875, "learning_rate": 0.00018486332120421932, "loss": 1.0002, "step": 11124 }, { "epoch": 0.28565880463024035, "grad_norm": 0.66796875, "learning_rate": 0.00018486095963852274, "loss": 0.7307, "step": 11125 }, { "epoch": 0.2856844818261622, "grad_norm": 0.81640625, "learning_rate": 0.00018485859790370594, "loss": 1.0291, "step": 11126 }, { "epoch": 0.285710159022084, "grad_norm": 0.7109375, "learning_rate": 0.0001848562359997736, "loss": 0.9707, "step": 11127 }, { "epoch": 0.2857358362180058, "grad_norm": 0.9296875, "learning_rate": 0.00018485387392673045, "loss": 0.9705, "step": 11128 }, { "epoch": 0.28576151341392764, "grad_norm": 0.77734375, "learning_rate": 0.00018485151168458122, "loss": 1.0663, "step": 11129 }, { "epoch": 0.2857871906098495, "grad_norm": 0.703125, "learning_rate": 0.00018484914927333054, "loss": 0.9632, "step": 11130 }, { "epoch": 0.28581286780577125, "grad_norm": 0.80078125, "learning_rate": 0.00018484678669298323, "loss": 0.9634, "step": 11131 }, { "epoch": 0.2858385450016931, "grad_norm": 0.7578125, "learning_rate": 0.00018484442394354388, "loss": 0.9218, "step": 11132 }, { "epoch": 0.2858642221976149, "grad_norm": 0.83984375, "learning_rate": 0.00018484206102501732, "loss": 1.0834, "step": 11133 }, { "epoch": 0.2858898993935367, "grad_norm": 0.765625, "learning_rate": 0.00018483969793740815, "loss": 0.9392, "step": 11134 }, { "epoch": 0.28591557658945854, "grad_norm": 2.234375, "learning_rate": 0.00018483733468072116, "loss": 1.002, "step": 11135 }, { "epoch": 0.2859412537853804, "grad_norm": 0.84375, "learning_rate": 0.000184834971254961, "loss": 0.9483, "step": 11136 }, { "epoch": 0.2859669309813022, "grad_norm": 0.875, "learning_rate": 0.0001848326076601324, "loss": 1.1301, "step": 11137 }, { "epoch": 0.285992608177224, "grad_norm": 0.78515625, "learning_rate": 0.0001848302438962401, "loss": 0.9657, "step": 11138 }, { "epoch": 0.28601828537314583, "grad_norm": 0.85546875, "learning_rate": 0.00018482787996328877, "loss": 0.9577, "step": 11139 }, { "epoch": 0.28604396256906767, "grad_norm": 0.8671875, "learning_rate": 0.0001848255158612831, "loss": 1.1667, "step": 11140 }, { "epoch": 0.28606963976498945, "grad_norm": 0.8125, "learning_rate": 0.0001848231515902279, "loss": 0.9775, "step": 11141 }, { "epoch": 0.2860953169609113, "grad_norm": 0.73828125, "learning_rate": 0.00018482078715012778, "loss": 0.9878, "step": 11142 }, { "epoch": 0.2861209941568331, "grad_norm": 0.77734375, "learning_rate": 0.0001848184225409875, "loss": 0.937, "step": 11143 }, { "epoch": 0.2861466713527549, "grad_norm": 0.86328125, "learning_rate": 0.00018481605776281181, "loss": 1.0771, "step": 11144 }, { "epoch": 0.28617234854867674, "grad_norm": 0.8359375, "learning_rate": 0.00018481369281560535, "loss": 1.0191, "step": 11145 }, { "epoch": 0.28619802574459857, "grad_norm": 0.74609375, "learning_rate": 0.00018481132769937285, "loss": 1.0498, "step": 11146 }, { "epoch": 0.2862237029405204, "grad_norm": 0.81640625, "learning_rate": 0.00018480896241411903, "loss": 1.0238, "step": 11147 }, { "epoch": 0.2862493801364422, "grad_norm": 0.84375, "learning_rate": 0.00018480659695984864, "loss": 1.1779, "step": 11148 }, { "epoch": 0.286275057332364, "grad_norm": 0.81640625, "learning_rate": 0.00018480423133656633, "loss": 0.9537, "step": 11149 }, { "epoch": 0.28630073452828586, "grad_norm": 0.70703125, "learning_rate": 0.00018480186554427688, "loss": 1.0086, "step": 11150 }, { "epoch": 0.28632641172420764, "grad_norm": 0.8125, "learning_rate": 0.00018479949958298493, "loss": 0.9998, "step": 11151 }, { "epoch": 0.2863520889201295, "grad_norm": 0.73828125, "learning_rate": 0.00018479713345269526, "loss": 0.9785, "step": 11152 }, { "epoch": 0.2863777661160513, "grad_norm": 0.765625, "learning_rate": 0.00018479476715341258, "loss": 1.0665, "step": 11153 }, { "epoch": 0.2864034433119731, "grad_norm": 0.79296875, "learning_rate": 0.00018479240068514156, "loss": 0.9659, "step": 11154 }, { "epoch": 0.28642912050789493, "grad_norm": 0.84375, "learning_rate": 0.00018479003404788698, "loss": 0.9756, "step": 11155 }, { "epoch": 0.28645479770381677, "grad_norm": 0.82421875, "learning_rate": 0.00018478766724165348, "loss": 1.0527, "step": 11156 }, { "epoch": 0.28648047489973855, "grad_norm": 0.828125, "learning_rate": 0.00018478530026644582, "loss": 1.0886, "step": 11157 }, { "epoch": 0.2865061520956604, "grad_norm": 0.81640625, "learning_rate": 0.00018478293312226875, "loss": 0.9172, "step": 11158 }, { "epoch": 0.2865318292915822, "grad_norm": 0.8046875, "learning_rate": 0.00018478056580912693, "loss": 1.0664, "step": 11159 }, { "epoch": 0.28655750648750405, "grad_norm": 0.79296875, "learning_rate": 0.0001847781983270251, "loss": 0.8942, "step": 11160 }, { "epoch": 0.28658318368342584, "grad_norm": 0.80078125, "learning_rate": 0.00018477583067596798, "loss": 1.0095, "step": 11161 }, { "epoch": 0.28660886087934767, "grad_norm": 0.796875, "learning_rate": 0.00018477346285596032, "loss": 1.1013, "step": 11162 }, { "epoch": 0.2866345380752695, "grad_norm": 0.859375, "learning_rate": 0.00018477109486700677, "loss": 1.0942, "step": 11163 }, { "epoch": 0.2866602152711913, "grad_norm": 0.80859375, "learning_rate": 0.0001847687267091121, "loss": 0.9627, "step": 11164 }, { "epoch": 0.2866858924671131, "grad_norm": 0.75, "learning_rate": 0.000184766358382281, "loss": 0.8965, "step": 11165 }, { "epoch": 0.28671156966303496, "grad_norm": 0.7890625, "learning_rate": 0.0001847639898865182, "loss": 1.0062, "step": 11166 }, { "epoch": 0.28673724685895674, "grad_norm": 0.76171875, "learning_rate": 0.00018476162122182843, "loss": 0.9509, "step": 11167 }, { "epoch": 0.2867629240548786, "grad_norm": 0.76171875, "learning_rate": 0.00018475925238821642, "loss": 0.9004, "step": 11168 }, { "epoch": 0.2867886012508004, "grad_norm": 0.828125, "learning_rate": 0.00018475688338568687, "loss": 0.9022, "step": 11169 }, { "epoch": 0.28681427844672225, "grad_norm": 0.8046875, "learning_rate": 0.00018475451421424448, "loss": 1.0231, "step": 11170 }, { "epoch": 0.28683995564264403, "grad_norm": 0.78125, "learning_rate": 0.00018475214487389402, "loss": 0.8722, "step": 11171 }, { "epoch": 0.28686563283856586, "grad_norm": 0.75, "learning_rate": 0.0001847497753646402, "loss": 0.9206, "step": 11172 }, { "epoch": 0.2868913100344877, "grad_norm": 0.734375, "learning_rate": 0.00018474740568648774, "loss": 0.9223, "step": 11173 }, { "epoch": 0.2869169872304095, "grad_norm": 0.80859375, "learning_rate": 0.00018474503583944133, "loss": 1.0397, "step": 11174 }, { "epoch": 0.2869426644263313, "grad_norm": 0.859375, "learning_rate": 0.00018474266582350573, "loss": 1.0169, "step": 11175 }, { "epoch": 0.28696834162225315, "grad_norm": 0.8125, "learning_rate": 0.00018474029563868567, "loss": 1.0215, "step": 11176 }, { "epoch": 0.28699401881817493, "grad_norm": 0.875, "learning_rate": 0.00018473792528498584, "loss": 1.0023, "step": 11177 }, { "epoch": 0.28701969601409677, "grad_norm": 0.86328125, "learning_rate": 0.00018473555476241098, "loss": 0.9694, "step": 11178 }, { "epoch": 0.2870453732100186, "grad_norm": 0.80078125, "learning_rate": 0.0001847331840709658, "loss": 1.0422, "step": 11179 }, { "epoch": 0.28707105040594044, "grad_norm": 0.828125, "learning_rate": 0.00018473081321065507, "loss": 1.0229, "step": 11180 }, { "epoch": 0.2870967276018622, "grad_norm": 0.81640625, "learning_rate": 0.00018472844218148346, "loss": 1.0093, "step": 11181 }, { "epoch": 0.28712240479778406, "grad_norm": 0.8359375, "learning_rate": 0.0001847260709834557, "loss": 0.8936, "step": 11182 }, { "epoch": 0.2871480819937059, "grad_norm": 0.85546875, "learning_rate": 0.00018472369961657656, "loss": 0.9329, "step": 11183 }, { "epoch": 0.2871737591896277, "grad_norm": 0.76953125, "learning_rate": 0.00018472132808085073, "loss": 0.9423, "step": 11184 }, { "epoch": 0.2871994363855495, "grad_norm": 0.77734375, "learning_rate": 0.00018471895637628297, "loss": 0.9182, "step": 11185 }, { "epoch": 0.28722511358147135, "grad_norm": 0.87890625, "learning_rate": 0.00018471658450287795, "loss": 1.0883, "step": 11186 }, { "epoch": 0.28725079077739313, "grad_norm": 0.7578125, "learning_rate": 0.00018471421246064044, "loss": 1.067, "step": 11187 }, { "epoch": 0.28727646797331496, "grad_norm": 0.86328125, "learning_rate": 0.0001847118402495752, "loss": 1.0082, "step": 11188 }, { "epoch": 0.2873021451692368, "grad_norm": 0.88671875, "learning_rate": 0.00018470946786968684, "loss": 0.9668, "step": 11189 }, { "epoch": 0.28732782236515864, "grad_norm": 0.8203125, "learning_rate": 0.0001847070953209802, "loss": 0.8911, "step": 11190 }, { "epoch": 0.2873534995610804, "grad_norm": 0.78515625, "learning_rate": 0.00018470472260345998, "loss": 0.9682, "step": 11191 }, { "epoch": 0.28737917675700225, "grad_norm": 0.79296875, "learning_rate": 0.00018470234971713087, "loss": 1.0793, "step": 11192 }, { "epoch": 0.2874048539529241, "grad_norm": 0.81640625, "learning_rate": 0.00018469997666199764, "loss": 0.939, "step": 11193 }, { "epoch": 0.28743053114884587, "grad_norm": 0.84375, "learning_rate": 0.000184697603438065, "loss": 0.9661, "step": 11194 }, { "epoch": 0.2874562083447677, "grad_norm": 0.8046875, "learning_rate": 0.00018469523004533773, "loss": 1.0392, "step": 11195 }, { "epoch": 0.28748188554068954, "grad_norm": 0.76171875, "learning_rate": 0.0001846928564838205, "loss": 1.0565, "step": 11196 }, { "epoch": 0.2875075627366113, "grad_norm": 0.78125, "learning_rate": 0.00018469048275351804, "loss": 1.0059, "step": 11197 }, { "epoch": 0.28753323993253316, "grad_norm": 0.7734375, "learning_rate": 0.0001846881088544351, "loss": 0.8981, "step": 11198 }, { "epoch": 0.287558917128455, "grad_norm": 0.74609375, "learning_rate": 0.0001846857347865764, "loss": 0.893, "step": 11199 }, { "epoch": 0.28758459432437683, "grad_norm": 0.76953125, "learning_rate": 0.0001846833605499467, "loss": 0.9801, "step": 11200 }, { "epoch": 0.2876102715202986, "grad_norm": 0.81640625, "learning_rate": 0.0001846809861445507, "loss": 0.977, "step": 11201 }, { "epoch": 0.28763594871622045, "grad_norm": 0.80078125, "learning_rate": 0.00018467861157039312, "loss": 0.8504, "step": 11202 }, { "epoch": 0.2876616259121423, "grad_norm": 0.765625, "learning_rate": 0.00018467623682747873, "loss": 0.963, "step": 11203 }, { "epoch": 0.28768730310806406, "grad_norm": 0.73828125, "learning_rate": 0.0001846738619158123, "loss": 0.8752, "step": 11204 }, { "epoch": 0.2877129803039859, "grad_norm": 0.87109375, "learning_rate": 0.00018467148683539846, "loss": 1.0267, "step": 11205 }, { "epoch": 0.28773865749990774, "grad_norm": 0.79296875, "learning_rate": 0.000184669111586242, "loss": 1.0125, "step": 11206 }, { "epoch": 0.2877643346958295, "grad_norm": 0.8046875, "learning_rate": 0.00018466673616834766, "loss": 0.9137, "step": 11207 }, { "epoch": 0.28779001189175135, "grad_norm": 0.83984375, "learning_rate": 0.00018466436058172016, "loss": 1.101, "step": 11208 }, { "epoch": 0.2878156890876732, "grad_norm": 0.78515625, "learning_rate": 0.00018466198482636423, "loss": 1.0279, "step": 11209 }, { "epoch": 0.287841366283595, "grad_norm": 0.71484375, "learning_rate": 0.00018465960890228463, "loss": 0.8754, "step": 11210 }, { "epoch": 0.2878670434795168, "grad_norm": 0.875, "learning_rate": 0.00018465723280948606, "loss": 1.1336, "step": 11211 }, { "epoch": 0.28789272067543864, "grad_norm": 0.8125, "learning_rate": 0.00018465485654797328, "loss": 1.099, "step": 11212 }, { "epoch": 0.2879183978713605, "grad_norm": 0.78515625, "learning_rate": 0.000184652480117751, "loss": 0.8823, "step": 11213 }, { "epoch": 0.28794407506728226, "grad_norm": 0.76171875, "learning_rate": 0.00018465010351882398, "loss": 0.8842, "step": 11214 }, { "epoch": 0.2879697522632041, "grad_norm": 0.71484375, "learning_rate": 0.00018464772675119698, "loss": 0.9244, "step": 11215 }, { "epoch": 0.28799542945912593, "grad_norm": 1.0, "learning_rate": 0.00018464534981487468, "loss": 1.038, "step": 11216 }, { "epoch": 0.2880211066550477, "grad_norm": 0.828125, "learning_rate": 0.00018464297270986185, "loss": 0.9274, "step": 11217 }, { "epoch": 0.28804678385096955, "grad_norm": 0.8515625, "learning_rate": 0.0001846405954361632, "loss": 1.0016, "step": 11218 }, { "epoch": 0.2880724610468914, "grad_norm": 0.83203125, "learning_rate": 0.00018463821799378352, "loss": 1.1263, "step": 11219 }, { "epoch": 0.2880981382428132, "grad_norm": 0.8046875, "learning_rate": 0.0001846358403827275, "loss": 0.9781, "step": 11220 }, { "epoch": 0.288123815438735, "grad_norm": 0.87890625, "learning_rate": 0.00018463346260299993, "loss": 1.038, "step": 11221 }, { "epoch": 0.28814949263465683, "grad_norm": 0.79296875, "learning_rate": 0.00018463108465460547, "loss": 0.9479, "step": 11222 }, { "epoch": 0.28817516983057867, "grad_norm": 0.828125, "learning_rate": 0.00018462870653754892, "loss": 1.0517, "step": 11223 }, { "epoch": 0.28820084702650045, "grad_norm": 0.75390625, "learning_rate": 0.000184626328251835, "loss": 1.0445, "step": 11224 }, { "epoch": 0.2882265242224223, "grad_norm": 0.765625, "learning_rate": 0.00018462394979746846, "loss": 1.0725, "step": 11225 }, { "epoch": 0.2882522014183441, "grad_norm": 0.73828125, "learning_rate": 0.00018462157117445403, "loss": 1.0321, "step": 11226 }, { "epoch": 0.2882778786142659, "grad_norm": 0.79296875, "learning_rate": 0.00018461919238279642, "loss": 1.0368, "step": 11227 }, { "epoch": 0.28830355581018774, "grad_norm": 0.828125, "learning_rate": 0.00018461681342250044, "loss": 1.0727, "step": 11228 }, { "epoch": 0.2883292330061096, "grad_norm": 0.765625, "learning_rate": 0.00018461443429357077, "loss": 0.8902, "step": 11229 }, { "epoch": 0.2883549102020314, "grad_norm": 0.828125, "learning_rate": 0.0001846120549960122, "loss": 1.0177, "step": 11230 }, { "epoch": 0.2883805873979532, "grad_norm": 0.80859375, "learning_rate": 0.00018460967552982944, "loss": 0.896, "step": 11231 }, { "epoch": 0.28840626459387503, "grad_norm": 0.84375, "learning_rate": 0.00018460729589502722, "loss": 1.0843, "step": 11232 }, { "epoch": 0.28843194178979686, "grad_norm": 0.7890625, "learning_rate": 0.00018460491609161034, "loss": 0.9703, "step": 11233 }, { "epoch": 0.28845761898571864, "grad_norm": 0.76953125, "learning_rate": 0.00018460253611958348, "loss": 1.0202, "step": 11234 }, { "epoch": 0.2884832961816405, "grad_norm": 0.80859375, "learning_rate": 0.00018460015597895142, "loss": 0.9104, "step": 11235 }, { "epoch": 0.2885089733775623, "grad_norm": 0.8125, "learning_rate": 0.00018459777566971885, "loss": 0.8703, "step": 11236 }, { "epoch": 0.2885346505734841, "grad_norm": 0.734375, "learning_rate": 0.0001845953951918906, "loss": 0.9433, "step": 11237 }, { "epoch": 0.28856032776940593, "grad_norm": 0.734375, "learning_rate": 0.00018459301454547137, "loss": 0.988, "step": 11238 }, { "epoch": 0.28858600496532777, "grad_norm": 0.6796875, "learning_rate": 0.00018459063373046588, "loss": 0.9159, "step": 11239 }, { "epoch": 0.2886116821612496, "grad_norm": 0.81640625, "learning_rate": 0.0001845882527468789, "loss": 0.9833, "step": 11240 }, { "epoch": 0.2886373593571714, "grad_norm": 0.7890625, "learning_rate": 0.00018458587159471518, "loss": 1.1052, "step": 11241 }, { "epoch": 0.2886630365530932, "grad_norm": 0.74609375, "learning_rate": 0.00018458349027397945, "loss": 0.9298, "step": 11242 }, { "epoch": 0.28868871374901506, "grad_norm": 0.796875, "learning_rate": 0.00018458110878467648, "loss": 0.9327, "step": 11243 }, { "epoch": 0.28871439094493684, "grad_norm": 0.8671875, "learning_rate": 0.00018457872712681099, "loss": 1.1552, "step": 11244 }, { "epoch": 0.2887400681408587, "grad_norm": 0.82421875, "learning_rate": 0.00018457634530038775, "loss": 1.0226, "step": 11245 }, { "epoch": 0.2887657453367805, "grad_norm": 0.8671875, "learning_rate": 0.00018457396330541148, "loss": 1.1991, "step": 11246 }, { "epoch": 0.2887914225327023, "grad_norm": 0.86328125, "learning_rate": 0.00018457158114188694, "loss": 1.043, "step": 11247 }, { "epoch": 0.2888170997286241, "grad_norm": 0.81640625, "learning_rate": 0.00018456919880981886, "loss": 0.9525, "step": 11248 }, { "epoch": 0.28884277692454596, "grad_norm": 0.75, "learning_rate": 0.00018456681630921206, "loss": 0.935, "step": 11249 }, { "epoch": 0.2888684541204678, "grad_norm": 0.7578125, "learning_rate": 0.0001845644336400712, "loss": 1.0232, "step": 11250 }, { "epoch": 0.2888941313163896, "grad_norm": 0.7734375, "learning_rate": 0.00018456205080240106, "loss": 0.996, "step": 11251 }, { "epoch": 0.2889198085123114, "grad_norm": 0.78125, "learning_rate": 0.0001845596677962064, "loss": 0.9533, "step": 11252 }, { "epoch": 0.28894548570823325, "grad_norm": 0.8046875, "learning_rate": 0.00018455728462149194, "loss": 0.9104, "step": 11253 }, { "epoch": 0.28897116290415503, "grad_norm": 0.75, "learning_rate": 0.00018455490127826247, "loss": 0.8464, "step": 11254 }, { "epoch": 0.28899684010007687, "grad_norm": 0.89453125, "learning_rate": 0.0001845525177665227, "loss": 1.1301, "step": 11255 }, { "epoch": 0.2890225172959987, "grad_norm": 0.796875, "learning_rate": 0.00018455013408627741, "loss": 0.9698, "step": 11256 }, { "epoch": 0.2890481944919205, "grad_norm": 0.859375, "learning_rate": 0.00018454775023753138, "loss": 0.9686, "step": 11257 }, { "epoch": 0.2890738716878423, "grad_norm": 0.75390625, "learning_rate": 0.00018454536622028927, "loss": 0.9741, "step": 11258 }, { "epoch": 0.28909954888376416, "grad_norm": 0.78125, "learning_rate": 0.00018454298203455588, "loss": 0.9294, "step": 11259 }, { "epoch": 0.289125226079686, "grad_norm": 0.7578125, "learning_rate": 0.00018454059768033601, "loss": 0.9001, "step": 11260 }, { "epoch": 0.2891509032756078, "grad_norm": 0.80859375, "learning_rate": 0.00018453821315763433, "loss": 0.9725, "step": 11261 }, { "epoch": 0.2891765804715296, "grad_norm": 0.73828125, "learning_rate": 0.00018453582846645564, "loss": 0.9142, "step": 11262 }, { "epoch": 0.28920225766745145, "grad_norm": 0.76953125, "learning_rate": 0.00018453344360680468, "loss": 1.0668, "step": 11263 }, { "epoch": 0.2892279348633732, "grad_norm": 0.84375, "learning_rate": 0.00018453105857868619, "loss": 0.9822, "step": 11264 }, { "epoch": 0.28925361205929506, "grad_norm": 0.78125, "learning_rate": 0.00018452867338210495, "loss": 0.9623, "step": 11265 }, { "epoch": 0.2892792892552169, "grad_norm": 0.8046875, "learning_rate": 0.0001845262880170657, "loss": 1.076, "step": 11266 }, { "epoch": 0.2893049664511387, "grad_norm": 0.74609375, "learning_rate": 0.00018452390248357317, "loss": 1.0164, "step": 11267 }, { "epoch": 0.2893306436470605, "grad_norm": 0.76953125, "learning_rate": 0.00018452151678163215, "loss": 1.0045, "step": 11268 }, { "epoch": 0.28935632084298235, "grad_norm": 0.828125, "learning_rate": 0.00018451913091124738, "loss": 1.1191, "step": 11269 }, { "epoch": 0.2893819980389042, "grad_norm": 0.81640625, "learning_rate": 0.00018451674487242364, "loss": 1.0075, "step": 11270 }, { "epoch": 0.28940767523482597, "grad_norm": 0.85546875, "learning_rate": 0.00018451435866516564, "loss": 0.8958, "step": 11271 }, { "epoch": 0.2894333524307478, "grad_norm": 0.78125, "learning_rate": 0.00018451197228947815, "loss": 0.9031, "step": 11272 }, { "epoch": 0.28945902962666964, "grad_norm": 0.8046875, "learning_rate": 0.00018450958574536594, "loss": 0.8772, "step": 11273 }, { "epoch": 0.2894847068225914, "grad_norm": 0.8125, "learning_rate": 0.00018450719903283377, "loss": 0.8156, "step": 11274 }, { "epoch": 0.28951038401851326, "grad_norm": 0.85546875, "learning_rate": 0.00018450481215188637, "loss": 1.0085, "step": 11275 }, { "epoch": 0.2895360612144351, "grad_norm": 0.7421875, "learning_rate": 0.0001845024251025285, "loss": 1.033, "step": 11276 }, { "epoch": 0.2895617384103569, "grad_norm": 0.7890625, "learning_rate": 0.00018450003788476497, "loss": 0.9354, "step": 11277 }, { "epoch": 0.2895874156062787, "grad_norm": 0.80078125, "learning_rate": 0.00018449765049860048, "loss": 1.0098, "step": 11278 }, { "epoch": 0.28961309280220054, "grad_norm": 0.75390625, "learning_rate": 0.0001844952629440398, "loss": 0.8291, "step": 11279 }, { "epoch": 0.2896387699981224, "grad_norm": 0.77734375, "learning_rate": 0.0001844928752210877, "loss": 0.9777, "step": 11280 }, { "epoch": 0.28966444719404416, "grad_norm": 0.7734375, "learning_rate": 0.00018449048732974892, "loss": 0.8723, "step": 11281 }, { "epoch": 0.289690124389966, "grad_norm": 0.76171875, "learning_rate": 0.00018448809927002825, "loss": 0.8504, "step": 11282 }, { "epoch": 0.28971580158588783, "grad_norm": 1.390625, "learning_rate": 0.00018448571104193043, "loss": 0.9256, "step": 11283 }, { "epoch": 0.2897414787818096, "grad_norm": 0.83203125, "learning_rate": 0.0001844833226454602, "loss": 1.0378, "step": 11284 }, { "epoch": 0.28976715597773145, "grad_norm": 0.875, "learning_rate": 0.00018448093408062233, "loss": 1.0179, "step": 11285 }, { "epoch": 0.2897928331736533, "grad_norm": 0.765625, "learning_rate": 0.00018447854534742164, "loss": 0.9473, "step": 11286 }, { "epoch": 0.28981851036957507, "grad_norm": 0.80078125, "learning_rate": 0.0001844761564458628, "loss": 0.937, "step": 11287 }, { "epoch": 0.2898441875654969, "grad_norm": 0.75390625, "learning_rate": 0.00018447376737595062, "loss": 0.9573, "step": 11288 }, { "epoch": 0.28986986476141874, "grad_norm": 0.73828125, "learning_rate": 0.00018447137813768985, "loss": 0.9965, "step": 11289 }, { "epoch": 0.2898955419573406, "grad_norm": 0.7578125, "learning_rate": 0.00018446898873108526, "loss": 0.9032, "step": 11290 }, { "epoch": 0.28992121915326236, "grad_norm": 0.80859375, "learning_rate": 0.0001844665991561416, "loss": 0.936, "step": 11291 }, { "epoch": 0.2899468963491842, "grad_norm": 1.640625, "learning_rate": 0.00018446420941286364, "loss": 1.0897, "step": 11292 }, { "epoch": 0.289972573545106, "grad_norm": 0.80078125, "learning_rate": 0.00018446181950125613, "loss": 0.8121, "step": 11293 }, { "epoch": 0.2899982507410278, "grad_norm": 0.76953125, "learning_rate": 0.00018445942942132385, "loss": 1.057, "step": 11294 }, { "epoch": 0.29002392793694964, "grad_norm": 0.765625, "learning_rate": 0.00018445703917307156, "loss": 0.8838, "step": 11295 }, { "epoch": 0.2900496051328715, "grad_norm": 0.7578125, "learning_rate": 0.00018445464875650402, "loss": 1.0663, "step": 11296 }, { "epoch": 0.29007528232879326, "grad_norm": 0.734375, "learning_rate": 0.000184452258171626, "loss": 0.9988, "step": 11297 }, { "epoch": 0.2901009595247151, "grad_norm": 0.7890625, "learning_rate": 0.00018444986741844225, "loss": 1.0065, "step": 11298 }, { "epoch": 0.29012663672063693, "grad_norm": 0.7734375, "learning_rate": 0.00018444747649695752, "loss": 0.9816, "step": 11299 }, { "epoch": 0.29015231391655877, "grad_norm": 0.8828125, "learning_rate": 0.00018444508540717665, "loss": 1.098, "step": 11300 }, { "epoch": 0.29017799111248055, "grad_norm": 0.86328125, "learning_rate": 0.0001844426941491043, "loss": 0.9151, "step": 11301 }, { "epoch": 0.2902036683084024, "grad_norm": 0.86328125, "learning_rate": 0.0001844403027227453, "loss": 1.0008, "step": 11302 }, { "epoch": 0.2902293455043242, "grad_norm": 0.76953125, "learning_rate": 0.00018443791112810444, "loss": 0.9516, "step": 11303 }, { "epoch": 0.290255022700246, "grad_norm": 0.81640625, "learning_rate": 0.00018443551936518643, "loss": 1.1905, "step": 11304 }, { "epoch": 0.29028069989616784, "grad_norm": 0.84765625, "learning_rate": 0.00018443312743399604, "loss": 1.0079, "step": 11305 }, { "epoch": 0.2903063770920897, "grad_norm": 0.8359375, "learning_rate": 0.00018443073533453807, "loss": 1.0805, "step": 11306 }, { "epoch": 0.29033205428801145, "grad_norm": 0.78125, "learning_rate": 0.00018442834306681727, "loss": 1.0564, "step": 11307 }, { "epoch": 0.2903577314839333, "grad_norm": 0.734375, "learning_rate": 0.0001844259506308384, "loss": 0.9729, "step": 11308 }, { "epoch": 0.2903834086798551, "grad_norm": 0.8515625, "learning_rate": 0.00018442355802660623, "loss": 1.0642, "step": 11309 }, { "epoch": 0.29040908587577696, "grad_norm": 0.78515625, "learning_rate": 0.00018442116525412553, "loss": 0.9846, "step": 11310 }, { "epoch": 0.29043476307169874, "grad_norm": 0.90234375, "learning_rate": 0.00018441877231340113, "loss": 0.8901, "step": 11311 }, { "epoch": 0.2904604402676206, "grad_norm": 0.703125, "learning_rate": 0.00018441637920443767, "loss": 1.0249, "step": 11312 }, { "epoch": 0.2904861174635424, "grad_norm": 0.80859375, "learning_rate": 0.00018441398592724005, "loss": 1.1697, "step": 11313 }, { "epoch": 0.2905117946594642, "grad_norm": 1.421875, "learning_rate": 0.00018441159248181295, "loss": 1.1086, "step": 11314 }, { "epoch": 0.29053747185538603, "grad_norm": 0.8359375, "learning_rate": 0.00018440919886816116, "loss": 0.9635, "step": 11315 }, { "epoch": 0.29056314905130787, "grad_norm": 0.74609375, "learning_rate": 0.0001844068050862895, "loss": 0.9906, "step": 11316 }, { "epoch": 0.29058882624722965, "grad_norm": 0.828125, "learning_rate": 0.00018440441113620266, "loss": 0.9877, "step": 11317 }, { "epoch": 0.2906145034431515, "grad_norm": 0.88671875, "learning_rate": 0.0001844020170179055, "loss": 0.7911, "step": 11318 }, { "epoch": 0.2906401806390733, "grad_norm": 0.78515625, "learning_rate": 0.0001843996227314027, "loss": 1.0059, "step": 11319 }, { "epoch": 0.29066585783499516, "grad_norm": 0.7578125, "learning_rate": 0.00018439722827669907, "loss": 0.9644, "step": 11320 }, { "epoch": 0.29069153503091694, "grad_norm": 0.76171875, "learning_rate": 0.00018439483365379942, "loss": 0.798, "step": 11321 }, { "epoch": 0.2907172122268388, "grad_norm": 0.78515625, "learning_rate": 0.00018439243886270848, "loss": 1.0024, "step": 11322 }, { "epoch": 0.2907428894227606, "grad_norm": 0.85546875, "learning_rate": 0.00018439004390343104, "loss": 1.0498, "step": 11323 }, { "epoch": 0.2907685666186824, "grad_norm": 0.796875, "learning_rate": 0.00018438764877597185, "loss": 1.1122, "step": 11324 }, { "epoch": 0.2907942438146042, "grad_norm": 0.8828125, "learning_rate": 0.0001843852534803357, "loss": 1.082, "step": 11325 }, { "epoch": 0.29081992101052606, "grad_norm": 0.85546875, "learning_rate": 0.0001843828580165274, "loss": 1.0537, "step": 11326 }, { "epoch": 0.29084559820644784, "grad_norm": 0.84375, "learning_rate": 0.00018438046238455162, "loss": 0.9958, "step": 11327 }, { "epoch": 0.2908712754023697, "grad_norm": 0.7890625, "learning_rate": 0.00018437806658441326, "loss": 0.9549, "step": 11328 }, { "epoch": 0.2908969525982915, "grad_norm": 0.84375, "learning_rate": 0.000184375670616117, "loss": 0.9134, "step": 11329 }, { "epoch": 0.29092262979421335, "grad_norm": 0.82421875, "learning_rate": 0.00018437327447966766, "loss": 1.1241, "step": 11330 }, { "epoch": 0.29094830699013513, "grad_norm": 0.78515625, "learning_rate": 0.00018437087817507001, "loss": 0.8985, "step": 11331 }, { "epoch": 0.29097398418605697, "grad_norm": 0.80859375, "learning_rate": 0.00018436848170232882, "loss": 0.9616, "step": 11332 }, { "epoch": 0.2909996613819788, "grad_norm": 1.78125, "learning_rate": 0.00018436608506144887, "loss": 0.9506, "step": 11333 }, { "epoch": 0.2910253385779006, "grad_norm": 0.75, "learning_rate": 0.0001843636882524349, "loss": 0.9526, "step": 11334 }, { "epoch": 0.2910510157738224, "grad_norm": 0.77734375, "learning_rate": 0.00018436129127529175, "loss": 0.9093, "step": 11335 }, { "epoch": 0.29107669296974426, "grad_norm": 0.7578125, "learning_rate": 0.0001843588941300242, "loss": 0.9977, "step": 11336 }, { "epoch": 0.29110237016566604, "grad_norm": 0.77734375, "learning_rate": 0.00018435649681663692, "loss": 1.0523, "step": 11337 }, { "epoch": 0.29112804736158787, "grad_norm": 0.76171875, "learning_rate": 0.0001843540993351348, "loss": 0.8869, "step": 11338 }, { "epoch": 0.2911537245575097, "grad_norm": 0.859375, "learning_rate": 0.00018435170168552262, "loss": 0.9154, "step": 11339 }, { "epoch": 0.29117940175343154, "grad_norm": 0.7421875, "learning_rate": 0.00018434930386780504, "loss": 1.0212, "step": 11340 }, { "epoch": 0.2912050789493533, "grad_norm": 0.8046875, "learning_rate": 0.00018434690588198696, "loss": 1.0545, "step": 11341 }, { "epoch": 0.29123075614527516, "grad_norm": 0.79296875, "learning_rate": 0.00018434450772807312, "loss": 0.7989, "step": 11342 }, { "epoch": 0.291256433341197, "grad_norm": 0.82421875, "learning_rate": 0.00018434210940606826, "loss": 1.2781, "step": 11343 }, { "epoch": 0.2912821105371188, "grad_norm": 0.83984375, "learning_rate": 0.00018433971091597723, "loss": 0.8917, "step": 11344 }, { "epoch": 0.2913077877330406, "grad_norm": 0.8125, "learning_rate": 0.00018433731225780476, "loss": 1.1552, "step": 11345 }, { "epoch": 0.29133346492896245, "grad_norm": 0.72265625, "learning_rate": 0.00018433491343155565, "loss": 0.8915, "step": 11346 }, { "epoch": 0.29135914212488423, "grad_norm": 0.81640625, "learning_rate": 0.00018433251443723467, "loss": 0.8414, "step": 11347 }, { "epoch": 0.29138481932080607, "grad_norm": 0.7421875, "learning_rate": 0.00018433011527484663, "loss": 0.9701, "step": 11348 }, { "epoch": 0.2914104965167279, "grad_norm": 0.75, "learning_rate": 0.00018432771594439627, "loss": 0.9378, "step": 11349 }, { "epoch": 0.29143617371264974, "grad_norm": 0.765625, "learning_rate": 0.00018432531644588837, "loss": 1.0319, "step": 11350 }, { "epoch": 0.2914618509085715, "grad_norm": 0.77734375, "learning_rate": 0.00018432291677932775, "loss": 1.0049, "step": 11351 }, { "epoch": 0.29148752810449335, "grad_norm": 0.734375, "learning_rate": 0.00018432051694471918, "loss": 1.0344, "step": 11352 }, { "epoch": 0.2915132053004152, "grad_norm": 0.77734375, "learning_rate": 0.00018431811694206743, "loss": 1.0672, "step": 11353 }, { "epoch": 0.29153888249633697, "grad_norm": 0.76171875, "learning_rate": 0.00018431571677137728, "loss": 0.966, "step": 11354 }, { "epoch": 0.2915645596922588, "grad_norm": 0.81640625, "learning_rate": 0.00018431331643265354, "loss": 1.0224, "step": 11355 }, { "epoch": 0.29159023688818064, "grad_norm": 0.76953125, "learning_rate": 0.00018431091592590096, "loss": 0.9031, "step": 11356 }, { "epoch": 0.2916159140841024, "grad_norm": 0.796875, "learning_rate": 0.00018430851525112434, "loss": 1.0666, "step": 11357 }, { "epoch": 0.29164159128002426, "grad_norm": 0.79296875, "learning_rate": 0.00018430611440832845, "loss": 0.9093, "step": 11358 }, { "epoch": 0.2916672684759461, "grad_norm": 0.7578125, "learning_rate": 0.00018430371339751814, "loss": 0.9648, "step": 11359 }, { "epoch": 0.2916929456718679, "grad_norm": 0.78125, "learning_rate": 0.0001843013122186981, "loss": 1.0587, "step": 11360 }, { "epoch": 0.2917186228677897, "grad_norm": 0.828125, "learning_rate": 0.00018429891087187316, "loss": 0.9436, "step": 11361 }, { "epoch": 0.29174430006371155, "grad_norm": 0.7265625, "learning_rate": 0.00018429650935704813, "loss": 0.9182, "step": 11362 }, { "epoch": 0.2917699772596334, "grad_norm": 0.76953125, "learning_rate": 0.00018429410767422776, "loss": 0.968, "step": 11363 }, { "epoch": 0.29179565445555516, "grad_norm": 0.84375, "learning_rate": 0.00018429170582341686, "loss": 0.8433, "step": 11364 }, { "epoch": 0.291821331651477, "grad_norm": 0.8984375, "learning_rate": 0.00018428930380462018, "loss": 0.9727, "step": 11365 }, { "epoch": 0.29184700884739884, "grad_norm": 0.828125, "learning_rate": 0.00018428690161784256, "loss": 1.016, "step": 11366 }, { "epoch": 0.2918726860433206, "grad_norm": 0.81640625, "learning_rate": 0.00018428449926308874, "loss": 0.9594, "step": 11367 }, { "epoch": 0.29189836323924245, "grad_norm": 0.828125, "learning_rate": 0.00018428209674036354, "loss": 1.0356, "step": 11368 }, { "epoch": 0.2919240404351643, "grad_norm": 0.6953125, "learning_rate": 0.00018427969404967174, "loss": 0.9281, "step": 11369 }, { "epoch": 0.29194971763108607, "grad_norm": 0.75, "learning_rate": 0.00018427729119101807, "loss": 1.0192, "step": 11370 }, { "epoch": 0.2919753948270079, "grad_norm": 0.8046875, "learning_rate": 0.00018427488816440742, "loss": 1.0622, "step": 11371 }, { "epoch": 0.29200107202292974, "grad_norm": 0.83984375, "learning_rate": 0.00018427248496984453, "loss": 0.904, "step": 11372 }, { "epoch": 0.2920267492188516, "grad_norm": 0.7734375, "learning_rate": 0.00018427008160733417, "loss": 0.8777, "step": 11373 }, { "epoch": 0.29205242641477336, "grad_norm": 0.78125, "learning_rate": 0.00018426767807688116, "loss": 1.0688, "step": 11374 }, { "epoch": 0.2920781036106952, "grad_norm": 0.78515625, "learning_rate": 0.00018426527437849028, "loss": 1.0021, "step": 11375 }, { "epoch": 0.29210378080661703, "grad_norm": 0.80859375, "learning_rate": 0.00018426287051216632, "loss": 0.8625, "step": 11376 }, { "epoch": 0.2921294580025388, "grad_norm": 0.77734375, "learning_rate": 0.00018426046647791405, "loss": 0.9802, "step": 11377 }, { "epoch": 0.29215513519846065, "grad_norm": 0.84375, "learning_rate": 0.00018425806227573833, "loss": 0.9755, "step": 11378 }, { "epoch": 0.2921808123943825, "grad_norm": 0.80078125, "learning_rate": 0.00018425565790564384, "loss": 0.9305, "step": 11379 }, { "epoch": 0.29220648959030426, "grad_norm": 0.80859375, "learning_rate": 0.0001842532533676355, "loss": 0.8743, "step": 11380 }, { "epoch": 0.2922321667862261, "grad_norm": 0.7578125, "learning_rate": 0.000184250848661718, "loss": 1.0871, "step": 11381 }, { "epoch": 0.29225784398214794, "grad_norm": 0.79296875, "learning_rate": 0.00018424844378789616, "loss": 0.8893, "step": 11382 }, { "epoch": 0.29228352117806977, "grad_norm": 0.80078125, "learning_rate": 0.0001842460387461748, "loss": 0.9909, "step": 11383 }, { "epoch": 0.29230919837399155, "grad_norm": 0.72265625, "learning_rate": 0.0001842436335365587, "loss": 0.8441, "step": 11384 }, { "epoch": 0.2923348755699134, "grad_norm": 0.8125, "learning_rate": 0.00018424122815905263, "loss": 0.9816, "step": 11385 }, { "epoch": 0.2923605527658352, "grad_norm": 0.86328125, "learning_rate": 0.0001842388226136614, "loss": 1.0494, "step": 11386 }, { "epoch": 0.292386229961757, "grad_norm": 0.94921875, "learning_rate": 0.0001842364169003898, "loss": 1.0824, "step": 11387 }, { "epoch": 0.29241190715767884, "grad_norm": 0.859375, "learning_rate": 0.00018423401101924267, "loss": 1.0551, "step": 11388 }, { "epoch": 0.2924375843536007, "grad_norm": 1.03125, "learning_rate": 0.00018423160497022475, "loss": 0.995, "step": 11389 }, { "epoch": 0.29246326154952246, "grad_norm": 0.78515625, "learning_rate": 0.00018422919875334083, "loss": 1.0529, "step": 11390 }, { "epoch": 0.2924889387454443, "grad_norm": 0.84765625, "learning_rate": 0.0001842267923685957, "loss": 1.1055, "step": 11391 }, { "epoch": 0.29251461594136613, "grad_norm": 0.80078125, "learning_rate": 0.00018422438581599425, "loss": 0.9276, "step": 11392 }, { "epoch": 0.29254029313728797, "grad_norm": 0.75390625, "learning_rate": 0.00018422197909554114, "loss": 0.9277, "step": 11393 }, { "epoch": 0.29256597033320975, "grad_norm": 0.79296875, "learning_rate": 0.00018421957220724129, "loss": 0.7632, "step": 11394 }, { "epoch": 0.2925916475291316, "grad_norm": 0.78515625, "learning_rate": 0.00018421716515109942, "loss": 0.9495, "step": 11395 }, { "epoch": 0.2926173247250534, "grad_norm": 0.80859375, "learning_rate": 0.00018421475792712032, "loss": 0.8584, "step": 11396 }, { "epoch": 0.2926430019209752, "grad_norm": 0.859375, "learning_rate": 0.00018421235053530885, "loss": 0.9168, "step": 11397 }, { "epoch": 0.29266867911689703, "grad_norm": 0.79296875, "learning_rate": 0.00018420994297566976, "loss": 0.928, "step": 11398 }, { "epoch": 0.29269435631281887, "grad_norm": 0.84765625, "learning_rate": 0.00018420753524820785, "loss": 0.8907, "step": 11399 }, { "epoch": 0.29272003350874065, "grad_norm": 0.78515625, "learning_rate": 0.00018420512735292796, "loss": 0.8723, "step": 11400 }, { "epoch": 0.2927457107046625, "grad_norm": 0.8515625, "learning_rate": 0.00018420271928983484, "loss": 0.8784, "step": 11401 }, { "epoch": 0.2927713879005843, "grad_norm": 0.8046875, "learning_rate": 0.00018420031105893328, "loss": 1.006, "step": 11402 }, { "epoch": 0.29279706509650616, "grad_norm": 0.75390625, "learning_rate": 0.00018419790266022815, "loss": 0.9155, "step": 11403 }, { "epoch": 0.29282274229242794, "grad_norm": 0.79296875, "learning_rate": 0.0001841954940937242, "loss": 1.1546, "step": 11404 }, { "epoch": 0.2928484194883498, "grad_norm": 1.0859375, "learning_rate": 0.0001841930853594262, "loss": 1.1254, "step": 11405 }, { "epoch": 0.2928740966842716, "grad_norm": 0.84375, "learning_rate": 0.000184190676457339, "loss": 1.0282, "step": 11406 }, { "epoch": 0.2928997738801934, "grad_norm": 0.875, "learning_rate": 0.0001841882673874674, "loss": 1.0496, "step": 11407 }, { "epoch": 0.29292545107611523, "grad_norm": 0.8515625, "learning_rate": 0.0001841858581498162, "loss": 0.8594, "step": 11408 }, { "epoch": 0.29295112827203706, "grad_norm": 0.7890625, "learning_rate": 0.00018418344874439015, "loss": 0.9611, "step": 11409 }, { "epoch": 0.29297680546795885, "grad_norm": 0.7890625, "learning_rate": 0.00018418103917119412, "loss": 0.966, "step": 11410 }, { "epoch": 0.2930024826638807, "grad_norm": 0.828125, "learning_rate": 0.00018417862943023288, "loss": 1.0956, "step": 11411 }, { "epoch": 0.2930281598598025, "grad_norm": 0.83203125, "learning_rate": 0.00018417621952151122, "loss": 0.9774, "step": 11412 }, { "epoch": 0.29305383705572435, "grad_norm": 1.84375, "learning_rate": 0.00018417380944503396, "loss": 0.985, "step": 11413 }, { "epoch": 0.29307951425164613, "grad_norm": 0.7421875, "learning_rate": 0.00018417139920080594, "loss": 0.8535, "step": 11414 }, { "epoch": 0.29310519144756797, "grad_norm": 0.74609375, "learning_rate": 0.00018416898878883186, "loss": 0.9422, "step": 11415 }, { "epoch": 0.2931308686434898, "grad_norm": 0.8125, "learning_rate": 0.0001841665782091166, "loss": 0.911, "step": 11416 }, { "epoch": 0.2931565458394116, "grad_norm": 0.72265625, "learning_rate": 0.00018416416746166499, "loss": 0.9845, "step": 11417 }, { "epoch": 0.2931822230353334, "grad_norm": 0.80859375, "learning_rate": 0.0001841617565464818, "loss": 0.8806, "step": 11418 }, { "epoch": 0.29320790023125526, "grad_norm": 0.76171875, "learning_rate": 0.00018415934546357182, "loss": 0.7721, "step": 11419 }, { "epoch": 0.29323357742717704, "grad_norm": 0.76953125, "learning_rate": 0.00018415693421293985, "loss": 0.8277, "step": 11420 }, { "epoch": 0.2932592546230989, "grad_norm": 0.8125, "learning_rate": 0.0001841545227945907, "loss": 1.1653, "step": 11421 }, { "epoch": 0.2932849318190207, "grad_norm": 0.84765625, "learning_rate": 0.0001841521112085292, "loss": 1.1003, "step": 11422 }, { "epoch": 0.29331060901494255, "grad_norm": 0.73828125, "learning_rate": 0.00018414969945476016, "loss": 0.9563, "step": 11423 }, { "epoch": 0.2933362862108643, "grad_norm": 0.82421875, "learning_rate": 0.00018414728753328837, "loss": 0.9934, "step": 11424 }, { "epoch": 0.29336196340678616, "grad_norm": 0.76171875, "learning_rate": 0.00018414487544411864, "loss": 0.9485, "step": 11425 }, { "epoch": 0.293387640602708, "grad_norm": 0.7890625, "learning_rate": 0.00018414246318725573, "loss": 1.045, "step": 11426 }, { "epoch": 0.2934133177986298, "grad_norm": 0.734375, "learning_rate": 0.00018414005076270454, "loss": 0.9718, "step": 11427 }, { "epoch": 0.2934389949945516, "grad_norm": 0.77734375, "learning_rate": 0.00018413763817046983, "loss": 1.0579, "step": 11428 }, { "epoch": 0.29346467219047345, "grad_norm": 0.87109375, "learning_rate": 0.0001841352254105564, "loss": 1.0677, "step": 11429 }, { "epoch": 0.29349034938639523, "grad_norm": 0.8125, "learning_rate": 0.00018413281248296902, "loss": 0.9983, "step": 11430 }, { "epoch": 0.29351602658231707, "grad_norm": 0.78125, "learning_rate": 0.00018413039938771259, "loss": 0.9824, "step": 11431 }, { "epoch": 0.2935417037782389, "grad_norm": 0.8828125, "learning_rate": 0.00018412798612479187, "loss": 0.9844, "step": 11432 }, { "epoch": 0.29356738097416074, "grad_norm": 0.71484375, "learning_rate": 0.00018412557269421168, "loss": 0.9529, "step": 11433 }, { "epoch": 0.2935930581700825, "grad_norm": 0.734375, "learning_rate": 0.0001841231590959768, "loss": 1.0215, "step": 11434 }, { "epoch": 0.29361873536600436, "grad_norm": 0.76953125, "learning_rate": 0.00018412074533009207, "loss": 0.9084, "step": 11435 }, { "epoch": 0.2936444125619262, "grad_norm": 0.8359375, "learning_rate": 0.0001841183313965623, "loss": 0.9463, "step": 11436 }, { "epoch": 0.293670089757848, "grad_norm": 0.76953125, "learning_rate": 0.00018411591729539228, "loss": 1.0163, "step": 11437 }, { "epoch": 0.2936957669537698, "grad_norm": 0.78515625, "learning_rate": 0.00018411350302658685, "loss": 1.0472, "step": 11438 }, { "epoch": 0.29372144414969165, "grad_norm": 0.8046875, "learning_rate": 0.0001841110885901508, "loss": 0.9479, "step": 11439 }, { "epoch": 0.2937471213456134, "grad_norm": 0.71484375, "learning_rate": 0.00018410867398608896, "loss": 1.0636, "step": 11440 }, { "epoch": 0.29377279854153526, "grad_norm": 0.75, "learning_rate": 0.0001841062592144061, "loss": 0.8938, "step": 11441 }, { "epoch": 0.2937984757374571, "grad_norm": 0.8515625, "learning_rate": 0.0001841038442751071, "loss": 0.8803, "step": 11442 }, { "epoch": 0.29382415293337893, "grad_norm": 0.8046875, "learning_rate": 0.0001841014291681967, "loss": 1.1206, "step": 11443 }, { "epoch": 0.2938498301293007, "grad_norm": 0.84765625, "learning_rate": 0.00018409901389367979, "loss": 0.9501, "step": 11444 }, { "epoch": 0.29387550732522255, "grad_norm": 0.8046875, "learning_rate": 0.00018409659845156111, "loss": 1.0067, "step": 11445 }, { "epoch": 0.2939011845211444, "grad_norm": 0.75, "learning_rate": 0.00018409418284184552, "loss": 0.8595, "step": 11446 }, { "epoch": 0.29392686171706617, "grad_norm": 0.73828125, "learning_rate": 0.0001840917670645378, "loss": 1.1174, "step": 11447 }, { "epoch": 0.293952538912988, "grad_norm": 0.8203125, "learning_rate": 0.0001840893511196428, "loss": 0.8523, "step": 11448 }, { "epoch": 0.29397821610890984, "grad_norm": 0.76953125, "learning_rate": 0.0001840869350071653, "loss": 1.0248, "step": 11449 }, { "epoch": 0.2940038933048316, "grad_norm": 0.7734375, "learning_rate": 0.00018408451872711017, "loss": 0.8944, "step": 11450 }, { "epoch": 0.29402957050075346, "grad_norm": 0.77734375, "learning_rate": 0.00018408210227948218, "loss": 0.9946, "step": 11451 }, { "epoch": 0.2940552476966753, "grad_norm": 0.87890625, "learning_rate": 0.00018407968566428612, "loss": 0.9611, "step": 11452 }, { "epoch": 0.29408092489259713, "grad_norm": 0.81640625, "learning_rate": 0.00018407726888152687, "loss": 1.1175, "step": 11453 }, { "epoch": 0.2941066020885189, "grad_norm": 0.7734375, "learning_rate": 0.00018407485193120923, "loss": 0.8521, "step": 11454 }, { "epoch": 0.29413227928444075, "grad_norm": 0.828125, "learning_rate": 0.00018407243481333797, "loss": 1.1155, "step": 11455 }, { "epoch": 0.2941579564803626, "grad_norm": 0.86328125, "learning_rate": 0.00018407001752791796, "loss": 1.0627, "step": 11456 }, { "epoch": 0.29418363367628436, "grad_norm": 0.8125, "learning_rate": 0.00018406760007495403, "loss": 1.0078, "step": 11457 }, { "epoch": 0.2942093108722062, "grad_norm": 0.83984375, "learning_rate": 0.00018406518245445093, "loss": 0.993, "step": 11458 }, { "epoch": 0.29423498806812803, "grad_norm": 0.75390625, "learning_rate": 0.00018406276466641352, "loss": 0.9414, "step": 11459 }, { "epoch": 0.2942606652640498, "grad_norm": 0.77734375, "learning_rate": 0.0001840603467108466, "loss": 0.8992, "step": 11460 }, { "epoch": 0.29428634245997165, "grad_norm": 0.8515625, "learning_rate": 0.00018405792858775503, "loss": 0.9971, "step": 11461 }, { "epoch": 0.2943120196558935, "grad_norm": 0.71484375, "learning_rate": 0.0001840555102971436, "loss": 0.9222, "step": 11462 }, { "epoch": 0.2943376968518153, "grad_norm": 0.76953125, "learning_rate": 0.0001840530918390171, "loss": 1.049, "step": 11463 }, { "epoch": 0.2943633740477371, "grad_norm": 0.7578125, "learning_rate": 0.00018405067321338042, "loss": 0.9865, "step": 11464 }, { "epoch": 0.29438905124365894, "grad_norm": 0.80078125, "learning_rate": 0.0001840482544202383, "loss": 1.0208, "step": 11465 }, { "epoch": 0.2944147284395808, "grad_norm": 0.8203125, "learning_rate": 0.0001840458354595956, "loss": 1.0743, "step": 11466 }, { "epoch": 0.29444040563550256, "grad_norm": 0.78125, "learning_rate": 0.00018404341633145718, "loss": 0.8708, "step": 11467 }, { "epoch": 0.2944660828314244, "grad_norm": 0.76953125, "learning_rate": 0.00018404099703582782, "loss": 0.8263, "step": 11468 }, { "epoch": 0.2944917600273462, "grad_norm": 0.7734375, "learning_rate": 0.00018403857757271232, "loss": 1.0023, "step": 11469 }, { "epoch": 0.294517437223268, "grad_norm": 0.87890625, "learning_rate": 0.00018403615794211552, "loss": 1.0125, "step": 11470 }, { "epoch": 0.29454311441918984, "grad_norm": 0.80859375, "learning_rate": 0.00018403373814404229, "loss": 0.9409, "step": 11471 }, { "epoch": 0.2945687916151117, "grad_norm": 0.76953125, "learning_rate": 0.00018403131817849738, "loss": 0.993, "step": 11472 }, { "epoch": 0.2945944688110335, "grad_norm": 0.8671875, "learning_rate": 0.00018402889804548567, "loss": 0.8502, "step": 11473 }, { "epoch": 0.2946201460069553, "grad_norm": 0.6953125, "learning_rate": 0.00018402647774501193, "loss": 0.9053, "step": 11474 }, { "epoch": 0.29464582320287713, "grad_norm": 0.79296875, "learning_rate": 0.000184024057277081, "loss": 0.9716, "step": 11475 }, { "epoch": 0.29467150039879897, "grad_norm": 0.74609375, "learning_rate": 0.00018402163664169773, "loss": 0.9178, "step": 11476 }, { "epoch": 0.29469717759472075, "grad_norm": 0.78515625, "learning_rate": 0.00018401921583886694, "loss": 1.045, "step": 11477 }, { "epoch": 0.2947228547906426, "grad_norm": 0.75390625, "learning_rate": 0.0001840167948685934, "loss": 1.0338, "step": 11478 }, { "epoch": 0.2947485319865644, "grad_norm": 0.76171875, "learning_rate": 0.00018401437373088202, "loss": 0.9363, "step": 11479 }, { "epoch": 0.2947742091824862, "grad_norm": 0.79296875, "learning_rate": 0.0001840119524257376, "loss": 0.9827, "step": 11480 }, { "epoch": 0.29479988637840804, "grad_norm": 0.82421875, "learning_rate": 0.00018400953095316488, "loss": 0.9309, "step": 11481 }, { "epoch": 0.2948255635743299, "grad_norm": 0.81640625, "learning_rate": 0.0001840071093131688, "loss": 0.9357, "step": 11482 }, { "epoch": 0.2948512407702517, "grad_norm": 0.78515625, "learning_rate": 0.00018400468750575413, "loss": 1.0521, "step": 11483 }, { "epoch": 0.2948769179661735, "grad_norm": 0.7890625, "learning_rate": 0.0001840022655309257, "loss": 0.835, "step": 11484 }, { "epoch": 0.2949025951620953, "grad_norm": 0.828125, "learning_rate": 0.00018399984338868835, "loss": 1.043, "step": 11485 }, { "epoch": 0.29492827235801716, "grad_norm": 0.7421875, "learning_rate": 0.00018399742107904688, "loss": 0.9569, "step": 11486 }, { "epoch": 0.29495394955393894, "grad_norm": 0.8203125, "learning_rate": 0.00018399499860200614, "loss": 1.0169, "step": 11487 }, { "epoch": 0.2949796267498608, "grad_norm": 0.8515625, "learning_rate": 0.000183992575957571, "loss": 1.1103, "step": 11488 }, { "epoch": 0.2950053039457826, "grad_norm": 0.79296875, "learning_rate": 0.0001839901531457462, "loss": 0.9251, "step": 11489 }, { "epoch": 0.2950309811417044, "grad_norm": 0.76171875, "learning_rate": 0.00018398773016653662, "loss": 0.9822, "step": 11490 }, { "epoch": 0.29505665833762623, "grad_norm": 0.734375, "learning_rate": 0.0001839853070199471, "loss": 0.8873, "step": 11491 }, { "epoch": 0.29508233553354807, "grad_norm": 0.7890625, "learning_rate": 0.00018398288370598243, "loss": 0.9142, "step": 11492 }, { "epoch": 0.2951080127294699, "grad_norm": 0.77734375, "learning_rate": 0.00018398046022464746, "loss": 1.0327, "step": 11493 }, { "epoch": 0.2951336899253917, "grad_norm": 0.8671875, "learning_rate": 0.000183978036575947, "loss": 1.0641, "step": 11494 }, { "epoch": 0.2951593671213135, "grad_norm": 0.76953125, "learning_rate": 0.00018397561275988595, "loss": 0.9094, "step": 11495 }, { "epoch": 0.29518504431723536, "grad_norm": 0.78125, "learning_rate": 0.00018397318877646902, "loss": 1.013, "step": 11496 }, { "epoch": 0.29521072151315714, "grad_norm": 0.80078125, "learning_rate": 0.00018397076462570118, "loss": 0.9031, "step": 11497 }, { "epoch": 0.295236398709079, "grad_norm": 0.86328125, "learning_rate": 0.00018396834030758713, "loss": 0.9968, "step": 11498 }, { "epoch": 0.2952620759050008, "grad_norm": 0.78125, "learning_rate": 0.0001839659158221318, "loss": 0.9748, "step": 11499 }, { "epoch": 0.2952877531009226, "grad_norm": 0.8203125, "learning_rate": 0.00018396349116934, "loss": 0.9094, "step": 11500 }, { "epoch": 0.2953134302968444, "grad_norm": 0.796875, "learning_rate": 0.00018396106634921652, "loss": 0.985, "step": 11501 }, { "epoch": 0.29533910749276626, "grad_norm": 0.8125, "learning_rate": 0.0001839586413617662, "loss": 0.8599, "step": 11502 }, { "epoch": 0.2953647846886881, "grad_norm": 0.796875, "learning_rate": 0.00018395621620699392, "loss": 0.9354, "step": 11503 }, { "epoch": 0.2953904618846099, "grad_norm": 0.74609375, "learning_rate": 0.0001839537908849045, "loss": 1.121, "step": 11504 }, { "epoch": 0.2954161390805317, "grad_norm": 0.83203125, "learning_rate": 0.00018395136539550272, "loss": 0.909, "step": 11505 }, { "epoch": 0.29544181627645355, "grad_norm": 0.8203125, "learning_rate": 0.00018394893973879348, "loss": 0.9524, "step": 11506 }, { "epoch": 0.29546749347237533, "grad_norm": 0.78125, "learning_rate": 0.00018394651391478157, "loss": 1.1266, "step": 11507 }, { "epoch": 0.29549317066829717, "grad_norm": 0.87890625, "learning_rate": 0.00018394408792347187, "loss": 0.946, "step": 11508 }, { "epoch": 0.295518847864219, "grad_norm": 0.796875, "learning_rate": 0.00018394166176486913, "loss": 1.0013, "step": 11509 }, { "epoch": 0.2955445250601408, "grad_norm": 0.88671875, "learning_rate": 0.00018393923543897828, "loss": 1.0908, "step": 11510 }, { "epoch": 0.2955702022560626, "grad_norm": 0.7578125, "learning_rate": 0.0001839368089458041, "loss": 0.8564, "step": 11511 }, { "epoch": 0.29559587945198446, "grad_norm": 0.74609375, "learning_rate": 0.00018393438228535148, "loss": 0.9957, "step": 11512 }, { "epoch": 0.2956215566479063, "grad_norm": 1.015625, "learning_rate": 0.00018393195545762518, "loss": 1.12, "step": 11513 }, { "epoch": 0.2956472338438281, "grad_norm": 0.80078125, "learning_rate": 0.00018392952846263007, "loss": 0.9274, "step": 11514 }, { "epoch": 0.2956729110397499, "grad_norm": 0.77734375, "learning_rate": 0.000183927101300371, "loss": 0.9615, "step": 11515 }, { "epoch": 0.29569858823567174, "grad_norm": 0.85546875, "learning_rate": 0.0001839246739708528, "loss": 0.9407, "step": 11516 }, { "epoch": 0.2957242654315935, "grad_norm": 0.78515625, "learning_rate": 0.00018392224647408031, "loss": 0.9327, "step": 11517 }, { "epoch": 0.29574994262751536, "grad_norm": 0.81640625, "learning_rate": 0.00018391981881005838, "loss": 0.9535, "step": 11518 }, { "epoch": 0.2957756198234372, "grad_norm": 0.74609375, "learning_rate": 0.00018391739097879182, "loss": 1.0808, "step": 11519 }, { "epoch": 0.295801297019359, "grad_norm": 0.796875, "learning_rate": 0.00018391496298028547, "loss": 0.9005, "step": 11520 }, { "epoch": 0.2958269742152808, "grad_norm": 0.75, "learning_rate": 0.00018391253481454416, "loss": 1.011, "step": 11521 }, { "epoch": 0.29585265141120265, "grad_norm": 0.80859375, "learning_rate": 0.0001839101064815728, "loss": 0.9295, "step": 11522 }, { "epoch": 0.2958783286071245, "grad_norm": 0.8515625, "learning_rate": 0.00018390767798137612, "loss": 0.9477, "step": 11523 }, { "epoch": 0.29590400580304627, "grad_norm": 0.87109375, "learning_rate": 0.00018390524931395904, "loss": 0.9868, "step": 11524 }, { "epoch": 0.2959296829989681, "grad_norm": 0.8203125, "learning_rate": 0.00018390282047932635, "loss": 0.9326, "step": 11525 }, { "epoch": 0.29595536019488994, "grad_norm": 0.8046875, "learning_rate": 0.00018390039147748297, "loss": 1.2168, "step": 11526 }, { "epoch": 0.2959810373908117, "grad_norm": 0.828125, "learning_rate": 0.00018389796230843368, "loss": 0.971, "step": 11527 }, { "epoch": 0.29600671458673355, "grad_norm": 0.77734375, "learning_rate": 0.00018389553297218331, "loss": 0.8444, "step": 11528 }, { "epoch": 0.2960323917826554, "grad_norm": 0.828125, "learning_rate": 0.00018389310346873675, "loss": 1.111, "step": 11529 }, { "epoch": 0.29605806897857717, "grad_norm": 0.765625, "learning_rate": 0.00018389067379809878, "loss": 0.8959, "step": 11530 }, { "epoch": 0.296083746174499, "grad_norm": 0.83203125, "learning_rate": 0.00018388824396027426, "loss": 0.9543, "step": 11531 }, { "epoch": 0.29610942337042084, "grad_norm": 0.796875, "learning_rate": 0.0001838858139552681, "loss": 1.1384, "step": 11532 }, { "epoch": 0.2961351005663427, "grad_norm": 0.74609375, "learning_rate": 0.00018388338378308504, "loss": 1.1333, "step": 11533 }, { "epoch": 0.29616077776226446, "grad_norm": 0.796875, "learning_rate": 0.00018388095344373, "loss": 0.916, "step": 11534 }, { "epoch": 0.2961864549581863, "grad_norm": 0.79296875, "learning_rate": 0.0001838785229372078, "loss": 0.9762, "step": 11535 }, { "epoch": 0.29621213215410813, "grad_norm": 0.84375, "learning_rate": 0.0001838760922635233, "loss": 1.1101, "step": 11536 }, { "epoch": 0.2962378093500299, "grad_norm": 0.76953125, "learning_rate": 0.00018387366142268128, "loss": 1.0287, "step": 11537 }, { "epoch": 0.29626348654595175, "grad_norm": 0.7734375, "learning_rate": 0.00018387123041468663, "loss": 0.9294, "step": 11538 }, { "epoch": 0.2962891637418736, "grad_norm": 0.890625, "learning_rate": 0.0001838687992395442, "loss": 0.949, "step": 11539 }, { "epoch": 0.29631484093779537, "grad_norm": 0.83984375, "learning_rate": 0.00018386636789725886, "loss": 1.0221, "step": 11540 }, { "epoch": 0.2963405181337172, "grad_norm": 0.78125, "learning_rate": 0.0001838639363878354, "loss": 1.0361, "step": 11541 }, { "epoch": 0.29636619532963904, "grad_norm": 0.8046875, "learning_rate": 0.0001838615047112787, "loss": 0.9436, "step": 11542 }, { "epoch": 0.2963918725255609, "grad_norm": 0.828125, "learning_rate": 0.00018385907286759358, "loss": 1.1166, "step": 11543 }, { "epoch": 0.29641754972148265, "grad_norm": 0.7421875, "learning_rate": 0.0001838566408567849, "loss": 0.857, "step": 11544 }, { "epoch": 0.2964432269174045, "grad_norm": 0.76953125, "learning_rate": 0.00018385420867885754, "loss": 0.8896, "step": 11545 }, { "epoch": 0.2964689041133263, "grad_norm": 0.83203125, "learning_rate": 0.00018385177633381628, "loss": 1.0396, "step": 11546 }, { "epoch": 0.2964945813092481, "grad_norm": 0.796875, "learning_rate": 0.00018384934382166605, "loss": 1.155, "step": 11547 }, { "epoch": 0.29652025850516994, "grad_norm": 0.7734375, "learning_rate": 0.00018384691114241162, "loss": 1.2265, "step": 11548 }, { "epoch": 0.2965459357010918, "grad_norm": 0.7890625, "learning_rate": 0.00018384447829605785, "loss": 0.9301, "step": 11549 }, { "epoch": 0.29657161289701356, "grad_norm": 0.8203125, "learning_rate": 0.00018384204528260965, "loss": 0.9381, "step": 11550 }, { "epoch": 0.2965972900929354, "grad_norm": 0.74609375, "learning_rate": 0.00018383961210207181, "loss": 0.9761, "step": 11551 }, { "epoch": 0.29662296728885723, "grad_norm": 0.76953125, "learning_rate": 0.00018383717875444919, "loss": 1.0481, "step": 11552 }, { "epoch": 0.29664864448477907, "grad_norm": 0.7421875, "learning_rate": 0.0001838347452397467, "loss": 1.0636, "step": 11553 }, { "epoch": 0.29667432168070085, "grad_norm": 0.81640625, "learning_rate": 0.00018383231155796905, "loss": 0.9948, "step": 11554 }, { "epoch": 0.2966999988766227, "grad_norm": 0.765625, "learning_rate": 0.00018382987770912122, "loss": 1.1447, "step": 11555 }, { "epoch": 0.2967256760725445, "grad_norm": 0.70703125, "learning_rate": 0.000183827443693208, "loss": 0.9859, "step": 11556 }, { "epoch": 0.2967513532684663, "grad_norm": 0.87890625, "learning_rate": 0.0001838250095102343, "loss": 1.0599, "step": 11557 }, { "epoch": 0.29677703046438814, "grad_norm": 0.90234375, "learning_rate": 0.00018382257516020488, "loss": 0.931, "step": 11558 }, { "epoch": 0.29680270766031, "grad_norm": 0.7734375, "learning_rate": 0.00018382014064312465, "loss": 0.977, "step": 11559 }, { "epoch": 0.29682838485623175, "grad_norm": 0.78515625, "learning_rate": 0.00018381770595899845, "loss": 0.9793, "step": 11560 }, { "epoch": 0.2968540620521536, "grad_norm": 1.1328125, "learning_rate": 0.00018381527110783113, "loss": 1.0813, "step": 11561 }, { "epoch": 0.2968797392480754, "grad_norm": 0.8359375, "learning_rate": 0.00018381283608962757, "loss": 1.1208, "step": 11562 }, { "epoch": 0.29690541644399726, "grad_norm": 0.79296875, "learning_rate": 0.00018381040090439257, "loss": 1.0135, "step": 11563 }, { "epoch": 0.29693109363991904, "grad_norm": 0.76171875, "learning_rate": 0.00018380796555213103, "loss": 0.9882, "step": 11564 }, { "epoch": 0.2969567708358409, "grad_norm": 0.7421875, "learning_rate": 0.00018380553003284776, "loss": 0.9078, "step": 11565 }, { "epoch": 0.2969824480317627, "grad_norm": 0.83984375, "learning_rate": 0.00018380309434654768, "loss": 0.9494, "step": 11566 }, { "epoch": 0.2970081252276845, "grad_norm": 0.81640625, "learning_rate": 0.00018380065849323557, "loss": 1.0026, "step": 11567 }, { "epoch": 0.29703380242360633, "grad_norm": 0.8671875, "learning_rate": 0.00018379822247291633, "loss": 0.935, "step": 11568 }, { "epoch": 0.29705947961952817, "grad_norm": 0.8046875, "learning_rate": 0.0001837957862855948, "loss": 0.9962, "step": 11569 }, { "epoch": 0.29708515681544995, "grad_norm": 0.8125, "learning_rate": 0.00018379334993127584, "loss": 0.8796, "step": 11570 }, { "epoch": 0.2971108340113718, "grad_norm": 0.87109375, "learning_rate": 0.00018379091340996428, "loss": 1.0827, "step": 11571 }, { "epoch": 0.2971365112072936, "grad_norm": 0.80859375, "learning_rate": 0.00018378847672166507, "loss": 1.1146, "step": 11572 }, { "epoch": 0.2971621884032154, "grad_norm": 0.765625, "learning_rate": 0.00018378603986638292, "loss": 0.9379, "step": 11573 }, { "epoch": 0.29718786559913724, "grad_norm": 0.84375, "learning_rate": 0.00018378360284412276, "loss": 1.0237, "step": 11574 }, { "epoch": 0.29721354279505907, "grad_norm": 0.75, "learning_rate": 0.00018378116565488949, "loss": 0.9589, "step": 11575 }, { "epoch": 0.2972392199909809, "grad_norm": 0.85546875, "learning_rate": 0.0001837787282986879, "loss": 1.0128, "step": 11576 }, { "epoch": 0.2972648971869027, "grad_norm": 0.75, "learning_rate": 0.0001837762907755229, "loss": 1.0325, "step": 11577 }, { "epoch": 0.2972905743828245, "grad_norm": 0.80078125, "learning_rate": 0.00018377385308539928, "loss": 0.9953, "step": 11578 }, { "epoch": 0.29731625157874636, "grad_norm": 0.83984375, "learning_rate": 0.00018377141522832194, "loss": 1.1254, "step": 11579 }, { "epoch": 0.29734192877466814, "grad_norm": 0.83984375, "learning_rate": 0.0001837689772042958, "loss": 0.9235, "step": 11580 }, { "epoch": 0.29736760597059, "grad_norm": 0.79296875, "learning_rate": 0.0001837665390133256, "loss": 1.0056, "step": 11581 }, { "epoch": 0.2973932831665118, "grad_norm": 0.765625, "learning_rate": 0.00018376410065541624, "loss": 0.9193, "step": 11582 }, { "epoch": 0.2974189603624336, "grad_norm": 0.80859375, "learning_rate": 0.00018376166213057262, "loss": 0.9173, "step": 11583 }, { "epoch": 0.29744463755835543, "grad_norm": 0.7265625, "learning_rate": 0.00018375922343879955, "loss": 0.9521, "step": 11584 }, { "epoch": 0.29747031475427727, "grad_norm": 0.8984375, "learning_rate": 0.00018375678458010196, "loss": 1.0125, "step": 11585 }, { "epoch": 0.2974959919501991, "grad_norm": 0.84375, "learning_rate": 0.00018375434555448463, "loss": 0.9734, "step": 11586 }, { "epoch": 0.2975216691461209, "grad_norm": 0.796875, "learning_rate": 0.00018375190636195246, "loss": 0.8759, "step": 11587 }, { "epoch": 0.2975473463420427, "grad_norm": 0.74609375, "learning_rate": 0.0001837494670025103, "loss": 0.8794, "step": 11588 }, { "epoch": 0.29757302353796455, "grad_norm": 0.77734375, "learning_rate": 0.00018374702747616303, "loss": 0.8926, "step": 11589 }, { "epoch": 0.29759870073388633, "grad_norm": 0.76953125, "learning_rate": 0.0001837445877829155, "loss": 0.97, "step": 11590 }, { "epoch": 0.29762437792980817, "grad_norm": 0.7578125, "learning_rate": 0.00018374214792277256, "loss": 0.8561, "step": 11591 }, { "epoch": 0.29765005512573, "grad_norm": 0.9296875, "learning_rate": 0.00018373970789573908, "loss": 1.0415, "step": 11592 }, { "epoch": 0.2976757323216518, "grad_norm": 0.72265625, "learning_rate": 0.00018373726770181995, "loss": 0.9701, "step": 11593 }, { "epoch": 0.2977014095175736, "grad_norm": 0.79296875, "learning_rate": 0.00018373482734101998, "loss": 0.9394, "step": 11594 }, { "epoch": 0.29772708671349546, "grad_norm": 0.7578125, "learning_rate": 0.0001837323868133441, "loss": 0.9376, "step": 11595 }, { "epoch": 0.2977527639094173, "grad_norm": 0.79296875, "learning_rate": 0.0001837299461187971, "loss": 0.9748, "step": 11596 }, { "epoch": 0.2977784411053391, "grad_norm": 0.84375, "learning_rate": 0.0001837275052573839, "loss": 1.0762, "step": 11597 }, { "epoch": 0.2978041183012609, "grad_norm": 1.2421875, "learning_rate": 0.00018372506422910934, "loss": 0.8556, "step": 11598 }, { "epoch": 0.29782979549718275, "grad_norm": 0.78515625, "learning_rate": 0.0001837226230339783, "loss": 1.0124, "step": 11599 }, { "epoch": 0.29785547269310453, "grad_norm": 0.76953125, "learning_rate": 0.0001837201816719956, "loss": 1.1013, "step": 11600 }, { "epoch": 0.29788114988902636, "grad_norm": 0.8203125, "learning_rate": 0.00018371774014316617, "loss": 0.9788, "step": 11601 }, { "epoch": 0.2979068270849482, "grad_norm": 0.7890625, "learning_rate": 0.00018371529844749484, "loss": 1.0311, "step": 11602 }, { "epoch": 0.29793250428087, "grad_norm": 0.78125, "learning_rate": 0.0001837128565849865, "loss": 0.846, "step": 11603 }, { "epoch": 0.2979581814767918, "grad_norm": 0.74609375, "learning_rate": 0.00018371041455564598, "loss": 0.9806, "step": 11604 }, { "epoch": 0.29798385867271365, "grad_norm": 0.77734375, "learning_rate": 0.00018370797235947817, "loss": 1.0803, "step": 11605 }, { "epoch": 0.2980095358686355, "grad_norm": 0.7578125, "learning_rate": 0.0001837055299964879, "loss": 0.8467, "step": 11606 }, { "epoch": 0.29803521306455727, "grad_norm": 0.8203125, "learning_rate": 0.00018370308746668012, "loss": 0.9341, "step": 11607 }, { "epoch": 0.2980608902604791, "grad_norm": 0.8359375, "learning_rate": 0.00018370064477005963, "loss": 1.0787, "step": 11608 }, { "epoch": 0.29808656745640094, "grad_norm": 0.80078125, "learning_rate": 0.00018369820190663132, "loss": 0.9125, "step": 11609 }, { "epoch": 0.2981122446523227, "grad_norm": 1.3359375, "learning_rate": 0.00018369575887640003, "loss": 0.8373, "step": 11610 }, { "epoch": 0.29813792184824456, "grad_norm": 0.7421875, "learning_rate": 0.00018369331567937068, "loss": 0.9493, "step": 11611 }, { "epoch": 0.2981635990441664, "grad_norm": 0.8125, "learning_rate": 0.0001836908723155481, "loss": 0.9617, "step": 11612 }, { "epoch": 0.2981892762400882, "grad_norm": 0.8515625, "learning_rate": 0.00018368842878493718, "loss": 1.1042, "step": 11613 }, { "epoch": 0.29821495343601, "grad_norm": 0.7578125, "learning_rate": 0.00018368598508754276, "loss": 0.9706, "step": 11614 }, { "epoch": 0.29824063063193185, "grad_norm": 0.74609375, "learning_rate": 0.00018368354122336975, "loss": 0.9368, "step": 11615 }, { "epoch": 0.2982663078278537, "grad_norm": 0.9296875, "learning_rate": 0.000183681097192423, "loss": 0.9489, "step": 11616 }, { "epoch": 0.29829198502377546, "grad_norm": 0.7734375, "learning_rate": 0.00018367865299470738, "loss": 1.0142, "step": 11617 }, { "epoch": 0.2983176622196973, "grad_norm": 0.76953125, "learning_rate": 0.00018367620863022775, "loss": 0.9975, "step": 11618 }, { "epoch": 0.29834333941561914, "grad_norm": 0.83984375, "learning_rate": 0.000183673764098989, "loss": 1.2263, "step": 11619 }, { "epoch": 0.2983690166115409, "grad_norm": 1.1953125, "learning_rate": 0.00018367131940099602, "loss": 1.1646, "step": 11620 }, { "epoch": 0.29839469380746275, "grad_norm": 0.7734375, "learning_rate": 0.00018366887453625364, "loss": 0.8615, "step": 11621 }, { "epoch": 0.2984203710033846, "grad_norm": 0.74609375, "learning_rate": 0.00018366642950476677, "loss": 0.9716, "step": 11622 }, { "epoch": 0.29844604819930637, "grad_norm": 2.421875, "learning_rate": 0.00018366398430654023, "loss": 1.0876, "step": 11623 }, { "epoch": 0.2984717253952282, "grad_norm": 0.75390625, "learning_rate": 0.00018366153894157895, "loss": 0.8748, "step": 11624 }, { "epoch": 0.29849740259115004, "grad_norm": 0.7578125, "learning_rate": 0.00018365909340988776, "loss": 0.8827, "step": 11625 }, { "epoch": 0.2985230797870719, "grad_norm": 0.73828125, "learning_rate": 0.00018365664771147157, "loss": 0.9658, "step": 11626 }, { "epoch": 0.29854875698299366, "grad_norm": 0.7734375, "learning_rate": 0.00018365420184633522, "loss": 0.9779, "step": 11627 }, { "epoch": 0.2985744341789155, "grad_norm": 0.80078125, "learning_rate": 0.00018365175581448363, "loss": 1.1185, "step": 11628 }, { "epoch": 0.29860011137483733, "grad_norm": 0.8203125, "learning_rate": 0.0001836493096159216, "loss": 0.9625, "step": 11629 }, { "epoch": 0.2986257885707591, "grad_norm": 0.78515625, "learning_rate": 0.0001836468632506541, "loss": 1.0174, "step": 11630 }, { "epoch": 0.29865146576668095, "grad_norm": 0.81640625, "learning_rate": 0.00018364441671868593, "loss": 1.0119, "step": 11631 }, { "epoch": 0.2986771429626028, "grad_norm": 0.8671875, "learning_rate": 0.00018364197002002203, "loss": 0.9536, "step": 11632 }, { "epoch": 0.29870282015852456, "grad_norm": 0.78125, "learning_rate": 0.00018363952315466722, "loss": 0.9077, "step": 11633 }, { "epoch": 0.2987284973544464, "grad_norm": 0.890625, "learning_rate": 0.00018363707612262638, "loss": 1.0784, "step": 11634 }, { "epoch": 0.29875417455036823, "grad_norm": 0.7734375, "learning_rate": 0.00018363462892390442, "loss": 1.0074, "step": 11635 }, { "epoch": 0.29877985174629007, "grad_norm": 0.73828125, "learning_rate": 0.00018363218155850616, "loss": 0.9651, "step": 11636 }, { "epoch": 0.29880552894221185, "grad_norm": 0.77734375, "learning_rate": 0.00018362973402643657, "loss": 0.994, "step": 11637 }, { "epoch": 0.2988312061381337, "grad_norm": 0.7734375, "learning_rate": 0.00018362728632770046, "loss": 0.9663, "step": 11638 }, { "epoch": 0.2988568833340555, "grad_norm": 0.859375, "learning_rate": 0.00018362483846230272, "loss": 1.0846, "step": 11639 }, { "epoch": 0.2988825605299773, "grad_norm": 0.71875, "learning_rate": 0.0001836223904302482, "loss": 0.8477, "step": 11640 }, { "epoch": 0.29890823772589914, "grad_norm": 0.82421875, "learning_rate": 0.00018361994223154183, "loss": 0.9916, "step": 11641 }, { "epoch": 0.298933914921821, "grad_norm": 0.74609375, "learning_rate": 0.0001836174938661885, "loss": 0.7998, "step": 11642 }, { "epoch": 0.29895959211774276, "grad_norm": 0.859375, "learning_rate": 0.00018361504533419303, "loss": 1.0091, "step": 11643 }, { "epoch": 0.2989852693136646, "grad_norm": 0.875, "learning_rate": 0.0001836125966355603, "loss": 0.9992, "step": 11644 }, { "epoch": 0.29901094650958643, "grad_norm": 0.7890625, "learning_rate": 0.00018361014777029525, "loss": 0.9682, "step": 11645 }, { "epoch": 0.29903662370550826, "grad_norm": 0.82421875, "learning_rate": 0.00018360769873840274, "loss": 1.0571, "step": 11646 }, { "epoch": 0.29906230090143004, "grad_norm": 0.81640625, "learning_rate": 0.00018360524953988763, "loss": 0.9637, "step": 11647 }, { "epoch": 0.2990879780973519, "grad_norm": 0.8203125, "learning_rate": 0.00018360280017475477, "loss": 0.9125, "step": 11648 }, { "epoch": 0.2991136552932737, "grad_norm": 0.7578125, "learning_rate": 0.00018360035064300914, "loss": 0.9777, "step": 11649 }, { "epoch": 0.2991393324891955, "grad_norm": 0.82421875, "learning_rate": 0.00018359790094465552, "loss": 0.9857, "step": 11650 }, { "epoch": 0.29916500968511733, "grad_norm": 0.75390625, "learning_rate": 0.00018359545107969884, "loss": 0.9045, "step": 11651 }, { "epoch": 0.29919068688103917, "grad_norm": 0.859375, "learning_rate": 0.000183593001048144, "loss": 1.0038, "step": 11652 }, { "epoch": 0.29921636407696095, "grad_norm": 0.734375, "learning_rate": 0.00018359055084999585, "loss": 0.9494, "step": 11653 }, { "epoch": 0.2992420412728828, "grad_norm": 0.765625, "learning_rate": 0.00018358810048525926, "loss": 0.925, "step": 11654 }, { "epoch": 0.2992677184688046, "grad_norm": 0.73046875, "learning_rate": 0.0001835856499539392, "loss": 0.9614, "step": 11655 }, { "epoch": 0.29929339566472646, "grad_norm": 0.7734375, "learning_rate": 0.00018358319925604043, "loss": 0.9541, "step": 11656 }, { "epoch": 0.29931907286064824, "grad_norm": 0.8125, "learning_rate": 0.0001835807483915679, "loss": 0.9831, "step": 11657 }, { "epoch": 0.2993447500565701, "grad_norm": 0.77734375, "learning_rate": 0.0001835782973605265, "loss": 0.9873, "step": 11658 }, { "epoch": 0.2993704272524919, "grad_norm": 0.81640625, "learning_rate": 0.00018357584616292112, "loss": 1.0099, "step": 11659 }, { "epoch": 0.2993961044484137, "grad_norm": 0.76171875, "learning_rate": 0.0001835733947987566, "loss": 0.9836, "step": 11660 }, { "epoch": 0.2994217816443355, "grad_norm": 0.84375, "learning_rate": 0.00018357094326803788, "loss": 0.9848, "step": 11661 }, { "epoch": 0.29944745884025736, "grad_norm": 0.84375, "learning_rate": 0.0001835684915707698, "loss": 1.0486, "step": 11662 }, { "epoch": 0.29947313603617914, "grad_norm": 0.79296875, "learning_rate": 0.00018356603970695726, "loss": 0.9914, "step": 11663 }, { "epoch": 0.299498813232101, "grad_norm": 0.78125, "learning_rate": 0.00018356358767660516, "loss": 1.0261, "step": 11664 }, { "epoch": 0.2995244904280228, "grad_norm": 0.8125, "learning_rate": 0.00018356113547971837, "loss": 0.9514, "step": 11665 }, { "epoch": 0.29955016762394465, "grad_norm": 0.8515625, "learning_rate": 0.00018355868311630179, "loss": 0.9151, "step": 11666 }, { "epoch": 0.29957584481986643, "grad_norm": 0.80859375, "learning_rate": 0.00018355623058636028, "loss": 1.0081, "step": 11667 }, { "epoch": 0.29960152201578827, "grad_norm": 0.7109375, "learning_rate": 0.0001835537778898988, "loss": 0.9806, "step": 11668 }, { "epoch": 0.2996271992117101, "grad_norm": 0.77734375, "learning_rate": 0.00018355132502692216, "loss": 1.1211, "step": 11669 }, { "epoch": 0.2996528764076319, "grad_norm": 0.7421875, "learning_rate": 0.00018354887199743526, "loss": 0.9631, "step": 11670 }, { "epoch": 0.2996785536035537, "grad_norm": 0.7734375, "learning_rate": 0.00018354641880144302, "loss": 0.819, "step": 11671 }, { "epoch": 0.29970423079947556, "grad_norm": 0.80078125, "learning_rate": 0.00018354396543895034, "loss": 1.0124, "step": 11672 }, { "epoch": 0.29972990799539734, "grad_norm": 0.7890625, "learning_rate": 0.00018354151190996208, "loss": 0.9398, "step": 11673 }, { "epoch": 0.2997555851913192, "grad_norm": 0.796875, "learning_rate": 0.0001835390582144831, "loss": 0.9555, "step": 11674 }, { "epoch": 0.299781262387241, "grad_norm": 0.75, "learning_rate": 0.00018353660435251832, "loss": 1.069, "step": 11675 }, { "epoch": 0.29980693958316285, "grad_norm": 0.83984375, "learning_rate": 0.00018353415032407267, "loss": 1.0562, "step": 11676 }, { "epoch": 0.2998326167790846, "grad_norm": 0.7578125, "learning_rate": 0.00018353169612915097, "loss": 1.086, "step": 11677 }, { "epoch": 0.29985829397500646, "grad_norm": 0.90234375, "learning_rate": 0.00018352924176775817, "loss": 1.0325, "step": 11678 }, { "epoch": 0.2998839711709283, "grad_norm": 0.71484375, "learning_rate": 0.0001835267872398991, "loss": 0.8851, "step": 11679 }, { "epoch": 0.2999096483668501, "grad_norm": 0.91796875, "learning_rate": 0.00018352433254557873, "loss": 1.1896, "step": 11680 }, { "epoch": 0.2999353255627719, "grad_norm": 1.1484375, "learning_rate": 0.00018352187768480192, "loss": 1.0214, "step": 11681 }, { "epoch": 0.29996100275869375, "grad_norm": 0.87109375, "learning_rate": 0.0001835194226575735, "loss": 1.0385, "step": 11682 }, { "epoch": 0.29998667995461553, "grad_norm": 0.8359375, "learning_rate": 0.00018351696746389843, "loss": 0.8603, "step": 11683 }, { "epoch": 0.30001235715053737, "grad_norm": 0.81640625, "learning_rate": 0.0001835145121037816, "loss": 1.162, "step": 11684 }, { "epoch": 0.3000380343464592, "grad_norm": 1.359375, "learning_rate": 0.0001835120565772279, "loss": 0.9456, "step": 11685 }, { "epoch": 0.30006371154238104, "grad_norm": 0.73828125, "learning_rate": 0.0001835096008842422, "loss": 0.9254, "step": 11686 }, { "epoch": 0.3000893887383028, "grad_norm": 0.82421875, "learning_rate": 0.0001835071450248294, "loss": 0.9971, "step": 11687 }, { "epoch": 0.30011506593422466, "grad_norm": 0.82421875, "learning_rate": 0.0001835046889989944, "loss": 0.9548, "step": 11688 }, { "epoch": 0.3001407431301465, "grad_norm": 0.71875, "learning_rate": 0.00018350223280674214, "loss": 0.9213, "step": 11689 }, { "epoch": 0.3001664203260683, "grad_norm": 0.79296875, "learning_rate": 0.00018349977644807746, "loss": 1.0152, "step": 11690 }, { "epoch": 0.3001920975219901, "grad_norm": 0.79296875, "learning_rate": 0.00018349731992300524, "loss": 1.0831, "step": 11691 }, { "epoch": 0.30021777471791194, "grad_norm": 0.81640625, "learning_rate": 0.00018349486323153042, "loss": 1.1329, "step": 11692 }, { "epoch": 0.3002434519138337, "grad_norm": 0.84765625, "learning_rate": 0.00018349240637365787, "loss": 0.9776, "step": 11693 }, { "epoch": 0.30026912910975556, "grad_norm": 0.79296875, "learning_rate": 0.00018348994934939253, "loss": 1.0231, "step": 11694 }, { "epoch": 0.3002948063056774, "grad_norm": 0.78515625, "learning_rate": 0.00018348749215873925, "loss": 0.9876, "step": 11695 }, { "epoch": 0.30032048350159923, "grad_norm": 0.77734375, "learning_rate": 0.00018348503480170288, "loss": 0.8073, "step": 11696 }, { "epoch": 0.300346160697521, "grad_norm": 0.7734375, "learning_rate": 0.00018348257727828846, "loss": 0.9054, "step": 11697 }, { "epoch": 0.30037183789344285, "grad_norm": 0.83203125, "learning_rate": 0.00018348011958850075, "loss": 1.0607, "step": 11698 }, { "epoch": 0.3003975150893647, "grad_norm": 0.78125, "learning_rate": 0.00018347766173234473, "loss": 0.9947, "step": 11699 }, { "epoch": 0.30042319228528647, "grad_norm": 0.7890625, "learning_rate": 0.00018347520370982524, "loss": 0.8843, "step": 11700 }, { "epoch": 0.3004488694812083, "grad_norm": 0.7578125, "learning_rate": 0.00018347274552094723, "loss": 0.9226, "step": 11701 }, { "epoch": 0.30047454667713014, "grad_norm": 0.73828125, "learning_rate": 0.00018347028716571558, "loss": 0.9238, "step": 11702 }, { "epoch": 0.3005002238730519, "grad_norm": 0.875, "learning_rate": 0.0001834678286441352, "loss": 0.9652, "step": 11703 }, { "epoch": 0.30052590106897376, "grad_norm": 0.79296875, "learning_rate": 0.00018346536995621096, "loss": 1.0003, "step": 11704 }, { "epoch": 0.3005515782648956, "grad_norm": 0.91796875, "learning_rate": 0.00018346291110194778, "loss": 1.019, "step": 11705 }, { "epoch": 0.3005772554608174, "grad_norm": 0.77734375, "learning_rate": 0.00018346045208135055, "loss": 0.964, "step": 11706 }, { "epoch": 0.3006029326567392, "grad_norm": 0.8515625, "learning_rate": 0.0001834579928944242, "loss": 0.973, "step": 11707 }, { "epoch": 0.30062860985266104, "grad_norm": 0.86328125, "learning_rate": 0.00018345553354117357, "loss": 1.0628, "step": 11708 }, { "epoch": 0.3006542870485829, "grad_norm": 0.81640625, "learning_rate": 0.00018345307402160366, "loss": 1.0199, "step": 11709 }, { "epoch": 0.30067996424450466, "grad_norm": 0.84765625, "learning_rate": 0.00018345061433571927, "loss": 1.0162, "step": 11710 }, { "epoch": 0.3007056414404265, "grad_norm": 0.76953125, "learning_rate": 0.00018344815448352535, "loss": 0.8889, "step": 11711 }, { "epoch": 0.30073131863634833, "grad_norm": 0.8046875, "learning_rate": 0.00018344569446502676, "loss": 0.9609, "step": 11712 }, { "epoch": 0.3007569958322701, "grad_norm": 0.83203125, "learning_rate": 0.0001834432342802285, "loss": 1.0059, "step": 11713 }, { "epoch": 0.30078267302819195, "grad_norm": 0.77734375, "learning_rate": 0.0001834407739291354, "loss": 0.8821, "step": 11714 }, { "epoch": 0.3008083502241138, "grad_norm": 0.796875, "learning_rate": 0.00018343831341175235, "loss": 1.0854, "step": 11715 }, { "epoch": 0.3008340274200356, "grad_norm": 0.6796875, "learning_rate": 0.0001834358527280843, "loss": 0.8276, "step": 11716 }, { "epoch": 0.3008597046159574, "grad_norm": 0.83984375, "learning_rate": 0.0001834333918781361, "loss": 0.9346, "step": 11717 }, { "epoch": 0.30088538181187924, "grad_norm": 0.95703125, "learning_rate": 0.00018343093086191272, "loss": 1.0549, "step": 11718 }, { "epoch": 0.3009110590078011, "grad_norm": 0.78515625, "learning_rate": 0.00018342846967941902, "loss": 0.8926, "step": 11719 }, { "epoch": 0.30093673620372285, "grad_norm": 0.74609375, "learning_rate": 0.0001834260083306599, "loss": 0.9773, "step": 11720 }, { "epoch": 0.3009624133996447, "grad_norm": 0.76953125, "learning_rate": 0.00018342354681564032, "loss": 0.9126, "step": 11721 }, { "epoch": 0.3009880905955665, "grad_norm": 0.74609375, "learning_rate": 0.00018342108513436512, "loss": 1.2087, "step": 11722 }, { "epoch": 0.3010137677914883, "grad_norm": 0.73046875, "learning_rate": 0.0001834186232868392, "loss": 0.8365, "step": 11723 }, { "epoch": 0.30103944498741014, "grad_norm": 0.828125, "learning_rate": 0.00018341616127306757, "loss": 0.9545, "step": 11724 }, { "epoch": 0.301065122183332, "grad_norm": 0.8125, "learning_rate": 0.000183413699093055, "loss": 0.8402, "step": 11725 }, { "epoch": 0.3010907993792538, "grad_norm": 0.78125, "learning_rate": 0.0001834112367468065, "loss": 0.9567, "step": 11726 }, { "epoch": 0.3011164765751756, "grad_norm": 0.78515625, "learning_rate": 0.00018340877423432694, "loss": 0.9616, "step": 11727 }, { "epoch": 0.30114215377109743, "grad_norm": 0.91015625, "learning_rate": 0.00018340631155562123, "loss": 1.0675, "step": 11728 }, { "epoch": 0.30116783096701927, "grad_norm": 0.74609375, "learning_rate": 0.00018340384871069426, "loss": 0.8263, "step": 11729 }, { "epoch": 0.30119350816294105, "grad_norm": 0.8125, "learning_rate": 0.00018340138569955094, "loss": 0.9429, "step": 11730 }, { "epoch": 0.3012191853588629, "grad_norm": 0.87890625, "learning_rate": 0.00018339892252219623, "loss": 0.9847, "step": 11731 }, { "epoch": 0.3012448625547847, "grad_norm": 0.765625, "learning_rate": 0.00018339645917863498, "loss": 0.9725, "step": 11732 }, { "epoch": 0.3012705397507065, "grad_norm": 0.8984375, "learning_rate": 0.00018339399566887212, "loss": 0.9653, "step": 11733 }, { "epoch": 0.30129621694662834, "grad_norm": 0.82421875, "learning_rate": 0.00018339153199291257, "loss": 1.0212, "step": 11734 }, { "epoch": 0.3013218941425502, "grad_norm": 0.8515625, "learning_rate": 0.00018338906815076121, "loss": 1.1234, "step": 11735 }, { "epoch": 0.301347571338472, "grad_norm": 0.859375, "learning_rate": 0.00018338660414242297, "loss": 1.0047, "step": 11736 }, { "epoch": 0.3013732485343938, "grad_norm": 0.84765625, "learning_rate": 0.00018338413996790278, "loss": 0.9579, "step": 11737 }, { "epoch": 0.3013989257303156, "grad_norm": 0.87890625, "learning_rate": 0.0001833816756272055, "loss": 0.9162, "step": 11738 }, { "epoch": 0.30142460292623746, "grad_norm": 0.76953125, "learning_rate": 0.00018337921112033612, "loss": 0.8702, "step": 11739 }, { "epoch": 0.30145028012215924, "grad_norm": 0.84375, "learning_rate": 0.00018337674644729947, "loss": 0.8944, "step": 11740 }, { "epoch": 0.3014759573180811, "grad_norm": 0.8125, "learning_rate": 0.0001833742816081005, "loss": 0.987, "step": 11741 }, { "epoch": 0.3015016345140029, "grad_norm": 0.859375, "learning_rate": 0.00018337181660274413, "loss": 1.0691, "step": 11742 }, { "epoch": 0.3015273117099247, "grad_norm": 0.875, "learning_rate": 0.00018336935143123523, "loss": 0.9435, "step": 11743 }, { "epoch": 0.30155298890584653, "grad_norm": 0.83203125, "learning_rate": 0.00018336688609357877, "loss": 1.0286, "step": 11744 }, { "epoch": 0.30157866610176837, "grad_norm": 0.8046875, "learning_rate": 0.00018336442058977963, "loss": 1.0659, "step": 11745 }, { "epoch": 0.3016043432976902, "grad_norm": 0.73828125, "learning_rate": 0.00018336195491984274, "loss": 0.8693, "step": 11746 }, { "epoch": 0.301630020493612, "grad_norm": 0.80859375, "learning_rate": 0.00018335948908377298, "loss": 0.9025, "step": 11747 }, { "epoch": 0.3016556976895338, "grad_norm": 0.76953125, "learning_rate": 0.00018335702308157532, "loss": 0.8698, "step": 11748 }, { "epoch": 0.30168137488545566, "grad_norm": 0.7890625, "learning_rate": 0.0001833545569132546, "loss": 0.9209, "step": 11749 }, { "epoch": 0.30170705208137744, "grad_norm": 0.8125, "learning_rate": 0.0001833520905788158, "loss": 1.0635, "step": 11750 }, { "epoch": 0.30173272927729927, "grad_norm": 0.8046875, "learning_rate": 0.0001833496240782638, "loss": 1.1011, "step": 11751 }, { "epoch": 0.3017584064732211, "grad_norm": 0.74609375, "learning_rate": 0.00018334715741160357, "loss": 1.0648, "step": 11752 }, { "epoch": 0.3017840836691429, "grad_norm": 0.7421875, "learning_rate": 0.00018334469057883996, "loss": 1.0954, "step": 11753 }, { "epoch": 0.3018097608650647, "grad_norm": 0.82421875, "learning_rate": 0.0001833422235799779, "loss": 1.0132, "step": 11754 }, { "epoch": 0.30183543806098656, "grad_norm": 0.828125, "learning_rate": 0.00018333975641502233, "loss": 0.9442, "step": 11755 }, { "epoch": 0.3018611152569084, "grad_norm": 0.7421875, "learning_rate": 0.00018333728908397813, "loss": 0.8847, "step": 11756 }, { "epoch": 0.3018867924528302, "grad_norm": 0.76171875, "learning_rate": 0.00018333482158685026, "loss": 0.9673, "step": 11757 }, { "epoch": 0.301912469648752, "grad_norm": 0.7578125, "learning_rate": 0.00018333235392364365, "loss": 1.0801, "step": 11758 }, { "epoch": 0.30193814684467385, "grad_norm": 0.765625, "learning_rate": 0.00018332988609436312, "loss": 1.087, "step": 11759 }, { "epoch": 0.30196382404059563, "grad_norm": 0.703125, "learning_rate": 0.0001833274180990137, "loss": 0.9381, "step": 11760 }, { "epoch": 0.30198950123651747, "grad_norm": 0.75390625, "learning_rate": 0.00018332494993760024, "loss": 1.071, "step": 11761 }, { "epoch": 0.3020151784324393, "grad_norm": 0.86328125, "learning_rate": 0.0001833224816101277, "loss": 0.9709, "step": 11762 }, { "epoch": 0.3020408556283611, "grad_norm": 0.85546875, "learning_rate": 0.00018332001311660099, "loss": 0.9685, "step": 11763 }, { "epoch": 0.3020665328242829, "grad_norm": 0.84765625, "learning_rate": 0.000183317544457025, "loss": 1.0224, "step": 11764 }, { "epoch": 0.30209221002020475, "grad_norm": 0.76953125, "learning_rate": 0.00018331507563140465, "loss": 1.0175, "step": 11765 }, { "epoch": 0.3021178872161266, "grad_norm": 0.765625, "learning_rate": 0.00018331260663974493, "loss": 1.0209, "step": 11766 }, { "epoch": 0.30214356441204837, "grad_norm": 0.8046875, "learning_rate": 0.00018331013748205068, "loss": 1.129, "step": 11767 }, { "epoch": 0.3021692416079702, "grad_norm": 0.7578125, "learning_rate": 0.00018330766815832683, "loss": 1.0055, "step": 11768 }, { "epoch": 0.30219491880389204, "grad_norm": 0.80859375, "learning_rate": 0.00018330519866857837, "loss": 0.9979, "step": 11769 }, { "epoch": 0.3022205959998138, "grad_norm": 0.73828125, "learning_rate": 0.00018330272901281015, "loss": 0.8759, "step": 11770 }, { "epoch": 0.30224627319573566, "grad_norm": 0.88671875, "learning_rate": 0.0001833002591910271, "loss": 0.9883, "step": 11771 }, { "epoch": 0.3022719503916575, "grad_norm": 0.765625, "learning_rate": 0.00018329778920323417, "loss": 0.9944, "step": 11772 }, { "epoch": 0.3022976275875793, "grad_norm": 0.73046875, "learning_rate": 0.0001832953190494363, "loss": 0.8312, "step": 11773 }, { "epoch": 0.3023233047835011, "grad_norm": 0.84375, "learning_rate": 0.00018329284872963835, "loss": 0.9555, "step": 11774 }, { "epoch": 0.30234898197942295, "grad_norm": 0.859375, "learning_rate": 0.00018329037824384528, "loss": 1.1042, "step": 11775 }, { "epoch": 0.30237465917534473, "grad_norm": 0.80859375, "learning_rate": 0.000183287907592062, "loss": 0.935, "step": 11776 }, { "epoch": 0.30240033637126656, "grad_norm": 0.828125, "learning_rate": 0.00018328543677429345, "loss": 0.9579, "step": 11777 }, { "epoch": 0.3024260135671884, "grad_norm": 0.7578125, "learning_rate": 0.00018328296579054457, "loss": 0.9437, "step": 11778 }, { "epoch": 0.30245169076311024, "grad_norm": 0.765625, "learning_rate": 0.00018328049464082024, "loss": 1.019, "step": 11779 }, { "epoch": 0.302477367959032, "grad_norm": 0.75390625, "learning_rate": 0.0001832780233251254, "loss": 1.0488, "step": 11780 }, { "epoch": 0.30250304515495385, "grad_norm": 0.77734375, "learning_rate": 0.00018327555184346502, "loss": 0.9357, "step": 11781 }, { "epoch": 0.3025287223508757, "grad_norm": 0.7578125, "learning_rate": 0.00018327308019584396, "loss": 0.8887, "step": 11782 }, { "epoch": 0.30255439954679747, "grad_norm": 0.734375, "learning_rate": 0.00018327060838226717, "loss": 1.0058, "step": 11783 }, { "epoch": 0.3025800767427193, "grad_norm": 0.77734375, "learning_rate": 0.00018326813640273958, "loss": 0.9176, "step": 11784 }, { "epoch": 0.30260575393864114, "grad_norm": 0.83984375, "learning_rate": 0.00018326566425726614, "loss": 0.9502, "step": 11785 }, { "epoch": 0.3026314311345629, "grad_norm": 0.84765625, "learning_rate": 0.00018326319194585173, "loss": 1.1433, "step": 11786 }, { "epoch": 0.30265710833048476, "grad_norm": 0.796875, "learning_rate": 0.0001832607194685013, "loss": 1.0174, "step": 11787 }, { "epoch": 0.3026827855264066, "grad_norm": 0.8125, "learning_rate": 0.00018325824682521977, "loss": 1.0501, "step": 11788 }, { "epoch": 0.30270846272232843, "grad_norm": 0.71875, "learning_rate": 0.00018325577401601207, "loss": 0.8993, "step": 11789 }, { "epoch": 0.3027341399182502, "grad_norm": 0.984375, "learning_rate": 0.0001832533010408832, "loss": 1.0414, "step": 11790 }, { "epoch": 0.30275981711417205, "grad_norm": 0.78125, "learning_rate": 0.00018325082789983794, "loss": 1.0758, "step": 11791 }, { "epoch": 0.3027854943100939, "grad_norm": 0.73828125, "learning_rate": 0.00018324835459288132, "loss": 1.0742, "step": 11792 }, { "epoch": 0.30281117150601566, "grad_norm": 0.8125, "learning_rate": 0.00018324588112001827, "loss": 1.1323, "step": 11793 }, { "epoch": 0.3028368487019375, "grad_norm": 0.75, "learning_rate": 0.0001832434074812537, "loss": 0.9484, "step": 11794 }, { "epoch": 0.30286252589785934, "grad_norm": 0.828125, "learning_rate": 0.00018324093367659252, "loss": 1.059, "step": 11795 }, { "epoch": 0.3028882030937811, "grad_norm": 0.8046875, "learning_rate": 0.00018323845970603968, "loss": 1.0031, "step": 11796 }, { "epoch": 0.30291388028970295, "grad_norm": 0.76171875, "learning_rate": 0.00018323598556960012, "loss": 0.9778, "step": 11797 }, { "epoch": 0.3029395574856248, "grad_norm": 0.8359375, "learning_rate": 0.00018323351126727872, "loss": 0.9474, "step": 11798 }, { "epoch": 0.3029652346815466, "grad_norm": 0.7890625, "learning_rate": 0.0001832310367990805, "loss": 0.9651, "step": 11799 }, { "epoch": 0.3029909118774684, "grad_norm": 0.796875, "learning_rate": 0.00018322856216501033, "loss": 0.9506, "step": 11800 }, { "epoch": 0.30301658907339024, "grad_norm": 0.7265625, "learning_rate": 0.00018322608736507314, "loss": 1.0126, "step": 11801 }, { "epoch": 0.3030422662693121, "grad_norm": 0.75, "learning_rate": 0.0001832236123992739, "loss": 0.8821, "step": 11802 }, { "epoch": 0.30306794346523386, "grad_norm": 0.7734375, "learning_rate": 0.0001832211372676175, "loss": 0.9532, "step": 11803 }, { "epoch": 0.3030936206611557, "grad_norm": 0.74609375, "learning_rate": 0.00018321866197010888, "loss": 0.9079, "step": 11804 }, { "epoch": 0.30311929785707753, "grad_norm": 0.84765625, "learning_rate": 0.00018321618650675298, "loss": 0.9228, "step": 11805 }, { "epoch": 0.3031449750529993, "grad_norm": 0.8359375, "learning_rate": 0.00018321371087755476, "loss": 0.9987, "step": 11806 }, { "epoch": 0.30317065224892115, "grad_norm": 0.7890625, "learning_rate": 0.00018321123508251912, "loss": 0.8689, "step": 11807 }, { "epoch": 0.303196329444843, "grad_norm": 0.71875, "learning_rate": 0.000183208759121651, "loss": 0.9147, "step": 11808 }, { "epoch": 0.3032220066407648, "grad_norm": 0.7734375, "learning_rate": 0.00018320628299495537, "loss": 0.9295, "step": 11809 }, { "epoch": 0.3032476838366866, "grad_norm": 0.73828125, "learning_rate": 0.0001832038067024371, "loss": 0.9125, "step": 11810 }, { "epoch": 0.30327336103260844, "grad_norm": 0.8203125, "learning_rate": 0.00018320133024410119, "loss": 0.9995, "step": 11811 }, { "epoch": 0.30329903822853027, "grad_norm": 0.76953125, "learning_rate": 0.00018319885361995253, "loss": 0.8452, "step": 11812 }, { "epoch": 0.30332471542445205, "grad_norm": 0.75, "learning_rate": 0.00018319637682999605, "loss": 1.0538, "step": 11813 }, { "epoch": 0.3033503926203739, "grad_norm": 0.74609375, "learning_rate": 0.0001831938998742367, "loss": 0.9732, "step": 11814 }, { "epoch": 0.3033760698162957, "grad_norm": 0.78515625, "learning_rate": 0.00018319142275267946, "loss": 0.9114, "step": 11815 }, { "epoch": 0.3034017470122175, "grad_norm": 0.828125, "learning_rate": 0.00018318894546532922, "loss": 0.9628, "step": 11816 }, { "epoch": 0.30342742420813934, "grad_norm": 0.85546875, "learning_rate": 0.0001831864680121909, "loss": 0.9615, "step": 11817 }, { "epoch": 0.3034531014040612, "grad_norm": 0.77734375, "learning_rate": 0.0001831839903932695, "loss": 1.0023, "step": 11818 }, { "epoch": 0.303478778599983, "grad_norm": 0.7578125, "learning_rate": 0.0001831815126085699, "loss": 0.8964, "step": 11819 }, { "epoch": 0.3035044557959048, "grad_norm": 0.796875, "learning_rate": 0.00018317903465809706, "loss": 1.009, "step": 11820 }, { "epoch": 0.30353013299182663, "grad_norm": 0.8203125, "learning_rate": 0.00018317655654185593, "loss": 0.9515, "step": 11821 }, { "epoch": 0.30355581018774846, "grad_norm": 0.796875, "learning_rate": 0.00018317407825985143, "loss": 1.1339, "step": 11822 }, { "epoch": 0.30358148738367025, "grad_norm": 0.90234375, "learning_rate": 0.00018317159981208852, "loss": 0.9798, "step": 11823 }, { "epoch": 0.3036071645795921, "grad_norm": 0.76953125, "learning_rate": 0.00018316912119857207, "loss": 1.1664, "step": 11824 }, { "epoch": 0.3036328417755139, "grad_norm": 0.73046875, "learning_rate": 0.0001831666424193071, "loss": 0.9489, "step": 11825 }, { "epoch": 0.3036585189714357, "grad_norm": 0.828125, "learning_rate": 0.00018316416347429856, "loss": 0.9669, "step": 11826 }, { "epoch": 0.30368419616735753, "grad_norm": 0.76171875, "learning_rate": 0.00018316168436355133, "loss": 0.9745, "step": 11827 }, { "epoch": 0.30370987336327937, "grad_norm": 0.7734375, "learning_rate": 0.00018315920508707036, "loss": 0.9273, "step": 11828 }, { "epoch": 0.3037355505592012, "grad_norm": 0.8046875, "learning_rate": 0.00018315672564486063, "loss": 1.1092, "step": 11829 }, { "epoch": 0.303761227755123, "grad_norm": 0.78125, "learning_rate": 0.00018315424603692707, "loss": 0.9216, "step": 11830 }, { "epoch": 0.3037869049510448, "grad_norm": 0.7578125, "learning_rate": 0.00018315176626327456, "loss": 0.8681, "step": 11831 }, { "epoch": 0.30381258214696666, "grad_norm": 0.8203125, "learning_rate": 0.0001831492863239081, "loss": 1.0044, "step": 11832 }, { "epoch": 0.30383825934288844, "grad_norm": 0.75, "learning_rate": 0.00018314680621883266, "loss": 0.9999, "step": 11833 }, { "epoch": 0.3038639365388103, "grad_norm": 0.796875, "learning_rate": 0.0001831443259480531, "loss": 0.9118, "step": 11834 }, { "epoch": 0.3038896137347321, "grad_norm": 0.83984375, "learning_rate": 0.00018314184551157444, "loss": 0.9031, "step": 11835 }, { "epoch": 0.3039152909306539, "grad_norm": 0.86328125, "learning_rate": 0.0001831393649094016, "loss": 0.9178, "step": 11836 }, { "epoch": 0.30394096812657573, "grad_norm": 0.7890625, "learning_rate": 0.0001831368841415395, "loss": 0.9772, "step": 11837 }, { "epoch": 0.30396664532249756, "grad_norm": 0.8046875, "learning_rate": 0.00018313440320799312, "loss": 1.1098, "step": 11838 }, { "epoch": 0.3039923225184194, "grad_norm": 0.7890625, "learning_rate": 0.00018313192210876736, "loss": 0.9954, "step": 11839 }, { "epoch": 0.3040179997143412, "grad_norm": 0.76171875, "learning_rate": 0.0001831294408438672, "loss": 1.0525, "step": 11840 }, { "epoch": 0.304043676910263, "grad_norm": 0.76953125, "learning_rate": 0.00018312695941329757, "loss": 1.0289, "step": 11841 }, { "epoch": 0.30406935410618485, "grad_norm": 0.765625, "learning_rate": 0.0001831244778170634, "loss": 0.9674, "step": 11842 }, { "epoch": 0.30409503130210663, "grad_norm": 0.81640625, "learning_rate": 0.0001831219960551697, "loss": 0.9238, "step": 11843 }, { "epoch": 0.30412070849802847, "grad_norm": 0.72265625, "learning_rate": 0.00018311951412762131, "loss": 0.9004, "step": 11844 }, { "epoch": 0.3041463856939503, "grad_norm": 0.8046875, "learning_rate": 0.00018311703203442327, "loss": 1.0331, "step": 11845 }, { "epoch": 0.3041720628898721, "grad_norm": 0.95703125, "learning_rate": 0.00018311454977558048, "loss": 1.0115, "step": 11846 }, { "epoch": 0.3041977400857939, "grad_norm": 0.7734375, "learning_rate": 0.0001831120673510979, "loss": 0.9437, "step": 11847 }, { "epoch": 0.30422341728171576, "grad_norm": 0.73828125, "learning_rate": 0.0001831095847609805, "loss": 0.9034, "step": 11848 }, { "epoch": 0.3042490944776376, "grad_norm": 0.76171875, "learning_rate": 0.0001831071020052332, "loss": 0.9672, "step": 11849 }, { "epoch": 0.3042747716735594, "grad_norm": 0.83203125, "learning_rate": 0.00018310461908386096, "loss": 0.9537, "step": 11850 }, { "epoch": 0.3043004488694812, "grad_norm": 0.80859375, "learning_rate": 0.0001831021359968687, "loss": 1.0515, "step": 11851 }, { "epoch": 0.30432612606540305, "grad_norm": 0.77734375, "learning_rate": 0.0001830996527442614, "loss": 1.063, "step": 11852 }, { "epoch": 0.3043518032613248, "grad_norm": 0.7578125, "learning_rate": 0.00018309716932604398, "loss": 0.8983, "step": 11853 }, { "epoch": 0.30437748045724666, "grad_norm": 0.83203125, "learning_rate": 0.0001830946857422214, "loss": 1.0277, "step": 11854 }, { "epoch": 0.3044031576531685, "grad_norm": 0.8203125, "learning_rate": 0.00018309220199279867, "loss": 0.9037, "step": 11855 }, { "epoch": 0.3044288348490903, "grad_norm": 0.75, "learning_rate": 0.00018308971807778064, "loss": 0.9334, "step": 11856 }, { "epoch": 0.3044545120450121, "grad_norm": 0.83984375, "learning_rate": 0.00018308723399717232, "loss": 0.9786, "step": 11857 }, { "epoch": 0.30448018924093395, "grad_norm": 0.89453125, "learning_rate": 0.00018308474975097867, "loss": 0.9071, "step": 11858 }, { "epoch": 0.3045058664368558, "grad_norm": 0.88671875, "learning_rate": 0.0001830822653392046, "loss": 0.9907, "step": 11859 }, { "epoch": 0.30453154363277757, "grad_norm": 0.75390625, "learning_rate": 0.00018307978076185507, "loss": 0.9452, "step": 11860 }, { "epoch": 0.3045572208286994, "grad_norm": 0.828125, "learning_rate": 0.00018307729601893503, "loss": 1.0427, "step": 11861 }, { "epoch": 0.30458289802462124, "grad_norm": 0.77734375, "learning_rate": 0.00018307481111044948, "loss": 0.9363, "step": 11862 }, { "epoch": 0.304608575220543, "grad_norm": 1.1015625, "learning_rate": 0.00018307232603640333, "loss": 0.8854, "step": 11863 }, { "epoch": 0.30463425241646486, "grad_norm": 0.94921875, "learning_rate": 0.00018306984079680155, "loss": 1.0367, "step": 11864 }, { "epoch": 0.3046599296123867, "grad_norm": 0.74609375, "learning_rate": 0.00018306735539164905, "loss": 0.8718, "step": 11865 }, { "epoch": 0.3046856068083085, "grad_norm": 0.765625, "learning_rate": 0.00018306486982095082, "loss": 1.0187, "step": 11866 }, { "epoch": 0.3047112840042303, "grad_norm": 0.7734375, "learning_rate": 0.0001830623840847118, "loss": 0.8255, "step": 11867 }, { "epoch": 0.30473696120015215, "grad_norm": 0.8359375, "learning_rate": 0.00018305989818293697, "loss": 0.9673, "step": 11868 }, { "epoch": 0.304762638396074, "grad_norm": 0.76171875, "learning_rate": 0.00018305741211563126, "loss": 0.8804, "step": 11869 }, { "epoch": 0.30478831559199576, "grad_norm": 0.8359375, "learning_rate": 0.00018305492588279964, "loss": 0.8439, "step": 11870 }, { "epoch": 0.3048139927879176, "grad_norm": 0.7734375, "learning_rate": 0.00018305243948444703, "loss": 0.9691, "step": 11871 }, { "epoch": 0.30483966998383943, "grad_norm": 0.765625, "learning_rate": 0.00018304995292057843, "loss": 0.9341, "step": 11872 }, { "epoch": 0.3048653471797612, "grad_norm": 0.85546875, "learning_rate": 0.00018304746619119877, "loss": 0.9926, "step": 11873 }, { "epoch": 0.30489102437568305, "grad_norm": 0.765625, "learning_rate": 0.000183044979296313, "loss": 0.9843, "step": 11874 }, { "epoch": 0.3049167015716049, "grad_norm": 0.75, "learning_rate": 0.0001830424922359261, "loss": 1.0254, "step": 11875 }, { "epoch": 0.30494237876752667, "grad_norm": 0.84375, "learning_rate": 0.000183040005010043, "loss": 0.8959, "step": 11876 }, { "epoch": 0.3049680559634485, "grad_norm": 0.80078125, "learning_rate": 0.0001830375176186687, "loss": 0.9898, "step": 11877 }, { "epoch": 0.30499373315937034, "grad_norm": 0.86328125, "learning_rate": 0.00018303503006180814, "loss": 1.022, "step": 11878 }, { "epoch": 0.3050194103552922, "grad_norm": 0.8203125, "learning_rate": 0.00018303254233946622, "loss": 0.9584, "step": 11879 }, { "epoch": 0.30504508755121396, "grad_norm": 0.8359375, "learning_rate": 0.00018303005445164796, "loss": 1.053, "step": 11880 }, { "epoch": 0.3050707647471358, "grad_norm": 0.765625, "learning_rate": 0.0001830275663983583, "loss": 0.8807, "step": 11881 }, { "epoch": 0.30509644194305763, "grad_norm": 0.84375, "learning_rate": 0.0001830250781796022, "loss": 1.0439, "step": 11882 }, { "epoch": 0.3051221191389794, "grad_norm": 0.79296875, "learning_rate": 0.00018302258979538462, "loss": 0.8969, "step": 11883 }, { "epoch": 0.30514779633490124, "grad_norm": 0.80078125, "learning_rate": 0.0001830201012457105, "loss": 1.1185, "step": 11884 }, { "epoch": 0.3051734735308231, "grad_norm": 0.73828125, "learning_rate": 0.00018301761253058487, "loss": 0.9826, "step": 11885 }, { "epoch": 0.30519915072674486, "grad_norm": 0.78125, "learning_rate": 0.0001830151236500126, "loss": 0.8991, "step": 11886 }, { "epoch": 0.3052248279226667, "grad_norm": 0.7734375, "learning_rate": 0.0001830126346039987, "loss": 0.817, "step": 11887 }, { "epoch": 0.30525050511858853, "grad_norm": 0.75390625, "learning_rate": 0.00018301014539254807, "loss": 0.9001, "step": 11888 }, { "epoch": 0.30527618231451037, "grad_norm": 0.796875, "learning_rate": 0.00018300765601566579, "loss": 0.9878, "step": 11889 }, { "epoch": 0.30530185951043215, "grad_norm": 0.75, "learning_rate": 0.0001830051664733567, "loss": 1.0264, "step": 11890 }, { "epoch": 0.305327536706354, "grad_norm": 0.83203125, "learning_rate": 0.00018300267676562583, "loss": 1.0467, "step": 11891 }, { "epoch": 0.3053532139022758, "grad_norm": 0.79296875, "learning_rate": 0.00018300018689247814, "loss": 0.9943, "step": 11892 }, { "epoch": 0.3053788910981976, "grad_norm": 0.78125, "learning_rate": 0.00018299769685391855, "loss": 0.9978, "step": 11893 }, { "epoch": 0.30540456829411944, "grad_norm": 1.0859375, "learning_rate": 0.00018299520664995207, "loss": 0.863, "step": 11894 }, { "epoch": 0.3054302454900413, "grad_norm": 0.8203125, "learning_rate": 0.0001829927162805836, "loss": 1.081, "step": 11895 }, { "epoch": 0.30545592268596305, "grad_norm": 0.74609375, "learning_rate": 0.00018299022574581818, "loss": 0.9056, "step": 11896 }, { "epoch": 0.3054815998818849, "grad_norm": 0.75390625, "learning_rate": 0.00018298773504566072, "loss": 1.0824, "step": 11897 }, { "epoch": 0.3055072770778067, "grad_norm": 0.87890625, "learning_rate": 0.0001829852441801162, "loss": 0.959, "step": 11898 }, { "epoch": 0.30553295427372856, "grad_norm": 1.0390625, "learning_rate": 0.00018298275314918957, "loss": 0.867, "step": 11899 }, { "epoch": 0.30555863146965034, "grad_norm": 0.765625, "learning_rate": 0.00018298026195288586, "loss": 1.1356, "step": 11900 }, { "epoch": 0.3055843086655722, "grad_norm": 0.796875, "learning_rate": 0.00018297777059120992, "loss": 0.8791, "step": 11901 }, { "epoch": 0.305609985861494, "grad_norm": 0.77734375, "learning_rate": 0.00018297527906416684, "loss": 1.0789, "step": 11902 }, { "epoch": 0.3056356630574158, "grad_norm": 0.76171875, "learning_rate": 0.00018297278737176148, "loss": 0.9837, "step": 11903 }, { "epoch": 0.30566134025333763, "grad_norm": 0.78125, "learning_rate": 0.00018297029551399887, "loss": 0.9305, "step": 11904 }, { "epoch": 0.30568701744925947, "grad_norm": 0.7265625, "learning_rate": 0.00018296780349088393, "loss": 0.9391, "step": 11905 }, { "epoch": 0.30571269464518125, "grad_norm": 0.76171875, "learning_rate": 0.00018296531130242166, "loss": 0.9484, "step": 11906 }, { "epoch": 0.3057383718411031, "grad_norm": 0.74609375, "learning_rate": 0.00018296281894861704, "loss": 0.8983, "step": 11907 }, { "epoch": 0.3057640490370249, "grad_norm": 0.75, "learning_rate": 0.00018296032642947503, "loss": 0.9185, "step": 11908 }, { "epoch": 0.30578972623294676, "grad_norm": 0.72265625, "learning_rate": 0.00018295783374500056, "loss": 0.9314, "step": 11909 }, { "epoch": 0.30581540342886854, "grad_norm": 0.84375, "learning_rate": 0.0001829553408951986, "loss": 0.9961, "step": 11910 }, { "epoch": 0.3058410806247904, "grad_norm": 0.80859375, "learning_rate": 0.00018295284788007415, "loss": 1.0552, "step": 11911 }, { "epoch": 0.3058667578207122, "grad_norm": 0.73046875, "learning_rate": 0.0001829503546996322, "loss": 0.7603, "step": 11912 }, { "epoch": 0.305892435016634, "grad_norm": 0.7421875, "learning_rate": 0.00018294786135387764, "loss": 0.9745, "step": 11913 }, { "epoch": 0.3059181122125558, "grad_norm": 0.7734375, "learning_rate": 0.0001829453678428155, "loss": 0.9499, "step": 11914 }, { "epoch": 0.30594378940847766, "grad_norm": 0.76171875, "learning_rate": 0.00018294287416645074, "loss": 0.9635, "step": 11915 }, { "epoch": 0.30596946660439944, "grad_norm": 0.80078125, "learning_rate": 0.00018294038032478833, "loss": 0.9752, "step": 11916 }, { "epoch": 0.3059951438003213, "grad_norm": 0.7578125, "learning_rate": 0.00018293788631783321, "loss": 0.9849, "step": 11917 }, { "epoch": 0.3060208209962431, "grad_norm": 0.76953125, "learning_rate": 0.00018293539214559042, "loss": 0.8835, "step": 11918 }, { "epoch": 0.30604649819216495, "grad_norm": 0.86328125, "learning_rate": 0.00018293289780806483, "loss": 1.1128, "step": 11919 }, { "epoch": 0.30607217538808673, "grad_norm": 0.796875, "learning_rate": 0.0001829304033052615, "loss": 0.867, "step": 11920 }, { "epoch": 0.30609785258400857, "grad_norm": 0.78125, "learning_rate": 0.0001829279086371854, "loss": 0.9767, "step": 11921 }, { "epoch": 0.3061235297799304, "grad_norm": 0.78125, "learning_rate": 0.0001829254138038414, "loss": 0.921, "step": 11922 }, { "epoch": 0.3061492069758522, "grad_norm": 0.82421875, "learning_rate": 0.00018292291880523458, "loss": 0.9879, "step": 11923 }, { "epoch": 0.306174884171774, "grad_norm": 0.78515625, "learning_rate": 0.00018292042364136989, "loss": 0.9926, "step": 11924 }, { "epoch": 0.30620056136769586, "grad_norm": 0.75390625, "learning_rate": 0.00018291792831225223, "loss": 0.9165, "step": 11925 }, { "epoch": 0.30622623856361764, "grad_norm": 0.7421875, "learning_rate": 0.00018291543281788667, "loss": 0.9701, "step": 11926 }, { "epoch": 0.3062519157595395, "grad_norm": 0.796875, "learning_rate": 0.00018291293715827812, "loss": 1.1197, "step": 11927 }, { "epoch": 0.3062775929554613, "grad_norm": 0.78125, "learning_rate": 0.00018291044133343157, "loss": 1.0089, "step": 11928 }, { "epoch": 0.30630327015138314, "grad_norm": 0.765625, "learning_rate": 0.00018290794534335202, "loss": 1.0225, "step": 11929 }, { "epoch": 0.3063289473473049, "grad_norm": 0.78125, "learning_rate": 0.00018290544918804442, "loss": 1.068, "step": 11930 }, { "epoch": 0.30635462454322676, "grad_norm": 0.85546875, "learning_rate": 0.00018290295286751377, "loss": 0.9832, "step": 11931 }, { "epoch": 0.3063803017391486, "grad_norm": 0.828125, "learning_rate": 0.00018290045638176498, "loss": 0.999, "step": 11932 }, { "epoch": 0.3064059789350704, "grad_norm": 0.7734375, "learning_rate": 0.0001828979597308031, "loss": 0.9097, "step": 11933 }, { "epoch": 0.3064316561309922, "grad_norm": 0.88671875, "learning_rate": 0.00018289546291463306, "loss": 1.0594, "step": 11934 }, { "epoch": 0.30645733332691405, "grad_norm": 0.83203125, "learning_rate": 0.00018289296593325987, "loss": 1.1632, "step": 11935 }, { "epoch": 0.30648301052283583, "grad_norm": 0.828125, "learning_rate": 0.00018289046878668846, "loss": 0.9678, "step": 11936 }, { "epoch": 0.30650868771875767, "grad_norm": 0.7265625, "learning_rate": 0.00018288797147492387, "loss": 0.9348, "step": 11937 }, { "epoch": 0.3065343649146795, "grad_norm": 0.765625, "learning_rate": 0.000182885473997971, "loss": 0.9683, "step": 11938 }, { "epoch": 0.30656004211060134, "grad_norm": 0.76171875, "learning_rate": 0.0001828829763558349, "loss": 1.016, "step": 11939 }, { "epoch": 0.3065857193065231, "grad_norm": 0.7890625, "learning_rate": 0.00018288047854852048, "loss": 0.9951, "step": 11940 }, { "epoch": 0.30661139650244496, "grad_norm": 1.359375, "learning_rate": 0.00018287798057603277, "loss": 0.9221, "step": 11941 }, { "epoch": 0.3066370736983668, "grad_norm": 0.73828125, "learning_rate": 0.00018287548243837672, "loss": 0.8753, "step": 11942 }, { "epoch": 0.30666275089428857, "grad_norm": 0.8125, "learning_rate": 0.00018287298413555733, "loss": 1.0729, "step": 11943 }, { "epoch": 0.3066884280902104, "grad_norm": 0.7421875, "learning_rate": 0.00018287048566757957, "loss": 0.8867, "step": 11944 }, { "epoch": 0.30671410528613224, "grad_norm": 0.7578125, "learning_rate": 0.00018286798703444842, "loss": 1.1212, "step": 11945 }, { "epoch": 0.306739782482054, "grad_norm": 0.8203125, "learning_rate": 0.00018286548823616885, "loss": 0.9798, "step": 11946 }, { "epoch": 0.30676545967797586, "grad_norm": 0.7890625, "learning_rate": 0.00018286298927274586, "loss": 1.0061, "step": 11947 }, { "epoch": 0.3067911368738977, "grad_norm": 0.8203125, "learning_rate": 0.00018286049014418443, "loss": 1.1633, "step": 11948 }, { "epoch": 0.30681681406981953, "grad_norm": 0.7890625, "learning_rate": 0.0001828579908504895, "loss": 0.939, "step": 11949 }, { "epoch": 0.3068424912657413, "grad_norm": 0.828125, "learning_rate": 0.00018285549139166607, "loss": 1.0161, "step": 11950 }, { "epoch": 0.30686816846166315, "grad_norm": 0.8359375, "learning_rate": 0.00018285299176771917, "loss": 0.983, "step": 11951 }, { "epoch": 0.306893845657585, "grad_norm": 0.828125, "learning_rate": 0.0001828504919786537, "loss": 0.9429, "step": 11952 }, { "epoch": 0.30691952285350677, "grad_norm": 0.7890625, "learning_rate": 0.0001828479920244747, "loss": 0.9597, "step": 11953 }, { "epoch": 0.3069452000494286, "grad_norm": 0.74609375, "learning_rate": 0.00018284549190518713, "loss": 0.8462, "step": 11954 }, { "epoch": 0.30697087724535044, "grad_norm": 0.72265625, "learning_rate": 0.000182842991620796, "loss": 0.8292, "step": 11955 }, { "epoch": 0.3069965544412722, "grad_norm": 0.71875, "learning_rate": 0.00018284049117130624, "loss": 0.8153, "step": 11956 }, { "epoch": 0.30702223163719405, "grad_norm": 0.796875, "learning_rate": 0.0001828379905567229, "loss": 1.0011, "step": 11957 }, { "epoch": 0.3070479088331159, "grad_norm": 0.79296875, "learning_rate": 0.0001828354897770509, "loss": 1.0402, "step": 11958 }, { "epoch": 0.3070735860290377, "grad_norm": 0.8828125, "learning_rate": 0.00018283298883229528, "loss": 1.0908, "step": 11959 }, { "epoch": 0.3070992632249595, "grad_norm": 0.8359375, "learning_rate": 0.00018283048772246095, "loss": 0.9435, "step": 11960 }, { "epoch": 0.30712494042088134, "grad_norm": 0.765625, "learning_rate": 0.00018282798644755297, "loss": 0.9105, "step": 11961 }, { "epoch": 0.3071506176168032, "grad_norm": 0.83203125, "learning_rate": 0.00018282548500757627, "loss": 0.8857, "step": 11962 }, { "epoch": 0.30717629481272496, "grad_norm": 0.7734375, "learning_rate": 0.0001828229834025359, "loss": 0.9275, "step": 11963 }, { "epoch": 0.3072019720086468, "grad_norm": 0.80078125, "learning_rate": 0.00018282048163243677, "loss": 1.0631, "step": 11964 }, { "epoch": 0.30722764920456863, "grad_norm": 0.78125, "learning_rate": 0.0001828179796972839, "loss": 1.0926, "step": 11965 }, { "epoch": 0.3072533264004904, "grad_norm": 0.7265625, "learning_rate": 0.00018281547759708232, "loss": 0.8159, "step": 11966 }, { "epoch": 0.30727900359641225, "grad_norm": 0.80078125, "learning_rate": 0.0001828129753318369, "loss": 0.9184, "step": 11967 }, { "epoch": 0.3073046807923341, "grad_norm": 0.73828125, "learning_rate": 0.0001828104729015528, "loss": 0.9448, "step": 11968 }, { "epoch": 0.3073303579882559, "grad_norm": 0.7890625, "learning_rate": 0.00018280797030623483, "loss": 0.972, "step": 11969 }, { "epoch": 0.3073560351841777, "grad_norm": 0.81640625, "learning_rate": 0.00018280546754588807, "loss": 1.0824, "step": 11970 }, { "epoch": 0.30738171238009954, "grad_norm": 0.80078125, "learning_rate": 0.00018280296462051753, "loss": 0.9128, "step": 11971 }, { "epoch": 0.3074073895760214, "grad_norm": 0.71875, "learning_rate": 0.0001828004615301281, "loss": 1.0309, "step": 11972 }, { "epoch": 0.30743306677194315, "grad_norm": 0.8671875, "learning_rate": 0.00018279795827472488, "loss": 0.9461, "step": 11973 }, { "epoch": 0.307458743967865, "grad_norm": 0.83984375, "learning_rate": 0.0001827954548543128, "loss": 1.0759, "step": 11974 }, { "epoch": 0.3074844211637868, "grad_norm": 0.79296875, "learning_rate": 0.00018279295126889683, "loss": 0.8218, "step": 11975 }, { "epoch": 0.3075100983597086, "grad_norm": 0.796875, "learning_rate": 0.00018279044751848202, "loss": 0.9844, "step": 11976 }, { "epoch": 0.30753577555563044, "grad_norm": 0.77734375, "learning_rate": 0.00018278794360307332, "loss": 0.923, "step": 11977 }, { "epoch": 0.3075614527515523, "grad_norm": 0.85546875, "learning_rate": 0.0001827854395226757, "loss": 1.2269, "step": 11978 }, { "epoch": 0.3075871299474741, "grad_norm": 0.91796875, "learning_rate": 0.0001827829352772942, "loss": 1.0129, "step": 11979 }, { "epoch": 0.3076128071433959, "grad_norm": 0.75, "learning_rate": 0.00018278043086693383, "loss": 0.9577, "step": 11980 }, { "epoch": 0.30763848433931773, "grad_norm": 0.86328125, "learning_rate": 0.00018277792629159946, "loss": 1.089, "step": 11981 }, { "epoch": 0.30766416153523957, "grad_norm": 0.7734375, "learning_rate": 0.0001827754215512962, "loss": 0.992, "step": 11982 }, { "epoch": 0.30768983873116135, "grad_norm": 0.78125, "learning_rate": 0.00018277291664602898, "loss": 0.9475, "step": 11983 }, { "epoch": 0.3077155159270832, "grad_norm": 0.86328125, "learning_rate": 0.00018277041157580283, "loss": 0.8901, "step": 11984 }, { "epoch": 0.307741193123005, "grad_norm": 0.85546875, "learning_rate": 0.00018276790634062272, "loss": 1.0946, "step": 11985 }, { "epoch": 0.3077668703189268, "grad_norm": 0.71875, "learning_rate": 0.00018276540094049366, "loss": 0.9024, "step": 11986 }, { "epoch": 0.30779254751484864, "grad_norm": 0.796875, "learning_rate": 0.0001827628953754206, "loss": 0.7888, "step": 11987 }, { "epoch": 0.30781822471077047, "grad_norm": 0.8125, "learning_rate": 0.00018276038964540862, "loss": 1.0837, "step": 11988 }, { "epoch": 0.30784390190669225, "grad_norm": 0.83203125, "learning_rate": 0.00018275788375046262, "loss": 0.9044, "step": 11989 }, { "epoch": 0.3078695791026141, "grad_norm": 0.84375, "learning_rate": 0.00018275537769058765, "loss": 0.9651, "step": 11990 }, { "epoch": 0.3078952562985359, "grad_norm": 0.97265625, "learning_rate": 0.00018275287146578867, "loss": 1.1345, "step": 11991 }, { "epoch": 0.30792093349445776, "grad_norm": 0.91015625, "learning_rate": 0.00018275036507607072, "loss": 1.0988, "step": 11992 }, { "epoch": 0.30794661069037954, "grad_norm": 0.76953125, "learning_rate": 0.00018274785852143873, "loss": 1.0357, "step": 11993 }, { "epoch": 0.3079722878863014, "grad_norm": 0.7734375, "learning_rate": 0.00018274535180189778, "loss": 0.984, "step": 11994 }, { "epoch": 0.3079979650822232, "grad_norm": 0.78515625, "learning_rate": 0.0001827428449174528, "loss": 0.971, "step": 11995 }, { "epoch": 0.308023642278145, "grad_norm": 0.71875, "learning_rate": 0.0001827403378681088, "loss": 0.894, "step": 11996 }, { "epoch": 0.30804931947406683, "grad_norm": 0.84765625, "learning_rate": 0.00018273783065387078, "loss": 1.0379, "step": 11997 }, { "epoch": 0.30807499666998867, "grad_norm": 0.76171875, "learning_rate": 0.0001827353232747437, "loss": 0.9402, "step": 11998 }, { "epoch": 0.30810067386591045, "grad_norm": 0.734375, "learning_rate": 0.00018273281573073265, "loss": 0.8644, "step": 11999 }, { "epoch": 0.3081263510618323, "grad_norm": 0.79296875, "learning_rate": 0.00018273030802184255, "loss": 1.0765, "step": 12000 }, { "epoch": 0.3081263510618323, "eval_loss": 0.965718150138855, "eval_model_preparation_time": 0.0065, "eval_runtime": 406.4014, "eval_samples_per_second": 24.606, "eval_steps_per_second": 0.77, "step": 12000 }, { "epoch": 0.3081520282577541, "grad_norm": 0.78125, "learning_rate": 0.00018272780014807842, "loss": 0.9097, "step": 12001 }, { "epoch": 0.30817770545367595, "grad_norm": 0.8125, "learning_rate": 0.00018272529210944526, "loss": 0.9935, "step": 12002 }, { "epoch": 0.30820338264959773, "grad_norm": 0.8125, "learning_rate": 0.00018272278390594804, "loss": 1.0013, "step": 12003 }, { "epoch": 0.30822905984551957, "grad_norm": 0.8125, "learning_rate": 0.0001827202755375918, "loss": 0.8675, "step": 12004 }, { "epoch": 0.3082547370414414, "grad_norm": 0.80859375, "learning_rate": 0.00018271776700438155, "loss": 0.9676, "step": 12005 }, { "epoch": 0.3082804142373632, "grad_norm": 0.8046875, "learning_rate": 0.00018271525830632223, "loss": 1.1404, "step": 12006 }, { "epoch": 0.308306091433285, "grad_norm": 0.80078125, "learning_rate": 0.00018271274944341889, "loss": 0.9758, "step": 12007 }, { "epoch": 0.30833176862920686, "grad_norm": 0.78125, "learning_rate": 0.0001827102404156765, "loss": 0.8879, "step": 12008 }, { "epoch": 0.30835744582512864, "grad_norm": 0.73828125, "learning_rate": 0.00018270773122310008, "loss": 0.9727, "step": 12009 }, { "epoch": 0.3083831230210505, "grad_norm": 0.81640625, "learning_rate": 0.00018270522186569462, "loss": 1.0161, "step": 12010 }, { "epoch": 0.3084088002169723, "grad_norm": 0.734375, "learning_rate": 0.0001827027123434651, "loss": 1.0104, "step": 12011 }, { "epoch": 0.30843447741289415, "grad_norm": 0.8515625, "learning_rate": 0.00018270020265641657, "loss": 1.0529, "step": 12012 }, { "epoch": 0.30846015460881593, "grad_norm": 0.84375, "learning_rate": 0.00018269769280455402, "loss": 0.9394, "step": 12013 }, { "epoch": 0.30848583180473776, "grad_norm": 0.83203125, "learning_rate": 0.0001826951827878824, "loss": 1.0515, "step": 12014 }, { "epoch": 0.3085115090006596, "grad_norm": 0.91015625, "learning_rate": 0.0001826926726064068, "loss": 1.0673, "step": 12015 }, { "epoch": 0.3085371861965814, "grad_norm": 0.80078125, "learning_rate": 0.00018269016226013214, "loss": 1.0162, "step": 12016 }, { "epoch": 0.3085628633925032, "grad_norm": 0.78125, "learning_rate": 0.00018268765174906344, "loss": 0.8036, "step": 12017 }, { "epoch": 0.30858854058842505, "grad_norm": 0.765625, "learning_rate": 0.00018268514107320575, "loss": 0.9837, "step": 12018 }, { "epoch": 0.30861421778434683, "grad_norm": 0.77734375, "learning_rate": 0.00018268263023256404, "loss": 1.0278, "step": 12019 }, { "epoch": 0.30863989498026867, "grad_norm": 0.74609375, "learning_rate": 0.0001826801192271433, "loss": 0.996, "step": 12020 }, { "epoch": 0.3086655721761905, "grad_norm": 0.78125, "learning_rate": 0.00018267760805694856, "loss": 0.9116, "step": 12021 }, { "epoch": 0.30869124937211234, "grad_norm": 0.85546875, "learning_rate": 0.0001826750967219848, "loss": 1.1137, "step": 12022 }, { "epoch": 0.3087169265680341, "grad_norm": 0.8359375, "learning_rate": 0.00018267258522225706, "loss": 1.0891, "step": 12023 }, { "epoch": 0.30874260376395596, "grad_norm": 0.796875, "learning_rate": 0.00018267007355777035, "loss": 1.0294, "step": 12024 }, { "epoch": 0.3087682809598778, "grad_norm": 0.76953125, "learning_rate": 0.00018266756172852962, "loss": 0.9526, "step": 12025 }, { "epoch": 0.3087939581557996, "grad_norm": 0.8359375, "learning_rate": 0.0001826650497345399, "loss": 1.0288, "step": 12026 }, { "epoch": 0.3088196353517214, "grad_norm": 0.9296875, "learning_rate": 0.0001826625375758062, "loss": 1.0702, "step": 12027 }, { "epoch": 0.30884531254764325, "grad_norm": 0.81640625, "learning_rate": 0.00018266002525233355, "loss": 1.0629, "step": 12028 }, { "epoch": 0.308870989743565, "grad_norm": 0.7890625, "learning_rate": 0.00018265751276412692, "loss": 0.9769, "step": 12029 }, { "epoch": 0.30889666693948686, "grad_norm": 0.78515625, "learning_rate": 0.00018265500011119135, "loss": 0.9716, "step": 12030 }, { "epoch": 0.3089223441354087, "grad_norm": 0.890625, "learning_rate": 0.0001826524872935318, "loss": 0.9789, "step": 12031 }, { "epoch": 0.30894802133133054, "grad_norm": 0.89453125, "learning_rate": 0.00018264997431115333, "loss": 0.973, "step": 12032 }, { "epoch": 0.3089736985272523, "grad_norm": 0.70703125, "learning_rate": 0.00018264746116406096, "loss": 1.1209, "step": 12033 }, { "epoch": 0.30899937572317415, "grad_norm": 0.82421875, "learning_rate": 0.0001826449478522596, "loss": 0.9482, "step": 12034 }, { "epoch": 0.309025052919096, "grad_norm": 0.76171875, "learning_rate": 0.00018264243437575436, "loss": 0.9179, "step": 12035 }, { "epoch": 0.30905073011501777, "grad_norm": 0.74609375, "learning_rate": 0.00018263992073455022, "loss": 0.8003, "step": 12036 }, { "epoch": 0.3090764073109396, "grad_norm": 0.7734375, "learning_rate": 0.00018263740692865219, "loss": 0.9824, "step": 12037 }, { "epoch": 0.30910208450686144, "grad_norm": 0.70703125, "learning_rate": 0.00018263489295806525, "loss": 0.9863, "step": 12038 }, { "epoch": 0.3091277617027832, "grad_norm": 0.77734375, "learning_rate": 0.00018263237882279443, "loss": 1.0703, "step": 12039 }, { "epoch": 0.30915343889870506, "grad_norm": 0.71484375, "learning_rate": 0.00018262986452284477, "loss": 0.8912, "step": 12040 }, { "epoch": 0.3091791160946269, "grad_norm": 0.734375, "learning_rate": 0.00018262735005822122, "loss": 0.9273, "step": 12041 }, { "epoch": 0.30920479329054873, "grad_norm": 0.80859375, "learning_rate": 0.00018262483542892883, "loss": 1.0086, "step": 12042 }, { "epoch": 0.3092304704864705, "grad_norm": 0.73046875, "learning_rate": 0.00018262232063497263, "loss": 0.9474, "step": 12043 }, { "epoch": 0.30925614768239235, "grad_norm": 0.8671875, "learning_rate": 0.0001826198056763576, "loss": 0.9794, "step": 12044 }, { "epoch": 0.3092818248783142, "grad_norm": 0.8046875, "learning_rate": 0.00018261729055308877, "loss": 0.9602, "step": 12045 }, { "epoch": 0.30930750207423596, "grad_norm": 0.734375, "learning_rate": 0.00018261477526517112, "loss": 0.9384, "step": 12046 }, { "epoch": 0.3093331792701578, "grad_norm": 0.703125, "learning_rate": 0.0001826122598126097, "loss": 0.9368, "step": 12047 }, { "epoch": 0.30935885646607963, "grad_norm": 0.78125, "learning_rate": 0.0001826097441954095, "loss": 1.1225, "step": 12048 }, { "epoch": 0.3093845336620014, "grad_norm": 0.7734375, "learning_rate": 0.00018260722841357554, "loss": 1.0347, "step": 12049 }, { "epoch": 0.30941021085792325, "grad_norm": 0.73828125, "learning_rate": 0.00018260471246711284, "loss": 1.0169, "step": 12050 }, { "epoch": 0.3094358880538451, "grad_norm": 0.7578125, "learning_rate": 0.0001826021963560264, "loss": 0.9526, "step": 12051 }, { "epoch": 0.3094615652497669, "grad_norm": 0.73828125, "learning_rate": 0.00018259968008032127, "loss": 0.9707, "step": 12052 }, { "epoch": 0.3094872424456887, "grad_norm": 0.7578125, "learning_rate": 0.00018259716364000243, "loss": 0.9247, "step": 12053 }, { "epoch": 0.30951291964161054, "grad_norm": 0.78515625, "learning_rate": 0.00018259464703507489, "loss": 0.8479, "step": 12054 }, { "epoch": 0.3095385968375324, "grad_norm": 0.86328125, "learning_rate": 0.0001825921302655437, "loss": 1.0172, "step": 12055 }, { "epoch": 0.30956427403345416, "grad_norm": 0.8359375, "learning_rate": 0.00018258961333141382, "loss": 1.0378, "step": 12056 }, { "epoch": 0.309589951229376, "grad_norm": 0.7578125, "learning_rate": 0.00018258709623269033, "loss": 0.8844, "step": 12057 }, { "epoch": 0.30961562842529783, "grad_norm": 0.73046875, "learning_rate": 0.00018258457896937823, "loss": 0.835, "step": 12058 }, { "epoch": 0.3096413056212196, "grad_norm": 0.78515625, "learning_rate": 0.00018258206154148248, "loss": 0.9253, "step": 12059 }, { "epoch": 0.30966698281714145, "grad_norm": 0.83984375, "learning_rate": 0.00018257954394900817, "loss": 0.967, "step": 12060 }, { "epoch": 0.3096926600130633, "grad_norm": 0.7890625, "learning_rate": 0.0001825770261919603, "loss": 0.9678, "step": 12061 }, { "epoch": 0.3097183372089851, "grad_norm": 0.75390625, "learning_rate": 0.00018257450827034383, "loss": 1.047, "step": 12062 }, { "epoch": 0.3097440144049069, "grad_norm": 0.8203125, "learning_rate": 0.00018257199018416385, "loss": 1.0511, "step": 12063 }, { "epoch": 0.30976969160082873, "grad_norm": 0.76953125, "learning_rate": 0.00018256947193342536, "loss": 1.0022, "step": 12064 }, { "epoch": 0.30979536879675057, "grad_norm": 0.76171875, "learning_rate": 0.00018256695351813335, "loss": 0.9936, "step": 12065 }, { "epoch": 0.30982104599267235, "grad_norm": 0.81640625, "learning_rate": 0.0001825644349382929, "loss": 0.937, "step": 12066 }, { "epoch": 0.3098467231885942, "grad_norm": 0.80078125, "learning_rate": 0.00018256191619390894, "loss": 0.8992, "step": 12067 }, { "epoch": 0.309872400384516, "grad_norm": 0.80859375, "learning_rate": 0.00018255939728498656, "loss": 1.0464, "step": 12068 }, { "epoch": 0.3098980775804378, "grad_norm": 0.78515625, "learning_rate": 0.0001825568782115308, "loss": 0.8198, "step": 12069 }, { "epoch": 0.30992375477635964, "grad_norm": 0.671875, "learning_rate": 0.00018255435897354658, "loss": 0.9437, "step": 12070 }, { "epoch": 0.3099494319722815, "grad_norm": 0.8359375, "learning_rate": 0.000182551839571039, "loss": 0.9161, "step": 12071 }, { "epoch": 0.3099751091682033, "grad_norm": 0.890625, "learning_rate": 0.00018254932000401306, "loss": 0.9341, "step": 12072 }, { "epoch": 0.3100007863641251, "grad_norm": 0.73046875, "learning_rate": 0.00018254680027247376, "loss": 0.9707, "step": 12073 }, { "epoch": 0.3100264635600469, "grad_norm": 0.8828125, "learning_rate": 0.00018254428037642618, "loss": 0.8617, "step": 12074 }, { "epoch": 0.31005214075596876, "grad_norm": 0.80078125, "learning_rate": 0.00018254176031587532, "loss": 0.934, "step": 12075 }, { "epoch": 0.31007781795189054, "grad_norm": 0.86328125, "learning_rate": 0.00018253924009082613, "loss": 1.0938, "step": 12076 }, { "epoch": 0.3101034951478124, "grad_norm": 0.77734375, "learning_rate": 0.00018253671970128372, "loss": 0.8914, "step": 12077 }, { "epoch": 0.3101291723437342, "grad_norm": 0.75, "learning_rate": 0.0001825341991472531, "loss": 0.9532, "step": 12078 }, { "epoch": 0.310154849539656, "grad_norm": 0.82421875, "learning_rate": 0.00018253167842873926, "loss": 0.9065, "step": 12079 }, { "epoch": 0.31018052673557783, "grad_norm": 0.8046875, "learning_rate": 0.00018252915754574724, "loss": 0.936, "step": 12080 }, { "epoch": 0.31020620393149967, "grad_norm": 0.76171875, "learning_rate": 0.00018252663649828205, "loss": 0.9222, "step": 12081 }, { "epoch": 0.3102318811274215, "grad_norm": 0.79296875, "learning_rate": 0.00018252411528634873, "loss": 0.9243, "step": 12082 }, { "epoch": 0.3102575583233433, "grad_norm": 0.77734375, "learning_rate": 0.00018252159390995233, "loss": 0.8652, "step": 12083 }, { "epoch": 0.3102832355192651, "grad_norm": 0.87109375, "learning_rate": 0.00018251907236909785, "loss": 1.0303, "step": 12084 }, { "epoch": 0.31030891271518696, "grad_norm": 0.77734375, "learning_rate": 0.0001825165506637903, "loss": 0.9052, "step": 12085 }, { "epoch": 0.31033458991110874, "grad_norm": 0.78515625, "learning_rate": 0.0001825140287940347, "loss": 0.9447, "step": 12086 }, { "epoch": 0.3103602671070306, "grad_norm": 0.80859375, "learning_rate": 0.0001825115067598361, "loss": 1.0254, "step": 12087 }, { "epoch": 0.3103859443029524, "grad_norm": 0.85546875, "learning_rate": 0.00018250898456119953, "loss": 0.9758, "step": 12088 }, { "epoch": 0.3104116214988742, "grad_norm": 0.9375, "learning_rate": 0.00018250646219813, "loss": 1.0824, "step": 12089 }, { "epoch": 0.310437298694796, "grad_norm": 0.76171875, "learning_rate": 0.00018250393967063255, "loss": 0.951, "step": 12090 }, { "epoch": 0.31046297589071786, "grad_norm": 0.98046875, "learning_rate": 0.0001825014169787122, "loss": 0.9895, "step": 12091 }, { "epoch": 0.3104886530866397, "grad_norm": 0.75390625, "learning_rate": 0.00018249889412237397, "loss": 0.9194, "step": 12092 }, { "epoch": 0.3105143302825615, "grad_norm": 0.796875, "learning_rate": 0.0001824963711016229, "loss": 0.8693, "step": 12093 }, { "epoch": 0.3105400074784833, "grad_norm": 0.8203125, "learning_rate": 0.00018249384791646403, "loss": 0.8899, "step": 12094 }, { "epoch": 0.31056568467440515, "grad_norm": 0.78125, "learning_rate": 0.00018249132456690234, "loss": 0.8988, "step": 12095 }, { "epoch": 0.31059136187032693, "grad_norm": 0.7890625, "learning_rate": 0.00018248880105294294, "loss": 0.9786, "step": 12096 }, { "epoch": 0.31061703906624877, "grad_norm": 0.828125, "learning_rate": 0.00018248627737459076, "loss": 1.0293, "step": 12097 }, { "epoch": 0.3106427162621706, "grad_norm": 0.8046875, "learning_rate": 0.0001824837535318509, "loss": 1.0025, "step": 12098 }, { "epoch": 0.3106683934580924, "grad_norm": 0.8125, "learning_rate": 0.00018248122952472837, "loss": 0.9071, "step": 12099 }, { "epoch": 0.3106940706540142, "grad_norm": 0.828125, "learning_rate": 0.0001824787053532282, "loss": 0.9689, "step": 12100 }, { "epoch": 0.31071974784993606, "grad_norm": 0.81640625, "learning_rate": 0.00018247618101735542, "loss": 1.0896, "step": 12101 }, { "epoch": 0.3107454250458579, "grad_norm": 0.7734375, "learning_rate": 0.00018247365651711509, "loss": 1.0313, "step": 12102 }, { "epoch": 0.3107711022417797, "grad_norm": 0.828125, "learning_rate": 0.00018247113185251217, "loss": 0.9524, "step": 12103 }, { "epoch": 0.3107967794377015, "grad_norm": 0.78515625, "learning_rate": 0.00018246860702355176, "loss": 0.8971, "step": 12104 }, { "epoch": 0.31082245663362335, "grad_norm": 0.7578125, "learning_rate": 0.00018246608203023885, "loss": 1.1838, "step": 12105 }, { "epoch": 0.3108481338295451, "grad_norm": 0.76953125, "learning_rate": 0.0001824635568725785, "loss": 0.9258, "step": 12106 }, { "epoch": 0.31087381102546696, "grad_norm": 0.81640625, "learning_rate": 0.0001824610315505757, "loss": 0.8753, "step": 12107 }, { "epoch": 0.3108994882213888, "grad_norm": 0.78515625, "learning_rate": 0.00018245850606423555, "loss": 0.9421, "step": 12108 }, { "epoch": 0.3109251654173106, "grad_norm": 0.7109375, "learning_rate": 0.00018245598041356304, "loss": 0.8475, "step": 12109 }, { "epoch": 0.3109508426132324, "grad_norm": 0.7734375, "learning_rate": 0.00018245345459856318, "loss": 1.0493, "step": 12110 }, { "epoch": 0.31097651980915425, "grad_norm": 0.86328125, "learning_rate": 0.00018245092861924106, "loss": 0.992, "step": 12111 }, { "epoch": 0.3110021970050761, "grad_norm": 0.7734375, "learning_rate": 0.0001824484024756017, "loss": 0.9805, "step": 12112 }, { "epoch": 0.31102787420099787, "grad_norm": 0.7421875, "learning_rate": 0.0001824458761676501, "loss": 0.9733, "step": 12113 }, { "epoch": 0.3110535513969197, "grad_norm": 0.8515625, "learning_rate": 0.00018244334969539128, "loss": 1.1108, "step": 12114 }, { "epoch": 0.31107922859284154, "grad_norm": 0.8125, "learning_rate": 0.00018244082305883037, "loss": 0.845, "step": 12115 }, { "epoch": 0.3111049057887633, "grad_norm": 0.80859375, "learning_rate": 0.00018243829625797233, "loss": 1.0216, "step": 12116 }, { "epoch": 0.31113058298468516, "grad_norm": 0.71484375, "learning_rate": 0.0001824357692928222, "loss": 0.918, "step": 12117 }, { "epoch": 0.311156260180607, "grad_norm": 0.8203125, "learning_rate": 0.00018243324216338504, "loss": 1.1072, "step": 12118 }, { "epoch": 0.31118193737652877, "grad_norm": 0.765625, "learning_rate": 0.00018243071486966588, "loss": 0.9629, "step": 12119 }, { "epoch": 0.3112076145724506, "grad_norm": 0.90234375, "learning_rate": 0.00018242818741166973, "loss": 0.993, "step": 12120 }, { "epoch": 0.31123329176837244, "grad_norm": 0.8125, "learning_rate": 0.00018242565978940166, "loss": 1.0723, "step": 12121 }, { "epoch": 0.3112589689642943, "grad_norm": 0.68359375, "learning_rate": 0.0001824231320028667, "loss": 1.0388, "step": 12122 }, { "epoch": 0.31128464616021606, "grad_norm": 0.75390625, "learning_rate": 0.00018242060405206987, "loss": 0.9182, "step": 12123 }, { "epoch": 0.3113103233561379, "grad_norm": 0.78125, "learning_rate": 0.00018241807593701625, "loss": 0.9754, "step": 12124 }, { "epoch": 0.31133600055205973, "grad_norm": 0.765625, "learning_rate": 0.00018241554765771083, "loss": 0.9086, "step": 12125 }, { "epoch": 0.3113616777479815, "grad_norm": 0.8046875, "learning_rate": 0.00018241301921415864, "loss": 1.1057, "step": 12126 }, { "epoch": 0.31138735494390335, "grad_norm": 0.80078125, "learning_rate": 0.0001824104906063648, "loss": 1.0598, "step": 12127 }, { "epoch": 0.3114130321398252, "grad_norm": 0.71875, "learning_rate": 0.00018240796183433426, "loss": 0.9365, "step": 12128 }, { "epoch": 0.31143870933574697, "grad_norm": 0.87890625, "learning_rate": 0.00018240543289807214, "loss": 0.9694, "step": 12129 }, { "epoch": 0.3114643865316688, "grad_norm": 1.0234375, "learning_rate": 0.00018240290379758337, "loss": 1.0289, "step": 12130 }, { "epoch": 0.31149006372759064, "grad_norm": 0.81640625, "learning_rate": 0.0001824003745328731, "loss": 0.9019, "step": 12131 }, { "epoch": 0.3115157409235125, "grad_norm": 0.85546875, "learning_rate": 0.00018239784510394632, "loss": 0.9254, "step": 12132 }, { "epoch": 0.31154141811943425, "grad_norm": 0.88671875, "learning_rate": 0.00018239531551080806, "loss": 1.0854, "step": 12133 }, { "epoch": 0.3115670953153561, "grad_norm": 0.73046875, "learning_rate": 0.00018239278575346338, "loss": 0.8805, "step": 12134 }, { "epoch": 0.3115927725112779, "grad_norm": 0.76953125, "learning_rate": 0.00018239025583191736, "loss": 0.8384, "step": 12135 }, { "epoch": 0.3116184497071997, "grad_norm": 0.80078125, "learning_rate": 0.00018238772574617496, "loss": 0.9305, "step": 12136 }, { "epoch": 0.31164412690312154, "grad_norm": 0.796875, "learning_rate": 0.00018238519549624126, "loss": 1.1114, "step": 12137 }, { "epoch": 0.3116698040990434, "grad_norm": 0.765625, "learning_rate": 0.00018238266508212135, "loss": 0.9209, "step": 12138 }, { "epoch": 0.31169548129496516, "grad_norm": 0.7734375, "learning_rate": 0.00018238013450382022, "loss": 0.9306, "step": 12139 }, { "epoch": 0.311721158490887, "grad_norm": 0.8203125, "learning_rate": 0.0001823776037613429, "loss": 0.9909, "step": 12140 }, { "epoch": 0.31174683568680883, "grad_norm": 0.78515625, "learning_rate": 0.00018237507285469447, "loss": 0.9426, "step": 12141 }, { "epoch": 0.31177251288273067, "grad_norm": 0.76171875, "learning_rate": 0.00018237254178387995, "loss": 0.9468, "step": 12142 }, { "epoch": 0.31179819007865245, "grad_norm": 0.78125, "learning_rate": 0.0001823700105489044, "loss": 0.9863, "step": 12143 }, { "epoch": 0.3118238672745743, "grad_norm": 0.72265625, "learning_rate": 0.00018236747914977287, "loss": 0.9653, "step": 12144 }, { "epoch": 0.3118495444704961, "grad_norm": 0.8359375, "learning_rate": 0.00018236494758649038, "loss": 0.8677, "step": 12145 }, { "epoch": 0.3118752216664179, "grad_norm": 0.78125, "learning_rate": 0.000182362415859062, "loss": 0.9511, "step": 12146 }, { "epoch": 0.31190089886233974, "grad_norm": 0.7734375, "learning_rate": 0.00018235988396749276, "loss": 1.1509, "step": 12147 }, { "epoch": 0.3119265760582616, "grad_norm": 0.796875, "learning_rate": 0.0001823573519117877, "loss": 0.8909, "step": 12148 }, { "epoch": 0.31195225325418335, "grad_norm": 0.75, "learning_rate": 0.00018235481969195192, "loss": 0.9506, "step": 12149 }, { "epoch": 0.3119779304501052, "grad_norm": 0.765625, "learning_rate": 0.00018235228730799038, "loss": 1.0123, "step": 12150 }, { "epoch": 0.312003607646027, "grad_norm": 0.765625, "learning_rate": 0.0001823497547599082, "loss": 0.9332, "step": 12151 }, { "epoch": 0.31202928484194886, "grad_norm": 0.87890625, "learning_rate": 0.00018234722204771035, "loss": 0.9425, "step": 12152 }, { "epoch": 0.31205496203787064, "grad_norm": 0.83203125, "learning_rate": 0.00018234468917140197, "loss": 1.009, "step": 12153 }, { "epoch": 0.3120806392337925, "grad_norm": 0.828125, "learning_rate": 0.00018234215613098804, "loss": 1.0361, "step": 12154 }, { "epoch": 0.3121063164297143, "grad_norm": 0.74609375, "learning_rate": 0.00018233962292647365, "loss": 0.9243, "step": 12155 }, { "epoch": 0.3121319936256361, "grad_norm": 0.8125, "learning_rate": 0.00018233708955786378, "loss": 0.9745, "step": 12156 }, { "epoch": 0.31215767082155793, "grad_norm": 0.8203125, "learning_rate": 0.00018233455602516358, "loss": 1.0366, "step": 12157 }, { "epoch": 0.31218334801747977, "grad_norm": 0.79296875, "learning_rate": 0.00018233202232837803, "loss": 1.1243, "step": 12158 }, { "epoch": 0.31220902521340155, "grad_norm": 0.72265625, "learning_rate": 0.00018232948846751222, "loss": 0.9431, "step": 12159 }, { "epoch": 0.3122347024093234, "grad_norm": 0.79296875, "learning_rate": 0.00018232695444257116, "loss": 1.0736, "step": 12160 }, { "epoch": 0.3122603796052452, "grad_norm": 0.78515625, "learning_rate": 0.00018232442025355988, "loss": 0.8554, "step": 12161 }, { "epoch": 0.31228605680116706, "grad_norm": 0.7578125, "learning_rate": 0.00018232188590048352, "loss": 0.8311, "step": 12162 }, { "epoch": 0.31231173399708884, "grad_norm": 0.796875, "learning_rate": 0.00018231935138334705, "loss": 0.9203, "step": 12163 }, { "epoch": 0.31233741119301067, "grad_norm": 0.78515625, "learning_rate": 0.00018231681670215555, "loss": 1.0548, "step": 12164 }, { "epoch": 0.3123630883889325, "grad_norm": 0.82421875, "learning_rate": 0.00018231428185691407, "loss": 0.9982, "step": 12165 }, { "epoch": 0.3123887655848543, "grad_norm": 0.8203125, "learning_rate": 0.00018231174684762768, "loss": 0.9526, "step": 12166 }, { "epoch": 0.3124144427807761, "grad_norm": 0.76953125, "learning_rate": 0.0001823092116743014, "loss": 0.8507, "step": 12167 }, { "epoch": 0.31244011997669796, "grad_norm": 0.7265625, "learning_rate": 0.00018230667633694028, "loss": 0.9585, "step": 12168 }, { "epoch": 0.31246579717261974, "grad_norm": 0.765625, "learning_rate": 0.0001823041408355494, "loss": 0.921, "step": 12169 }, { "epoch": 0.3124914743685416, "grad_norm": 0.9375, "learning_rate": 0.00018230160517013381, "loss": 1.0677, "step": 12170 }, { "epoch": 0.3125171515644634, "grad_norm": 0.7578125, "learning_rate": 0.00018229906934069854, "loss": 1.1444, "step": 12171 }, { "epoch": 0.31254282876038525, "grad_norm": 0.734375, "learning_rate": 0.0001822965333472487, "loss": 0.8736, "step": 12172 }, { "epoch": 0.31256850595630703, "grad_norm": 0.8203125, "learning_rate": 0.00018229399718978928, "loss": 1.0723, "step": 12173 }, { "epoch": 0.31259418315222887, "grad_norm": 0.83984375, "learning_rate": 0.00018229146086832533, "loss": 0.853, "step": 12174 }, { "epoch": 0.3126198603481507, "grad_norm": 0.765625, "learning_rate": 0.00018228892438286196, "loss": 0.9074, "step": 12175 }, { "epoch": 0.3126455375440725, "grad_norm": 0.765625, "learning_rate": 0.00018228638773340418, "loss": 0.9747, "step": 12176 }, { "epoch": 0.3126712147399943, "grad_norm": 0.7734375, "learning_rate": 0.00018228385091995707, "loss": 0.8471, "step": 12177 }, { "epoch": 0.31269689193591615, "grad_norm": 0.8046875, "learning_rate": 0.0001822813139425257, "loss": 1.0274, "step": 12178 }, { "epoch": 0.31272256913183794, "grad_norm": 0.87890625, "learning_rate": 0.00018227877680111507, "loss": 1.1331, "step": 12179 }, { "epoch": 0.31274824632775977, "grad_norm": 0.8046875, "learning_rate": 0.00018227623949573028, "loss": 1.1268, "step": 12180 }, { "epoch": 0.3127739235236816, "grad_norm": 0.80859375, "learning_rate": 0.00018227370202637637, "loss": 0.8764, "step": 12181 }, { "epoch": 0.31279960071960344, "grad_norm": 0.81640625, "learning_rate": 0.00018227116439305843, "loss": 0.923, "step": 12182 }, { "epoch": 0.3128252779155252, "grad_norm": 0.76171875, "learning_rate": 0.00018226862659578144, "loss": 1.046, "step": 12183 }, { "epoch": 0.31285095511144706, "grad_norm": 0.7578125, "learning_rate": 0.00018226608863455057, "loss": 0.9958, "step": 12184 }, { "epoch": 0.3128766323073689, "grad_norm": 0.7890625, "learning_rate": 0.00018226355050937078, "loss": 0.983, "step": 12185 }, { "epoch": 0.3129023095032907, "grad_norm": 1.015625, "learning_rate": 0.0001822610122202472, "loss": 1.0227, "step": 12186 }, { "epoch": 0.3129279866992125, "grad_norm": 0.8125, "learning_rate": 0.0001822584737671848, "loss": 0.8945, "step": 12187 }, { "epoch": 0.31295366389513435, "grad_norm": 0.74609375, "learning_rate": 0.00018225593515018874, "loss": 0.9278, "step": 12188 }, { "epoch": 0.31297934109105613, "grad_norm": 0.7421875, "learning_rate": 0.000182253396369264, "loss": 1.0192, "step": 12189 }, { "epoch": 0.31300501828697797, "grad_norm": 0.8828125, "learning_rate": 0.0001822508574244157, "loss": 0.9954, "step": 12190 }, { "epoch": 0.3130306954828998, "grad_norm": 0.8515625, "learning_rate": 0.00018224831831564883, "loss": 0.9782, "step": 12191 }, { "epoch": 0.3130563726788216, "grad_norm": 0.8203125, "learning_rate": 0.00018224577904296852, "loss": 1.0279, "step": 12192 }, { "epoch": 0.3130820498747434, "grad_norm": 0.8203125, "learning_rate": 0.00018224323960637978, "loss": 1.069, "step": 12193 }, { "epoch": 0.31310772707066525, "grad_norm": 0.7734375, "learning_rate": 0.00018224070000588772, "loss": 1.01, "step": 12194 }, { "epoch": 0.3131334042665871, "grad_norm": 0.859375, "learning_rate": 0.00018223816024149736, "loss": 0.9431, "step": 12195 }, { "epoch": 0.31315908146250887, "grad_norm": 0.82421875, "learning_rate": 0.00018223562031321376, "loss": 0.9871, "step": 12196 }, { "epoch": 0.3131847586584307, "grad_norm": 0.7890625, "learning_rate": 0.00018223308022104204, "loss": 1.0481, "step": 12197 }, { "epoch": 0.31321043585435254, "grad_norm": 0.78515625, "learning_rate": 0.0001822305399649872, "loss": 0.8383, "step": 12198 }, { "epoch": 0.3132361130502743, "grad_norm": 0.765625, "learning_rate": 0.00018222799954505429, "loss": 1.0115, "step": 12199 }, { "epoch": 0.31326179024619616, "grad_norm": 0.8125, "learning_rate": 0.00018222545896124843, "loss": 1.0219, "step": 12200 }, { "epoch": 0.313287467442118, "grad_norm": 0.7578125, "learning_rate": 0.00018222291821357468, "loss": 0.9864, "step": 12201 }, { "epoch": 0.3133131446380398, "grad_norm": 0.78125, "learning_rate": 0.00018222037730203803, "loss": 1.1614, "step": 12202 }, { "epoch": 0.3133388218339616, "grad_norm": 0.7890625, "learning_rate": 0.0001822178362266436, "loss": 1.0082, "step": 12203 }, { "epoch": 0.31336449902988345, "grad_norm": 0.7265625, "learning_rate": 0.0001822152949873965, "loss": 0.8335, "step": 12204 }, { "epoch": 0.3133901762258053, "grad_norm": 0.73828125, "learning_rate": 0.00018221275358430171, "loss": 0.9004, "step": 12205 }, { "epoch": 0.31341585342172706, "grad_norm": 0.8125, "learning_rate": 0.00018221021201736434, "loss": 0.9362, "step": 12206 }, { "epoch": 0.3134415306176489, "grad_norm": 0.75, "learning_rate": 0.00018220767028658946, "loss": 0.9772, "step": 12207 }, { "epoch": 0.31346720781357074, "grad_norm": 0.80078125, "learning_rate": 0.00018220512839198206, "loss": 0.9089, "step": 12208 }, { "epoch": 0.3134928850094925, "grad_norm": 0.83984375, "learning_rate": 0.0001822025863335473, "loss": 1.0695, "step": 12209 }, { "epoch": 0.31351856220541435, "grad_norm": 0.77734375, "learning_rate": 0.00018220004411129023, "loss": 1.0307, "step": 12210 }, { "epoch": 0.3135442394013362, "grad_norm": 0.80859375, "learning_rate": 0.00018219750172521586, "loss": 1.0801, "step": 12211 }, { "epoch": 0.31356991659725797, "grad_norm": 0.79296875, "learning_rate": 0.00018219495917532933, "loss": 0.9482, "step": 12212 }, { "epoch": 0.3135955937931798, "grad_norm": 0.78515625, "learning_rate": 0.00018219241646163562, "loss": 1.0715, "step": 12213 }, { "epoch": 0.31362127098910164, "grad_norm": 0.79296875, "learning_rate": 0.0001821898735841399, "loss": 0.9967, "step": 12214 }, { "epoch": 0.3136469481850235, "grad_norm": 0.8125, "learning_rate": 0.00018218733054284719, "loss": 1.0462, "step": 12215 }, { "epoch": 0.31367262538094526, "grad_norm": 0.765625, "learning_rate": 0.0001821847873377625, "loss": 1.0955, "step": 12216 }, { "epoch": 0.3136983025768671, "grad_norm": 0.7265625, "learning_rate": 0.00018218224396889099, "loss": 0.9595, "step": 12217 }, { "epoch": 0.31372397977278893, "grad_norm": 0.76953125, "learning_rate": 0.0001821797004362377, "loss": 0.9791, "step": 12218 }, { "epoch": 0.3137496569687107, "grad_norm": 0.83984375, "learning_rate": 0.00018217715673980766, "loss": 1.0297, "step": 12219 }, { "epoch": 0.31377533416463255, "grad_norm": 0.73046875, "learning_rate": 0.000182174612879606, "loss": 0.799, "step": 12220 }, { "epoch": 0.3138010113605544, "grad_norm": 0.765625, "learning_rate": 0.00018217206885563775, "loss": 1.0541, "step": 12221 }, { "epoch": 0.31382668855647616, "grad_norm": 0.76171875, "learning_rate": 0.00018216952466790798, "loss": 1.1542, "step": 12222 }, { "epoch": 0.313852365752398, "grad_norm": 0.78515625, "learning_rate": 0.00018216698031642178, "loss": 0.9917, "step": 12223 }, { "epoch": 0.31387804294831984, "grad_norm": 0.78515625, "learning_rate": 0.00018216443580118422, "loss": 1.0398, "step": 12224 }, { "epoch": 0.31390372014424167, "grad_norm": 0.7890625, "learning_rate": 0.00018216189112220033, "loss": 1.0802, "step": 12225 }, { "epoch": 0.31392939734016345, "grad_norm": 0.95703125, "learning_rate": 0.00018215934627947522, "loss": 0.9887, "step": 12226 }, { "epoch": 0.3139550745360853, "grad_norm": 0.7421875, "learning_rate": 0.00018215680127301397, "loss": 0.877, "step": 12227 }, { "epoch": 0.3139807517320071, "grad_norm": 0.77734375, "learning_rate": 0.0001821542561028216, "loss": 1.0153, "step": 12228 }, { "epoch": 0.3140064289279289, "grad_norm": 0.76953125, "learning_rate": 0.00018215171076890326, "loss": 0.8498, "step": 12229 }, { "epoch": 0.31403210612385074, "grad_norm": 0.78125, "learning_rate": 0.00018214916527126398, "loss": 1.0564, "step": 12230 }, { "epoch": 0.3140577833197726, "grad_norm": 0.7734375, "learning_rate": 0.00018214661960990882, "loss": 0.8526, "step": 12231 }, { "epoch": 0.31408346051569436, "grad_norm": 0.75390625, "learning_rate": 0.00018214407378484287, "loss": 0.9502, "step": 12232 }, { "epoch": 0.3141091377116162, "grad_norm": 0.765625, "learning_rate": 0.00018214152779607117, "loss": 1.0114, "step": 12233 }, { "epoch": 0.31413481490753803, "grad_norm": 0.7265625, "learning_rate": 0.0001821389816435989, "loss": 0.9547, "step": 12234 }, { "epoch": 0.31416049210345987, "grad_norm": 0.828125, "learning_rate": 0.00018213643532743098, "loss": 0.9257, "step": 12235 }, { "epoch": 0.31418616929938165, "grad_norm": 0.73828125, "learning_rate": 0.00018213388884757262, "loss": 0.9311, "step": 12236 }, { "epoch": 0.3142118464953035, "grad_norm": 0.79296875, "learning_rate": 0.00018213134220402882, "loss": 0.8928, "step": 12237 }, { "epoch": 0.3142375236912253, "grad_norm": 0.76953125, "learning_rate": 0.00018212879539680467, "loss": 0.9278, "step": 12238 }, { "epoch": 0.3142632008871471, "grad_norm": 0.6875, "learning_rate": 0.00018212624842590524, "loss": 0.8154, "step": 12239 }, { "epoch": 0.31428887808306893, "grad_norm": 0.81640625, "learning_rate": 0.00018212370129133562, "loss": 1.0247, "step": 12240 }, { "epoch": 0.31431455527899077, "grad_norm": 0.76953125, "learning_rate": 0.0001821211539931009, "loss": 1.0077, "step": 12241 }, { "epoch": 0.31434023247491255, "grad_norm": 0.8046875, "learning_rate": 0.00018211860653120614, "loss": 0.8411, "step": 12242 }, { "epoch": 0.3143659096708344, "grad_norm": 0.78125, "learning_rate": 0.00018211605890565638, "loss": 0.8666, "step": 12243 }, { "epoch": 0.3143915868667562, "grad_norm": 0.81640625, "learning_rate": 0.00018211351111645677, "loss": 0.8833, "step": 12244 }, { "epoch": 0.31441726406267806, "grad_norm": 0.75, "learning_rate": 0.00018211096316361235, "loss": 1.0746, "step": 12245 }, { "epoch": 0.31444294125859984, "grad_norm": 0.7578125, "learning_rate": 0.00018210841504712818, "loss": 1.0969, "step": 12246 }, { "epoch": 0.3144686184545217, "grad_norm": 0.8046875, "learning_rate": 0.00018210586676700933, "loss": 0.893, "step": 12247 }, { "epoch": 0.3144942956504435, "grad_norm": 0.76953125, "learning_rate": 0.00018210331832326093, "loss": 0.9526, "step": 12248 }, { "epoch": 0.3145199728463653, "grad_norm": 0.74609375, "learning_rate": 0.00018210076971588804, "loss": 0.9127, "step": 12249 }, { "epoch": 0.31454565004228713, "grad_norm": 0.72265625, "learning_rate": 0.00018209822094489574, "loss": 1.0217, "step": 12250 }, { "epoch": 0.31457132723820896, "grad_norm": 0.76953125, "learning_rate": 0.00018209567201028906, "loss": 0.9236, "step": 12251 }, { "epoch": 0.31459700443413074, "grad_norm": 0.7890625, "learning_rate": 0.00018209312291207316, "loss": 0.9688, "step": 12252 }, { "epoch": 0.3146226816300526, "grad_norm": 0.86328125, "learning_rate": 0.00018209057365025307, "loss": 0.9446, "step": 12253 }, { "epoch": 0.3146483588259744, "grad_norm": 0.8203125, "learning_rate": 0.00018208802422483387, "loss": 0.9626, "step": 12254 }, { "epoch": 0.31467403602189625, "grad_norm": 0.71484375, "learning_rate": 0.00018208547463582065, "loss": 0.907, "step": 12255 }, { "epoch": 0.31469971321781803, "grad_norm": 0.74609375, "learning_rate": 0.00018208292488321854, "loss": 0.8552, "step": 12256 }, { "epoch": 0.31472539041373987, "grad_norm": 0.82421875, "learning_rate": 0.0001820803749670325, "loss": 0.9587, "step": 12257 }, { "epoch": 0.3147510676096617, "grad_norm": 0.8203125, "learning_rate": 0.00018207782488726776, "loss": 0.9014, "step": 12258 }, { "epoch": 0.3147767448055835, "grad_norm": 0.79296875, "learning_rate": 0.00018207527464392928, "loss": 0.8091, "step": 12259 }, { "epoch": 0.3148024220015053, "grad_norm": 0.71875, "learning_rate": 0.00018207272423702224, "loss": 0.9089, "step": 12260 }, { "epoch": 0.31482809919742716, "grad_norm": 0.8203125, "learning_rate": 0.00018207017366655162, "loss": 0.8997, "step": 12261 }, { "epoch": 0.31485377639334894, "grad_norm": 0.7890625, "learning_rate": 0.0001820676229325226, "loss": 1.0557, "step": 12262 }, { "epoch": 0.3148794535892708, "grad_norm": 0.7578125, "learning_rate": 0.00018206507203494021, "loss": 1.0399, "step": 12263 }, { "epoch": 0.3149051307851926, "grad_norm": 0.80078125, "learning_rate": 0.0001820625209738095, "loss": 0.8475, "step": 12264 }, { "epoch": 0.31493080798111445, "grad_norm": 0.83984375, "learning_rate": 0.00018205996974913563, "loss": 0.9392, "step": 12265 }, { "epoch": 0.3149564851770362, "grad_norm": 0.74609375, "learning_rate": 0.00018205741836092369, "loss": 0.8414, "step": 12266 }, { "epoch": 0.31498216237295806, "grad_norm": 0.75, "learning_rate": 0.0001820548668091787, "loss": 0.8227, "step": 12267 }, { "epoch": 0.3150078395688799, "grad_norm": 0.77734375, "learning_rate": 0.00018205231509390576, "loss": 0.8586, "step": 12268 }, { "epoch": 0.3150335167648017, "grad_norm": 0.78125, "learning_rate": 0.00018204976321510997, "loss": 0.8955, "step": 12269 }, { "epoch": 0.3150591939607235, "grad_norm": 0.78515625, "learning_rate": 0.00018204721117279644, "loss": 1.0248, "step": 12270 }, { "epoch": 0.31508487115664535, "grad_norm": 0.7578125, "learning_rate": 0.0001820446589669702, "loss": 1.0452, "step": 12271 }, { "epoch": 0.31511054835256713, "grad_norm": 0.7890625, "learning_rate": 0.00018204210659763636, "loss": 0.8295, "step": 12272 }, { "epoch": 0.31513622554848897, "grad_norm": 0.77734375, "learning_rate": 0.00018203955406480003, "loss": 1.0275, "step": 12273 }, { "epoch": 0.3151619027444108, "grad_norm": 0.8359375, "learning_rate": 0.00018203700136846629, "loss": 1.1168, "step": 12274 }, { "epoch": 0.31518757994033264, "grad_norm": 0.81640625, "learning_rate": 0.00018203444850864017, "loss": 0.9204, "step": 12275 }, { "epoch": 0.3152132571362544, "grad_norm": 0.84765625, "learning_rate": 0.00018203189548532687, "loss": 0.9057, "step": 12276 }, { "epoch": 0.31523893433217626, "grad_norm": 0.765625, "learning_rate": 0.00018202934229853137, "loss": 0.9068, "step": 12277 }, { "epoch": 0.3152646115280981, "grad_norm": 0.73828125, "learning_rate": 0.0001820267889482588, "loss": 0.9313, "step": 12278 }, { "epoch": 0.3152902887240199, "grad_norm": 0.76171875, "learning_rate": 0.00018202423543451425, "loss": 0.9041, "step": 12279 }, { "epoch": 0.3153159659199417, "grad_norm": 0.80859375, "learning_rate": 0.00018202168175730283, "loss": 1.1188, "step": 12280 }, { "epoch": 0.31534164311586355, "grad_norm": 0.796875, "learning_rate": 0.00018201912791662958, "loss": 0.9679, "step": 12281 }, { "epoch": 0.3153673203117853, "grad_norm": 0.796875, "learning_rate": 0.00018201657391249965, "loss": 0.8754, "step": 12282 }, { "epoch": 0.31539299750770716, "grad_norm": 0.75, "learning_rate": 0.00018201401974491809, "loss": 0.9817, "step": 12283 }, { "epoch": 0.315418674703629, "grad_norm": 0.6875, "learning_rate": 0.00018201146541388998, "loss": 0.8669, "step": 12284 }, { "epoch": 0.31544435189955083, "grad_norm": 0.76171875, "learning_rate": 0.00018200891091942043, "loss": 0.9177, "step": 12285 }, { "epoch": 0.3154700290954726, "grad_norm": 0.73828125, "learning_rate": 0.00018200635626151453, "loss": 0.9731, "step": 12286 }, { "epoch": 0.31549570629139445, "grad_norm": 0.79296875, "learning_rate": 0.00018200380144017735, "loss": 1.0032, "step": 12287 }, { "epoch": 0.3155213834873163, "grad_norm": 0.8828125, "learning_rate": 0.00018200124645541404, "loss": 1.0345, "step": 12288 }, { "epoch": 0.31554706068323807, "grad_norm": 0.8671875, "learning_rate": 0.00018199869130722962, "loss": 0.8734, "step": 12289 }, { "epoch": 0.3155727378791599, "grad_norm": 0.7734375, "learning_rate": 0.00018199613599562922, "loss": 1.0474, "step": 12290 }, { "epoch": 0.31559841507508174, "grad_norm": 0.88671875, "learning_rate": 0.00018199358052061794, "loss": 1.0754, "step": 12291 }, { "epoch": 0.3156240922710035, "grad_norm": 0.765625, "learning_rate": 0.00018199102488220085, "loss": 1.1245, "step": 12292 }, { "epoch": 0.31564976946692536, "grad_norm": 0.72265625, "learning_rate": 0.00018198846908038308, "loss": 1.0278, "step": 12293 }, { "epoch": 0.3156754466628472, "grad_norm": 0.76953125, "learning_rate": 0.00018198591311516967, "loss": 0.8806, "step": 12294 }, { "epoch": 0.31570112385876903, "grad_norm": 0.81640625, "learning_rate": 0.00018198335698656575, "loss": 1.0516, "step": 12295 }, { "epoch": 0.3157268010546908, "grad_norm": 1.109375, "learning_rate": 0.0001819808006945764, "loss": 0.8994, "step": 12296 }, { "epoch": 0.31575247825061264, "grad_norm": 0.7890625, "learning_rate": 0.00018197824423920672, "loss": 0.993, "step": 12297 }, { "epoch": 0.3157781554465345, "grad_norm": 0.81640625, "learning_rate": 0.0001819756876204618, "loss": 1.0958, "step": 12298 }, { "epoch": 0.31580383264245626, "grad_norm": 0.7578125, "learning_rate": 0.00018197313083834676, "loss": 0.9321, "step": 12299 }, { "epoch": 0.3158295098383781, "grad_norm": 0.7578125, "learning_rate": 0.00018197057389286668, "loss": 0.9099, "step": 12300 }, { "epoch": 0.31585518703429993, "grad_norm": 0.734375, "learning_rate": 0.0001819680167840266, "loss": 0.9931, "step": 12301 }, { "epoch": 0.3158808642302217, "grad_norm": 0.85546875, "learning_rate": 0.00018196545951183174, "loss": 1.1365, "step": 12302 }, { "epoch": 0.31590654142614355, "grad_norm": 0.80078125, "learning_rate": 0.00018196290207628704, "loss": 0.9841, "step": 12303 }, { "epoch": 0.3159322186220654, "grad_norm": 0.76171875, "learning_rate": 0.00018196034447739772, "loss": 0.9353, "step": 12304 }, { "epoch": 0.3159578958179872, "grad_norm": 0.78125, "learning_rate": 0.00018195778671516885, "loss": 1.0199, "step": 12305 }, { "epoch": 0.315983573013909, "grad_norm": 0.8359375, "learning_rate": 0.0001819552287896055, "loss": 0.9614, "step": 12306 }, { "epoch": 0.31600925020983084, "grad_norm": 0.8046875, "learning_rate": 0.0001819526707007128, "loss": 0.9273, "step": 12307 }, { "epoch": 0.3160349274057527, "grad_norm": 1.1484375, "learning_rate": 0.00018195011244849583, "loss": 0.9345, "step": 12308 }, { "epoch": 0.31606060460167446, "grad_norm": 0.82421875, "learning_rate": 0.00018194755403295963, "loss": 0.9264, "step": 12309 }, { "epoch": 0.3160862817975963, "grad_norm": 0.69921875, "learning_rate": 0.00018194499545410943, "loss": 0.9707, "step": 12310 }, { "epoch": 0.3161119589935181, "grad_norm": 0.73046875, "learning_rate": 0.00018194243671195023, "loss": 0.8922, "step": 12311 }, { "epoch": 0.3161376361894399, "grad_norm": 0.80078125, "learning_rate": 0.00018193987780648713, "loss": 0.8555, "step": 12312 }, { "epoch": 0.31616331338536174, "grad_norm": 0.80078125, "learning_rate": 0.0001819373187377253, "loss": 1.0531, "step": 12313 }, { "epoch": 0.3161889905812836, "grad_norm": 0.71484375, "learning_rate": 0.00018193475950566975, "loss": 0.8944, "step": 12314 }, { "epoch": 0.3162146677772054, "grad_norm": 0.75, "learning_rate": 0.00018193220011032565, "loss": 0.8899, "step": 12315 }, { "epoch": 0.3162403449731272, "grad_norm": 0.8515625, "learning_rate": 0.0001819296405516981, "loss": 1.0279, "step": 12316 }, { "epoch": 0.31626602216904903, "grad_norm": 0.78125, "learning_rate": 0.00018192708082979213, "loss": 0.9363, "step": 12317 }, { "epoch": 0.31629169936497087, "grad_norm": 0.8125, "learning_rate": 0.00018192452094461292, "loss": 0.923, "step": 12318 }, { "epoch": 0.31631737656089265, "grad_norm": 0.77734375, "learning_rate": 0.00018192196089616554, "loss": 0.9713, "step": 12319 }, { "epoch": 0.3163430537568145, "grad_norm": 0.75390625, "learning_rate": 0.00018191940068445507, "loss": 0.8543, "step": 12320 }, { "epoch": 0.3163687309527363, "grad_norm": 0.69921875, "learning_rate": 0.00018191684030948664, "loss": 0.9248, "step": 12321 }, { "epoch": 0.3163944081486581, "grad_norm": 1.0234375, "learning_rate": 0.00018191427977126535, "loss": 0.9701, "step": 12322 }, { "epoch": 0.31642008534457994, "grad_norm": 0.734375, "learning_rate": 0.00018191171906979631, "loss": 0.9492, "step": 12323 }, { "epoch": 0.3164457625405018, "grad_norm": 0.85546875, "learning_rate": 0.0001819091582050846, "loss": 1.0724, "step": 12324 }, { "epoch": 0.3164714397364236, "grad_norm": 0.7109375, "learning_rate": 0.00018190659717713534, "loss": 0.7762, "step": 12325 }, { "epoch": 0.3164971169323454, "grad_norm": 0.7265625, "learning_rate": 0.00018190403598595362, "loss": 0.9104, "step": 12326 }, { "epoch": 0.3165227941282672, "grad_norm": 0.7578125, "learning_rate": 0.00018190147463154456, "loss": 1.0211, "step": 12327 }, { "epoch": 0.31654847132418906, "grad_norm": 0.80078125, "learning_rate": 0.00018189891311391328, "loss": 0.9393, "step": 12328 }, { "epoch": 0.31657414852011084, "grad_norm": 0.78125, "learning_rate": 0.00018189635143306485, "loss": 0.9851, "step": 12329 }, { "epoch": 0.3165998257160327, "grad_norm": 0.7734375, "learning_rate": 0.0001818937895890044, "loss": 0.9479, "step": 12330 }, { "epoch": 0.3166255029119545, "grad_norm": 0.78125, "learning_rate": 0.000181891227581737, "loss": 0.9601, "step": 12331 }, { "epoch": 0.3166511801078763, "grad_norm": 0.7734375, "learning_rate": 0.0001818886654112678, "loss": 0.9624, "step": 12332 }, { "epoch": 0.31667685730379813, "grad_norm": 0.76953125, "learning_rate": 0.00018188610307760187, "loss": 0.93, "step": 12333 }, { "epoch": 0.31670253449971997, "grad_norm": 0.84375, "learning_rate": 0.00018188354058074437, "loss": 0.8896, "step": 12334 }, { "epoch": 0.3167282116956418, "grad_norm": 0.7265625, "learning_rate": 0.00018188097792070032, "loss": 0.9073, "step": 12335 }, { "epoch": 0.3167538888915636, "grad_norm": 0.75, "learning_rate": 0.00018187841509747493, "loss": 0.9401, "step": 12336 }, { "epoch": 0.3167795660874854, "grad_norm": 0.765625, "learning_rate": 0.0001818758521110732, "loss": 1.0074, "step": 12337 }, { "epoch": 0.31680524328340726, "grad_norm": 0.78125, "learning_rate": 0.00018187328896150033, "loss": 1.0124, "step": 12338 }, { "epoch": 0.31683092047932904, "grad_norm": 0.76953125, "learning_rate": 0.00018187072564876137, "loss": 0.8477, "step": 12339 }, { "epoch": 0.3168565976752509, "grad_norm": 0.765625, "learning_rate": 0.00018186816217286147, "loss": 1.0134, "step": 12340 }, { "epoch": 0.3168822748711727, "grad_norm": 0.81640625, "learning_rate": 0.0001818655985338057, "loss": 0.8935, "step": 12341 }, { "epoch": 0.3169079520670945, "grad_norm": 0.7265625, "learning_rate": 0.0001818630347315992, "loss": 0.9683, "step": 12342 }, { "epoch": 0.3169336292630163, "grad_norm": 0.78125, "learning_rate": 0.00018186047076624705, "loss": 0.937, "step": 12343 }, { "epoch": 0.31695930645893816, "grad_norm": 0.73046875, "learning_rate": 0.0001818579066377544, "loss": 0.9263, "step": 12344 }, { "epoch": 0.31698498365486, "grad_norm": 0.796875, "learning_rate": 0.0001818553423461263, "loss": 1.0076, "step": 12345 }, { "epoch": 0.3170106608507818, "grad_norm": 0.82421875, "learning_rate": 0.00018185277789136794, "loss": 0.9484, "step": 12346 }, { "epoch": 0.3170363380467036, "grad_norm": 0.765625, "learning_rate": 0.00018185021327348438, "loss": 0.9088, "step": 12347 }, { "epoch": 0.31706201524262545, "grad_norm": 0.828125, "learning_rate": 0.0001818476484924807, "loss": 0.8781, "step": 12348 }, { "epoch": 0.31708769243854723, "grad_norm": 0.92578125, "learning_rate": 0.00018184508354836208, "loss": 0.8225, "step": 12349 }, { "epoch": 0.31711336963446907, "grad_norm": 0.78515625, "learning_rate": 0.0001818425184411336, "loss": 1.0035, "step": 12350 }, { "epoch": 0.3171390468303909, "grad_norm": 0.8515625, "learning_rate": 0.0001818399531708004, "loss": 0.9629, "step": 12351 }, { "epoch": 0.3171647240263127, "grad_norm": 2.390625, "learning_rate": 0.0001818373877373675, "loss": 0.9593, "step": 12352 }, { "epoch": 0.3171904012222345, "grad_norm": 1.0390625, "learning_rate": 0.00018183482214084013, "loss": 0.9026, "step": 12353 }, { "epoch": 0.31721607841815636, "grad_norm": 0.87890625, "learning_rate": 0.00018183225638122332, "loss": 1.0439, "step": 12354 }, { "epoch": 0.3172417556140782, "grad_norm": 0.73046875, "learning_rate": 0.00018182969045852225, "loss": 1.0583, "step": 12355 }, { "epoch": 0.31726743280999997, "grad_norm": 0.73828125, "learning_rate": 0.000181827124372742, "loss": 0.9467, "step": 12356 }, { "epoch": 0.3172931100059218, "grad_norm": 0.76953125, "learning_rate": 0.00018182455812388763, "loss": 1.0018, "step": 12357 }, { "epoch": 0.31731878720184364, "grad_norm": 0.8046875, "learning_rate": 0.00018182199171196435, "loss": 0.8256, "step": 12358 }, { "epoch": 0.3173444643977654, "grad_norm": 0.8203125, "learning_rate": 0.00018181942513697722, "loss": 0.9589, "step": 12359 }, { "epoch": 0.31737014159368726, "grad_norm": 0.76953125, "learning_rate": 0.00018181685839893137, "loss": 1.0566, "step": 12360 }, { "epoch": 0.3173958187896091, "grad_norm": 0.78515625, "learning_rate": 0.0001818142914978319, "loss": 0.9494, "step": 12361 }, { "epoch": 0.3174214959855309, "grad_norm": 0.73828125, "learning_rate": 0.00018181172443368395, "loss": 0.9694, "step": 12362 }, { "epoch": 0.3174471731814527, "grad_norm": 0.796875, "learning_rate": 0.0001818091572064926, "loss": 0.8628, "step": 12363 }, { "epoch": 0.31747285037737455, "grad_norm": 0.796875, "learning_rate": 0.000181806589816263, "loss": 0.9349, "step": 12364 }, { "epoch": 0.3174985275732964, "grad_norm": 0.8203125, "learning_rate": 0.0001818040222630003, "loss": 0.9947, "step": 12365 }, { "epoch": 0.31752420476921817, "grad_norm": 0.76171875, "learning_rate": 0.00018180145454670953, "loss": 0.8955, "step": 12366 }, { "epoch": 0.31754988196514, "grad_norm": 0.85546875, "learning_rate": 0.00018179888666739586, "loss": 1.0467, "step": 12367 }, { "epoch": 0.31757555916106184, "grad_norm": 0.76171875, "learning_rate": 0.00018179631862506442, "loss": 0.9769, "step": 12368 }, { "epoch": 0.3176012363569836, "grad_norm": 1.515625, "learning_rate": 0.00018179375041972023, "loss": 1.0648, "step": 12369 }, { "epoch": 0.31762691355290545, "grad_norm": 0.73828125, "learning_rate": 0.00018179118205136855, "loss": 0.8976, "step": 12370 }, { "epoch": 0.3176525907488273, "grad_norm": 0.75, "learning_rate": 0.00018178861352001442, "loss": 1.0216, "step": 12371 }, { "epoch": 0.31767826794474907, "grad_norm": 0.7421875, "learning_rate": 0.00018178604482566298, "loss": 1.0228, "step": 12372 }, { "epoch": 0.3177039451406709, "grad_norm": 0.84765625, "learning_rate": 0.00018178347596831934, "loss": 1.0365, "step": 12373 }, { "epoch": 0.31772962233659274, "grad_norm": 0.88671875, "learning_rate": 0.0001817809069479886, "loss": 0.9978, "step": 12374 }, { "epoch": 0.3177552995325146, "grad_norm": 0.78125, "learning_rate": 0.0001817783377646759, "loss": 0.8859, "step": 12375 }, { "epoch": 0.31778097672843636, "grad_norm": 0.78515625, "learning_rate": 0.00018177576841838636, "loss": 0.986, "step": 12376 }, { "epoch": 0.3178066539243582, "grad_norm": 0.75390625, "learning_rate": 0.0001817731989091251, "loss": 0.8933, "step": 12377 }, { "epoch": 0.31783233112028003, "grad_norm": 0.8046875, "learning_rate": 0.00018177062923689725, "loss": 0.8597, "step": 12378 }, { "epoch": 0.3178580083162018, "grad_norm": 0.69140625, "learning_rate": 0.00018176805940170793, "loss": 0.8924, "step": 12379 }, { "epoch": 0.31788368551212365, "grad_norm": 1.0390625, "learning_rate": 0.00018176548940356226, "loss": 1.0621, "step": 12380 }, { "epoch": 0.3179093627080455, "grad_norm": 0.7890625, "learning_rate": 0.00018176291924246532, "loss": 0.8658, "step": 12381 }, { "epoch": 0.31793503990396726, "grad_norm": 0.75, "learning_rate": 0.0001817603489184223, "loss": 0.839, "step": 12382 }, { "epoch": 0.3179607170998891, "grad_norm": 0.76953125, "learning_rate": 0.0001817577784314383, "loss": 1.0689, "step": 12383 }, { "epoch": 0.31798639429581094, "grad_norm": 0.92578125, "learning_rate": 0.0001817552077815184, "loss": 0.997, "step": 12384 }, { "epoch": 0.3180120714917328, "grad_norm": 0.83203125, "learning_rate": 0.00018175263696866778, "loss": 0.9055, "step": 12385 }, { "epoch": 0.31803774868765455, "grad_norm": 0.71484375, "learning_rate": 0.00018175006599289152, "loss": 0.9134, "step": 12386 }, { "epoch": 0.3180634258835764, "grad_norm": 0.84375, "learning_rate": 0.00018174749485419476, "loss": 1.0294, "step": 12387 }, { "epoch": 0.3180891030794982, "grad_norm": 0.77734375, "learning_rate": 0.00018174492355258262, "loss": 0.9101, "step": 12388 }, { "epoch": 0.31811478027542, "grad_norm": 0.78125, "learning_rate": 0.00018174235208806024, "loss": 0.8014, "step": 12389 }, { "epoch": 0.31814045747134184, "grad_norm": 1.125, "learning_rate": 0.00018173978046063277, "loss": 1.1605, "step": 12390 }, { "epoch": 0.3181661346672637, "grad_norm": 0.7890625, "learning_rate": 0.00018173720867030525, "loss": 0.9692, "step": 12391 }, { "epoch": 0.31819181186318546, "grad_norm": 0.84765625, "learning_rate": 0.0001817346367170829, "loss": 1.0285, "step": 12392 }, { "epoch": 0.3182174890591073, "grad_norm": 0.703125, "learning_rate": 0.00018173206460097077, "loss": 0.7621, "step": 12393 }, { "epoch": 0.31824316625502913, "grad_norm": 0.8046875, "learning_rate": 0.00018172949232197404, "loss": 0.9969, "step": 12394 }, { "epoch": 0.3182688434509509, "grad_norm": 0.82421875, "learning_rate": 0.0001817269198800978, "loss": 0.9829, "step": 12395 }, { "epoch": 0.31829452064687275, "grad_norm": 0.7578125, "learning_rate": 0.0001817243472753472, "loss": 0.892, "step": 12396 }, { "epoch": 0.3183201978427946, "grad_norm": 0.77734375, "learning_rate": 0.00018172177450772735, "loss": 1.0831, "step": 12397 }, { "epoch": 0.3183458750387164, "grad_norm": 0.74609375, "learning_rate": 0.0001817192015772434, "loss": 0.9352, "step": 12398 }, { "epoch": 0.3183715522346382, "grad_norm": 0.7578125, "learning_rate": 0.00018171662848390045, "loss": 0.8059, "step": 12399 }, { "epoch": 0.31839722943056004, "grad_norm": 0.859375, "learning_rate": 0.00018171405522770365, "loss": 0.8675, "step": 12400 }, { "epoch": 0.31842290662648187, "grad_norm": 0.83984375, "learning_rate": 0.0001817114818086581, "loss": 0.9671, "step": 12401 }, { "epoch": 0.31844858382240365, "grad_norm": 0.74609375, "learning_rate": 0.00018170890822676898, "loss": 0.9682, "step": 12402 }, { "epoch": 0.3184742610183255, "grad_norm": 0.78125, "learning_rate": 0.0001817063344820414, "loss": 0.8738, "step": 12403 }, { "epoch": 0.3184999382142473, "grad_norm": 0.82421875, "learning_rate": 0.00018170376057448042, "loss": 0.9658, "step": 12404 }, { "epoch": 0.3185256154101691, "grad_norm": 0.77734375, "learning_rate": 0.00018170118650409124, "loss": 1.011, "step": 12405 }, { "epoch": 0.31855129260609094, "grad_norm": 0.74609375, "learning_rate": 0.00018169861227087898, "loss": 0.9209, "step": 12406 }, { "epoch": 0.3185769698020128, "grad_norm": 0.7734375, "learning_rate": 0.00018169603787484875, "loss": 0.9891, "step": 12407 }, { "epoch": 0.3186026469979346, "grad_norm": 0.7734375, "learning_rate": 0.00018169346331600573, "loss": 1.083, "step": 12408 }, { "epoch": 0.3186283241938564, "grad_norm": 0.8046875, "learning_rate": 0.000181690888594355, "loss": 1.0344, "step": 12409 }, { "epoch": 0.31865400138977823, "grad_norm": 0.69921875, "learning_rate": 0.00018168831370990172, "loss": 0.92, "step": 12410 }, { "epoch": 0.31867967858570007, "grad_norm": 0.8046875, "learning_rate": 0.000181685738662651, "loss": 0.9204, "step": 12411 }, { "epoch": 0.31870535578162185, "grad_norm": 0.76171875, "learning_rate": 0.00018168316345260798, "loss": 0.9036, "step": 12412 }, { "epoch": 0.3187310329775437, "grad_norm": 0.79296875, "learning_rate": 0.00018168058807977778, "loss": 1.0674, "step": 12413 }, { "epoch": 0.3187567101734655, "grad_norm": 0.7734375, "learning_rate": 0.00018167801254416558, "loss": 0.9946, "step": 12414 }, { "epoch": 0.3187823873693873, "grad_norm": 0.796875, "learning_rate": 0.00018167543684577647, "loss": 1.0751, "step": 12415 }, { "epoch": 0.31880806456530913, "grad_norm": 0.73828125, "learning_rate": 0.00018167286098461557, "loss": 0.9598, "step": 12416 }, { "epoch": 0.31883374176123097, "grad_norm": 0.8046875, "learning_rate": 0.00018167028496068806, "loss": 0.908, "step": 12417 }, { "epoch": 0.3188594189571528, "grad_norm": 0.8046875, "learning_rate": 0.00018166770877399904, "loss": 0.9959, "step": 12418 }, { "epoch": 0.3188850961530746, "grad_norm": 0.7265625, "learning_rate": 0.00018166513242455365, "loss": 0.9434, "step": 12419 }, { "epoch": 0.3189107733489964, "grad_norm": 0.87890625, "learning_rate": 0.00018166255591235702, "loss": 0.9644, "step": 12420 }, { "epoch": 0.31893645054491826, "grad_norm": 0.79296875, "learning_rate": 0.0001816599792374143, "loss": 1.0388, "step": 12421 }, { "epoch": 0.31896212774084004, "grad_norm": 0.7578125, "learning_rate": 0.00018165740239973061, "loss": 0.8118, "step": 12422 }, { "epoch": 0.3189878049367619, "grad_norm": 0.74609375, "learning_rate": 0.0001816548253993111, "loss": 1.0463, "step": 12423 }, { "epoch": 0.3190134821326837, "grad_norm": 0.7578125, "learning_rate": 0.0001816522482361609, "loss": 0.7914, "step": 12424 }, { "epoch": 0.3190391593286055, "grad_norm": 0.83203125, "learning_rate": 0.00018164967091028515, "loss": 0.8936, "step": 12425 }, { "epoch": 0.31906483652452733, "grad_norm": 0.77734375, "learning_rate": 0.00018164709342168898, "loss": 0.8595, "step": 12426 }, { "epoch": 0.31909051372044916, "grad_norm": 0.7578125, "learning_rate": 0.00018164451577037754, "loss": 1.0444, "step": 12427 }, { "epoch": 0.319116190916371, "grad_norm": 0.828125, "learning_rate": 0.0001816419379563559, "loss": 1.1864, "step": 12428 }, { "epoch": 0.3191418681122928, "grad_norm": 0.78515625, "learning_rate": 0.00018163935997962934, "loss": 0.846, "step": 12429 }, { "epoch": 0.3191675453082146, "grad_norm": 1.0390625, "learning_rate": 0.00018163678184020284, "loss": 0.907, "step": 12430 }, { "epoch": 0.31919322250413645, "grad_norm": 0.734375, "learning_rate": 0.00018163420353808161, "loss": 0.8933, "step": 12431 }, { "epoch": 0.31921889970005823, "grad_norm": 0.73046875, "learning_rate": 0.00018163162507327084, "loss": 0.9965, "step": 12432 }, { "epoch": 0.31924457689598007, "grad_norm": 0.78515625, "learning_rate": 0.00018162904644577558, "loss": 1.0662, "step": 12433 }, { "epoch": 0.3192702540919019, "grad_norm": 0.89453125, "learning_rate": 0.00018162646765560098, "loss": 0.983, "step": 12434 }, { "epoch": 0.3192959312878237, "grad_norm": 0.76953125, "learning_rate": 0.00018162388870275224, "loss": 0.8563, "step": 12435 }, { "epoch": 0.3193216084837455, "grad_norm": 0.86328125, "learning_rate": 0.00018162130958723446, "loss": 0.9875, "step": 12436 }, { "epoch": 0.31934728567966736, "grad_norm": 0.7734375, "learning_rate": 0.00018161873030905277, "loss": 1.0514, "step": 12437 }, { "epoch": 0.3193729628755892, "grad_norm": 0.77734375, "learning_rate": 0.00018161615086821232, "loss": 0.9615, "step": 12438 }, { "epoch": 0.319398640071511, "grad_norm": 0.86328125, "learning_rate": 0.00018161357126471827, "loss": 0.923, "step": 12439 }, { "epoch": 0.3194243172674328, "grad_norm": 0.83984375, "learning_rate": 0.00018161099149857575, "loss": 0.9569, "step": 12440 }, { "epoch": 0.31944999446335465, "grad_norm": 0.80078125, "learning_rate": 0.00018160841156978987, "loss": 1.0023, "step": 12441 }, { "epoch": 0.31947567165927643, "grad_norm": 0.76171875, "learning_rate": 0.0001816058314783658, "loss": 0.8758, "step": 12442 }, { "epoch": 0.31950134885519826, "grad_norm": 0.7578125, "learning_rate": 0.0001816032512243087, "loss": 1.0105, "step": 12443 }, { "epoch": 0.3195270260511201, "grad_norm": 0.7109375, "learning_rate": 0.00018160067080762366, "loss": 0.9821, "step": 12444 }, { "epoch": 0.3195527032470419, "grad_norm": 0.83984375, "learning_rate": 0.00018159809022831589, "loss": 1.0348, "step": 12445 }, { "epoch": 0.3195783804429637, "grad_norm": 0.734375, "learning_rate": 0.0001815955094863905, "loss": 1.0157, "step": 12446 }, { "epoch": 0.31960405763888555, "grad_norm": 0.80078125, "learning_rate": 0.00018159292858185258, "loss": 0.8892, "step": 12447 }, { "epoch": 0.3196297348348074, "grad_norm": 0.765625, "learning_rate": 0.0001815903475147074, "loss": 0.8917, "step": 12448 }, { "epoch": 0.31965541203072917, "grad_norm": 0.77734375, "learning_rate": 0.00018158776628495995, "loss": 1.0006, "step": 12449 }, { "epoch": 0.319681089226651, "grad_norm": 0.765625, "learning_rate": 0.00018158518489261546, "loss": 1.0174, "step": 12450 }, { "epoch": 0.31970676642257284, "grad_norm": 0.76953125, "learning_rate": 0.00018158260333767912, "loss": 0.9286, "step": 12451 }, { "epoch": 0.3197324436184946, "grad_norm": 0.8359375, "learning_rate": 0.00018158002162015598, "loss": 0.888, "step": 12452 }, { "epoch": 0.31975812081441646, "grad_norm": 0.8125, "learning_rate": 0.00018157743974005124, "loss": 1.0473, "step": 12453 }, { "epoch": 0.3197837980103383, "grad_norm": 0.75390625, "learning_rate": 0.00018157485769737002, "loss": 0.9995, "step": 12454 }, { "epoch": 0.3198094752062601, "grad_norm": 0.8046875, "learning_rate": 0.0001815722754921175, "loss": 0.9662, "step": 12455 }, { "epoch": 0.3198351524021819, "grad_norm": 0.765625, "learning_rate": 0.00018156969312429876, "loss": 0.98, "step": 12456 }, { "epoch": 0.31986082959810375, "grad_norm": 0.73046875, "learning_rate": 0.00018156711059391905, "loss": 1.0004, "step": 12457 }, { "epoch": 0.3198865067940256, "grad_norm": 0.80859375, "learning_rate": 0.00018156452790098342, "loss": 1.0664, "step": 12458 }, { "epoch": 0.31991218398994736, "grad_norm": 0.734375, "learning_rate": 0.00018156194504549705, "loss": 0.8832, "step": 12459 }, { "epoch": 0.3199378611858692, "grad_norm": 0.78125, "learning_rate": 0.0001815593620274651, "loss": 0.8671, "step": 12460 }, { "epoch": 0.31996353838179104, "grad_norm": 0.859375, "learning_rate": 0.00018155677884689272, "loss": 1.0121, "step": 12461 }, { "epoch": 0.3199892155777128, "grad_norm": 0.7578125, "learning_rate": 0.00018155419550378504, "loss": 0.9822, "step": 12462 }, { "epoch": 0.32001489277363465, "grad_norm": 0.79296875, "learning_rate": 0.0001815516119981472, "loss": 0.9441, "step": 12463 }, { "epoch": 0.3200405699695565, "grad_norm": 0.80859375, "learning_rate": 0.00018154902832998436, "loss": 1.0079, "step": 12464 }, { "epoch": 0.32006624716547827, "grad_norm": 0.74609375, "learning_rate": 0.0001815464444993017, "loss": 0.996, "step": 12465 }, { "epoch": 0.3200919243614001, "grad_norm": 0.8515625, "learning_rate": 0.00018154386050610433, "loss": 0.8103, "step": 12466 }, { "epoch": 0.32011760155732194, "grad_norm": 0.89453125, "learning_rate": 0.00018154127635039742, "loss": 1.1548, "step": 12467 }, { "epoch": 0.3201432787532438, "grad_norm": 0.8046875, "learning_rate": 0.00018153869203218608, "loss": 0.9151, "step": 12468 }, { "epoch": 0.32016895594916556, "grad_norm": 0.8515625, "learning_rate": 0.00018153610755147553, "loss": 1.0336, "step": 12469 }, { "epoch": 0.3201946331450874, "grad_norm": 0.76171875, "learning_rate": 0.00018153352290827085, "loss": 0.9681, "step": 12470 }, { "epoch": 0.32022031034100923, "grad_norm": 0.83984375, "learning_rate": 0.00018153093810257723, "loss": 1.0369, "step": 12471 }, { "epoch": 0.320245987536931, "grad_norm": 0.7734375, "learning_rate": 0.00018152835313439987, "loss": 1.0762, "step": 12472 }, { "epoch": 0.32027166473285285, "grad_norm": 0.71875, "learning_rate": 0.00018152576800374378, "loss": 0.9926, "step": 12473 }, { "epoch": 0.3202973419287747, "grad_norm": 0.72265625, "learning_rate": 0.00018152318271061425, "loss": 0.9451, "step": 12474 }, { "epoch": 0.32032301912469646, "grad_norm": 0.734375, "learning_rate": 0.00018152059725501637, "loss": 1.0864, "step": 12475 }, { "epoch": 0.3203486963206183, "grad_norm": 0.76953125, "learning_rate": 0.00018151801163695532, "loss": 0.8794, "step": 12476 }, { "epoch": 0.32037437351654013, "grad_norm": 0.7890625, "learning_rate": 0.00018151542585643623, "loss": 0.851, "step": 12477 }, { "epoch": 0.32040005071246197, "grad_norm": 0.7421875, "learning_rate": 0.00018151283991346424, "loss": 0.868, "step": 12478 }, { "epoch": 0.32042572790838375, "grad_norm": 0.76171875, "learning_rate": 0.00018151025380804453, "loss": 0.9065, "step": 12479 }, { "epoch": 0.3204514051043056, "grad_norm": 0.7734375, "learning_rate": 0.00018150766754018228, "loss": 0.9447, "step": 12480 }, { "epoch": 0.3204770823002274, "grad_norm": 0.77734375, "learning_rate": 0.00018150508110988255, "loss": 0.8477, "step": 12481 }, { "epoch": 0.3205027594961492, "grad_norm": 0.76953125, "learning_rate": 0.0001815024945171506, "loss": 1.0083, "step": 12482 }, { "epoch": 0.32052843669207104, "grad_norm": 0.9453125, "learning_rate": 0.00018149990776199153, "loss": 0.8665, "step": 12483 }, { "epoch": 0.3205541138879929, "grad_norm": 0.73046875, "learning_rate": 0.0001814973208444105, "loss": 0.9048, "step": 12484 }, { "epoch": 0.32057979108391466, "grad_norm": 0.734375, "learning_rate": 0.00018149473376441268, "loss": 0.9066, "step": 12485 }, { "epoch": 0.3206054682798365, "grad_norm": 0.8046875, "learning_rate": 0.0001814921465220032, "loss": 0.9139, "step": 12486 }, { "epoch": 0.32063114547575833, "grad_norm": 0.828125, "learning_rate": 0.00018148955911718726, "loss": 0.959, "step": 12487 }, { "epoch": 0.32065682267168016, "grad_norm": 0.8828125, "learning_rate": 0.00018148697154997, "loss": 0.869, "step": 12488 }, { "epoch": 0.32068249986760194, "grad_norm": 0.796875, "learning_rate": 0.0001814843838203565, "loss": 0.8523, "step": 12489 }, { "epoch": 0.3207081770635238, "grad_norm": 0.8828125, "learning_rate": 0.00018148179592835203, "loss": 1.1048, "step": 12490 }, { "epoch": 0.3207338542594456, "grad_norm": 0.83203125, "learning_rate": 0.00018147920787396168, "loss": 0.9259, "step": 12491 }, { "epoch": 0.3207595314553674, "grad_norm": 0.78125, "learning_rate": 0.00018147661965719068, "loss": 1.0471, "step": 12492 }, { "epoch": 0.32078520865128923, "grad_norm": 0.8125, "learning_rate": 0.0001814740312780441, "loss": 0.9961, "step": 12493 }, { "epoch": 0.32081088584721107, "grad_norm": 0.73828125, "learning_rate": 0.00018147144273652713, "loss": 0.8476, "step": 12494 }, { "epoch": 0.32083656304313285, "grad_norm": 0.78125, "learning_rate": 0.00018146885403264493, "loss": 0.9885, "step": 12495 }, { "epoch": 0.3208622402390547, "grad_norm": 0.84765625, "learning_rate": 0.0001814662651664027, "loss": 0.9756, "step": 12496 }, { "epoch": 0.3208879174349765, "grad_norm": 0.79296875, "learning_rate": 0.00018146367613780553, "loss": 1.0221, "step": 12497 }, { "epoch": 0.32091359463089836, "grad_norm": 0.828125, "learning_rate": 0.00018146108694685862, "loss": 1.0173, "step": 12498 }, { "epoch": 0.32093927182682014, "grad_norm": 0.82421875, "learning_rate": 0.0001814584975935671, "loss": 0.8554, "step": 12499 }, { "epoch": 0.320964949022742, "grad_norm": 0.7734375, "learning_rate": 0.00018145590807793619, "loss": 0.866, "step": 12500 }, { "epoch": 0.3209906262186638, "grad_norm": 0.78125, "learning_rate": 0.00018145331839997098, "loss": 0.9605, "step": 12501 }, { "epoch": 0.3210163034145856, "grad_norm": 0.75390625, "learning_rate": 0.0001814507285596767, "loss": 0.8885, "step": 12502 }, { "epoch": 0.3210419806105074, "grad_norm": 0.80859375, "learning_rate": 0.00018144813855705845, "loss": 1.0353, "step": 12503 }, { "epoch": 0.32106765780642926, "grad_norm": 0.75390625, "learning_rate": 0.0001814455483921214, "loss": 0.8513, "step": 12504 }, { "epoch": 0.32109333500235104, "grad_norm": 0.80078125, "learning_rate": 0.00018144295806487074, "loss": 1.0473, "step": 12505 }, { "epoch": 0.3211190121982729, "grad_norm": 0.765625, "learning_rate": 0.00018144036757531166, "loss": 0.9133, "step": 12506 }, { "epoch": 0.3211446893941947, "grad_norm": 0.7734375, "learning_rate": 0.00018143777692344924, "loss": 1.1101, "step": 12507 }, { "epoch": 0.32117036659011655, "grad_norm": 0.76953125, "learning_rate": 0.0001814351861092887, "loss": 1.0163, "step": 12508 }, { "epoch": 0.32119604378603833, "grad_norm": 0.7578125, "learning_rate": 0.0001814325951328352, "loss": 0.8843, "step": 12509 }, { "epoch": 0.32122172098196017, "grad_norm": 0.75390625, "learning_rate": 0.00018143000399409388, "loss": 0.9628, "step": 12510 }, { "epoch": 0.321247398177882, "grad_norm": 0.7734375, "learning_rate": 0.00018142741269306994, "loss": 0.9133, "step": 12511 }, { "epoch": 0.3212730753738038, "grad_norm": 0.80078125, "learning_rate": 0.0001814248212297685, "loss": 0.8098, "step": 12512 }, { "epoch": 0.3212987525697256, "grad_norm": 0.76953125, "learning_rate": 0.00018142222960419475, "loss": 0.9011, "step": 12513 }, { "epoch": 0.32132442976564746, "grad_norm": 0.7890625, "learning_rate": 0.00018141963781635387, "loss": 0.9587, "step": 12514 }, { "epoch": 0.32135010696156924, "grad_norm": 0.75390625, "learning_rate": 0.00018141704586625097, "loss": 0.8725, "step": 12515 }, { "epoch": 0.3213757841574911, "grad_norm": 0.80078125, "learning_rate": 0.00018141445375389128, "loss": 0.878, "step": 12516 }, { "epoch": 0.3214014613534129, "grad_norm": 0.80078125, "learning_rate": 0.00018141186147927995, "loss": 0.9245, "step": 12517 }, { "epoch": 0.32142713854933475, "grad_norm": 0.8515625, "learning_rate": 0.0001814092690424221, "loss": 0.9871, "step": 12518 }, { "epoch": 0.3214528157452565, "grad_norm": 0.8046875, "learning_rate": 0.00018140667644332294, "loss": 0.866, "step": 12519 }, { "epoch": 0.32147849294117836, "grad_norm": 0.8125, "learning_rate": 0.00018140408368198764, "loss": 0.9791, "step": 12520 }, { "epoch": 0.3215041701371002, "grad_norm": 0.765625, "learning_rate": 0.00018140149075842135, "loss": 0.9033, "step": 12521 }, { "epoch": 0.321529847333022, "grad_norm": 0.76171875, "learning_rate": 0.00018139889767262922, "loss": 1.0346, "step": 12522 }, { "epoch": 0.3215555245289438, "grad_norm": 0.796875, "learning_rate": 0.0001813963044246165, "loss": 1.0945, "step": 12523 }, { "epoch": 0.32158120172486565, "grad_norm": 0.7421875, "learning_rate": 0.00018139371101438822, "loss": 1.0326, "step": 12524 }, { "epoch": 0.32160687892078743, "grad_norm": 0.796875, "learning_rate": 0.00018139111744194968, "loss": 0.8779, "step": 12525 }, { "epoch": 0.32163255611670927, "grad_norm": 0.765625, "learning_rate": 0.00018138852370730597, "loss": 0.9855, "step": 12526 }, { "epoch": 0.3216582333126311, "grad_norm": 0.7421875, "learning_rate": 0.00018138592981046227, "loss": 0.8691, "step": 12527 }, { "epoch": 0.32168391050855294, "grad_norm": 0.86328125, "learning_rate": 0.0001813833357514238, "loss": 1.1579, "step": 12528 }, { "epoch": 0.3217095877044747, "grad_norm": 0.77734375, "learning_rate": 0.00018138074153019567, "loss": 1.0426, "step": 12529 }, { "epoch": 0.32173526490039656, "grad_norm": 0.765625, "learning_rate": 0.00018137814714678306, "loss": 0.9052, "step": 12530 }, { "epoch": 0.3217609420963184, "grad_norm": 0.82421875, "learning_rate": 0.00018137555260119118, "loss": 0.8618, "step": 12531 }, { "epoch": 0.3217866192922402, "grad_norm": 0.80078125, "learning_rate": 0.00018137295789342518, "loss": 0.9234, "step": 12532 }, { "epoch": 0.321812296488162, "grad_norm": 0.76953125, "learning_rate": 0.00018137036302349024, "loss": 0.8504, "step": 12533 }, { "epoch": 0.32183797368408384, "grad_norm": 0.75390625, "learning_rate": 0.00018136776799139147, "loss": 1.0938, "step": 12534 }, { "epoch": 0.3218636508800056, "grad_norm": 0.76953125, "learning_rate": 0.00018136517279713411, "loss": 0.9881, "step": 12535 }, { "epoch": 0.32188932807592746, "grad_norm": 0.83203125, "learning_rate": 0.00018136257744072332, "loss": 0.9786, "step": 12536 }, { "epoch": 0.3219150052718493, "grad_norm": 0.7578125, "learning_rate": 0.00018135998192216426, "loss": 0.931, "step": 12537 }, { "epoch": 0.32194068246777113, "grad_norm": 0.6796875, "learning_rate": 0.0001813573862414621, "loss": 0.8667, "step": 12538 }, { "epoch": 0.3219663596636929, "grad_norm": 0.78125, "learning_rate": 0.00018135479039862199, "loss": 1.019, "step": 12539 }, { "epoch": 0.32199203685961475, "grad_norm": 0.765625, "learning_rate": 0.00018135219439364918, "loss": 1.0066, "step": 12540 }, { "epoch": 0.3220177140555366, "grad_norm": 0.79296875, "learning_rate": 0.00018134959822654876, "loss": 0.8719, "step": 12541 }, { "epoch": 0.32204339125145837, "grad_norm": 1.046875, "learning_rate": 0.00018134700189732596, "loss": 0.922, "step": 12542 }, { "epoch": 0.3220690684473802, "grad_norm": 0.80078125, "learning_rate": 0.00018134440540598589, "loss": 1.038, "step": 12543 }, { "epoch": 0.32209474564330204, "grad_norm": 0.75390625, "learning_rate": 0.0001813418087525338, "loss": 0.9761, "step": 12544 }, { "epoch": 0.3221204228392238, "grad_norm": 0.91015625, "learning_rate": 0.00018133921193697484, "loss": 1.0221, "step": 12545 }, { "epoch": 0.32214610003514565, "grad_norm": 0.72265625, "learning_rate": 0.00018133661495931417, "loss": 1.2125, "step": 12546 }, { "epoch": 0.3221717772310675, "grad_norm": 0.79296875, "learning_rate": 0.00018133401781955698, "loss": 0.9183, "step": 12547 }, { "epoch": 0.3221974544269893, "grad_norm": 0.72265625, "learning_rate": 0.00018133142051770843, "loss": 1.0074, "step": 12548 }, { "epoch": 0.3222231316229111, "grad_norm": 0.79296875, "learning_rate": 0.00018132882305377373, "loss": 0.9911, "step": 12549 }, { "epoch": 0.32224880881883294, "grad_norm": 0.79296875, "learning_rate": 0.00018132622542775802, "loss": 1.0606, "step": 12550 }, { "epoch": 0.3222744860147548, "grad_norm": 0.859375, "learning_rate": 0.00018132362763966645, "loss": 1.0381, "step": 12551 }, { "epoch": 0.32230016321067656, "grad_norm": 0.79296875, "learning_rate": 0.0001813210296895043, "loss": 0.9891, "step": 12552 }, { "epoch": 0.3223258404065984, "grad_norm": 0.84375, "learning_rate": 0.00018131843157727663, "loss": 1.1048, "step": 12553 }, { "epoch": 0.32235151760252023, "grad_norm": 0.78125, "learning_rate": 0.00018131583330298869, "loss": 0.9142, "step": 12554 }, { "epoch": 0.322377194798442, "grad_norm": 0.8046875, "learning_rate": 0.00018131323486664567, "loss": 1.0701, "step": 12555 }, { "epoch": 0.32240287199436385, "grad_norm": 0.7578125, "learning_rate": 0.0001813106362682527, "loss": 0.9146, "step": 12556 }, { "epoch": 0.3224285491902857, "grad_norm": 0.8515625, "learning_rate": 0.00018130803750781492, "loss": 1.0512, "step": 12557 }, { "epoch": 0.3224542263862075, "grad_norm": 0.78515625, "learning_rate": 0.00018130543858533763, "loss": 0.9729, "step": 12558 }, { "epoch": 0.3224799035821293, "grad_norm": 0.7421875, "learning_rate": 0.00018130283950082594, "loss": 0.8668, "step": 12559 }, { "epoch": 0.32250558077805114, "grad_norm": 0.72265625, "learning_rate": 0.00018130024025428502, "loss": 0.913, "step": 12560 }, { "epoch": 0.322531257973973, "grad_norm": 0.76953125, "learning_rate": 0.00018129764084572005, "loss": 0.9422, "step": 12561 }, { "epoch": 0.32255693516989475, "grad_norm": 0.7421875, "learning_rate": 0.00018129504127513627, "loss": 0.9638, "step": 12562 }, { "epoch": 0.3225826123658166, "grad_norm": 0.8046875, "learning_rate": 0.00018129244154253878, "loss": 0.9924, "step": 12563 }, { "epoch": 0.3226082895617384, "grad_norm": 0.73046875, "learning_rate": 0.0001812898416479328, "loss": 1.0693, "step": 12564 }, { "epoch": 0.3226339667576602, "grad_norm": 0.765625, "learning_rate": 0.0001812872415913235, "loss": 1.0905, "step": 12565 }, { "epoch": 0.32265964395358204, "grad_norm": 0.9296875, "learning_rate": 0.0001812846413727161, "loss": 1.0741, "step": 12566 }, { "epoch": 0.3226853211495039, "grad_norm": 0.76953125, "learning_rate": 0.00018128204099211574, "loss": 1.0269, "step": 12567 }, { "epoch": 0.3227109983454257, "grad_norm": 0.8046875, "learning_rate": 0.00018127944044952764, "loss": 0.9105, "step": 12568 }, { "epoch": 0.3227366755413475, "grad_norm": 0.83984375, "learning_rate": 0.0001812768397449569, "loss": 1.0097, "step": 12569 }, { "epoch": 0.32276235273726933, "grad_norm": 0.7734375, "learning_rate": 0.00018127423887840877, "loss": 0.9531, "step": 12570 }, { "epoch": 0.32278802993319117, "grad_norm": 0.8125, "learning_rate": 0.00018127163784988846, "loss": 0.9742, "step": 12571 }, { "epoch": 0.32281370712911295, "grad_norm": 0.83984375, "learning_rate": 0.00018126903665940112, "loss": 0.9304, "step": 12572 }, { "epoch": 0.3228393843250348, "grad_norm": 0.81640625, "learning_rate": 0.00018126643530695188, "loss": 0.8961, "step": 12573 }, { "epoch": 0.3228650615209566, "grad_norm": 0.7890625, "learning_rate": 0.00018126383379254602, "loss": 0.838, "step": 12574 }, { "epoch": 0.3228907387168784, "grad_norm": 0.78125, "learning_rate": 0.00018126123211618865, "loss": 1.0739, "step": 12575 }, { "epoch": 0.32291641591280024, "grad_norm": 0.8046875, "learning_rate": 0.000181258630277885, "loss": 0.8599, "step": 12576 }, { "epoch": 0.3229420931087221, "grad_norm": 0.796875, "learning_rate": 0.00018125602827764027, "loss": 0.9392, "step": 12577 }, { "epoch": 0.3229677703046439, "grad_norm": 0.703125, "learning_rate": 0.00018125342611545958, "loss": 0.9237, "step": 12578 }, { "epoch": 0.3229934475005657, "grad_norm": 0.81640625, "learning_rate": 0.00018125082379134814, "loss": 0.976, "step": 12579 }, { "epoch": 0.3230191246964875, "grad_norm": 0.80859375, "learning_rate": 0.0001812482213053112, "loss": 0.9815, "step": 12580 }, { "epoch": 0.32304480189240936, "grad_norm": 0.76171875, "learning_rate": 0.0001812456186573538, "loss": 0.9713, "step": 12581 }, { "epoch": 0.32307047908833114, "grad_norm": 0.875, "learning_rate": 0.0001812430158474813, "loss": 1.0223, "step": 12582 }, { "epoch": 0.323096156284253, "grad_norm": 0.8046875, "learning_rate": 0.00018124041287569876, "loss": 0.8796, "step": 12583 }, { "epoch": 0.3231218334801748, "grad_norm": 0.87109375, "learning_rate": 0.00018123780974201148, "loss": 0.9864, "step": 12584 }, { "epoch": 0.3231475106760966, "grad_norm": 0.82421875, "learning_rate": 0.00018123520644642454, "loss": 0.9916, "step": 12585 }, { "epoch": 0.32317318787201843, "grad_norm": 0.7734375, "learning_rate": 0.00018123260298894318, "loss": 1.0897, "step": 12586 }, { "epoch": 0.32319886506794027, "grad_norm": 0.81640625, "learning_rate": 0.00018122999936957257, "loss": 0.9652, "step": 12587 }, { "epoch": 0.3232245422638621, "grad_norm": 0.74609375, "learning_rate": 0.0001812273955883179, "loss": 0.9152, "step": 12588 }, { "epoch": 0.3232502194597839, "grad_norm": 0.77734375, "learning_rate": 0.0001812247916451844, "loss": 0.9707, "step": 12589 }, { "epoch": 0.3232758966557057, "grad_norm": 0.75, "learning_rate": 0.00018122218754017722, "loss": 0.9644, "step": 12590 }, { "epoch": 0.32330157385162756, "grad_norm": 1.0859375, "learning_rate": 0.00018121958327330155, "loss": 1.0093, "step": 12591 }, { "epoch": 0.32332725104754934, "grad_norm": 0.734375, "learning_rate": 0.00018121697884456256, "loss": 1.0613, "step": 12592 }, { "epoch": 0.32335292824347117, "grad_norm": 0.80078125, "learning_rate": 0.0001812143742539655, "loss": 1.0148, "step": 12593 }, { "epoch": 0.323378605439393, "grad_norm": 0.8671875, "learning_rate": 0.00018121176950151554, "loss": 0.9514, "step": 12594 }, { "epoch": 0.3234042826353148, "grad_norm": 0.88671875, "learning_rate": 0.00018120916458721782, "loss": 0.9324, "step": 12595 }, { "epoch": 0.3234299598312366, "grad_norm": 0.77734375, "learning_rate": 0.00018120655951107758, "loss": 1.047, "step": 12596 }, { "epoch": 0.32345563702715846, "grad_norm": 0.76953125, "learning_rate": 0.00018120395427310003, "loss": 0.9716, "step": 12597 }, { "epoch": 0.3234813142230803, "grad_norm": 0.76953125, "learning_rate": 0.0001812013488732903, "loss": 0.9572, "step": 12598 }, { "epoch": 0.3235069914190021, "grad_norm": 0.859375, "learning_rate": 0.00018119874331165364, "loss": 1.0575, "step": 12599 }, { "epoch": 0.3235326686149239, "grad_norm": 0.7734375, "learning_rate": 0.00018119613758819524, "loss": 1.0829, "step": 12600 }, { "epoch": 0.32355834581084575, "grad_norm": 0.82421875, "learning_rate": 0.00018119353170292023, "loss": 1.0157, "step": 12601 }, { "epoch": 0.32358402300676753, "grad_norm": 1.0625, "learning_rate": 0.00018119092565583388, "loss": 0.887, "step": 12602 }, { "epoch": 0.32360970020268937, "grad_norm": 0.75390625, "learning_rate": 0.0001811883194469413, "loss": 1.0443, "step": 12603 }, { "epoch": 0.3236353773986112, "grad_norm": 0.76953125, "learning_rate": 0.00018118571307624776, "loss": 0.8838, "step": 12604 }, { "epoch": 0.323661054594533, "grad_norm": 0.83984375, "learning_rate": 0.00018118310654375845, "loss": 0.8852, "step": 12605 }, { "epoch": 0.3236867317904548, "grad_norm": 0.78515625, "learning_rate": 0.0001811804998494785, "loss": 0.8915, "step": 12606 }, { "epoch": 0.32371240898637665, "grad_norm": 0.78125, "learning_rate": 0.0001811778929934132, "loss": 0.9872, "step": 12607 }, { "epoch": 0.32373808618229843, "grad_norm": 0.78125, "learning_rate": 0.00018117528597556763, "loss": 0.9884, "step": 12608 }, { "epoch": 0.32376376337822027, "grad_norm": 0.7265625, "learning_rate": 0.0001811726787959471, "loss": 0.9279, "step": 12609 }, { "epoch": 0.3237894405741421, "grad_norm": 0.78515625, "learning_rate": 0.00018117007145455674, "loss": 0.8906, "step": 12610 }, { "epoch": 0.32381511777006394, "grad_norm": 0.765625, "learning_rate": 0.00018116746395140175, "loss": 1.0555, "step": 12611 }, { "epoch": 0.3238407949659857, "grad_norm": 0.78125, "learning_rate": 0.00018116485628648734, "loss": 0.9306, "step": 12612 }, { "epoch": 0.32386647216190756, "grad_norm": 0.734375, "learning_rate": 0.00018116224845981868, "loss": 0.808, "step": 12613 }, { "epoch": 0.3238921493578294, "grad_norm": 0.7734375, "learning_rate": 0.000181159640471401, "loss": 0.9285, "step": 12614 }, { "epoch": 0.3239178265537512, "grad_norm": 0.796875, "learning_rate": 0.00018115703232123953, "loss": 1.0153, "step": 12615 }, { "epoch": 0.323943503749673, "grad_norm": 0.75390625, "learning_rate": 0.00018115442400933938, "loss": 0.9498, "step": 12616 }, { "epoch": 0.32396918094559485, "grad_norm": 0.75390625, "learning_rate": 0.0001811518155357058, "loss": 1.0666, "step": 12617 }, { "epoch": 0.32399485814151663, "grad_norm": 0.82421875, "learning_rate": 0.000181149206900344, "loss": 0.9919, "step": 12618 }, { "epoch": 0.32402053533743846, "grad_norm": 0.765625, "learning_rate": 0.00018114659810325914, "loss": 0.8968, "step": 12619 }, { "epoch": 0.3240462125333603, "grad_norm": 0.69921875, "learning_rate": 0.00018114398914445646, "loss": 0.7554, "step": 12620 }, { "epoch": 0.32407188972928214, "grad_norm": 0.71484375, "learning_rate": 0.00018114138002394111, "loss": 0.8864, "step": 12621 }, { "epoch": 0.3240975669252039, "grad_norm": 0.8359375, "learning_rate": 0.00018113877074171834, "loss": 1.1279, "step": 12622 }, { "epoch": 0.32412324412112575, "grad_norm": 0.77734375, "learning_rate": 0.00018113616129779335, "loss": 0.956, "step": 12623 }, { "epoch": 0.3241489213170476, "grad_norm": 0.78515625, "learning_rate": 0.0001811335516921713, "loss": 0.9573, "step": 12624 }, { "epoch": 0.32417459851296937, "grad_norm": 0.75, "learning_rate": 0.00018113094192485742, "loss": 0.9531, "step": 12625 }, { "epoch": 0.3242002757088912, "grad_norm": 0.8359375, "learning_rate": 0.00018112833199585685, "loss": 0.9657, "step": 12626 }, { "epoch": 0.32422595290481304, "grad_norm": 0.87109375, "learning_rate": 0.0001811257219051749, "loss": 1.0225, "step": 12627 }, { "epoch": 0.3242516301007348, "grad_norm": 0.81640625, "learning_rate": 0.0001811231116528167, "loss": 0.9186, "step": 12628 }, { "epoch": 0.32427730729665666, "grad_norm": 0.78125, "learning_rate": 0.00018112050123878746, "loss": 0.9425, "step": 12629 }, { "epoch": 0.3243029844925785, "grad_norm": 0.81640625, "learning_rate": 0.00018111789066309239, "loss": 0.9703, "step": 12630 }, { "epoch": 0.32432866168850033, "grad_norm": 0.79296875, "learning_rate": 0.0001811152799257367, "loss": 0.9527, "step": 12631 }, { "epoch": 0.3243543388844221, "grad_norm": 0.765625, "learning_rate": 0.00018111266902672558, "loss": 1.0986, "step": 12632 }, { "epoch": 0.32438001608034395, "grad_norm": 0.75, "learning_rate": 0.00018111005796606425, "loss": 0.8384, "step": 12633 }, { "epoch": 0.3244056932762658, "grad_norm": 0.77734375, "learning_rate": 0.00018110744674375786, "loss": 0.9696, "step": 12634 }, { "epoch": 0.32443137047218756, "grad_norm": 0.7421875, "learning_rate": 0.00018110483535981167, "loss": 0.9133, "step": 12635 }, { "epoch": 0.3244570476681094, "grad_norm": 0.75390625, "learning_rate": 0.0001811022238142309, "loss": 0.9646, "step": 12636 }, { "epoch": 0.32448272486403124, "grad_norm": 0.78515625, "learning_rate": 0.0001810996121070207, "loss": 0.9031, "step": 12637 }, { "epoch": 0.324508402059953, "grad_norm": 0.75, "learning_rate": 0.0001810970002381863, "loss": 0.9059, "step": 12638 }, { "epoch": 0.32453407925587485, "grad_norm": 0.72265625, "learning_rate": 0.0001810943882077329, "loss": 0.9588, "step": 12639 }, { "epoch": 0.3245597564517967, "grad_norm": 0.7578125, "learning_rate": 0.0001810917760156657, "loss": 0.7562, "step": 12640 }, { "epoch": 0.3245854336477185, "grad_norm": 0.76953125, "learning_rate": 0.00018108916366198994, "loss": 1.0568, "step": 12641 }, { "epoch": 0.3246111108436403, "grad_norm": 0.73828125, "learning_rate": 0.0001810865511467108, "loss": 0.8316, "step": 12642 }, { "epoch": 0.32463678803956214, "grad_norm": 1.203125, "learning_rate": 0.00018108393846983346, "loss": 1.1201, "step": 12643 }, { "epoch": 0.324662465235484, "grad_norm": 0.796875, "learning_rate": 0.0001810813256313632, "loss": 0.9812, "step": 12644 }, { "epoch": 0.32468814243140576, "grad_norm": 0.80859375, "learning_rate": 0.00018107871263130517, "loss": 0.9394, "step": 12645 }, { "epoch": 0.3247138196273276, "grad_norm": 0.828125, "learning_rate": 0.00018107609946966456, "loss": 0.956, "step": 12646 }, { "epoch": 0.32473949682324943, "grad_norm": 0.96484375, "learning_rate": 0.0001810734861464466, "loss": 1.0109, "step": 12647 }, { "epoch": 0.3247651740191712, "grad_norm": 0.80078125, "learning_rate": 0.00018107087266165656, "loss": 0.8562, "step": 12648 }, { "epoch": 0.32479085121509305, "grad_norm": 0.80078125, "learning_rate": 0.00018106825901529955, "loss": 0.788, "step": 12649 }, { "epoch": 0.3248165284110149, "grad_norm": 0.80078125, "learning_rate": 0.00018106564520738085, "loss": 0.8778, "step": 12650 }, { "epoch": 0.3248422056069367, "grad_norm": 0.78515625, "learning_rate": 0.00018106303123790562, "loss": 1.0918, "step": 12651 }, { "epoch": 0.3248678828028585, "grad_norm": 0.7578125, "learning_rate": 0.0001810604171068791, "loss": 0.8627, "step": 12652 }, { "epoch": 0.32489355999878033, "grad_norm": 0.90625, "learning_rate": 0.0001810578028143065, "loss": 0.9579, "step": 12653 }, { "epoch": 0.32491923719470217, "grad_norm": 0.734375, "learning_rate": 0.000181055188360193, "loss": 0.8299, "step": 12654 }, { "epoch": 0.32494491439062395, "grad_norm": 0.84765625, "learning_rate": 0.00018105257374454383, "loss": 1.0286, "step": 12655 }, { "epoch": 0.3249705915865458, "grad_norm": 0.7578125, "learning_rate": 0.0001810499589673642, "loss": 0.9777, "step": 12656 }, { "epoch": 0.3249962687824676, "grad_norm": 0.75390625, "learning_rate": 0.00018104734402865934, "loss": 0.8616, "step": 12657 }, { "epoch": 0.3250219459783894, "grad_norm": 0.81640625, "learning_rate": 0.00018104472892843448, "loss": 0.9243, "step": 12658 }, { "epoch": 0.32504762317431124, "grad_norm": 0.80078125, "learning_rate": 0.00018104211366669476, "loss": 0.8563, "step": 12659 }, { "epoch": 0.3250733003702331, "grad_norm": 0.80859375, "learning_rate": 0.0001810394982434454, "loss": 0.964, "step": 12660 }, { "epoch": 0.3250989775661549, "grad_norm": 0.82421875, "learning_rate": 0.00018103688265869167, "loss": 0.8097, "step": 12661 }, { "epoch": 0.3251246547620767, "grad_norm": 0.921875, "learning_rate": 0.00018103426691243874, "loss": 0.958, "step": 12662 }, { "epoch": 0.32515033195799853, "grad_norm": 0.79296875, "learning_rate": 0.00018103165100469187, "loss": 0.9938, "step": 12663 }, { "epoch": 0.32517600915392036, "grad_norm": 0.7734375, "learning_rate": 0.0001810290349354562, "loss": 1.108, "step": 12664 }, { "epoch": 0.32520168634984215, "grad_norm": 0.80078125, "learning_rate": 0.000181026418704737, "loss": 0.9136, "step": 12665 }, { "epoch": 0.325227363545764, "grad_norm": 0.82421875, "learning_rate": 0.00018102380231253945, "loss": 0.9276, "step": 12666 }, { "epoch": 0.3252530407416858, "grad_norm": 0.80078125, "learning_rate": 0.0001810211857588688, "loss": 1.057, "step": 12667 }, { "epoch": 0.3252787179376076, "grad_norm": 0.828125, "learning_rate": 0.00018101856904373023, "loss": 0.8727, "step": 12668 }, { "epoch": 0.32530439513352943, "grad_norm": 0.76171875, "learning_rate": 0.000181015952167129, "loss": 0.921, "step": 12669 }, { "epoch": 0.32533007232945127, "grad_norm": 0.79296875, "learning_rate": 0.00018101333512907027, "loss": 0.8987, "step": 12670 }, { "epoch": 0.3253557495253731, "grad_norm": 0.75390625, "learning_rate": 0.00018101071792955928, "loss": 0.9514, "step": 12671 }, { "epoch": 0.3253814267212949, "grad_norm": 0.87109375, "learning_rate": 0.00018100810056860124, "loss": 0.9838, "step": 12672 }, { "epoch": 0.3254071039172167, "grad_norm": 0.7734375, "learning_rate": 0.0001810054830462014, "loss": 0.9563, "step": 12673 }, { "epoch": 0.32543278111313856, "grad_norm": 0.73046875, "learning_rate": 0.00018100286536236495, "loss": 1.0207, "step": 12674 }, { "epoch": 0.32545845830906034, "grad_norm": 0.84765625, "learning_rate": 0.00018100024751709708, "loss": 0.9609, "step": 12675 }, { "epoch": 0.3254841355049822, "grad_norm": 0.84375, "learning_rate": 0.00018099762951040305, "loss": 0.998, "step": 12676 }, { "epoch": 0.325509812700904, "grad_norm": 0.80078125, "learning_rate": 0.00018099501134228804, "loss": 0.9386, "step": 12677 }, { "epoch": 0.3255354898968258, "grad_norm": 0.79296875, "learning_rate": 0.0001809923930127573, "loss": 0.824, "step": 12678 }, { "epoch": 0.3255611670927476, "grad_norm": 0.734375, "learning_rate": 0.00018098977452181604, "loss": 0.9477, "step": 12679 }, { "epoch": 0.32558684428866946, "grad_norm": 0.78515625, "learning_rate": 0.00018098715586946948, "loss": 0.9436, "step": 12680 }, { "epoch": 0.3256125214845913, "grad_norm": 1.8359375, "learning_rate": 0.00018098453705572284, "loss": 1.007, "step": 12681 }, { "epoch": 0.3256381986805131, "grad_norm": 0.7734375, "learning_rate": 0.00018098191808058132, "loss": 0.9817, "step": 12682 }, { "epoch": 0.3256638758764349, "grad_norm": 0.6875, "learning_rate": 0.00018097929894405015, "loss": 0.8028, "step": 12683 }, { "epoch": 0.32568955307235675, "grad_norm": 0.7265625, "learning_rate": 0.00018097667964613457, "loss": 0.8795, "step": 12684 }, { "epoch": 0.32571523026827853, "grad_norm": 0.6875, "learning_rate": 0.00018097406018683976, "loss": 0.9697, "step": 12685 }, { "epoch": 0.32574090746420037, "grad_norm": 0.75, "learning_rate": 0.00018097144056617096, "loss": 0.7804, "step": 12686 }, { "epoch": 0.3257665846601222, "grad_norm": 0.74609375, "learning_rate": 0.00018096882078413341, "loss": 1.0162, "step": 12687 }, { "epoch": 0.325792261856044, "grad_norm": 0.76953125, "learning_rate": 0.00018096620084073233, "loss": 0.9195, "step": 12688 }, { "epoch": 0.3258179390519658, "grad_norm": 0.76953125, "learning_rate": 0.0001809635807359729, "loss": 1.0547, "step": 12689 }, { "epoch": 0.32584361624788766, "grad_norm": 0.7890625, "learning_rate": 0.00018096096046986039, "loss": 0.8387, "step": 12690 }, { "epoch": 0.3258692934438095, "grad_norm": 0.7578125, "learning_rate": 0.00018095834004239994, "loss": 0.7836, "step": 12691 }, { "epoch": 0.3258949706397313, "grad_norm": 0.83203125, "learning_rate": 0.00018095571945359688, "loss": 0.9405, "step": 12692 }, { "epoch": 0.3259206478356531, "grad_norm": 0.74609375, "learning_rate": 0.00018095309870345638, "loss": 0.9304, "step": 12693 }, { "epoch": 0.32594632503157495, "grad_norm": 0.81640625, "learning_rate": 0.00018095047779198366, "loss": 0.818, "step": 12694 }, { "epoch": 0.3259720022274967, "grad_norm": 0.70703125, "learning_rate": 0.00018094785671918397, "loss": 0.8487, "step": 12695 }, { "epoch": 0.32599767942341856, "grad_norm": 2.015625, "learning_rate": 0.00018094523548506248, "loss": 0.9873, "step": 12696 }, { "epoch": 0.3260233566193404, "grad_norm": 0.8359375, "learning_rate": 0.00018094261408962446, "loss": 0.9606, "step": 12697 }, { "epoch": 0.3260490338152622, "grad_norm": 0.8046875, "learning_rate": 0.00018093999253287514, "loss": 1.0147, "step": 12698 }, { "epoch": 0.326074711011184, "grad_norm": 0.80078125, "learning_rate": 0.0001809373708148197, "loss": 1.0082, "step": 12699 }, { "epoch": 0.32610038820710585, "grad_norm": 0.875, "learning_rate": 0.0001809347489354634, "loss": 0.9974, "step": 12700 }, { "epoch": 0.3261260654030277, "grad_norm": 0.75390625, "learning_rate": 0.00018093212689481147, "loss": 0.9199, "step": 12701 }, { "epoch": 0.32615174259894947, "grad_norm": 0.79296875, "learning_rate": 0.0001809295046928691, "loss": 1.0862, "step": 12702 }, { "epoch": 0.3261774197948713, "grad_norm": 0.8046875, "learning_rate": 0.00018092688232964154, "loss": 0.9058, "step": 12703 }, { "epoch": 0.32620309699079314, "grad_norm": 0.80859375, "learning_rate": 0.00018092425980513404, "loss": 0.9449, "step": 12704 }, { "epoch": 0.3262287741867149, "grad_norm": 0.77734375, "learning_rate": 0.00018092163711935178, "loss": 0.9295, "step": 12705 }, { "epoch": 0.32625445138263676, "grad_norm": 0.8125, "learning_rate": 0.0001809190142723, "loss": 0.9063, "step": 12706 }, { "epoch": 0.3262801285785586, "grad_norm": 0.87109375, "learning_rate": 0.00018091639126398395, "loss": 1.0217, "step": 12707 }, { "epoch": 0.3263058057744804, "grad_norm": 0.76953125, "learning_rate": 0.00018091376809440882, "loss": 0.7963, "step": 12708 }, { "epoch": 0.3263314829704022, "grad_norm": 0.76953125, "learning_rate": 0.00018091114476357988, "loss": 1.0225, "step": 12709 }, { "epoch": 0.32635716016632405, "grad_norm": 0.6953125, "learning_rate": 0.00018090852127150233, "loss": 0.8545, "step": 12710 }, { "epoch": 0.3263828373622459, "grad_norm": 0.84375, "learning_rate": 0.0001809058976181814, "loss": 1.0154, "step": 12711 }, { "epoch": 0.32640851455816766, "grad_norm": 0.73828125, "learning_rate": 0.00018090327380362233, "loss": 0.9188, "step": 12712 }, { "epoch": 0.3264341917540895, "grad_norm": 0.76953125, "learning_rate": 0.00018090064982783033, "loss": 0.9831, "step": 12713 }, { "epoch": 0.32645986895001133, "grad_norm": 0.80078125, "learning_rate": 0.00018089802569081067, "loss": 1.0179, "step": 12714 }, { "epoch": 0.3264855461459331, "grad_norm": 0.796875, "learning_rate": 0.00018089540139256852, "loss": 0.9417, "step": 12715 }, { "epoch": 0.32651122334185495, "grad_norm": 0.8203125, "learning_rate": 0.00018089277693310914, "loss": 0.9357, "step": 12716 }, { "epoch": 0.3265369005377768, "grad_norm": 0.8515625, "learning_rate": 0.00018089015231243782, "loss": 0.9379, "step": 12717 }, { "epoch": 0.32656257773369857, "grad_norm": 0.7734375, "learning_rate": 0.0001808875275305597, "loss": 1.0788, "step": 12718 }, { "epoch": 0.3265882549296204, "grad_norm": 0.83203125, "learning_rate": 0.00018088490258748004, "loss": 0.953, "step": 12719 }, { "epoch": 0.32661393212554224, "grad_norm": 0.78125, "learning_rate": 0.00018088227748320405, "loss": 0.9301, "step": 12720 }, { "epoch": 0.3266396093214641, "grad_norm": 0.8203125, "learning_rate": 0.00018087965221773704, "loss": 1.0647, "step": 12721 }, { "epoch": 0.32666528651738586, "grad_norm": 0.83203125, "learning_rate": 0.00018087702679108416, "loss": 1.083, "step": 12722 }, { "epoch": 0.3266909637133077, "grad_norm": 0.76171875, "learning_rate": 0.00018087440120325067, "loss": 0.9504, "step": 12723 }, { "epoch": 0.3267166409092295, "grad_norm": 0.80859375, "learning_rate": 0.00018087177545424182, "loss": 0.9263, "step": 12724 }, { "epoch": 0.3267423181051513, "grad_norm": 0.828125, "learning_rate": 0.00018086914954406277, "loss": 0.9106, "step": 12725 }, { "epoch": 0.32676799530107314, "grad_norm": 0.765625, "learning_rate": 0.00018086652347271888, "loss": 0.9892, "step": 12726 }, { "epoch": 0.326793672496995, "grad_norm": 0.8125, "learning_rate": 0.00018086389724021528, "loss": 0.9124, "step": 12727 }, { "epoch": 0.32681934969291676, "grad_norm": 0.8125, "learning_rate": 0.00018086127084655725, "loss": 1.1807, "step": 12728 }, { "epoch": 0.3268450268888386, "grad_norm": 0.7734375, "learning_rate": 0.00018085864429175, "loss": 0.8315, "step": 12729 }, { "epoch": 0.32687070408476043, "grad_norm": 0.69921875, "learning_rate": 0.00018085601757579877, "loss": 0.762, "step": 12730 }, { "epoch": 0.32689638128068227, "grad_norm": 0.7421875, "learning_rate": 0.00018085339069870882, "loss": 0.8898, "step": 12731 }, { "epoch": 0.32692205847660405, "grad_norm": 0.8125, "learning_rate": 0.00018085076366048536, "loss": 0.9832, "step": 12732 }, { "epoch": 0.3269477356725259, "grad_norm": 0.7890625, "learning_rate": 0.00018084813646113363, "loss": 1.0589, "step": 12733 }, { "epoch": 0.3269734128684477, "grad_norm": 0.84765625, "learning_rate": 0.00018084550910065885, "loss": 1.1149, "step": 12734 }, { "epoch": 0.3269990900643695, "grad_norm": 0.78125, "learning_rate": 0.00018084288157906627, "loss": 0.9276, "step": 12735 }, { "epoch": 0.32702476726029134, "grad_norm": 0.83203125, "learning_rate": 0.00018084025389636117, "loss": 0.9327, "step": 12736 }, { "epoch": 0.3270504444562132, "grad_norm": 0.88671875, "learning_rate": 0.00018083762605254871, "loss": 1.0418, "step": 12737 }, { "epoch": 0.32707612165213495, "grad_norm": 0.79296875, "learning_rate": 0.0001808349980476342, "loss": 0.9542, "step": 12738 }, { "epoch": 0.3271017988480568, "grad_norm": 0.79296875, "learning_rate": 0.00018083236988162283, "loss": 0.8016, "step": 12739 }, { "epoch": 0.3271274760439786, "grad_norm": 0.75, "learning_rate": 0.00018082974155451983, "loss": 0.9234, "step": 12740 }, { "epoch": 0.32715315323990046, "grad_norm": 0.7578125, "learning_rate": 0.00018082711306633044, "loss": 0.8937, "step": 12741 }, { "epoch": 0.32717883043582224, "grad_norm": 0.859375, "learning_rate": 0.00018082448441705993, "loss": 1.0535, "step": 12742 }, { "epoch": 0.3272045076317441, "grad_norm": 0.78125, "learning_rate": 0.00018082185560671355, "loss": 1.0716, "step": 12743 }, { "epoch": 0.3272301848276659, "grad_norm": 0.78515625, "learning_rate": 0.0001808192266352965, "loss": 1.0196, "step": 12744 }, { "epoch": 0.3272558620235877, "grad_norm": 0.78125, "learning_rate": 0.000180816597502814, "loss": 0.9618, "step": 12745 }, { "epoch": 0.32728153921950953, "grad_norm": 1.1328125, "learning_rate": 0.00018081396820927137, "loss": 1.0349, "step": 12746 }, { "epoch": 0.32730721641543137, "grad_norm": 0.875, "learning_rate": 0.00018081133875467374, "loss": 0.9566, "step": 12747 }, { "epoch": 0.32733289361135315, "grad_norm": 0.79296875, "learning_rate": 0.00018080870913902647, "loss": 0.8339, "step": 12748 }, { "epoch": 0.327358570807275, "grad_norm": 0.828125, "learning_rate": 0.00018080607936233473, "loss": 0.8883, "step": 12749 }, { "epoch": 0.3273842480031968, "grad_norm": 0.76171875, "learning_rate": 0.00018080344942460376, "loss": 0.9592, "step": 12750 }, { "epoch": 0.32740992519911866, "grad_norm": 0.734375, "learning_rate": 0.0001808008193258388, "loss": 1.0075, "step": 12751 }, { "epoch": 0.32743560239504044, "grad_norm": 0.78515625, "learning_rate": 0.00018079818906604515, "loss": 1.0462, "step": 12752 }, { "epoch": 0.3274612795909623, "grad_norm": 0.7890625, "learning_rate": 0.00018079555864522796, "loss": 0.9515, "step": 12753 }, { "epoch": 0.3274869567868841, "grad_norm": 0.78125, "learning_rate": 0.00018079292806339256, "loss": 1.1044, "step": 12754 }, { "epoch": 0.3275126339828059, "grad_norm": 0.74609375, "learning_rate": 0.00018079029732054414, "loss": 0.946, "step": 12755 }, { "epoch": 0.3275383111787277, "grad_norm": 0.7421875, "learning_rate": 0.00018078766641668794, "loss": 0.9138, "step": 12756 }, { "epoch": 0.32756398837464956, "grad_norm": 0.80078125, "learning_rate": 0.00018078503535182922, "loss": 0.9525, "step": 12757 }, { "epoch": 0.32758966557057134, "grad_norm": 0.9296875, "learning_rate": 0.00018078240412597324, "loss": 0.8831, "step": 12758 }, { "epoch": 0.3276153427664932, "grad_norm": 0.7734375, "learning_rate": 0.0001807797727391252, "loss": 0.966, "step": 12759 }, { "epoch": 0.327641019962415, "grad_norm": 0.78125, "learning_rate": 0.0001807771411912904, "loss": 0.8258, "step": 12760 }, { "epoch": 0.32766669715833685, "grad_norm": 0.73046875, "learning_rate": 0.00018077450948247403, "loss": 0.9014, "step": 12761 }, { "epoch": 0.32769237435425863, "grad_norm": 0.92578125, "learning_rate": 0.00018077187761268137, "loss": 0.972, "step": 12762 }, { "epoch": 0.32771805155018047, "grad_norm": 0.7421875, "learning_rate": 0.00018076924558191765, "loss": 0.8525, "step": 12763 }, { "epoch": 0.3277437287461023, "grad_norm": 0.80078125, "learning_rate": 0.00018076661339018813, "loss": 1.0371, "step": 12764 }, { "epoch": 0.3277694059420241, "grad_norm": 0.75, "learning_rate": 0.000180763981037498, "loss": 0.9373, "step": 12765 }, { "epoch": 0.3277950831379459, "grad_norm": 0.72265625, "learning_rate": 0.0001807613485238526, "loss": 0.859, "step": 12766 }, { "epoch": 0.32782076033386776, "grad_norm": 0.7734375, "learning_rate": 0.00018075871584925714, "loss": 0.9207, "step": 12767 }, { "epoch": 0.32784643752978954, "grad_norm": 0.671875, "learning_rate": 0.0001807560830137168, "loss": 1.017, "step": 12768 }, { "epoch": 0.32787211472571137, "grad_norm": 0.953125, "learning_rate": 0.00018075345001723693, "loss": 0.9671, "step": 12769 }, { "epoch": 0.3278977919216332, "grad_norm": 0.7890625, "learning_rate": 0.0001807508168598227, "loss": 0.9018, "step": 12770 }, { "epoch": 0.32792346911755504, "grad_norm": 0.8671875, "learning_rate": 0.0001807481835414794, "loss": 1.1193, "step": 12771 }, { "epoch": 0.3279491463134768, "grad_norm": 1.15625, "learning_rate": 0.00018074555006221226, "loss": 0.9446, "step": 12772 }, { "epoch": 0.32797482350939866, "grad_norm": 0.7578125, "learning_rate": 0.00018074291642202655, "loss": 0.8863, "step": 12773 }, { "epoch": 0.3280005007053205, "grad_norm": 0.77734375, "learning_rate": 0.00018074028262092746, "loss": 1.0272, "step": 12774 }, { "epoch": 0.3280261779012423, "grad_norm": 0.796875, "learning_rate": 0.00018073764865892032, "loss": 0.9058, "step": 12775 }, { "epoch": 0.3280518550971641, "grad_norm": 0.765625, "learning_rate": 0.00018073501453601032, "loss": 0.9159, "step": 12776 }, { "epoch": 0.32807753229308595, "grad_norm": 0.7421875, "learning_rate": 0.00018073238025220274, "loss": 1.0434, "step": 12777 }, { "epoch": 0.32810320948900773, "grad_norm": 0.79296875, "learning_rate": 0.0001807297458075028, "loss": 0.8576, "step": 12778 }, { "epoch": 0.32812888668492957, "grad_norm": 0.79296875, "learning_rate": 0.00018072711120191578, "loss": 1.021, "step": 12779 }, { "epoch": 0.3281545638808514, "grad_norm": 0.88671875, "learning_rate": 0.0001807244764354469, "loss": 0.9972, "step": 12780 }, { "epoch": 0.32818024107677324, "grad_norm": 0.7421875, "learning_rate": 0.00018072184150810146, "loss": 0.8238, "step": 12781 }, { "epoch": 0.328205918272695, "grad_norm": 0.72265625, "learning_rate": 0.00018071920641988466, "loss": 0.9436, "step": 12782 }, { "epoch": 0.32823159546861685, "grad_norm": 0.8203125, "learning_rate": 0.00018071657117080177, "loss": 0.8918, "step": 12783 }, { "epoch": 0.3282572726645387, "grad_norm": 0.84765625, "learning_rate": 0.00018071393576085809, "loss": 1.0134, "step": 12784 }, { "epoch": 0.32828294986046047, "grad_norm": 0.78125, "learning_rate": 0.00018071130019005878, "loss": 0.9873, "step": 12785 }, { "epoch": 0.3283086270563823, "grad_norm": 0.81640625, "learning_rate": 0.00018070866445840916, "loss": 1.0069, "step": 12786 }, { "epoch": 0.32833430425230414, "grad_norm": 0.80859375, "learning_rate": 0.00018070602856591446, "loss": 0.9499, "step": 12787 }, { "epoch": 0.3283599814482259, "grad_norm": 0.84375, "learning_rate": 0.00018070339251257991, "loss": 1.0834, "step": 12788 }, { "epoch": 0.32838565864414776, "grad_norm": 0.8125, "learning_rate": 0.00018070075629841083, "loss": 0.9627, "step": 12789 }, { "epoch": 0.3284113358400696, "grad_norm": 0.76171875, "learning_rate": 0.0001806981199234124, "loss": 0.9718, "step": 12790 }, { "epoch": 0.32843701303599143, "grad_norm": 0.72265625, "learning_rate": 0.00018069548338758992, "loss": 1.0076, "step": 12791 }, { "epoch": 0.3284626902319132, "grad_norm": 0.7421875, "learning_rate": 0.00018069284669094865, "loss": 0.8841, "step": 12792 }, { "epoch": 0.32848836742783505, "grad_norm": 0.73828125, "learning_rate": 0.0001806902098334938, "loss": 0.985, "step": 12793 }, { "epoch": 0.3285140446237569, "grad_norm": 0.76953125, "learning_rate": 0.00018068757281523065, "loss": 0.8211, "step": 12794 }, { "epoch": 0.32853972181967867, "grad_norm": 0.76953125, "learning_rate": 0.00018068493563616443, "loss": 0.9049, "step": 12795 }, { "epoch": 0.3285653990156005, "grad_norm": 0.7890625, "learning_rate": 0.0001806822982963005, "loss": 1.0555, "step": 12796 }, { "epoch": 0.32859107621152234, "grad_norm": 0.7890625, "learning_rate": 0.00018067966079564395, "loss": 1.073, "step": 12797 }, { "epoch": 0.3286167534074441, "grad_norm": 0.81640625, "learning_rate": 0.0001806770231342002, "loss": 0.8676, "step": 12798 }, { "epoch": 0.32864243060336595, "grad_norm": 0.79296875, "learning_rate": 0.00018067438531197438, "loss": 1.0547, "step": 12799 }, { "epoch": 0.3286681077992878, "grad_norm": 0.7578125, "learning_rate": 0.0001806717473289718, "loss": 0.9567, "step": 12800 }, { "epoch": 0.3286937849952096, "grad_norm": 0.8203125, "learning_rate": 0.0001806691091851977, "loss": 0.8487, "step": 12801 }, { "epoch": 0.3287194621911314, "grad_norm": 0.88671875, "learning_rate": 0.0001806664708806574, "loss": 0.874, "step": 12802 }, { "epoch": 0.32874513938705324, "grad_norm": 0.81640625, "learning_rate": 0.0001806638324153561, "loss": 0.9876, "step": 12803 }, { "epoch": 0.3287708165829751, "grad_norm": 0.796875, "learning_rate": 0.00018066119378929904, "loss": 1.0011, "step": 12804 }, { "epoch": 0.32879649377889686, "grad_norm": 0.76953125, "learning_rate": 0.00018065855500249155, "loss": 0.9939, "step": 12805 }, { "epoch": 0.3288221709748187, "grad_norm": 0.81640625, "learning_rate": 0.00018065591605493882, "loss": 0.8654, "step": 12806 }, { "epoch": 0.32884784817074053, "grad_norm": 0.72265625, "learning_rate": 0.0001806532769466461, "loss": 0.9053, "step": 12807 }, { "epoch": 0.3288735253666623, "grad_norm": 0.83203125, "learning_rate": 0.00018065063767761874, "loss": 0.8516, "step": 12808 }, { "epoch": 0.32889920256258415, "grad_norm": 0.765625, "learning_rate": 0.00018064799824786193, "loss": 0.9895, "step": 12809 }, { "epoch": 0.328924879758506, "grad_norm": 0.79296875, "learning_rate": 0.00018064535865738092, "loss": 0.8452, "step": 12810 }, { "epoch": 0.32895055695442776, "grad_norm": 0.78515625, "learning_rate": 0.00018064271890618101, "loss": 1.1091, "step": 12811 }, { "epoch": 0.3289762341503496, "grad_norm": 0.7734375, "learning_rate": 0.00018064007899426746, "loss": 0.8858, "step": 12812 }, { "epoch": 0.32900191134627144, "grad_norm": 0.8515625, "learning_rate": 0.00018063743892164551, "loss": 0.9287, "step": 12813 }, { "epoch": 0.32902758854219327, "grad_norm": 0.8203125, "learning_rate": 0.00018063479868832041, "loss": 1.0262, "step": 12814 }, { "epoch": 0.32905326573811505, "grad_norm": 0.71875, "learning_rate": 0.00018063215829429744, "loss": 0.9103, "step": 12815 }, { "epoch": 0.3290789429340369, "grad_norm": 0.7578125, "learning_rate": 0.0001806295177395819, "loss": 0.938, "step": 12816 }, { "epoch": 0.3291046201299587, "grad_norm": 0.81640625, "learning_rate": 0.00018062687702417898, "loss": 1.0128, "step": 12817 }, { "epoch": 0.3291302973258805, "grad_norm": 0.828125, "learning_rate": 0.000180624236148094, "loss": 0.9539, "step": 12818 }, { "epoch": 0.32915597452180234, "grad_norm": 0.77734375, "learning_rate": 0.00018062159511133215, "loss": 0.9324, "step": 12819 }, { "epoch": 0.3291816517177242, "grad_norm": 0.796875, "learning_rate": 0.0001806189539138988, "loss": 0.8742, "step": 12820 }, { "epoch": 0.32920732891364596, "grad_norm": 0.73828125, "learning_rate": 0.00018061631255579915, "loss": 1.067, "step": 12821 }, { "epoch": 0.3292330061095678, "grad_norm": 0.765625, "learning_rate": 0.00018061367103703842, "loss": 0.9315, "step": 12822 }, { "epoch": 0.32925868330548963, "grad_norm": 0.77734375, "learning_rate": 0.00018061102935762197, "loss": 0.9298, "step": 12823 }, { "epoch": 0.32928436050141147, "grad_norm": 0.76171875, "learning_rate": 0.00018060838751755504, "loss": 0.9667, "step": 12824 }, { "epoch": 0.32931003769733325, "grad_norm": 0.6953125, "learning_rate": 0.00018060574551684284, "loss": 0.8251, "step": 12825 }, { "epoch": 0.3293357148932551, "grad_norm": 0.73046875, "learning_rate": 0.0001806031033554907, "loss": 0.8515, "step": 12826 }, { "epoch": 0.3293613920891769, "grad_norm": 0.828125, "learning_rate": 0.0001806004610335038, "loss": 1.0388, "step": 12827 }, { "epoch": 0.3293870692850987, "grad_norm": 0.7890625, "learning_rate": 0.0001805978185508875, "loss": 1.0113, "step": 12828 }, { "epoch": 0.32941274648102054, "grad_norm": 0.796875, "learning_rate": 0.00018059517590764704, "loss": 0.8493, "step": 12829 }, { "epoch": 0.32943842367694237, "grad_norm": 0.828125, "learning_rate": 0.00018059253310378766, "loss": 0.9687, "step": 12830 }, { "epoch": 0.32946410087286415, "grad_norm": 0.765625, "learning_rate": 0.00018058989013931465, "loss": 0.9695, "step": 12831 }, { "epoch": 0.329489778068786, "grad_norm": 0.8359375, "learning_rate": 0.00018058724701423324, "loss": 0.9495, "step": 12832 }, { "epoch": 0.3295154552647078, "grad_norm": 0.80859375, "learning_rate": 0.00018058460372854875, "loss": 0.9843, "step": 12833 }, { "epoch": 0.32954113246062966, "grad_norm": 0.7421875, "learning_rate": 0.0001805819602822664, "loss": 0.9017, "step": 12834 }, { "epoch": 0.32956680965655144, "grad_norm": 0.80078125, "learning_rate": 0.00018057931667539152, "loss": 0.8391, "step": 12835 }, { "epoch": 0.3295924868524733, "grad_norm": 0.765625, "learning_rate": 0.00018057667290792933, "loss": 0.9203, "step": 12836 }, { "epoch": 0.3296181640483951, "grad_norm": 0.75390625, "learning_rate": 0.00018057402897988508, "loss": 0.8631, "step": 12837 }, { "epoch": 0.3296438412443169, "grad_norm": 1.203125, "learning_rate": 0.0001805713848912641, "loss": 0.9798, "step": 12838 }, { "epoch": 0.32966951844023873, "grad_norm": 0.76171875, "learning_rate": 0.0001805687406420716, "loss": 0.8911, "step": 12839 }, { "epoch": 0.32969519563616057, "grad_norm": 0.8046875, "learning_rate": 0.00018056609623231292, "loss": 0.9477, "step": 12840 }, { "epoch": 0.32972087283208235, "grad_norm": 0.76953125, "learning_rate": 0.00018056345166199325, "loss": 0.9791, "step": 12841 }, { "epoch": 0.3297465500280042, "grad_norm": 0.75, "learning_rate": 0.0001805608069311179, "loss": 0.9563, "step": 12842 }, { "epoch": 0.329772227223926, "grad_norm": 1.078125, "learning_rate": 0.00018055816203969214, "loss": 0.9992, "step": 12843 }, { "epoch": 0.32979790441984785, "grad_norm": 0.76171875, "learning_rate": 0.00018055551698772126, "loss": 0.7862, "step": 12844 }, { "epoch": 0.32982358161576963, "grad_norm": 0.8046875, "learning_rate": 0.0001805528717752105, "loss": 0.8597, "step": 12845 }, { "epoch": 0.32984925881169147, "grad_norm": 0.7421875, "learning_rate": 0.00018055022640216513, "loss": 0.9187, "step": 12846 }, { "epoch": 0.3298749360076133, "grad_norm": 0.765625, "learning_rate": 0.00018054758086859048, "loss": 1.0543, "step": 12847 }, { "epoch": 0.3299006132035351, "grad_norm": 0.80859375, "learning_rate": 0.00018054493517449173, "loss": 0.9385, "step": 12848 }, { "epoch": 0.3299262903994569, "grad_norm": 0.7109375, "learning_rate": 0.00018054228931987422, "loss": 0.9756, "step": 12849 }, { "epoch": 0.32995196759537876, "grad_norm": 0.79296875, "learning_rate": 0.0001805396433047432, "loss": 0.9799, "step": 12850 }, { "epoch": 0.32997764479130054, "grad_norm": 0.8359375, "learning_rate": 0.00018053699712910394, "loss": 1.0141, "step": 12851 }, { "epoch": 0.3300033219872224, "grad_norm": 0.8125, "learning_rate": 0.00018053435079296173, "loss": 0.8778, "step": 12852 }, { "epoch": 0.3300289991831442, "grad_norm": 0.77734375, "learning_rate": 0.00018053170429632183, "loss": 0.9946, "step": 12853 }, { "epoch": 0.33005467637906605, "grad_norm": 0.75, "learning_rate": 0.0001805290576391895, "loss": 0.8352, "step": 12854 }, { "epoch": 0.33008035357498783, "grad_norm": 0.8046875, "learning_rate": 0.00018052641082157007, "loss": 0.964, "step": 12855 }, { "epoch": 0.33010603077090966, "grad_norm": 0.72265625, "learning_rate": 0.00018052376384346873, "loss": 0.9709, "step": 12856 }, { "epoch": 0.3301317079668315, "grad_norm": 0.72265625, "learning_rate": 0.00018052111670489084, "loss": 0.9481, "step": 12857 }, { "epoch": 0.3301573851627533, "grad_norm": 0.7890625, "learning_rate": 0.00018051846940584163, "loss": 0.9272, "step": 12858 }, { "epoch": 0.3301830623586751, "grad_norm": 0.83203125, "learning_rate": 0.00018051582194632637, "loss": 1.0283, "step": 12859 }, { "epoch": 0.33020873955459695, "grad_norm": 0.9765625, "learning_rate": 0.00018051317432635035, "loss": 0.9894, "step": 12860 }, { "epoch": 0.33023441675051873, "grad_norm": 0.703125, "learning_rate": 0.00018051052654591886, "loss": 0.9043, "step": 12861 }, { "epoch": 0.33026009394644057, "grad_norm": 0.82421875, "learning_rate": 0.00018050787860503714, "loss": 0.9741, "step": 12862 }, { "epoch": 0.3302857711423624, "grad_norm": 0.78125, "learning_rate": 0.00018050523050371054, "loss": 1.045, "step": 12863 }, { "epoch": 0.33031144833828424, "grad_norm": 0.765625, "learning_rate": 0.00018050258224194425, "loss": 0.9082, "step": 12864 }, { "epoch": 0.330337125534206, "grad_norm": 0.796875, "learning_rate": 0.0001804999338197436, "loss": 0.9835, "step": 12865 }, { "epoch": 0.33036280273012786, "grad_norm": 0.76953125, "learning_rate": 0.00018049728523711383, "loss": 0.918, "step": 12866 }, { "epoch": 0.3303884799260497, "grad_norm": 0.84375, "learning_rate": 0.00018049463649406024, "loss": 0.9773, "step": 12867 }, { "epoch": 0.3304141571219715, "grad_norm": 0.81640625, "learning_rate": 0.00018049198759058813, "loss": 1.0718, "step": 12868 }, { "epoch": 0.3304398343178933, "grad_norm": 0.73828125, "learning_rate": 0.00018048933852670275, "loss": 0.9333, "step": 12869 }, { "epoch": 0.33046551151381515, "grad_norm": 0.82421875, "learning_rate": 0.0001804866893024094, "loss": 0.9866, "step": 12870 }, { "epoch": 0.3304911887097369, "grad_norm": 0.796875, "learning_rate": 0.00018048403991771336, "loss": 0.8461, "step": 12871 }, { "epoch": 0.33051686590565876, "grad_norm": 0.703125, "learning_rate": 0.00018048139037261986, "loss": 0.8639, "step": 12872 }, { "epoch": 0.3305425431015806, "grad_norm": 0.82421875, "learning_rate": 0.00018047874066713426, "loss": 1.0596, "step": 12873 }, { "epoch": 0.33056822029750244, "grad_norm": 0.7890625, "learning_rate": 0.00018047609080126174, "loss": 1.0445, "step": 12874 }, { "epoch": 0.3305938974934242, "grad_norm": 0.74609375, "learning_rate": 0.00018047344077500773, "loss": 1.024, "step": 12875 }, { "epoch": 0.33061957468934605, "grad_norm": 0.796875, "learning_rate": 0.00018047079058837736, "loss": 1.1884, "step": 12876 }, { "epoch": 0.3306452518852679, "grad_norm": 0.8671875, "learning_rate": 0.00018046814024137597, "loss": 0.9238, "step": 12877 }, { "epoch": 0.33067092908118967, "grad_norm": 0.8828125, "learning_rate": 0.00018046548973400887, "loss": 0.9527, "step": 12878 }, { "epoch": 0.3306966062771115, "grad_norm": 0.82421875, "learning_rate": 0.0001804628390662813, "loss": 0.8932, "step": 12879 }, { "epoch": 0.33072228347303334, "grad_norm": 0.9296875, "learning_rate": 0.00018046018823819856, "loss": 1.0478, "step": 12880 }, { "epoch": 0.3307479606689551, "grad_norm": 0.7421875, "learning_rate": 0.00018045753724976594, "loss": 0.8936, "step": 12881 }, { "epoch": 0.33077363786487696, "grad_norm": 0.7421875, "learning_rate": 0.00018045488610098873, "loss": 0.8915, "step": 12882 }, { "epoch": 0.3307993150607988, "grad_norm": 0.7578125, "learning_rate": 0.0001804522347918722, "loss": 0.9289, "step": 12883 }, { "epoch": 0.33082499225672063, "grad_norm": 0.75390625, "learning_rate": 0.00018044958332242162, "loss": 0.9123, "step": 12884 }, { "epoch": 0.3308506694526424, "grad_norm": 0.78515625, "learning_rate": 0.00018044693169264227, "loss": 0.9693, "step": 12885 }, { "epoch": 0.33087634664856425, "grad_norm": 0.796875, "learning_rate": 0.00018044427990253947, "loss": 0.9348, "step": 12886 }, { "epoch": 0.3309020238444861, "grad_norm": 0.87109375, "learning_rate": 0.0001804416279521185, "loss": 1.018, "step": 12887 }, { "epoch": 0.33092770104040786, "grad_norm": 0.8671875, "learning_rate": 0.0001804389758413846, "loss": 0.8556, "step": 12888 }, { "epoch": 0.3309533782363297, "grad_norm": 0.7265625, "learning_rate": 0.0001804363235703431, "loss": 0.9841, "step": 12889 }, { "epoch": 0.33097905543225153, "grad_norm": 0.70703125, "learning_rate": 0.0001804336711389993, "loss": 0.9977, "step": 12890 }, { "epoch": 0.3310047326281733, "grad_norm": 0.7734375, "learning_rate": 0.0001804310185473584, "loss": 0.977, "step": 12891 }, { "epoch": 0.33103040982409515, "grad_norm": 0.74609375, "learning_rate": 0.00018042836579542582, "loss": 1.0763, "step": 12892 }, { "epoch": 0.331056087020017, "grad_norm": 0.765625, "learning_rate": 0.0001804257128832067, "loss": 1.0433, "step": 12893 }, { "epoch": 0.3310817642159388, "grad_norm": 0.87109375, "learning_rate": 0.00018042305981070644, "loss": 0.9457, "step": 12894 }, { "epoch": 0.3311074414118606, "grad_norm": 0.875, "learning_rate": 0.0001804204065779303, "loss": 0.9858, "step": 12895 }, { "epoch": 0.33113311860778244, "grad_norm": 0.7265625, "learning_rate": 0.0001804177531848835, "loss": 1.0527, "step": 12896 }, { "epoch": 0.3311587958037043, "grad_norm": 0.77734375, "learning_rate": 0.00018041509963157143, "loss": 1.0522, "step": 12897 }, { "epoch": 0.33118447299962606, "grad_norm": 0.77734375, "learning_rate": 0.0001804124459179993, "loss": 0.9631, "step": 12898 }, { "epoch": 0.3312101501955479, "grad_norm": 0.80859375, "learning_rate": 0.00018040979204417246, "loss": 0.9651, "step": 12899 }, { "epoch": 0.33123582739146973, "grad_norm": 0.7578125, "learning_rate": 0.00018040713801009613, "loss": 0.8855, "step": 12900 }, { "epoch": 0.3312615045873915, "grad_norm": 0.765625, "learning_rate": 0.0001804044838157757, "loss": 1.0141, "step": 12901 }, { "epoch": 0.33128718178331334, "grad_norm": 0.8828125, "learning_rate": 0.00018040182946121633, "loss": 0.9616, "step": 12902 }, { "epoch": 0.3313128589792352, "grad_norm": 0.90625, "learning_rate": 0.00018039917494642343, "loss": 1.0688, "step": 12903 }, { "epoch": 0.331338536175157, "grad_norm": 0.78515625, "learning_rate": 0.0001803965202714022, "loss": 1.0518, "step": 12904 }, { "epoch": 0.3313642133710788, "grad_norm": 0.82421875, "learning_rate": 0.00018039386543615798, "loss": 0.8954, "step": 12905 }, { "epoch": 0.33138989056700063, "grad_norm": 0.80078125, "learning_rate": 0.00018039121044069603, "loss": 0.9781, "step": 12906 }, { "epoch": 0.33141556776292247, "grad_norm": 0.78515625, "learning_rate": 0.00018038855528502168, "loss": 0.9656, "step": 12907 }, { "epoch": 0.33144124495884425, "grad_norm": 0.89453125, "learning_rate": 0.0001803858999691402, "loss": 1.008, "step": 12908 }, { "epoch": 0.3314669221547661, "grad_norm": 0.80078125, "learning_rate": 0.00018038324449305685, "loss": 0.9469, "step": 12909 }, { "epoch": 0.3314925993506879, "grad_norm": 0.8125, "learning_rate": 0.00018038058885677698, "loss": 1.0493, "step": 12910 }, { "epoch": 0.3315182765466097, "grad_norm": 0.82421875, "learning_rate": 0.00018037793306030587, "loss": 1.0864, "step": 12911 }, { "epoch": 0.33154395374253154, "grad_norm": 0.77734375, "learning_rate": 0.0001803752771036488, "loss": 0.8968, "step": 12912 }, { "epoch": 0.3315696309384534, "grad_norm": 0.7265625, "learning_rate": 0.00018037262098681103, "loss": 0.943, "step": 12913 }, { "epoch": 0.3315953081343752, "grad_norm": 0.796875, "learning_rate": 0.00018036996470979793, "loss": 1.0143, "step": 12914 }, { "epoch": 0.331620985330297, "grad_norm": 0.7578125, "learning_rate": 0.00018036730827261473, "loss": 1.0185, "step": 12915 }, { "epoch": 0.3316466625262188, "grad_norm": 0.765625, "learning_rate": 0.00018036465167526675, "loss": 0.9639, "step": 12916 }, { "epoch": 0.33167233972214066, "grad_norm": 0.8203125, "learning_rate": 0.00018036199491775924, "loss": 1.0477, "step": 12917 }, { "epoch": 0.33169801691806244, "grad_norm": 0.76171875, "learning_rate": 0.00018035933800009758, "loss": 0.8591, "step": 12918 }, { "epoch": 0.3317236941139843, "grad_norm": 0.72265625, "learning_rate": 0.000180356680922287, "loss": 1.0694, "step": 12919 }, { "epoch": 0.3317493713099061, "grad_norm": 0.703125, "learning_rate": 0.00018035402368433282, "loss": 0.939, "step": 12920 }, { "epoch": 0.3317750485058279, "grad_norm": 0.84765625, "learning_rate": 0.0001803513662862403, "loss": 0.9299, "step": 12921 }, { "epoch": 0.33180072570174973, "grad_norm": 0.80078125, "learning_rate": 0.00018034870872801482, "loss": 1.0114, "step": 12922 }, { "epoch": 0.33182640289767157, "grad_norm": 0.7734375, "learning_rate": 0.0001803460510096616, "loss": 0.8872, "step": 12923 }, { "epoch": 0.3318520800935934, "grad_norm": 0.76171875, "learning_rate": 0.00018034339313118593, "loss": 0.8986, "step": 12924 }, { "epoch": 0.3318777572895152, "grad_norm": 0.734375, "learning_rate": 0.00018034073509259316, "loss": 0.8063, "step": 12925 }, { "epoch": 0.331903434485437, "grad_norm": 0.71875, "learning_rate": 0.00018033807689388855, "loss": 0.9697, "step": 12926 }, { "epoch": 0.33192911168135886, "grad_norm": 0.79296875, "learning_rate": 0.00018033541853507742, "loss": 0.8981, "step": 12927 }, { "epoch": 0.33195478887728064, "grad_norm": 0.86328125, "learning_rate": 0.00018033276001616505, "loss": 1.1516, "step": 12928 }, { "epoch": 0.3319804660732025, "grad_norm": 0.79296875, "learning_rate": 0.00018033010133715674, "loss": 0.9523, "step": 12929 }, { "epoch": 0.3320061432691243, "grad_norm": 0.78515625, "learning_rate": 0.00018032744249805782, "loss": 1.0406, "step": 12930 }, { "epoch": 0.3320318204650461, "grad_norm": 0.703125, "learning_rate": 0.00018032478349887354, "loss": 0.8577, "step": 12931 }, { "epoch": 0.3320574976609679, "grad_norm": 0.76171875, "learning_rate": 0.00018032212433960923, "loss": 0.8105, "step": 12932 }, { "epoch": 0.33208317485688976, "grad_norm": 0.71875, "learning_rate": 0.00018031946502027017, "loss": 0.8784, "step": 12933 }, { "epoch": 0.3321088520528116, "grad_norm": 0.80859375, "learning_rate": 0.0001803168055408617, "loss": 0.9564, "step": 12934 }, { "epoch": 0.3321345292487334, "grad_norm": 0.71875, "learning_rate": 0.00018031414590138906, "loss": 0.9156, "step": 12935 }, { "epoch": 0.3321602064446552, "grad_norm": 0.8515625, "learning_rate": 0.0001803114861018576, "loss": 0.965, "step": 12936 }, { "epoch": 0.33218588364057705, "grad_norm": 0.90625, "learning_rate": 0.0001803088261422726, "loss": 0.9416, "step": 12937 }, { "epoch": 0.33221156083649883, "grad_norm": 0.7578125, "learning_rate": 0.00018030616602263938, "loss": 0.9361, "step": 12938 }, { "epoch": 0.33223723803242067, "grad_norm": 0.765625, "learning_rate": 0.00018030350574296317, "loss": 0.8356, "step": 12939 }, { "epoch": 0.3322629152283425, "grad_norm": 0.80859375, "learning_rate": 0.0001803008453032494, "loss": 1.1058, "step": 12940 }, { "epoch": 0.3322885924242643, "grad_norm": 0.765625, "learning_rate": 0.00018029818470350324, "loss": 0.9061, "step": 12941 }, { "epoch": 0.3323142696201861, "grad_norm": 0.74609375, "learning_rate": 0.00018029552394373006, "loss": 0.9027, "step": 12942 }, { "epoch": 0.33233994681610796, "grad_norm": 0.765625, "learning_rate": 0.0001802928630239352, "loss": 0.8949, "step": 12943 }, { "epoch": 0.3323656240120298, "grad_norm": 0.75390625, "learning_rate": 0.00018029020194412388, "loss": 0.8574, "step": 12944 }, { "epoch": 0.3323913012079516, "grad_norm": 0.79296875, "learning_rate": 0.00018028754070430143, "loss": 0.919, "step": 12945 }, { "epoch": 0.3324169784038734, "grad_norm": 0.796875, "learning_rate": 0.00018028487930447318, "loss": 0.954, "step": 12946 }, { "epoch": 0.33244265559979524, "grad_norm": 0.78515625, "learning_rate": 0.00018028221774464442, "loss": 0.9, "step": 12947 }, { "epoch": 0.332468332795717, "grad_norm": 0.9453125, "learning_rate": 0.00018027955602482044, "loss": 0.9087, "step": 12948 }, { "epoch": 0.33249400999163886, "grad_norm": 0.734375, "learning_rate": 0.00018027689414500656, "loss": 0.8617, "step": 12949 }, { "epoch": 0.3325196871875607, "grad_norm": 0.8125, "learning_rate": 0.0001802742321052081, "loss": 1.01, "step": 12950 }, { "epoch": 0.3325453643834825, "grad_norm": 0.75390625, "learning_rate": 0.00018027156990543032, "loss": 1.0844, "step": 12951 }, { "epoch": 0.3325710415794043, "grad_norm": 0.7578125, "learning_rate": 0.00018026890754567858, "loss": 0.8916, "step": 12952 }, { "epoch": 0.33259671877532615, "grad_norm": 0.8046875, "learning_rate": 0.00018026624502595815, "loss": 0.9013, "step": 12953 }, { "epoch": 0.332622395971248, "grad_norm": 0.76953125, "learning_rate": 0.00018026358234627432, "loss": 0.806, "step": 12954 }, { "epoch": 0.33264807316716977, "grad_norm": 0.75390625, "learning_rate": 0.00018026091950663244, "loss": 0.9285, "step": 12955 }, { "epoch": 0.3326737503630916, "grad_norm": 0.734375, "learning_rate": 0.00018025825650703779, "loss": 0.8945, "step": 12956 }, { "epoch": 0.33269942755901344, "grad_norm": 0.84765625, "learning_rate": 0.0001802555933474957, "loss": 1.1471, "step": 12957 }, { "epoch": 0.3327251047549352, "grad_norm": 0.81640625, "learning_rate": 0.00018025293002801144, "loss": 0.9435, "step": 12958 }, { "epoch": 0.33275078195085706, "grad_norm": 0.80078125, "learning_rate": 0.00018025026654859035, "loss": 0.9159, "step": 12959 }, { "epoch": 0.3327764591467789, "grad_norm": 0.79296875, "learning_rate": 0.00018024760290923775, "loss": 0.8538, "step": 12960 }, { "epoch": 0.33280213634270067, "grad_norm": 0.8203125, "learning_rate": 0.0001802449391099589, "loss": 0.9862, "step": 12961 }, { "epoch": 0.3328278135386225, "grad_norm": 0.77734375, "learning_rate": 0.00018024227515075914, "loss": 0.906, "step": 12962 }, { "epoch": 0.33285349073454434, "grad_norm": 0.859375, "learning_rate": 0.00018023961103164377, "loss": 1.0987, "step": 12963 }, { "epoch": 0.3328791679304662, "grad_norm": 0.78515625, "learning_rate": 0.0001802369467526181, "loss": 0.908, "step": 12964 }, { "epoch": 0.33290484512638796, "grad_norm": 0.80078125, "learning_rate": 0.00018023428231368744, "loss": 0.9407, "step": 12965 }, { "epoch": 0.3329305223223098, "grad_norm": 0.78125, "learning_rate": 0.00018023161771485712, "loss": 0.9874, "step": 12966 }, { "epoch": 0.33295619951823163, "grad_norm": 0.76953125, "learning_rate": 0.00018022895295613243, "loss": 0.9808, "step": 12967 }, { "epoch": 0.3329818767141534, "grad_norm": 1.078125, "learning_rate": 0.0001802262880375187, "loss": 0.9393, "step": 12968 }, { "epoch": 0.33300755391007525, "grad_norm": 0.71484375, "learning_rate": 0.00018022362295902115, "loss": 1.0871, "step": 12969 }, { "epoch": 0.3330332311059971, "grad_norm": 0.796875, "learning_rate": 0.00018022095772064525, "loss": 0.8488, "step": 12970 }, { "epoch": 0.33305890830191887, "grad_norm": 0.78515625, "learning_rate": 0.0001802182923223962, "loss": 0.9408, "step": 12971 }, { "epoch": 0.3330845854978407, "grad_norm": 0.82421875, "learning_rate": 0.00018021562676427932, "loss": 1.1715, "step": 12972 }, { "epoch": 0.33311026269376254, "grad_norm": 0.7890625, "learning_rate": 0.00018021296104629996, "loss": 0.9488, "step": 12973 }, { "epoch": 0.3331359398896844, "grad_norm": 0.7578125, "learning_rate": 0.0001802102951684634, "loss": 0.9008, "step": 12974 }, { "epoch": 0.33316161708560615, "grad_norm": 0.8125, "learning_rate": 0.000180207629130775, "loss": 0.8846, "step": 12975 }, { "epoch": 0.333187294281528, "grad_norm": 0.8125, "learning_rate": 0.00018020496293324003, "loss": 1.1269, "step": 12976 }, { "epoch": 0.3332129714774498, "grad_norm": 0.80078125, "learning_rate": 0.0001802022965758638, "loss": 1.0546, "step": 12977 }, { "epoch": 0.3332386486733716, "grad_norm": 1.1796875, "learning_rate": 0.00018019963005865163, "loss": 0.9383, "step": 12978 }, { "epoch": 0.33326432586929344, "grad_norm": 0.8125, "learning_rate": 0.00018019696338160884, "loss": 0.9789, "step": 12979 }, { "epoch": 0.3332900030652153, "grad_norm": 0.85546875, "learning_rate": 0.00018019429654474078, "loss": 0.9578, "step": 12980 }, { "epoch": 0.33331568026113706, "grad_norm": 0.80078125, "learning_rate": 0.0001801916295480527, "loss": 1.0835, "step": 12981 }, { "epoch": 0.3333413574570589, "grad_norm": 0.75390625, "learning_rate": 0.00018018896239154993, "loss": 0.9389, "step": 12982 }, { "epoch": 0.33336703465298073, "grad_norm": 0.78125, "learning_rate": 0.00018018629507523786, "loss": 0.9366, "step": 12983 }, { "epoch": 0.33339271184890257, "grad_norm": 0.72265625, "learning_rate": 0.00018018362759912173, "loss": 0.8397, "step": 12984 }, { "epoch": 0.33341838904482435, "grad_norm": 0.72265625, "learning_rate": 0.00018018095996320685, "loss": 0.9932, "step": 12985 }, { "epoch": 0.3334440662407462, "grad_norm": 0.73046875, "learning_rate": 0.00018017829216749855, "loss": 1.074, "step": 12986 }, { "epoch": 0.333469743436668, "grad_norm": 0.8359375, "learning_rate": 0.00018017562421200218, "loss": 0.969, "step": 12987 }, { "epoch": 0.3334954206325898, "grad_norm": 0.79296875, "learning_rate": 0.00018017295609672304, "loss": 1.1326, "step": 12988 }, { "epoch": 0.33352109782851164, "grad_norm": 0.78515625, "learning_rate": 0.0001801702878216664, "loss": 1.0891, "step": 12989 }, { "epoch": 0.3335467750244335, "grad_norm": 0.7578125, "learning_rate": 0.00018016761938683765, "loss": 1.0114, "step": 12990 }, { "epoch": 0.33357245222035525, "grad_norm": 0.87890625, "learning_rate": 0.0001801649507922421, "loss": 0.8692, "step": 12991 }, { "epoch": 0.3335981294162771, "grad_norm": 0.84375, "learning_rate": 0.00018016228203788498, "loss": 0.9824, "step": 12992 }, { "epoch": 0.3336238066121989, "grad_norm": 0.76953125, "learning_rate": 0.00018015961312377172, "loss": 0.9267, "step": 12993 }, { "epoch": 0.33364948380812076, "grad_norm": 0.81640625, "learning_rate": 0.00018015694404990758, "loss": 0.986, "step": 12994 }, { "epoch": 0.33367516100404254, "grad_norm": 0.82421875, "learning_rate": 0.0001801542748162979, "loss": 0.8399, "step": 12995 }, { "epoch": 0.3337008381999644, "grad_norm": 0.75390625, "learning_rate": 0.000180151605422948, "loss": 0.865, "step": 12996 }, { "epoch": 0.3337265153958862, "grad_norm": 0.85546875, "learning_rate": 0.00018014893586986314, "loss": 1.1116, "step": 12997 }, { "epoch": 0.333752192591808, "grad_norm": 0.83203125, "learning_rate": 0.00018014626615704872, "loss": 0.8933, "step": 12998 }, { "epoch": 0.33377786978772983, "grad_norm": 0.78515625, "learning_rate": 0.00018014359628451006, "loss": 0.8962, "step": 12999 }, { "epoch": 0.33380354698365167, "grad_norm": 0.84765625, "learning_rate": 0.00018014092625225244, "loss": 0.9574, "step": 13000 }, { "epoch": 0.33380354698365167, "eval_loss": 0.9538285136222839, "eval_model_preparation_time": 0.0065, "eval_runtime": 406.1525, "eval_samples_per_second": 24.621, "eval_steps_per_second": 0.771, "step": 13000 }, { "epoch": 0.33382922417957345, "grad_norm": 0.80078125, "learning_rate": 0.00018013825606028118, "loss": 1.0813, "step": 13001 }, { "epoch": 0.3338549013754953, "grad_norm": 0.77734375, "learning_rate": 0.0001801355857086016, "loss": 0.8755, "step": 13002 }, { "epoch": 0.3338805785714171, "grad_norm": 0.75, "learning_rate": 0.00018013291519721904, "loss": 0.9212, "step": 13003 }, { "epoch": 0.33390625576733896, "grad_norm": 0.87109375, "learning_rate": 0.00018013024452613884, "loss": 1.149, "step": 13004 }, { "epoch": 0.33393193296326074, "grad_norm": 0.6953125, "learning_rate": 0.00018012757369536628, "loss": 0.8671, "step": 13005 }, { "epoch": 0.33395761015918257, "grad_norm": 0.8203125, "learning_rate": 0.0001801249027049067, "loss": 0.9997, "step": 13006 }, { "epoch": 0.3339832873551044, "grad_norm": 0.74609375, "learning_rate": 0.00018012223155476544, "loss": 0.959, "step": 13007 }, { "epoch": 0.3340089645510262, "grad_norm": 0.7890625, "learning_rate": 0.00018011956024494783, "loss": 1.0507, "step": 13008 }, { "epoch": 0.334034641746948, "grad_norm": 0.9375, "learning_rate": 0.00018011688877545915, "loss": 0.9584, "step": 13009 }, { "epoch": 0.33406031894286986, "grad_norm": 0.77734375, "learning_rate": 0.00018011421714630475, "loss": 0.8992, "step": 13010 }, { "epoch": 0.33408599613879164, "grad_norm": 0.8125, "learning_rate": 0.00018011154535748993, "loss": 1.027, "step": 13011 }, { "epoch": 0.3341116733347135, "grad_norm": 0.78125, "learning_rate": 0.00018010887340902008, "loss": 0.8496, "step": 13012 }, { "epoch": 0.3341373505306353, "grad_norm": 0.7578125, "learning_rate": 0.00018010620130090045, "loss": 0.9195, "step": 13013 }, { "epoch": 0.3341630277265571, "grad_norm": 0.83984375, "learning_rate": 0.0001801035290331364, "loss": 0.9283, "step": 13014 }, { "epoch": 0.33418870492247893, "grad_norm": 0.74609375, "learning_rate": 0.00018010085660573325, "loss": 0.9047, "step": 13015 }, { "epoch": 0.33421438211840077, "grad_norm": 0.80859375, "learning_rate": 0.00018009818401869637, "loss": 0.9213, "step": 13016 }, { "epoch": 0.3342400593143226, "grad_norm": 0.68359375, "learning_rate": 0.00018009551127203102, "loss": 0.8357, "step": 13017 }, { "epoch": 0.3342657365102444, "grad_norm": 1.6171875, "learning_rate": 0.00018009283836574252, "loss": 1.1239, "step": 13018 }, { "epoch": 0.3342914137061662, "grad_norm": 0.81640625, "learning_rate": 0.00018009016529983626, "loss": 1.1022, "step": 13019 }, { "epoch": 0.33431709090208805, "grad_norm": 0.7109375, "learning_rate": 0.00018008749207431752, "loss": 0.8568, "step": 13020 }, { "epoch": 0.33434276809800983, "grad_norm": 0.8984375, "learning_rate": 0.00018008481868919164, "loss": 0.9794, "step": 13021 }, { "epoch": 0.33436844529393167, "grad_norm": 0.75, "learning_rate": 0.00018008214514446394, "loss": 0.9704, "step": 13022 }, { "epoch": 0.3343941224898535, "grad_norm": 0.80078125, "learning_rate": 0.00018007947144013978, "loss": 0.9471, "step": 13023 }, { "epoch": 0.3344197996857753, "grad_norm": 0.84375, "learning_rate": 0.00018007679757622448, "loss": 0.9733, "step": 13024 }, { "epoch": 0.3344454768816971, "grad_norm": 0.828125, "learning_rate": 0.00018007412355272335, "loss": 1.0404, "step": 13025 }, { "epoch": 0.33447115407761896, "grad_norm": 0.71484375, "learning_rate": 0.00018007144936964172, "loss": 0.85, "step": 13026 }, { "epoch": 0.3344968312735408, "grad_norm": 0.859375, "learning_rate": 0.00018006877502698492, "loss": 1.1166, "step": 13027 }, { "epoch": 0.3345225084694626, "grad_norm": 0.7265625, "learning_rate": 0.00018006610052475827, "loss": 0.8706, "step": 13028 }, { "epoch": 0.3345481856653844, "grad_norm": 0.78515625, "learning_rate": 0.00018006342586296715, "loss": 0.9175, "step": 13029 }, { "epoch": 0.33457386286130625, "grad_norm": 0.7578125, "learning_rate": 0.00018006075104161684, "loss": 0.9589, "step": 13030 }, { "epoch": 0.33459954005722803, "grad_norm": 0.89453125, "learning_rate": 0.00018005807606071266, "loss": 1.1267, "step": 13031 }, { "epoch": 0.33462521725314986, "grad_norm": 0.80859375, "learning_rate": 0.00018005540092025997, "loss": 0.9801, "step": 13032 }, { "epoch": 0.3346508944490717, "grad_norm": 0.7890625, "learning_rate": 0.00018005272562026411, "loss": 0.9639, "step": 13033 }, { "epoch": 0.3346765716449935, "grad_norm": 0.74609375, "learning_rate": 0.00018005005016073043, "loss": 0.8939, "step": 13034 }, { "epoch": 0.3347022488409153, "grad_norm": 0.70703125, "learning_rate": 0.00018004737454166417, "loss": 0.9497, "step": 13035 }, { "epoch": 0.33472792603683715, "grad_norm": 0.6796875, "learning_rate": 0.00018004469876307076, "loss": 0.811, "step": 13036 }, { "epoch": 0.334753603232759, "grad_norm": 0.8203125, "learning_rate": 0.0001800420228249555, "loss": 0.9072, "step": 13037 }, { "epoch": 0.33477928042868077, "grad_norm": 0.7578125, "learning_rate": 0.0001800393467273237, "loss": 0.8263, "step": 13038 }, { "epoch": 0.3348049576246026, "grad_norm": 0.75, "learning_rate": 0.0001800366704701807, "loss": 0.8951, "step": 13039 }, { "epoch": 0.33483063482052444, "grad_norm": 0.8125, "learning_rate": 0.00018003399405353186, "loss": 0.9145, "step": 13040 }, { "epoch": 0.3348563120164462, "grad_norm": 0.79296875, "learning_rate": 0.00018003131747738253, "loss": 1.0257, "step": 13041 }, { "epoch": 0.33488198921236806, "grad_norm": 0.75390625, "learning_rate": 0.00018002864074173798, "loss": 0.8592, "step": 13042 }, { "epoch": 0.3349076664082899, "grad_norm": 0.7890625, "learning_rate": 0.0001800259638466036, "loss": 0.9558, "step": 13043 }, { "epoch": 0.3349333436042117, "grad_norm": 0.78515625, "learning_rate": 0.00018002328679198469, "loss": 0.9688, "step": 13044 }, { "epoch": 0.3349590208001335, "grad_norm": 0.70703125, "learning_rate": 0.0001800206095778866, "loss": 0.9085, "step": 13045 }, { "epoch": 0.33498469799605535, "grad_norm": 0.77734375, "learning_rate": 0.00018001793220431464, "loss": 0.9829, "step": 13046 }, { "epoch": 0.3350103751919772, "grad_norm": 0.7734375, "learning_rate": 0.0001800152546712742, "loss": 0.9779, "step": 13047 }, { "epoch": 0.33503605238789896, "grad_norm": 0.84375, "learning_rate": 0.00018001257697877057, "loss": 0.9425, "step": 13048 }, { "epoch": 0.3350617295838208, "grad_norm": 0.79296875, "learning_rate": 0.0001800098991268091, "loss": 0.8844, "step": 13049 }, { "epoch": 0.33508740677974264, "grad_norm": 0.76953125, "learning_rate": 0.00018000722111539513, "loss": 0.9482, "step": 13050 }, { "epoch": 0.3351130839756644, "grad_norm": 0.76171875, "learning_rate": 0.00018000454294453402, "loss": 1.0006, "step": 13051 }, { "epoch": 0.33513876117158625, "grad_norm": 0.78515625, "learning_rate": 0.00018000186461423104, "loss": 0.8924, "step": 13052 }, { "epoch": 0.3351644383675081, "grad_norm": 0.84765625, "learning_rate": 0.0001799991861244916, "loss": 0.8967, "step": 13053 }, { "epoch": 0.33519011556342987, "grad_norm": 0.89453125, "learning_rate": 0.00017999650747532102, "loss": 1.1166, "step": 13054 }, { "epoch": 0.3352157927593517, "grad_norm": 0.76171875, "learning_rate": 0.0001799938286667246, "loss": 0.8154, "step": 13055 }, { "epoch": 0.33524146995527354, "grad_norm": 0.78515625, "learning_rate": 0.00017999114969870773, "loss": 1.0094, "step": 13056 }, { "epoch": 0.3352671471511954, "grad_norm": 0.77734375, "learning_rate": 0.00017998847057127567, "loss": 1.0053, "step": 13057 }, { "epoch": 0.33529282434711716, "grad_norm": 0.734375, "learning_rate": 0.00017998579128443386, "loss": 0.8663, "step": 13058 }, { "epoch": 0.335318501543039, "grad_norm": 0.80078125, "learning_rate": 0.0001799831118381876, "loss": 0.9263, "step": 13059 }, { "epoch": 0.33534417873896083, "grad_norm": 0.8515625, "learning_rate": 0.0001799804322325422, "loss": 0.9149, "step": 13060 }, { "epoch": 0.3353698559348826, "grad_norm": 0.84375, "learning_rate": 0.00017997775246750305, "loss": 1.0704, "step": 13061 }, { "epoch": 0.33539553313080445, "grad_norm": 0.74609375, "learning_rate": 0.0001799750725430754, "loss": 0.8545, "step": 13062 }, { "epoch": 0.3354212103267263, "grad_norm": 0.7890625, "learning_rate": 0.00017997239245926472, "loss": 0.8911, "step": 13063 }, { "epoch": 0.33544688752264806, "grad_norm": 0.75390625, "learning_rate": 0.00017996971221607625, "loss": 0.8183, "step": 13064 }, { "epoch": 0.3354725647185699, "grad_norm": 0.859375, "learning_rate": 0.0001799670318135154, "loss": 0.9254, "step": 13065 }, { "epoch": 0.33549824191449173, "grad_norm": 0.69140625, "learning_rate": 0.00017996435125158743, "loss": 0.9008, "step": 13066 }, { "epoch": 0.33552391911041357, "grad_norm": 0.83203125, "learning_rate": 0.00017996167053029778, "loss": 1.0188, "step": 13067 }, { "epoch": 0.33554959630633535, "grad_norm": 0.765625, "learning_rate": 0.00017995898964965173, "loss": 1.0979, "step": 13068 }, { "epoch": 0.3355752735022572, "grad_norm": 0.8046875, "learning_rate": 0.00017995630860965462, "loss": 0.8623, "step": 13069 }, { "epoch": 0.335600950698179, "grad_norm": 0.73828125, "learning_rate": 0.00017995362741031183, "loss": 1.0317, "step": 13070 }, { "epoch": 0.3356266278941008, "grad_norm": 0.76953125, "learning_rate": 0.00017995094605162865, "loss": 0.8925, "step": 13071 }, { "epoch": 0.33565230509002264, "grad_norm": 0.73828125, "learning_rate": 0.0001799482645336105, "loss": 0.9949, "step": 13072 }, { "epoch": 0.3356779822859445, "grad_norm": 0.7734375, "learning_rate": 0.00017994558285626267, "loss": 0.8156, "step": 13073 }, { "epoch": 0.33570365948186626, "grad_norm": 0.80859375, "learning_rate": 0.00017994290101959047, "loss": 0.9942, "step": 13074 }, { "epoch": 0.3357293366777881, "grad_norm": 1.203125, "learning_rate": 0.00017994021902359935, "loss": 0.9612, "step": 13075 }, { "epoch": 0.33575501387370993, "grad_norm": 0.80078125, "learning_rate": 0.00017993753686829455, "loss": 1.0265, "step": 13076 }, { "epoch": 0.33578069106963176, "grad_norm": 0.8046875, "learning_rate": 0.00017993485455368148, "loss": 0.8836, "step": 13077 }, { "epoch": 0.33580636826555355, "grad_norm": 0.72265625, "learning_rate": 0.00017993217207976547, "loss": 0.845, "step": 13078 }, { "epoch": 0.3358320454614754, "grad_norm": 0.75390625, "learning_rate": 0.00017992948944655187, "loss": 1.0403, "step": 13079 }, { "epoch": 0.3358577226573972, "grad_norm": 0.79296875, "learning_rate": 0.000179926806654046, "loss": 0.9301, "step": 13080 }, { "epoch": 0.335883399853319, "grad_norm": 0.91015625, "learning_rate": 0.00017992412370225323, "loss": 0.951, "step": 13081 }, { "epoch": 0.33590907704924083, "grad_norm": 0.796875, "learning_rate": 0.00017992144059117887, "loss": 0.9425, "step": 13082 }, { "epoch": 0.33593475424516267, "grad_norm": 0.76171875, "learning_rate": 0.00017991875732082836, "loss": 0.894, "step": 13083 }, { "epoch": 0.33596043144108445, "grad_norm": 0.81640625, "learning_rate": 0.00017991607389120692, "loss": 0.7868, "step": 13084 }, { "epoch": 0.3359861086370063, "grad_norm": 0.828125, "learning_rate": 0.00017991339030232003, "loss": 1.1477, "step": 13085 }, { "epoch": 0.3360117858329281, "grad_norm": 0.8359375, "learning_rate": 0.00017991070655417292, "loss": 1.0056, "step": 13086 }, { "epoch": 0.33603746302884996, "grad_norm": 0.85546875, "learning_rate": 0.00017990802264677101, "loss": 0.9566, "step": 13087 }, { "epoch": 0.33606314022477174, "grad_norm": 0.76171875, "learning_rate": 0.00017990533858011962, "loss": 0.9636, "step": 13088 }, { "epoch": 0.3360888174206936, "grad_norm": 0.70703125, "learning_rate": 0.0001799026543542241, "loss": 0.7976, "step": 13089 }, { "epoch": 0.3361144946166154, "grad_norm": 0.80859375, "learning_rate": 0.00017989996996908982, "loss": 1.0743, "step": 13090 }, { "epoch": 0.3361401718125372, "grad_norm": 0.82421875, "learning_rate": 0.0001798972854247221, "loss": 0.8489, "step": 13091 }, { "epoch": 0.33616584900845903, "grad_norm": 0.75, "learning_rate": 0.00017989460072112635, "loss": 0.8433, "step": 13092 }, { "epoch": 0.33619152620438086, "grad_norm": 0.76953125, "learning_rate": 0.00017989191585830784, "loss": 0.9993, "step": 13093 }, { "epoch": 0.33621720340030264, "grad_norm": 0.8203125, "learning_rate": 0.00017988923083627197, "loss": 1.0333, "step": 13094 }, { "epoch": 0.3362428805962245, "grad_norm": 0.8046875, "learning_rate": 0.0001798865456550241, "loss": 0.9216, "step": 13095 }, { "epoch": 0.3362685577921463, "grad_norm": 0.8125, "learning_rate": 0.00017988386031456951, "loss": 0.8747, "step": 13096 }, { "epoch": 0.33629423498806815, "grad_norm": 0.78125, "learning_rate": 0.00017988117481491366, "loss": 0.9312, "step": 13097 }, { "epoch": 0.33631991218398993, "grad_norm": 0.8515625, "learning_rate": 0.00017987848915606184, "loss": 0.932, "step": 13098 }, { "epoch": 0.33634558937991177, "grad_norm": 0.8359375, "learning_rate": 0.00017987580333801936, "loss": 1.0668, "step": 13099 }, { "epoch": 0.3363712665758336, "grad_norm": 0.875, "learning_rate": 0.00017987311736079165, "loss": 1.0207, "step": 13100 }, { "epoch": 0.3363969437717554, "grad_norm": 0.7890625, "learning_rate": 0.00017987043122438402, "loss": 0.9547, "step": 13101 }, { "epoch": 0.3364226209676772, "grad_norm": 0.765625, "learning_rate": 0.00017986774492880186, "loss": 0.9956, "step": 13102 }, { "epoch": 0.33644829816359906, "grad_norm": 0.6796875, "learning_rate": 0.00017986505847405047, "loss": 0.8327, "step": 13103 }, { "epoch": 0.33647397535952084, "grad_norm": 0.84765625, "learning_rate": 0.00017986237186013525, "loss": 0.8606, "step": 13104 }, { "epoch": 0.3364996525554427, "grad_norm": 0.7578125, "learning_rate": 0.00017985968508706153, "loss": 0.9253, "step": 13105 }, { "epoch": 0.3365253297513645, "grad_norm": 0.734375, "learning_rate": 0.00017985699815483466, "loss": 0.9189, "step": 13106 }, { "epoch": 0.33655100694728635, "grad_norm": 0.9140625, "learning_rate": 0.00017985431106346, "loss": 1.0127, "step": 13107 }, { "epoch": 0.3365766841432081, "grad_norm": 0.6953125, "learning_rate": 0.00017985162381294295, "loss": 0.9525, "step": 13108 }, { "epoch": 0.33660236133912996, "grad_norm": 0.7890625, "learning_rate": 0.0001798489364032888, "loss": 0.8867, "step": 13109 }, { "epoch": 0.3366280385350518, "grad_norm": 0.859375, "learning_rate": 0.00017984624883450291, "loss": 0.9354, "step": 13110 }, { "epoch": 0.3366537157309736, "grad_norm": 0.828125, "learning_rate": 0.0001798435611065907, "loss": 0.9391, "step": 13111 }, { "epoch": 0.3366793929268954, "grad_norm": 0.765625, "learning_rate": 0.00017984087321955747, "loss": 0.8229, "step": 13112 }, { "epoch": 0.33670507012281725, "grad_norm": 0.84765625, "learning_rate": 0.0001798381851734086, "loss": 0.966, "step": 13113 }, { "epoch": 0.33673074731873903, "grad_norm": 0.8359375, "learning_rate": 0.0001798354969681494, "loss": 0.9034, "step": 13114 }, { "epoch": 0.33675642451466087, "grad_norm": 0.796875, "learning_rate": 0.0001798328086037853, "loss": 0.8913, "step": 13115 }, { "epoch": 0.3367821017105827, "grad_norm": 0.8359375, "learning_rate": 0.00017983012008032163, "loss": 1.0984, "step": 13116 }, { "epoch": 0.33680777890650454, "grad_norm": 0.77734375, "learning_rate": 0.00017982743139776376, "loss": 0.8951, "step": 13117 }, { "epoch": 0.3368334561024263, "grad_norm": 0.88671875, "learning_rate": 0.000179824742556117, "loss": 0.9311, "step": 13118 }, { "epoch": 0.33685913329834816, "grad_norm": 0.7578125, "learning_rate": 0.00017982205355538672, "loss": 1.1529, "step": 13119 }, { "epoch": 0.33688481049427, "grad_norm": 0.80859375, "learning_rate": 0.0001798193643955783, "loss": 1.0368, "step": 13120 }, { "epoch": 0.3369104876901918, "grad_norm": 0.796875, "learning_rate": 0.00017981667507669714, "loss": 0.8744, "step": 13121 }, { "epoch": 0.3369361648861136, "grad_norm": 0.80859375, "learning_rate": 0.00017981398559874852, "loss": 0.9686, "step": 13122 }, { "epoch": 0.33696184208203545, "grad_norm": 0.77734375, "learning_rate": 0.00017981129596173788, "loss": 0.9487, "step": 13123 }, { "epoch": 0.3369875192779572, "grad_norm": 0.8125, "learning_rate": 0.00017980860616567047, "loss": 1.0068, "step": 13124 }, { "epoch": 0.33701319647387906, "grad_norm": 0.796875, "learning_rate": 0.0001798059162105518, "loss": 1.0215, "step": 13125 }, { "epoch": 0.3370388736698009, "grad_norm": 0.75390625, "learning_rate": 0.00017980322609638705, "loss": 0.8463, "step": 13126 }, { "epoch": 0.33706455086572273, "grad_norm": 0.8984375, "learning_rate": 0.00017980053582318176, "loss": 1.0398, "step": 13127 }, { "epoch": 0.3370902280616445, "grad_norm": 0.76953125, "learning_rate": 0.00017979784539094116, "loss": 0.9117, "step": 13128 }, { "epoch": 0.33711590525756635, "grad_norm": 0.7890625, "learning_rate": 0.00017979515479967068, "loss": 0.9459, "step": 13129 }, { "epoch": 0.3371415824534882, "grad_norm": 0.796875, "learning_rate": 0.00017979246404937566, "loss": 0.9583, "step": 13130 }, { "epoch": 0.33716725964940997, "grad_norm": 0.74609375, "learning_rate": 0.00017978977314006146, "loss": 1.0161, "step": 13131 }, { "epoch": 0.3371929368453318, "grad_norm": 0.76953125, "learning_rate": 0.00017978708207173346, "loss": 0.8056, "step": 13132 }, { "epoch": 0.33721861404125364, "grad_norm": 0.73828125, "learning_rate": 0.00017978439084439702, "loss": 0.9155, "step": 13133 }, { "epoch": 0.3372442912371754, "grad_norm": 0.74609375, "learning_rate": 0.0001797816994580575, "loss": 1.0411, "step": 13134 }, { "epoch": 0.33726996843309726, "grad_norm": 0.796875, "learning_rate": 0.00017977900791272026, "loss": 0.9163, "step": 13135 }, { "epoch": 0.3372956456290191, "grad_norm": 0.75, "learning_rate": 0.00017977631620839064, "loss": 0.8873, "step": 13136 }, { "epoch": 0.33732132282494093, "grad_norm": 0.80859375, "learning_rate": 0.00017977362434507406, "loss": 0.893, "step": 13137 }, { "epoch": 0.3373470000208627, "grad_norm": 0.7890625, "learning_rate": 0.0001797709323227758, "loss": 0.9229, "step": 13138 }, { "epoch": 0.33737267721678454, "grad_norm": 0.76953125, "learning_rate": 0.00017976824014150132, "loss": 1.0475, "step": 13139 }, { "epoch": 0.3373983544127064, "grad_norm": 0.8203125, "learning_rate": 0.00017976554780125593, "loss": 0.9944, "step": 13140 }, { "epoch": 0.33742403160862816, "grad_norm": 0.69921875, "learning_rate": 0.00017976285530204504, "loss": 0.9742, "step": 13141 }, { "epoch": 0.33744970880455, "grad_norm": 0.76953125, "learning_rate": 0.00017976016264387395, "loss": 0.8894, "step": 13142 }, { "epoch": 0.33747538600047183, "grad_norm": 0.78125, "learning_rate": 0.00017975746982674807, "loss": 1.0392, "step": 13143 }, { "epoch": 0.3375010631963936, "grad_norm": 0.765625, "learning_rate": 0.00017975477685067275, "loss": 0.9219, "step": 13144 }, { "epoch": 0.33752674039231545, "grad_norm": 0.79296875, "learning_rate": 0.00017975208371565334, "loss": 0.9521, "step": 13145 }, { "epoch": 0.3375524175882373, "grad_norm": 1.625, "learning_rate": 0.00017974939042169526, "loss": 0.8935, "step": 13146 }, { "epoch": 0.3375780947841591, "grad_norm": 0.70703125, "learning_rate": 0.00017974669696880387, "loss": 0.8534, "step": 13147 }, { "epoch": 0.3376037719800809, "grad_norm": 0.79296875, "learning_rate": 0.0001797440033569845, "loss": 0.9448, "step": 13148 }, { "epoch": 0.33762944917600274, "grad_norm": 0.8125, "learning_rate": 0.00017974130958624255, "loss": 0.9594, "step": 13149 }, { "epoch": 0.3376551263719246, "grad_norm": 0.80078125, "learning_rate": 0.0001797386156565833, "loss": 0.8744, "step": 13150 }, { "epoch": 0.33768080356784635, "grad_norm": 0.80078125, "learning_rate": 0.00017973592156801226, "loss": 1.0331, "step": 13151 }, { "epoch": 0.3377064807637682, "grad_norm": 0.80859375, "learning_rate": 0.00017973322732053472, "loss": 0.9799, "step": 13152 }, { "epoch": 0.33773215795969, "grad_norm": 0.7734375, "learning_rate": 0.00017973053291415604, "loss": 1.0022, "step": 13153 }, { "epoch": 0.3377578351556118, "grad_norm": 0.73828125, "learning_rate": 0.00017972783834888166, "loss": 0.85, "step": 13154 }, { "epoch": 0.33778351235153364, "grad_norm": 0.83203125, "learning_rate": 0.00017972514362471685, "loss": 1.0297, "step": 13155 }, { "epoch": 0.3378091895474555, "grad_norm": 3.15625, "learning_rate": 0.00017972244874166703, "loss": 0.8752, "step": 13156 }, { "epoch": 0.3378348667433773, "grad_norm": 0.8828125, "learning_rate": 0.0001797197536997376, "loss": 0.9812, "step": 13157 }, { "epoch": 0.3378605439392991, "grad_norm": 0.81640625, "learning_rate": 0.0001797170584989339, "loss": 1.0479, "step": 13158 }, { "epoch": 0.33788622113522093, "grad_norm": 0.74609375, "learning_rate": 0.00017971436313926126, "loss": 0.921, "step": 13159 }, { "epoch": 0.33791189833114277, "grad_norm": 0.68359375, "learning_rate": 0.00017971166762072516, "loss": 0.9033, "step": 13160 }, { "epoch": 0.33793757552706455, "grad_norm": 0.7109375, "learning_rate": 0.00017970897194333088, "loss": 0.9592, "step": 13161 }, { "epoch": 0.3379632527229864, "grad_norm": 0.83984375, "learning_rate": 0.0001797062761070838, "loss": 1.0577, "step": 13162 }, { "epoch": 0.3379889299189082, "grad_norm": 0.8125, "learning_rate": 0.0001797035801119893, "loss": 0.9412, "step": 13163 }, { "epoch": 0.33801460711483, "grad_norm": 0.8203125, "learning_rate": 0.0001797008839580528, "loss": 1.0156, "step": 13164 }, { "epoch": 0.33804028431075184, "grad_norm": 0.8203125, "learning_rate": 0.00017969818764527963, "loss": 0.9289, "step": 13165 }, { "epoch": 0.3380659615066737, "grad_norm": 0.7734375, "learning_rate": 0.00017969549117367517, "loss": 0.9157, "step": 13166 }, { "epoch": 0.3380916387025955, "grad_norm": 0.765625, "learning_rate": 0.0001796927945432448, "loss": 0.8917, "step": 13167 }, { "epoch": 0.3381173158985173, "grad_norm": 0.78515625, "learning_rate": 0.00017969009775399387, "loss": 0.9646, "step": 13168 }, { "epoch": 0.3381429930944391, "grad_norm": 0.74609375, "learning_rate": 0.0001796874008059278, "loss": 0.9263, "step": 13169 }, { "epoch": 0.33816867029036096, "grad_norm": 0.75390625, "learning_rate": 0.0001796847036990519, "loss": 0.983, "step": 13170 }, { "epoch": 0.33819434748628274, "grad_norm": 0.8046875, "learning_rate": 0.0001796820064333716, "loss": 0.9482, "step": 13171 }, { "epoch": 0.3382200246822046, "grad_norm": 0.8203125, "learning_rate": 0.00017967930900889228, "loss": 1.0246, "step": 13172 }, { "epoch": 0.3382457018781264, "grad_norm": 0.71484375, "learning_rate": 0.00017967661142561926, "loss": 0.8502, "step": 13173 }, { "epoch": 0.3382713790740482, "grad_norm": 0.8125, "learning_rate": 0.000179673913683558, "loss": 0.9699, "step": 13174 }, { "epoch": 0.33829705626997003, "grad_norm": 0.74609375, "learning_rate": 0.00017967121578271378, "loss": 0.8457, "step": 13175 }, { "epoch": 0.33832273346589187, "grad_norm": 0.77734375, "learning_rate": 0.00017966851772309203, "loss": 0.8242, "step": 13176 }, { "epoch": 0.3383484106618137, "grad_norm": 0.90234375, "learning_rate": 0.00017966581950469815, "loss": 0.9658, "step": 13177 }, { "epoch": 0.3383740878577355, "grad_norm": 0.703125, "learning_rate": 0.00017966312112753746, "loss": 0.9254, "step": 13178 }, { "epoch": 0.3383997650536573, "grad_norm": 0.73828125, "learning_rate": 0.00017966042259161537, "loss": 0.9542, "step": 13179 }, { "epoch": 0.33842544224957916, "grad_norm": 0.796875, "learning_rate": 0.00017965772389693724, "loss": 1.0231, "step": 13180 }, { "epoch": 0.33845111944550094, "grad_norm": 0.796875, "learning_rate": 0.0001796550250435085, "loss": 0.9634, "step": 13181 }, { "epoch": 0.3384767966414228, "grad_norm": 0.95703125, "learning_rate": 0.00017965232603133448, "loss": 0.8783, "step": 13182 }, { "epoch": 0.3385024738373446, "grad_norm": 0.75, "learning_rate": 0.00017964962686042055, "loss": 0.9072, "step": 13183 }, { "epoch": 0.3385281510332664, "grad_norm": 0.78125, "learning_rate": 0.00017964692753077213, "loss": 0.9541, "step": 13184 }, { "epoch": 0.3385538282291882, "grad_norm": 0.73046875, "learning_rate": 0.00017964422804239455, "loss": 0.9982, "step": 13185 }, { "epoch": 0.33857950542511006, "grad_norm": 0.84765625, "learning_rate": 0.00017964152839529326, "loss": 1.0547, "step": 13186 }, { "epoch": 0.3386051826210319, "grad_norm": 0.81640625, "learning_rate": 0.00017963882858947354, "loss": 0.9893, "step": 13187 }, { "epoch": 0.3386308598169537, "grad_norm": 0.75, "learning_rate": 0.00017963612862494088, "loss": 0.8841, "step": 13188 }, { "epoch": 0.3386565370128755, "grad_norm": 0.7109375, "learning_rate": 0.00017963342850170057, "loss": 0.9327, "step": 13189 }, { "epoch": 0.33868221420879735, "grad_norm": 0.7578125, "learning_rate": 0.00017963072821975805, "loss": 0.9299, "step": 13190 }, { "epoch": 0.33870789140471913, "grad_norm": 0.7890625, "learning_rate": 0.0001796280277791187, "loss": 0.8097, "step": 13191 }, { "epoch": 0.33873356860064097, "grad_norm": 0.78515625, "learning_rate": 0.00017962532717978784, "loss": 0.9292, "step": 13192 }, { "epoch": 0.3387592457965628, "grad_norm": 0.73046875, "learning_rate": 0.00017962262642177092, "loss": 0.9756, "step": 13193 }, { "epoch": 0.3387849229924846, "grad_norm": 0.83203125, "learning_rate": 0.00017961992550507333, "loss": 0.9156, "step": 13194 }, { "epoch": 0.3388106001884064, "grad_norm": 0.78125, "learning_rate": 0.00017961722442970035, "loss": 0.8912, "step": 13195 }, { "epoch": 0.33883627738432825, "grad_norm": 0.828125, "learning_rate": 0.00017961452319565748, "loss": 0.9592, "step": 13196 }, { "epoch": 0.3388619545802501, "grad_norm": 0.7265625, "learning_rate": 0.00017961182180295006, "loss": 0.9837, "step": 13197 }, { "epoch": 0.33888763177617187, "grad_norm": 0.83984375, "learning_rate": 0.00017960912025158344, "loss": 0.8598, "step": 13198 }, { "epoch": 0.3389133089720937, "grad_norm": 0.73046875, "learning_rate": 0.00017960641854156305, "loss": 0.9931, "step": 13199 }, { "epoch": 0.33893898616801554, "grad_norm": 0.76953125, "learning_rate": 0.00017960371667289428, "loss": 0.9301, "step": 13200 }, { "epoch": 0.3389646633639373, "grad_norm": 0.78515625, "learning_rate": 0.00017960101464558248, "loss": 0.9625, "step": 13201 }, { "epoch": 0.33899034055985916, "grad_norm": 0.84375, "learning_rate": 0.00017959831245963303, "loss": 0.974, "step": 13202 }, { "epoch": 0.339016017755781, "grad_norm": 0.83984375, "learning_rate": 0.00017959561011505134, "loss": 0.9608, "step": 13203 }, { "epoch": 0.3390416949517028, "grad_norm": 0.76953125, "learning_rate": 0.0001795929076118428, "loss": 1.0457, "step": 13204 }, { "epoch": 0.3390673721476246, "grad_norm": 0.8203125, "learning_rate": 0.0001795902049500128, "loss": 0.9725, "step": 13205 }, { "epoch": 0.33909304934354645, "grad_norm": 0.69921875, "learning_rate": 0.00017958750212956667, "loss": 0.7369, "step": 13206 }, { "epoch": 0.3391187265394683, "grad_norm": 0.87890625, "learning_rate": 0.00017958479915050987, "loss": 0.9912, "step": 13207 }, { "epoch": 0.33914440373539007, "grad_norm": 0.76171875, "learning_rate": 0.00017958209601284773, "loss": 0.9455, "step": 13208 }, { "epoch": 0.3391700809313119, "grad_norm": 0.796875, "learning_rate": 0.00017957939271658567, "loss": 0.8513, "step": 13209 }, { "epoch": 0.33919575812723374, "grad_norm": 0.79296875, "learning_rate": 0.00017957668926172908, "loss": 1.0624, "step": 13210 }, { "epoch": 0.3392214353231555, "grad_norm": 0.79296875, "learning_rate": 0.00017957398564828333, "loss": 1.0647, "step": 13211 }, { "epoch": 0.33924711251907735, "grad_norm": 0.82421875, "learning_rate": 0.00017957128187625382, "loss": 0.838, "step": 13212 }, { "epoch": 0.3392727897149992, "grad_norm": 0.80859375, "learning_rate": 0.00017956857794564592, "loss": 1.1776, "step": 13213 }, { "epoch": 0.33929846691092097, "grad_norm": 0.7109375, "learning_rate": 0.00017956587385646505, "loss": 0.8762, "step": 13214 }, { "epoch": 0.3393241441068428, "grad_norm": 0.8671875, "learning_rate": 0.00017956316960871655, "loss": 0.971, "step": 13215 }, { "epoch": 0.33934982130276464, "grad_norm": 0.796875, "learning_rate": 0.00017956046520240586, "loss": 0.9927, "step": 13216 }, { "epoch": 0.3393754984986865, "grad_norm": 0.74609375, "learning_rate": 0.00017955776063753836, "loss": 0.9442, "step": 13217 }, { "epoch": 0.33940117569460826, "grad_norm": 0.78515625, "learning_rate": 0.00017955505591411942, "loss": 0.8055, "step": 13218 }, { "epoch": 0.3394268528905301, "grad_norm": 0.76171875, "learning_rate": 0.0001795523510321544, "loss": 0.9873, "step": 13219 }, { "epoch": 0.33945253008645193, "grad_norm": 0.8828125, "learning_rate": 0.00017954964599164878, "loss": 1.0275, "step": 13220 }, { "epoch": 0.3394782072823737, "grad_norm": 0.765625, "learning_rate": 0.0001795469407926079, "loss": 0.9109, "step": 13221 }, { "epoch": 0.33950388447829555, "grad_norm": 0.82421875, "learning_rate": 0.00017954423543503712, "loss": 0.9841, "step": 13222 }, { "epoch": 0.3395295616742174, "grad_norm": 0.8125, "learning_rate": 0.00017954152991894192, "loss": 0.9619, "step": 13223 }, { "epoch": 0.33955523887013916, "grad_norm": 0.73828125, "learning_rate": 0.00017953882424432758, "loss": 0.9006, "step": 13224 }, { "epoch": 0.339580916066061, "grad_norm": 0.83984375, "learning_rate": 0.00017953611841119957, "loss": 0.9586, "step": 13225 }, { "epoch": 0.33960659326198284, "grad_norm": 0.7734375, "learning_rate": 0.00017953341241956324, "loss": 0.8588, "step": 13226 }, { "epoch": 0.3396322704579046, "grad_norm": 0.78515625, "learning_rate": 0.00017953070626942403, "loss": 1.0291, "step": 13227 }, { "epoch": 0.33965794765382645, "grad_norm": 0.82421875, "learning_rate": 0.00017952799996078731, "loss": 1.0466, "step": 13228 }, { "epoch": 0.3396836248497483, "grad_norm": 0.87109375, "learning_rate": 0.00017952529349365848, "loss": 0.9898, "step": 13229 }, { "epoch": 0.3397093020456701, "grad_norm": 0.80859375, "learning_rate": 0.0001795225868680429, "loss": 0.9742, "step": 13230 }, { "epoch": 0.3397349792415919, "grad_norm": 0.83984375, "learning_rate": 0.000179519880083946, "loss": 1.0218, "step": 13231 }, { "epoch": 0.33976065643751374, "grad_norm": 0.75390625, "learning_rate": 0.00017951717314137314, "loss": 0.9234, "step": 13232 }, { "epoch": 0.3397863336334356, "grad_norm": 0.79296875, "learning_rate": 0.00017951446604032973, "loss": 0.9297, "step": 13233 }, { "epoch": 0.33981201082935736, "grad_norm": 0.80078125, "learning_rate": 0.00017951175878082117, "loss": 0.8398, "step": 13234 }, { "epoch": 0.3398376880252792, "grad_norm": 0.76953125, "learning_rate": 0.0001795090513628529, "loss": 1.026, "step": 13235 }, { "epoch": 0.33986336522120103, "grad_norm": 0.78125, "learning_rate": 0.00017950634378643024, "loss": 0.8357, "step": 13236 }, { "epoch": 0.3398890424171228, "grad_norm": 0.65625, "learning_rate": 0.00017950363605155864, "loss": 0.7199, "step": 13237 }, { "epoch": 0.33991471961304465, "grad_norm": 0.87109375, "learning_rate": 0.00017950092815824346, "loss": 1.0179, "step": 13238 }, { "epoch": 0.3399403968089665, "grad_norm": 0.7734375, "learning_rate": 0.0001794982201064901, "loss": 0.9175, "step": 13239 }, { "epoch": 0.3399660740048883, "grad_norm": 0.703125, "learning_rate": 0.000179495511896304, "loss": 0.8889, "step": 13240 }, { "epoch": 0.3399917512008101, "grad_norm": 0.765625, "learning_rate": 0.00017949280352769052, "loss": 0.8379, "step": 13241 }, { "epoch": 0.34001742839673194, "grad_norm": 0.796875, "learning_rate": 0.00017949009500065502, "loss": 0.8921, "step": 13242 }, { "epoch": 0.34004310559265377, "grad_norm": 0.8359375, "learning_rate": 0.00017948738631520298, "loss": 1.1115, "step": 13243 }, { "epoch": 0.34006878278857555, "grad_norm": 0.75, "learning_rate": 0.00017948467747133976, "loss": 0.9174, "step": 13244 }, { "epoch": 0.3400944599844974, "grad_norm": 0.78515625, "learning_rate": 0.00017948196846907075, "loss": 0.856, "step": 13245 }, { "epoch": 0.3401201371804192, "grad_norm": 0.8125, "learning_rate": 0.00017947925930840138, "loss": 1.124, "step": 13246 }, { "epoch": 0.340145814376341, "grad_norm": 0.80078125, "learning_rate": 0.00017947654998933698, "loss": 0.8883, "step": 13247 }, { "epoch": 0.34017149157226284, "grad_norm": 0.7109375, "learning_rate": 0.000179473840511883, "loss": 0.8701, "step": 13248 }, { "epoch": 0.3401971687681847, "grad_norm": 0.75, "learning_rate": 0.00017947113087604487, "loss": 0.9367, "step": 13249 }, { "epoch": 0.3402228459641065, "grad_norm": 0.890625, "learning_rate": 0.00017946842108182792, "loss": 0.7404, "step": 13250 }, { "epoch": 0.3402485231600283, "grad_norm": 0.734375, "learning_rate": 0.00017946571112923763, "loss": 0.8195, "step": 13251 }, { "epoch": 0.34027420035595013, "grad_norm": 0.7890625, "learning_rate": 0.0001794630010182793, "loss": 0.8934, "step": 13252 }, { "epoch": 0.34029987755187197, "grad_norm": 0.78515625, "learning_rate": 0.00017946029074895843, "loss": 0.9619, "step": 13253 }, { "epoch": 0.34032555474779375, "grad_norm": 0.78515625, "learning_rate": 0.00017945758032128034, "loss": 0.9607, "step": 13254 }, { "epoch": 0.3403512319437156, "grad_norm": 0.81640625, "learning_rate": 0.0001794548697352505, "loss": 1.0778, "step": 13255 }, { "epoch": 0.3403769091396374, "grad_norm": 0.7890625, "learning_rate": 0.0001794521589908743, "loss": 0.9531, "step": 13256 }, { "epoch": 0.3404025863355592, "grad_norm": 0.77734375, "learning_rate": 0.00017944944808815706, "loss": 0.8809, "step": 13257 }, { "epoch": 0.34042826353148103, "grad_norm": 0.74609375, "learning_rate": 0.0001794467370271043, "loss": 1.0034, "step": 13258 }, { "epoch": 0.34045394072740287, "grad_norm": 0.78125, "learning_rate": 0.00017944402580772134, "loss": 0.9195, "step": 13259 }, { "epoch": 0.3404796179233247, "grad_norm": 0.77734375, "learning_rate": 0.00017944131443001365, "loss": 0.8295, "step": 13260 }, { "epoch": 0.3405052951192465, "grad_norm": 0.80859375, "learning_rate": 0.00017943860289398655, "loss": 1.0332, "step": 13261 }, { "epoch": 0.3405309723151683, "grad_norm": 0.77734375, "learning_rate": 0.00017943589119964553, "loss": 0.915, "step": 13262 }, { "epoch": 0.34055664951109016, "grad_norm": 0.7265625, "learning_rate": 0.00017943317934699591, "loss": 0.9225, "step": 13263 }, { "epoch": 0.34058232670701194, "grad_norm": 0.79296875, "learning_rate": 0.0001794304673360432, "loss": 1.0852, "step": 13264 }, { "epoch": 0.3406080039029338, "grad_norm": 0.7734375, "learning_rate": 0.0001794277551667927, "loss": 0.9581, "step": 13265 }, { "epoch": 0.3406336810988556, "grad_norm": 0.76953125, "learning_rate": 0.00017942504283924987, "loss": 1.0134, "step": 13266 }, { "epoch": 0.3406593582947774, "grad_norm": 0.73828125, "learning_rate": 0.00017942233035342012, "loss": 0.8674, "step": 13267 }, { "epoch": 0.34068503549069923, "grad_norm": 0.81640625, "learning_rate": 0.0001794196177093088, "loss": 1.1068, "step": 13268 }, { "epoch": 0.34071071268662106, "grad_norm": 0.8515625, "learning_rate": 0.00017941690490692137, "loss": 0.969, "step": 13269 }, { "epoch": 0.3407363898825429, "grad_norm": 0.7890625, "learning_rate": 0.00017941419194626322, "loss": 0.9206, "step": 13270 }, { "epoch": 0.3407620670784647, "grad_norm": 0.78125, "learning_rate": 0.00017941147882733976, "loss": 0.8841, "step": 13271 }, { "epoch": 0.3407877442743865, "grad_norm": 0.8359375, "learning_rate": 0.00017940876555015643, "loss": 0.9464, "step": 13272 }, { "epoch": 0.34081342147030835, "grad_norm": 0.734375, "learning_rate": 0.00017940605211471858, "loss": 0.8265, "step": 13273 }, { "epoch": 0.34083909866623013, "grad_norm": 0.83203125, "learning_rate": 0.00017940333852103164, "loss": 0.9456, "step": 13274 }, { "epoch": 0.34086477586215197, "grad_norm": 0.84375, "learning_rate": 0.000179400624769101, "loss": 1.0381, "step": 13275 }, { "epoch": 0.3408904530580738, "grad_norm": 0.8203125, "learning_rate": 0.0001793979108589321, "loss": 1.0175, "step": 13276 }, { "epoch": 0.3409161302539956, "grad_norm": 0.78515625, "learning_rate": 0.00017939519679053035, "loss": 0.8963, "step": 13277 }, { "epoch": 0.3409418074499174, "grad_norm": 0.7890625, "learning_rate": 0.00017939248256390114, "loss": 0.9466, "step": 13278 }, { "epoch": 0.34096748464583926, "grad_norm": 0.75390625, "learning_rate": 0.00017938976817904987, "loss": 0.9859, "step": 13279 }, { "epoch": 0.3409931618417611, "grad_norm": 0.8125, "learning_rate": 0.00017938705363598198, "loss": 0.9058, "step": 13280 }, { "epoch": 0.3410188390376829, "grad_norm": 0.734375, "learning_rate": 0.00017938433893470283, "loss": 0.7748, "step": 13281 }, { "epoch": 0.3410445162336047, "grad_norm": 0.765625, "learning_rate": 0.0001793816240752179, "loss": 0.9406, "step": 13282 }, { "epoch": 0.34107019342952655, "grad_norm": 0.76953125, "learning_rate": 0.00017937890905753257, "loss": 0.9565, "step": 13283 }, { "epoch": 0.3410958706254483, "grad_norm": 0.8359375, "learning_rate": 0.0001793761938816522, "loss": 0.9635, "step": 13284 }, { "epoch": 0.34112154782137016, "grad_norm": 0.7421875, "learning_rate": 0.0001793734785475823, "loss": 0.9239, "step": 13285 }, { "epoch": 0.341147225017292, "grad_norm": 0.80859375, "learning_rate": 0.0001793707630553282, "loss": 0.9225, "step": 13286 }, { "epoch": 0.3411729022132138, "grad_norm": 0.765625, "learning_rate": 0.0001793680474048953, "loss": 0.9612, "step": 13287 }, { "epoch": 0.3411985794091356, "grad_norm": 0.765625, "learning_rate": 0.0001793653315962891, "loss": 0.9209, "step": 13288 }, { "epoch": 0.34122425660505745, "grad_norm": 0.83203125, "learning_rate": 0.00017936261562951494, "loss": 0.9454, "step": 13289 }, { "epoch": 0.3412499338009793, "grad_norm": 0.77734375, "learning_rate": 0.0001793598995045783, "loss": 0.9729, "step": 13290 }, { "epoch": 0.34127561099690107, "grad_norm": 0.796875, "learning_rate": 0.0001793571832214845, "loss": 0.8928, "step": 13291 }, { "epoch": 0.3413012881928229, "grad_norm": 0.77734375, "learning_rate": 0.000179354466780239, "loss": 0.9006, "step": 13292 }, { "epoch": 0.34132696538874474, "grad_norm": 0.828125, "learning_rate": 0.00017935175018084728, "loss": 0.9591, "step": 13293 }, { "epoch": 0.3413526425846665, "grad_norm": 0.7890625, "learning_rate": 0.0001793490334233146, "loss": 0.9654, "step": 13294 }, { "epoch": 0.34137831978058836, "grad_norm": 0.80078125, "learning_rate": 0.00017934631650764652, "loss": 0.9319, "step": 13295 }, { "epoch": 0.3414039969765102, "grad_norm": 0.76171875, "learning_rate": 0.0001793435994338484, "loss": 0.9642, "step": 13296 }, { "epoch": 0.341429674172432, "grad_norm": 0.7265625, "learning_rate": 0.0001793408822019256, "loss": 0.9703, "step": 13297 }, { "epoch": 0.3414553513683538, "grad_norm": 0.78515625, "learning_rate": 0.00017933816481188365, "loss": 1.0764, "step": 13298 }, { "epoch": 0.34148102856427565, "grad_norm": 0.83984375, "learning_rate": 0.0001793354472637279, "loss": 0.8667, "step": 13299 }, { "epoch": 0.3415067057601975, "grad_norm": 0.83203125, "learning_rate": 0.00017933272955746374, "loss": 0.9874, "step": 13300 }, { "epoch": 0.34153238295611926, "grad_norm": 0.734375, "learning_rate": 0.0001793300116930966, "loss": 0.9729, "step": 13301 }, { "epoch": 0.3415580601520411, "grad_norm": 0.76953125, "learning_rate": 0.00017932729367063196, "loss": 0.9924, "step": 13302 }, { "epoch": 0.34158373734796293, "grad_norm": 1.1640625, "learning_rate": 0.0001793245754900752, "loss": 1.0499, "step": 13303 }, { "epoch": 0.3416094145438847, "grad_norm": 0.7734375, "learning_rate": 0.00017932185715143167, "loss": 1.0002, "step": 13304 }, { "epoch": 0.34163509173980655, "grad_norm": 0.78125, "learning_rate": 0.00017931913865470688, "loss": 0.866, "step": 13305 }, { "epoch": 0.3416607689357284, "grad_norm": 0.8515625, "learning_rate": 0.0001793164199999062, "loss": 1.0772, "step": 13306 }, { "epoch": 0.34168644613165017, "grad_norm": 0.88671875, "learning_rate": 0.00017931370118703507, "loss": 1.0776, "step": 13307 }, { "epoch": 0.341712123327572, "grad_norm": 0.765625, "learning_rate": 0.0001793109822160989, "loss": 0.9758, "step": 13308 }, { "epoch": 0.34173780052349384, "grad_norm": 0.76171875, "learning_rate": 0.00017930826308710306, "loss": 0.8847, "step": 13309 }, { "epoch": 0.3417634777194157, "grad_norm": 0.8125, "learning_rate": 0.00017930554380005308, "loss": 1.038, "step": 13310 }, { "epoch": 0.34178915491533746, "grad_norm": 0.76953125, "learning_rate": 0.00017930282435495428, "loss": 0.9145, "step": 13311 }, { "epoch": 0.3418148321112593, "grad_norm": 0.7421875, "learning_rate": 0.00017930010475181212, "loss": 0.9629, "step": 13312 }, { "epoch": 0.34184050930718113, "grad_norm": 0.7890625, "learning_rate": 0.000179297384990632, "loss": 1.0549, "step": 13313 }, { "epoch": 0.3418661865031029, "grad_norm": 0.76171875, "learning_rate": 0.00017929466507141939, "loss": 0.8829, "step": 13314 }, { "epoch": 0.34189186369902475, "grad_norm": 0.734375, "learning_rate": 0.00017929194499417966, "loss": 1.0423, "step": 13315 }, { "epoch": 0.3419175408949466, "grad_norm": 0.84375, "learning_rate": 0.00017928922475891822, "loss": 0.9074, "step": 13316 }, { "epoch": 0.34194321809086836, "grad_norm": 0.80078125, "learning_rate": 0.00017928650436564055, "loss": 0.98, "step": 13317 }, { "epoch": 0.3419688952867902, "grad_norm": 0.8046875, "learning_rate": 0.00017928378381435202, "loss": 1.0122, "step": 13318 }, { "epoch": 0.34199457248271203, "grad_norm": 0.859375, "learning_rate": 0.0001792810631050581, "loss": 0.9985, "step": 13319 }, { "epoch": 0.34202024967863387, "grad_norm": 0.8203125, "learning_rate": 0.00017927834223776414, "loss": 0.9244, "step": 13320 }, { "epoch": 0.34204592687455565, "grad_norm": 0.8125, "learning_rate": 0.00017927562121247562, "loss": 0.9094, "step": 13321 }, { "epoch": 0.3420716040704775, "grad_norm": 0.8515625, "learning_rate": 0.00017927290002919794, "loss": 0.9828, "step": 13322 }, { "epoch": 0.3420972812663993, "grad_norm": 1.3203125, "learning_rate": 0.00017927017868793655, "loss": 0.9806, "step": 13323 }, { "epoch": 0.3421229584623211, "grad_norm": 0.8046875, "learning_rate": 0.00017926745718869686, "loss": 0.8851, "step": 13324 }, { "epoch": 0.34214863565824294, "grad_norm": 0.80078125, "learning_rate": 0.00017926473553148427, "loss": 0.8751, "step": 13325 }, { "epoch": 0.3421743128541648, "grad_norm": 0.73046875, "learning_rate": 0.00017926201371630423, "loss": 0.9391, "step": 13326 }, { "epoch": 0.34219999005008656, "grad_norm": 0.78125, "learning_rate": 0.00017925929174316215, "loss": 0.9869, "step": 13327 }, { "epoch": 0.3422256672460084, "grad_norm": 0.7890625, "learning_rate": 0.00017925656961206347, "loss": 0.9086, "step": 13328 }, { "epoch": 0.3422513444419302, "grad_norm": 1.4296875, "learning_rate": 0.0001792538473230136, "loss": 0.9397, "step": 13329 }, { "epoch": 0.34227702163785206, "grad_norm": 0.78125, "learning_rate": 0.00017925112487601798, "loss": 1.0154, "step": 13330 }, { "epoch": 0.34230269883377384, "grad_norm": 0.7578125, "learning_rate": 0.00017924840227108202, "loss": 0.9615, "step": 13331 }, { "epoch": 0.3423283760296957, "grad_norm": 0.79296875, "learning_rate": 0.00017924567950821114, "loss": 0.9496, "step": 13332 }, { "epoch": 0.3423540532256175, "grad_norm": 0.828125, "learning_rate": 0.0001792429565874108, "loss": 1.0068, "step": 13333 }, { "epoch": 0.3423797304215393, "grad_norm": 0.734375, "learning_rate": 0.00017924023350868642, "loss": 1.0587, "step": 13334 }, { "epoch": 0.34240540761746113, "grad_norm": 0.86328125, "learning_rate": 0.00017923751027204337, "loss": 1.0255, "step": 13335 }, { "epoch": 0.34243108481338297, "grad_norm": 0.77734375, "learning_rate": 0.00017923478687748714, "loss": 0.9345, "step": 13336 }, { "epoch": 0.34245676200930475, "grad_norm": 0.83203125, "learning_rate": 0.00017923206332502313, "loss": 0.9548, "step": 13337 }, { "epoch": 0.3424824392052266, "grad_norm": 0.79296875, "learning_rate": 0.0001792293396146568, "loss": 1.1674, "step": 13338 }, { "epoch": 0.3425081164011484, "grad_norm": 0.83984375, "learning_rate": 0.0001792266157463935, "loss": 0.9316, "step": 13339 }, { "epoch": 0.34253379359707026, "grad_norm": 0.7734375, "learning_rate": 0.00017922389172023878, "loss": 0.8505, "step": 13340 }, { "epoch": 0.34255947079299204, "grad_norm": 0.91015625, "learning_rate": 0.00017922116753619797, "loss": 0.9315, "step": 13341 }, { "epoch": 0.3425851479889139, "grad_norm": 0.73046875, "learning_rate": 0.0001792184431942765, "loss": 0.9612, "step": 13342 }, { "epoch": 0.3426108251848357, "grad_norm": 0.79296875, "learning_rate": 0.00017921571869447986, "loss": 1.0553, "step": 13343 }, { "epoch": 0.3426365023807575, "grad_norm": 0.7421875, "learning_rate": 0.0001792129940368134, "loss": 0.8913, "step": 13344 }, { "epoch": 0.3426621795766793, "grad_norm": 0.73046875, "learning_rate": 0.00017921026922128268, "loss": 0.8794, "step": 13345 }, { "epoch": 0.34268785677260116, "grad_norm": 0.75390625, "learning_rate": 0.000179207544247893, "loss": 0.9167, "step": 13346 }, { "epoch": 0.34271353396852294, "grad_norm": 0.84765625, "learning_rate": 0.00017920481911664986, "loss": 1.054, "step": 13347 }, { "epoch": 0.3427392111644448, "grad_norm": 0.74609375, "learning_rate": 0.00017920209382755865, "loss": 0.9958, "step": 13348 }, { "epoch": 0.3427648883603666, "grad_norm": 0.796875, "learning_rate": 0.00017919936838062484, "loss": 0.9847, "step": 13349 }, { "epoch": 0.34279056555628845, "grad_norm": 0.83203125, "learning_rate": 0.0001791966427758538, "loss": 0.8879, "step": 13350 }, { "epoch": 0.34281624275221023, "grad_norm": 0.86328125, "learning_rate": 0.00017919391701325105, "loss": 0.8982, "step": 13351 }, { "epoch": 0.34284191994813207, "grad_norm": 0.796875, "learning_rate": 0.00017919119109282197, "loss": 0.9272, "step": 13352 }, { "epoch": 0.3428675971440539, "grad_norm": 0.83203125, "learning_rate": 0.00017918846501457198, "loss": 0.9715, "step": 13353 }, { "epoch": 0.3428932743399757, "grad_norm": 0.7890625, "learning_rate": 0.00017918573877850655, "loss": 0.8623, "step": 13354 }, { "epoch": 0.3429189515358975, "grad_norm": 0.90625, "learning_rate": 0.0001791830123846311, "loss": 0.9935, "step": 13355 }, { "epoch": 0.34294462873181936, "grad_norm": 0.83203125, "learning_rate": 0.00017918028583295103, "loss": 1.0934, "step": 13356 }, { "epoch": 0.34297030592774114, "grad_norm": 0.7890625, "learning_rate": 0.00017917755912347181, "loss": 0.9058, "step": 13357 }, { "epoch": 0.342995983123663, "grad_norm": 0.76953125, "learning_rate": 0.0001791748322561989, "loss": 0.828, "step": 13358 }, { "epoch": 0.3430216603195848, "grad_norm": 0.75, "learning_rate": 0.00017917210523113767, "loss": 0.9472, "step": 13359 }, { "epoch": 0.34304733751550665, "grad_norm": 0.81640625, "learning_rate": 0.00017916937804829362, "loss": 1.2961, "step": 13360 }, { "epoch": 0.3430730147114284, "grad_norm": 0.80859375, "learning_rate": 0.00017916665070767212, "loss": 1.0195, "step": 13361 }, { "epoch": 0.34309869190735026, "grad_norm": 0.7890625, "learning_rate": 0.00017916392320927864, "loss": 0.7688, "step": 13362 }, { "epoch": 0.3431243691032721, "grad_norm": 0.7421875, "learning_rate": 0.0001791611955531186, "loss": 0.9165, "step": 13363 }, { "epoch": 0.3431500462991939, "grad_norm": 0.78125, "learning_rate": 0.00017915846773919748, "loss": 0.9076, "step": 13364 }, { "epoch": 0.3431757234951157, "grad_norm": 0.734375, "learning_rate": 0.00017915573976752064, "loss": 0.8736, "step": 13365 }, { "epoch": 0.34320140069103755, "grad_norm": 0.76953125, "learning_rate": 0.0001791530116380936, "loss": 0.9082, "step": 13366 }, { "epoch": 0.34322707788695933, "grad_norm": 0.70703125, "learning_rate": 0.00017915028335092174, "loss": 0.8946, "step": 13367 }, { "epoch": 0.34325275508288117, "grad_norm": 0.78125, "learning_rate": 0.00017914755490601052, "loss": 0.9438, "step": 13368 }, { "epoch": 0.343278432278803, "grad_norm": 0.74609375, "learning_rate": 0.0001791448263033654, "loss": 0.8897, "step": 13369 }, { "epoch": 0.34330410947472484, "grad_norm": 0.75, "learning_rate": 0.00017914209754299175, "loss": 0.9444, "step": 13370 }, { "epoch": 0.3433297866706466, "grad_norm": 0.8515625, "learning_rate": 0.00017913936862489505, "loss": 0.9917, "step": 13371 }, { "epoch": 0.34335546386656846, "grad_norm": 0.79296875, "learning_rate": 0.00017913663954908073, "loss": 0.93, "step": 13372 }, { "epoch": 0.3433811410624903, "grad_norm": 0.78125, "learning_rate": 0.00017913391031555427, "loss": 1.0055, "step": 13373 }, { "epoch": 0.34340681825841207, "grad_norm": 0.8203125, "learning_rate": 0.00017913118092432106, "loss": 0.9582, "step": 13374 }, { "epoch": 0.3434324954543339, "grad_norm": 0.859375, "learning_rate": 0.00017912845137538655, "loss": 0.8889, "step": 13375 }, { "epoch": 0.34345817265025574, "grad_norm": 0.8125, "learning_rate": 0.0001791257216687562, "loss": 1.0015, "step": 13376 }, { "epoch": 0.3434838498461775, "grad_norm": 0.796875, "learning_rate": 0.00017912299180443538, "loss": 0.9096, "step": 13377 }, { "epoch": 0.34350952704209936, "grad_norm": 0.75, "learning_rate": 0.00017912026178242964, "loss": 0.9159, "step": 13378 }, { "epoch": 0.3435352042380212, "grad_norm": 0.73828125, "learning_rate": 0.00017911753160274437, "loss": 1.0148, "step": 13379 }, { "epoch": 0.34356088143394303, "grad_norm": 0.77734375, "learning_rate": 0.00017911480126538498, "loss": 0.9479, "step": 13380 }, { "epoch": 0.3435865586298648, "grad_norm": 0.71875, "learning_rate": 0.00017911207077035693, "loss": 0.9679, "step": 13381 }, { "epoch": 0.34361223582578665, "grad_norm": 0.8671875, "learning_rate": 0.0001791093401176657, "loss": 0.9288, "step": 13382 }, { "epoch": 0.3436379130217085, "grad_norm": 0.7890625, "learning_rate": 0.00017910660930731668, "loss": 0.9694, "step": 13383 }, { "epoch": 0.34366359021763027, "grad_norm": 0.76171875, "learning_rate": 0.00017910387833931535, "loss": 1.004, "step": 13384 }, { "epoch": 0.3436892674135521, "grad_norm": 0.83203125, "learning_rate": 0.0001791011472136671, "loss": 0.8716, "step": 13385 }, { "epoch": 0.34371494460947394, "grad_norm": 0.765625, "learning_rate": 0.00017909841593037745, "loss": 0.8979, "step": 13386 }, { "epoch": 0.3437406218053957, "grad_norm": 0.83203125, "learning_rate": 0.00017909568448945176, "loss": 0.9812, "step": 13387 }, { "epoch": 0.34376629900131755, "grad_norm": 0.7265625, "learning_rate": 0.00017909295289089556, "loss": 0.8899, "step": 13388 }, { "epoch": 0.3437919761972394, "grad_norm": 0.7890625, "learning_rate": 0.0001790902211347142, "loss": 0.9464, "step": 13389 }, { "epoch": 0.3438176533931612, "grad_norm": 0.796875, "learning_rate": 0.0001790874892209132, "loss": 0.8766, "step": 13390 }, { "epoch": 0.343843330589083, "grad_norm": 0.828125, "learning_rate": 0.00017908475714949803, "loss": 0.9128, "step": 13391 }, { "epoch": 0.34386900778500484, "grad_norm": 0.77734375, "learning_rate": 0.000179082024920474, "loss": 0.9775, "step": 13392 }, { "epoch": 0.3438946849809267, "grad_norm": 1.1796875, "learning_rate": 0.00017907929253384669, "loss": 1.027, "step": 13393 }, { "epoch": 0.34392036217684846, "grad_norm": 0.75390625, "learning_rate": 0.00017907655998962145, "loss": 0.8775, "step": 13394 }, { "epoch": 0.3439460393727703, "grad_norm": 0.87109375, "learning_rate": 0.00017907382728780383, "loss": 1.042, "step": 13395 }, { "epoch": 0.34397171656869213, "grad_norm": 0.78515625, "learning_rate": 0.00017907109442839912, "loss": 1.0339, "step": 13396 }, { "epoch": 0.3439973937646139, "grad_norm": 0.80859375, "learning_rate": 0.00017906836141141295, "loss": 0.9112, "step": 13397 }, { "epoch": 0.34402307096053575, "grad_norm": 0.81640625, "learning_rate": 0.00017906562823685065, "loss": 0.9354, "step": 13398 }, { "epoch": 0.3440487481564576, "grad_norm": 0.80859375, "learning_rate": 0.00017906289490471767, "loss": 0.9236, "step": 13399 }, { "epoch": 0.3440744253523794, "grad_norm": 0.796875, "learning_rate": 0.00017906016141501954, "loss": 0.8704, "step": 13400 }, { "epoch": 0.3441001025483012, "grad_norm": 0.78125, "learning_rate": 0.0001790574277677616, "loss": 0.8469, "step": 13401 }, { "epoch": 0.34412577974422304, "grad_norm": 0.80078125, "learning_rate": 0.00017905469396294933, "loss": 0.9255, "step": 13402 }, { "epoch": 0.3441514569401449, "grad_norm": 0.83203125, "learning_rate": 0.00017905196000058825, "loss": 0.9853, "step": 13403 }, { "epoch": 0.34417713413606665, "grad_norm": 0.81640625, "learning_rate": 0.00017904922588068373, "loss": 1.059, "step": 13404 }, { "epoch": 0.3442028113319885, "grad_norm": 0.79296875, "learning_rate": 0.00017904649160324125, "loss": 0.9298, "step": 13405 }, { "epoch": 0.3442284885279103, "grad_norm": 0.7265625, "learning_rate": 0.00017904375716826625, "loss": 0.8829, "step": 13406 }, { "epoch": 0.3442541657238321, "grad_norm": 0.765625, "learning_rate": 0.00017904102257576416, "loss": 1.031, "step": 13407 }, { "epoch": 0.34427984291975394, "grad_norm": 0.78125, "learning_rate": 0.00017903828782574047, "loss": 0.907, "step": 13408 }, { "epoch": 0.3443055201156758, "grad_norm": 0.78515625, "learning_rate": 0.00017903555291820061, "loss": 0.9884, "step": 13409 }, { "epoch": 0.3443311973115976, "grad_norm": 0.72265625, "learning_rate": 0.00017903281785315006, "loss": 0.9425, "step": 13410 }, { "epoch": 0.3443568745075194, "grad_norm": 0.7734375, "learning_rate": 0.00017903008263059423, "loss": 0.9168, "step": 13411 }, { "epoch": 0.34438255170344123, "grad_norm": 0.7421875, "learning_rate": 0.00017902734725053855, "loss": 0.9846, "step": 13412 }, { "epoch": 0.34440822889936307, "grad_norm": 0.77734375, "learning_rate": 0.00017902461171298852, "loss": 0.9582, "step": 13413 }, { "epoch": 0.34443390609528485, "grad_norm": 0.7890625, "learning_rate": 0.0001790218760179496, "loss": 1.1088, "step": 13414 }, { "epoch": 0.3444595832912067, "grad_norm": 0.76953125, "learning_rate": 0.0001790191401654272, "loss": 0.9342, "step": 13415 }, { "epoch": 0.3444852604871285, "grad_norm": 0.7734375, "learning_rate": 0.00017901640415542679, "loss": 0.9686, "step": 13416 }, { "epoch": 0.3445109376830503, "grad_norm": 0.79296875, "learning_rate": 0.00017901366798795385, "loss": 0.9346, "step": 13417 }, { "epoch": 0.34453661487897214, "grad_norm": 0.8359375, "learning_rate": 0.00017901093166301378, "loss": 1.0299, "step": 13418 }, { "epoch": 0.34456229207489397, "grad_norm": 0.80078125, "learning_rate": 0.00017900819518061205, "loss": 1.0008, "step": 13419 }, { "epoch": 0.3445879692708158, "grad_norm": 0.859375, "learning_rate": 0.00017900545854075418, "loss": 1.0227, "step": 13420 }, { "epoch": 0.3446136464667376, "grad_norm": 0.8359375, "learning_rate": 0.0001790027217434455, "loss": 0.8132, "step": 13421 }, { "epoch": 0.3446393236626594, "grad_norm": 0.8203125, "learning_rate": 0.00017899998478869156, "loss": 0.9181, "step": 13422 }, { "epoch": 0.34466500085858126, "grad_norm": 0.7578125, "learning_rate": 0.0001789972476764978, "loss": 0.965, "step": 13423 }, { "epoch": 0.34469067805450304, "grad_norm": 0.77734375, "learning_rate": 0.00017899451040686963, "loss": 0.9511, "step": 13424 }, { "epoch": 0.3447163552504249, "grad_norm": 0.76171875, "learning_rate": 0.00017899177297981254, "loss": 1.0453, "step": 13425 }, { "epoch": 0.3447420324463467, "grad_norm": 0.89453125, "learning_rate": 0.00017898903539533201, "loss": 1.0748, "step": 13426 }, { "epoch": 0.3447677096422685, "grad_norm": 0.76171875, "learning_rate": 0.00017898629765343346, "loss": 0.9145, "step": 13427 }, { "epoch": 0.34479338683819033, "grad_norm": 0.7421875, "learning_rate": 0.00017898355975412233, "loss": 0.8402, "step": 13428 }, { "epoch": 0.34481906403411217, "grad_norm": 0.79296875, "learning_rate": 0.00017898082169740413, "loss": 1.0568, "step": 13429 }, { "epoch": 0.34484474123003395, "grad_norm": 0.76953125, "learning_rate": 0.0001789780834832843, "loss": 0.9052, "step": 13430 }, { "epoch": 0.3448704184259558, "grad_norm": 0.83984375, "learning_rate": 0.00017897534511176824, "loss": 0.883, "step": 13431 }, { "epoch": 0.3448960956218776, "grad_norm": 0.7734375, "learning_rate": 0.00017897260658286145, "loss": 1.0074, "step": 13432 }, { "epoch": 0.34492177281779945, "grad_norm": 0.8125, "learning_rate": 0.0001789698678965694, "loss": 1.0415, "step": 13433 }, { "epoch": 0.34494745001372124, "grad_norm": 0.83984375, "learning_rate": 0.00017896712905289756, "loss": 1.0994, "step": 13434 }, { "epoch": 0.34497312720964307, "grad_norm": 0.734375, "learning_rate": 0.00017896439005185135, "loss": 0.9286, "step": 13435 }, { "epoch": 0.3449988044055649, "grad_norm": 0.76171875, "learning_rate": 0.00017896165089343623, "loss": 1.0236, "step": 13436 }, { "epoch": 0.3450244816014867, "grad_norm": 0.80859375, "learning_rate": 0.0001789589115776577, "loss": 0.9356, "step": 13437 }, { "epoch": 0.3450501587974085, "grad_norm": 0.80859375, "learning_rate": 0.00017895617210452117, "loss": 0.9738, "step": 13438 }, { "epoch": 0.34507583599333036, "grad_norm": 0.77734375, "learning_rate": 0.00017895343247403212, "loss": 0.9499, "step": 13439 }, { "epoch": 0.34510151318925214, "grad_norm": 0.7109375, "learning_rate": 0.00017895069268619604, "loss": 0.8102, "step": 13440 }, { "epoch": 0.345127190385174, "grad_norm": 0.83984375, "learning_rate": 0.00017894795274101831, "loss": 0.836, "step": 13441 }, { "epoch": 0.3451528675810958, "grad_norm": 0.7109375, "learning_rate": 0.00017894521263850448, "loss": 0.9092, "step": 13442 }, { "epoch": 0.34517854477701765, "grad_norm": 0.77734375, "learning_rate": 0.00017894247237865995, "loss": 1.0651, "step": 13443 }, { "epoch": 0.34520422197293943, "grad_norm": 0.88671875, "learning_rate": 0.00017893973196149023, "loss": 0.981, "step": 13444 }, { "epoch": 0.34522989916886127, "grad_norm": 0.7890625, "learning_rate": 0.00017893699138700074, "loss": 1.0069, "step": 13445 }, { "epoch": 0.3452555763647831, "grad_norm": 0.828125, "learning_rate": 0.00017893425065519696, "loss": 0.9374, "step": 13446 }, { "epoch": 0.3452812535607049, "grad_norm": 0.734375, "learning_rate": 0.00017893150976608434, "loss": 0.9614, "step": 13447 }, { "epoch": 0.3453069307566267, "grad_norm": 0.7890625, "learning_rate": 0.00017892876871966834, "loss": 0.8965, "step": 13448 }, { "epoch": 0.34533260795254855, "grad_norm": 0.73046875, "learning_rate": 0.00017892602751595445, "loss": 0.9184, "step": 13449 }, { "epoch": 0.34535828514847033, "grad_norm": 0.8046875, "learning_rate": 0.00017892328615494812, "loss": 0.8813, "step": 13450 }, { "epoch": 0.34538396234439217, "grad_norm": 0.765625, "learning_rate": 0.00017892054463665481, "loss": 1.0506, "step": 13451 }, { "epoch": 0.345409639540314, "grad_norm": 0.8125, "learning_rate": 0.00017891780296107999, "loss": 0.9578, "step": 13452 }, { "epoch": 0.34543531673623584, "grad_norm": 0.75, "learning_rate": 0.0001789150611282291, "loss": 0.8148, "step": 13453 }, { "epoch": 0.3454609939321576, "grad_norm": 0.80078125, "learning_rate": 0.00017891231913810763, "loss": 0.9281, "step": 13454 }, { "epoch": 0.34548667112807946, "grad_norm": 0.7890625, "learning_rate": 0.00017890957699072103, "loss": 1.003, "step": 13455 }, { "epoch": 0.3455123483240013, "grad_norm": 0.8359375, "learning_rate": 0.00017890683468607479, "loss": 1.0482, "step": 13456 }, { "epoch": 0.3455380255199231, "grad_norm": 0.734375, "learning_rate": 0.00017890409222417434, "loss": 0.9365, "step": 13457 }, { "epoch": 0.3455637027158449, "grad_norm": 0.796875, "learning_rate": 0.00017890134960502516, "loss": 0.93, "step": 13458 }, { "epoch": 0.34558937991176675, "grad_norm": 0.71875, "learning_rate": 0.00017889860682863273, "loss": 0.9266, "step": 13459 }, { "epoch": 0.34561505710768853, "grad_norm": 0.87890625, "learning_rate": 0.0001788958638950025, "loss": 0.8644, "step": 13460 }, { "epoch": 0.34564073430361036, "grad_norm": 0.83203125, "learning_rate": 0.00017889312080413992, "loss": 1.0244, "step": 13461 }, { "epoch": 0.3456664114995322, "grad_norm": 0.7734375, "learning_rate": 0.0001788903775560505, "loss": 0.9267, "step": 13462 }, { "epoch": 0.34569208869545404, "grad_norm": 0.8359375, "learning_rate": 0.00017888763415073968, "loss": 1.0224, "step": 13463 }, { "epoch": 0.3457177658913758, "grad_norm": 0.75, "learning_rate": 0.0001788848905882129, "loss": 1.0236, "step": 13464 }, { "epoch": 0.34574344308729765, "grad_norm": 0.80859375, "learning_rate": 0.0001788821468684757, "loss": 1.024, "step": 13465 }, { "epoch": 0.3457691202832195, "grad_norm": 0.734375, "learning_rate": 0.0001788794029915335, "loss": 0.9796, "step": 13466 }, { "epoch": 0.34579479747914127, "grad_norm": 0.74609375, "learning_rate": 0.00017887665895739176, "loss": 1.0301, "step": 13467 }, { "epoch": 0.3458204746750631, "grad_norm": 0.88671875, "learning_rate": 0.00017887391476605595, "loss": 1.0329, "step": 13468 }, { "epoch": 0.34584615187098494, "grad_norm": 0.70703125, "learning_rate": 0.00017887117041753156, "loss": 1.0913, "step": 13469 }, { "epoch": 0.3458718290669067, "grad_norm": 0.80078125, "learning_rate": 0.0001788684259118241, "loss": 0.8998, "step": 13470 }, { "epoch": 0.34589750626282856, "grad_norm": 0.75, "learning_rate": 0.00017886568124893894, "loss": 0.951, "step": 13471 }, { "epoch": 0.3459231834587504, "grad_norm": 1.421875, "learning_rate": 0.00017886293642888163, "loss": 0.9681, "step": 13472 }, { "epoch": 0.34594886065467223, "grad_norm": 0.734375, "learning_rate": 0.00017886019145165757, "loss": 0.8498, "step": 13473 }, { "epoch": 0.345974537850594, "grad_norm": 0.80859375, "learning_rate": 0.0001788574463172723, "loss": 1.0282, "step": 13474 }, { "epoch": 0.34600021504651585, "grad_norm": 0.75, "learning_rate": 0.00017885470102573127, "loss": 0.8834, "step": 13475 }, { "epoch": 0.3460258922424377, "grad_norm": 0.72265625, "learning_rate": 0.00017885195557703997, "loss": 0.9197, "step": 13476 }, { "epoch": 0.34605156943835946, "grad_norm": 0.7890625, "learning_rate": 0.00017884920997120378, "loss": 0.8915, "step": 13477 }, { "epoch": 0.3460772466342813, "grad_norm": 0.75390625, "learning_rate": 0.00017884646420822828, "loss": 0.891, "step": 13478 }, { "epoch": 0.34610292383020314, "grad_norm": 0.796875, "learning_rate": 0.00017884371828811888, "loss": 0.955, "step": 13479 }, { "epoch": 0.3461286010261249, "grad_norm": 0.828125, "learning_rate": 0.00017884097221088107, "loss": 1.0187, "step": 13480 }, { "epoch": 0.34615427822204675, "grad_norm": 0.78125, "learning_rate": 0.00017883822597652033, "loss": 0.9277, "step": 13481 }, { "epoch": 0.3461799554179686, "grad_norm": 0.82421875, "learning_rate": 0.0001788354795850421, "loss": 0.9809, "step": 13482 }, { "epoch": 0.3462056326138904, "grad_norm": 0.78125, "learning_rate": 0.00017883273303645196, "loss": 0.9878, "step": 13483 }, { "epoch": 0.3462313098098122, "grad_norm": 0.9296875, "learning_rate": 0.00017882998633075525, "loss": 0.968, "step": 13484 }, { "epoch": 0.34625698700573404, "grad_norm": 0.73046875, "learning_rate": 0.0001788272394679575, "loss": 0.8905, "step": 13485 }, { "epoch": 0.3462826642016559, "grad_norm": 0.7734375, "learning_rate": 0.00017882449244806416, "loss": 0.9705, "step": 13486 }, { "epoch": 0.34630834139757766, "grad_norm": 0.84375, "learning_rate": 0.00017882174527108076, "loss": 0.8578, "step": 13487 }, { "epoch": 0.3463340185934995, "grad_norm": 0.87890625, "learning_rate": 0.00017881899793701272, "loss": 0.9918, "step": 13488 }, { "epoch": 0.34635969578942133, "grad_norm": 0.83984375, "learning_rate": 0.00017881625044586556, "loss": 0.9505, "step": 13489 }, { "epoch": 0.3463853729853431, "grad_norm": 0.7578125, "learning_rate": 0.00017881350279764472, "loss": 1.0871, "step": 13490 }, { "epoch": 0.34641105018126495, "grad_norm": 0.75, "learning_rate": 0.0001788107549923557, "loss": 0.9481, "step": 13491 }, { "epoch": 0.3464367273771868, "grad_norm": 0.76171875, "learning_rate": 0.00017880800703000397, "loss": 0.924, "step": 13492 }, { "epoch": 0.3464624045731086, "grad_norm": 0.7734375, "learning_rate": 0.00017880525891059496, "loss": 1.0177, "step": 13493 }, { "epoch": 0.3464880817690304, "grad_norm": 0.7734375, "learning_rate": 0.00017880251063413422, "loss": 1.0025, "step": 13494 }, { "epoch": 0.34651375896495223, "grad_norm": 0.69140625, "learning_rate": 0.00017879976220062718, "loss": 0.9239, "step": 13495 }, { "epoch": 0.34653943616087407, "grad_norm": 0.796875, "learning_rate": 0.00017879701361007933, "loss": 0.9655, "step": 13496 }, { "epoch": 0.34656511335679585, "grad_norm": 0.7734375, "learning_rate": 0.00017879426486249616, "loss": 0.9491, "step": 13497 }, { "epoch": 0.3465907905527177, "grad_norm": 0.80078125, "learning_rate": 0.00017879151595788314, "loss": 0.9931, "step": 13498 }, { "epoch": 0.3466164677486395, "grad_norm": 0.77734375, "learning_rate": 0.00017878876689624576, "loss": 1.0549, "step": 13499 }, { "epoch": 0.3466421449445613, "grad_norm": 0.84375, "learning_rate": 0.00017878601767758946, "loss": 1.0178, "step": 13500 }, { "epoch": 0.34666782214048314, "grad_norm": 0.90625, "learning_rate": 0.00017878326830191973, "loss": 0.9427, "step": 13501 }, { "epoch": 0.346693499336405, "grad_norm": 0.79296875, "learning_rate": 0.0001787805187692421, "loss": 0.8724, "step": 13502 }, { "epoch": 0.3467191765323268, "grad_norm": 1.0078125, "learning_rate": 0.000178777769079562, "loss": 0.994, "step": 13503 }, { "epoch": 0.3467448537282486, "grad_norm": 0.80078125, "learning_rate": 0.0001787750192328849, "loss": 0.9916, "step": 13504 }, { "epoch": 0.34677053092417043, "grad_norm": 0.703125, "learning_rate": 0.00017877226922921632, "loss": 0.8995, "step": 13505 }, { "epoch": 0.34679620812009226, "grad_norm": 0.7265625, "learning_rate": 0.00017876951906856172, "loss": 0.7836, "step": 13506 }, { "epoch": 0.34682188531601404, "grad_norm": 0.7578125, "learning_rate": 0.0001787667687509266, "loss": 0.9563, "step": 13507 }, { "epoch": 0.3468475625119359, "grad_norm": 0.70703125, "learning_rate": 0.0001787640182763164, "loss": 0.8211, "step": 13508 }, { "epoch": 0.3468732397078577, "grad_norm": 0.73828125, "learning_rate": 0.00017876126764473663, "loss": 0.8458, "step": 13509 }, { "epoch": 0.3468989169037795, "grad_norm": 0.87109375, "learning_rate": 0.00017875851685619277, "loss": 1.1071, "step": 13510 }, { "epoch": 0.34692459409970133, "grad_norm": 0.8125, "learning_rate": 0.0001787557659106903, "loss": 1.086, "step": 13511 }, { "epoch": 0.34695027129562317, "grad_norm": 0.8125, "learning_rate": 0.00017875301480823472, "loss": 0.884, "step": 13512 }, { "epoch": 0.346975948491545, "grad_norm": 0.78125, "learning_rate": 0.00017875026354883148, "loss": 0.8751, "step": 13513 }, { "epoch": 0.3470016256874668, "grad_norm": 0.7890625, "learning_rate": 0.0001787475121324861, "loss": 0.9397, "step": 13514 }, { "epoch": 0.3470273028833886, "grad_norm": 0.75390625, "learning_rate": 0.000178744760559204, "loss": 0.8703, "step": 13515 }, { "epoch": 0.34705298007931046, "grad_norm": 0.76953125, "learning_rate": 0.00017874200882899075, "loss": 1.0686, "step": 13516 }, { "epoch": 0.34707865727523224, "grad_norm": 0.84765625, "learning_rate": 0.00017873925694185174, "loss": 1.0141, "step": 13517 }, { "epoch": 0.3471043344711541, "grad_norm": 0.84765625, "learning_rate": 0.00017873650489779252, "loss": 0.8758, "step": 13518 }, { "epoch": 0.3471300116670759, "grad_norm": 0.7578125, "learning_rate": 0.0001787337526968186, "loss": 0.8759, "step": 13519 }, { "epoch": 0.3471556888629977, "grad_norm": 0.7578125, "learning_rate": 0.0001787310003389354, "loss": 0.9346, "step": 13520 }, { "epoch": 0.3471813660589195, "grad_norm": 0.7734375, "learning_rate": 0.0001787282478241484, "loss": 0.8897, "step": 13521 }, { "epoch": 0.34720704325484136, "grad_norm": 0.76953125, "learning_rate": 0.00017872549515246313, "loss": 0.9496, "step": 13522 }, { "epoch": 0.3472327204507632, "grad_norm": 0.9375, "learning_rate": 0.00017872274232388506, "loss": 0.8508, "step": 13523 }, { "epoch": 0.347258397646685, "grad_norm": 0.8515625, "learning_rate": 0.00017871998933841968, "loss": 1.0557, "step": 13524 }, { "epoch": 0.3472840748426068, "grad_norm": 0.796875, "learning_rate": 0.00017871723619607247, "loss": 0.9951, "step": 13525 }, { "epoch": 0.34730975203852865, "grad_norm": 0.71875, "learning_rate": 0.0001787144828968489, "loss": 0.7841, "step": 13526 }, { "epoch": 0.34733542923445043, "grad_norm": 0.8515625, "learning_rate": 0.0001787117294407545, "loss": 1.1217, "step": 13527 }, { "epoch": 0.34736110643037227, "grad_norm": 0.87109375, "learning_rate": 0.00017870897582779476, "loss": 0.9658, "step": 13528 }, { "epoch": 0.3473867836262941, "grad_norm": 0.82421875, "learning_rate": 0.00017870622205797508, "loss": 1.0812, "step": 13529 }, { "epoch": 0.3474124608222159, "grad_norm": 0.7890625, "learning_rate": 0.00017870346813130103, "loss": 0.9009, "step": 13530 }, { "epoch": 0.3474381380181377, "grad_norm": 0.80859375, "learning_rate": 0.0001787007140477781, "loss": 1.0017, "step": 13531 }, { "epoch": 0.34746381521405956, "grad_norm": 0.75390625, "learning_rate": 0.00017869795980741176, "loss": 0.8761, "step": 13532 }, { "epoch": 0.3474894924099814, "grad_norm": 0.76171875, "learning_rate": 0.00017869520541020746, "loss": 0.8894, "step": 13533 }, { "epoch": 0.3475151696059032, "grad_norm": 0.76953125, "learning_rate": 0.00017869245085617075, "loss": 0.8688, "step": 13534 }, { "epoch": 0.347540846801825, "grad_norm": 0.83203125, "learning_rate": 0.00017868969614530706, "loss": 0.8637, "step": 13535 }, { "epoch": 0.34756652399774685, "grad_norm": 0.8046875, "learning_rate": 0.00017868694127762193, "loss": 1.0301, "step": 13536 }, { "epoch": 0.3475922011936686, "grad_norm": 0.765625, "learning_rate": 0.00017868418625312086, "loss": 0.9774, "step": 13537 }, { "epoch": 0.34761787838959046, "grad_norm": 0.73828125, "learning_rate": 0.00017868143107180928, "loss": 0.773, "step": 13538 }, { "epoch": 0.3476435555855123, "grad_norm": 0.71875, "learning_rate": 0.00017867867573369273, "loss": 0.8848, "step": 13539 }, { "epoch": 0.3476692327814341, "grad_norm": 0.75390625, "learning_rate": 0.0001786759202387767, "loss": 1.0174, "step": 13540 }, { "epoch": 0.3476949099773559, "grad_norm": 0.8125, "learning_rate": 0.00017867316458706665, "loss": 0.8881, "step": 13541 }, { "epoch": 0.34772058717327775, "grad_norm": 0.7421875, "learning_rate": 0.0001786704087785681, "loss": 0.8998, "step": 13542 }, { "epoch": 0.3477462643691996, "grad_norm": 0.80859375, "learning_rate": 0.0001786676528132865, "loss": 1.0459, "step": 13543 }, { "epoch": 0.34777194156512137, "grad_norm": 0.8203125, "learning_rate": 0.0001786648966912274, "loss": 0.9002, "step": 13544 }, { "epoch": 0.3477976187610432, "grad_norm": 0.73828125, "learning_rate": 0.0001786621404123963, "loss": 0.883, "step": 13545 }, { "epoch": 0.34782329595696504, "grad_norm": 0.77734375, "learning_rate": 0.0001786593839767986, "loss": 0.9148, "step": 13546 }, { "epoch": 0.3478489731528868, "grad_norm": 0.7109375, "learning_rate": 0.00017865662738443988, "loss": 0.9204, "step": 13547 }, { "epoch": 0.34787465034880866, "grad_norm": 0.7421875, "learning_rate": 0.0001786538706353256, "loss": 0.8534, "step": 13548 }, { "epoch": 0.3479003275447305, "grad_norm": 0.703125, "learning_rate": 0.00017865111372946125, "loss": 0.8892, "step": 13549 }, { "epoch": 0.3479260047406523, "grad_norm": 0.71875, "learning_rate": 0.00017864835666685239, "loss": 0.9092, "step": 13550 }, { "epoch": 0.3479516819365741, "grad_norm": 0.796875, "learning_rate": 0.0001786455994475044, "loss": 0.9577, "step": 13551 }, { "epoch": 0.34797735913249594, "grad_norm": 0.83203125, "learning_rate": 0.00017864284207142286, "loss": 0.9671, "step": 13552 }, { "epoch": 0.3480030363284178, "grad_norm": 0.765625, "learning_rate": 0.00017864008453861324, "loss": 0.8302, "step": 13553 }, { "epoch": 0.34802871352433956, "grad_norm": 0.69140625, "learning_rate": 0.00017863732684908102, "loss": 0.876, "step": 13554 }, { "epoch": 0.3480543907202614, "grad_norm": 0.78125, "learning_rate": 0.00017863456900283172, "loss": 0.8251, "step": 13555 }, { "epoch": 0.34808006791618323, "grad_norm": 0.81640625, "learning_rate": 0.0001786318109998708, "loss": 0.9878, "step": 13556 }, { "epoch": 0.348105745112105, "grad_norm": 0.80859375, "learning_rate": 0.00017862905284020384, "loss": 0.8172, "step": 13557 }, { "epoch": 0.34813142230802685, "grad_norm": 0.80859375, "learning_rate": 0.00017862629452383626, "loss": 0.9319, "step": 13558 }, { "epoch": 0.3481570995039487, "grad_norm": 0.75390625, "learning_rate": 0.00017862353605077355, "loss": 0.9017, "step": 13559 }, { "epoch": 0.34818277669987047, "grad_norm": 0.78125, "learning_rate": 0.00017862077742102127, "loss": 0.9873, "step": 13560 }, { "epoch": 0.3482084538957923, "grad_norm": 0.765625, "learning_rate": 0.00017861801863458486, "loss": 1.0032, "step": 13561 }, { "epoch": 0.34823413109171414, "grad_norm": 0.73828125, "learning_rate": 0.00017861525969146985, "loss": 0.8686, "step": 13562 }, { "epoch": 0.348259808287636, "grad_norm": 0.73828125, "learning_rate": 0.00017861250059168172, "loss": 0.9015, "step": 13563 }, { "epoch": 0.34828548548355776, "grad_norm": 0.75, "learning_rate": 0.00017860974133522596, "loss": 0.8778, "step": 13564 }, { "epoch": 0.3483111626794796, "grad_norm": 0.73046875, "learning_rate": 0.00017860698192210812, "loss": 1.0635, "step": 13565 }, { "epoch": 0.3483368398754014, "grad_norm": 0.7890625, "learning_rate": 0.00017860422235233366, "loss": 1.0633, "step": 13566 }, { "epoch": 0.3483625170713232, "grad_norm": 0.74609375, "learning_rate": 0.00017860146262590806, "loss": 0.9275, "step": 13567 }, { "epoch": 0.34838819426724504, "grad_norm": 0.8359375, "learning_rate": 0.00017859870274283685, "loss": 1.0034, "step": 13568 }, { "epoch": 0.3484138714631669, "grad_norm": 0.875, "learning_rate": 0.00017859594270312556, "loss": 1.0497, "step": 13569 }, { "epoch": 0.34843954865908866, "grad_norm": 0.8046875, "learning_rate": 0.00017859318250677963, "loss": 0.9246, "step": 13570 }, { "epoch": 0.3484652258550105, "grad_norm": 0.73828125, "learning_rate": 0.00017859042215380457, "loss": 0.7934, "step": 13571 }, { "epoch": 0.34849090305093233, "grad_norm": 0.734375, "learning_rate": 0.00017858766164420594, "loss": 0.8872, "step": 13572 }, { "epoch": 0.34851658024685417, "grad_norm": 0.83984375, "learning_rate": 0.00017858490097798918, "loss": 0.951, "step": 13573 }, { "epoch": 0.34854225744277595, "grad_norm": 0.78125, "learning_rate": 0.00017858214015515982, "loss": 0.9009, "step": 13574 }, { "epoch": 0.3485679346386978, "grad_norm": 0.83984375, "learning_rate": 0.00017857937917572332, "loss": 1.1248, "step": 13575 }, { "epoch": 0.3485936118346196, "grad_norm": 0.77734375, "learning_rate": 0.00017857661803968525, "loss": 0.9738, "step": 13576 }, { "epoch": 0.3486192890305414, "grad_norm": 0.78515625, "learning_rate": 0.00017857385674705106, "loss": 0.9211, "step": 13577 }, { "epoch": 0.34864496622646324, "grad_norm": 0.8046875, "learning_rate": 0.00017857109529782628, "loss": 0.7846, "step": 13578 }, { "epoch": 0.3486706434223851, "grad_norm": 0.765625, "learning_rate": 0.0001785683336920164, "loss": 0.9088, "step": 13579 }, { "epoch": 0.34869632061830685, "grad_norm": 0.73828125, "learning_rate": 0.00017856557192962692, "loss": 0.8554, "step": 13580 }, { "epoch": 0.3487219978142287, "grad_norm": 1.2109375, "learning_rate": 0.00017856281001066336, "loss": 0.9771, "step": 13581 }, { "epoch": 0.3487476750101505, "grad_norm": 0.79296875, "learning_rate": 0.0001785600479351312, "loss": 0.9429, "step": 13582 }, { "epoch": 0.34877335220607236, "grad_norm": 0.87890625, "learning_rate": 0.00017855728570303597, "loss": 0.9268, "step": 13583 }, { "epoch": 0.34879902940199414, "grad_norm": 0.70703125, "learning_rate": 0.00017855452331438319, "loss": 0.9504, "step": 13584 }, { "epoch": 0.348824706597916, "grad_norm": 1.7890625, "learning_rate": 0.00017855176076917833, "loss": 0.9601, "step": 13585 }, { "epoch": 0.3488503837938378, "grad_norm": 0.796875, "learning_rate": 0.0001785489980674269, "loss": 0.9272, "step": 13586 }, { "epoch": 0.3488760609897596, "grad_norm": 0.8046875, "learning_rate": 0.00017854623520913442, "loss": 1.0426, "step": 13587 }, { "epoch": 0.34890173818568143, "grad_norm": 0.76171875, "learning_rate": 0.00017854347219430636, "loss": 0.8764, "step": 13588 }, { "epoch": 0.34892741538160327, "grad_norm": 0.71484375, "learning_rate": 0.00017854070902294827, "loss": 0.8282, "step": 13589 }, { "epoch": 0.34895309257752505, "grad_norm": 0.76171875, "learning_rate": 0.00017853794569506568, "loss": 1.0456, "step": 13590 }, { "epoch": 0.3489787697734469, "grad_norm": 0.81640625, "learning_rate": 0.00017853518221066402, "loss": 0.9153, "step": 13591 }, { "epoch": 0.3490044469693687, "grad_norm": 0.703125, "learning_rate": 0.00017853241856974884, "loss": 0.8428, "step": 13592 }, { "epoch": 0.34903012416529056, "grad_norm": 0.78125, "learning_rate": 0.00017852965477232563, "loss": 0.9479, "step": 13593 }, { "epoch": 0.34905580136121234, "grad_norm": 0.796875, "learning_rate": 0.00017852689081839993, "loss": 1.0959, "step": 13594 }, { "epoch": 0.3490814785571342, "grad_norm": 0.7734375, "learning_rate": 0.00017852412670797723, "loss": 0.9783, "step": 13595 }, { "epoch": 0.349107155753056, "grad_norm": 0.71484375, "learning_rate": 0.000178521362441063, "loss": 0.8313, "step": 13596 }, { "epoch": 0.3491328329489778, "grad_norm": 0.80078125, "learning_rate": 0.00017851859801766286, "loss": 0.9023, "step": 13597 }, { "epoch": 0.3491585101448996, "grad_norm": 0.74609375, "learning_rate": 0.0001785158334377822, "loss": 0.9255, "step": 13598 }, { "epoch": 0.34918418734082146, "grad_norm": 0.81640625, "learning_rate": 0.0001785130687014266, "loss": 1.0325, "step": 13599 }, { "epoch": 0.34920986453674324, "grad_norm": 0.73828125, "learning_rate": 0.0001785103038086015, "loss": 0.9118, "step": 13600 }, { "epoch": 0.3492355417326651, "grad_norm": 0.86328125, "learning_rate": 0.00017850753875931253, "loss": 1.1273, "step": 13601 }, { "epoch": 0.3492612189285869, "grad_norm": 0.75390625, "learning_rate": 0.00017850477355356507, "loss": 0.9739, "step": 13602 }, { "epoch": 0.34928689612450875, "grad_norm": 0.8515625, "learning_rate": 0.0001785020081913647, "loss": 0.9585, "step": 13603 }, { "epoch": 0.34931257332043053, "grad_norm": 0.85546875, "learning_rate": 0.00017849924267271692, "loss": 1.1458, "step": 13604 }, { "epoch": 0.34933825051635237, "grad_norm": 0.79296875, "learning_rate": 0.00017849647699762724, "loss": 0.9106, "step": 13605 }, { "epoch": 0.3493639277122742, "grad_norm": 0.79296875, "learning_rate": 0.00017849371116610116, "loss": 1.0387, "step": 13606 }, { "epoch": 0.349389604908196, "grad_norm": 0.80859375, "learning_rate": 0.00017849094517814423, "loss": 0.9026, "step": 13607 }, { "epoch": 0.3494152821041178, "grad_norm": 0.84765625, "learning_rate": 0.0001784881790337619, "loss": 0.9777, "step": 13608 }, { "epoch": 0.34944095930003966, "grad_norm": 0.83203125, "learning_rate": 0.00017848541273295974, "loss": 0.8993, "step": 13609 }, { "epoch": 0.34946663649596144, "grad_norm": 0.8125, "learning_rate": 0.00017848264627574328, "loss": 0.9903, "step": 13610 }, { "epoch": 0.34949231369188327, "grad_norm": 0.83984375, "learning_rate": 0.0001784798796621179, "loss": 1.0058, "step": 13611 }, { "epoch": 0.3495179908878051, "grad_norm": 1.1640625, "learning_rate": 0.0001784771128920893, "loss": 0.9045, "step": 13612 }, { "epoch": 0.34954366808372694, "grad_norm": 0.76953125, "learning_rate": 0.00017847434596566286, "loss": 0.8363, "step": 13613 }, { "epoch": 0.3495693452796487, "grad_norm": 0.76171875, "learning_rate": 0.00017847157888284417, "loss": 0.9183, "step": 13614 }, { "epoch": 0.34959502247557056, "grad_norm": 0.8828125, "learning_rate": 0.00017846881164363868, "loss": 0.8645, "step": 13615 }, { "epoch": 0.3496206996714924, "grad_norm": 0.76171875, "learning_rate": 0.00017846604424805195, "loss": 0.9718, "step": 13616 }, { "epoch": 0.3496463768674142, "grad_norm": 0.75, "learning_rate": 0.00017846327669608948, "loss": 0.9146, "step": 13617 }, { "epoch": 0.349672054063336, "grad_norm": 0.8828125, "learning_rate": 0.00017846050898775678, "loss": 1.0469, "step": 13618 }, { "epoch": 0.34969773125925785, "grad_norm": 0.703125, "learning_rate": 0.00017845774112305938, "loss": 1.0776, "step": 13619 }, { "epoch": 0.34972340845517963, "grad_norm": 0.78515625, "learning_rate": 0.0001784549731020028, "loss": 0.9585, "step": 13620 }, { "epoch": 0.34974908565110147, "grad_norm": 0.80078125, "learning_rate": 0.00017845220492459252, "loss": 0.9077, "step": 13621 }, { "epoch": 0.3497747628470233, "grad_norm": 0.94921875, "learning_rate": 0.00017844943659083408, "loss": 1.0945, "step": 13622 }, { "epoch": 0.34980044004294514, "grad_norm": 0.76953125, "learning_rate": 0.00017844666810073304, "loss": 0.9258, "step": 13623 }, { "epoch": 0.3498261172388669, "grad_norm": 0.796875, "learning_rate": 0.00017844389945429482, "loss": 0.9639, "step": 13624 }, { "epoch": 0.34985179443478875, "grad_norm": 0.86328125, "learning_rate": 0.00017844113065152505, "loss": 0.8642, "step": 13625 }, { "epoch": 0.3498774716307106, "grad_norm": 0.7890625, "learning_rate": 0.00017843836169242913, "loss": 0.9101, "step": 13626 }, { "epoch": 0.34990314882663237, "grad_norm": 0.71875, "learning_rate": 0.0001784355925770127, "loss": 0.838, "step": 13627 }, { "epoch": 0.3499288260225542, "grad_norm": 0.734375, "learning_rate": 0.0001784328233052812, "loss": 0.8302, "step": 13628 }, { "epoch": 0.34995450321847604, "grad_norm": 0.77734375, "learning_rate": 0.00017843005387724014, "loss": 0.9066, "step": 13629 }, { "epoch": 0.3499801804143978, "grad_norm": 0.76953125, "learning_rate": 0.0001784272842928951, "loss": 0.8534, "step": 13630 }, { "epoch": 0.35000585761031966, "grad_norm": 0.73046875, "learning_rate": 0.00017842451455225158, "loss": 0.9178, "step": 13631 }, { "epoch": 0.3500315348062415, "grad_norm": 0.734375, "learning_rate": 0.00017842174465531504, "loss": 0.8506, "step": 13632 }, { "epoch": 0.3500572120021633, "grad_norm": 0.8125, "learning_rate": 0.0001784189746020911, "loss": 0.924, "step": 13633 }, { "epoch": 0.3500828891980851, "grad_norm": 0.78515625, "learning_rate": 0.00017841620439258517, "loss": 0.8488, "step": 13634 }, { "epoch": 0.35010856639400695, "grad_norm": 0.74609375, "learning_rate": 0.00017841343402680285, "loss": 0.9962, "step": 13635 }, { "epoch": 0.3501342435899288, "grad_norm": 0.7890625, "learning_rate": 0.00017841066350474965, "loss": 0.9562, "step": 13636 }, { "epoch": 0.35015992078585056, "grad_norm": 0.7890625, "learning_rate": 0.0001784078928264311, "loss": 0.9469, "step": 13637 }, { "epoch": 0.3501855979817724, "grad_norm": 0.7578125, "learning_rate": 0.00017840512199185265, "loss": 0.7553, "step": 13638 }, { "epoch": 0.35021127517769424, "grad_norm": 0.76171875, "learning_rate": 0.0001784023510010199, "loss": 0.9415, "step": 13639 }, { "epoch": 0.350236952373616, "grad_norm": 0.7578125, "learning_rate": 0.00017839957985393837, "loss": 0.9639, "step": 13640 }, { "epoch": 0.35026262956953785, "grad_norm": 0.8671875, "learning_rate": 0.00017839680855061352, "loss": 0.9091, "step": 13641 }, { "epoch": 0.3502883067654597, "grad_norm": 0.82421875, "learning_rate": 0.00017839403709105095, "loss": 0.9786, "step": 13642 }, { "epoch": 0.35031398396138147, "grad_norm": 0.76953125, "learning_rate": 0.0001783912654752561, "loss": 0.9418, "step": 13643 }, { "epoch": 0.3503396611573033, "grad_norm": 0.76171875, "learning_rate": 0.00017838849370323457, "loss": 1.1165, "step": 13644 }, { "epoch": 0.35036533835322514, "grad_norm": 0.796875, "learning_rate": 0.00017838572177499183, "loss": 1.0498, "step": 13645 }, { "epoch": 0.350391015549147, "grad_norm": 0.796875, "learning_rate": 0.00017838294969053348, "loss": 1.0133, "step": 13646 }, { "epoch": 0.35041669274506876, "grad_norm": 0.76953125, "learning_rate": 0.00017838017744986492, "loss": 0.881, "step": 13647 }, { "epoch": 0.3504423699409906, "grad_norm": 0.94140625, "learning_rate": 0.0001783774050529918, "loss": 0.9335, "step": 13648 }, { "epoch": 0.35046804713691243, "grad_norm": 0.78125, "learning_rate": 0.0001783746324999196, "loss": 0.9598, "step": 13649 }, { "epoch": 0.3504937243328342, "grad_norm": 0.76171875, "learning_rate": 0.00017837185979065377, "loss": 0.9234, "step": 13650 }, { "epoch": 0.35051940152875605, "grad_norm": 0.82421875, "learning_rate": 0.00017836908692519995, "loss": 1.0719, "step": 13651 }, { "epoch": 0.3505450787246779, "grad_norm": 0.7578125, "learning_rate": 0.00017836631390356363, "loss": 1.0634, "step": 13652 }, { "epoch": 0.35057075592059966, "grad_norm": 1.1015625, "learning_rate": 0.0001783635407257503, "loss": 0.9294, "step": 13653 }, { "epoch": 0.3505964331165215, "grad_norm": 0.8828125, "learning_rate": 0.0001783607673917655, "loss": 0.9547, "step": 13654 }, { "epoch": 0.35062211031244334, "grad_norm": 0.87109375, "learning_rate": 0.00017835799390161482, "loss": 0.8542, "step": 13655 }, { "epoch": 0.35064778750836517, "grad_norm": 0.87109375, "learning_rate": 0.00017835522025530368, "loss": 1.0357, "step": 13656 }, { "epoch": 0.35067346470428695, "grad_norm": 0.87109375, "learning_rate": 0.0001783524464528377, "loss": 0.9838, "step": 13657 }, { "epoch": 0.3506991419002088, "grad_norm": 0.7578125, "learning_rate": 0.00017834967249422237, "loss": 0.7185, "step": 13658 }, { "epoch": 0.3507248190961306, "grad_norm": 0.80078125, "learning_rate": 0.00017834689837946318, "loss": 0.9844, "step": 13659 }, { "epoch": 0.3507504962920524, "grad_norm": 0.78125, "learning_rate": 0.00017834412410856575, "loss": 0.9854, "step": 13660 }, { "epoch": 0.35077617348797424, "grad_norm": 0.8046875, "learning_rate": 0.0001783413496815355, "loss": 0.9783, "step": 13661 }, { "epoch": 0.3508018506838961, "grad_norm": 0.7578125, "learning_rate": 0.00017833857509837807, "loss": 0.8748, "step": 13662 }, { "epoch": 0.35082752787981786, "grad_norm": 0.6953125, "learning_rate": 0.0001783358003590989, "loss": 0.8529, "step": 13663 }, { "epoch": 0.3508532050757397, "grad_norm": 0.796875, "learning_rate": 0.00017833302546370358, "loss": 0.8536, "step": 13664 }, { "epoch": 0.35087888227166153, "grad_norm": 0.78515625, "learning_rate": 0.00017833025041219763, "loss": 0.89, "step": 13665 }, { "epoch": 0.35090455946758337, "grad_norm": 0.7578125, "learning_rate": 0.00017832747520458655, "loss": 0.9548, "step": 13666 }, { "epoch": 0.35093023666350515, "grad_norm": 0.8515625, "learning_rate": 0.0001783246998408759, "loss": 0.8511, "step": 13667 }, { "epoch": 0.350955913859427, "grad_norm": 0.8046875, "learning_rate": 0.00017832192432107116, "loss": 1.0882, "step": 13668 }, { "epoch": 0.3509815910553488, "grad_norm": 0.765625, "learning_rate": 0.00017831914864517792, "loss": 1.0154, "step": 13669 }, { "epoch": 0.3510072682512706, "grad_norm": 0.8203125, "learning_rate": 0.0001783163728132017, "loss": 0.8899, "step": 13670 }, { "epoch": 0.35103294544719243, "grad_norm": 0.76953125, "learning_rate": 0.00017831359682514802, "loss": 0.9496, "step": 13671 }, { "epoch": 0.35105862264311427, "grad_norm": 0.84765625, "learning_rate": 0.0001783108206810224, "loss": 0.9528, "step": 13672 }, { "epoch": 0.35108429983903605, "grad_norm": 1.0546875, "learning_rate": 0.0001783080443808304, "loss": 0.868, "step": 13673 }, { "epoch": 0.3511099770349579, "grad_norm": 0.82421875, "learning_rate": 0.00017830526792457754, "loss": 0.9184, "step": 13674 }, { "epoch": 0.3511356542308797, "grad_norm": 0.80859375, "learning_rate": 0.00017830249131226938, "loss": 0.969, "step": 13675 }, { "epoch": 0.35116133142680156, "grad_norm": 0.72265625, "learning_rate": 0.00017829971454391138, "loss": 0.9642, "step": 13676 }, { "epoch": 0.35118700862272334, "grad_norm": 0.69921875, "learning_rate": 0.00017829693761950918, "loss": 0.9378, "step": 13677 }, { "epoch": 0.3512126858186452, "grad_norm": 0.81640625, "learning_rate": 0.0001782941605390682, "loss": 1.0306, "step": 13678 }, { "epoch": 0.351238363014567, "grad_norm": 0.76171875, "learning_rate": 0.00017829138330259406, "loss": 0.8815, "step": 13679 }, { "epoch": 0.3512640402104888, "grad_norm": 0.8828125, "learning_rate": 0.00017828860591009225, "loss": 0.9361, "step": 13680 }, { "epoch": 0.35128971740641063, "grad_norm": 0.82421875, "learning_rate": 0.0001782858283615683, "loss": 1.0651, "step": 13681 }, { "epoch": 0.35131539460233246, "grad_norm": 0.796875, "learning_rate": 0.0001782830506570278, "loss": 0.9259, "step": 13682 }, { "epoch": 0.35134107179825425, "grad_norm": 0.76171875, "learning_rate": 0.00017828027279647625, "loss": 0.8274, "step": 13683 }, { "epoch": 0.3513667489941761, "grad_norm": 0.7578125, "learning_rate": 0.00017827749477991917, "loss": 0.9865, "step": 13684 }, { "epoch": 0.3513924261900979, "grad_norm": 0.7265625, "learning_rate": 0.00017827471660736212, "loss": 0.9585, "step": 13685 }, { "epoch": 0.35141810338601975, "grad_norm": 0.7734375, "learning_rate": 0.00017827193827881063, "loss": 0.9185, "step": 13686 }, { "epoch": 0.35144378058194153, "grad_norm": 0.796875, "learning_rate": 0.00017826915979427026, "loss": 0.8461, "step": 13687 }, { "epoch": 0.35146945777786337, "grad_norm": 0.79296875, "learning_rate": 0.0001782663811537465, "loss": 0.8148, "step": 13688 }, { "epoch": 0.3514951349737852, "grad_norm": 0.796875, "learning_rate": 0.00017826360235724493, "loss": 1.0486, "step": 13689 }, { "epoch": 0.351520812169707, "grad_norm": 0.7734375, "learning_rate": 0.00017826082340477106, "loss": 0.9516, "step": 13690 }, { "epoch": 0.3515464893656288, "grad_norm": 0.73828125, "learning_rate": 0.00017825804429633042, "loss": 0.8803, "step": 13691 }, { "epoch": 0.35157216656155066, "grad_norm": 0.796875, "learning_rate": 0.0001782552650319286, "loss": 0.9405, "step": 13692 }, { "epoch": 0.35159784375747244, "grad_norm": 0.7734375, "learning_rate": 0.00017825248561157108, "loss": 0.8584, "step": 13693 }, { "epoch": 0.3516235209533943, "grad_norm": 0.8671875, "learning_rate": 0.00017824970603526344, "loss": 0.8786, "step": 13694 }, { "epoch": 0.3516491981493161, "grad_norm": 0.78515625, "learning_rate": 0.0001782469263030112, "loss": 0.8934, "step": 13695 }, { "epoch": 0.35167487534523795, "grad_norm": 0.85546875, "learning_rate": 0.0001782441464148199, "loss": 0.999, "step": 13696 }, { "epoch": 0.35170055254115973, "grad_norm": 0.72265625, "learning_rate": 0.00017824136637069508, "loss": 0.8727, "step": 13697 }, { "epoch": 0.35172622973708156, "grad_norm": 0.8359375, "learning_rate": 0.0001782385861706423, "loss": 0.9632, "step": 13698 }, { "epoch": 0.3517519069330034, "grad_norm": 0.81640625, "learning_rate": 0.00017823580581466706, "loss": 0.9907, "step": 13699 }, { "epoch": 0.3517775841289252, "grad_norm": 0.73828125, "learning_rate": 0.00017823302530277496, "loss": 1.0201, "step": 13700 }, { "epoch": 0.351803261324847, "grad_norm": 0.8203125, "learning_rate": 0.00017823024463497147, "loss": 0.8519, "step": 13701 }, { "epoch": 0.35182893852076885, "grad_norm": 0.7421875, "learning_rate": 0.0001782274638112622, "loss": 0.9342, "step": 13702 }, { "epoch": 0.35185461571669063, "grad_norm": 0.765625, "learning_rate": 0.00017822468283165262, "loss": 0.9625, "step": 13703 }, { "epoch": 0.35188029291261247, "grad_norm": 0.77734375, "learning_rate": 0.00017822190169614836, "loss": 0.9203, "step": 13704 }, { "epoch": 0.3519059701085343, "grad_norm": 0.828125, "learning_rate": 0.0001782191204047549, "loss": 1.0705, "step": 13705 }, { "epoch": 0.35193164730445614, "grad_norm": 0.7578125, "learning_rate": 0.0001782163389574778, "loss": 0.9899, "step": 13706 }, { "epoch": 0.3519573245003779, "grad_norm": 0.80859375, "learning_rate": 0.00017821355735432262, "loss": 0.9523, "step": 13707 }, { "epoch": 0.35198300169629976, "grad_norm": 0.8046875, "learning_rate": 0.00017821077559529482, "loss": 0.8863, "step": 13708 }, { "epoch": 0.3520086788922216, "grad_norm": 0.83984375, "learning_rate": 0.00017820799368040006, "loss": 0.9517, "step": 13709 }, { "epoch": 0.3520343560881434, "grad_norm": 0.77734375, "learning_rate": 0.00017820521160964386, "loss": 1.0329, "step": 13710 }, { "epoch": 0.3520600332840652, "grad_norm": 0.859375, "learning_rate": 0.00017820242938303168, "loss": 1.147, "step": 13711 }, { "epoch": 0.35208571047998705, "grad_norm": 0.78515625, "learning_rate": 0.00017819964700056912, "loss": 1.0501, "step": 13712 }, { "epoch": 0.3521113876759088, "grad_norm": 0.72265625, "learning_rate": 0.00017819686446226177, "loss": 0.8967, "step": 13713 }, { "epoch": 0.35213706487183066, "grad_norm": 0.7578125, "learning_rate": 0.00017819408176811513, "loss": 0.8558, "step": 13714 }, { "epoch": 0.3521627420677525, "grad_norm": 0.81640625, "learning_rate": 0.00017819129891813473, "loss": 0.8899, "step": 13715 }, { "epoch": 0.35218841926367433, "grad_norm": 0.8359375, "learning_rate": 0.00017818851591232612, "loss": 0.9552, "step": 13716 }, { "epoch": 0.3522140964595961, "grad_norm": 0.74609375, "learning_rate": 0.00017818573275069487, "loss": 0.8645, "step": 13717 }, { "epoch": 0.35223977365551795, "grad_norm": 0.796875, "learning_rate": 0.00017818294943324655, "loss": 0.9818, "step": 13718 }, { "epoch": 0.3522654508514398, "grad_norm": 0.79296875, "learning_rate": 0.00017818016595998664, "loss": 1.0665, "step": 13719 }, { "epoch": 0.35229112804736157, "grad_norm": 0.80078125, "learning_rate": 0.00017817738233092073, "loss": 0.9655, "step": 13720 }, { "epoch": 0.3523168052432834, "grad_norm": 0.83203125, "learning_rate": 0.00017817459854605435, "loss": 0.9638, "step": 13721 }, { "epoch": 0.35234248243920524, "grad_norm": 0.71875, "learning_rate": 0.00017817181460539307, "loss": 0.6858, "step": 13722 }, { "epoch": 0.352368159635127, "grad_norm": 0.8046875, "learning_rate": 0.00017816903050894243, "loss": 0.9955, "step": 13723 }, { "epoch": 0.35239383683104886, "grad_norm": 0.7578125, "learning_rate": 0.00017816624625670795, "loss": 0.8776, "step": 13724 }, { "epoch": 0.3524195140269707, "grad_norm": 0.7578125, "learning_rate": 0.0001781634618486952, "loss": 1.1216, "step": 13725 }, { "epoch": 0.35244519122289253, "grad_norm": 0.83984375, "learning_rate": 0.00017816067728490978, "loss": 0.9508, "step": 13726 }, { "epoch": 0.3524708684188143, "grad_norm": 0.77734375, "learning_rate": 0.00017815789256535712, "loss": 0.8835, "step": 13727 }, { "epoch": 0.35249654561473615, "grad_norm": 0.765625, "learning_rate": 0.00017815510769004288, "loss": 0.9126, "step": 13728 }, { "epoch": 0.352522222810658, "grad_norm": 0.85546875, "learning_rate": 0.00017815232265897256, "loss": 0.897, "step": 13729 }, { "epoch": 0.35254790000657976, "grad_norm": 0.76953125, "learning_rate": 0.00017814953747215175, "loss": 1.0577, "step": 13730 }, { "epoch": 0.3525735772025016, "grad_norm": 0.84375, "learning_rate": 0.00017814675212958595, "loss": 0.9059, "step": 13731 }, { "epoch": 0.35259925439842343, "grad_norm": 0.7734375, "learning_rate": 0.00017814396663128073, "loss": 1.0041, "step": 13732 }, { "epoch": 0.3526249315943452, "grad_norm": 0.79296875, "learning_rate": 0.00017814118097724164, "loss": 0.9428, "step": 13733 }, { "epoch": 0.35265060879026705, "grad_norm": 0.74609375, "learning_rate": 0.00017813839516747423, "loss": 0.8404, "step": 13734 }, { "epoch": 0.3526762859861889, "grad_norm": 0.77734375, "learning_rate": 0.00017813560920198405, "loss": 0.9007, "step": 13735 }, { "epoch": 0.3527019631821107, "grad_norm": 0.83203125, "learning_rate": 0.0001781328230807767, "loss": 0.8389, "step": 13736 }, { "epoch": 0.3527276403780325, "grad_norm": 0.7578125, "learning_rate": 0.00017813003680385765, "loss": 0.8494, "step": 13737 }, { "epoch": 0.35275331757395434, "grad_norm": 0.796875, "learning_rate": 0.00017812725037123252, "loss": 0.9566, "step": 13738 }, { "epoch": 0.3527789947698762, "grad_norm": 0.78125, "learning_rate": 0.00017812446378290684, "loss": 0.8902, "step": 13739 }, { "epoch": 0.35280467196579796, "grad_norm": 0.8046875, "learning_rate": 0.00017812167703888613, "loss": 0.9465, "step": 13740 }, { "epoch": 0.3528303491617198, "grad_norm": 0.78125, "learning_rate": 0.00017811889013917597, "loss": 0.8316, "step": 13741 }, { "epoch": 0.35285602635764163, "grad_norm": 0.8359375, "learning_rate": 0.00017811610308378196, "loss": 0.8979, "step": 13742 }, { "epoch": 0.3528817035535634, "grad_norm": 1.3828125, "learning_rate": 0.00017811331587270961, "loss": 0.9358, "step": 13743 }, { "epoch": 0.35290738074948524, "grad_norm": 0.8671875, "learning_rate": 0.00017811052850596446, "loss": 0.9403, "step": 13744 }, { "epoch": 0.3529330579454071, "grad_norm": 0.79296875, "learning_rate": 0.00017810774098355204, "loss": 0.8876, "step": 13745 }, { "epoch": 0.3529587351413289, "grad_norm": 0.796875, "learning_rate": 0.00017810495330547802, "loss": 0.9706, "step": 13746 }, { "epoch": 0.3529844123372507, "grad_norm": 0.76171875, "learning_rate": 0.00017810216547174785, "loss": 0.8491, "step": 13747 }, { "epoch": 0.35301008953317253, "grad_norm": 0.7890625, "learning_rate": 0.00017809937748236712, "loss": 1.0951, "step": 13748 }, { "epoch": 0.35303576672909437, "grad_norm": 0.7578125, "learning_rate": 0.0001780965893373414, "loss": 0.893, "step": 13749 }, { "epoch": 0.35306144392501615, "grad_norm": 0.72265625, "learning_rate": 0.00017809380103667622, "loss": 0.942, "step": 13750 }, { "epoch": 0.353087121120938, "grad_norm": 0.828125, "learning_rate": 0.00017809101258037716, "loss": 0.8984, "step": 13751 }, { "epoch": 0.3531127983168598, "grad_norm": 0.77734375, "learning_rate": 0.00017808822396844974, "loss": 0.8435, "step": 13752 }, { "epoch": 0.3531384755127816, "grad_norm": 0.8203125, "learning_rate": 0.00017808543520089954, "loss": 1.0056, "step": 13753 }, { "epoch": 0.35316415270870344, "grad_norm": 0.7421875, "learning_rate": 0.00017808264627773216, "loss": 0.9231, "step": 13754 }, { "epoch": 0.3531898299046253, "grad_norm": 0.79296875, "learning_rate": 0.00017807985719895308, "loss": 0.9964, "step": 13755 }, { "epoch": 0.3532155071005471, "grad_norm": 0.86328125, "learning_rate": 0.0001780770679645679, "loss": 0.9715, "step": 13756 }, { "epoch": 0.3532411842964689, "grad_norm": 0.734375, "learning_rate": 0.00017807427857458217, "loss": 0.8539, "step": 13757 }, { "epoch": 0.3532668614923907, "grad_norm": 0.83203125, "learning_rate": 0.00017807148902900153, "loss": 1.0423, "step": 13758 }, { "epoch": 0.35329253868831256, "grad_norm": 0.78515625, "learning_rate": 0.0001780686993278314, "loss": 1.0158, "step": 13759 }, { "epoch": 0.35331821588423434, "grad_norm": 0.75390625, "learning_rate": 0.0001780659094710774, "loss": 0.8407, "step": 13760 }, { "epoch": 0.3533438930801562, "grad_norm": 0.8359375, "learning_rate": 0.00017806311945874512, "loss": 1.1043, "step": 13761 }, { "epoch": 0.353369570276078, "grad_norm": 0.8671875, "learning_rate": 0.00017806032929084008, "loss": 0.9338, "step": 13762 }, { "epoch": 0.3533952474719998, "grad_norm": 0.828125, "learning_rate": 0.00017805753896736785, "loss": 0.8795, "step": 13763 }, { "epoch": 0.35342092466792163, "grad_norm": 0.890625, "learning_rate": 0.000178054748488334, "loss": 1.1235, "step": 13764 }, { "epoch": 0.35344660186384347, "grad_norm": 0.78125, "learning_rate": 0.0001780519578537441, "loss": 0.9479, "step": 13765 }, { "epoch": 0.3534722790597653, "grad_norm": 0.75390625, "learning_rate": 0.0001780491670636037, "loss": 1.0455, "step": 13766 }, { "epoch": 0.3534979562556871, "grad_norm": 0.84765625, "learning_rate": 0.00017804637611791834, "loss": 1.1506, "step": 13767 }, { "epoch": 0.3535236334516089, "grad_norm": 0.80859375, "learning_rate": 0.00017804358501669363, "loss": 1.024, "step": 13768 }, { "epoch": 0.35354931064753076, "grad_norm": 0.8046875, "learning_rate": 0.00017804079375993506, "loss": 0.9711, "step": 13769 }, { "epoch": 0.35357498784345254, "grad_norm": 0.8515625, "learning_rate": 0.00017803800234764827, "loss": 0.9614, "step": 13770 }, { "epoch": 0.3536006650393744, "grad_norm": 0.8046875, "learning_rate": 0.00017803521077983879, "loss": 1.0131, "step": 13771 }, { "epoch": 0.3536263422352962, "grad_norm": 0.74609375, "learning_rate": 0.00017803241905651218, "loss": 0.9351, "step": 13772 }, { "epoch": 0.353652019431218, "grad_norm": 0.8203125, "learning_rate": 0.00017802962717767398, "loss": 0.9604, "step": 13773 }, { "epoch": 0.3536776966271398, "grad_norm": 0.77734375, "learning_rate": 0.00017802683514332984, "loss": 1.0755, "step": 13774 }, { "epoch": 0.35370337382306166, "grad_norm": 0.7265625, "learning_rate": 0.00017802404295348524, "loss": 0.9373, "step": 13775 }, { "epoch": 0.3537290510189835, "grad_norm": 0.8203125, "learning_rate": 0.00017802125060814575, "loss": 1.0202, "step": 13776 }, { "epoch": 0.3537547282149053, "grad_norm": 0.7109375, "learning_rate": 0.00017801845810731698, "loss": 1.0344, "step": 13777 }, { "epoch": 0.3537804054108271, "grad_norm": 0.75390625, "learning_rate": 0.00017801566545100442, "loss": 0.9337, "step": 13778 }, { "epoch": 0.35380608260674895, "grad_norm": 0.80078125, "learning_rate": 0.00017801287263921375, "loss": 0.828, "step": 13779 }, { "epoch": 0.35383175980267073, "grad_norm": 0.765625, "learning_rate": 0.00017801007967195045, "loss": 0.8306, "step": 13780 }, { "epoch": 0.35385743699859257, "grad_norm": 0.80859375, "learning_rate": 0.00017800728654922008, "loss": 0.9575, "step": 13781 }, { "epoch": 0.3538831141945144, "grad_norm": 0.7578125, "learning_rate": 0.00017800449327102827, "loss": 0.9021, "step": 13782 }, { "epoch": 0.3539087913904362, "grad_norm": 0.73828125, "learning_rate": 0.00017800169983738054, "loss": 0.8365, "step": 13783 }, { "epoch": 0.353934468586358, "grad_norm": 0.8984375, "learning_rate": 0.00017799890624828248, "loss": 1.0808, "step": 13784 }, { "epoch": 0.35396014578227986, "grad_norm": 0.859375, "learning_rate": 0.00017799611250373962, "loss": 0.8597, "step": 13785 }, { "epoch": 0.3539858229782017, "grad_norm": 0.86328125, "learning_rate": 0.00017799331860375757, "loss": 1.0693, "step": 13786 }, { "epoch": 0.3540115001741235, "grad_norm": 0.77734375, "learning_rate": 0.00017799052454834188, "loss": 0.8721, "step": 13787 }, { "epoch": 0.3540371773700453, "grad_norm": 0.765625, "learning_rate": 0.0001779877303374981, "loss": 0.8556, "step": 13788 }, { "epoch": 0.35406285456596714, "grad_norm": 0.84765625, "learning_rate": 0.00017798493597123184, "loss": 0.9165, "step": 13789 }, { "epoch": 0.3540885317618889, "grad_norm": 0.828125, "learning_rate": 0.00017798214144954867, "loss": 0.8659, "step": 13790 }, { "epoch": 0.35411420895781076, "grad_norm": 0.765625, "learning_rate": 0.00017797934677245408, "loss": 0.9696, "step": 13791 }, { "epoch": 0.3541398861537326, "grad_norm": 0.71875, "learning_rate": 0.00017797655193995372, "loss": 0.853, "step": 13792 }, { "epoch": 0.3541655633496544, "grad_norm": 0.8125, "learning_rate": 0.00017797375695205317, "loss": 0.8544, "step": 13793 }, { "epoch": 0.3541912405455762, "grad_norm": 0.82421875, "learning_rate": 0.00017797096180875791, "loss": 0.9273, "step": 13794 }, { "epoch": 0.35421691774149805, "grad_norm": 0.703125, "learning_rate": 0.0001779681665100736, "loss": 1.0142, "step": 13795 }, { "epoch": 0.3542425949374199, "grad_norm": 0.83984375, "learning_rate": 0.00017796537105600578, "loss": 0.961, "step": 13796 }, { "epoch": 0.35426827213334167, "grad_norm": 0.83203125, "learning_rate": 0.00017796257544656, "loss": 1.0477, "step": 13797 }, { "epoch": 0.3542939493292635, "grad_norm": 0.75, "learning_rate": 0.00017795977968174185, "loss": 0.9675, "step": 13798 }, { "epoch": 0.35431962652518534, "grad_norm": 0.86328125, "learning_rate": 0.0001779569837615569, "loss": 1.0844, "step": 13799 }, { "epoch": 0.3543453037211071, "grad_norm": 0.84765625, "learning_rate": 0.00017795418768601072, "loss": 0.9514, "step": 13800 }, { "epoch": 0.35437098091702895, "grad_norm": 0.80859375, "learning_rate": 0.00017795139145510892, "loss": 0.9306, "step": 13801 }, { "epoch": 0.3543966581129508, "grad_norm": 0.76953125, "learning_rate": 0.000177948595068857, "loss": 0.9437, "step": 13802 }, { "epoch": 0.35442233530887257, "grad_norm": 0.75, "learning_rate": 0.00017794579852726058, "loss": 0.9191, "step": 13803 }, { "epoch": 0.3544480125047944, "grad_norm": 0.828125, "learning_rate": 0.00017794300183032524, "loss": 0.9923, "step": 13804 }, { "epoch": 0.35447368970071624, "grad_norm": 0.72265625, "learning_rate": 0.00017794020497805652, "loss": 0.9319, "step": 13805 }, { "epoch": 0.3544993668966381, "grad_norm": 0.78515625, "learning_rate": 0.00017793740797046001, "loss": 0.9431, "step": 13806 }, { "epoch": 0.35452504409255986, "grad_norm": 0.88671875, "learning_rate": 0.0001779346108075413, "loss": 0.9515, "step": 13807 }, { "epoch": 0.3545507212884817, "grad_norm": 0.71875, "learning_rate": 0.00017793181348930593, "loss": 0.9512, "step": 13808 }, { "epoch": 0.35457639848440353, "grad_norm": 0.73828125, "learning_rate": 0.00017792901601575953, "loss": 0.902, "step": 13809 }, { "epoch": 0.3546020756803253, "grad_norm": 0.80078125, "learning_rate": 0.00017792621838690761, "loss": 0.9641, "step": 13810 }, { "epoch": 0.35462775287624715, "grad_norm": 0.84765625, "learning_rate": 0.0001779234206027558, "loss": 0.9379, "step": 13811 }, { "epoch": 0.354653430072169, "grad_norm": 0.72265625, "learning_rate": 0.00017792062266330962, "loss": 0.8276, "step": 13812 }, { "epoch": 0.35467910726809077, "grad_norm": 0.8515625, "learning_rate": 0.0001779178245685747, "loss": 1.0115, "step": 13813 }, { "epoch": 0.3547047844640126, "grad_norm": 0.765625, "learning_rate": 0.00017791502631855658, "loss": 1.0371, "step": 13814 }, { "epoch": 0.35473046165993444, "grad_norm": 0.7890625, "learning_rate": 0.00017791222791326087, "loss": 0.9267, "step": 13815 }, { "epoch": 0.3547561388558563, "grad_norm": 0.7734375, "learning_rate": 0.00017790942935269312, "loss": 0.9353, "step": 13816 }, { "epoch": 0.35478181605177805, "grad_norm": 0.78125, "learning_rate": 0.0001779066306368589, "loss": 0.874, "step": 13817 }, { "epoch": 0.3548074932476999, "grad_norm": 0.76953125, "learning_rate": 0.00017790383176576383, "loss": 0.8756, "step": 13818 }, { "epoch": 0.3548331704436217, "grad_norm": 0.73828125, "learning_rate": 0.00017790103273941345, "loss": 0.9983, "step": 13819 }, { "epoch": 0.3548588476395435, "grad_norm": 0.75390625, "learning_rate": 0.00017789823355781334, "loss": 0.977, "step": 13820 }, { "epoch": 0.35488452483546534, "grad_norm": 0.82421875, "learning_rate": 0.0001778954342209691, "loss": 0.9782, "step": 13821 }, { "epoch": 0.3549102020313872, "grad_norm": 0.84375, "learning_rate": 0.00017789263472888628, "loss": 1.1334, "step": 13822 }, { "epoch": 0.35493587922730896, "grad_norm": 0.8359375, "learning_rate": 0.0001778898350815705, "loss": 1.0268, "step": 13823 }, { "epoch": 0.3549615564232308, "grad_norm": 0.79296875, "learning_rate": 0.00017788703527902728, "loss": 0.8679, "step": 13824 }, { "epoch": 0.35498723361915263, "grad_norm": 0.76953125, "learning_rate": 0.00017788423532126224, "loss": 0.8789, "step": 13825 }, { "epoch": 0.35501291081507447, "grad_norm": 0.7265625, "learning_rate": 0.00017788143520828097, "loss": 0.8545, "step": 13826 }, { "epoch": 0.35503858801099625, "grad_norm": 0.83203125, "learning_rate": 0.00017787863494008902, "loss": 0.9348, "step": 13827 }, { "epoch": 0.3550642652069181, "grad_norm": 0.86328125, "learning_rate": 0.000177875834516692, "loss": 1.1236, "step": 13828 }, { "epoch": 0.3550899424028399, "grad_norm": 0.796875, "learning_rate": 0.0001778730339380955, "loss": 0.9601, "step": 13829 }, { "epoch": 0.3551156195987617, "grad_norm": 0.79296875, "learning_rate": 0.00017787023320430502, "loss": 0.8766, "step": 13830 }, { "epoch": 0.35514129679468354, "grad_norm": 0.7890625, "learning_rate": 0.00017786743231532626, "loss": 1.0282, "step": 13831 }, { "epoch": 0.3551669739906054, "grad_norm": 0.80859375, "learning_rate": 0.00017786463127116467, "loss": 0.8998, "step": 13832 }, { "epoch": 0.35519265118652715, "grad_norm": 0.8203125, "learning_rate": 0.00017786183007182594, "loss": 0.968, "step": 13833 }, { "epoch": 0.355218328382449, "grad_norm": 0.73046875, "learning_rate": 0.00017785902871731563, "loss": 0.8629, "step": 13834 }, { "epoch": 0.3552440055783708, "grad_norm": 0.74609375, "learning_rate": 0.0001778562272076393, "loss": 0.8201, "step": 13835 }, { "epoch": 0.35526968277429266, "grad_norm": 0.81640625, "learning_rate": 0.00017785342554280253, "loss": 1.1522, "step": 13836 }, { "epoch": 0.35529535997021444, "grad_norm": 0.7890625, "learning_rate": 0.00017785062372281093, "loss": 0.9903, "step": 13837 }, { "epoch": 0.3553210371661363, "grad_norm": 0.8359375, "learning_rate": 0.00017784782174767004, "loss": 0.9475, "step": 13838 }, { "epoch": 0.3553467143620581, "grad_norm": 0.84375, "learning_rate": 0.00017784501961738553, "loss": 0.9633, "step": 13839 }, { "epoch": 0.3553723915579799, "grad_norm": 0.875, "learning_rate": 0.0001778422173319629, "loss": 1.1138, "step": 13840 }, { "epoch": 0.35539806875390173, "grad_norm": 0.890625, "learning_rate": 0.00017783941489140774, "loss": 1.0189, "step": 13841 }, { "epoch": 0.35542374594982357, "grad_norm": 0.84765625, "learning_rate": 0.00017783661229572566, "loss": 1.0346, "step": 13842 }, { "epoch": 0.35544942314574535, "grad_norm": 0.84375, "learning_rate": 0.00017783380954492227, "loss": 1.1053, "step": 13843 }, { "epoch": 0.3554751003416672, "grad_norm": 0.75390625, "learning_rate": 0.0001778310066390031, "loss": 0.7761, "step": 13844 }, { "epoch": 0.355500777537589, "grad_norm": 0.79296875, "learning_rate": 0.00017782820357797378, "loss": 1.0864, "step": 13845 }, { "epoch": 0.3555264547335108, "grad_norm": 0.78125, "learning_rate": 0.0001778254003618399, "loss": 0.9884, "step": 13846 }, { "epoch": 0.35555213192943264, "grad_norm": 1.03125, "learning_rate": 0.000177822596990607, "loss": 1.026, "step": 13847 }, { "epoch": 0.35557780912535447, "grad_norm": 0.8125, "learning_rate": 0.0001778197934642807, "loss": 0.9087, "step": 13848 }, { "epoch": 0.3556034863212763, "grad_norm": 0.83984375, "learning_rate": 0.00017781698978286658, "loss": 1.0304, "step": 13849 }, { "epoch": 0.3556291635171981, "grad_norm": 0.7734375, "learning_rate": 0.00017781418594637021, "loss": 0.9998, "step": 13850 }, { "epoch": 0.3556548407131199, "grad_norm": 0.80078125, "learning_rate": 0.00017781138195479724, "loss": 0.9267, "step": 13851 }, { "epoch": 0.35568051790904176, "grad_norm": 0.78125, "learning_rate": 0.0001778085778081532, "loss": 0.8858, "step": 13852 }, { "epoch": 0.35570619510496354, "grad_norm": 0.76171875, "learning_rate": 0.00017780577350644364, "loss": 1.0219, "step": 13853 }, { "epoch": 0.3557318723008854, "grad_norm": 0.76953125, "learning_rate": 0.00017780296904967426, "loss": 0.7934, "step": 13854 }, { "epoch": 0.3557575494968072, "grad_norm": 0.69921875, "learning_rate": 0.00017780016443785058, "loss": 0.8806, "step": 13855 }, { "epoch": 0.355783226692729, "grad_norm": 0.81640625, "learning_rate": 0.0001777973596709782, "loss": 0.8177, "step": 13856 }, { "epoch": 0.35580890388865083, "grad_norm": 0.84375, "learning_rate": 0.0001777945547490627, "loss": 0.9253, "step": 13857 }, { "epoch": 0.35583458108457267, "grad_norm": 0.80859375, "learning_rate": 0.0001777917496721097, "loss": 0.9419, "step": 13858 }, { "epoch": 0.3558602582804945, "grad_norm": 0.68359375, "learning_rate": 0.00017778894444012475, "loss": 0.8582, "step": 13859 }, { "epoch": 0.3558859354764163, "grad_norm": 0.7265625, "learning_rate": 0.0001777861390531135, "loss": 1.0751, "step": 13860 }, { "epoch": 0.3559116126723381, "grad_norm": 0.86328125, "learning_rate": 0.00017778333351108142, "loss": 0.9803, "step": 13861 }, { "epoch": 0.35593728986825995, "grad_norm": 0.83984375, "learning_rate": 0.00017778052781403423, "loss": 0.9613, "step": 13862 }, { "epoch": 0.35596296706418173, "grad_norm": 0.8203125, "learning_rate": 0.00017777772196197748, "loss": 0.805, "step": 13863 }, { "epoch": 0.35598864426010357, "grad_norm": 0.87890625, "learning_rate": 0.00017777491595491674, "loss": 0.976, "step": 13864 }, { "epoch": 0.3560143214560254, "grad_norm": 0.703125, "learning_rate": 0.00017777210979285764, "loss": 0.8854, "step": 13865 }, { "epoch": 0.3560399986519472, "grad_norm": 0.83203125, "learning_rate": 0.00017776930347580572, "loss": 0.9285, "step": 13866 }, { "epoch": 0.356065675847869, "grad_norm": 0.8046875, "learning_rate": 0.00017776649700376664, "loss": 0.94, "step": 13867 }, { "epoch": 0.35609135304379086, "grad_norm": 0.76953125, "learning_rate": 0.00017776369037674595, "loss": 0.7742, "step": 13868 }, { "epoch": 0.3561170302397127, "grad_norm": 0.76171875, "learning_rate": 0.0001777608835947492, "loss": 0.9345, "step": 13869 }, { "epoch": 0.3561427074356345, "grad_norm": 0.75390625, "learning_rate": 0.0001777580766577821, "loss": 0.9283, "step": 13870 }, { "epoch": 0.3561683846315563, "grad_norm": 0.8125, "learning_rate": 0.00017775526956585012, "loss": 0.8859, "step": 13871 }, { "epoch": 0.35619406182747815, "grad_norm": 1.09375, "learning_rate": 0.00017775246231895896, "loss": 0.9769, "step": 13872 }, { "epoch": 0.35621973902339993, "grad_norm": 0.8125, "learning_rate": 0.00017774965491711413, "loss": 0.8587, "step": 13873 }, { "epoch": 0.35624541621932176, "grad_norm": 0.77734375, "learning_rate": 0.00017774684736032126, "loss": 0.9903, "step": 13874 }, { "epoch": 0.3562710934152436, "grad_norm": 0.796875, "learning_rate": 0.00017774403964858598, "loss": 0.8921, "step": 13875 }, { "epoch": 0.3562967706111654, "grad_norm": 0.7734375, "learning_rate": 0.00017774123178191382, "loss": 0.995, "step": 13876 }, { "epoch": 0.3563224478070872, "grad_norm": 0.765625, "learning_rate": 0.0001777384237603104, "loss": 1.0105, "step": 13877 }, { "epoch": 0.35634812500300905, "grad_norm": 0.8203125, "learning_rate": 0.00017773561558378137, "loss": 0.9028, "step": 13878 }, { "epoch": 0.3563738021989309, "grad_norm": 0.80078125, "learning_rate": 0.00017773280725233223, "loss": 0.8881, "step": 13879 }, { "epoch": 0.35639947939485267, "grad_norm": 0.8359375, "learning_rate": 0.00017772999876596864, "loss": 1.0374, "step": 13880 }, { "epoch": 0.3564251565907745, "grad_norm": 0.82421875, "learning_rate": 0.0001777271901246962, "loss": 1.0844, "step": 13881 }, { "epoch": 0.35645083378669634, "grad_norm": 0.8515625, "learning_rate": 0.0001777243813285205, "loss": 0.9283, "step": 13882 }, { "epoch": 0.3564765109826181, "grad_norm": 0.78515625, "learning_rate": 0.0001777215723774471, "loss": 0.9656, "step": 13883 }, { "epoch": 0.35650218817853996, "grad_norm": 1.2578125, "learning_rate": 0.00017771876327148166, "loss": 0.981, "step": 13884 }, { "epoch": 0.3565278653744618, "grad_norm": 0.7265625, "learning_rate": 0.0001777159540106297, "loss": 0.8223, "step": 13885 }, { "epoch": 0.3565535425703836, "grad_norm": 0.859375, "learning_rate": 0.00017771314459489687, "loss": 1.0344, "step": 13886 }, { "epoch": 0.3565792197663054, "grad_norm": 0.7734375, "learning_rate": 0.0001777103350242888, "loss": 0.8739, "step": 13887 }, { "epoch": 0.35660489696222725, "grad_norm": 0.8671875, "learning_rate": 0.00017770752529881105, "loss": 0.8899, "step": 13888 }, { "epoch": 0.3566305741581491, "grad_norm": 1.1640625, "learning_rate": 0.00017770471541846917, "loss": 0.9013, "step": 13889 }, { "epoch": 0.35665625135407086, "grad_norm": 0.7890625, "learning_rate": 0.00017770190538326887, "loss": 0.9609, "step": 13890 }, { "epoch": 0.3566819285499927, "grad_norm": 0.9140625, "learning_rate": 0.00017769909519321566, "loss": 1.0836, "step": 13891 }, { "epoch": 0.35670760574591454, "grad_norm": 0.81640625, "learning_rate": 0.00017769628484831518, "loss": 0.9468, "step": 13892 }, { "epoch": 0.3567332829418363, "grad_norm": 0.77734375, "learning_rate": 0.00017769347434857302, "loss": 0.8576, "step": 13893 }, { "epoch": 0.35675896013775815, "grad_norm": 0.80859375, "learning_rate": 0.0001776906636939948, "loss": 1.0476, "step": 13894 }, { "epoch": 0.35678463733368, "grad_norm": 0.796875, "learning_rate": 0.00017768785288458606, "loss": 0.9533, "step": 13895 }, { "epoch": 0.35681031452960177, "grad_norm": 0.76171875, "learning_rate": 0.0001776850419203525, "loss": 0.9002, "step": 13896 }, { "epoch": 0.3568359917255236, "grad_norm": 0.76953125, "learning_rate": 0.00017768223080129962, "loss": 0.8632, "step": 13897 }, { "epoch": 0.35686166892144544, "grad_norm": 1.0390625, "learning_rate": 0.00017767941952743312, "loss": 0.8909, "step": 13898 }, { "epoch": 0.3568873461173673, "grad_norm": 0.84765625, "learning_rate": 0.00017767660809875853, "loss": 0.8952, "step": 13899 }, { "epoch": 0.35691302331328906, "grad_norm": 1.0078125, "learning_rate": 0.00017767379651528146, "loss": 0.9662, "step": 13900 }, { "epoch": 0.3569387005092109, "grad_norm": 1.0390625, "learning_rate": 0.00017767098477700755, "loss": 0.9991, "step": 13901 }, { "epoch": 0.35696437770513273, "grad_norm": 1.1015625, "learning_rate": 0.00017766817288394237, "loss": 0.8607, "step": 13902 }, { "epoch": 0.3569900549010545, "grad_norm": 0.76171875, "learning_rate": 0.00017766536083609156, "loss": 0.8252, "step": 13903 }, { "epoch": 0.35701573209697635, "grad_norm": 0.80078125, "learning_rate": 0.0001776625486334607, "loss": 1.0095, "step": 13904 }, { "epoch": 0.3570414092928982, "grad_norm": 0.78515625, "learning_rate": 0.00017765973627605536, "loss": 0.9418, "step": 13905 }, { "epoch": 0.35706708648881996, "grad_norm": 0.82421875, "learning_rate": 0.00017765692376388122, "loss": 1.0198, "step": 13906 }, { "epoch": 0.3570927636847418, "grad_norm": 0.70703125, "learning_rate": 0.0001776541110969438, "loss": 0.9803, "step": 13907 }, { "epoch": 0.35711844088066363, "grad_norm": 0.76171875, "learning_rate": 0.0001776512982752488, "loss": 0.9965, "step": 13908 }, { "epoch": 0.35714411807658547, "grad_norm": 0.796875, "learning_rate": 0.00017764848529880177, "loss": 1.0259, "step": 13909 }, { "epoch": 0.35716979527250725, "grad_norm": 0.91796875, "learning_rate": 0.00017764567216760831, "loss": 0.9504, "step": 13910 }, { "epoch": 0.3571954724684291, "grad_norm": 0.79296875, "learning_rate": 0.00017764285888167402, "loss": 0.8087, "step": 13911 }, { "epoch": 0.3572211496643509, "grad_norm": 0.94140625, "learning_rate": 0.00017764004544100454, "loss": 1.0733, "step": 13912 }, { "epoch": 0.3572468268602727, "grad_norm": 1.46875, "learning_rate": 0.00017763723184560548, "loss": 1.0632, "step": 13913 }, { "epoch": 0.35727250405619454, "grad_norm": 0.76953125, "learning_rate": 0.0001776344180954824, "loss": 0.896, "step": 13914 }, { "epoch": 0.3572981812521164, "grad_norm": 0.84375, "learning_rate": 0.00017763160419064097, "loss": 0.9717, "step": 13915 }, { "epoch": 0.35732385844803816, "grad_norm": 0.7578125, "learning_rate": 0.00017762879013108674, "loss": 1.0469, "step": 13916 }, { "epoch": 0.35734953564396, "grad_norm": 0.81640625, "learning_rate": 0.00017762597591682537, "loss": 0.878, "step": 13917 }, { "epoch": 0.35737521283988183, "grad_norm": 0.86328125, "learning_rate": 0.00017762316154786243, "loss": 1.0468, "step": 13918 }, { "epoch": 0.35740089003580366, "grad_norm": 0.94140625, "learning_rate": 0.00017762034702420353, "loss": 1.0165, "step": 13919 }, { "epoch": 0.35742656723172544, "grad_norm": 0.81640625, "learning_rate": 0.0001776175323458543, "loss": 0.8842, "step": 13920 }, { "epoch": 0.3574522444276473, "grad_norm": 0.7734375, "learning_rate": 0.00017761471751282033, "loss": 0.9769, "step": 13921 }, { "epoch": 0.3574779216235691, "grad_norm": 0.82421875, "learning_rate": 0.00017761190252510724, "loss": 1.0256, "step": 13922 }, { "epoch": 0.3575035988194909, "grad_norm": 0.79296875, "learning_rate": 0.00017760908738272065, "loss": 0.9258, "step": 13923 }, { "epoch": 0.35752927601541273, "grad_norm": 0.8046875, "learning_rate": 0.00017760627208566617, "loss": 1.0937, "step": 13924 }, { "epoch": 0.35755495321133457, "grad_norm": 0.8984375, "learning_rate": 0.00017760345663394938, "loss": 0.8857, "step": 13925 }, { "epoch": 0.35758063040725635, "grad_norm": 0.8515625, "learning_rate": 0.0001776006410275759, "loss": 0.9947, "step": 13926 }, { "epoch": 0.3576063076031782, "grad_norm": 0.82421875, "learning_rate": 0.00017759782526655135, "loss": 0.9427, "step": 13927 }, { "epoch": 0.3576319847991, "grad_norm": 0.77734375, "learning_rate": 0.00017759500935088135, "loss": 1.0549, "step": 13928 }, { "epoch": 0.35765766199502186, "grad_norm": 0.86328125, "learning_rate": 0.00017759219328057152, "loss": 0.9992, "step": 13929 }, { "epoch": 0.35768333919094364, "grad_norm": 0.8203125, "learning_rate": 0.00017758937705562747, "loss": 0.923, "step": 13930 }, { "epoch": 0.3577090163868655, "grad_norm": 0.73828125, "learning_rate": 0.00017758656067605477, "loss": 0.9347, "step": 13931 }, { "epoch": 0.3577346935827873, "grad_norm": 0.78515625, "learning_rate": 0.00017758374414185906, "loss": 0.9571, "step": 13932 }, { "epoch": 0.3577603707787091, "grad_norm": 0.94921875, "learning_rate": 0.000177580927453046, "loss": 1.0714, "step": 13933 }, { "epoch": 0.3577860479746309, "grad_norm": 0.73046875, "learning_rate": 0.00017757811060962114, "loss": 0.9337, "step": 13934 }, { "epoch": 0.35781172517055276, "grad_norm": 0.875, "learning_rate": 0.00017757529361159006, "loss": 0.846, "step": 13935 }, { "epoch": 0.35783740236647454, "grad_norm": 0.796875, "learning_rate": 0.0001775724764589585, "loss": 0.9458, "step": 13936 }, { "epoch": 0.3578630795623964, "grad_norm": 0.74609375, "learning_rate": 0.00017756965915173196, "loss": 0.9019, "step": 13937 }, { "epoch": 0.3578887567583182, "grad_norm": 0.734375, "learning_rate": 0.0001775668416899161, "loss": 0.9384, "step": 13938 }, { "epoch": 0.35791443395424005, "grad_norm": 0.78125, "learning_rate": 0.00017756402407351655, "loss": 0.9582, "step": 13939 }, { "epoch": 0.35794011115016183, "grad_norm": 1.3359375, "learning_rate": 0.0001775612063025389, "loss": 0.8645, "step": 13940 }, { "epoch": 0.35796578834608367, "grad_norm": 0.77734375, "learning_rate": 0.00017755838837698877, "loss": 1.013, "step": 13941 }, { "epoch": 0.3579914655420055, "grad_norm": 0.81640625, "learning_rate": 0.00017755557029687177, "loss": 0.8721, "step": 13942 }, { "epoch": 0.3580171427379273, "grad_norm": 0.828125, "learning_rate": 0.00017755275206219354, "loss": 1.0379, "step": 13943 }, { "epoch": 0.3580428199338491, "grad_norm": 0.80859375, "learning_rate": 0.00017754993367295966, "loss": 1.1322, "step": 13944 }, { "epoch": 0.35806849712977096, "grad_norm": 0.765625, "learning_rate": 0.0001775471151291758, "loss": 0.9269, "step": 13945 }, { "epoch": 0.35809417432569274, "grad_norm": 0.81640625, "learning_rate": 0.0001775442964308475, "loss": 0.8503, "step": 13946 }, { "epoch": 0.3581198515216146, "grad_norm": 0.77734375, "learning_rate": 0.00017754147757798044, "loss": 1.1703, "step": 13947 }, { "epoch": 0.3581455287175364, "grad_norm": 0.8203125, "learning_rate": 0.00017753865857058024, "loss": 0.9669, "step": 13948 }, { "epoch": 0.35817120591345825, "grad_norm": 0.8046875, "learning_rate": 0.00017753583940865246, "loss": 1.0226, "step": 13949 }, { "epoch": 0.35819688310938, "grad_norm": 0.76953125, "learning_rate": 0.00017753302009220279, "loss": 0.9051, "step": 13950 }, { "epoch": 0.35822256030530186, "grad_norm": 0.80078125, "learning_rate": 0.0001775302006212368, "loss": 0.9339, "step": 13951 }, { "epoch": 0.3582482375012237, "grad_norm": 0.83203125, "learning_rate": 0.0001775273809957601, "loss": 1.1391, "step": 13952 }, { "epoch": 0.3582739146971455, "grad_norm": 0.7890625, "learning_rate": 0.00017752456121577836, "loss": 1.068, "step": 13953 }, { "epoch": 0.3582995918930673, "grad_norm": 0.7734375, "learning_rate": 0.0001775217412812972, "loss": 0.8812, "step": 13954 }, { "epoch": 0.35832526908898915, "grad_norm": 0.80859375, "learning_rate": 0.00017751892119232215, "loss": 1.0232, "step": 13955 }, { "epoch": 0.35835094628491093, "grad_norm": 0.78125, "learning_rate": 0.00017751610094885893, "loss": 0.9354, "step": 13956 }, { "epoch": 0.35837662348083277, "grad_norm": 0.7734375, "learning_rate": 0.00017751328055091311, "loss": 0.8501, "step": 13957 }, { "epoch": 0.3584023006767546, "grad_norm": 0.78515625, "learning_rate": 0.00017751045999849033, "loss": 0.9219, "step": 13958 }, { "epoch": 0.35842797787267644, "grad_norm": 0.82421875, "learning_rate": 0.00017750763929159623, "loss": 0.9642, "step": 13959 }, { "epoch": 0.3584536550685982, "grad_norm": 0.77734375, "learning_rate": 0.00017750481843023636, "loss": 1.0441, "step": 13960 }, { "epoch": 0.35847933226452006, "grad_norm": 0.8515625, "learning_rate": 0.0001775019974144164, "loss": 0.954, "step": 13961 }, { "epoch": 0.3585050094604419, "grad_norm": 0.80859375, "learning_rate": 0.000177499176244142, "loss": 1.0355, "step": 13962 }, { "epoch": 0.3585306866563637, "grad_norm": 0.79296875, "learning_rate": 0.0001774963549194187, "loss": 0.9078, "step": 13963 }, { "epoch": 0.3585563638522855, "grad_norm": 0.74609375, "learning_rate": 0.00017749353344025218, "loss": 0.9549, "step": 13964 }, { "epoch": 0.35858204104820735, "grad_norm": 0.859375, "learning_rate": 0.00017749071180664803, "loss": 1.0008, "step": 13965 }, { "epoch": 0.3586077182441291, "grad_norm": 0.77734375, "learning_rate": 0.00017748789001861191, "loss": 1.0592, "step": 13966 }, { "epoch": 0.35863339544005096, "grad_norm": 0.8515625, "learning_rate": 0.0001774850680761494, "loss": 0.8687, "step": 13967 }, { "epoch": 0.3586590726359728, "grad_norm": 0.78515625, "learning_rate": 0.00017748224597926616, "loss": 0.8485, "step": 13968 }, { "epoch": 0.35868474983189463, "grad_norm": 0.73046875, "learning_rate": 0.00017747942372796783, "loss": 0.8593, "step": 13969 }, { "epoch": 0.3587104270278164, "grad_norm": 0.7890625, "learning_rate": 0.00017747660132225998, "loss": 0.954, "step": 13970 }, { "epoch": 0.35873610422373825, "grad_norm": 0.7734375, "learning_rate": 0.0001774737787621483, "loss": 0.8758, "step": 13971 }, { "epoch": 0.3587617814196601, "grad_norm": 0.82421875, "learning_rate": 0.00017747095604763832, "loss": 0.989, "step": 13972 }, { "epoch": 0.35878745861558187, "grad_norm": 0.84375, "learning_rate": 0.00017746813317873574, "loss": 0.9623, "step": 13973 }, { "epoch": 0.3588131358115037, "grad_norm": 0.9453125, "learning_rate": 0.0001774653101554462, "loss": 0.9726, "step": 13974 }, { "epoch": 0.35883881300742554, "grad_norm": 0.79296875, "learning_rate": 0.00017746248697777525, "loss": 0.8588, "step": 13975 }, { "epoch": 0.3588644902033473, "grad_norm": 1.1875, "learning_rate": 0.00017745966364572858, "loss": 0.94, "step": 13976 }, { "epoch": 0.35889016739926916, "grad_norm": 0.859375, "learning_rate": 0.00017745684015931182, "loss": 0.8749, "step": 13977 }, { "epoch": 0.358915844595191, "grad_norm": 0.77734375, "learning_rate": 0.00017745401651853056, "loss": 0.9478, "step": 13978 }, { "epoch": 0.3589415217911128, "grad_norm": 0.82421875, "learning_rate": 0.00017745119272339043, "loss": 0.9092, "step": 13979 }, { "epoch": 0.3589671989870346, "grad_norm": 0.83203125, "learning_rate": 0.00017744836877389708, "loss": 0.787, "step": 13980 }, { "epoch": 0.35899287618295644, "grad_norm": 0.80078125, "learning_rate": 0.0001774455446700561, "loss": 0.8209, "step": 13981 }, { "epoch": 0.3590185533788783, "grad_norm": 0.83984375, "learning_rate": 0.00017744272041187315, "loss": 0.9978, "step": 13982 }, { "epoch": 0.35904423057480006, "grad_norm": 0.71484375, "learning_rate": 0.0001774398959993539, "loss": 0.8733, "step": 13983 }, { "epoch": 0.3590699077707219, "grad_norm": 0.7734375, "learning_rate": 0.0001774370714325039, "loss": 0.9339, "step": 13984 }, { "epoch": 0.35909558496664373, "grad_norm": 0.734375, "learning_rate": 0.0001774342467113288, "loss": 1.0327, "step": 13985 }, { "epoch": 0.3591212621625655, "grad_norm": 0.79296875, "learning_rate": 0.00017743142183583425, "loss": 0.8941, "step": 13986 }, { "epoch": 0.35914693935848735, "grad_norm": 0.7890625, "learning_rate": 0.0001774285968060259, "loss": 0.9673, "step": 13987 }, { "epoch": 0.3591726165544092, "grad_norm": 0.76171875, "learning_rate": 0.0001774257716219093, "loss": 0.9062, "step": 13988 }, { "epoch": 0.359198293750331, "grad_norm": 0.8046875, "learning_rate": 0.00017742294628349017, "loss": 0.8092, "step": 13989 }, { "epoch": 0.3592239709462528, "grad_norm": 0.78515625, "learning_rate": 0.0001774201207907741, "loss": 0.9641, "step": 13990 }, { "epoch": 0.35924964814217464, "grad_norm": 0.8203125, "learning_rate": 0.0001774172951437667, "loss": 0.9328, "step": 13991 }, { "epoch": 0.3592753253380965, "grad_norm": 0.88671875, "learning_rate": 0.00017741446934247364, "loss": 0.9976, "step": 13992 }, { "epoch": 0.35930100253401825, "grad_norm": 0.8046875, "learning_rate": 0.00017741164338690052, "loss": 1.0381, "step": 13993 }, { "epoch": 0.3593266797299401, "grad_norm": 0.765625, "learning_rate": 0.000177408817277053, "loss": 0.9131, "step": 13994 }, { "epoch": 0.3593523569258619, "grad_norm": 0.828125, "learning_rate": 0.0001774059910129367, "loss": 0.8908, "step": 13995 }, { "epoch": 0.3593780341217837, "grad_norm": 0.75390625, "learning_rate": 0.00017740316459455727, "loss": 0.9489, "step": 13996 }, { "epoch": 0.35940371131770554, "grad_norm": 0.7734375, "learning_rate": 0.0001774003380219203, "loss": 0.9511, "step": 13997 }, { "epoch": 0.3594293885136274, "grad_norm": 0.7421875, "learning_rate": 0.00017739751129503142, "loss": 0.9825, "step": 13998 }, { "epoch": 0.3594550657095492, "grad_norm": 0.79296875, "learning_rate": 0.00017739468441389634, "loss": 0.9537, "step": 13999 }, { "epoch": 0.359480742905471, "grad_norm": 0.765625, "learning_rate": 0.00017739185737852063, "loss": 0.9151, "step": 14000 }, { "epoch": 0.359480742905471, "eval_loss": 0.9459433555603027, "eval_model_preparation_time": 0.0065, "eval_runtime": 409.3412, "eval_samples_per_second": 24.429, "eval_steps_per_second": 0.765, "step": 14000 }, { "epoch": 0.35950642010139283, "grad_norm": 0.79296875, "learning_rate": 0.00017738903018890993, "loss": 0.9163, "step": 14001 }, { "epoch": 0.35953209729731467, "grad_norm": 0.75390625, "learning_rate": 0.0001773862028450699, "loss": 1.0208, "step": 14002 }, { "epoch": 0.35955777449323645, "grad_norm": 0.734375, "learning_rate": 0.00017738337534700614, "loss": 1.0114, "step": 14003 }, { "epoch": 0.3595834516891583, "grad_norm": 0.7109375, "learning_rate": 0.0001773805476947243, "loss": 0.83, "step": 14004 }, { "epoch": 0.3596091288850801, "grad_norm": 0.77734375, "learning_rate": 0.00017737771988823005, "loss": 0.9798, "step": 14005 }, { "epoch": 0.3596348060810019, "grad_norm": 0.7578125, "learning_rate": 0.00017737489192752896, "loss": 0.908, "step": 14006 }, { "epoch": 0.35966048327692374, "grad_norm": 0.76171875, "learning_rate": 0.0001773720638126267, "loss": 0.9574, "step": 14007 }, { "epoch": 0.3596861604728456, "grad_norm": 0.77734375, "learning_rate": 0.00017736923554352894, "loss": 0.8844, "step": 14008 }, { "epoch": 0.3597118376687674, "grad_norm": 0.71875, "learning_rate": 0.00017736640712024126, "loss": 0.9067, "step": 14009 }, { "epoch": 0.3597375148646892, "grad_norm": 0.734375, "learning_rate": 0.00017736357854276932, "loss": 0.8635, "step": 14010 }, { "epoch": 0.359763192060611, "grad_norm": 0.76171875, "learning_rate": 0.00017736074981111873, "loss": 0.8563, "step": 14011 }, { "epoch": 0.35978886925653286, "grad_norm": 0.76953125, "learning_rate": 0.0001773579209252952, "loss": 0.9579, "step": 14012 }, { "epoch": 0.35981454645245464, "grad_norm": 1.03125, "learning_rate": 0.0001773550918853043, "loss": 1.0, "step": 14013 }, { "epoch": 0.3598402236483765, "grad_norm": 0.77734375, "learning_rate": 0.0001773522626911517, "loss": 0.8497, "step": 14014 }, { "epoch": 0.3598659008442983, "grad_norm": 0.734375, "learning_rate": 0.000177349433342843, "loss": 0.9691, "step": 14015 }, { "epoch": 0.3598915780402201, "grad_norm": 0.77734375, "learning_rate": 0.00017734660384038388, "loss": 0.8864, "step": 14016 }, { "epoch": 0.35991725523614193, "grad_norm": 0.86328125, "learning_rate": 0.00017734377418378, "loss": 0.9752, "step": 14017 }, { "epoch": 0.35994293243206377, "grad_norm": 0.78515625, "learning_rate": 0.0001773409443730369, "loss": 0.9233, "step": 14018 }, { "epoch": 0.3599686096279856, "grad_norm": 0.8125, "learning_rate": 0.00017733811440816033, "loss": 0.8993, "step": 14019 }, { "epoch": 0.3599942868239074, "grad_norm": 0.87109375, "learning_rate": 0.0001773352842891559, "loss": 0.9025, "step": 14020 }, { "epoch": 0.3600199640198292, "grad_norm": 0.71484375, "learning_rate": 0.0001773324540160292, "loss": 0.9005, "step": 14021 }, { "epoch": 0.36004564121575106, "grad_norm": 0.7734375, "learning_rate": 0.00017732962358878588, "loss": 0.9378, "step": 14022 }, { "epoch": 0.36007131841167284, "grad_norm": 0.8671875, "learning_rate": 0.00017732679300743164, "loss": 0.9072, "step": 14023 }, { "epoch": 0.36009699560759467, "grad_norm": 0.84375, "learning_rate": 0.0001773239622719721, "loss": 1.0257, "step": 14024 }, { "epoch": 0.3601226728035165, "grad_norm": 0.78125, "learning_rate": 0.00017732113138241288, "loss": 1.0487, "step": 14025 }, { "epoch": 0.3601483499994383, "grad_norm": 0.77734375, "learning_rate": 0.0001773183003387596, "loss": 1.0028, "step": 14026 }, { "epoch": 0.3601740271953601, "grad_norm": 0.7734375, "learning_rate": 0.00017731546914101796, "loss": 0.9562, "step": 14027 }, { "epoch": 0.36019970439128196, "grad_norm": 0.765625, "learning_rate": 0.00017731263778919355, "loss": 0.915, "step": 14028 }, { "epoch": 0.3602253815872038, "grad_norm": 0.890625, "learning_rate": 0.00017730980628329206, "loss": 0.9521, "step": 14029 }, { "epoch": 0.3602510587831256, "grad_norm": 0.77734375, "learning_rate": 0.0001773069746233191, "loss": 0.8413, "step": 14030 }, { "epoch": 0.3602767359790474, "grad_norm": 0.76953125, "learning_rate": 0.0001773041428092803, "loss": 0.8845, "step": 14031 }, { "epoch": 0.36030241317496925, "grad_norm": 0.765625, "learning_rate": 0.00017730131084118137, "loss": 1.1272, "step": 14032 }, { "epoch": 0.36032809037089103, "grad_norm": 0.76171875, "learning_rate": 0.0001772984787190279, "loss": 0.9525, "step": 14033 }, { "epoch": 0.36035376756681287, "grad_norm": 0.7109375, "learning_rate": 0.00017729564644282551, "loss": 0.9633, "step": 14034 }, { "epoch": 0.3603794447627347, "grad_norm": 1.4375, "learning_rate": 0.00017729281401257993, "loss": 0.8359, "step": 14035 }, { "epoch": 0.3604051219586565, "grad_norm": 0.859375, "learning_rate": 0.00017728998142829672, "loss": 0.8682, "step": 14036 }, { "epoch": 0.3604307991545783, "grad_norm": 0.78125, "learning_rate": 0.00017728714868998156, "loss": 0.8134, "step": 14037 }, { "epoch": 0.36045647635050015, "grad_norm": 0.83984375, "learning_rate": 0.0001772843157976401, "loss": 0.9076, "step": 14038 }, { "epoch": 0.360482153546422, "grad_norm": 0.74609375, "learning_rate": 0.00017728148275127797, "loss": 0.8576, "step": 14039 }, { "epoch": 0.36050783074234377, "grad_norm": 0.8046875, "learning_rate": 0.00017727864955090087, "loss": 0.8534, "step": 14040 }, { "epoch": 0.3605335079382656, "grad_norm": 0.8203125, "learning_rate": 0.00017727581619651433, "loss": 1.0013, "step": 14041 }, { "epoch": 0.36055918513418744, "grad_norm": 0.84375, "learning_rate": 0.00017727298268812414, "loss": 1.1051, "step": 14042 }, { "epoch": 0.3605848623301092, "grad_norm": 0.75, "learning_rate": 0.00017727014902573583, "loss": 0.8294, "step": 14043 }, { "epoch": 0.36061053952603106, "grad_norm": 0.765625, "learning_rate": 0.00017726731520935509, "loss": 0.9229, "step": 14044 }, { "epoch": 0.3606362167219529, "grad_norm": 0.94921875, "learning_rate": 0.0001772644812389876, "loss": 0.9038, "step": 14045 }, { "epoch": 0.3606618939178747, "grad_norm": 0.734375, "learning_rate": 0.00017726164711463896, "loss": 0.8916, "step": 14046 }, { "epoch": 0.3606875711137965, "grad_norm": 0.8828125, "learning_rate": 0.00017725881283631482, "loss": 1.0838, "step": 14047 }, { "epoch": 0.36071324830971835, "grad_norm": 0.7578125, "learning_rate": 0.00017725597840402086, "loss": 0.9547, "step": 14048 }, { "epoch": 0.36073892550564013, "grad_norm": 0.83203125, "learning_rate": 0.00017725314381776273, "loss": 0.9925, "step": 14049 }, { "epoch": 0.36076460270156196, "grad_norm": 0.77734375, "learning_rate": 0.00017725030907754605, "loss": 1.0079, "step": 14050 }, { "epoch": 0.3607902798974838, "grad_norm": 0.80078125, "learning_rate": 0.00017724747418337646, "loss": 0.9655, "step": 14051 }, { "epoch": 0.36081595709340564, "grad_norm": 0.78125, "learning_rate": 0.00017724463913525965, "loss": 1.0549, "step": 14052 }, { "epoch": 0.3608416342893274, "grad_norm": 0.78125, "learning_rate": 0.00017724180393320128, "loss": 0.9465, "step": 14053 }, { "epoch": 0.36086731148524925, "grad_norm": 0.78515625, "learning_rate": 0.00017723896857720694, "loss": 0.9027, "step": 14054 }, { "epoch": 0.3608929886811711, "grad_norm": 0.82421875, "learning_rate": 0.0001772361330672823, "loss": 0.9974, "step": 14055 }, { "epoch": 0.36091866587709287, "grad_norm": 0.7890625, "learning_rate": 0.00017723329740343307, "loss": 0.838, "step": 14056 }, { "epoch": 0.3609443430730147, "grad_norm": 0.76953125, "learning_rate": 0.0001772304615856648, "loss": 0.9908, "step": 14057 }, { "epoch": 0.36097002026893654, "grad_norm": 0.78515625, "learning_rate": 0.00017722762561398324, "loss": 1.0097, "step": 14058 }, { "epoch": 0.3609956974648583, "grad_norm": 0.78125, "learning_rate": 0.00017722478948839398, "loss": 1.0091, "step": 14059 }, { "epoch": 0.36102137466078016, "grad_norm": 0.83203125, "learning_rate": 0.00017722195320890268, "loss": 0.9956, "step": 14060 }, { "epoch": 0.361047051856702, "grad_norm": 0.765625, "learning_rate": 0.000177219116775515, "loss": 0.9058, "step": 14061 }, { "epoch": 0.36107272905262383, "grad_norm": 0.7734375, "learning_rate": 0.00017721628018823663, "loss": 0.8671, "step": 14062 }, { "epoch": 0.3610984062485456, "grad_norm": 0.8203125, "learning_rate": 0.00017721344344707318, "loss": 0.8776, "step": 14063 }, { "epoch": 0.36112408344446745, "grad_norm": 0.8203125, "learning_rate": 0.0001772106065520303, "loss": 0.9243, "step": 14064 }, { "epoch": 0.3611497606403893, "grad_norm": 0.8515625, "learning_rate": 0.00017720776950311365, "loss": 0.8454, "step": 14065 }, { "epoch": 0.36117543783631106, "grad_norm": 0.81640625, "learning_rate": 0.0001772049323003289, "loss": 0.8984, "step": 14066 }, { "epoch": 0.3612011150322329, "grad_norm": 0.79296875, "learning_rate": 0.00017720209494368166, "loss": 0.9665, "step": 14067 }, { "epoch": 0.36122679222815474, "grad_norm": 0.890625, "learning_rate": 0.0001771992574331777, "loss": 1.0369, "step": 14068 }, { "epoch": 0.3612524694240765, "grad_norm": 0.8828125, "learning_rate": 0.0001771964197688225, "loss": 0.9666, "step": 14069 }, { "epoch": 0.36127814661999835, "grad_norm": 0.7734375, "learning_rate": 0.00017719358195062186, "loss": 1.0288, "step": 14070 }, { "epoch": 0.3613038238159202, "grad_norm": 0.8359375, "learning_rate": 0.00017719074397858138, "loss": 0.8099, "step": 14071 }, { "epoch": 0.361329501011842, "grad_norm": 0.765625, "learning_rate": 0.0001771879058527067, "loss": 0.9806, "step": 14072 }, { "epoch": 0.3613551782077638, "grad_norm": 0.78125, "learning_rate": 0.00017718506757300354, "loss": 0.9383, "step": 14073 }, { "epoch": 0.36138085540368564, "grad_norm": 0.85546875, "learning_rate": 0.00017718222913947745, "loss": 0.9691, "step": 14074 }, { "epoch": 0.3614065325996075, "grad_norm": 1.703125, "learning_rate": 0.0001771793905521342, "loss": 0.9716, "step": 14075 }, { "epoch": 0.36143220979552926, "grad_norm": 0.84765625, "learning_rate": 0.00017717655181097937, "loss": 0.9762, "step": 14076 }, { "epoch": 0.3614578869914511, "grad_norm": 0.71875, "learning_rate": 0.00017717371291601865, "loss": 0.9025, "step": 14077 }, { "epoch": 0.36148356418737293, "grad_norm": 0.87109375, "learning_rate": 0.0001771708738672577, "loss": 0.9967, "step": 14078 }, { "epoch": 0.3615092413832947, "grad_norm": 0.80078125, "learning_rate": 0.00017716803466470216, "loss": 0.9494, "step": 14079 }, { "epoch": 0.36153491857921655, "grad_norm": 1.15625, "learning_rate": 0.0001771651953083577, "loss": 1.0672, "step": 14080 }, { "epoch": 0.3615605957751384, "grad_norm": 0.83984375, "learning_rate": 0.00017716235579822999, "loss": 0.9505, "step": 14081 }, { "epoch": 0.3615862729710602, "grad_norm": 0.73828125, "learning_rate": 0.00017715951613432465, "loss": 0.8287, "step": 14082 }, { "epoch": 0.361611950166982, "grad_norm": 0.73046875, "learning_rate": 0.00017715667631664736, "loss": 0.8388, "step": 14083 }, { "epoch": 0.36163762736290384, "grad_norm": 0.76171875, "learning_rate": 0.0001771538363452038, "loss": 0.9947, "step": 14084 }, { "epoch": 0.36166330455882567, "grad_norm": 0.78125, "learning_rate": 0.0001771509962199996, "loss": 0.9133, "step": 14085 }, { "epoch": 0.36168898175474745, "grad_norm": 0.78125, "learning_rate": 0.00017714815594104045, "loss": 1.0659, "step": 14086 }, { "epoch": 0.3617146589506693, "grad_norm": 0.76953125, "learning_rate": 0.000177145315508332, "loss": 0.9455, "step": 14087 }, { "epoch": 0.3617403361465911, "grad_norm": 0.74609375, "learning_rate": 0.00017714247492187988, "loss": 0.8378, "step": 14088 }, { "epoch": 0.3617660133425129, "grad_norm": 1.015625, "learning_rate": 0.00017713963418168981, "loss": 0.9897, "step": 14089 }, { "epoch": 0.36179169053843474, "grad_norm": 0.76953125, "learning_rate": 0.0001771367932877674, "loss": 1.0585, "step": 14090 }, { "epoch": 0.3618173677343566, "grad_norm": 0.85546875, "learning_rate": 0.0001771339522401183, "loss": 0.8989, "step": 14091 }, { "epoch": 0.3618430449302784, "grad_norm": 0.79296875, "learning_rate": 0.00017713111103874823, "loss": 0.9452, "step": 14092 }, { "epoch": 0.3618687221262002, "grad_norm": 0.82421875, "learning_rate": 0.00017712826968366283, "loss": 1.0097, "step": 14093 }, { "epoch": 0.36189439932212203, "grad_norm": 0.76953125, "learning_rate": 0.00017712542817486775, "loss": 0.838, "step": 14094 }, { "epoch": 0.36192007651804387, "grad_norm": 0.75, "learning_rate": 0.00017712258651236864, "loss": 0.9729, "step": 14095 }, { "epoch": 0.36194575371396565, "grad_norm": 0.71484375, "learning_rate": 0.00017711974469617118, "loss": 0.8254, "step": 14096 }, { "epoch": 0.3619714309098875, "grad_norm": 0.8125, "learning_rate": 0.00017711690272628107, "loss": 0.9258, "step": 14097 }, { "epoch": 0.3619971081058093, "grad_norm": 0.78515625, "learning_rate": 0.00017711406060270392, "loss": 0.9517, "step": 14098 }, { "epoch": 0.3620227853017311, "grad_norm": 0.859375, "learning_rate": 0.0001771112183254454, "loss": 1.044, "step": 14099 }, { "epoch": 0.36204846249765293, "grad_norm": 0.80859375, "learning_rate": 0.0001771083758945112, "loss": 0.8803, "step": 14100 }, { "epoch": 0.36207413969357477, "grad_norm": 0.8046875, "learning_rate": 0.00017710553330990697, "loss": 1.0589, "step": 14101 }, { "epoch": 0.3620998168894966, "grad_norm": 0.80078125, "learning_rate": 0.00017710269057163837, "loss": 1.0051, "step": 14102 }, { "epoch": 0.3621254940854184, "grad_norm": 0.734375, "learning_rate": 0.00017709984767971107, "loss": 0.7988, "step": 14103 }, { "epoch": 0.3621511712813402, "grad_norm": 0.80078125, "learning_rate": 0.00017709700463413075, "loss": 0.8891, "step": 14104 }, { "epoch": 0.36217684847726206, "grad_norm": 0.890625, "learning_rate": 0.00017709416143490308, "loss": 0.9426, "step": 14105 }, { "epoch": 0.36220252567318384, "grad_norm": 0.73828125, "learning_rate": 0.00017709131808203368, "loss": 0.8709, "step": 14106 }, { "epoch": 0.3622282028691057, "grad_norm": 0.80859375, "learning_rate": 0.00017708847457552826, "loss": 0.9408, "step": 14107 }, { "epoch": 0.3622538800650275, "grad_norm": 0.9609375, "learning_rate": 0.00017708563091539247, "loss": 0.9419, "step": 14108 }, { "epoch": 0.3622795572609493, "grad_norm": 0.68359375, "learning_rate": 0.00017708278710163198, "loss": 0.9887, "step": 14109 }, { "epoch": 0.36230523445687113, "grad_norm": 0.77734375, "learning_rate": 0.00017707994313425246, "loss": 0.9043, "step": 14110 }, { "epoch": 0.36233091165279296, "grad_norm": 0.87890625, "learning_rate": 0.00017707709901325957, "loss": 1.0448, "step": 14111 }, { "epoch": 0.3623565888487148, "grad_norm": 0.90234375, "learning_rate": 0.000177074254738659, "loss": 1.0319, "step": 14112 }, { "epoch": 0.3623822660446366, "grad_norm": 0.8515625, "learning_rate": 0.0001770714103104564, "loss": 0.975, "step": 14113 }, { "epoch": 0.3624079432405584, "grad_norm": 0.76953125, "learning_rate": 0.00017706856572865745, "loss": 0.8757, "step": 14114 }, { "epoch": 0.36243362043648025, "grad_norm": 0.84375, "learning_rate": 0.00017706572099326776, "loss": 0.9983, "step": 14115 }, { "epoch": 0.36245929763240203, "grad_norm": 0.80078125, "learning_rate": 0.00017706287610429308, "loss": 0.9232, "step": 14116 }, { "epoch": 0.36248497482832387, "grad_norm": 0.7734375, "learning_rate": 0.00017706003106173905, "loss": 0.9351, "step": 14117 }, { "epoch": 0.3625106520242457, "grad_norm": 0.8515625, "learning_rate": 0.00017705718586561136, "loss": 1.1339, "step": 14118 }, { "epoch": 0.3625363292201675, "grad_norm": 0.796875, "learning_rate": 0.00017705434051591565, "loss": 0.9169, "step": 14119 }, { "epoch": 0.3625620064160893, "grad_norm": 0.875, "learning_rate": 0.00017705149501265756, "loss": 0.9218, "step": 14120 }, { "epoch": 0.36258768361201116, "grad_norm": 0.75390625, "learning_rate": 0.00017704864935584285, "loss": 0.91, "step": 14121 }, { "epoch": 0.362613360807933, "grad_norm": 0.6953125, "learning_rate": 0.00017704580354547714, "loss": 0.9432, "step": 14122 }, { "epoch": 0.3626390380038548, "grad_norm": 0.87109375, "learning_rate": 0.00017704295758156607, "loss": 0.9797, "step": 14123 }, { "epoch": 0.3626647151997766, "grad_norm": 0.90234375, "learning_rate": 0.00017704011146411536, "loss": 1.0512, "step": 14124 }, { "epoch": 0.36269039239569845, "grad_norm": 0.81640625, "learning_rate": 0.00017703726519313067, "loss": 0.8993, "step": 14125 }, { "epoch": 0.3627160695916202, "grad_norm": 0.8984375, "learning_rate": 0.00017703441876861767, "loss": 0.9757, "step": 14126 }, { "epoch": 0.36274174678754206, "grad_norm": 0.8359375, "learning_rate": 0.00017703157219058202, "loss": 1.0093, "step": 14127 }, { "epoch": 0.3627674239834639, "grad_norm": 0.90625, "learning_rate": 0.0001770287254590294, "loss": 1.0305, "step": 14128 }, { "epoch": 0.3627931011793857, "grad_norm": 0.74609375, "learning_rate": 0.0001770258785739655, "loss": 0.8889, "step": 14129 }, { "epoch": 0.3628187783753075, "grad_norm": 0.8671875, "learning_rate": 0.000177023031535396, "loss": 0.9824, "step": 14130 }, { "epoch": 0.36284445557122935, "grad_norm": 0.7421875, "learning_rate": 0.00017702018434332653, "loss": 0.8878, "step": 14131 }, { "epoch": 0.3628701327671512, "grad_norm": 0.77734375, "learning_rate": 0.0001770173369977628, "loss": 1.0531, "step": 14132 }, { "epoch": 0.36289580996307297, "grad_norm": 0.76953125, "learning_rate": 0.00017701448949871047, "loss": 0.938, "step": 14133 }, { "epoch": 0.3629214871589948, "grad_norm": 0.7734375, "learning_rate": 0.00017701164184617522, "loss": 0.915, "step": 14134 }, { "epoch": 0.36294716435491664, "grad_norm": 0.76171875, "learning_rate": 0.0001770087940401627, "loss": 0.9608, "step": 14135 }, { "epoch": 0.3629728415508384, "grad_norm": 0.72265625, "learning_rate": 0.00017700594608067863, "loss": 0.8784, "step": 14136 }, { "epoch": 0.36299851874676026, "grad_norm": 0.77734375, "learning_rate": 0.0001770030979677287, "loss": 0.9372, "step": 14137 }, { "epoch": 0.3630241959426821, "grad_norm": 0.734375, "learning_rate": 0.00017700024970131853, "loss": 0.9102, "step": 14138 }, { "epoch": 0.3630498731386039, "grad_norm": 0.84375, "learning_rate": 0.00017699740128145378, "loss": 1.0429, "step": 14139 }, { "epoch": 0.3630755503345257, "grad_norm": 0.75390625, "learning_rate": 0.0001769945527081402, "loss": 1.0423, "step": 14140 }, { "epoch": 0.36310122753044755, "grad_norm": 0.78125, "learning_rate": 0.00017699170398138345, "loss": 0.8615, "step": 14141 }, { "epoch": 0.3631269047263694, "grad_norm": 0.74609375, "learning_rate": 0.00017698885510118918, "loss": 0.8292, "step": 14142 }, { "epoch": 0.36315258192229116, "grad_norm": 0.85546875, "learning_rate": 0.00017698600606756303, "loss": 0.9612, "step": 14143 }, { "epoch": 0.363178259118213, "grad_norm": 0.80078125, "learning_rate": 0.00017698315688051075, "loss": 1.079, "step": 14144 }, { "epoch": 0.36320393631413483, "grad_norm": 0.75390625, "learning_rate": 0.000176980307540038, "loss": 0.8618, "step": 14145 }, { "epoch": 0.3632296135100566, "grad_norm": 0.765625, "learning_rate": 0.00017697745804615044, "loss": 0.9742, "step": 14146 }, { "epoch": 0.36325529070597845, "grad_norm": 0.78125, "learning_rate": 0.00017697460839885377, "loss": 0.9154, "step": 14147 }, { "epoch": 0.3632809679019003, "grad_norm": 0.7265625, "learning_rate": 0.00017697175859815366, "loss": 0.9471, "step": 14148 }, { "epoch": 0.36330664509782207, "grad_norm": 0.7421875, "learning_rate": 0.0001769689086440558, "loss": 0.9373, "step": 14149 }, { "epoch": 0.3633323222937439, "grad_norm": 0.78125, "learning_rate": 0.00017696605853656584, "loss": 0.9676, "step": 14150 }, { "epoch": 0.36335799948966574, "grad_norm": 0.7890625, "learning_rate": 0.00017696320827568945, "loss": 0.8769, "step": 14151 }, { "epoch": 0.3633836766855876, "grad_norm": 0.79296875, "learning_rate": 0.00017696035786143237, "loss": 0.8858, "step": 14152 }, { "epoch": 0.36340935388150936, "grad_norm": 0.76953125, "learning_rate": 0.00017695750729380025, "loss": 0.8993, "step": 14153 }, { "epoch": 0.3634350310774312, "grad_norm": 0.921875, "learning_rate": 0.00017695465657279876, "loss": 0.8658, "step": 14154 }, { "epoch": 0.36346070827335303, "grad_norm": 1.8203125, "learning_rate": 0.00017695180569843362, "loss": 0.9158, "step": 14155 }, { "epoch": 0.3634863854692748, "grad_norm": 0.7890625, "learning_rate": 0.00017694895467071042, "loss": 0.9247, "step": 14156 }, { "epoch": 0.36351206266519664, "grad_norm": 0.8125, "learning_rate": 0.00017694610348963498, "loss": 1.0802, "step": 14157 }, { "epoch": 0.3635377398611185, "grad_norm": 0.76953125, "learning_rate": 0.00017694325215521282, "loss": 0.933, "step": 14158 }, { "epoch": 0.36356341705704026, "grad_norm": 0.83984375, "learning_rate": 0.00017694040066744976, "loss": 1.0461, "step": 14159 }, { "epoch": 0.3635890942529621, "grad_norm": 0.765625, "learning_rate": 0.00017693754902635146, "loss": 0.9095, "step": 14160 }, { "epoch": 0.36361477144888393, "grad_norm": 0.76171875, "learning_rate": 0.00017693469723192352, "loss": 0.7521, "step": 14161 }, { "epoch": 0.36364044864480577, "grad_norm": 0.78125, "learning_rate": 0.00017693184528417172, "loss": 0.9589, "step": 14162 }, { "epoch": 0.36366612584072755, "grad_norm": 0.8359375, "learning_rate": 0.00017692899318310165, "loss": 0.9208, "step": 14163 }, { "epoch": 0.3636918030366494, "grad_norm": 0.81640625, "learning_rate": 0.0001769261409287191, "loss": 1.1004, "step": 14164 }, { "epoch": 0.3637174802325712, "grad_norm": 0.76171875, "learning_rate": 0.00017692328852102969, "loss": 1.0019, "step": 14165 }, { "epoch": 0.363743157428493, "grad_norm": 0.77734375, "learning_rate": 0.0001769204359600391, "loss": 0.8224, "step": 14166 }, { "epoch": 0.36376883462441484, "grad_norm": 0.77734375, "learning_rate": 0.00017691758324575303, "loss": 0.9254, "step": 14167 }, { "epoch": 0.3637945118203367, "grad_norm": 0.7890625, "learning_rate": 0.00017691473037817718, "loss": 1.0192, "step": 14168 }, { "epoch": 0.36382018901625846, "grad_norm": 0.76953125, "learning_rate": 0.00017691187735731717, "loss": 0.8685, "step": 14169 }, { "epoch": 0.3638458662121803, "grad_norm": 0.83984375, "learning_rate": 0.00017690902418317878, "loss": 1.0751, "step": 14170 }, { "epoch": 0.3638715434081021, "grad_norm": 0.8125, "learning_rate": 0.00017690617085576763, "loss": 0.9745, "step": 14171 }, { "epoch": 0.36389722060402396, "grad_norm": 0.8203125, "learning_rate": 0.00017690331737508946, "loss": 0.8817, "step": 14172 }, { "epoch": 0.36392289779994574, "grad_norm": 0.7265625, "learning_rate": 0.0001769004637411499, "loss": 0.739, "step": 14173 }, { "epoch": 0.3639485749958676, "grad_norm": 0.78515625, "learning_rate": 0.0001768976099539547, "loss": 0.8422, "step": 14174 }, { "epoch": 0.3639742521917894, "grad_norm": 0.76171875, "learning_rate": 0.00017689475601350948, "loss": 1.0282, "step": 14175 }, { "epoch": 0.3639999293877112, "grad_norm": 0.78515625, "learning_rate": 0.00017689190191981993, "loss": 0.9587, "step": 14176 }, { "epoch": 0.36402560658363303, "grad_norm": 0.77734375, "learning_rate": 0.0001768890476728918, "loss": 0.966, "step": 14177 }, { "epoch": 0.36405128377955487, "grad_norm": 0.73046875, "learning_rate": 0.00017688619327273073, "loss": 0.9384, "step": 14178 }, { "epoch": 0.36407696097547665, "grad_norm": 0.78125, "learning_rate": 0.00017688333871934242, "loss": 1.0667, "step": 14179 }, { "epoch": 0.3641026381713985, "grad_norm": 0.8203125, "learning_rate": 0.00017688048401273256, "loss": 0.9917, "step": 14180 }, { "epoch": 0.3641283153673203, "grad_norm": 0.8046875, "learning_rate": 0.00017687762915290684, "loss": 0.9115, "step": 14181 }, { "epoch": 0.36415399256324216, "grad_norm": 0.80859375, "learning_rate": 0.00017687477413987098, "loss": 0.9508, "step": 14182 }, { "epoch": 0.36417966975916394, "grad_norm": 0.84765625, "learning_rate": 0.0001768719189736306, "loss": 0.9091, "step": 14183 }, { "epoch": 0.3642053469550858, "grad_norm": 0.77734375, "learning_rate": 0.00017686906365419145, "loss": 0.9753, "step": 14184 }, { "epoch": 0.3642310241510076, "grad_norm": 0.86328125, "learning_rate": 0.0001768662081815592, "loss": 1.001, "step": 14185 }, { "epoch": 0.3642567013469294, "grad_norm": 0.92578125, "learning_rate": 0.00017686335255573954, "loss": 0.8535, "step": 14186 }, { "epoch": 0.3642823785428512, "grad_norm": 0.79296875, "learning_rate": 0.00017686049677673814, "loss": 1.0594, "step": 14187 }, { "epoch": 0.36430805573877306, "grad_norm": 0.796875, "learning_rate": 0.0001768576408445607, "loss": 0.9283, "step": 14188 }, { "epoch": 0.36433373293469484, "grad_norm": 0.7421875, "learning_rate": 0.000176854784759213, "loss": 1.0615, "step": 14189 }, { "epoch": 0.3643594101306167, "grad_norm": 0.90625, "learning_rate": 0.0001768519285207006, "loss": 0.8083, "step": 14190 }, { "epoch": 0.3643850873265385, "grad_norm": 0.79296875, "learning_rate": 0.00017684907212902923, "loss": 0.9834, "step": 14191 }, { "epoch": 0.36441076452246035, "grad_norm": 0.7578125, "learning_rate": 0.00017684621558420466, "loss": 0.827, "step": 14192 }, { "epoch": 0.36443644171838213, "grad_norm": 0.75390625, "learning_rate": 0.00017684335888623246, "loss": 0.9646, "step": 14193 }, { "epoch": 0.36446211891430397, "grad_norm": 0.7421875, "learning_rate": 0.00017684050203511844, "loss": 0.9477, "step": 14194 }, { "epoch": 0.3644877961102258, "grad_norm": 0.7890625, "learning_rate": 0.0001768376450308682, "loss": 0.9106, "step": 14195 }, { "epoch": 0.3645134733061476, "grad_norm": 0.78125, "learning_rate": 0.00017683478787348751, "loss": 0.8683, "step": 14196 }, { "epoch": 0.3645391505020694, "grad_norm": 1.71875, "learning_rate": 0.00017683193056298202, "loss": 1.0076, "step": 14197 }, { "epoch": 0.36456482769799126, "grad_norm": 0.74609375, "learning_rate": 0.00017682907309935742, "loss": 0.9042, "step": 14198 }, { "epoch": 0.36459050489391304, "grad_norm": 0.73046875, "learning_rate": 0.00017682621548261942, "loss": 0.8774, "step": 14199 }, { "epoch": 0.3646161820898349, "grad_norm": 0.76171875, "learning_rate": 0.0001768233577127737, "loss": 0.9078, "step": 14200 }, { "epoch": 0.3646418592857567, "grad_norm": 0.76953125, "learning_rate": 0.000176820499789826, "loss": 0.7945, "step": 14201 }, { "epoch": 0.36466753648167854, "grad_norm": 0.83203125, "learning_rate": 0.00017681764171378198, "loss": 0.8968, "step": 14202 }, { "epoch": 0.3646932136776003, "grad_norm": 0.69921875, "learning_rate": 0.00017681478348464732, "loss": 0.9603, "step": 14203 }, { "epoch": 0.36471889087352216, "grad_norm": 0.7265625, "learning_rate": 0.00017681192510242774, "loss": 0.8191, "step": 14204 }, { "epoch": 0.364744568069444, "grad_norm": 0.74609375, "learning_rate": 0.00017680906656712892, "loss": 0.9097, "step": 14205 }, { "epoch": 0.3647702452653658, "grad_norm": 0.8203125, "learning_rate": 0.00017680620787875658, "loss": 0.9087, "step": 14206 }, { "epoch": 0.3647959224612876, "grad_norm": 0.78125, "learning_rate": 0.0001768033490373164, "loss": 0.9852, "step": 14207 }, { "epoch": 0.36482159965720945, "grad_norm": 0.796875, "learning_rate": 0.0001768004900428141, "loss": 1.0302, "step": 14208 }, { "epoch": 0.36484727685313123, "grad_norm": 0.7734375, "learning_rate": 0.00017679763089525533, "loss": 1.1233, "step": 14209 }, { "epoch": 0.36487295404905307, "grad_norm": 1.0234375, "learning_rate": 0.00017679477159464587, "loss": 0.8304, "step": 14210 }, { "epoch": 0.3648986312449749, "grad_norm": 0.83203125, "learning_rate": 0.0001767919121409913, "loss": 0.9269, "step": 14211 }, { "epoch": 0.36492430844089674, "grad_norm": 0.80859375, "learning_rate": 0.00017678905253429742, "loss": 0.9194, "step": 14212 }, { "epoch": 0.3649499856368185, "grad_norm": 0.80078125, "learning_rate": 0.0001767861927745699, "loss": 0.9385, "step": 14213 }, { "epoch": 0.36497566283274036, "grad_norm": 0.76953125, "learning_rate": 0.00017678333286181444, "loss": 1.1065, "step": 14214 }, { "epoch": 0.3650013400286622, "grad_norm": 0.8828125, "learning_rate": 0.00017678047279603672, "loss": 0.9982, "step": 14215 }, { "epoch": 0.36502701722458397, "grad_norm": 0.84765625, "learning_rate": 0.00017677761257724246, "loss": 1.0443, "step": 14216 }, { "epoch": 0.3650526944205058, "grad_norm": 0.80078125, "learning_rate": 0.00017677475220543733, "loss": 1.073, "step": 14217 }, { "epoch": 0.36507837161642764, "grad_norm": 0.8125, "learning_rate": 0.0001767718916806271, "loss": 1.051, "step": 14218 }, { "epoch": 0.3651040488123494, "grad_norm": 0.828125, "learning_rate": 0.00017676903100281738, "loss": 1.0226, "step": 14219 }, { "epoch": 0.36512972600827126, "grad_norm": 0.7890625, "learning_rate": 0.00017676617017201393, "loss": 0.8816, "step": 14220 }, { "epoch": 0.3651554032041931, "grad_norm": 0.8125, "learning_rate": 0.00017676330918822244, "loss": 0.8773, "step": 14221 }, { "epoch": 0.36518108040011493, "grad_norm": 0.73828125, "learning_rate": 0.0001767604480514486, "loss": 0.7833, "step": 14222 }, { "epoch": 0.3652067575960367, "grad_norm": 0.828125, "learning_rate": 0.00017675758676169814, "loss": 0.8383, "step": 14223 }, { "epoch": 0.36523243479195855, "grad_norm": 0.8125, "learning_rate": 0.00017675472531897675, "loss": 0.9977, "step": 14224 }, { "epoch": 0.3652581119878804, "grad_norm": 1.25, "learning_rate": 0.00017675186372329008, "loss": 0.9722, "step": 14225 }, { "epoch": 0.36528378918380217, "grad_norm": 0.71875, "learning_rate": 0.00017674900197464392, "loss": 0.877, "step": 14226 }, { "epoch": 0.365309466379724, "grad_norm": 0.74609375, "learning_rate": 0.0001767461400730439, "loss": 1.0334, "step": 14227 }, { "epoch": 0.36533514357564584, "grad_norm": 0.7890625, "learning_rate": 0.0001767432780184958, "loss": 0.9004, "step": 14228 }, { "epoch": 0.3653608207715676, "grad_norm": 0.8125, "learning_rate": 0.00017674041581100525, "loss": 1.0623, "step": 14229 }, { "epoch": 0.36538649796748945, "grad_norm": 0.8671875, "learning_rate": 0.00017673755345057798, "loss": 1.0352, "step": 14230 }, { "epoch": 0.3654121751634113, "grad_norm": 0.765625, "learning_rate": 0.00017673469093721972, "loss": 1.0448, "step": 14231 }, { "epoch": 0.3654378523593331, "grad_norm": 0.734375, "learning_rate": 0.00017673182827093612, "loss": 0.9568, "step": 14232 }, { "epoch": 0.3654635295552549, "grad_norm": 0.859375, "learning_rate": 0.00017672896545173293, "loss": 1.0106, "step": 14233 }, { "epoch": 0.36548920675117674, "grad_norm": 0.75390625, "learning_rate": 0.00017672610247961585, "loss": 0.8824, "step": 14234 }, { "epoch": 0.3655148839470986, "grad_norm": 0.890625, "learning_rate": 0.00017672323935459056, "loss": 1.2539, "step": 14235 }, { "epoch": 0.36554056114302036, "grad_norm": 0.76953125, "learning_rate": 0.00017672037607666284, "loss": 0.8709, "step": 14236 }, { "epoch": 0.3655662383389422, "grad_norm": 0.796875, "learning_rate": 0.00017671751264583828, "loss": 0.9874, "step": 14237 }, { "epoch": 0.36559191553486403, "grad_norm": 0.8359375, "learning_rate": 0.00017671464906212267, "loss": 1.0169, "step": 14238 }, { "epoch": 0.3656175927307858, "grad_norm": 0.7734375, "learning_rate": 0.00017671178532552168, "loss": 0.9022, "step": 14239 }, { "epoch": 0.36564326992670765, "grad_norm": 0.78515625, "learning_rate": 0.00017670892143604102, "loss": 0.9885, "step": 14240 }, { "epoch": 0.3656689471226295, "grad_norm": 0.8046875, "learning_rate": 0.00017670605739368645, "loss": 0.9845, "step": 14241 }, { "epoch": 0.3656946243185513, "grad_norm": 0.79296875, "learning_rate": 0.00017670319319846363, "loss": 0.8437, "step": 14242 }, { "epoch": 0.3657203015144731, "grad_norm": 0.765625, "learning_rate": 0.00017670032885037826, "loss": 1.0139, "step": 14243 }, { "epoch": 0.36574597871039494, "grad_norm": 0.8828125, "learning_rate": 0.00017669746434943606, "loss": 1.2101, "step": 14244 }, { "epoch": 0.3657716559063168, "grad_norm": 0.828125, "learning_rate": 0.00017669459969564275, "loss": 0.8903, "step": 14245 }, { "epoch": 0.36579733310223855, "grad_norm": 0.79296875, "learning_rate": 0.00017669173488900404, "loss": 0.9487, "step": 14246 }, { "epoch": 0.3658230102981604, "grad_norm": 0.8359375, "learning_rate": 0.0001766888699295256, "loss": 0.9652, "step": 14247 }, { "epoch": 0.3658486874940822, "grad_norm": 0.78125, "learning_rate": 0.00017668600481721318, "loss": 0.8184, "step": 14248 }, { "epoch": 0.365874364690004, "grad_norm": 0.80859375, "learning_rate": 0.0001766831395520725, "loss": 0.8899, "step": 14249 }, { "epoch": 0.36590004188592584, "grad_norm": 0.87890625, "learning_rate": 0.00017668027413410923, "loss": 0.8339, "step": 14250 }, { "epoch": 0.3659257190818477, "grad_norm": 0.95703125, "learning_rate": 0.0001766774085633291, "loss": 1.0452, "step": 14251 }, { "epoch": 0.36595139627776946, "grad_norm": 0.7890625, "learning_rate": 0.00017667454283973782, "loss": 0.9108, "step": 14252 }, { "epoch": 0.3659770734736913, "grad_norm": 0.85546875, "learning_rate": 0.00017667167696334111, "loss": 0.9677, "step": 14253 }, { "epoch": 0.36600275066961313, "grad_norm": 0.9765625, "learning_rate": 0.0001766688109341447, "loss": 1.0155, "step": 14254 }, { "epoch": 0.36602842786553497, "grad_norm": 0.84765625, "learning_rate": 0.00017666594475215422, "loss": 1.0309, "step": 14255 }, { "epoch": 0.36605410506145675, "grad_norm": 0.7421875, "learning_rate": 0.00017666307841737546, "loss": 0.9189, "step": 14256 }, { "epoch": 0.3660797822573786, "grad_norm": 0.95703125, "learning_rate": 0.00017666021192981412, "loss": 1.0634, "step": 14257 }, { "epoch": 0.3661054594533004, "grad_norm": 0.80078125, "learning_rate": 0.00017665734528947587, "loss": 0.8141, "step": 14258 }, { "epoch": 0.3661311366492222, "grad_norm": 0.75390625, "learning_rate": 0.00017665447849636652, "loss": 0.8618, "step": 14259 }, { "epoch": 0.36615681384514404, "grad_norm": 0.9453125, "learning_rate": 0.00017665161155049167, "loss": 0.9281, "step": 14260 }, { "epoch": 0.36618249104106587, "grad_norm": 0.8046875, "learning_rate": 0.00017664874445185707, "loss": 0.9352, "step": 14261 }, { "epoch": 0.36620816823698765, "grad_norm": 0.8359375, "learning_rate": 0.00017664587720046847, "loss": 0.8873, "step": 14262 }, { "epoch": 0.3662338454329095, "grad_norm": 0.83203125, "learning_rate": 0.0001766430097963316, "loss": 0.9374, "step": 14263 }, { "epoch": 0.3662595226288313, "grad_norm": 0.81640625, "learning_rate": 0.00017664014223945206, "loss": 1.0237, "step": 14264 }, { "epoch": 0.36628519982475316, "grad_norm": 0.796875, "learning_rate": 0.00017663727452983568, "loss": 0.9837, "step": 14265 }, { "epoch": 0.36631087702067494, "grad_norm": 0.921875, "learning_rate": 0.00017663440666748813, "loss": 0.9662, "step": 14266 }, { "epoch": 0.3663365542165968, "grad_norm": 0.76171875, "learning_rate": 0.00017663153865241514, "loss": 0.8744, "step": 14267 }, { "epoch": 0.3663622314125186, "grad_norm": 0.81640625, "learning_rate": 0.0001766286704846224, "loss": 0.926, "step": 14268 }, { "epoch": 0.3663879086084404, "grad_norm": 0.80078125, "learning_rate": 0.00017662580216411565, "loss": 0.9943, "step": 14269 }, { "epoch": 0.36641358580436223, "grad_norm": 0.8125, "learning_rate": 0.00017662293369090056, "loss": 0.9113, "step": 14270 }, { "epoch": 0.36643926300028407, "grad_norm": 0.79296875, "learning_rate": 0.00017662006506498292, "loss": 0.9339, "step": 14271 }, { "epoch": 0.36646494019620585, "grad_norm": 0.7578125, "learning_rate": 0.00017661719628636843, "loss": 0.8533, "step": 14272 }, { "epoch": 0.3664906173921277, "grad_norm": 0.7890625, "learning_rate": 0.00017661432735506275, "loss": 0.9041, "step": 14273 }, { "epoch": 0.3665162945880495, "grad_norm": 0.8046875, "learning_rate": 0.00017661145827107166, "loss": 0.9434, "step": 14274 }, { "epoch": 0.36654197178397135, "grad_norm": 0.71484375, "learning_rate": 0.00017660858903440085, "loss": 1.0197, "step": 14275 }, { "epoch": 0.36656764897989313, "grad_norm": 0.70703125, "learning_rate": 0.00017660571964505604, "loss": 0.9098, "step": 14276 }, { "epoch": 0.36659332617581497, "grad_norm": 0.7890625, "learning_rate": 0.00017660285010304293, "loss": 0.9224, "step": 14277 }, { "epoch": 0.3666190033717368, "grad_norm": 0.88671875, "learning_rate": 0.0001765999804083673, "loss": 1.2015, "step": 14278 }, { "epoch": 0.3666446805676586, "grad_norm": 0.79296875, "learning_rate": 0.00017659711056103482, "loss": 0.9534, "step": 14279 }, { "epoch": 0.3666703577635804, "grad_norm": 0.8125, "learning_rate": 0.0001765942405610512, "loss": 0.8997, "step": 14280 }, { "epoch": 0.36669603495950226, "grad_norm": 0.734375, "learning_rate": 0.00017659137040842215, "loss": 0.7286, "step": 14281 }, { "epoch": 0.36672171215542404, "grad_norm": 0.91015625, "learning_rate": 0.00017658850010315345, "loss": 0.8983, "step": 14282 }, { "epoch": 0.3667473893513459, "grad_norm": 0.8203125, "learning_rate": 0.0001765856296452508, "loss": 0.9626, "step": 14283 }, { "epoch": 0.3667730665472677, "grad_norm": 0.82421875, "learning_rate": 0.0001765827590347199, "loss": 1.0009, "step": 14284 }, { "epoch": 0.36679874374318955, "grad_norm": 0.921875, "learning_rate": 0.0001765798882715665, "loss": 1.0365, "step": 14285 }, { "epoch": 0.36682442093911133, "grad_norm": 0.89453125, "learning_rate": 0.00017657701735579624, "loss": 1.0015, "step": 14286 }, { "epoch": 0.36685009813503316, "grad_norm": 0.78515625, "learning_rate": 0.00017657414628741494, "loss": 0.9999, "step": 14287 }, { "epoch": 0.366875775330955, "grad_norm": 0.76953125, "learning_rate": 0.00017657127506642828, "loss": 0.8067, "step": 14288 }, { "epoch": 0.3669014525268768, "grad_norm": 0.7578125, "learning_rate": 0.00017656840369284198, "loss": 0.9048, "step": 14289 }, { "epoch": 0.3669271297227986, "grad_norm": 0.82421875, "learning_rate": 0.00017656553216666176, "loss": 1.1577, "step": 14290 }, { "epoch": 0.36695280691872045, "grad_norm": 0.78515625, "learning_rate": 0.0001765626604878934, "loss": 1.1062, "step": 14291 }, { "epoch": 0.36697848411464223, "grad_norm": 0.81640625, "learning_rate": 0.0001765597886565425, "loss": 0.9703, "step": 14292 }, { "epoch": 0.36700416131056407, "grad_norm": 0.7421875, "learning_rate": 0.00017655691667261493, "loss": 0.9254, "step": 14293 }, { "epoch": 0.3670298385064859, "grad_norm": 0.74609375, "learning_rate": 0.00017655404453611627, "loss": 0.9, "step": 14294 }, { "epoch": 0.36705551570240774, "grad_norm": 0.7734375, "learning_rate": 0.00017655117224705237, "loss": 0.9384, "step": 14295 }, { "epoch": 0.3670811928983295, "grad_norm": 0.7734375, "learning_rate": 0.00017654829980542886, "loss": 0.9995, "step": 14296 }, { "epoch": 0.36710687009425136, "grad_norm": 0.80078125, "learning_rate": 0.00017654542721125152, "loss": 0.9616, "step": 14297 }, { "epoch": 0.3671325472901732, "grad_norm": 0.75, "learning_rate": 0.00017654255446452607, "loss": 0.9405, "step": 14298 }, { "epoch": 0.367158224486095, "grad_norm": 0.90234375, "learning_rate": 0.0001765396815652582, "loss": 0.9951, "step": 14299 }, { "epoch": 0.3671839016820168, "grad_norm": 0.79296875, "learning_rate": 0.00017653680851345365, "loss": 1.0143, "step": 14300 }, { "epoch": 0.36720957887793865, "grad_norm": 0.8125, "learning_rate": 0.00017653393530911817, "loss": 1.1079, "step": 14301 }, { "epoch": 0.3672352560738604, "grad_norm": 0.8125, "learning_rate": 0.0001765310619522575, "loss": 0.9324, "step": 14302 }, { "epoch": 0.36726093326978226, "grad_norm": 0.7890625, "learning_rate": 0.00017652818844287728, "loss": 0.7971, "step": 14303 }, { "epoch": 0.3672866104657041, "grad_norm": 0.84375, "learning_rate": 0.00017652531478098334, "loss": 0.933, "step": 14304 }, { "epoch": 0.36731228766162594, "grad_norm": 0.73828125, "learning_rate": 0.00017652244096658134, "loss": 0.9996, "step": 14305 }, { "epoch": 0.3673379648575477, "grad_norm": 0.78125, "learning_rate": 0.000176519566999677, "loss": 1.0884, "step": 14306 }, { "epoch": 0.36736364205346955, "grad_norm": 0.77734375, "learning_rate": 0.00017651669288027612, "loss": 0.907, "step": 14307 }, { "epoch": 0.3673893192493914, "grad_norm": 0.78125, "learning_rate": 0.0001765138186083844, "loss": 0.839, "step": 14308 }, { "epoch": 0.36741499644531317, "grad_norm": 0.859375, "learning_rate": 0.0001765109441840075, "loss": 1.0156, "step": 14309 }, { "epoch": 0.367440673641235, "grad_norm": 0.7734375, "learning_rate": 0.00017650806960715123, "loss": 1.1172, "step": 14310 }, { "epoch": 0.36746635083715684, "grad_norm": 0.77734375, "learning_rate": 0.00017650519487782127, "loss": 0.8306, "step": 14311 }, { "epoch": 0.3674920280330786, "grad_norm": 0.703125, "learning_rate": 0.00017650231999602337, "loss": 0.9589, "step": 14312 }, { "epoch": 0.36751770522900046, "grad_norm": 0.7890625, "learning_rate": 0.00017649944496176325, "loss": 0.9911, "step": 14313 }, { "epoch": 0.3675433824249223, "grad_norm": 0.734375, "learning_rate": 0.00017649656977504664, "loss": 0.957, "step": 14314 }, { "epoch": 0.36756905962084413, "grad_norm": 0.7890625, "learning_rate": 0.0001764936944358793, "loss": 0.982, "step": 14315 }, { "epoch": 0.3675947368167659, "grad_norm": 0.73046875, "learning_rate": 0.00017649081894426693, "loss": 0.9154, "step": 14316 }, { "epoch": 0.36762041401268775, "grad_norm": 0.85546875, "learning_rate": 0.00017648794330021528, "loss": 0.9663, "step": 14317 }, { "epoch": 0.3676460912086096, "grad_norm": 0.7734375, "learning_rate": 0.00017648506750373005, "loss": 0.8589, "step": 14318 }, { "epoch": 0.36767176840453136, "grad_norm": 0.8046875, "learning_rate": 0.000176482191554817, "loss": 1.0893, "step": 14319 }, { "epoch": 0.3676974456004532, "grad_norm": 0.79296875, "learning_rate": 0.00017647931545348183, "loss": 1.0233, "step": 14320 }, { "epoch": 0.36772312279637503, "grad_norm": 0.76953125, "learning_rate": 0.0001764764391997303, "loss": 0.9419, "step": 14321 }, { "epoch": 0.3677487999922968, "grad_norm": 0.79296875, "learning_rate": 0.00017647356279356817, "loss": 1.1068, "step": 14322 }, { "epoch": 0.36777447718821865, "grad_norm": 0.81640625, "learning_rate": 0.00017647068623500108, "loss": 0.9216, "step": 14323 }, { "epoch": 0.3678001543841405, "grad_norm": 0.75390625, "learning_rate": 0.00017646780952403489, "loss": 0.9476, "step": 14324 }, { "epoch": 0.3678258315800623, "grad_norm": 0.8046875, "learning_rate": 0.0001764649326606752, "loss": 1.0043, "step": 14325 }, { "epoch": 0.3678515087759841, "grad_norm": 0.703125, "learning_rate": 0.00017646205564492783, "loss": 1.0803, "step": 14326 }, { "epoch": 0.36787718597190594, "grad_norm": 0.7890625, "learning_rate": 0.00017645917847679848, "loss": 1.094, "step": 14327 }, { "epoch": 0.3679028631678278, "grad_norm": 0.71875, "learning_rate": 0.00017645630115629293, "loss": 0.9982, "step": 14328 }, { "epoch": 0.36792854036374956, "grad_norm": 0.8515625, "learning_rate": 0.00017645342368341683, "loss": 1.0601, "step": 14329 }, { "epoch": 0.3679542175596714, "grad_norm": 0.80859375, "learning_rate": 0.00017645054605817598, "loss": 0.8375, "step": 14330 }, { "epoch": 0.36797989475559323, "grad_norm": 0.77734375, "learning_rate": 0.0001764476682805761, "loss": 0.8891, "step": 14331 }, { "epoch": 0.368005571951515, "grad_norm": 0.81640625, "learning_rate": 0.00017644479035062294, "loss": 0.8812, "step": 14332 }, { "epoch": 0.36803124914743685, "grad_norm": 0.77734375, "learning_rate": 0.00017644191226832222, "loss": 0.8382, "step": 14333 }, { "epoch": 0.3680569263433587, "grad_norm": 0.76171875, "learning_rate": 0.00017643903403367962, "loss": 0.8291, "step": 14334 }, { "epoch": 0.3680826035392805, "grad_norm": 1.078125, "learning_rate": 0.000176436155646701, "loss": 0.981, "step": 14335 }, { "epoch": 0.3681082807352023, "grad_norm": 0.921875, "learning_rate": 0.00017643327710739197, "loss": 0.9634, "step": 14336 }, { "epoch": 0.36813395793112413, "grad_norm": 1.0234375, "learning_rate": 0.00017643039841575836, "loss": 0.9114, "step": 14337 }, { "epoch": 0.36815963512704597, "grad_norm": 0.74609375, "learning_rate": 0.00017642751957180582, "loss": 0.9452, "step": 14338 }, { "epoch": 0.36818531232296775, "grad_norm": 0.8125, "learning_rate": 0.00017642464057554018, "loss": 0.9142, "step": 14339 }, { "epoch": 0.3682109895188896, "grad_norm": 0.87109375, "learning_rate": 0.00017642176142696712, "loss": 0.9927, "step": 14340 }, { "epoch": 0.3682366667148114, "grad_norm": 0.8515625, "learning_rate": 0.0001764188821260924, "loss": 0.9872, "step": 14341 }, { "epoch": 0.3682623439107332, "grad_norm": 0.78515625, "learning_rate": 0.00017641600267292177, "loss": 1.0015, "step": 14342 }, { "epoch": 0.36828802110665504, "grad_norm": 0.8203125, "learning_rate": 0.00017641312306746092, "loss": 0.9817, "step": 14343 }, { "epoch": 0.3683136983025769, "grad_norm": 0.73828125, "learning_rate": 0.00017641024330971564, "loss": 0.9723, "step": 14344 }, { "epoch": 0.3683393754984987, "grad_norm": 0.9765625, "learning_rate": 0.0001764073633996916, "loss": 0.8979, "step": 14345 }, { "epoch": 0.3683650526944205, "grad_norm": 0.77734375, "learning_rate": 0.00017640448333739464, "loss": 0.8522, "step": 14346 }, { "epoch": 0.3683907298903423, "grad_norm": 0.87109375, "learning_rate": 0.00017640160312283043, "loss": 0.9516, "step": 14347 }, { "epoch": 0.36841640708626416, "grad_norm": 0.77734375, "learning_rate": 0.0001763987227560047, "loss": 1.0355, "step": 14348 }, { "epoch": 0.36844208428218594, "grad_norm": 0.8515625, "learning_rate": 0.00017639584223692328, "loss": 0.806, "step": 14349 }, { "epoch": 0.3684677614781078, "grad_norm": 0.80078125, "learning_rate": 0.0001763929615655918, "loss": 1.0177, "step": 14350 }, { "epoch": 0.3684934386740296, "grad_norm": 0.76171875, "learning_rate": 0.00017639008074201603, "loss": 0.8533, "step": 14351 }, { "epoch": 0.3685191158699514, "grad_norm": 0.8203125, "learning_rate": 0.00017638719976620175, "loss": 0.9067, "step": 14352 }, { "epoch": 0.36854479306587323, "grad_norm": 0.76953125, "learning_rate": 0.0001763843186381547, "loss": 0.9108, "step": 14353 }, { "epoch": 0.36857047026179507, "grad_norm": 0.83203125, "learning_rate": 0.00017638143735788057, "loss": 0.9356, "step": 14354 }, { "epoch": 0.3685961474577169, "grad_norm": 0.89453125, "learning_rate": 0.00017637855592538516, "loss": 0.9796, "step": 14355 }, { "epoch": 0.3686218246536387, "grad_norm": 0.74609375, "learning_rate": 0.0001763756743406742, "loss": 1.0285, "step": 14356 }, { "epoch": 0.3686475018495605, "grad_norm": 0.7421875, "learning_rate": 0.00017637279260375338, "loss": 0.9218, "step": 14357 }, { "epoch": 0.36867317904548236, "grad_norm": 0.78125, "learning_rate": 0.00017636991071462853, "loss": 0.9866, "step": 14358 }, { "epoch": 0.36869885624140414, "grad_norm": 0.80078125, "learning_rate": 0.0001763670286733053, "loss": 0.9773, "step": 14359 }, { "epoch": 0.368724533437326, "grad_norm": 0.76953125, "learning_rate": 0.00017636414647978951, "loss": 0.9556, "step": 14360 }, { "epoch": 0.3687502106332478, "grad_norm": 0.75390625, "learning_rate": 0.00017636126413408688, "loss": 0.9771, "step": 14361 }, { "epoch": 0.3687758878291696, "grad_norm": 0.6875, "learning_rate": 0.0001763583816362031, "loss": 0.7646, "step": 14362 }, { "epoch": 0.3688015650250914, "grad_norm": 0.86328125, "learning_rate": 0.00017635549898614402, "loss": 1.122, "step": 14363 }, { "epoch": 0.36882724222101326, "grad_norm": 0.734375, "learning_rate": 0.0001763526161839153, "loss": 0.8907, "step": 14364 }, { "epoch": 0.3688529194169351, "grad_norm": 0.7734375, "learning_rate": 0.0001763497332295227, "loss": 0.828, "step": 14365 }, { "epoch": 0.3688785966128569, "grad_norm": 0.71484375, "learning_rate": 0.000176346850122972, "loss": 0.9623, "step": 14366 }, { "epoch": 0.3689042738087787, "grad_norm": 0.86328125, "learning_rate": 0.00017634396686426893, "loss": 0.9548, "step": 14367 }, { "epoch": 0.36892995100470055, "grad_norm": 0.7890625, "learning_rate": 0.0001763410834534192, "loss": 0.9793, "step": 14368 }, { "epoch": 0.36895562820062233, "grad_norm": 0.80078125, "learning_rate": 0.0001763381998904286, "loss": 0.8809, "step": 14369 }, { "epoch": 0.36898130539654417, "grad_norm": 0.7578125, "learning_rate": 0.00017633531617530288, "loss": 0.9879, "step": 14370 }, { "epoch": 0.369006982592466, "grad_norm": 0.7265625, "learning_rate": 0.00017633243230804775, "loss": 0.953, "step": 14371 }, { "epoch": 0.3690326597883878, "grad_norm": 0.6953125, "learning_rate": 0.000176329548288669, "loss": 0.8698, "step": 14372 }, { "epoch": 0.3690583369843096, "grad_norm": 0.7265625, "learning_rate": 0.0001763266641171723, "loss": 0.8464, "step": 14373 }, { "epoch": 0.36908401418023146, "grad_norm": 0.79296875, "learning_rate": 0.00017632377979356353, "loss": 0.9187, "step": 14374 }, { "epoch": 0.3691096913761533, "grad_norm": 0.9140625, "learning_rate": 0.00017632089531784833, "loss": 0.8844, "step": 14375 }, { "epoch": 0.3691353685720751, "grad_norm": 0.78125, "learning_rate": 0.00017631801069003245, "loss": 0.9646, "step": 14376 }, { "epoch": 0.3691610457679969, "grad_norm": 0.8359375, "learning_rate": 0.0001763151259101217, "loss": 0.8202, "step": 14377 }, { "epoch": 0.36918672296391875, "grad_norm": 0.71875, "learning_rate": 0.0001763122409781218, "loss": 0.952, "step": 14378 }, { "epoch": 0.3692124001598405, "grad_norm": 0.7265625, "learning_rate": 0.00017630935589403848, "loss": 0.902, "step": 14379 }, { "epoch": 0.36923807735576236, "grad_norm": 0.75390625, "learning_rate": 0.00017630647065787751, "loss": 0.8937, "step": 14380 }, { "epoch": 0.3692637545516842, "grad_norm": 0.74609375, "learning_rate": 0.00017630358526964462, "loss": 1.0586, "step": 14381 }, { "epoch": 0.369289431747606, "grad_norm": 0.7265625, "learning_rate": 0.00017630069972934562, "loss": 0.8163, "step": 14382 }, { "epoch": 0.3693151089435278, "grad_norm": 0.84375, "learning_rate": 0.00017629781403698618, "loss": 1.1777, "step": 14383 }, { "epoch": 0.36934078613944965, "grad_norm": 0.80078125, "learning_rate": 0.00017629492819257214, "loss": 1.0092, "step": 14384 }, { "epoch": 0.3693664633353715, "grad_norm": 0.78125, "learning_rate": 0.00017629204219610914, "loss": 0.9737, "step": 14385 }, { "epoch": 0.36939214053129327, "grad_norm": 0.796875, "learning_rate": 0.000176289156047603, "loss": 0.9664, "step": 14386 }, { "epoch": 0.3694178177272151, "grad_norm": 0.75, "learning_rate": 0.0001762862697470595, "loss": 0.872, "step": 14387 }, { "epoch": 0.36944349492313694, "grad_norm": 0.72265625, "learning_rate": 0.00017628338329448432, "loss": 0.8252, "step": 14388 }, { "epoch": 0.3694691721190587, "grad_norm": 0.8203125, "learning_rate": 0.00017628049668988327, "loss": 1.0958, "step": 14389 }, { "epoch": 0.36949484931498056, "grad_norm": 0.8984375, "learning_rate": 0.00017627760993326207, "loss": 0.9155, "step": 14390 }, { "epoch": 0.3695205265109024, "grad_norm": 0.73046875, "learning_rate": 0.0001762747230246265, "loss": 0.8889, "step": 14391 }, { "epoch": 0.3695462037068242, "grad_norm": 0.8125, "learning_rate": 0.00017627183596398226, "loss": 0.885, "step": 14392 }, { "epoch": 0.369571880902746, "grad_norm": 0.796875, "learning_rate": 0.00017626894875133519, "loss": 0.9399, "step": 14393 }, { "epoch": 0.36959755809866784, "grad_norm": 0.875, "learning_rate": 0.00017626606138669096, "loss": 0.8987, "step": 14394 }, { "epoch": 0.3696232352945897, "grad_norm": 0.859375, "learning_rate": 0.00017626317387005536, "loss": 0.9851, "step": 14395 }, { "epoch": 0.36964891249051146, "grad_norm": 0.81640625, "learning_rate": 0.00017626028620143416, "loss": 0.9636, "step": 14396 }, { "epoch": 0.3696745896864333, "grad_norm": 0.796875, "learning_rate": 0.00017625739838083307, "loss": 0.9306, "step": 14397 }, { "epoch": 0.36970026688235513, "grad_norm": 0.7734375, "learning_rate": 0.00017625451040825788, "loss": 0.9278, "step": 14398 }, { "epoch": 0.3697259440782769, "grad_norm": 0.88671875, "learning_rate": 0.00017625162228371438, "loss": 0.9238, "step": 14399 }, { "epoch": 0.36975162127419875, "grad_norm": 0.828125, "learning_rate": 0.00017624873400720823, "loss": 1.012, "step": 14400 }, { "epoch": 0.3697772984701206, "grad_norm": 0.7890625, "learning_rate": 0.0001762458455787453, "loss": 0.8758, "step": 14401 }, { "epoch": 0.36980297566604237, "grad_norm": 0.7578125, "learning_rate": 0.00017624295699833123, "loss": 0.7411, "step": 14402 }, { "epoch": 0.3698286528619642, "grad_norm": 0.7265625, "learning_rate": 0.00017624006826597185, "loss": 0.8871, "step": 14403 }, { "epoch": 0.36985433005788604, "grad_norm": 0.734375, "learning_rate": 0.00017623717938167291, "loss": 0.8678, "step": 14404 }, { "epoch": 0.3698800072538079, "grad_norm": 0.75390625, "learning_rate": 0.00017623429034544015, "loss": 0.9975, "step": 14405 }, { "epoch": 0.36990568444972965, "grad_norm": 0.78125, "learning_rate": 0.00017623140115727935, "loss": 1.0283, "step": 14406 }, { "epoch": 0.3699313616456515, "grad_norm": 0.765625, "learning_rate": 0.00017622851181719624, "loss": 1.0192, "step": 14407 }, { "epoch": 0.3699570388415733, "grad_norm": 0.90625, "learning_rate": 0.0001762256223251966, "loss": 0.9437, "step": 14408 }, { "epoch": 0.3699827160374951, "grad_norm": 0.74609375, "learning_rate": 0.00017622273268128616, "loss": 0.9878, "step": 14409 }, { "epoch": 0.37000839323341694, "grad_norm": 0.8359375, "learning_rate": 0.0001762198428854707, "loss": 0.97, "step": 14410 }, { "epoch": 0.3700340704293388, "grad_norm": 0.75390625, "learning_rate": 0.00017621695293775603, "loss": 1.0532, "step": 14411 }, { "epoch": 0.37005974762526056, "grad_norm": 0.89453125, "learning_rate": 0.0001762140628381478, "loss": 0.9225, "step": 14412 }, { "epoch": 0.3700854248211824, "grad_norm": 0.70703125, "learning_rate": 0.00017621117258665185, "loss": 0.8537, "step": 14413 }, { "epoch": 0.37011110201710423, "grad_norm": 0.78125, "learning_rate": 0.00017620828218327392, "loss": 0.972, "step": 14414 }, { "epoch": 0.37013677921302607, "grad_norm": 0.7578125, "learning_rate": 0.00017620539162801975, "loss": 0.8915, "step": 14415 }, { "epoch": 0.37016245640894785, "grad_norm": 0.78515625, "learning_rate": 0.00017620250092089515, "loss": 1.0576, "step": 14416 }, { "epoch": 0.3701881336048697, "grad_norm": 0.796875, "learning_rate": 0.0001761996100619058, "loss": 0.9085, "step": 14417 }, { "epoch": 0.3702138108007915, "grad_norm": 1.0625, "learning_rate": 0.00017619671905105754, "loss": 1.0028, "step": 14418 }, { "epoch": 0.3702394879967133, "grad_norm": 0.79296875, "learning_rate": 0.0001761938278883561, "loss": 0.9303, "step": 14419 }, { "epoch": 0.37026516519263514, "grad_norm": 0.80859375, "learning_rate": 0.00017619093657380726, "loss": 0.9295, "step": 14420 }, { "epoch": 0.370290842388557, "grad_norm": 0.80078125, "learning_rate": 0.00017618804510741674, "loss": 0.9699, "step": 14421 }, { "epoch": 0.37031651958447875, "grad_norm": 0.78515625, "learning_rate": 0.00017618515348919033, "loss": 0.9538, "step": 14422 }, { "epoch": 0.3703421967804006, "grad_norm": 0.78125, "learning_rate": 0.00017618226171913382, "loss": 1.0234, "step": 14423 }, { "epoch": 0.3703678739763224, "grad_norm": 0.73828125, "learning_rate": 0.0001761793697972529, "loss": 0.9571, "step": 14424 }, { "epoch": 0.37039355117224426, "grad_norm": 0.78515625, "learning_rate": 0.0001761764777235534, "loss": 0.8988, "step": 14425 }, { "epoch": 0.37041922836816604, "grad_norm": 0.80078125, "learning_rate": 0.00017617358549804106, "loss": 1.0009, "step": 14426 }, { "epoch": 0.3704449055640879, "grad_norm": 0.6953125, "learning_rate": 0.00017617069312072164, "loss": 0.872, "step": 14427 }, { "epoch": 0.3704705827600097, "grad_norm": 0.77734375, "learning_rate": 0.00017616780059160093, "loss": 0.8049, "step": 14428 }, { "epoch": 0.3704962599559315, "grad_norm": 0.69921875, "learning_rate": 0.00017616490791068465, "loss": 0.8137, "step": 14429 }, { "epoch": 0.37052193715185333, "grad_norm": 0.75, "learning_rate": 0.0001761620150779786, "loss": 0.7835, "step": 14430 }, { "epoch": 0.37054761434777517, "grad_norm": 0.7265625, "learning_rate": 0.00017615912209348854, "loss": 0.9464, "step": 14431 }, { "epoch": 0.37057329154369695, "grad_norm": 0.796875, "learning_rate": 0.0001761562289572202, "loss": 1.0645, "step": 14432 }, { "epoch": 0.3705989687396188, "grad_norm": 0.85546875, "learning_rate": 0.0001761533356691794, "loss": 1.0413, "step": 14433 }, { "epoch": 0.3706246459355406, "grad_norm": 1.0390625, "learning_rate": 0.00017615044222937188, "loss": 0.9774, "step": 14434 }, { "epoch": 0.37065032313146246, "grad_norm": 0.8515625, "learning_rate": 0.00017614754863780338, "loss": 1.0408, "step": 14435 }, { "epoch": 0.37067600032738424, "grad_norm": 0.7890625, "learning_rate": 0.00017614465489447975, "loss": 0.9474, "step": 14436 }, { "epoch": 0.3707016775233061, "grad_norm": 0.74609375, "learning_rate": 0.00017614176099940666, "loss": 0.8691, "step": 14437 }, { "epoch": 0.3707273547192279, "grad_norm": 0.73828125, "learning_rate": 0.00017613886695258993, "loss": 0.9597, "step": 14438 }, { "epoch": 0.3707530319151497, "grad_norm": 0.69140625, "learning_rate": 0.00017613597275403532, "loss": 1.0309, "step": 14439 }, { "epoch": 0.3707787091110715, "grad_norm": 0.796875, "learning_rate": 0.00017613307840374858, "loss": 1.072, "step": 14440 }, { "epoch": 0.37080438630699336, "grad_norm": 0.85546875, "learning_rate": 0.00017613018390173548, "loss": 1.0642, "step": 14441 }, { "epoch": 0.37083006350291514, "grad_norm": 0.7578125, "learning_rate": 0.00017612728924800181, "loss": 0.9229, "step": 14442 }, { "epoch": 0.370855740698837, "grad_norm": 0.96875, "learning_rate": 0.00017612439444255332, "loss": 0.8433, "step": 14443 }, { "epoch": 0.3708814178947588, "grad_norm": 0.7578125, "learning_rate": 0.00017612149948539584, "loss": 0.8005, "step": 14444 }, { "epoch": 0.37090709509068065, "grad_norm": 0.7890625, "learning_rate": 0.00017611860437653505, "loss": 1.1017, "step": 14445 }, { "epoch": 0.37093277228660243, "grad_norm": 0.796875, "learning_rate": 0.00017611570911597676, "loss": 1.0396, "step": 14446 }, { "epoch": 0.37095844948252427, "grad_norm": 0.828125, "learning_rate": 0.0001761128137037267, "loss": 0.9916, "step": 14447 }, { "epoch": 0.3709841266784461, "grad_norm": 0.91015625, "learning_rate": 0.00017610991813979073, "loss": 0.9611, "step": 14448 }, { "epoch": 0.3710098038743679, "grad_norm": 1.109375, "learning_rate": 0.00017610702242417455, "loss": 0.9791, "step": 14449 }, { "epoch": 0.3710354810702897, "grad_norm": 0.6640625, "learning_rate": 0.00017610412655688396, "loss": 0.7913, "step": 14450 }, { "epoch": 0.37106115826621155, "grad_norm": 0.75, "learning_rate": 0.0001761012305379247, "loss": 0.9505, "step": 14451 }, { "epoch": 0.37108683546213334, "grad_norm": 0.84375, "learning_rate": 0.00017609833436730258, "loss": 0.9063, "step": 14452 }, { "epoch": 0.37111251265805517, "grad_norm": 0.83203125, "learning_rate": 0.00017609543804502335, "loss": 0.9291, "step": 14453 }, { "epoch": 0.371138189853977, "grad_norm": 0.7578125, "learning_rate": 0.00017609254157109277, "loss": 0.7543, "step": 14454 }, { "epoch": 0.37116386704989884, "grad_norm": 0.8359375, "learning_rate": 0.00017608964494551663, "loss": 0.9545, "step": 14455 }, { "epoch": 0.3711895442458206, "grad_norm": 0.83203125, "learning_rate": 0.00017608674816830074, "loss": 0.9625, "step": 14456 }, { "epoch": 0.37121522144174246, "grad_norm": 1.75, "learning_rate": 0.00017608385123945078, "loss": 1.0238, "step": 14457 }, { "epoch": 0.3712408986376643, "grad_norm": 0.84765625, "learning_rate": 0.0001760809541589726, "loss": 1.0792, "step": 14458 }, { "epoch": 0.3712665758335861, "grad_norm": 0.71875, "learning_rate": 0.000176078056926872, "loss": 0.934, "step": 14459 }, { "epoch": 0.3712922530295079, "grad_norm": 0.75390625, "learning_rate": 0.00017607515954315463, "loss": 0.9848, "step": 14460 }, { "epoch": 0.37131793022542975, "grad_norm": 0.7734375, "learning_rate": 0.00017607226200782637, "loss": 0.9805, "step": 14461 }, { "epoch": 0.37134360742135153, "grad_norm": 0.78125, "learning_rate": 0.00017606936432089295, "loss": 1.0014, "step": 14462 }, { "epoch": 0.37136928461727337, "grad_norm": 0.76171875, "learning_rate": 0.00017606646648236018, "loss": 0.9328, "step": 14463 }, { "epoch": 0.3713949618131952, "grad_norm": 0.7890625, "learning_rate": 0.00017606356849223377, "loss": 0.9039, "step": 14464 }, { "epoch": 0.371420639009117, "grad_norm": 0.80078125, "learning_rate": 0.00017606067035051958, "loss": 0.9846, "step": 14465 }, { "epoch": 0.3714463162050388, "grad_norm": 0.78515625, "learning_rate": 0.00017605777205722335, "loss": 0.8346, "step": 14466 }, { "epoch": 0.37147199340096065, "grad_norm": 0.734375, "learning_rate": 0.00017605487361235081, "loss": 0.9544, "step": 14467 }, { "epoch": 0.3714976705968825, "grad_norm": 0.8203125, "learning_rate": 0.0001760519750159078, "loss": 0.9836, "step": 14468 }, { "epoch": 0.37152334779280427, "grad_norm": 0.76171875, "learning_rate": 0.0001760490762679001, "loss": 1.0329, "step": 14469 }, { "epoch": 0.3715490249887261, "grad_norm": 0.86328125, "learning_rate": 0.00017604617736833341, "loss": 0.8519, "step": 14470 }, { "epoch": 0.37157470218464794, "grad_norm": 0.88671875, "learning_rate": 0.0001760432783172136, "loss": 1.0054, "step": 14471 }, { "epoch": 0.3716003793805697, "grad_norm": 0.8046875, "learning_rate": 0.0001760403791145464, "loss": 0.8262, "step": 14472 }, { "epoch": 0.37162605657649156, "grad_norm": 0.89453125, "learning_rate": 0.00017603747976033759, "loss": 1.051, "step": 14473 }, { "epoch": 0.3716517337724134, "grad_norm": 0.7578125, "learning_rate": 0.00017603458025459295, "loss": 1.0609, "step": 14474 }, { "epoch": 0.3716774109683352, "grad_norm": 0.75390625, "learning_rate": 0.00017603168059731822, "loss": 0.9344, "step": 14475 }, { "epoch": 0.371703088164257, "grad_norm": 0.7890625, "learning_rate": 0.00017602878078851926, "loss": 0.9633, "step": 14476 }, { "epoch": 0.37172876536017885, "grad_norm": 0.765625, "learning_rate": 0.0001760258808282018, "loss": 0.8379, "step": 14477 }, { "epoch": 0.3717544425561007, "grad_norm": 0.78125, "learning_rate": 0.00017602298071637163, "loss": 0.9757, "step": 14478 }, { "epoch": 0.37178011975202246, "grad_norm": 0.78125, "learning_rate": 0.00017602008045303452, "loss": 1.0356, "step": 14479 }, { "epoch": 0.3718057969479443, "grad_norm": 0.828125, "learning_rate": 0.00017601718003819628, "loss": 0.9939, "step": 14480 }, { "epoch": 0.37183147414386614, "grad_norm": 0.8125, "learning_rate": 0.00017601427947186262, "loss": 0.9231, "step": 14481 }, { "epoch": 0.3718571513397879, "grad_norm": 0.7578125, "learning_rate": 0.0001760113787540394, "loss": 0.8447, "step": 14482 }, { "epoch": 0.37188282853570975, "grad_norm": 0.7265625, "learning_rate": 0.00017600847788473237, "loss": 0.8695, "step": 14483 }, { "epoch": 0.3719085057316316, "grad_norm": 0.7421875, "learning_rate": 0.0001760055768639473, "loss": 0.8657, "step": 14484 }, { "epoch": 0.37193418292755337, "grad_norm": 0.74609375, "learning_rate": 0.00017600267569169, "loss": 0.9491, "step": 14485 }, { "epoch": 0.3719598601234752, "grad_norm": 0.77734375, "learning_rate": 0.0001759997743679662, "loss": 0.9152, "step": 14486 }, { "epoch": 0.37198553731939704, "grad_norm": 0.8671875, "learning_rate": 0.0001759968728927817, "loss": 0.9179, "step": 14487 }, { "epoch": 0.3720112145153189, "grad_norm": 0.859375, "learning_rate": 0.00017599397126614235, "loss": 0.9808, "step": 14488 }, { "epoch": 0.37203689171124066, "grad_norm": 0.7734375, "learning_rate": 0.00017599106948805384, "loss": 0.9309, "step": 14489 }, { "epoch": 0.3720625689071625, "grad_norm": 0.796875, "learning_rate": 0.000175988167558522, "loss": 0.8506, "step": 14490 }, { "epoch": 0.37208824610308433, "grad_norm": 0.859375, "learning_rate": 0.00017598526547755262, "loss": 1.0778, "step": 14491 }, { "epoch": 0.3721139232990061, "grad_norm": 0.80859375, "learning_rate": 0.00017598236324515147, "loss": 0.8901, "step": 14492 }, { "epoch": 0.37213960049492795, "grad_norm": 0.76953125, "learning_rate": 0.00017597946086132433, "loss": 1.0159, "step": 14493 }, { "epoch": 0.3721652776908498, "grad_norm": 0.8515625, "learning_rate": 0.000175976558326077, "loss": 1.0423, "step": 14494 }, { "epoch": 0.37219095488677156, "grad_norm": 0.8828125, "learning_rate": 0.0001759736556394152, "loss": 0.9136, "step": 14495 }, { "epoch": 0.3722166320826934, "grad_norm": 0.7734375, "learning_rate": 0.00017597075280134482, "loss": 1.105, "step": 14496 }, { "epoch": 0.37224230927861524, "grad_norm": 0.79296875, "learning_rate": 0.00017596784981187158, "loss": 0.9098, "step": 14497 }, { "epoch": 0.37226798647453707, "grad_norm": 0.7734375, "learning_rate": 0.00017596494667100127, "loss": 0.8854, "step": 14498 }, { "epoch": 0.37229366367045885, "grad_norm": 0.7578125, "learning_rate": 0.0001759620433787397, "loss": 0.8572, "step": 14499 }, { "epoch": 0.3723193408663807, "grad_norm": 1.0625, "learning_rate": 0.00017595913993509262, "loss": 0.8722, "step": 14500 }, { "epoch": 0.3723450180623025, "grad_norm": 1.0390625, "learning_rate": 0.00017595623634006585, "loss": 0.7428, "step": 14501 }, { "epoch": 0.3723706952582243, "grad_norm": 0.87109375, "learning_rate": 0.00017595333259366513, "loss": 1.0075, "step": 14502 }, { "epoch": 0.37239637245414614, "grad_norm": 0.82421875, "learning_rate": 0.0001759504286958963, "loss": 0.803, "step": 14503 }, { "epoch": 0.372422049650068, "grad_norm": 0.7109375, "learning_rate": 0.00017594752464676513, "loss": 0.8805, "step": 14504 }, { "epoch": 0.37244772684598976, "grad_norm": 0.81640625, "learning_rate": 0.00017594462044627743, "loss": 0.9565, "step": 14505 }, { "epoch": 0.3724734040419116, "grad_norm": 0.87890625, "learning_rate": 0.00017594171609443892, "loss": 0.9223, "step": 14506 }, { "epoch": 0.37249908123783343, "grad_norm": 0.80859375, "learning_rate": 0.00017593881159125543, "loss": 0.937, "step": 14507 }, { "epoch": 0.37252475843375527, "grad_norm": 0.859375, "learning_rate": 0.00017593590693673277, "loss": 1.0111, "step": 14508 }, { "epoch": 0.37255043562967705, "grad_norm": 0.8828125, "learning_rate": 0.00017593300213087672, "loss": 1.0575, "step": 14509 }, { "epoch": 0.3725761128255989, "grad_norm": 0.84375, "learning_rate": 0.00017593009717369303, "loss": 0.953, "step": 14510 }, { "epoch": 0.3726017900215207, "grad_norm": 0.921875, "learning_rate": 0.0001759271920651875, "loss": 0.8317, "step": 14511 }, { "epoch": 0.3726274672174425, "grad_norm": 0.81640625, "learning_rate": 0.00017592428680536596, "loss": 0.8884, "step": 14512 }, { "epoch": 0.37265314441336433, "grad_norm": 0.79296875, "learning_rate": 0.00017592138139423417, "loss": 0.9824, "step": 14513 }, { "epoch": 0.37267882160928617, "grad_norm": 0.80078125, "learning_rate": 0.00017591847583179793, "loss": 0.8647, "step": 14514 }, { "epoch": 0.37270449880520795, "grad_norm": 0.88671875, "learning_rate": 0.00017591557011806301, "loss": 1.0249, "step": 14515 }, { "epoch": 0.3727301760011298, "grad_norm": 0.7578125, "learning_rate": 0.00017591266425303522, "loss": 0.828, "step": 14516 }, { "epoch": 0.3727558531970516, "grad_norm": 0.8046875, "learning_rate": 0.00017590975823672034, "loss": 0.9336, "step": 14517 }, { "epoch": 0.37278153039297346, "grad_norm": 0.734375, "learning_rate": 0.0001759068520691242, "loss": 0.8566, "step": 14518 }, { "epoch": 0.37280720758889524, "grad_norm": 0.765625, "learning_rate": 0.00017590394575025254, "loss": 0.9503, "step": 14519 }, { "epoch": 0.3728328847848171, "grad_norm": 0.8203125, "learning_rate": 0.00017590103928011115, "loss": 0.9634, "step": 14520 }, { "epoch": 0.3728585619807389, "grad_norm": 0.73828125, "learning_rate": 0.0001758981326587059, "loss": 0.9344, "step": 14521 }, { "epoch": 0.3728842391766607, "grad_norm": 0.8359375, "learning_rate": 0.0001758952258860425, "loss": 0.9503, "step": 14522 }, { "epoch": 0.37290991637258253, "grad_norm": 0.80859375, "learning_rate": 0.00017589231896212673, "loss": 0.8521, "step": 14523 }, { "epoch": 0.37293559356850436, "grad_norm": 0.765625, "learning_rate": 0.00017588941188696449, "loss": 0.9412, "step": 14524 }, { "epoch": 0.37296127076442614, "grad_norm": 0.80859375, "learning_rate": 0.00017588650466056146, "loss": 0.9163, "step": 14525 }, { "epoch": 0.372986947960348, "grad_norm": 0.80859375, "learning_rate": 0.0001758835972829235, "loss": 0.9743, "step": 14526 }, { "epoch": 0.3730126251562698, "grad_norm": 0.75, "learning_rate": 0.0001758806897540564, "loss": 0.9038, "step": 14527 }, { "epoch": 0.37303830235219165, "grad_norm": 0.7734375, "learning_rate": 0.00017587778207396593, "loss": 0.8272, "step": 14528 }, { "epoch": 0.37306397954811343, "grad_norm": 0.76953125, "learning_rate": 0.00017587487424265788, "loss": 0.8794, "step": 14529 }, { "epoch": 0.37308965674403527, "grad_norm": 0.796875, "learning_rate": 0.00017587196626013806, "loss": 0.8864, "step": 14530 }, { "epoch": 0.3731153339399571, "grad_norm": 0.83203125, "learning_rate": 0.00017586905812641229, "loss": 1.1193, "step": 14531 }, { "epoch": 0.3731410111358789, "grad_norm": 0.73828125, "learning_rate": 0.0001758661498414863, "loss": 0.9515, "step": 14532 }, { "epoch": 0.3731666883318007, "grad_norm": 0.76171875, "learning_rate": 0.00017586324140536595, "loss": 0.8446, "step": 14533 }, { "epoch": 0.37319236552772256, "grad_norm": 0.74609375, "learning_rate": 0.00017586033281805704, "loss": 0.9095, "step": 14534 }, { "epoch": 0.37321804272364434, "grad_norm": 0.8515625, "learning_rate": 0.00017585742407956527, "loss": 0.9287, "step": 14535 }, { "epoch": 0.3732437199195662, "grad_norm": 0.72265625, "learning_rate": 0.00017585451518989658, "loss": 0.792, "step": 14536 }, { "epoch": 0.373269397115488, "grad_norm": 0.8125, "learning_rate": 0.00017585160614905663, "loss": 1.018, "step": 14537 }, { "epoch": 0.37329507431140985, "grad_norm": 0.796875, "learning_rate": 0.00017584869695705133, "loss": 1.0025, "step": 14538 }, { "epoch": 0.3733207515073316, "grad_norm": 0.75390625, "learning_rate": 0.0001758457876138864, "loss": 0.8074, "step": 14539 }, { "epoch": 0.37334642870325346, "grad_norm": 0.80078125, "learning_rate": 0.00017584287811956765, "loss": 0.9393, "step": 14540 }, { "epoch": 0.3733721058991753, "grad_norm": 0.7578125, "learning_rate": 0.00017583996847410095, "loss": 0.9747, "step": 14541 }, { "epoch": 0.3733977830950971, "grad_norm": 0.84375, "learning_rate": 0.00017583705867749201, "loss": 1.0153, "step": 14542 }, { "epoch": 0.3734234602910189, "grad_norm": 0.76953125, "learning_rate": 0.00017583414872974666, "loss": 0.9261, "step": 14543 }, { "epoch": 0.37344913748694075, "grad_norm": 0.7890625, "learning_rate": 0.0001758312386308707, "loss": 0.8638, "step": 14544 }, { "epoch": 0.37347481468286253, "grad_norm": 0.8125, "learning_rate": 0.00017582832838086994, "loss": 0.9846, "step": 14545 }, { "epoch": 0.37350049187878437, "grad_norm": 0.85546875, "learning_rate": 0.00017582541797975018, "loss": 1.0032, "step": 14546 }, { "epoch": 0.3735261690747062, "grad_norm": 0.84765625, "learning_rate": 0.00017582250742751717, "loss": 1.0185, "step": 14547 }, { "epoch": 0.37355184627062804, "grad_norm": 0.7890625, "learning_rate": 0.00017581959672417682, "loss": 0.9404, "step": 14548 }, { "epoch": 0.3735775234665498, "grad_norm": 0.81640625, "learning_rate": 0.0001758166858697348, "loss": 0.9301, "step": 14549 }, { "epoch": 0.37360320066247166, "grad_norm": 0.7890625, "learning_rate": 0.000175813774864197, "loss": 1.0194, "step": 14550 }, { "epoch": 0.3736288778583935, "grad_norm": 0.76953125, "learning_rate": 0.00017581086370756918, "loss": 0.9356, "step": 14551 }, { "epoch": 0.3736545550543153, "grad_norm": 0.765625, "learning_rate": 0.0001758079523998572, "loss": 0.9195, "step": 14552 }, { "epoch": 0.3736802322502371, "grad_norm": 0.7578125, "learning_rate": 0.00017580504094106677, "loss": 0.8326, "step": 14553 }, { "epoch": 0.37370590944615895, "grad_norm": 0.78515625, "learning_rate": 0.00017580212933120378, "loss": 0.9969, "step": 14554 }, { "epoch": 0.3737315866420807, "grad_norm": 0.73828125, "learning_rate": 0.00017579921757027395, "loss": 0.8376, "step": 14555 }, { "epoch": 0.37375726383800256, "grad_norm": 0.75390625, "learning_rate": 0.00017579630565828314, "loss": 0.92, "step": 14556 }, { "epoch": 0.3737829410339244, "grad_norm": 0.79296875, "learning_rate": 0.00017579339359523717, "loss": 0.8414, "step": 14557 }, { "epoch": 0.37380861822984623, "grad_norm": 0.76171875, "learning_rate": 0.00017579048138114179, "loss": 0.9976, "step": 14558 }, { "epoch": 0.373834295425768, "grad_norm": 0.7265625, "learning_rate": 0.00017578756901600283, "loss": 0.926, "step": 14559 }, { "epoch": 0.37385997262168985, "grad_norm": 0.83203125, "learning_rate": 0.00017578465649982608, "loss": 0.9352, "step": 14560 }, { "epoch": 0.3738856498176117, "grad_norm": 0.7890625, "learning_rate": 0.0001757817438326174, "loss": 0.8358, "step": 14561 }, { "epoch": 0.37391132701353347, "grad_norm": 0.74609375, "learning_rate": 0.0001757788310143825, "loss": 0.8325, "step": 14562 }, { "epoch": 0.3739370042094553, "grad_norm": 0.8515625, "learning_rate": 0.00017577591804512723, "loss": 0.9295, "step": 14563 }, { "epoch": 0.37396268140537714, "grad_norm": 0.76953125, "learning_rate": 0.00017577300492485742, "loss": 0.9249, "step": 14564 }, { "epoch": 0.3739883586012989, "grad_norm": 0.96875, "learning_rate": 0.00017577009165357886, "loss": 1.0598, "step": 14565 }, { "epoch": 0.37401403579722076, "grad_norm": 0.76171875, "learning_rate": 0.00017576717823129733, "loss": 0.9506, "step": 14566 }, { "epoch": 0.3740397129931426, "grad_norm": 0.72265625, "learning_rate": 0.00017576426465801867, "loss": 0.8049, "step": 14567 }, { "epoch": 0.37406539018906443, "grad_norm": 0.80859375, "learning_rate": 0.0001757613509337487, "loss": 0.9931, "step": 14568 }, { "epoch": 0.3740910673849862, "grad_norm": 0.80078125, "learning_rate": 0.00017575843705849316, "loss": 0.9946, "step": 14569 }, { "epoch": 0.37411674458090804, "grad_norm": 0.91015625, "learning_rate": 0.00017575552303225793, "loss": 0.9152, "step": 14570 }, { "epoch": 0.3741424217768299, "grad_norm": 0.765625, "learning_rate": 0.00017575260885504878, "loss": 0.9342, "step": 14571 }, { "epoch": 0.37416809897275166, "grad_norm": 0.7734375, "learning_rate": 0.0001757496945268715, "loss": 0.8116, "step": 14572 }, { "epoch": 0.3741937761686735, "grad_norm": 0.76171875, "learning_rate": 0.00017574678004773193, "loss": 0.8957, "step": 14573 }, { "epoch": 0.37421945336459533, "grad_norm": 0.78125, "learning_rate": 0.0001757438654176359, "loss": 0.9252, "step": 14574 }, { "epoch": 0.3742451305605171, "grad_norm": 0.7578125, "learning_rate": 0.00017574095063658916, "loss": 0.83, "step": 14575 }, { "epoch": 0.37427080775643895, "grad_norm": 0.828125, "learning_rate": 0.00017573803570459755, "loss": 0.9309, "step": 14576 }, { "epoch": 0.3742964849523608, "grad_norm": 0.83984375, "learning_rate": 0.00017573512062166687, "loss": 0.9791, "step": 14577 }, { "epoch": 0.3743221621482826, "grad_norm": 0.81640625, "learning_rate": 0.00017573220538780296, "loss": 1.0232, "step": 14578 }, { "epoch": 0.3743478393442044, "grad_norm": 0.76171875, "learning_rate": 0.0001757292900030116, "loss": 1.0681, "step": 14579 }, { "epoch": 0.37437351654012624, "grad_norm": 0.8359375, "learning_rate": 0.00017572637446729855, "loss": 1.0533, "step": 14580 }, { "epoch": 0.3743991937360481, "grad_norm": 0.91015625, "learning_rate": 0.00017572345878066976, "loss": 0.9955, "step": 14581 }, { "epoch": 0.37442487093196986, "grad_norm": 0.8046875, "learning_rate": 0.00017572054294313092, "loss": 0.8782, "step": 14582 }, { "epoch": 0.3744505481278917, "grad_norm": 0.78125, "learning_rate": 0.00017571762695468786, "loss": 0.9249, "step": 14583 }, { "epoch": 0.3744762253238135, "grad_norm": 0.796875, "learning_rate": 0.00017571471081534643, "loss": 1.1143, "step": 14584 }, { "epoch": 0.3745019025197353, "grad_norm": 0.77734375, "learning_rate": 0.00017571179452511242, "loss": 0.8423, "step": 14585 }, { "epoch": 0.37452757971565714, "grad_norm": 0.8203125, "learning_rate": 0.00017570887808399165, "loss": 0.8236, "step": 14586 }, { "epoch": 0.374553256911579, "grad_norm": 0.7578125, "learning_rate": 0.0001757059614919899, "loss": 0.9033, "step": 14587 }, { "epoch": 0.3745789341075008, "grad_norm": 0.734375, "learning_rate": 0.000175703044749113, "loss": 0.8228, "step": 14588 }, { "epoch": 0.3746046113034226, "grad_norm": 0.75, "learning_rate": 0.0001757001278553668, "loss": 0.9238, "step": 14589 }, { "epoch": 0.37463028849934443, "grad_norm": 0.82421875, "learning_rate": 0.00017569721081075708, "loss": 0.8553, "step": 14590 }, { "epoch": 0.37465596569526627, "grad_norm": 0.765625, "learning_rate": 0.00017569429361528964, "loss": 0.986, "step": 14591 }, { "epoch": 0.37468164289118805, "grad_norm": 0.74609375, "learning_rate": 0.0001756913762689703, "loss": 0.7436, "step": 14592 }, { "epoch": 0.3747073200871099, "grad_norm": 0.76171875, "learning_rate": 0.00017568845877180493, "loss": 0.8933, "step": 14593 }, { "epoch": 0.3747329972830317, "grad_norm": 0.87109375, "learning_rate": 0.00017568554112379928, "loss": 0.9971, "step": 14594 }, { "epoch": 0.3747586744789535, "grad_norm": 0.8359375, "learning_rate": 0.00017568262332495914, "loss": 0.9534, "step": 14595 }, { "epoch": 0.37478435167487534, "grad_norm": 0.81640625, "learning_rate": 0.00017567970537529041, "loss": 0.9479, "step": 14596 }, { "epoch": 0.3748100288707972, "grad_norm": 0.8515625, "learning_rate": 0.00017567678727479886, "loss": 0.9261, "step": 14597 }, { "epoch": 0.374835706066719, "grad_norm": 0.82421875, "learning_rate": 0.00017567386902349032, "loss": 0.869, "step": 14598 }, { "epoch": 0.3748613832626408, "grad_norm": 0.81640625, "learning_rate": 0.00017567095062137058, "loss": 0.9244, "step": 14599 }, { "epoch": 0.3748870604585626, "grad_norm": 0.86328125, "learning_rate": 0.00017566803206844547, "loss": 1.0038, "step": 14600 }, { "epoch": 0.37491273765448446, "grad_norm": 0.875, "learning_rate": 0.00017566511336472083, "loss": 0.9027, "step": 14601 }, { "epoch": 0.37493841485040624, "grad_norm": 0.76171875, "learning_rate": 0.00017566219451020242, "loss": 0.943, "step": 14602 }, { "epoch": 0.3749640920463281, "grad_norm": 0.91796875, "learning_rate": 0.0001756592755048961, "loss": 0.849, "step": 14603 }, { "epoch": 0.3749897692422499, "grad_norm": 0.80078125, "learning_rate": 0.0001756563563488077, "loss": 0.9623, "step": 14604 }, { "epoch": 0.3750154464381717, "grad_norm": 0.78125, "learning_rate": 0.00017565343704194298, "loss": 0.9551, "step": 14605 }, { "epoch": 0.37504112363409353, "grad_norm": 0.765625, "learning_rate": 0.0001756505175843078, "loss": 0.8947, "step": 14606 }, { "epoch": 0.37506680083001537, "grad_norm": 0.8125, "learning_rate": 0.000175647597975908, "loss": 0.8494, "step": 14607 }, { "epoch": 0.3750924780259372, "grad_norm": 0.86328125, "learning_rate": 0.00017564467821674934, "loss": 0.9143, "step": 14608 }, { "epoch": 0.375118155221859, "grad_norm": 0.81640625, "learning_rate": 0.00017564175830683769, "loss": 0.8621, "step": 14609 }, { "epoch": 0.3751438324177808, "grad_norm": 0.8671875, "learning_rate": 0.00017563883824617884, "loss": 0.9283, "step": 14610 }, { "epoch": 0.37516950961370266, "grad_norm": 0.8515625, "learning_rate": 0.00017563591803477864, "loss": 1.1314, "step": 14611 }, { "epoch": 0.37519518680962444, "grad_norm": 0.74609375, "learning_rate": 0.00017563299767264282, "loss": 0.9081, "step": 14612 }, { "epoch": 0.3752208640055463, "grad_norm": 0.8125, "learning_rate": 0.00017563007715977732, "loss": 1.0299, "step": 14613 }, { "epoch": 0.3752465412014681, "grad_norm": 0.73828125, "learning_rate": 0.00017562715649618792, "loss": 0.9556, "step": 14614 }, { "epoch": 0.3752722183973899, "grad_norm": 0.7578125, "learning_rate": 0.00017562423568188044, "loss": 0.7346, "step": 14615 }, { "epoch": 0.3752978955933117, "grad_norm": 0.8046875, "learning_rate": 0.00017562131471686064, "loss": 0.9513, "step": 14616 }, { "epoch": 0.37532357278923356, "grad_norm": 0.81640625, "learning_rate": 0.0001756183936011344, "loss": 0.8951, "step": 14617 }, { "epoch": 0.3753492499851554, "grad_norm": 0.78125, "learning_rate": 0.00017561547233470754, "loss": 0.8167, "step": 14618 }, { "epoch": 0.3753749271810772, "grad_norm": 0.76953125, "learning_rate": 0.0001756125509175859, "loss": 0.948, "step": 14619 }, { "epoch": 0.375400604376999, "grad_norm": 0.76953125, "learning_rate": 0.00017560962934977522, "loss": 0.8018, "step": 14620 }, { "epoch": 0.37542628157292085, "grad_norm": 0.83984375, "learning_rate": 0.00017560670763128142, "loss": 1.1501, "step": 14621 }, { "epoch": 0.37545195876884263, "grad_norm": 0.734375, "learning_rate": 0.00017560378576211028, "loss": 0.9225, "step": 14622 }, { "epoch": 0.37547763596476447, "grad_norm": 0.828125, "learning_rate": 0.0001756008637422676, "loss": 1.1052, "step": 14623 }, { "epoch": 0.3755033131606863, "grad_norm": 0.796875, "learning_rate": 0.00017559794157175925, "loss": 0.8208, "step": 14624 }, { "epoch": 0.3755289903566081, "grad_norm": 0.87890625, "learning_rate": 0.00017559501925059102, "loss": 0.9191, "step": 14625 }, { "epoch": 0.3755546675525299, "grad_norm": 0.80078125, "learning_rate": 0.00017559209677876875, "loss": 0.8848, "step": 14626 }, { "epoch": 0.37558034474845176, "grad_norm": 0.734375, "learning_rate": 0.00017558917415629827, "loss": 0.8545, "step": 14627 }, { "epoch": 0.3756060219443736, "grad_norm": 0.83203125, "learning_rate": 0.00017558625138318538, "loss": 0.9378, "step": 14628 }, { "epoch": 0.37563169914029537, "grad_norm": 0.76171875, "learning_rate": 0.00017558332845943594, "loss": 0.8325, "step": 14629 }, { "epoch": 0.3756573763362172, "grad_norm": 0.93359375, "learning_rate": 0.00017558040538505573, "loss": 0.9851, "step": 14630 }, { "epoch": 0.37568305353213904, "grad_norm": 0.8125, "learning_rate": 0.0001755774821600506, "loss": 1.0294, "step": 14631 }, { "epoch": 0.3757087307280608, "grad_norm": 0.8125, "learning_rate": 0.00017557455878442637, "loss": 0.9543, "step": 14632 }, { "epoch": 0.37573440792398266, "grad_norm": 1.5, "learning_rate": 0.0001755716352581889, "loss": 0.9348, "step": 14633 }, { "epoch": 0.3757600851199045, "grad_norm": 0.74609375, "learning_rate": 0.00017556871158134395, "loss": 0.9393, "step": 14634 }, { "epoch": 0.3757857623158263, "grad_norm": 0.703125, "learning_rate": 0.0001755657877538974, "loss": 0.9469, "step": 14635 }, { "epoch": 0.3758114395117481, "grad_norm": 0.8515625, "learning_rate": 0.00017556286377585508, "loss": 0.9071, "step": 14636 }, { "epoch": 0.37583711670766995, "grad_norm": 0.8046875, "learning_rate": 0.00017555993964722276, "loss": 0.9729, "step": 14637 }, { "epoch": 0.3758627939035918, "grad_norm": 0.7890625, "learning_rate": 0.00017555701536800636, "loss": 0.9638, "step": 14638 }, { "epoch": 0.37588847109951357, "grad_norm": 0.765625, "learning_rate": 0.0001755540909382116, "loss": 1.009, "step": 14639 }, { "epoch": 0.3759141482954354, "grad_norm": 0.80078125, "learning_rate": 0.00017555116635784436, "loss": 0.9046, "step": 14640 }, { "epoch": 0.37593982549135724, "grad_norm": 0.8125, "learning_rate": 0.00017554824162691052, "loss": 0.9643, "step": 14641 }, { "epoch": 0.375965502687279, "grad_norm": 0.8125, "learning_rate": 0.00017554531674541583, "loss": 0.8482, "step": 14642 }, { "epoch": 0.37599117988320085, "grad_norm": 0.78515625, "learning_rate": 0.00017554239171336614, "loss": 1.0691, "step": 14643 }, { "epoch": 0.3760168570791227, "grad_norm": 0.79296875, "learning_rate": 0.0001755394665307673, "loss": 1.0004, "step": 14644 }, { "epoch": 0.37604253427504447, "grad_norm": 0.79296875, "learning_rate": 0.00017553654119762513, "loss": 0.915, "step": 14645 }, { "epoch": 0.3760682114709663, "grad_norm": 0.85546875, "learning_rate": 0.00017553361571394542, "loss": 1.0396, "step": 14646 }, { "epoch": 0.37609388866688814, "grad_norm": 0.82421875, "learning_rate": 0.00017553069007973407, "loss": 0.9429, "step": 14647 }, { "epoch": 0.37611956586281, "grad_norm": 0.9140625, "learning_rate": 0.0001755277642949969, "loss": 1.0042, "step": 14648 }, { "epoch": 0.37614524305873176, "grad_norm": 0.796875, "learning_rate": 0.00017552483835973968, "loss": 0.8815, "step": 14649 }, { "epoch": 0.3761709202546536, "grad_norm": 0.796875, "learning_rate": 0.00017552191227396827, "loss": 0.9707, "step": 14650 }, { "epoch": 0.37619659745057543, "grad_norm": 0.76171875, "learning_rate": 0.00017551898603768856, "loss": 0.9003, "step": 14651 }, { "epoch": 0.3762222746464972, "grad_norm": 0.84375, "learning_rate": 0.00017551605965090628, "loss": 0.95, "step": 14652 }, { "epoch": 0.37624795184241905, "grad_norm": 0.765625, "learning_rate": 0.00017551313311362735, "loss": 0.9343, "step": 14653 }, { "epoch": 0.3762736290383409, "grad_norm": 0.765625, "learning_rate": 0.00017551020642585753, "loss": 0.8741, "step": 14654 }, { "epoch": 0.37629930623426266, "grad_norm": 0.8046875, "learning_rate": 0.00017550727958760273, "loss": 0.8684, "step": 14655 }, { "epoch": 0.3763249834301845, "grad_norm": 0.8984375, "learning_rate": 0.00017550435259886871, "loss": 0.9518, "step": 14656 }, { "epoch": 0.37635066062610634, "grad_norm": 0.859375, "learning_rate": 0.00017550142545966137, "loss": 0.8697, "step": 14657 }, { "epoch": 0.3763763378220282, "grad_norm": 0.7578125, "learning_rate": 0.0001754984981699865, "loss": 0.8108, "step": 14658 }, { "epoch": 0.37640201501794995, "grad_norm": 0.86328125, "learning_rate": 0.00017549557072984992, "loss": 1.0602, "step": 14659 }, { "epoch": 0.3764276922138718, "grad_norm": 0.79296875, "learning_rate": 0.0001754926431392575, "loss": 0.9549, "step": 14660 }, { "epoch": 0.3764533694097936, "grad_norm": 0.77734375, "learning_rate": 0.00017548971539821506, "loss": 0.8874, "step": 14661 }, { "epoch": 0.3764790466057154, "grad_norm": 0.828125, "learning_rate": 0.00017548678750672845, "loss": 0.8876, "step": 14662 }, { "epoch": 0.37650472380163724, "grad_norm": 0.79296875, "learning_rate": 0.00017548385946480349, "loss": 0.8751, "step": 14663 }, { "epoch": 0.3765304009975591, "grad_norm": 0.8984375, "learning_rate": 0.000175480931272446, "loss": 0.9737, "step": 14664 }, { "epoch": 0.37655607819348086, "grad_norm": 0.828125, "learning_rate": 0.00017547800292966185, "loss": 1.0258, "step": 14665 }, { "epoch": 0.3765817553894027, "grad_norm": 0.85546875, "learning_rate": 0.00017547507443645683, "loss": 1.0154, "step": 14666 }, { "epoch": 0.37660743258532453, "grad_norm": 0.8828125, "learning_rate": 0.00017547214579283682, "loss": 0.8387, "step": 14667 }, { "epoch": 0.3766331097812463, "grad_norm": 0.89453125, "learning_rate": 0.00017546921699880763, "loss": 0.9756, "step": 14668 }, { "epoch": 0.37665878697716815, "grad_norm": 0.78125, "learning_rate": 0.00017546628805437513, "loss": 0.8218, "step": 14669 }, { "epoch": 0.37668446417309, "grad_norm": 0.74609375, "learning_rate": 0.00017546335895954514, "loss": 0.8971, "step": 14670 }, { "epoch": 0.3767101413690118, "grad_norm": 0.83203125, "learning_rate": 0.00017546042971432345, "loss": 0.9464, "step": 14671 }, { "epoch": 0.3767358185649336, "grad_norm": 0.87109375, "learning_rate": 0.00017545750031871594, "loss": 1.1253, "step": 14672 }, { "epoch": 0.37676149576085544, "grad_norm": 0.81640625, "learning_rate": 0.00017545457077272852, "loss": 0.9614, "step": 14673 }, { "epoch": 0.37678717295677727, "grad_norm": 0.7421875, "learning_rate": 0.0001754516410763669, "loss": 0.9934, "step": 14674 }, { "epoch": 0.37681285015269905, "grad_norm": 0.75390625, "learning_rate": 0.00017544871122963698, "loss": 0.9021, "step": 14675 }, { "epoch": 0.3768385273486209, "grad_norm": 0.84375, "learning_rate": 0.0001754457812325446, "loss": 1.1251, "step": 14676 }, { "epoch": 0.3768642045445427, "grad_norm": 0.7734375, "learning_rate": 0.00017544285108509557, "loss": 0.8456, "step": 14677 }, { "epoch": 0.3768898817404645, "grad_norm": 0.76953125, "learning_rate": 0.00017543992078729577, "loss": 0.933, "step": 14678 }, { "epoch": 0.37691555893638634, "grad_norm": 0.8671875, "learning_rate": 0.00017543699033915105, "loss": 0.9079, "step": 14679 }, { "epoch": 0.3769412361323082, "grad_norm": 0.859375, "learning_rate": 0.00017543405974066717, "loss": 0.8798, "step": 14680 }, { "epoch": 0.37696691332823, "grad_norm": 0.84765625, "learning_rate": 0.0001754311289918501, "loss": 0.9778, "step": 14681 }, { "epoch": 0.3769925905241518, "grad_norm": 1.4296875, "learning_rate": 0.00017542819809270553, "loss": 0.8042, "step": 14682 }, { "epoch": 0.37701826772007363, "grad_norm": 0.98828125, "learning_rate": 0.00017542526704323938, "loss": 0.8318, "step": 14683 }, { "epoch": 0.37704394491599547, "grad_norm": 0.78515625, "learning_rate": 0.0001754223358434575, "loss": 1.0114, "step": 14684 }, { "epoch": 0.37706962211191725, "grad_norm": 0.765625, "learning_rate": 0.00017541940449336573, "loss": 1.0857, "step": 14685 }, { "epoch": 0.3770952993078391, "grad_norm": 0.796875, "learning_rate": 0.00017541647299296992, "loss": 1.0234, "step": 14686 }, { "epoch": 0.3771209765037609, "grad_norm": 0.76953125, "learning_rate": 0.00017541354134227585, "loss": 0.916, "step": 14687 }, { "epoch": 0.3771466536996827, "grad_norm": 0.76171875, "learning_rate": 0.00017541060954128943, "loss": 0.8018, "step": 14688 }, { "epoch": 0.37717233089560454, "grad_norm": 0.91796875, "learning_rate": 0.00017540767759001647, "loss": 1.0192, "step": 14689 }, { "epoch": 0.37719800809152637, "grad_norm": 0.79296875, "learning_rate": 0.0001754047454884628, "loss": 0.967, "step": 14690 }, { "epoch": 0.3772236852874482, "grad_norm": 0.8125, "learning_rate": 0.00017540181323663428, "loss": 0.987, "step": 14691 }, { "epoch": 0.37724936248337, "grad_norm": 0.7421875, "learning_rate": 0.0001753988808345368, "loss": 0.9081, "step": 14692 }, { "epoch": 0.3772750396792918, "grad_norm": 0.8125, "learning_rate": 0.00017539594828217613, "loss": 0.9851, "step": 14693 }, { "epoch": 0.37730071687521366, "grad_norm": 0.76171875, "learning_rate": 0.00017539301557955816, "loss": 0.8746, "step": 14694 }, { "epoch": 0.37732639407113544, "grad_norm": 0.85546875, "learning_rate": 0.0001753900827266887, "loss": 0.9336, "step": 14695 }, { "epoch": 0.3773520712670573, "grad_norm": 0.82421875, "learning_rate": 0.00017538714972357365, "loss": 1.0326, "step": 14696 }, { "epoch": 0.3773777484629791, "grad_norm": 0.8046875, "learning_rate": 0.00017538421657021876, "loss": 0.9308, "step": 14697 }, { "epoch": 0.3774034256589009, "grad_norm": 0.69140625, "learning_rate": 0.00017538128326663, "loss": 0.9168, "step": 14698 }, { "epoch": 0.37742910285482273, "grad_norm": 0.7890625, "learning_rate": 0.00017537834981281312, "loss": 0.9833, "step": 14699 }, { "epoch": 0.37745478005074456, "grad_norm": 0.8125, "learning_rate": 0.000175375416208774, "loss": 1.0504, "step": 14700 }, { "epoch": 0.3774804572466664, "grad_norm": 0.8046875, "learning_rate": 0.0001753724824545185, "loss": 0.8862, "step": 14701 }, { "epoch": 0.3775061344425882, "grad_norm": 1.2890625, "learning_rate": 0.00017536954855005242, "loss": 0.8824, "step": 14702 }, { "epoch": 0.37753181163851, "grad_norm": 0.73046875, "learning_rate": 0.00017536661449538168, "loss": 0.8523, "step": 14703 }, { "epoch": 0.37755748883443185, "grad_norm": 0.78515625, "learning_rate": 0.00017536368029051204, "loss": 0.9579, "step": 14704 }, { "epoch": 0.37758316603035363, "grad_norm": 0.8203125, "learning_rate": 0.00017536074593544942, "loss": 0.8934, "step": 14705 }, { "epoch": 0.37760884322627547, "grad_norm": 0.8046875, "learning_rate": 0.00017535781143019965, "loss": 0.9648, "step": 14706 }, { "epoch": 0.3776345204221973, "grad_norm": 0.77734375, "learning_rate": 0.00017535487677476853, "loss": 1.03, "step": 14707 }, { "epoch": 0.3776601976181191, "grad_norm": 0.72265625, "learning_rate": 0.00017535194196916198, "loss": 0.9264, "step": 14708 }, { "epoch": 0.3776858748140409, "grad_norm": 0.74609375, "learning_rate": 0.00017534900701338582, "loss": 0.9645, "step": 14709 }, { "epoch": 0.37771155200996276, "grad_norm": 0.71484375, "learning_rate": 0.00017534607190744587, "loss": 0.7988, "step": 14710 }, { "epoch": 0.3777372292058846, "grad_norm": 0.8125, "learning_rate": 0.00017534313665134804, "loss": 1.0542, "step": 14711 }, { "epoch": 0.3777629064018064, "grad_norm": 0.77734375, "learning_rate": 0.0001753402012450981, "loss": 0.9026, "step": 14712 }, { "epoch": 0.3777885835977282, "grad_norm": 0.875, "learning_rate": 0.00017533726568870196, "loss": 1.0038, "step": 14713 }, { "epoch": 0.37781426079365005, "grad_norm": 0.86328125, "learning_rate": 0.00017533432998216548, "loss": 0.9133, "step": 14714 }, { "epoch": 0.37783993798957183, "grad_norm": 0.73046875, "learning_rate": 0.00017533139412549445, "loss": 0.7561, "step": 14715 }, { "epoch": 0.37786561518549366, "grad_norm": 0.77734375, "learning_rate": 0.0001753284581186948, "loss": 0.8254, "step": 14716 }, { "epoch": 0.3778912923814155, "grad_norm": 0.72265625, "learning_rate": 0.0001753255219617723, "loss": 1.0098, "step": 14717 }, { "epoch": 0.3779169695773373, "grad_norm": 0.83203125, "learning_rate": 0.00017532258565473283, "loss": 0.887, "step": 14718 }, { "epoch": 0.3779426467732591, "grad_norm": 0.875, "learning_rate": 0.00017531964919758228, "loss": 0.9478, "step": 14719 }, { "epoch": 0.37796832396918095, "grad_norm": 0.79296875, "learning_rate": 0.00017531671259032647, "loss": 0.873, "step": 14720 }, { "epoch": 0.3779940011651028, "grad_norm": 0.8359375, "learning_rate": 0.0001753137758329712, "loss": 0.9947, "step": 14721 }, { "epoch": 0.37801967836102457, "grad_norm": 0.8828125, "learning_rate": 0.00017531083892552248, "loss": 0.9364, "step": 14722 }, { "epoch": 0.3780453555569464, "grad_norm": 0.7734375, "learning_rate": 0.000175307901867986, "loss": 0.9244, "step": 14723 }, { "epoch": 0.37807103275286824, "grad_norm": 0.71875, "learning_rate": 0.0001753049646603677, "loss": 0.7935, "step": 14724 }, { "epoch": 0.37809670994879, "grad_norm": 1.1015625, "learning_rate": 0.00017530202730267334, "loss": 0.9985, "step": 14725 }, { "epoch": 0.37812238714471186, "grad_norm": 0.70703125, "learning_rate": 0.0001752990897949089, "loss": 0.8866, "step": 14726 }, { "epoch": 0.3781480643406337, "grad_norm": 0.796875, "learning_rate": 0.00017529615213708017, "loss": 0.9411, "step": 14727 }, { "epoch": 0.3781737415365555, "grad_norm": 0.71875, "learning_rate": 0.000175293214329193, "loss": 0.8704, "step": 14728 }, { "epoch": 0.3781994187324773, "grad_norm": 0.75, "learning_rate": 0.00017529027637125326, "loss": 0.8197, "step": 14729 }, { "epoch": 0.37822509592839915, "grad_norm": 0.8125, "learning_rate": 0.0001752873382632668, "loss": 0.8869, "step": 14730 }, { "epoch": 0.378250773124321, "grad_norm": 0.796875, "learning_rate": 0.00017528440000523946, "loss": 0.9356, "step": 14731 }, { "epoch": 0.37827645032024276, "grad_norm": 0.828125, "learning_rate": 0.00017528146159717713, "loss": 1.032, "step": 14732 }, { "epoch": 0.3783021275161646, "grad_norm": 0.7890625, "learning_rate": 0.00017527852303908566, "loss": 0.8229, "step": 14733 }, { "epoch": 0.37832780471208644, "grad_norm": 0.734375, "learning_rate": 0.0001752755843309709, "loss": 0.8148, "step": 14734 }, { "epoch": 0.3783534819080082, "grad_norm": 0.76953125, "learning_rate": 0.00017527264547283867, "loss": 0.8065, "step": 14735 }, { "epoch": 0.37837915910393005, "grad_norm": 0.8515625, "learning_rate": 0.00017526970646469485, "loss": 0.9271, "step": 14736 }, { "epoch": 0.3784048362998519, "grad_norm": 0.7578125, "learning_rate": 0.0001752667673065453, "loss": 0.7944, "step": 14737 }, { "epoch": 0.37843051349577367, "grad_norm": 0.76953125, "learning_rate": 0.0001752638279983959, "loss": 0.907, "step": 14738 }, { "epoch": 0.3784561906916955, "grad_norm": 0.76171875, "learning_rate": 0.00017526088854025252, "loss": 1.0189, "step": 14739 }, { "epoch": 0.37848186788761734, "grad_norm": 0.8203125, "learning_rate": 0.00017525794893212093, "loss": 0.9251, "step": 14740 }, { "epoch": 0.3785075450835392, "grad_norm": 0.7734375, "learning_rate": 0.00017525500917400707, "loss": 0.8727, "step": 14741 }, { "epoch": 0.37853322227946096, "grad_norm": 0.8671875, "learning_rate": 0.0001752520692659168, "loss": 1.0066, "step": 14742 }, { "epoch": 0.3785588994753828, "grad_norm": 0.7265625, "learning_rate": 0.0001752491292078559, "loss": 0.935, "step": 14743 }, { "epoch": 0.37858457667130463, "grad_norm": 0.83984375, "learning_rate": 0.00017524618899983035, "loss": 0.8262, "step": 14744 }, { "epoch": 0.3786102538672264, "grad_norm": 0.765625, "learning_rate": 0.00017524324864184588, "loss": 0.8863, "step": 14745 }, { "epoch": 0.37863593106314825, "grad_norm": 0.8203125, "learning_rate": 0.00017524030813390845, "loss": 1.0564, "step": 14746 }, { "epoch": 0.3786616082590701, "grad_norm": 0.765625, "learning_rate": 0.00017523736747602388, "loss": 0.8915, "step": 14747 }, { "epoch": 0.37868728545499186, "grad_norm": 0.765625, "learning_rate": 0.000175234426668198, "loss": 0.9054, "step": 14748 }, { "epoch": 0.3787129626509137, "grad_norm": 0.80078125, "learning_rate": 0.00017523148571043672, "loss": 0.9427, "step": 14749 }, { "epoch": 0.37873863984683553, "grad_norm": 0.8671875, "learning_rate": 0.0001752285446027459, "loss": 0.9563, "step": 14750 }, { "epoch": 0.37876431704275737, "grad_norm": 0.72265625, "learning_rate": 0.0001752256033451314, "loss": 1.0691, "step": 14751 }, { "epoch": 0.37878999423867915, "grad_norm": 0.89453125, "learning_rate": 0.00017522266193759905, "loss": 1.0541, "step": 14752 }, { "epoch": 0.378815671434601, "grad_norm": 0.76171875, "learning_rate": 0.0001752197203801547, "loss": 0.9414, "step": 14753 }, { "epoch": 0.3788413486305228, "grad_norm": 0.74609375, "learning_rate": 0.00017521677867280428, "loss": 1.0414, "step": 14754 }, { "epoch": 0.3788670258264446, "grad_norm": 0.87890625, "learning_rate": 0.00017521383681555358, "loss": 0.8524, "step": 14755 }, { "epoch": 0.37889270302236644, "grad_norm": 0.83984375, "learning_rate": 0.00017521089480840852, "loss": 0.876, "step": 14756 }, { "epoch": 0.3789183802182883, "grad_norm": 1.8984375, "learning_rate": 0.00017520795265137495, "loss": 0.9616, "step": 14757 }, { "epoch": 0.37894405741421006, "grad_norm": 0.7109375, "learning_rate": 0.00017520501034445872, "loss": 0.8175, "step": 14758 }, { "epoch": 0.3789697346101319, "grad_norm": 0.80859375, "learning_rate": 0.00017520206788766568, "loss": 1.0651, "step": 14759 }, { "epoch": 0.37899541180605373, "grad_norm": 1.0703125, "learning_rate": 0.00017519912528100173, "loss": 1.0195, "step": 14760 }, { "epoch": 0.37902108900197556, "grad_norm": 0.83203125, "learning_rate": 0.0001751961825244727, "loss": 0.9975, "step": 14761 }, { "epoch": 0.37904676619789734, "grad_norm": 0.7890625, "learning_rate": 0.00017519323961808446, "loss": 0.9619, "step": 14762 }, { "epoch": 0.3790724433938192, "grad_norm": 0.765625, "learning_rate": 0.0001751902965618429, "loss": 0.8135, "step": 14763 }, { "epoch": 0.379098120589741, "grad_norm": 0.796875, "learning_rate": 0.00017518735335575388, "loss": 1.046, "step": 14764 }, { "epoch": 0.3791237977856628, "grad_norm": 0.84375, "learning_rate": 0.00017518440999982326, "loss": 0.9375, "step": 14765 }, { "epoch": 0.37914947498158463, "grad_norm": 0.83984375, "learning_rate": 0.0001751814664940569, "loss": 1.1232, "step": 14766 }, { "epoch": 0.37917515217750647, "grad_norm": 0.72265625, "learning_rate": 0.00017517852283846069, "loss": 0.847, "step": 14767 }, { "epoch": 0.37920082937342825, "grad_norm": 0.828125, "learning_rate": 0.00017517557903304042, "loss": 0.9551, "step": 14768 }, { "epoch": 0.3792265065693501, "grad_norm": 0.83203125, "learning_rate": 0.00017517263507780203, "loss": 0.7958, "step": 14769 }, { "epoch": 0.3792521837652719, "grad_norm": 0.8984375, "learning_rate": 0.00017516969097275138, "loss": 0.9228, "step": 14770 }, { "epoch": 0.37927786096119376, "grad_norm": 0.78515625, "learning_rate": 0.00017516674671789431, "loss": 1.033, "step": 14771 }, { "epoch": 0.37930353815711554, "grad_norm": 0.74609375, "learning_rate": 0.00017516380231323675, "loss": 0.9216, "step": 14772 }, { "epoch": 0.3793292153530374, "grad_norm": 0.78515625, "learning_rate": 0.0001751608577587845, "loss": 0.9283, "step": 14773 }, { "epoch": 0.3793548925489592, "grad_norm": 0.79296875, "learning_rate": 0.0001751579130545434, "loss": 0.8541, "step": 14774 }, { "epoch": 0.379380569744881, "grad_norm": 0.77734375, "learning_rate": 0.00017515496820051942, "loss": 0.9645, "step": 14775 }, { "epoch": 0.3794062469408028, "grad_norm": 0.85546875, "learning_rate": 0.00017515202319671834, "loss": 0.8887, "step": 14776 }, { "epoch": 0.37943192413672466, "grad_norm": 0.82421875, "learning_rate": 0.0001751490780431461, "loss": 1.0078, "step": 14777 }, { "epoch": 0.37945760133264644, "grad_norm": 0.9765625, "learning_rate": 0.00017514613273980853, "loss": 0.9579, "step": 14778 }, { "epoch": 0.3794832785285683, "grad_norm": 0.7890625, "learning_rate": 0.00017514318728671148, "loss": 0.9254, "step": 14779 }, { "epoch": 0.3795089557244901, "grad_norm": 0.77734375, "learning_rate": 0.00017514024168386087, "loss": 0.9191, "step": 14780 }, { "epoch": 0.37953463292041195, "grad_norm": 0.83984375, "learning_rate": 0.00017513729593126253, "loss": 0.7902, "step": 14781 }, { "epoch": 0.37956031011633373, "grad_norm": 0.796875, "learning_rate": 0.00017513435002892237, "loss": 0.9475, "step": 14782 }, { "epoch": 0.37958598731225557, "grad_norm": 0.828125, "learning_rate": 0.00017513140397684625, "loss": 0.916, "step": 14783 }, { "epoch": 0.3796116645081774, "grad_norm": 0.87890625, "learning_rate": 0.00017512845777504, "loss": 0.9892, "step": 14784 }, { "epoch": 0.3796373417040992, "grad_norm": 0.890625, "learning_rate": 0.0001751255114235095, "loss": 1.1218, "step": 14785 }, { "epoch": 0.379663018900021, "grad_norm": 0.7734375, "learning_rate": 0.00017512256492226066, "loss": 0.9506, "step": 14786 }, { "epoch": 0.37968869609594286, "grad_norm": 0.8828125, "learning_rate": 0.00017511961827129934, "loss": 0.9078, "step": 14787 }, { "epoch": 0.37971437329186464, "grad_norm": 0.80078125, "learning_rate": 0.0001751166714706314, "loss": 0.986, "step": 14788 }, { "epoch": 0.3797400504877865, "grad_norm": 0.80859375, "learning_rate": 0.00017511372452026276, "loss": 0.8415, "step": 14789 }, { "epoch": 0.3797657276837083, "grad_norm": 0.80859375, "learning_rate": 0.0001751107774201992, "loss": 1.1193, "step": 14790 }, { "epoch": 0.37979140487963015, "grad_norm": 0.75390625, "learning_rate": 0.00017510783017044666, "loss": 0.9365, "step": 14791 }, { "epoch": 0.3798170820755519, "grad_norm": 0.75, "learning_rate": 0.000175104882771011, "loss": 1.1275, "step": 14792 }, { "epoch": 0.37984275927147376, "grad_norm": 0.80078125, "learning_rate": 0.0001751019352218981, "loss": 0.9406, "step": 14793 }, { "epoch": 0.3798684364673956, "grad_norm": 0.83203125, "learning_rate": 0.00017509898752311382, "loss": 0.8942, "step": 14794 }, { "epoch": 0.3798941136633174, "grad_norm": 0.77734375, "learning_rate": 0.00017509603967466405, "loss": 0.9572, "step": 14795 }, { "epoch": 0.3799197908592392, "grad_norm": 0.80078125, "learning_rate": 0.00017509309167655464, "loss": 1.0164, "step": 14796 }, { "epoch": 0.37994546805516105, "grad_norm": 0.7734375, "learning_rate": 0.0001750901435287915, "loss": 0.9421, "step": 14797 }, { "epoch": 0.37997114525108283, "grad_norm": 0.8359375, "learning_rate": 0.00017508719523138047, "loss": 1.1053, "step": 14798 }, { "epoch": 0.37999682244700467, "grad_norm": 0.8046875, "learning_rate": 0.00017508424678432745, "loss": 0.9112, "step": 14799 }, { "epoch": 0.3800224996429265, "grad_norm": 0.98046875, "learning_rate": 0.0001750812981876383, "loss": 0.9307, "step": 14800 }, { "epoch": 0.38004817683884834, "grad_norm": 0.78125, "learning_rate": 0.0001750783494413189, "loss": 0.9976, "step": 14801 }, { "epoch": 0.3800738540347701, "grad_norm": 0.76171875, "learning_rate": 0.00017507540054537515, "loss": 0.9671, "step": 14802 }, { "epoch": 0.38009953123069196, "grad_norm": 0.7421875, "learning_rate": 0.0001750724514998129, "loss": 0.8548, "step": 14803 }, { "epoch": 0.3801252084266138, "grad_norm": 0.8515625, "learning_rate": 0.00017506950230463804, "loss": 0.949, "step": 14804 }, { "epoch": 0.3801508856225356, "grad_norm": 0.82421875, "learning_rate": 0.00017506655295985643, "loss": 0.7889, "step": 14805 }, { "epoch": 0.3801765628184574, "grad_norm": 0.78515625, "learning_rate": 0.00017506360346547398, "loss": 1.0847, "step": 14806 }, { "epoch": 0.38020224001437924, "grad_norm": 0.7421875, "learning_rate": 0.00017506065382149656, "loss": 0.8417, "step": 14807 }, { "epoch": 0.380227917210301, "grad_norm": 0.77734375, "learning_rate": 0.00017505770402793, "loss": 0.9908, "step": 14808 }, { "epoch": 0.38025359440622286, "grad_norm": 0.80078125, "learning_rate": 0.00017505475408478022, "loss": 0.9381, "step": 14809 }, { "epoch": 0.3802792716021447, "grad_norm": 0.79296875, "learning_rate": 0.0001750518039920531, "loss": 1.0112, "step": 14810 }, { "epoch": 0.38030494879806653, "grad_norm": 0.7578125, "learning_rate": 0.00017504885374975454, "loss": 0.9279, "step": 14811 }, { "epoch": 0.3803306259939883, "grad_norm": 0.734375, "learning_rate": 0.00017504590335789037, "loss": 0.9347, "step": 14812 }, { "epoch": 0.38035630318991015, "grad_norm": 0.7890625, "learning_rate": 0.0001750429528164665, "loss": 0.8843, "step": 14813 }, { "epoch": 0.380381980385832, "grad_norm": 0.8046875, "learning_rate": 0.0001750400021254888, "loss": 0.9442, "step": 14814 }, { "epoch": 0.38040765758175377, "grad_norm": 0.8359375, "learning_rate": 0.00017503705128496316, "loss": 1.0364, "step": 14815 }, { "epoch": 0.3804333347776756, "grad_norm": 0.76953125, "learning_rate": 0.00017503410029489545, "loss": 0.8998, "step": 14816 }, { "epoch": 0.38045901197359744, "grad_norm": 0.7734375, "learning_rate": 0.00017503114915529155, "loss": 0.8501, "step": 14817 }, { "epoch": 0.3804846891695192, "grad_norm": 0.7890625, "learning_rate": 0.00017502819786615736, "loss": 0.9444, "step": 14818 }, { "epoch": 0.38051036636544106, "grad_norm": 0.75390625, "learning_rate": 0.00017502524642749875, "loss": 0.9057, "step": 14819 }, { "epoch": 0.3805360435613629, "grad_norm": 0.84765625, "learning_rate": 0.0001750222948393216, "loss": 1.0651, "step": 14820 }, { "epoch": 0.3805617207572847, "grad_norm": 0.7890625, "learning_rate": 0.00017501934310163177, "loss": 0.946, "step": 14821 }, { "epoch": 0.3805873979532065, "grad_norm": 0.984375, "learning_rate": 0.00017501639121443518, "loss": 1.0203, "step": 14822 }, { "epoch": 0.38061307514912834, "grad_norm": 0.73828125, "learning_rate": 0.00017501343917773773, "loss": 0.844, "step": 14823 }, { "epoch": 0.3806387523450502, "grad_norm": 0.78125, "learning_rate": 0.00017501048699154523, "loss": 0.867, "step": 14824 }, { "epoch": 0.38066442954097196, "grad_norm": 0.78125, "learning_rate": 0.0001750075346558636, "loss": 0.9198, "step": 14825 }, { "epoch": 0.3806901067368938, "grad_norm": 0.82421875, "learning_rate": 0.00017500458217069875, "loss": 0.8909, "step": 14826 }, { "epoch": 0.38071578393281563, "grad_norm": 0.78515625, "learning_rate": 0.00017500162953605654, "loss": 0.8374, "step": 14827 }, { "epoch": 0.3807414611287374, "grad_norm": 0.72265625, "learning_rate": 0.0001749986767519429, "loss": 0.7214, "step": 14828 }, { "epoch": 0.38076713832465925, "grad_norm": 0.8671875, "learning_rate": 0.00017499572381836356, "loss": 1.0294, "step": 14829 }, { "epoch": 0.3807928155205811, "grad_norm": 0.84765625, "learning_rate": 0.0001749927707353246, "loss": 0.9457, "step": 14830 }, { "epoch": 0.3808184927165029, "grad_norm": 0.90234375, "learning_rate": 0.00017498981750283183, "loss": 0.8037, "step": 14831 }, { "epoch": 0.3808441699124247, "grad_norm": 0.734375, "learning_rate": 0.0001749868641208911, "loss": 0.9581, "step": 14832 }, { "epoch": 0.38086984710834654, "grad_norm": 0.70703125, "learning_rate": 0.0001749839105895083, "loss": 0.9154, "step": 14833 }, { "epoch": 0.3808955243042684, "grad_norm": 0.7421875, "learning_rate": 0.00017498095690868942, "loss": 0.9403, "step": 14834 }, { "epoch": 0.38092120150019015, "grad_norm": 0.74609375, "learning_rate": 0.00017497800307844018, "loss": 0.9349, "step": 14835 }, { "epoch": 0.380946878696112, "grad_norm": 0.74609375, "learning_rate": 0.0001749750490987666, "loss": 0.9692, "step": 14836 }, { "epoch": 0.3809725558920338, "grad_norm": 0.8203125, "learning_rate": 0.00017497209496967448, "loss": 0.899, "step": 14837 }, { "epoch": 0.3809982330879556, "grad_norm": 1.140625, "learning_rate": 0.0001749691406911698, "loss": 0.9712, "step": 14838 }, { "epoch": 0.38102391028387744, "grad_norm": 0.80078125, "learning_rate": 0.00017496618626325835, "loss": 0.9254, "step": 14839 }, { "epoch": 0.3810495874797993, "grad_norm": 0.8203125, "learning_rate": 0.0001749632316859461, "loss": 0.9793, "step": 14840 }, { "epoch": 0.3810752646757211, "grad_norm": 0.80078125, "learning_rate": 0.0001749602769592389, "loss": 0.9698, "step": 14841 }, { "epoch": 0.3811009418716429, "grad_norm": 0.734375, "learning_rate": 0.0001749573220831426, "loss": 0.85, "step": 14842 }, { "epoch": 0.38112661906756473, "grad_norm": 0.82421875, "learning_rate": 0.00017495436705766313, "loss": 1.0802, "step": 14843 }, { "epoch": 0.38115229626348657, "grad_norm": 0.8515625, "learning_rate": 0.00017495141188280638, "loss": 0.9133, "step": 14844 }, { "epoch": 0.38117797345940835, "grad_norm": 0.765625, "learning_rate": 0.0001749484565585783, "loss": 1.0647, "step": 14845 }, { "epoch": 0.3812036506553302, "grad_norm": 0.76953125, "learning_rate": 0.00017494550108498462, "loss": 1.0045, "step": 14846 }, { "epoch": 0.381229327851252, "grad_norm": 0.86328125, "learning_rate": 0.0001749425454620314, "loss": 1.1342, "step": 14847 }, { "epoch": 0.3812550050471738, "grad_norm": 0.6953125, "learning_rate": 0.00017493958968972445, "loss": 0.736, "step": 14848 }, { "epoch": 0.38128068224309564, "grad_norm": 0.80078125, "learning_rate": 0.00017493663376806962, "loss": 0.9408, "step": 14849 }, { "epoch": 0.3813063594390175, "grad_norm": 0.86328125, "learning_rate": 0.00017493367769707288, "loss": 0.9125, "step": 14850 }, { "epoch": 0.3813320366349393, "grad_norm": 0.77734375, "learning_rate": 0.00017493072147674007, "loss": 1.0625, "step": 14851 }, { "epoch": 0.3813577138308611, "grad_norm": 0.7421875, "learning_rate": 0.00017492776510707713, "loss": 0.8023, "step": 14852 }, { "epoch": 0.3813833910267829, "grad_norm": 0.80078125, "learning_rate": 0.0001749248085880899, "loss": 0.9364, "step": 14853 }, { "epoch": 0.38140906822270476, "grad_norm": 0.7578125, "learning_rate": 0.00017492185191978428, "loss": 0.8539, "step": 14854 }, { "epoch": 0.38143474541862654, "grad_norm": 0.7890625, "learning_rate": 0.00017491889510216622, "loss": 0.9575, "step": 14855 }, { "epoch": 0.3814604226145484, "grad_norm": 0.828125, "learning_rate": 0.00017491593813524154, "loss": 0.8829, "step": 14856 }, { "epoch": 0.3814860998104702, "grad_norm": 0.77734375, "learning_rate": 0.00017491298101901615, "loss": 0.7491, "step": 14857 }, { "epoch": 0.381511777006392, "grad_norm": 0.81640625, "learning_rate": 0.00017491002375349601, "loss": 1.0953, "step": 14858 }, { "epoch": 0.38153745420231383, "grad_norm": 0.7265625, "learning_rate": 0.00017490706633868693, "loss": 0.9374, "step": 14859 }, { "epoch": 0.38156313139823567, "grad_norm": 0.77734375, "learning_rate": 0.0001749041087745948, "loss": 0.8296, "step": 14860 }, { "epoch": 0.3815888085941575, "grad_norm": 0.75, "learning_rate": 0.0001749011510612256, "loss": 0.9425, "step": 14861 }, { "epoch": 0.3816144857900793, "grad_norm": 0.73828125, "learning_rate": 0.00017489819319858513, "loss": 0.9379, "step": 14862 }, { "epoch": 0.3816401629860011, "grad_norm": 1.2734375, "learning_rate": 0.00017489523518667937, "loss": 0.9191, "step": 14863 }, { "epoch": 0.38166584018192296, "grad_norm": 0.7734375, "learning_rate": 0.00017489227702551413, "loss": 0.8012, "step": 14864 }, { "epoch": 0.38169151737784474, "grad_norm": 0.7734375, "learning_rate": 0.00017488931871509534, "loss": 0.8978, "step": 14865 }, { "epoch": 0.38171719457376657, "grad_norm": 0.91015625, "learning_rate": 0.00017488636025542891, "loss": 1.0197, "step": 14866 }, { "epoch": 0.3817428717696884, "grad_norm": 0.87890625, "learning_rate": 0.00017488340164652078, "loss": 0.8448, "step": 14867 }, { "epoch": 0.3817685489656102, "grad_norm": 0.8359375, "learning_rate": 0.00017488044288837675, "loss": 1.0791, "step": 14868 }, { "epoch": 0.381794226161532, "grad_norm": 0.80859375, "learning_rate": 0.00017487748398100278, "loss": 1.0094, "step": 14869 }, { "epoch": 0.38181990335745386, "grad_norm": 0.859375, "learning_rate": 0.00017487452492440472, "loss": 1.0171, "step": 14870 }, { "epoch": 0.38184558055337564, "grad_norm": 0.84765625, "learning_rate": 0.0001748715657185885, "loss": 0.9714, "step": 14871 }, { "epoch": 0.3818712577492975, "grad_norm": 0.76171875, "learning_rate": 0.00017486860636356004, "loss": 0.8072, "step": 14872 }, { "epoch": 0.3818969349452193, "grad_norm": 0.68359375, "learning_rate": 0.00017486564685932518, "loss": 0.9035, "step": 14873 }, { "epoch": 0.38192261214114115, "grad_norm": 0.8125, "learning_rate": 0.00017486268720588987, "loss": 0.9842, "step": 14874 }, { "epoch": 0.38194828933706293, "grad_norm": 0.91015625, "learning_rate": 0.00017485972740326, "loss": 0.9011, "step": 14875 }, { "epoch": 0.38197396653298477, "grad_norm": 0.82421875, "learning_rate": 0.0001748567674514414, "loss": 1.0384, "step": 14876 }, { "epoch": 0.3819996437289066, "grad_norm": 0.78125, "learning_rate": 0.00017485380735044007, "loss": 0.835, "step": 14877 }, { "epoch": 0.3820253209248284, "grad_norm": 0.83203125, "learning_rate": 0.00017485084710026184, "loss": 0.9816, "step": 14878 }, { "epoch": 0.3820509981207502, "grad_norm": 0.796875, "learning_rate": 0.00017484788670091267, "loss": 0.8621, "step": 14879 }, { "epoch": 0.38207667531667205, "grad_norm": 0.796875, "learning_rate": 0.0001748449261523984, "loss": 0.9869, "step": 14880 }, { "epoch": 0.38210235251259383, "grad_norm": 0.86328125, "learning_rate": 0.00017484196545472494, "loss": 0.8925, "step": 14881 }, { "epoch": 0.38212802970851567, "grad_norm": 0.79296875, "learning_rate": 0.00017483900460789822, "loss": 0.8812, "step": 14882 }, { "epoch": 0.3821537069044375, "grad_norm": 0.7421875, "learning_rate": 0.00017483604361192412, "loss": 0.8509, "step": 14883 }, { "epoch": 0.38217938410035934, "grad_norm": 2.671875, "learning_rate": 0.00017483308246680855, "loss": 0.9273, "step": 14884 }, { "epoch": 0.3822050612962811, "grad_norm": 0.7890625, "learning_rate": 0.0001748301211725574, "loss": 0.8523, "step": 14885 }, { "epoch": 0.38223073849220296, "grad_norm": 0.93359375, "learning_rate": 0.0001748271597291766, "loss": 1.0873, "step": 14886 }, { "epoch": 0.3822564156881248, "grad_norm": 0.828125, "learning_rate": 0.00017482419813667196, "loss": 1.0245, "step": 14887 }, { "epoch": 0.3822820928840466, "grad_norm": 0.76171875, "learning_rate": 0.00017482123639504953, "loss": 0.9788, "step": 14888 }, { "epoch": 0.3823077700799684, "grad_norm": 0.7578125, "learning_rate": 0.0001748182745043151, "loss": 0.8743, "step": 14889 }, { "epoch": 0.38233344727589025, "grad_norm": 0.8046875, "learning_rate": 0.0001748153124644746, "loss": 0.8426, "step": 14890 }, { "epoch": 0.38235912447181203, "grad_norm": 0.73046875, "learning_rate": 0.00017481235027553397, "loss": 0.9836, "step": 14891 }, { "epoch": 0.38238480166773386, "grad_norm": 0.77734375, "learning_rate": 0.00017480938793749905, "loss": 1.0382, "step": 14892 }, { "epoch": 0.3824104788636557, "grad_norm": 0.76953125, "learning_rate": 0.0001748064254503758, "loss": 0.9119, "step": 14893 }, { "epoch": 0.38243615605957754, "grad_norm": 0.76953125, "learning_rate": 0.0001748034628141701, "loss": 0.9924, "step": 14894 }, { "epoch": 0.3824618332554993, "grad_norm": 0.78125, "learning_rate": 0.00017480050002888784, "loss": 1.0503, "step": 14895 }, { "epoch": 0.38248751045142115, "grad_norm": 0.78515625, "learning_rate": 0.00017479753709453494, "loss": 0.7456, "step": 14896 }, { "epoch": 0.382513187647343, "grad_norm": 0.796875, "learning_rate": 0.00017479457401111735, "loss": 0.8982, "step": 14897 }, { "epoch": 0.38253886484326477, "grad_norm": 0.875, "learning_rate": 0.00017479161077864087, "loss": 0.9297, "step": 14898 }, { "epoch": 0.3825645420391866, "grad_norm": 0.7578125, "learning_rate": 0.0001747886473971115, "loss": 0.8997, "step": 14899 }, { "epoch": 0.38259021923510844, "grad_norm": 0.84765625, "learning_rate": 0.0001747856838665351, "loss": 0.9157, "step": 14900 }, { "epoch": 0.3826158964310302, "grad_norm": 0.77734375, "learning_rate": 0.00017478272018691757, "loss": 0.8334, "step": 14901 }, { "epoch": 0.38264157362695206, "grad_norm": 0.80078125, "learning_rate": 0.00017477975635826488, "loss": 0.9387, "step": 14902 }, { "epoch": 0.3826672508228739, "grad_norm": 0.78125, "learning_rate": 0.00017477679238058286, "loss": 0.8856, "step": 14903 }, { "epoch": 0.38269292801879573, "grad_norm": 0.82421875, "learning_rate": 0.00017477382825387746, "loss": 0.895, "step": 14904 }, { "epoch": 0.3827186052147175, "grad_norm": 0.75, "learning_rate": 0.00017477086397815454, "loss": 1.0337, "step": 14905 }, { "epoch": 0.38274428241063935, "grad_norm": 0.8046875, "learning_rate": 0.00017476789955342005, "loss": 1.0093, "step": 14906 }, { "epoch": 0.3827699596065612, "grad_norm": 0.7421875, "learning_rate": 0.0001747649349796799, "loss": 1.0566, "step": 14907 }, { "epoch": 0.38279563680248296, "grad_norm": 0.77734375, "learning_rate": 0.00017476197025694003, "loss": 0.7714, "step": 14908 }, { "epoch": 0.3828213139984048, "grad_norm": 0.8359375, "learning_rate": 0.00017475900538520628, "loss": 1.0442, "step": 14909 }, { "epoch": 0.38284699119432664, "grad_norm": 0.8125, "learning_rate": 0.00017475604036448457, "loss": 0.8771, "step": 14910 }, { "epoch": 0.3828726683902484, "grad_norm": 0.8671875, "learning_rate": 0.00017475307519478083, "loss": 1.209, "step": 14911 }, { "epoch": 0.38289834558617025, "grad_norm": 0.74609375, "learning_rate": 0.00017475010987610097, "loss": 1.0169, "step": 14912 }, { "epoch": 0.3829240227820921, "grad_norm": 0.875, "learning_rate": 0.00017474714440845088, "loss": 0.9369, "step": 14913 }, { "epoch": 0.3829496999780139, "grad_norm": 0.77734375, "learning_rate": 0.00017474417879183649, "loss": 0.9105, "step": 14914 }, { "epoch": 0.3829753771739357, "grad_norm": 0.7578125, "learning_rate": 0.0001747412130262637, "loss": 0.98, "step": 14915 }, { "epoch": 0.38300105436985754, "grad_norm": 0.75390625, "learning_rate": 0.00017473824711173843, "loss": 0.8227, "step": 14916 }, { "epoch": 0.3830267315657794, "grad_norm": 0.7109375, "learning_rate": 0.00017473528104826658, "loss": 0.8121, "step": 14917 }, { "epoch": 0.38305240876170116, "grad_norm": 0.85546875, "learning_rate": 0.0001747323148358541, "loss": 0.9062, "step": 14918 }, { "epoch": 0.383078085957623, "grad_norm": 0.83203125, "learning_rate": 0.0001747293484745068, "loss": 0.9027, "step": 14919 }, { "epoch": 0.38310376315354483, "grad_norm": 0.921875, "learning_rate": 0.00017472638196423072, "loss": 0.9024, "step": 14920 }, { "epoch": 0.3831294403494666, "grad_norm": 0.77734375, "learning_rate": 0.00017472341530503166, "loss": 1.0472, "step": 14921 }, { "epoch": 0.38315511754538845, "grad_norm": 0.81640625, "learning_rate": 0.0001747204484969156, "loss": 0.9354, "step": 14922 }, { "epoch": 0.3831807947413103, "grad_norm": 0.9453125, "learning_rate": 0.00017471748153988846, "loss": 0.8977, "step": 14923 }, { "epoch": 0.3832064719372321, "grad_norm": 0.77734375, "learning_rate": 0.00017471451443395612, "loss": 0.9194, "step": 14924 }, { "epoch": 0.3832321491331539, "grad_norm": 0.765625, "learning_rate": 0.00017471154717912447, "loss": 0.8991, "step": 14925 }, { "epoch": 0.38325782632907573, "grad_norm": 0.7578125, "learning_rate": 0.00017470857977539952, "loss": 0.9591, "step": 14926 }, { "epoch": 0.38328350352499757, "grad_norm": 0.78515625, "learning_rate": 0.00017470561222278705, "loss": 0.9703, "step": 14927 }, { "epoch": 0.38330918072091935, "grad_norm": 0.85546875, "learning_rate": 0.00017470264452129308, "loss": 0.921, "step": 14928 }, { "epoch": 0.3833348579168412, "grad_norm": 1.0703125, "learning_rate": 0.00017469967667092345, "loss": 0.9942, "step": 14929 }, { "epoch": 0.383360535112763, "grad_norm": 0.86328125, "learning_rate": 0.00017469670867168417, "loss": 0.9638, "step": 14930 }, { "epoch": 0.3833862123086848, "grad_norm": 0.7421875, "learning_rate": 0.00017469374052358108, "loss": 0.9197, "step": 14931 }, { "epoch": 0.38341188950460664, "grad_norm": 0.71875, "learning_rate": 0.0001746907722266201, "loss": 0.9833, "step": 14932 }, { "epoch": 0.3834375667005285, "grad_norm": 0.8046875, "learning_rate": 0.00017468780378080716, "loss": 0.9549, "step": 14933 }, { "epoch": 0.3834632438964503, "grad_norm": 0.7421875, "learning_rate": 0.00017468483518614814, "loss": 0.9197, "step": 14934 }, { "epoch": 0.3834889210923721, "grad_norm": 0.76953125, "learning_rate": 0.000174681866442649, "loss": 0.8714, "step": 14935 }, { "epoch": 0.38351459828829393, "grad_norm": 0.75, "learning_rate": 0.0001746788975503157, "loss": 0.9667, "step": 14936 }, { "epoch": 0.38354027548421576, "grad_norm": 0.68359375, "learning_rate": 0.00017467592850915406, "loss": 0.8203, "step": 14937 }, { "epoch": 0.38356595268013755, "grad_norm": 0.7890625, "learning_rate": 0.00017467295931917004, "loss": 0.8544, "step": 14938 }, { "epoch": 0.3835916298760594, "grad_norm": 0.85546875, "learning_rate": 0.00017466998998036956, "loss": 0.8821, "step": 14939 }, { "epoch": 0.3836173070719812, "grad_norm": 0.796875, "learning_rate": 0.00017466702049275857, "loss": 0.8857, "step": 14940 }, { "epoch": 0.383642984267903, "grad_norm": 0.8203125, "learning_rate": 0.00017466405085634292, "loss": 0.9481, "step": 14941 }, { "epoch": 0.38366866146382483, "grad_norm": 0.71875, "learning_rate": 0.00017466108107112857, "loss": 0.9484, "step": 14942 }, { "epoch": 0.38369433865974667, "grad_norm": 0.83203125, "learning_rate": 0.0001746581111371214, "loss": 0.9017, "step": 14943 }, { "epoch": 0.3837200158556685, "grad_norm": 0.80078125, "learning_rate": 0.0001746551410543274, "loss": 0.8456, "step": 14944 }, { "epoch": 0.3837456930515903, "grad_norm": 0.83984375, "learning_rate": 0.00017465217082275242, "loss": 1.0236, "step": 14945 }, { "epoch": 0.3837713702475121, "grad_norm": 0.828125, "learning_rate": 0.00017464920044240242, "loss": 1.0542, "step": 14946 }, { "epoch": 0.38379704744343396, "grad_norm": 0.81640625, "learning_rate": 0.0001746462299132833, "loss": 0.9662, "step": 14947 }, { "epoch": 0.38382272463935574, "grad_norm": 0.80859375, "learning_rate": 0.000174643259235401, "loss": 0.8384, "step": 14948 }, { "epoch": 0.3838484018352776, "grad_norm": 0.8203125, "learning_rate": 0.00017464028840876144, "loss": 0.9871, "step": 14949 }, { "epoch": 0.3838740790311994, "grad_norm": 0.76171875, "learning_rate": 0.0001746373174333705, "loss": 0.9737, "step": 14950 }, { "epoch": 0.3838997562271212, "grad_norm": 0.8359375, "learning_rate": 0.00017463434630923412, "loss": 1.0791, "step": 14951 }, { "epoch": 0.383925433423043, "grad_norm": 0.78125, "learning_rate": 0.00017463137503635824, "loss": 0.9457, "step": 14952 }, { "epoch": 0.38395111061896486, "grad_norm": 0.76953125, "learning_rate": 0.0001746284036147488, "loss": 0.8467, "step": 14953 }, { "epoch": 0.3839767878148867, "grad_norm": 0.7578125, "learning_rate": 0.00017462543204441164, "loss": 0.8898, "step": 14954 }, { "epoch": 0.3840024650108085, "grad_norm": 0.734375, "learning_rate": 0.00017462246032535277, "loss": 0.9067, "step": 14955 }, { "epoch": 0.3840281422067303, "grad_norm": 0.84375, "learning_rate": 0.00017461948845757809, "loss": 0.9075, "step": 14956 }, { "epoch": 0.38405381940265215, "grad_norm": 0.74609375, "learning_rate": 0.00017461651644109347, "loss": 0.9335, "step": 14957 }, { "epoch": 0.38407949659857393, "grad_norm": 0.734375, "learning_rate": 0.0001746135442759049, "loss": 0.8426, "step": 14958 }, { "epoch": 0.38410517379449577, "grad_norm": 0.7578125, "learning_rate": 0.00017461057196201826, "loss": 1.0054, "step": 14959 }, { "epoch": 0.3841308509904176, "grad_norm": 0.80859375, "learning_rate": 0.0001746075994994395, "loss": 0.9298, "step": 14960 }, { "epoch": 0.3841565281863394, "grad_norm": 0.8046875, "learning_rate": 0.0001746046268881745, "loss": 0.8936, "step": 14961 }, { "epoch": 0.3841822053822612, "grad_norm": 0.7265625, "learning_rate": 0.00017460165412822926, "loss": 0.9468, "step": 14962 }, { "epoch": 0.38420788257818306, "grad_norm": 0.8125, "learning_rate": 0.00017459868121960965, "loss": 0.8773, "step": 14963 }, { "epoch": 0.3842335597741049, "grad_norm": 0.75, "learning_rate": 0.0001745957081623216, "loss": 1.0215, "step": 14964 }, { "epoch": 0.3842592369700267, "grad_norm": 0.7578125, "learning_rate": 0.00017459273495637104, "loss": 1.0255, "step": 14965 }, { "epoch": 0.3842849141659485, "grad_norm": 0.71875, "learning_rate": 0.00017458976160176395, "loss": 0.866, "step": 14966 }, { "epoch": 0.38431059136187035, "grad_norm": 0.7734375, "learning_rate": 0.00017458678809850614, "loss": 0.8721, "step": 14967 }, { "epoch": 0.3843362685577921, "grad_norm": 0.83984375, "learning_rate": 0.0001745838144466036, "loss": 0.9493, "step": 14968 }, { "epoch": 0.38436194575371396, "grad_norm": 0.76953125, "learning_rate": 0.00017458084064606228, "loss": 0.9205, "step": 14969 }, { "epoch": 0.3843876229496358, "grad_norm": 0.7734375, "learning_rate": 0.00017457786669688806, "loss": 0.8886, "step": 14970 }, { "epoch": 0.3844133001455576, "grad_norm": 0.79296875, "learning_rate": 0.00017457489259908691, "loss": 0.9486, "step": 14971 }, { "epoch": 0.3844389773414794, "grad_norm": 0.80859375, "learning_rate": 0.00017457191835266477, "loss": 0.8954, "step": 14972 }, { "epoch": 0.38446465453740125, "grad_norm": 0.765625, "learning_rate": 0.00017456894395762745, "loss": 1.0517, "step": 14973 }, { "epoch": 0.3844903317333231, "grad_norm": 0.78125, "learning_rate": 0.000174565969413981, "loss": 0.9068, "step": 14974 }, { "epoch": 0.38451600892924487, "grad_norm": 0.85546875, "learning_rate": 0.00017456299472173132, "loss": 0.8662, "step": 14975 }, { "epoch": 0.3845416861251667, "grad_norm": 0.796875, "learning_rate": 0.00017456001988088433, "loss": 0.8635, "step": 14976 }, { "epoch": 0.38456736332108854, "grad_norm": 0.78515625, "learning_rate": 0.00017455704489144594, "loss": 0.959, "step": 14977 }, { "epoch": 0.3845930405170103, "grad_norm": 0.7734375, "learning_rate": 0.00017455406975342208, "loss": 0.9209, "step": 14978 }, { "epoch": 0.38461871771293216, "grad_norm": 0.83984375, "learning_rate": 0.00017455109446681873, "loss": 0.8475, "step": 14979 }, { "epoch": 0.384644394908854, "grad_norm": 0.765625, "learning_rate": 0.00017454811903164176, "loss": 0.9711, "step": 14980 }, { "epoch": 0.3846700721047758, "grad_norm": 0.80078125, "learning_rate": 0.00017454514344789713, "loss": 0.9029, "step": 14981 }, { "epoch": 0.3846957493006976, "grad_norm": 0.8203125, "learning_rate": 0.00017454216771559073, "loss": 1.1088, "step": 14982 }, { "epoch": 0.38472142649661945, "grad_norm": 0.7265625, "learning_rate": 0.00017453919183472855, "loss": 1.0394, "step": 14983 }, { "epoch": 0.3847471036925413, "grad_norm": 0.86328125, "learning_rate": 0.0001745362158053165, "loss": 0.8825, "step": 14984 }, { "epoch": 0.38477278088846306, "grad_norm": 0.73828125, "learning_rate": 0.0001745332396273605, "loss": 1.1211, "step": 14985 }, { "epoch": 0.3847984580843849, "grad_norm": 0.73046875, "learning_rate": 0.00017453026330086649, "loss": 0.9627, "step": 14986 }, { "epoch": 0.38482413528030673, "grad_norm": 0.8359375, "learning_rate": 0.00017452728682584037, "loss": 1.0433, "step": 14987 }, { "epoch": 0.3848498124762285, "grad_norm": 0.734375, "learning_rate": 0.00017452431020228816, "loss": 0.9376, "step": 14988 }, { "epoch": 0.38487548967215035, "grad_norm": 0.84375, "learning_rate": 0.0001745213334302157, "loss": 0.8894, "step": 14989 }, { "epoch": 0.3849011668680722, "grad_norm": 0.69921875, "learning_rate": 0.00017451835650962895, "loss": 0.8638, "step": 14990 }, { "epoch": 0.38492684406399397, "grad_norm": 0.7265625, "learning_rate": 0.00017451537944053382, "loss": 0.8519, "step": 14991 }, { "epoch": 0.3849525212599158, "grad_norm": 0.78125, "learning_rate": 0.0001745124022229363, "loss": 1.0304, "step": 14992 }, { "epoch": 0.38497819845583764, "grad_norm": 0.85546875, "learning_rate": 0.00017450942485684227, "loss": 1.0858, "step": 14993 }, { "epoch": 0.3850038756517595, "grad_norm": 0.7734375, "learning_rate": 0.0001745064473422577, "loss": 0.9716, "step": 14994 }, { "epoch": 0.38502955284768126, "grad_norm": 0.75390625, "learning_rate": 0.0001745034696791885, "loss": 0.8473, "step": 14995 }, { "epoch": 0.3850552300436031, "grad_norm": 0.7421875, "learning_rate": 0.00017450049186764062, "loss": 0.949, "step": 14996 }, { "epoch": 0.3850809072395249, "grad_norm": 0.8984375, "learning_rate": 0.00017449751390762, "loss": 0.8405, "step": 14997 }, { "epoch": 0.3851065844354467, "grad_norm": 0.85546875, "learning_rate": 0.0001744945357991326, "loss": 1.0058, "step": 14998 }, { "epoch": 0.38513226163136854, "grad_norm": 0.734375, "learning_rate": 0.00017449155754218428, "loss": 0.9201, "step": 14999 }, { "epoch": 0.3851579388272904, "grad_norm": 0.76953125, "learning_rate": 0.000174488579136781, "loss": 0.9798, "step": 15000 }, { "epoch": 0.3851579388272904, "eval_loss": 0.9327870011329651, "eval_model_preparation_time": 0.0065, "eval_runtime": 405.5102, "eval_samples_per_second": 24.66, "eval_steps_per_second": 0.772, "step": 15000 }, { "epoch": 0.38518361602321216, "grad_norm": 0.765625, "learning_rate": 0.00017448560058292873, "loss": 1.0399, "step": 15001 }, { "epoch": 0.385209293219134, "grad_norm": 0.8671875, "learning_rate": 0.0001744826218806334, "loss": 0.9164, "step": 15002 }, { "epoch": 0.38523497041505583, "grad_norm": 0.703125, "learning_rate": 0.0001744796430299009, "loss": 0.9123, "step": 15003 }, { "epoch": 0.38526064761097767, "grad_norm": 0.828125, "learning_rate": 0.0001744766640307372, "loss": 0.8372, "step": 15004 }, { "epoch": 0.38528632480689945, "grad_norm": 0.8515625, "learning_rate": 0.00017447368488314826, "loss": 0.9548, "step": 15005 }, { "epoch": 0.3853120020028213, "grad_norm": 0.83984375, "learning_rate": 0.00017447070558714, "loss": 1.0119, "step": 15006 }, { "epoch": 0.3853376791987431, "grad_norm": 0.73046875, "learning_rate": 0.00017446772614271834, "loss": 0.7284, "step": 15007 }, { "epoch": 0.3853633563946649, "grad_norm": 0.75, "learning_rate": 0.0001744647465498892, "loss": 0.8975, "step": 15008 }, { "epoch": 0.38538903359058674, "grad_norm": 0.796875, "learning_rate": 0.00017446176680865858, "loss": 0.8627, "step": 15009 }, { "epoch": 0.3854147107865086, "grad_norm": 0.82421875, "learning_rate": 0.00017445878691903238, "loss": 0.9364, "step": 15010 }, { "epoch": 0.38544038798243035, "grad_norm": 0.98046875, "learning_rate": 0.00017445580688101652, "loss": 0.7977, "step": 15011 }, { "epoch": 0.3854660651783522, "grad_norm": 0.7578125, "learning_rate": 0.000174452826694617, "loss": 0.8968, "step": 15012 }, { "epoch": 0.385491742374274, "grad_norm": 0.80859375, "learning_rate": 0.00017444984635983968, "loss": 0.8883, "step": 15013 }, { "epoch": 0.38551741957019586, "grad_norm": 0.7265625, "learning_rate": 0.00017444686587669056, "loss": 0.8507, "step": 15014 }, { "epoch": 0.38554309676611764, "grad_norm": 0.79296875, "learning_rate": 0.00017444388524517557, "loss": 0.894, "step": 15015 }, { "epoch": 0.3855687739620395, "grad_norm": 0.78125, "learning_rate": 0.00017444090446530063, "loss": 0.8655, "step": 15016 }, { "epoch": 0.3855944511579613, "grad_norm": 0.7578125, "learning_rate": 0.0001744379235370717, "loss": 0.9672, "step": 15017 }, { "epoch": 0.3856201283538831, "grad_norm": 0.828125, "learning_rate": 0.00017443494246049471, "loss": 0.9177, "step": 15018 }, { "epoch": 0.38564580554980493, "grad_norm": 0.82421875, "learning_rate": 0.0001744319612355756, "loss": 1.071, "step": 15019 }, { "epoch": 0.38567148274572677, "grad_norm": 0.85546875, "learning_rate": 0.00017442897986232032, "loss": 0.9983, "step": 15020 }, { "epoch": 0.38569715994164855, "grad_norm": 0.71875, "learning_rate": 0.0001744259983407348, "loss": 0.9532, "step": 15021 }, { "epoch": 0.3857228371375704, "grad_norm": 0.82421875, "learning_rate": 0.00017442301667082498, "loss": 1.0113, "step": 15022 }, { "epoch": 0.3857485143334922, "grad_norm": 0.7890625, "learning_rate": 0.00017442003485259683, "loss": 0.9259, "step": 15023 }, { "epoch": 0.38577419152941406, "grad_norm": 0.7890625, "learning_rate": 0.00017441705288605627, "loss": 1.0487, "step": 15024 }, { "epoch": 0.38579986872533584, "grad_norm": 0.73046875, "learning_rate": 0.00017441407077120925, "loss": 0.8414, "step": 15025 }, { "epoch": 0.3858255459212577, "grad_norm": 0.80078125, "learning_rate": 0.0001744110885080617, "loss": 1.0196, "step": 15026 }, { "epoch": 0.3858512231171795, "grad_norm": 0.72265625, "learning_rate": 0.00017440810609661958, "loss": 0.9014, "step": 15027 }, { "epoch": 0.3858769003131013, "grad_norm": 0.76953125, "learning_rate": 0.00017440512353688882, "loss": 0.8484, "step": 15028 }, { "epoch": 0.3859025775090231, "grad_norm": 0.73046875, "learning_rate": 0.00017440214082887535, "loss": 0.9361, "step": 15029 }, { "epoch": 0.38592825470494496, "grad_norm": 0.81640625, "learning_rate": 0.00017439915797258515, "loss": 0.9117, "step": 15030 }, { "epoch": 0.38595393190086674, "grad_norm": 0.828125, "learning_rate": 0.00017439617496802417, "loss": 0.9058, "step": 15031 }, { "epoch": 0.3859796090967886, "grad_norm": 0.78125, "learning_rate": 0.00017439319181519828, "loss": 1.0102, "step": 15032 }, { "epoch": 0.3860052862927104, "grad_norm": 0.8515625, "learning_rate": 0.00017439020851411354, "loss": 0.8788, "step": 15033 }, { "epoch": 0.38603096348863225, "grad_norm": 0.84375, "learning_rate": 0.00017438722506477579, "loss": 1.0011, "step": 15034 }, { "epoch": 0.38605664068455403, "grad_norm": 0.88671875, "learning_rate": 0.00017438424146719103, "loss": 1.0355, "step": 15035 }, { "epoch": 0.38608231788047587, "grad_norm": 0.7578125, "learning_rate": 0.00017438125772136523, "loss": 0.9049, "step": 15036 }, { "epoch": 0.3861079950763977, "grad_norm": 0.80078125, "learning_rate": 0.00017437827382730425, "loss": 0.9493, "step": 15037 }, { "epoch": 0.3861336722723195, "grad_norm": 0.81640625, "learning_rate": 0.00017437528978501412, "loss": 1.1483, "step": 15038 }, { "epoch": 0.3861593494682413, "grad_norm": 0.78125, "learning_rate": 0.00017437230559450074, "loss": 0.9302, "step": 15039 }, { "epoch": 0.38618502666416316, "grad_norm": 0.75390625, "learning_rate": 0.00017436932125577007, "loss": 0.9327, "step": 15040 }, { "epoch": 0.38621070386008494, "grad_norm": 0.73828125, "learning_rate": 0.0001743663367688281, "loss": 0.9586, "step": 15041 }, { "epoch": 0.3862363810560068, "grad_norm": 0.76171875, "learning_rate": 0.00017436335213368072, "loss": 0.9539, "step": 15042 }, { "epoch": 0.3862620582519286, "grad_norm": 0.8125, "learning_rate": 0.00017436036735033389, "loss": 1.0519, "step": 15043 }, { "epoch": 0.38628773544785044, "grad_norm": 0.75, "learning_rate": 0.00017435738241879353, "loss": 1.0513, "step": 15044 }, { "epoch": 0.3863134126437722, "grad_norm": 0.73828125, "learning_rate": 0.00017435439733906567, "loss": 0.8765, "step": 15045 }, { "epoch": 0.38633908983969406, "grad_norm": 0.84765625, "learning_rate": 0.0001743514121111562, "loss": 1.0309, "step": 15046 }, { "epoch": 0.3863647670356159, "grad_norm": 0.77734375, "learning_rate": 0.00017434842673507106, "loss": 1.0399, "step": 15047 }, { "epoch": 0.3863904442315377, "grad_norm": 0.7890625, "learning_rate": 0.00017434544121081624, "loss": 0.9019, "step": 15048 }, { "epoch": 0.3864161214274595, "grad_norm": 0.8046875, "learning_rate": 0.00017434245553839769, "loss": 0.8134, "step": 15049 }, { "epoch": 0.38644179862338135, "grad_norm": 0.80078125, "learning_rate": 0.00017433946971782128, "loss": 1.0141, "step": 15050 }, { "epoch": 0.38646747581930313, "grad_norm": 0.7421875, "learning_rate": 0.00017433648374909312, "loss": 1.0336, "step": 15051 }, { "epoch": 0.38649315301522497, "grad_norm": 0.80859375, "learning_rate": 0.00017433349763221897, "loss": 0.8711, "step": 15052 }, { "epoch": 0.3865188302111468, "grad_norm": 0.95703125, "learning_rate": 0.0001743305113672049, "loss": 0.9413, "step": 15053 }, { "epoch": 0.38654450740706864, "grad_norm": 0.8203125, "learning_rate": 0.00017432752495405687, "loss": 0.9281, "step": 15054 }, { "epoch": 0.3865701846029904, "grad_norm": 0.75, "learning_rate": 0.00017432453839278078, "loss": 0.7597, "step": 15055 }, { "epoch": 0.38659586179891225, "grad_norm": 0.78125, "learning_rate": 0.00017432155168338258, "loss": 0.8962, "step": 15056 }, { "epoch": 0.3866215389948341, "grad_norm": 0.7578125, "learning_rate": 0.00017431856482586825, "loss": 0.9426, "step": 15057 }, { "epoch": 0.38664721619075587, "grad_norm": 0.7890625, "learning_rate": 0.00017431557782024375, "loss": 0.9328, "step": 15058 }, { "epoch": 0.3866728933866777, "grad_norm": 0.8046875, "learning_rate": 0.000174312590666515, "loss": 0.9709, "step": 15059 }, { "epoch": 0.38669857058259954, "grad_norm": 0.7578125, "learning_rate": 0.00017430960336468795, "loss": 1.008, "step": 15060 }, { "epoch": 0.3867242477785213, "grad_norm": 0.76953125, "learning_rate": 0.00017430661591476862, "loss": 0.9614, "step": 15061 }, { "epoch": 0.38674992497444316, "grad_norm": 0.8125, "learning_rate": 0.00017430362831676288, "loss": 0.8424, "step": 15062 }, { "epoch": 0.386775602170365, "grad_norm": 0.828125, "learning_rate": 0.00017430064057067672, "loss": 1.0196, "step": 15063 }, { "epoch": 0.38680127936628683, "grad_norm": 0.796875, "learning_rate": 0.00017429765267651612, "loss": 0.8839, "step": 15064 }, { "epoch": 0.3868269565622086, "grad_norm": 0.78515625, "learning_rate": 0.00017429466463428697, "loss": 0.8208, "step": 15065 }, { "epoch": 0.38685263375813045, "grad_norm": 0.75, "learning_rate": 0.00017429167644399533, "loss": 0.8982, "step": 15066 }, { "epoch": 0.3868783109540523, "grad_norm": 0.80078125, "learning_rate": 0.00017428868810564704, "loss": 0.8308, "step": 15067 }, { "epoch": 0.38690398814997407, "grad_norm": 0.79296875, "learning_rate": 0.00017428569961924813, "loss": 0.8086, "step": 15068 }, { "epoch": 0.3869296653458959, "grad_norm": 0.85546875, "learning_rate": 0.0001742827109848045, "loss": 0.9043, "step": 15069 }, { "epoch": 0.38695534254181774, "grad_norm": 0.80078125, "learning_rate": 0.00017427972220232217, "loss": 0.8983, "step": 15070 }, { "epoch": 0.3869810197377395, "grad_norm": 0.8671875, "learning_rate": 0.00017427673327180703, "loss": 0.9857, "step": 15071 }, { "epoch": 0.38700669693366135, "grad_norm": 0.73828125, "learning_rate": 0.0001742737441932651, "loss": 0.8379, "step": 15072 }, { "epoch": 0.3870323741295832, "grad_norm": 0.875, "learning_rate": 0.0001742707549667023, "loss": 0.9103, "step": 15073 }, { "epoch": 0.387058051325505, "grad_norm": 0.76953125, "learning_rate": 0.00017426776559212459, "loss": 0.8606, "step": 15074 }, { "epoch": 0.3870837285214268, "grad_norm": 0.796875, "learning_rate": 0.0001742647760695379, "loss": 1.0005, "step": 15075 }, { "epoch": 0.38710940571734864, "grad_norm": 0.76953125, "learning_rate": 0.00017426178639894826, "loss": 0.9039, "step": 15076 }, { "epoch": 0.3871350829132705, "grad_norm": 0.921875, "learning_rate": 0.00017425879658036157, "loss": 0.8115, "step": 15077 }, { "epoch": 0.38716076010919226, "grad_norm": 0.78125, "learning_rate": 0.00017425580661378383, "loss": 0.9949, "step": 15078 }, { "epoch": 0.3871864373051141, "grad_norm": 0.8203125, "learning_rate": 0.00017425281649922095, "loss": 1.0703, "step": 15079 }, { "epoch": 0.38721211450103593, "grad_norm": 0.765625, "learning_rate": 0.00017424982623667892, "loss": 1.0277, "step": 15080 }, { "epoch": 0.3872377916969577, "grad_norm": 0.8828125, "learning_rate": 0.0001742468358261637, "loss": 0.9907, "step": 15081 }, { "epoch": 0.38726346889287955, "grad_norm": 0.77734375, "learning_rate": 0.00017424384526768124, "loss": 0.8711, "step": 15082 }, { "epoch": 0.3872891460888014, "grad_norm": 0.8125, "learning_rate": 0.0001742408545612375, "loss": 1.0146, "step": 15083 }, { "epoch": 0.38731482328472316, "grad_norm": 0.8203125, "learning_rate": 0.00017423786370683844, "loss": 0.9101, "step": 15084 }, { "epoch": 0.387340500480645, "grad_norm": 0.7734375, "learning_rate": 0.00017423487270449003, "loss": 1.0384, "step": 15085 }, { "epoch": 0.38736617767656684, "grad_norm": 0.78515625, "learning_rate": 0.00017423188155419822, "loss": 0.8784, "step": 15086 }, { "epoch": 0.3873918548724887, "grad_norm": 0.79296875, "learning_rate": 0.00017422889025596895, "loss": 0.9498, "step": 15087 }, { "epoch": 0.38741753206841045, "grad_norm": 0.8125, "learning_rate": 0.00017422589880980826, "loss": 0.8192, "step": 15088 }, { "epoch": 0.3874432092643323, "grad_norm": 0.78125, "learning_rate": 0.00017422290721572203, "loss": 0.8998, "step": 15089 }, { "epoch": 0.3874688864602541, "grad_norm": 0.7890625, "learning_rate": 0.00017421991547371626, "loss": 0.9406, "step": 15090 }, { "epoch": 0.3874945636561759, "grad_norm": 0.80078125, "learning_rate": 0.00017421692358379688, "loss": 0.9987, "step": 15091 }, { "epoch": 0.38752024085209774, "grad_norm": 0.73828125, "learning_rate": 0.00017421393154596989, "loss": 0.9475, "step": 15092 }, { "epoch": 0.3875459180480196, "grad_norm": 0.72265625, "learning_rate": 0.00017421093936024123, "loss": 0.9069, "step": 15093 }, { "epoch": 0.38757159524394136, "grad_norm": 0.83203125, "learning_rate": 0.00017420794702661688, "loss": 0.8508, "step": 15094 }, { "epoch": 0.3875972724398632, "grad_norm": 0.78515625, "learning_rate": 0.0001742049545451028, "loss": 0.9122, "step": 15095 }, { "epoch": 0.38762294963578503, "grad_norm": 0.80078125, "learning_rate": 0.00017420196191570496, "loss": 1.0585, "step": 15096 }, { "epoch": 0.38764862683170687, "grad_norm": 0.76953125, "learning_rate": 0.00017419896913842929, "loss": 0.9546, "step": 15097 }, { "epoch": 0.38767430402762865, "grad_norm": 0.7265625, "learning_rate": 0.0001741959762132818, "loss": 0.7929, "step": 15098 }, { "epoch": 0.3876999812235505, "grad_norm": 0.80078125, "learning_rate": 0.0001741929831402684, "loss": 0.9431, "step": 15099 }, { "epoch": 0.3877256584194723, "grad_norm": 0.80078125, "learning_rate": 0.0001741899899193951, "loss": 0.9098, "step": 15100 }, { "epoch": 0.3877513356153941, "grad_norm": 0.75, "learning_rate": 0.00017418699655066786, "loss": 1.0199, "step": 15101 }, { "epoch": 0.38777701281131594, "grad_norm": 0.76953125, "learning_rate": 0.00017418400303409261, "loss": 0.9089, "step": 15102 }, { "epoch": 0.38780269000723777, "grad_norm": 0.7734375, "learning_rate": 0.00017418100936967537, "loss": 0.8117, "step": 15103 }, { "epoch": 0.38782836720315955, "grad_norm": 0.76171875, "learning_rate": 0.00017417801555742206, "loss": 0.9504, "step": 15104 }, { "epoch": 0.3878540443990814, "grad_norm": 0.7578125, "learning_rate": 0.00017417502159733868, "loss": 1.0666, "step": 15105 }, { "epoch": 0.3878797215950032, "grad_norm": 0.76953125, "learning_rate": 0.00017417202748943117, "loss": 0.9385, "step": 15106 }, { "epoch": 0.38790539879092506, "grad_norm": 0.7578125, "learning_rate": 0.00017416903323370556, "loss": 0.8222, "step": 15107 }, { "epoch": 0.38793107598684684, "grad_norm": 0.8828125, "learning_rate": 0.00017416603883016772, "loss": 0.8956, "step": 15108 }, { "epoch": 0.3879567531827687, "grad_norm": 0.85546875, "learning_rate": 0.00017416304427882366, "loss": 0.9997, "step": 15109 }, { "epoch": 0.3879824303786905, "grad_norm": 0.84765625, "learning_rate": 0.00017416004957967936, "loss": 0.9037, "step": 15110 }, { "epoch": 0.3880081075746123, "grad_norm": 2.484375, "learning_rate": 0.00017415705473274078, "loss": 0.9879, "step": 15111 }, { "epoch": 0.38803378477053413, "grad_norm": 0.828125, "learning_rate": 0.0001741540597380139, "loss": 1.1019, "step": 15112 }, { "epoch": 0.38805946196645597, "grad_norm": 0.8203125, "learning_rate": 0.00017415106459550464, "loss": 0.9211, "step": 15113 }, { "epoch": 0.38808513916237775, "grad_norm": 0.78125, "learning_rate": 0.00017414806930521906, "loss": 0.9186, "step": 15114 }, { "epoch": 0.3881108163582996, "grad_norm": 0.77734375, "learning_rate": 0.00017414507386716302, "loss": 0.7519, "step": 15115 }, { "epoch": 0.3881364935542214, "grad_norm": 0.73046875, "learning_rate": 0.00017414207828134258, "loss": 0.9007, "step": 15116 }, { "epoch": 0.38816217075014325, "grad_norm": 0.8046875, "learning_rate": 0.00017413908254776365, "loss": 0.8904, "step": 15117 }, { "epoch": 0.38818784794606503, "grad_norm": 0.7109375, "learning_rate": 0.00017413608666643225, "loss": 0.8093, "step": 15118 }, { "epoch": 0.38821352514198687, "grad_norm": 0.72265625, "learning_rate": 0.00017413309063735435, "loss": 0.9689, "step": 15119 }, { "epoch": 0.3882392023379087, "grad_norm": 0.90234375, "learning_rate": 0.00017413009446053584, "loss": 1.0029, "step": 15120 }, { "epoch": 0.3882648795338305, "grad_norm": 0.73828125, "learning_rate": 0.0001741270981359828, "loss": 0.9656, "step": 15121 }, { "epoch": 0.3882905567297523, "grad_norm": 0.71484375, "learning_rate": 0.0001741241016637011, "loss": 0.8054, "step": 15122 }, { "epoch": 0.38831623392567416, "grad_norm": 0.84375, "learning_rate": 0.00017412110504369679, "loss": 0.9218, "step": 15123 }, { "epoch": 0.38834191112159594, "grad_norm": 0.77734375, "learning_rate": 0.00017411810827597582, "loss": 0.8428, "step": 15124 }, { "epoch": 0.3883675883175178, "grad_norm": 0.79296875, "learning_rate": 0.00017411511136054415, "loss": 0.8628, "step": 15125 }, { "epoch": 0.3883932655134396, "grad_norm": 0.80078125, "learning_rate": 0.00017411211429740772, "loss": 0.923, "step": 15126 }, { "epoch": 0.38841894270936145, "grad_norm": 0.81640625, "learning_rate": 0.0001741091170865726, "loss": 0.9704, "step": 15127 }, { "epoch": 0.38844461990528323, "grad_norm": 0.8125, "learning_rate": 0.00017410611972804466, "loss": 0.9427, "step": 15128 }, { "epoch": 0.38847029710120506, "grad_norm": 0.796875, "learning_rate": 0.00017410312222182993, "loss": 0.9131, "step": 15129 }, { "epoch": 0.3884959742971269, "grad_norm": 0.8046875, "learning_rate": 0.0001741001245679344, "loss": 1.0033, "step": 15130 }, { "epoch": 0.3885216514930487, "grad_norm": 0.73046875, "learning_rate": 0.000174097126766364, "loss": 0.8669, "step": 15131 }, { "epoch": 0.3885473286889705, "grad_norm": 0.7890625, "learning_rate": 0.00017409412881712468, "loss": 0.8388, "step": 15132 }, { "epoch": 0.38857300588489235, "grad_norm": 0.8125, "learning_rate": 0.00017409113072022249, "loss": 1.0838, "step": 15133 }, { "epoch": 0.38859868308081413, "grad_norm": 0.75, "learning_rate": 0.00017408813247566336, "loss": 0.9618, "step": 15134 }, { "epoch": 0.38862436027673597, "grad_norm": 0.76171875, "learning_rate": 0.00017408513408345328, "loss": 0.9314, "step": 15135 }, { "epoch": 0.3886500374726578, "grad_norm": 0.78515625, "learning_rate": 0.0001740821355435982, "loss": 0.9084, "step": 15136 }, { "epoch": 0.38867571466857964, "grad_norm": 0.75390625, "learning_rate": 0.00017407913685610417, "loss": 0.9661, "step": 15137 }, { "epoch": 0.3887013918645014, "grad_norm": 0.80078125, "learning_rate": 0.00017407613802097703, "loss": 0.8587, "step": 15138 }, { "epoch": 0.38872706906042326, "grad_norm": 0.81640625, "learning_rate": 0.0001740731390382229, "loss": 0.9807, "step": 15139 }, { "epoch": 0.3887527462563451, "grad_norm": 0.74609375, "learning_rate": 0.00017407013990784767, "loss": 0.8853, "step": 15140 }, { "epoch": 0.3887784234522669, "grad_norm": 0.73046875, "learning_rate": 0.00017406714062985734, "loss": 0.9545, "step": 15141 }, { "epoch": 0.3888041006481887, "grad_norm": 0.79296875, "learning_rate": 0.0001740641412042579, "loss": 0.8895, "step": 15142 }, { "epoch": 0.38882977784411055, "grad_norm": 0.93359375, "learning_rate": 0.0001740611416310553, "loss": 0.9531, "step": 15143 }, { "epoch": 0.3888554550400323, "grad_norm": 0.76171875, "learning_rate": 0.00017405814191025557, "loss": 0.9858, "step": 15144 }, { "epoch": 0.38888113223595416, "grad_norm": 0.82421875, "learning_rate": 0.00017405514204186463, "loss": 0.7999, "step": 15145 }, { "epoch": 0.388906809431876, "grad_norm": 0.8203125, "learning_rate": 0.00017405214202588848, "loss": 1.0304, "step": 15146 }, { "epoch": 0.38893248662779784, "grad_norm": 0.76171875, "learning_rate": 0.00017404914186233312, "loss": 1.0034, "step": 15147 }, { "epoch": 0.3889581638237196, "grad_norm": 0.73046875, "learning_rate": 0.00017404614155120445, "loss": 0.7834, "step": 15148 }, { "epoch": 0.38898384101964145, "grad_norm": 0.8046875, "learning_rate": 0.00017404314109250858, "loss": 0.9826, "step": 15149 }, { "epoch": 0.3890095182155633, "grad_norm": 0.79296875, "learning_rate": 0.0001740401404862514, "loss": 0.8657, "step": 15150 }, { "epoch": 0.38903519541148507, "grad_norm": 0.75390625, "learning_rate": 0.0001740371397324389, "loss": 0.9935, "step": 15151 }, { "epoch": 0.3890608726074069, "grad_norm": 0.8125, "learning_rate": 0.00017403413883107705, "loss": 0.8333, "step": 15152 }, { "epoch": 0.38908654980332874, "grad_norm": 0.7578125, "learning_rate": 0.00017403113778217185, "loss": 0.9327, "step": 15153 }, { "epoch": 0.3891122269992505, "grad_norm": 0.72265625, "learning_rate": 0.0001740281365857293, "loss": 0.9516, "step": 15154 }, { "epoch": 0.38913790419517236, "grad_norm": 0.7578125, "learning_rate": 0.00017402513524175536, "loss": 0.9103, "step": 15155 }, { "epoch": 0.3891635813910942, "grad_norm": 0.73828125, "learning_rate": 0.000174022133750256, "loss": 0.8842, "step": 15156 }, { "epoch": 0.38918925858701603, "grad_norm": 0.75390625, "learning_rate": 0.00017401913211123724, "loss": 0.921, "step": 15157 }, { "epoch": 0.3892149357829378, "grad_norm": 0.82421875, "learning_rate": 0.00017401613032470503, "loss": 0.8491, "step": 15158 }, { "epoch": 0.38924061297885965, "grad_norm": 0.85546875, "learning_rate": 0.00017401312839066533, "loss": 0.8516, "step": 15159 }, { "epoch": 0.3892662901747815, "grad_norm": 0.7890625, "learning_rate": 0.00017401012630912415, "loss": 0.969, "step": 15160 }, { "epoch": 0.38929196737070326, "grad_norm": 0.72265625, "learning_rate": 0.0001740071240800875, "loss": 0.919, "step": 15161 }, { "epoch": 0.3893176445666251, "grad_norm": 0.74609375, "learning_rate": 0.00017400412170356133, "loss": 0.9392, "step": 15162 }, { "epoch": 0.38934332176254693, "grad_norm": 0.79296875, "learning_rate": 0.00017400111917955163, "loss": 0.8458, "step": 15163 }, { "epoch": 0.3893689989584687, "grad_norm": 0.78515625, "learning_rate": 0.00017399811650806437, "loss": 1.0023, "step": 15164 }, { "epoch": 0.38939467615439055, "grad_norm": 0.84375, "learning_rate": 0.00017399511368910555, "loss": 0.9828, "step": 15165 }, { "epoch": 0.3894203533503124, "grad_norm": 0.7734375, "learning_rate": 0.00017399211072268117, "loss": 0.9245, "step": 15166 }, { "epoch": 0.3894460305462342, "grad_norm": 0.80859375, "learning_rate": 0.0001739891076087972, "loss": 1.0123, "step": 15167 }, { "epoch": 0.389471707742156, "grad_norm": 0.79296875, "learning_rate": 0.0001739861043474596, "loss": 0.8944, "step": 15168 }, { "epoch": 0.38949738493807784, "grad_norm": 0.7890625, "learning_rate": 0.0001739831009386744, "loss": 0.9672, "step": 15169 }, { "epoch": 0.3895230621339997, "grad_norm": 0.79296875, "learning_rate": 0.00017398009738244755, "loss": 0.915, "step": 15170 }, { "epoch": 0.38954873932992146, "grad_norm": 0.7578125, "learning_rate": 0.00017397709367878504, "loss": 0.8778, "step": 15171 }, { "epoch": 0.3895744165258433, "grad_norm": 0.79296875, "learning_rate": 0.00017397408982769286, "loss": 0.9377, "step": 15172 }, { "epoch": 0.38960009372176513, "grad_norm": 0.76171875, "learning_rate": 0.00017397108582917702, "loss": 0.9313, "step": 15173 }, { "epoch": 0.3896257709176869, "grad_norm": 0.7578125, "learning_rate": 0.0001739680816832435, "loss": 0.8224, "step": 15174 }, { "epoch": 0.38965144811360874, "grad_norm": 0.74609375, "learning_rate": 0.00017396507738989825, "loss": 0.8969, "step": 15175 }, { "epoch": 0.3896771253095306, "grad_norm": 0.7890625, "learning_rate": 0.00017396207294914726, "loss": 0.9055, "step": 15176 }, { "epoch": 0.3897028025054524, "grad_norm": 0.76171875, "learning_rate": 0.00017395906836099658, "loss": 0.9129, "step": 15177 }, { "epoch": 0.3897284797013742, "grad_norm": 0.78125, "learning_rate": 0.00017395606362545214, "loss": 0.9526, "step": 15178 }, { "epoch": 0.38975415689729603, "grad_norm": 0.73046875, "learning_rate": 0.00017395305874251996, "loss": 0.9316, "step": 15179 }, { "epoch": 0.38977983409321787, "grad_norm": 0.83984375, "learning_rate": 0.000173950053712206, "loss": 0.9362, "step": 15180 }, { "epoch": 0.38980551128913965, "grad_norm": 0.82421875, "learning_rate": 0.00017394704853451626, "loss": 0.9734, "step": 15181 }, { "epoch": 0.3898311884850615, "grad_norm": 0.84765625, "learning_rate": 0.00017394404320945675, "loss": 1.0764, "step": 15182 }, { "epoch": 0.3898568656809833, "grad_norm": 0.8046875, "learning_rate": 0.0001739410377370334, "loss": 0.9822, "step": 15183 }, { "epoch": 0.3898825428769051, "grad_norm": 1.1328125, "learning_rate": 0.0001739380321172523, "loss": 0.9903, "step": 15184 }, { "epoch": 0.38990822007282694, "grad_norm": 0.73828125, "learning_rate": 0.00017393502635011933, "loss": 0.7887, "step": 15185 }, { "epoch": 0.3899338972687488, "grad_norm": 0.7578125, "learning_rate": 0.00017393202043564058, "loss": 0.8946, "step": 15186 }, { "epoch": 0.3899595744646706, "grad_norm": 0.74609375, "learning_rate": 0.00017392901437382195, "loss": 0.8775, "step": 15187 }, { "epoch": 0.3899852516605924, "grad_norm": 0.86328125, "learning_rate": 0.0001739260081646695, "loss": 0.9544, "step": 15188 }, { "epoch": 0.3900109288565142, "grad_norm": 0.796875, "learning_rate": 0.00017392300180818916, "loss": 1.0175, "step": 15189 }, { "epoch": 0.39003660605243606, "grad_norm": 0.83203125, "learning_rate": 0.00017391999530438698, "loss": 0.8363, "step": 15190 }, { "epoch": 0.39006228324835784, "grad_norm": 0.859375, "learning_rate": 0.0001739169886532689, "loss": 1.0147, "step": 15191 }, { "epoch": 0.3900879604442797, "grad_norm": 0.7734375, "learning_rate": 0.000173913981854841, "loss": 0.9146, "step": 15192 }, { "epoch": 0.3901136376402015, "grad_norm": 0.7421875, "learning_rate": 0.00017391097490910914, "loss": 0.981, "step": 15193 }, { "epoch": 0.3901393148361233, "grad_norm": 0.91796875, "learning_rate": 0.00017390796781607942, "loss": 1.0626, "step": 15194 }, { "epoch": 0.39016499203204513, "grad_norm": 0.76171875, "learning_rate": 0.0001739049605757578, "loss": 0.8137, "step": 15195 }, { "epoch": 0.39019066922796697, "grad_norm": 0.8046875, "learning_rate": 0.00017390195318815025, "loss": 0.9272, "step": 15196 }, { "epoch": 0.3902163464238888, "grad_norm": 0.94140625, "learning_rate": 0.0001738989456532628, "loss": 0.8402, "step": 15197 }, { "epoch": 0.3902420236198106, "grad_norm": 0.765625, "learning_rate": 0.00017389593797110142, "loss": 0.9618, "step": 15198 }, { "epoch": 0.3902677008157324, "grad_norm": 0.7734375, "learning_rate": 0.0001738929301416721, "loss": 0.9261, "step": 15199 }, { "epoch": 0.39029337801165426, "grad_norm": 0.86328125, "learning_rate": 0.00017388992216498087, "loss": 0.9374, "step": 15200 }, { "epoch": 0.39031905520757604, "grad_norm": 0.7734375, "learning_rate": 0.00017388691404103367, "loss": 0.9954, "step": 15201 }, { "epoch": 0.3903447324034979, "grad_norm": 0.7734375, "learning_rate": 0.00017388390576983652, "loss": 0.8591, "step": 15202 }, { "epoch": 0.3903704095994197, "grad_norm": 0.828125, "learning_rate": 0.00017388089735139546, "loss": 0.9817, "step": 15203 }, { "epoch": 0.3903960867953415, "grad_norm": 0.71875, "learning_rate": 0.00017387788878571643, "loss": 0.8671, "step": 15204 }, { "epoch": 0.3904217639912633, "grad_norm": 0.8359375, "learning_rate": 0.00017387488007280543, "loss": 0.8828, "step": 15205 }, { "epoch": 0.39044744118718516, "grad_norm": 0.76171875, "learning_rate": 0.00017387187121266847, "loss": 0.9447, "step": 15206 }, { "epoch": 0.390473118383107, "grad_norm": 0.76953125, "learning_rate": 0.00017386886220531155, "loss": 0.868, "step": 15207 }, { "epoch": 0.3904987955790288, "grad_norm": 0.84375, "learning_rate": 0.00017386585305074066, "loss": 1.0163, "step": 15208 }, { "epoch": 0.3905244727749506, "grad_norm": 0.796875, "learning_rate": 0.00017386284374896178, "loss": 1.0192, "step": 15209 }, { "epoch": 0.39055014997087245, "grad_norm": 0.80859375, "learning_rate": 0.00017385983429998097, "loss": 0.9303, "step": 15210 }, { "epoch": 0.39057582716679423, "grad_norm": 0.84375, "learning_rate": 0.00017385682470380412, "loss": 0.9945, "step": 15211 }, { "epoch": 0.39060150436271607, "grad_norm": 0.8046875, "learning_rate": 0.00017385381496043731, "loss": 0.8199, "step": 15212 }, { "epoch": 0.3906271815586379, "grad_norm": 0.7890625, "learning_rate": 0.00017385080506988654, "loss": 0.8923, "step": 15213 }, { "epoch": 0.3906528587545597, "grad_norm": 0.80859375, "learning_rate": 0.00017384779503215776, "loss": 0.9179, "step": 15214 }, { "epoch": 0.3906785359504815, "grad_norm": 0.76953125, "learning_rate": 0.00017384478484725702, "loss": 0.8975, "step": 15215 }, { "epoch": 0.39070421314640336, "grad_norm": 0.8515625, "learning_rate": 0.00017384177451519028, "loss": 0.9402, "step": 15216 }, { "epoch": 0.3907298903423252, "grad_norm": 0.84765625, "learning_rate": 0.00017383876403596356, "loss": 0.9549, "step": 15217 }, { "epoch": 0.390755567538247, "grad_norm": 0.76953125, "learning_rate": 0.00017383575340958285, "loss": 0.8228, "step": 15218 }, { "epoch": 0.3907812447341688, "grad_norm": 0.7578125, "learning_rate": 0.00017383274263605415, "loss": 0.8039, "step": 15219 }, { "epoch": 0.39080692193009064, "grad_norm": 0.8359375, "learning_rate": 0.00017382973171538345, "loss": 0.8121, "step": 15220 }, { "epoch": 0.3908325991260124, "grad_norm": 0.79296875, "learning_rate": 0.0001738267206475768, "loss": 0.9503, "step": 15221 }, { "epoch": 0.39085827632193426, "grad_norm": 0.76953125, "learning_rate": 0.00017382370943264014, "loss": 1.0196, "step": 15222 }, { "epoch": 0.3908839535178561, "grad_norm": 0.8046875, "learning_rate": 0.0001738206980705795, "loss": 1.004, "step": 15223 }, { "epoch": 0.3909096307137779, "grad_norm": 0.79296875, "learning_rate": 0.00017381768656140088, "loss": 1.0608, "step": 15224 }, { "epoch": 0.3909353079096997, "grad_norm": 0.796875, "learning_rate": 0.00017381467490511026, "loss": 1.0923, "step": 15225 }, { "epoch": 0.39096098510562155, "grad_norm": 0.8203125, "learning_rate": 0.00017381166310171365, "loss": 0.9416, "step": 15226 }, { "epoch": 0.3909866623015434, "grad_norm": 0.7734375, "learning_rate": 0.0001738086511512171, "loss": 1.0679, "step": 15227 }, { "epoch": 0.39101233949746517, "grad_norm": 0.890625, "learning_rate": 0.00017380563905362658, "loss": 1.0428, "step": 15228 }, { "epoch": 0.391038016693387, "grad_norm": 0.79296875, "learning_rate": 0.0001738026268089481, "loss": 0.9148, "step": 15229 }, { "epoch": 0.39106369388930884, "grad_norm": 1.5390625, "learning_rate": 0.0001737996144171876, "loss": 0.8254, "step": 15230 }, { "epoch": 0.3910893710852306, "grad_norm": 0.75, "learning_rate": 0.00017379660187835113, "loss": 0.9153, "step": 15231 }, { "epoch": 0.39111504828115246, "grad_norm": 0.75390625, "learning_rate": 0.00017379358919244475, "loss": 0.9538, "step": 15232 }, { "epoch": 0.3911407254770743, "grad_norm": 0.76953125, "learning_rate": 0.0001737905763594744, "loss": 0.9329, "step": 15233 }, { "epoch": 0.39116640267299607, "grad_norm": 0.80078125, "learning_rate": 0.00017378756337944608, "loss": 0.9915, "step": 15234 }, { "epoch": 0.3911920798689179, "grad_norm": 0.77734375, "learning_rate": 0.00017378455025236582, "loss": 1.0417, "step": 15235 }, { "epoch": 0.39121775706483974, "grad_norm": 0.78125, "learning_rate": 0.0001737815369782396, "loss": 0.97, "step": 15236 }, { "epoch": 0.3912434342607616, "grad_norm": 0.84765625, "learning_rate": 0.00017377852355707346, "loss": 0.9801, "step": 15237 }, { "epoch": 0.39126911145668336, "grad_norm": 0.70703125, "learning_rate": 0.00017377550998887338, "loss": 0.842, "step": 15238 }, { "epoch": 0.3912947886526052, "grad_norm": 0.73828125, "learning_rate": 0.00017377249627364538, "loss": 0.9854, "step": 15239 }, { "epoch": 0.39132046584852703, "grad_norm": 0.8203125, "learning_rate": 0.00017376948241139544, "loss": 0.8873, "step": 15240 }, { "epoch": 0.3913461430444488, "grad_norm": 0.828125, "learning_rate": 0.0001737664684021296, "loss": 0.9575, "step": 15241 }, { "epoch": 0.39137182024037065, "grad_norm": 0.890625, "learning_rate": 0.00017376345424585386, "loss": 1.0105, "step": 15242 }, { "epoch": 0.3913974974362925, "grad_norm": 0.76171875, "learning_rate": 0.0001737604399425742, "loss": 1.0613, "step": 15243 }, { "epoch": 0.39142317463221427, "grad_norm": 0.73046875, "learning_rate": 0.00017375742549229667, "loss": 0.9726, "step": 15244 }, { "epoch": 0.3914488518281361, "grad_norm": 0.8203125, "learning_rate": 0.0001737544108950272, "loss": 0.9974, "step": 15245 }, { "epoch": 0.39147452902405794, "grad_norm": 0.8046875, "learning_rate": 0.00017375139615077192, "loss": 1.0287, "step": 15246 }, { "epoch": 0.3915002062199798, "grad_norm": 0.9375, "learning_rate": 0.0001737483812595367, "loss": 1.0383, "step": 15247 }, { "epoch": 0.39152588341590155, "grad_norm": 0.84765625, "learning_rate": 0.00017374536622132767, "loss": 0.9709, "step": 15248 }, { "epoch": 0.3915515606118234, "grad_norm": 0.7578125, "learning_rate": 0.0001737423510361508, "loss": 0.8721, "step": 15249 }, { "epoch": 0.3915772378077452, "grad_norm": 0.8046875, "learning_rate": 0.00017373933570401205, "loss": 0.7895, "step": 15250 }, { "epoch": 0.391602915003667, "grad_norm": 0.73046875, "learning_rate": 0.00017373632022491746, "loss": 0.7987, "step": 15251 }, { "epoch": 0.39162859219958884, "grad_norm": 0.8359375, "learning_rate": 0.00017373330459887306, "loss": 0.9449, "step": 15252 }, { "epoch": 0.3916542693955107, "grad_norm": 0.82421875, "learning_rate": 0.00017373028882588484, "loss": 0.7451, "step": 15253 }, { "epoch": 0.39167994659143246, "grad_norm": 0.74609375, "learning_rate": 0.00017372727290595883, "loss": 0.9336, "step": 15254 }, { "epoch": 0.3917056237873543, "grad_norm": 0.75390625, "learning_rate": 0.000173724256839101, "loss": 0.8958, "step": 15255 }, { "epoch": 0.39173130098327613, "grad_norm": 0.75, "learning_rate": 0.00017372124062531741, "loss": 0.7785, "step": 15256 }, { "epoch": 0.39175697817919797, "grad_norm": 0.8203125, "learning_rate": 0.00017371822426461405, "loss": 1.0193, "step": 15257 }, { "epoch": 0.39178265537511975, "grad_norm": 0.78515625, "learning_rate": 0.00017371520775699687, "loss": 0.9253, "step": 15258 }, { "epoch": 0.3918083325710416, "grad_norm": 0.73046875, "learning_rate": 0.000173712191102472, "loss": 0.8788, "step": 15259 }, { "epoch": 0.3918340097669634, "grad_norm": 0.77734375, "learning_rate": 0.00017370917430104536, "loss": 0.8397, "step": 15260 }, { "epoch": 0.3918596869628852, "grad_norm": 0.75, "learning_rate": 0.000173706157352723, "loss": 0.8646, "step": 15261 }, { "epoch": 0.39188536415880704, "grad_norm": 0.765625, "learning_rate": 0.00017370314025751094, "loss": 0.9662, "step": 15262 }, { "epoch": 0.3919110413547289, "grad_norm": 0.79296875, "learning_rate": 0.00017370012301541518, "loss": 0.9493, "step": 15263 }, { "epoch": 0.39193671855065065, "grad_norm": 0.78125, "learning_rate": 0.00017369710562644173, "loss": 1.0136, "step": 15264 }, { "epoch": 0.3919623957465725, "grad_norm": 0.79296875, "learning_rate": 0.0001736940880905966, "loss": 0.8061, "step": 15265 }, { "epoch": 0.3919880729424943, "grad_norm": 0.765625, "learning_rate": 0.00017369107040788578, "loss": 0.8717, "step": 15266 }, { "epoch": 0.39201375013841616, "grad_norm": 0.859375, "learning_rate": 0.00017368805257831537, "loss": 0.9444, "step": 15267 }, { "epoch": 0.39203942733433794, "grad_norm": 0.85546875, "learning_rate": 0.0001736850346018913, "loss": 1.0261, "step": 15268 }, { "epoch": 0.3920651045302598, "grad_norm": 0.7578125, "learning_rate": 0.0001736820164786196, "loss": 1.0334, "step": 15269 }, { "epoch": 0.3920907817261816, "grad_norm": 0.796875, "learning_rate": 0.0001736789982085063, "loss": 0.9734, "step": 15270 }, { "epoch": 0.3921164589221034, "grad_norm": 0.77734375, "learning_rate": 0.00017367597979155744, "loss": 0.906, "step": 15271 }, { "epoch": 0.39214213611802523, "grad_norm": 0.7578125, "learning_rate": 0.00017367296122777897, "loss": 0.982, "step": 15272 }, { "epoch": 0.39216781331394707, "grad_norm": 0.78125, "learning_rate": 0.00017366994251717696, "loss": 0.9598, "step": 15273 }, { "epoch": 0.39219349050986885, "grad_norm": 0.82421875, "learning_rate": 0.0001736669236597574, "loss": 0.9071, "step": 15274 }, { "epoch": 0.3922191677057907, "grad_norm": 0.78125, "learning_rate": 0.00017366390465552636, "loss": 1.0281, "step": 15275 }, { "epoch": 0.3922448449017125, "grad_norm": 0.8203125, "learning_rate": 0.00017366088550448976, "loss": 0.9156, "step": 15276 }, { "epoch": 0.39227052209763436, "grad_norm": 0.7578125, "learning_rate": 0.00017365786620665368, "loss": 0.9225, "step": 15277 }, { "epoch": 0.39229619929355614, "grad_norm": 0.82421875, "learning_rate": 0.00017365484676202413, "loss": 0.961, "step": 15278 }, { "epoch": 0.39232187648947797, "grad_norm": 0.7734375, "learning_rate": 0.00017365182717060714, "loss": 1.0233, "step": 15279 }, { "epoch": 0.3923475536853998, "grad_norm": 0.8203125, "learning_rate": 0.0001736488074324087, "loss": 0.9181, "step": 15280 }, { "epoch": 0.3923732308813216, "grad_norm": 0.7890625, "learning_rate": 0.00017364578754743484, "loss": 0.8418, "step": 15281 }, { "epoch": 0.3923989080772434, "grad_norm": 0.8515625, "learning_rate": 0.00017364276751569155, "loss": 0.9334, "step": 15282 }, { "epoch": 0.39242458527316526, "grad_norm": 0.75390625, "learning_rate": 0.0001736397473371849, "loss": 1.0212, "step": 15283 }, { "epoch": 0.39245026246908704, "grad_norm": 0.80078125, "learning_rate": 0.0001736367270119209, "loss": 0.86, "step": 15284 }, { "epoch": 0.3924759396650089, "grad_norm": 0.8515625, "learning_rate": 0.00017363370653990554, "loss": 0.9779, "step": 15285 }, { "epoch": 0.3925016168609307, "grad_norm": 0.73828125, "learning_rate": 0.00017363068592114484, "loss": 0.9243, "step": 15286 }, { "epoch": 0.3925272940568525, "grad_norm": 0.96484375, "learning_rate": 0.00017362766515564485, "loss": 1.1261, "step": 15287 }, { "epoch": 0.39255297125277433, "grad_norm": 0.74609375, "learning_rate": 0.0001736246442434116, "loss": 0.9727, "step": 15288 }, { "epoch": 0.39257864844869617, "grad_norm": 0.83203125, "learning_rate": 0.00017362162318445106, "loss": 1.1028, "step": 15289 }, { "epoch": 0.392604325644618, "grad_norm": 0.828125, "learning_rate": 0.00017361860197876924, "loss": 1.0314, "step": 15290 }, { "epoch": 0.3926300028405398, "grad_norm": 0.84765625, "learning_rate": 0.00017361558062637224, "loss": 0.9701, "step": 15291 }, { "epoch": 0.3926556800364616, "grad_norm": 0.7578125, "learning_rate": 0.00017361255912726602, "loss": 0.8897, "step": 15292 }, { "epoch": 0.39268135723238345, "grad_norm": 0.87109375, "learning_rate": 0.00017360953748145664, "loss": 0.9164, "step": 15293 }, { "epoch": 0.39270703442830523, "grad_norm": 0.8203125, "learning_rate": 0.00017360651568895004, "loss": 0.9344, "step": 15294 }, { "epoch": 0.39273271162422707, "grad_norm": 0.8125, "learning_rate": 0.00017360349374975239, "loss": 1.0234, "step": 15295 }, { "epoch": 0.3927583888201489, "grad_norm": 0.8046875, "learning_rate": 0.0001736004716638696, "loss": 1.0054, "step": 15296 }, { "epoch": 0.3927840660160707, "grad_norm": 0.796875, "learning_rate": 0.00017359744943130768, "loss": 0.8879, "step": 15297 }, { "epoch": 0.3928097432119925, "grad_norm": 0.86328125, "learning_rate": 0.00017359442705207272, "loss": 0.8993, "step": 15298 }, { "epoch": 0.39283542040791436, "grad_norm": 0.76171875, "learning_rate": 0.0001735914045261707, "loss": 0.8935, "step": 15299 }, { "epoch": 0.3928610976038362, "grad_norm": 0.79296875, "learning_rate": 0.00017358838185360767, "loss": 0.9205, "step": 15300 }, { "epoch": 0.392886774799758, "grad_norm": 0.80078125, "learning_rate": 0.00017358535903438965, "loss": 0.9765, "step": 15301 }, { "epoch": 0.3929124519956798, "grad_norm": 0.80859375, "learning_rate": 0.00017358233606852265, "loss": 0.8741, "step": 15302 }, { "epoch": 0.39293812919160165, "grad_norm": 0.78515625, "learning_rate": 0.00017357931295601268, "loss": 0.8793, "step": 15303 }, { "epoch": 0.39296380638752343, "grad_norm": 0.96875, "learning_rate": 0.0001735762896968658, "loss": 0.8833, "step": 15304 }, { "epoch": 0.39298948358344526, "grad_norm": 0.7890625, "learning_rate": 0.00017357326629108803, "loss": 0.8647, "step": 15305 }, { "epoch": 0.3930151607793671, "grad_norm": 0.796875, "learning_rate": 0.0001735702427386854, "loss": 0.9772, "step": 15306 }, { "epoch": 0.3930408379752889, "grad_norm": 0.84375, "learning_rate": 0.0001735672190396639, "loss": 1.1083, "step": 15307 }, { "epoch": 0.3930665151712107, "grad_norm": 0.734375, "learning_rate": 0.00017356419519402958, "loss": 0.9389, "step": 15308 }, { "epoch": 0.39309219236713255, "grad_norm": 0.91796875, "learning_rate": 0.00017356117120178842, "loss": 1.1802, "step": 15309 }, { "epoch": 0.3931178695630544, "grad_norm": 0.70703125, "learning_rate": 0.00017355814706294654, "loss": 0.9447, "step": 15310 }, { "epoch": 0.39314354675897617, "grad_norm": 0.86328125, "learning_rate": 0.00017355512277750993, "loss": 0.9544, "step": 15311 }, { "epoch": 0.393169223954898, "grad_norm": 0.8125, "learning_rate": 0.00017355209834548458, "loss": 0.93, "step": 15312 }, { "epoch": 0.39319490115081984, "grad_norm": 0.80078125, "learning_rate": 0.00017354907376687653, "loss": 0.9422, "step": 15313 }, { "epoch": 0.3932205783467416, "grad_norm": 0.765625, "learning_rate": 0.00017354604904169186, "loss": 0.8936, "step": 15314 }, { "epoch": 0.39324625554266346, "grad_norm": 0.77734375, "learning_rate": 0.00017354302416993655, "loss": 0.8661, "step": 15315 }, { "epoch": 0.3932719327385853, "grad_norm": 0.82421875, "learning_rate": 0.0001735399991516166, "loss": 0.8593, "step": 15316 }, { "epoch": 0.3932976099345071, "grad_norm": 0.8671875, "learning_rate": 0.0001735369739867381, "loss": 0.8268, "step": 15317 }, { "epoch": 0.3933232871304289, "grad_norm": 0.828125, "learning_rate": 0.00017353394867530706, "loss": 1.0314, "step": 15318 }, { "epoch": 0.39334896432635075, "grad_norm": 0.7578125, "learning_rate": 0.0001735309232173295, "loss": 0.9665, "step": 15319 }, { "epoch": 0.3933746415222726, "grad_norm": 0.77734375, "learning_rate": 0.0001735278976128114, "loss": 0.9209, "step": 15320 }, { "epoch": 0.39340031871819436, "grad_norm": 0.79296875, "learning_rate": 0.0001735248718617589, "loss": 0.8795, "step": 15321 }, { "epoch": 0.3934259959141162, "grad_norm": 0.6875, "learning_rate": 0.00017352184596417796, "loss": 0.7857, "step": 15322 }, { "epoch": 0.39345167311003804, "grad_norm": 0.828125, "learning_rate": 0.00017351881992007463, "loss": 0.8091, "step": 15323 }, { "epoch": 0.3934773503059598, "grad_norm": 0.7265625, "learning_rate": 0.00017351579372945492, "loss": 0.8974, "step": 15324 }, { "epoch": 0.39350302750188165, "grad_norm": 0.79296875, "learning_rate": 0.00017351276739232486, "loss": 1.0882, "step": 15325 }, { "epoch": 0.3935287046978035, "grad_norm": 0.796875, "learning_rate": 0.0001735097409086905, "loss": 0.8337, "step": 15326 }, { "epoch": 0.39355438189372527, "grad_norm": 0.7421875, "learning_rate": 0.00017350671427855788, "loss": 0.8579, "step": 15327 }, { "epoch": 0.3935800590896471, "grad_norm": 0.7734375, "learning_rate": 0.00017350368750193305, "loss": 0.9024, "step": 15328 }, { "epoch": 0.39360573628556894, "grad_norm": 0.78515625, "learning_rate": 0.00017350066057882194, "loss": 0.9549, "step": 15329 }, { "epoch": 0.3936314134814908, "grad_norm": 0.734375, "learning_rate": 0.0001734976335092307, "loss": 0.9421, "step": 15330 }, { "epoch": 0.39365709067741256, "grad_norm": 0.78515625, "learning_rate": 0.0001734946062931653, "loss": 0.9498, "step": 15331 }, { "epoch": 0.3936827678733344, "grad_norm": 0.7734375, "learning_rate": 0.00017349157893063182, "loss": 1.0609, "step": 15332 }, { "epoch": 0.39370844506925623, "grad_norm": 0.69921875, "learning_rate": 0.00017348855142163623, "loss": 0.8818, "step": 15333 }, { "epoch": 0.393734122265178, "grad_norm": 0.79296875, "learning_rate": 0.00017348552376618458, "loss": 0.8958, "step": 15334 }, { "epoch": 0.39375979946109985, "grad_norm": 0.7421875, "learning_rate": 0.00017348249596428294, "loss": 0.8593, "step": 15335 }, { "epoch": 0.3937854766570217, "grad_norm": 0.81640625, "learning_rate": 0.00017347946801593732, "loss": 1.0025, "step": 15336 }, { "epoch": 0.39381115385294346, "grad_norm": 0.78515625, "learning_rate": 0.00017347643992115378, "loss": 0.9597, "step": 15337 }, { "epoch": 0.3938368310488653, "grad_norm": 0.69921875, "learning_rate": 0.0001734734116799383, "loss": 0.884, "step": 15338 }, { "epoch": 0.39386250824478714, "grad_norm": 0.75390625, "learning_rate": 0.00017347038329229694, "loss": 0.7914, "step": 15339 }, { "epoch": 0.39388818544070897, "grad_norm": 0.94921875, "learning_rate": 0.00017346735475823577, "loss": 0.8141, "step": 15340 }, { "epoch": 0.39391386263663075, "grad_norm": 0.8046875, "learning_rate": 0.00017346432607776079, "loss": 0.8705, "step": 15341 }, { "epoch": 0.3939395398325526, "grad_norm": 0.86328125, "learning_rate": 0.00017346129725087804, "loss": 0.8766, "step": 15342 }, { "epoch": 0.3939652170284744, "grad_norm": 0.86328125, "learning_rate": 0.00017345826827759356, "loss": 0.8643, "step": 15343 }, { "epoch": 0.3939908942243962, "grad_norm": 0.71484375, "learning_rate": 0.0001734552391579134, "loss": 0.8621, "step": 15344 }, { "epoch": 0.39401657142031804, "grad_norm": 0.8203125, "learning_rate": 0.0001734522098918436, "loss": 0.9607, "step": 15345 }, { "epoch": 0.3940422486162399, "grad_norm": 0.7890625, "learning_rate": 0.0001734491804793901, "loss": 0.886, "step": 15346 }, { "epoch": 0.39406792581216166, "grad_norm": 0.70703125, "learning_rate": 0.00017344615092055912, "loss": 1.114, "step": 15347 }, { "epoch": 0.3940936030080835, "grad_norm": 0.76953125, "learning_rate": 0.00017344312121535653, "loss": 0.9515, "step": 15348 }, { "epoch": 0.39411928020400533, "grad_norm": 0.765625, "learning_rate": 0.00017344009136378844, "loss": 0.8117, "step": 15349 }, { "epoch": 0.39414495739992716, "grad_norm": 0.81640625, "learning_rate": 0.0001734370613658609, "loss": 0.9661, "step": 15350 }, { "epoch": 0.39417063459584895, "grad_norm": 0.80859375, "learning_rate": 0.00017343403122157993, "loss": 0.941, "step": 15351 }, { "epoch": 0.3941963117917708, "grad_norm": 0.8203125, "learning_rate": 0.00017343100093095156, "loss": 1.0006, "step": 15352 }, { "epoch": 0.3942219889876926, "grad_norm": 0.7578125, "learning_rate": 0.00017342797049398184, "loss": 0.9405, "step": 15353 }, { "epoch": 0.3942476661836144, "grad_norm": 0.7890625, "learning_rate": 0.0001734249399106768, "loss": 0.938, "step": 15354 }, { "epoch": 0.39427334337953623, "grad_norm": 0.84375, "learning_rate": 0.0001734219091810425, "loss": 0.9277, "step": 15355 }, { "epoch": 0.39429902057545807, "grad_norm": 0.77734375, "learning_rate": 0.00017341887830508496, "loss": 1.0046, "step": 15356 }, { "epoch": 0.39432469777137985, "grad_norm": 0.77734375, "learning_rate": 0.00017341584728281024, "loss": 1.0095, "step": 15357 }, { "epoch": 0.3943503749673017, "grad_norm": 0.75390625, "learning_rate": 0.00017341281611422436, "loss": 0.9115, "step": 15358 }, { "epoch": 0.3943760521632235, "grad_norm": 0.77734375, "learning_rate": 0.00017340978479933337, "loss": 0.848, "step": 15359 }, { "epoch": 0.39440172935914536, "grad_norm": 0.80859375, "learning_rate": 0.0001734067533381433, "loss": 0.9079, "step": 15360 }, { "epoch": 0.39442740655506714, "grad_norm": 0.765625, "learning_rate": 0.0001734037217306602, "loss": 0.9397, "step": 15361 }, { "epoch": 0.394453083750989, "grad_norm": 0.73828125, "learning_rate": 0.00017340068997689012, "loss": 0.8309, "step": 15362 }, { "epoch": 0.3944787609469108, "grad_norm": 0.80078125, "learning_rate": 0.0001733976580768391, "loss": 0.9265, "step": 15363 }, { "epoch": 0.3945044381428326, "grad_norm": 0.84765625, "learning_rate": 0.00017339462603051318, "loss": 0.8652, "step": 15364 }, { "epoch": 0.39453011533875443, "grad_norm": 0.7890625, "learning_rate": 0.00017339159383791838, "loss": 0.926, "step": 15365 }, { "epoch": 0.39455579253467626, "grad_norm": 0.88671875, "learning_rate": 0.00017338856149906078, "loss": 0.9742, "step": 15366 }, { "epoch": 0.39458146973059804, "grad_norm": 0.8828125, "learning_rate": 0.0001733855290139464, "loss": 0.9877, "step": 15367 }, { "epoch": 0.3946071469265199, "grad_norm": 0.828125, "learning_rate": 0.00017338249638258133, "loss": 1.0072, "step": 15368 }, { "epoch": 0.3946328241224417, "grad_norm": 0.828125, "learning_rate": 0.00017337946360497153, "loss": 0.9932, "step": 15369 }, { "epoch": 0.39465850131836355, "grad_norm": 0.8203125, "learning_rate": 0.00017337643068112308, "loss": 0.8426, "step": 15370 }, { "epoch": 0.39468417851428533, "grad_norm": 0.84375, "learning_rate": 0.00017337339761104207, "loss": 0.9107, "step": 15371 }, { "epoch": 0.39470985571020717, "grad_norm": 0.74609375, "learning_rate": 0.00017337036439473447, "loss": 1.0393, "step": 15372 }, { "epoch": 0.394735532906129, "grad_norm": 0.84765625, "learning_rate": 0.00017336733103220638, "loss": 1.0784, "step": 15373 }, { "epoch": 0.3947612101020508, "grad_norm": 0.73046875, "learning_rate": 0.0001733642975234638, "loss": 0.8779, "step": 15374 }, { "epoch": 0.3947868872979726, "grad_norm": 0.7578125, "learning_rate": 0.0001733612638685128, "loss": 0.9413, "step": 15375 }, { "epoch": 0.39481256449389446, "grad_norm": 0.828125, "learning_rate": 0.00017335823006735948, "loss": 0.9035, "step": 15376 }, { "epoch": 0.39483824168981624, "grad_norm": 0.85546875, "learning_rate": 0.0001733551961200098, "loss": 0.9737, "step": 15377 }, { "epoch": 0.3948639188857381, "grad_norm": 0.79296875, "learning_rate": 0.00017335216202646984, "loss": 0.8944, "step": 15378 }, { "epoch": 0.3948895960816599, "grad_norm": 0.7421875, "learning_rate": 0.00017334912778674565, "loss": 0.907, "step": 15379 }, { "epoch": 0.39491527327758175, "grad_norm": 0.828125, "learning_rate": 0.0001733460934008433, "loss": 0.9406, "step": 15380 }, { "epoch": 0.3949409504735035, "grad_norm": 0.76953125, "learning_rate": 0.00017334305886876876, "loss": 0.9561, "step": 15381 }, { "epoch": 0.39496662766942536, "grad_norm": 0.82421875, "learning_rate": 0.00017334002419052815, "loss": 0.8831, "step": 15382 }, { "epoch": 0.3949923048653472, "grad_norm": 0.76953125, "learning_rate": 0.0001733369893661275, "loss": 0.8108, "step": 15383 }, { "epoch": 0.395017982061269, "grad_norm": 0.703125, "learning_rate": 0.00017333395439557284, "loss": 0.9511, "step": 15384 }, { "epoch": 0.3950436592571908, "grad_norm": 0.703125, "learning_rate": 0.00017333091927887025, "loss": 0.9412, "step": 15385 }, { "epoch": 0.39506933645311265, "grad_norm": 0.765625, "learning_rate": 0.00017332788401602578, "loss": 0.9208, "step": 15386 }, { "epoch": 0.39509501364903443, "grad_norm": 0.82421875, "learning_rate": 0.00017332484860704543, "loss": 0.9487, "step": 15387 }, { "epoch": 0.39512069084495627, "grad_norm": 0.76953125, "learning_rate": 0.0001733218130519353, "loss": 0.7874, "step": 15388 }, { "epoch": 0.3951463680408781, "grad_norm": 0.796875, "learning_rate": 0.0001733187773507014, "loss": 0.9492, "step": 15389 }, { "epoch": 0.39517204523679994, "grad_norm": 0.86328125, "learning_rate": 0.00017331574150334983, "loss": 0.9318, "step": 15390 }, { "epoch": 0.3951977224327217, "grad_norm": 0.9296875, "learning_rate": 0.00017331270550988655, "loss": 0.8921, "step": 15391 }, { "epoch": 0.39522339962864356, "grad_norm": 0.84375, "learning_rate": 0.00017330966937031774, "loss": 0.9252, "step": 15392 }, { "epoch": 0.3952490768245654, "grad_norm": 0.80859375, "learning_rate": 0.00017330663308464934, "loss": 1.088, "step": 15393 }, { "epoch": 0.3952747540204872, "grad_norm": 0.8046875, "learning_rate": 0.00017330359665288747, "loss": 0.8674, "step": 15394 }, { "epoch": 0.395300431216409, "grad_norm": 0.76953125, "learning_rate": 0.00017330056007503812, "loss": 0.9331, "step": 15395 }, { "epoch": 0.39532610841233085, "grad_norm": 0.84375, "learning_rate": 0.0001732975233511074, "loss": 0.9796, "step": 15396 }, { "epoch": 0.3953517856082526, "grad_norm": 0.78515625, "learning_rate": 0.00017329448648110133, "loss": 0.8537, "step": 15397 }, { "epoch": 0.39537746280417446, "grad_norm": 0.91015625, "learning_rate": 0.00017329144946502597, "loss": 0.9399, "step": 15398 }, { "epoch": 0.3954031400000963, "grad_norm": 0.8046875, "learning_rate": 0.00017328841230288737, "loss": 1.0021, "step": 15399 }, { "epoch": 0.39542881719601813, "grad_norm": 0.78515625, "learning_rate": 0.00017328537499469157, "loss": 0.9106, "step": 15400 }, { "epoch": 0.3954544943919399, "grad_norm": 0.84765625, "learning_rate": 0.00017328233754044465, "loss": 0.8953, "step": 15401 }, { "epoch": 0.39548017158786175, "grad_norm": 0.7890625, "learning_rate": 0.00017327929994015266, "loss": 0.9527, "step": 15402 }, { "epoch": 0.3955058487837836, "grad_norm": 0.80078125, "learning_rate": 0.00017327626219382163, "loss": 0.8742, "step": 15403 }, { "epoch": 0.39553152597970537, "grad_norm": 0.74609375, "learning_rate": 0.00017327322430145764, "loss": 0.9986, "step": 15404 }, { "epoch": 0.3955572031756272, "grad_norm": 0.80078125, "learning_rate": 0.00017327018626306674, "loss": 0.8626, "step": 15405 }, { "epoch": 0.39558288037154904, "grad_norm": 0.7578125, "learning_rate": 0.00017326714807865495, "loss": 0.9197, "step": 15406 }, { "epoch": 0.3956085575674708, "grad_norm": 0.78125, "learning_rate": 0.00017326410974822837, "loss": 0.9358, "step": 15407 }, { "epoch": 0.39563423476339266, "grad_norm": 0.828125, "learning_rate": 0.00017326107127179304, "loss": 0.9719, "step": 15408 }, { "epoch": 0.3956599119593145, "grad_norm": 0.9375, "learning_rate": 0.000173258032649355, "loss": 0.9495, "step": 15409 }, { "epoch": 0.39568558915523633, "grad_norm": 0.734375, "learning_rate": 0.00017325499388092034, "loss": 0.8528, "step": 15410 }, { "epoch": 0.3957112663511581, "grad_norm": 0.84765625, "learning_rate": 0.00017325195496649507, "loss": 0.9937, "step": 15411 }, { "epoch": 0.39573694354707994, "grad_norm": 0.78125, "learning_rate": 0.00017324891590608527, "loss": 1.1134, "step": 15412 }, { "epoch": 0.3957626207430018, "grad_norm": 0.8046875, "learning_rate": 0.00017324587669969704, "loss": 0.9673, "step": 15413 }, { "epoch": 0.39578829793892356, "grad_norm": 0.78125, "learning_rate": 0.00017324283734733635, "loss": 0.9504, "step": 15414 }, { "epoch": 0.3958139751348454, "grad_norm": 0.91015625, "learning_rate": 0.0001732397978490093, "loss": 0.8665, "step": 15415 }, { "epoch": 0.39583965233076723, "grad_norm": 0.8203125, "learning_rate": 0.000173236758204722, "loss": 0.8957, "step": 15416 }, { "epoch": 0.395865329526689, "grad_norm": 0.74609375, "learning_rate": 0.00017323371841448037, "loss": 1.0019, "step": 15417 }, { "epoch": 0.39589100672261085, "grad_norm": 0.703125, "learning_rate": 0.00017323067847829063, "loss": 0.8154, "step": 15418 }, { "epoch": 0.3959166839185327, "grad_norm": 0.8125, "learning_rate": 0.00017322763839615872, "loss": 0.9275, "step": 15419 }, { "epoch": 0.3959423611144545, "grad_norm": 0.7734375, "learning_rate": 0.00017322459816809075, "loss": 0.9896, "step": 15420 }, { "epoch": 0.3959680383103763, "grad_norm": 0.75390625, "learning_rate": 0.00017322155779409278, "loss": 0.8945, "step": 15421 }, { "epoch": 0.39599371550629814, "grad_norm": 0.7890625, "learning_rate": 0.00017321851727417084, "loss": 0.9638, "step": 15422 }, { "epoch": 0.39601939270222, "grad_norm": 0.8203125, "learning_rate": 0.00017321547660833104, "loss": 0.985, "step": 15423 }, { "epoch": 0.39604506989814175, "grad_norm": 0.76171875, "learning_rate": 0.0001732124357965794, "loss": 0.9537, "step": 15424 }, { "epoch": 0.3960707470940636, "grad_norm": 0.83984375, "learning_rate": 0.000173209394838922, "loss": 0.9671, "step": 15425 }, { "epoch": 0.3960964242899854, "grad_norm": 0.76171875, "learning_rate": 0.00017320635373536483, "loss": 0.9214, "step": 15426 }, { "epoch": 0.3961221014859072, "grad_norm": 0.8125, "learning_rate": 0.00017320331248591407, "loss": 0.9945, "step": 15427 }, { "epoch": 0.39614777868182904, "grad_norm": 0.77734375, "learning_rate": 0.0001732002710905757, "loss": 0.9849, "step": 15428 }, { "epoch": 0.3961734558777509, "grad_norm": 0.7890625, "learning_rate": 0.0001731972295493558, "loss": 0.8458, "step": 15429 }, { "epoch": 0.3961991330736727, "grad_norm": 0.8203125, "learning_rate": 0.00017319418786226044, "loss": 0.9651, "step": 15430 }, { "epoch": 0.3962248102695945, "grad_norm": 0.78125, "learning_rate": 0.00017319114602929565, "loss": 0.9911, "step": 15431 }, { "epoch": 0.39625048746551633, "grad_norm": 0.78125, "learning_rate": 0.00017318810405046758, "loss": 0.9467, "step": 15432 }, { "epoch": 0.39627616466143817, "grad_norm": 0.75, "learning_rate": 0.00017318506192578217, "loss": 0.8278, "step": 15433 }, { "epoch": 0.39630184185735995, "grad_norm": 0.7734375, "learning_rate": 0.00017318201965524555, "loss": 1.0584, "step": 15434 }, { "epoch": 0.3963275190532818, "grad_norm": 0.7890625, "learning_rate": 0.0001731789772388638, "loss": 0.9394, "step": 15435 }, { "epoch": 0.3963531962492036, "grad_norm": 0.77734375, "learning_rate": 0.00017317593467664294, "loss": 0.8822, "step": 15436 }, { "epoch": 0.3963788734451254, "grad_norm": 0.76953125, "learning_rate": 0.00017317289196858905, "loss": 0.9302, "step": 15437 }, { "epoch": 0.39640455064104724, "grad_norm": 0.9453125, "learning_rate": 0.0001731698491147082, "loss": 0.9054, "step": 15438 }, { "epoch": 0.3964302278369691, "grad_norm": 0.828125, "learning_rate": 0.00017316680611500647, "loss": 0.9275, "step": 15439 }, { "epoch": 0.3964559050328909, "grad_norm": 0.796875, "learning_rate": 0.00017316376296948988, "loss": 0.963, "step": 15440 }, { "epoch": 0.3964815822288127, "grad_norm": 0.75390625, "learning_rate": 0.00017316071967816453, "loss": 0.8625, "step": 15441 }, { "epoch": 0.3965072594247345, "grad_norm": 0.78515625, "learning_rate": 0.00017315767624103646, "loss": 0.9181, "step": 15442 }, { "epoch": 0.39653293662065636, "grad_norm": 0.72265625, "learning_rate": 0.00017315463265811176, "loss": 0.9004, "step": 15443 }, { "epoch": 0.39655861381657814, "grad_norm": 0.93359375, "learning_rate": 0.00017315158892939648, "loss": 0.9516, "step": 15444 }, { "epoch": 0.3965842910125, "grad_norm": 0.75390625, "learning_rate": 0.0001731485450548967, "loss": 1.0393, "step": 15445 }, { "epoch": 0.3966099682084218, "grad_norm": 0.7421875, "learning_rate": 0.00017314550103461848, "loss": 0.8004, "step": 15446 }, { "epoch": 0.3966356454043436, "grad_norm": 0.796875, "learning_rate": 0.00017314245686856787, "loss": 0.881, "step": 15447 }, { "epoch": 0.39666132260026543, "grad_norm": 0.80859375, "learning_rate": 0.00017313941255675097, "loss": 0.9432, "step": 15448 }, { "epoch": 0.39668699979618727, "grad_norm": 0.7734375, "learning_rate": 0.0001731363680991738, "loss": 0.7934, "step": 15449 }, { "epoch": 0.3967126769921091, "grad_norm": 0.75390625, "learning_rate": 0.00017313332349584248, "loss": 0.8489, "step": 15450 }, { "epoch": 0.3967383541880309, "grad_norm": 0.76171875, "learning_rate": 0.00017313027874676305, "loss": 0.9954, "step": 15451 }, { "epoch": 0.3967640313839527, "grad_norm": 0.828125, "learning_rate": 0.00017312723385194155, "loss": 1.0055, "step": 15452 }, { "epoch": 0.39678970857987456, "grad_norm": 0.828125, "learning_rate": 0.00017312418881138412, "loss": 0.9327, "step": 15453 }, { "epoch": 0.39681538577579634, "grad_norm": 0.79296875, "learning_rate": 0.00017312114362509674, "loss": 0.8926, "step": 15454 }, { "epoch": 0.3968410629717182, "grad_norm": 0.72265625, "learning_rate": 0.00017311809829308555, "loss": 0.8615, "step": 15455 }, { "epoch": 0.39686674016764, "grad_norm": 0.76953125, "learning_rate": 0.00017311505281535663, "loss": 0.9583, "step": 15456 }, { "epoch": 0.3968924173635618, "grad_norm": 0.77734375, "learning_rate": 0.00017311200719191599, "loss": 0.8809, "step": 15457 }, { "epoch": 0.3969180945594836, "grad_norm": 0.72265625, "learning_rate": 0.0001731089614227697, "loss": 0.8791, "step": 15458 }, { "epoch": 0.39694377175540546, "grad_norm": 0.76171875, "learning_rate": 0.00017310591550792387, "loss": 0.9154, "step": 15459 }, { "epoch": 0.3969694489513273, "grad_norm": 0.78125, "learning_rate": 0.00017310286944738457, "loss": 0.9391, "step": 15460 }, { "epoch": 0.3969951261472491, "grad_norm": 0.84765625, "learning_rate": 0.00017309982324115782, "loss": 0.8904, "step": 15461 }, { "epoch": 0.3970208033431709, "grad_norm": 0.83203125, "learning_rate": 0.00017309677688924973, "loss": 0.8289, "step": 15462 }, { "epoch": 0.39704648053909275, "grad_norm": 0.76171875, "learning_rate": 0.0001730937303916664, "loss": 0.8701, "step": 15463 }, { "epoch": 0.39707215773501453, "grad_norm": 0.80859375, "learning_rate": 0.00017309068374841386, "loss": 0.9498, "step": 15464 }, { "epoch": 0.39709783493093637, "grad_norm": 0.75390625, "learning_rate": 0.00017308763695949816, "loss": 0.7561, "step": 15465 }, { "epoch": 0.3971235121268582, "grad_norm": 0.83984375, "learning_rate": 0.00017308459002492543, "loss": 1.0646, "step": 15466 }, { "epoch": 0.39714918932278, "grad_norm": 0.74609375, "learning_rate": 0.00017308154294470168, "loss": 0.8135, "step": 15467 }, { "epoch": 0.3971748665187018, "grad_norm": 0.79296875, "learning_rate": 0.00017307849571883307, "loss": 0.9899, "step": 15468 }, { "epoch": 0.39720054371462366, "grad_norm": 0.83203125, "learning_rate": 0.0001730754483473256, "loss": 0.9742, "step": 15469 }, { "epoch": 0.3972262209105455, "grad_norm": 0.734375, "learning_rate": 0.00017307240083018534, "loss": 0.961, "step": 15470 }, { "epoch": 0.39725189810646727, "grad_norm": 0.82421875, "learning_rate": 0.00017306935316741838, "loss": 0.859, "step": 15471 }, { "epoch": 0.3972775753023891, "grad_norm": 0.79296875, "learning_rate": 0.00017306630535903083, "loss": 0.9943, "step": 15472 }, { "epoch": 0.39730325249831094, "grad_norm": 0.8125, "learning_rate": 0.00017306325740502874, "loss": 0.9326, "step": 15473 }, { "epoch": 0.3973289296942327, "grad_norm": 0.7578125, "learning_rate": 0.00017306020930541816, "loss": 0.8322, "step": 15474 }, { "epoch": 0.39735460689015456, "grad_norm": 0.77734375, "learning_rate": 0.00017305716106020516, "loss": 1.0587, "step": 15475 }, { "epoch": 0.3973802840860764, "grad_norm": 0.8203125, "learning_rate": 0.00017305411266939585, "loss": 0.9718, "step": 15476 }, { "epoch": 0.3974059612819982, "grad_norm": 0.71875, "learning_rate": 0.00017305106413299632, "loss": 0.8745, "step": 15477 }, { "epoch": 0.39743163847792, "grad_norm": 0.66796875, "learning_rate": 0.0001730480154510126, "loss": 0.8217, "step": 15478 }, { "epoch": 0.39745731567384185, "grad_norm": 0.703125, "learning_rate": 0.0001730449666234508, "loss": 0.8261, "step": 15479 }, { "epoch": 0.3974829928697637, "grad_norm": 0.69140625, "learning_rate": 0.00017304191765031695, "loss": 0.8662, "step": 15480 }, { "epoch": 0.39750867006568547, "grad_norm": 0.7734375, "learning_rate": 0.00017303886853161716, "loss": 0.9559, "step": 15481 }, { "epoch": 0.3975343472616073, "grad_norm": 0.81640625, "learning_rate": 0.0001730358192673575, "loss": 0.903, "step": 15482 }, { "epoch": 0.39756002445752914, "grad_norm": 0.7890625, "learning_rate": 0.00017303276985754405, "loss": 0.8661, "step": 15483 }, { "epoch": 0.3975857016534509, "grad_norm": 0.94140625, "learning_rate": 0.0001730297203021829, "loss": 0.9463, "step": 15484 }, { "epoch": 0.39761137884937275, "grad_norm": 0.796875, "learning_rate": 0.0001730266706012801, "loss": 0.9133, "step": 15485 }, { "epoch": 0.3976370560452946, "grad_norm": 0.8125, "learning_rate": 0.00017302362075484178, "loss": 1.1405, "step": 15486 }, { "epoch": 0.39766273324121637, "grad_norm": 0.8125, "learning_rate": 0.00017302057076287394, "loss": 0.9705, "step": 15487 }, { "epoch": 0.3976884104371382, "grad_norm": 0.734375, "learning_rate": 0.0001730175206253827, "loss": 0.9154, "step": 15488 }, { "epoch": 0.39771408763306004, "grad_norm": 0.8203125, "learning_rate": 0.00017301447034237416, "loss": 0.9388, "step": 15489 }, { "epoch": 0.3977397648289819, "grad_norm": 0.7578125, "learning_rate": 0.00017301141991385435, "loss": 1.0102, "step": 15490 }, { "epoch": 0.39776544202490366, "grad_norm": 0.765625, "learning_rate": 0.0001730083693398294, "loss": 1.0174, "step": 15491 }, { "epoch": 0.3977911192208255, "grad_norm": 0.82421875, "learning_rate": 0.00017300531862030533, "loss": 0.8994, "step": 15492 }, { "epoch": 0.39781679641674733, "grad_norm": 0.75, "learning_rate": 0.00017300226775528833, "loss": 0.9426, "step": 15493 }, { "epoch": 0.3978424736126691, "grad_norm": 0.7265625, "learning_rate": 0.00017299921674478433, "loss": 1.1122, "step": 15494 }, { "epoch": 0.39786815080859095, "grad_norm": 0.8515625, "learning_rate": 0.0001729961655887995, "loss": 0.9766, "step": 15495 }, { "epoch": 0.3978938280045128, "grad_norm": 0.75, "learning_rate": 0.0001729931142873399, "loss": 0.9779, "step": 15496 }, { "epoch": 0.39791950520043456, "grad_norm": 0.80859375, "learning_rate": 0.00017299006284041165, "loss": 0.9227, "step": 15497 }, { "epoch": 0.3979451823963564, "grad_norm": 0.7109375, "learning_rate": 0.00017298701124802076, "loss": 0.9247, "step": 15498 }, { "epoch": 0.39797085959227824, "grad_norm": 0.75, "learning_rate": 0.0001729839595101734, "loss": 0.7539, "step": 15499 }, { "epoch": 0.3979965367882, "grad_norm": 0.796875, "learning_rate": 0.00017298090762687553, "loss": 0.9289, "step": 15500 }, { "epoch": 0.39802221398412185, "grad_norm": 0.83984375, "learning_rate": 0.00017297785559813335, "loss": 1.0712, "step": 15501 }, { "epoch": 0.3980478911800437, "grad_norm": 0.84375, "learning_rate": 0.00017297480342395288, "loss": 0.9413, "step": 15502 }, { "epoch": 0.3980735683759655, "grad_norm": 0.734375, "learning_rate": 0.00017297175110434022, "loss": 0.8608, "step": 15503 }, { "epoch": 0.3980992455718873, "grad_norm": 0.7578125, "learning_rate": 0.0001729686986393015, "loss": 0.889, "step": 15504 }, { "epoch": 0.39812492276780914, "grad_norm": 0.79296875, "learning_rate": 0.0001729656460288427, "loss": 0.9508, "step": 15505 }, { "epoch": 0.398150599963731, "grad_norm": 0.81640625, "learning_rate": 0.00017296259327296998, "loss": 1.0021, "step": 15506 }, { "epoch": 0.39817627715965276, "grad_norm": 2.59375, "learning_rate": 0.00017295954037168938, "loss": 1.0458, "step": 15507 }, { "epoch": 0.3982019543555746, "grad_norm": 0.75390625, "learning_rate": 0.00017295648732500702, "loss": 0.8432, "step": 15508 }, { "epoch": 0.39822763155149643, "grad_norm": 0.76953125, "learning_rate": 0.00017295343413292898, "loss": 0.8505, "step": 15509 }, { "epoch": 0.3982533087474182, "grad_norm": 0.75390625, "learning_rate": 0.00017295038079546133, "loss": 0.9204, "step": 15510 }, { "epoch": 0.39827898594334005, "grad_norm": 0.890625, "learning_rate": 0.00017294732731261013, "loss": 1.0088, "step": 15511 }, { "epoch": 0.3983046631392619, "grad_norm": 0.79296875, "learning_rate": 0.00017294427368438154, "loss": 0.8412, "step": 15512 }, { "epoch": 0.3983303403351837, "grad_norm": 0.83203125, "learning_rate": 0.00017294121991078158, "loss": 0.8774, "step": 15513 }, { "epoch": 0.3983560175311055, "grad_norm": 0.84765625, "learning_rate": 0.00017293816599181635, "loss": 0.9717, "step": 15514 }, { "epoch": 0.39838169472702734, "grad_norm": 0.78515625, "learning_rate": 0.00017293511192749197, "loss": 0.844, "step": 15515 }, { "epoch": 0.39840737192294917, "grad_norm": 0.8125, "learning_rate": 0.00017293205771781449, "loss": 0.9495, "step": 15516 }, { "epoch": 0.39843304911887095, "grad_norm": 0.71484375, "learning_rate": 0.00017292900336279, "loss": 0.8809, "step": 15517 }, { "epoch": 0.3984587263147928, "grad_norm": 0.82421875, "learning_rate": 0.0001729259488624246, "loss": 1.0137, "step": 15518 }, { "epoch": 0.3984844035107146, "grad_norm": 0.671875, "learning_rate": 0.00017292289421672437, "loss": 0.8756, "step": 15519 }, { "epoch": 0.3985100807066364, "grad_norm": 0.796875, "learning_rate": 0.00017291983942569538, "loss": 0.9557, "step": 15520 }, { "epoch": 0.39853575790255824, "grad_norm": 0.76171875, "learning_rate": 0.00017291678448934378, "loss": 0.9511, "step": 15521 }, { "epoch": 0.3985614350984801, "grad_norm": 0.765625, "learning_rate": 0.00017291372940767555, "loss": 0.8528, "step": 15522 }, { "epoch": 0.3985871122944019, "grad_norm": 0.7734375, "learning_rate": 0.00017291067418069692, "loss": 0.9397, "step": 15523 }, { "epoch": 0.3986127894903237, "grad_norm": 0.7578125, "learning_rate": 0.00017290761880841383, "loss": 0.9973, "step": 15524 }, { "epoch": 0.39863846668624553, "grad_norm": 0.78125, "learning_rate": 0.0001729045632908325, "loss": 0.8279, "step": 15525 }, { "epoch": 0.39866414388216737, "grad_norm": 0.796875, "learning_rate": 0.00017290150762795894, "loss": 1.0011, "step": 15526 }, { "epoch": 0.39868982107808915, "grad_norm": 0.7890625, "learning_rate": 0.00017289845181979924, "loss": 1.0223, "step": 15527 }, { "epoch": 0.398715498274011, "grad_norm": 0.79296875, "learning_rate": 0.00017289539586635955, "loss": 0.8763, "step": 15528 }, { "epoch": 0.3987411754699328, "grad_norm": 0.7109375, "learning_rate": 0.0001728923397676459, "loss": 0.8818, "step": 15529 }, { "epoch": 0.3987668526658546, "grad_norm": 0.79296875, "learning_rate": 0.0001728892835236644, "loss": 0.8919, "step": 15530 }, { "epoch": 0.39879252986177643, "grad_norm": 0.73046875, "learning_rate": 0.00017288622713442113, "loss": 0.8936, "step": 15531 }, { "epoch": 0.39881820705769827, "grad_norm": 0.8046875, "learning_rate": 0.00017288317059992221, "loss": 0.9125, "step": 15532 }, { "epoch": 0.3988438842536201, "grad_norm": 0.76171875, "learning_rate": 0.0001728801139201737, "loss": 0.9594, "step": 15533 }, { "epoch": 0.3988695614495419, "grad_norm": 0.84375, "learning_rate": 0.0001728770570951817, "loss": 0.893, "step": 15534 }, { "epoch": 0.3988952386454637, "grad_norm": 0.7421875, "learning_rate": 0.00017287400012495235, "loss": 0.9709, "step": 15535 }, { "epoch": 0.39892091584138556, "grad_norm": 0.76953125, "learning_rate": 0.00017287094300949164, "loss": 0.8169, "step": 15536 }, { "epoch": 0.39894659303730734, "grad_norm": 0.74609375, "learning_rate": 0.0001728678857488058, "loss": 0.8448, "step": 15537 }, { "epoch": 0.3989722702332292, "grad_norm": 0.7734375, "learning_rate": 0.00017286482834290079, "loss": 0.8242, "step": 15538 }, { "epoch": 0.398997947429151, "grad_norm": 0.765625, "learning_rate": 0.00017286177079178276, "loss": 0.9159, "step": 15539 }, { "epoch": 0.3990236246250728, "grad_norm": 0.72265625, "learning_rate": 0.0001728587130954578, "loss": 0.8982, "step": 15540 }, { "epoch": 0.39904930182099463, "grad_norm": 0.796875, "learning_rate": 0.00017285565525393203, "loss": 0.953, "step": 15541 }, { "epoch": 0.39907497901691646, "grad_norm": 0.83203125, "learning_rate": 0.00017285259726721152, "loss": 0.9519, "step": 15542 }, { "epoch": 0.3991006562128383, "grad_norm": 0.80859375, "learning_rate": 0.00017284953913530236, "loss": 1.0494, "step": 15543 }, { "epoch": 0.3991263334087601, "grad_norm": 0.75, "learning_rate": 0.00017284648085821064, "loss": 0.8712, "step": 15544 }, { "epoch": 0.3991520106046819, "grad_norm": 0.828125, "learning_rate": 0.00017284342243594248, "loss": 1.132, "step": 15545 }, { "epoch": 0.39917768780060375, "grad_norm": 0.83984375, "learning_rate": 0.00017284036386850394, "loss": 1.0706, "step": 15546 }, { "epoch": 0.39920336499652553, "grad_norm": 0.875, "learning_rate": 0.00017283730515590113, "loss": 0.8631, "step": 15547 }, { "epoch": 0.39922904219244737, "grad_norm": 0.87109375, "learning_rate": 0.0001728342462981402, "loss": 0.9123, "step": 15548 }, { "epoch": 0.3992547193883692, "grad_norm": 0.79296875, "learning_rate": 0.00017283118729522712, "loss": 0.9433, "step": 15549 }, { "epoch": 0.399280396584291, "grad_norm": 0.8125, "learning_rate": 0.00017282812814716812, "loss": 0.9037, "step": 15550 }, { "epoch": 0.3993060737802128, "grad_norm": 0.8203125, "learning_rate": 0.0001728250688539692, "loss": 0.9286, "step": 15551 }, { "epoch": 0.39933175097613466, "grad_norm": 0.81640625, "learning_rate": 0.00017282200941563653, "loss": 0.9539, "step": 15552 }, { "epoch": 0.3993574281720565, "grad_norm": 0.7890625, "learning_rate": 0.00017281894983217614, "loss": 0.9277, "step": 15553 }, { "epoch": 0.3993831053679783, "grad_norm": 0.796875, "learning_rate": 0.0001728158901035942, "loss": 0.9444, "step": 15554 }, { "epoch": 0.3994087825639001, "grad_norm": 0.78125, "learning_rate": 0.00017281283022989674, "loss": 1.0236, "step": 15555 }, { "epoch": 0.39943445975982195, "grad_norm": 0.83203125, "learning_rate": 0.00017280977021108987, "loss": 0.9899, "step": 15556 }, { "epoch": 0.3994601369557437, "grad_norm": 0.84375, "learning_rate": 0.00017280671004717976, "loss": 1.0028, "step": 15557 }, { "epoch": 0.39948581415166556, "grad_norm": 0.72265625, "learning_rate": 0.00017280364973817244, "loss": 0.8388, "step": 15558 }, { "epoch": 0.3995114913475874, "grad_norm": 0.7578125, "learning_rate": 0.00017280058928407396, "loss": 0.8503, "step": 15559 }, { "epoch": 0.3995371685435092, "grad_norm": 0.71875, "learning_rate": 0.00017279752868489055, "loss": 0.997, "step": 15560 }, { "epoch": 0.399562845739431, "grad_norm": 0.75390625, "learning_rate": 0.0001727944679406282, "loss": 0.879, "step": 15561 }, { "epoch": 0.39958852293535285, "grad_norm": 0.71875, "learning_rate": 0.00017279140705129308, "loss": 0.8189, "step": 15562 }, { "epoch": 0.3996142001312747, "grad_norm": 0.8046875, "learning_rate": 0.00017278834601689125, "loss": 0.8866, "step": 15563 }, { "epoch": 0.39963987732719647, "grad_norm": 0.8359375, "learning_rate": 0.00017278528483742882, "loss": 0.9935, "step": 15564 }, { "epoch": 0.3996655545231183, "grad_norm": 0.77734375, "learning_rate": 0.00017278222351291191, "loss": 0.9749, "step": 15565 }, { "epoch": 0.39969123171904014, "grad_norm": 0.74609375, "learning_rate": 0.00017277916204334656, "loss": 0.9456, "step": 15566 }, { "epoch": 0.3997169089149619, "grad_norm": 0.75390625, "learning_rate": 0.00017277610042873892, "loss": 0.8173, "step": 15567 }, { "epoch": 0.39974258611088376, "grad_norm": 0.83203125, "learning_rate": 0.00017277303866909514, "loss": 0.8356, "step": 15568 }, { "epoch": 0.3997682633068056, "grad_norm": 0.77734375, "learning_rate": 0.00017276997676442122, "loss": 1.0936, "step": 15569 }, { "epoch": 0.3997939405027274, "grad_norm": 0.84765625, "learning_rate": 0.00017276691471472332, "loss": 0.9758, "step": 15570 }, { "epoch": 0.3998196176986492, "grad_norm": 0.83203125, "learning_rate": 0.00017276385252000756, "loss": 0.8231, "step": 15571 }, { "epoch": 0.39984529489457105, "grad_norm": 0.7734375, "learning_rate": 0.00017276079018027998, "loss": 0.7541, "step": 15572 }, { "epoch": 0.3998709720904929, "grad_norm": 0.72265625, "learning_rate": 0.00017275772769554672, "loss": 0.8869, "step": 15573 }, { "epoch": 0.39989664928641466, "grad_norm": 0.75390625, "learning_rate": 0.00017275466506581387, "loss": 0.9723, "step": 15574 }, { "epoch": 0.3999223264823365, "grad_norm": 0.79296875, "learning_rate": 0.00017275160229108757, "loss": 1.0267, "step": 15575 }, { "epoch": 0.39994800367825833, "grad_norm": 0.8046875, "learning_rate": 0.0001727485393713739, "loss": 0.9515, "step": 15576 }, { "epoch": 0.3999736808741801, "grad_norm": 0.7890625, "learning_rate": 0.00017274547630667897, "loss": 0.9767, "step": 15577 }, { "epoch": 0.39999935807010195, "grad_norm": 0.80859375, "learning_rate": 0.00017274241309700884, "loss": 0.8436, "step": 15578 }, { "epoch": 0.4000250352660238, "grad_norm": 0.8125, "learning_rate": 0.00017273934974236968, "loss": 0.9746, "step": 15579 }, { "epoch": 0.40005071246194557, "grad_norm": 0.8203125, "learning_rate": 0.00017273628624276755, "loss": 0.8816, "step": 15580 }, { "epoch": 0.4000763896578674, "grad_norm": 0.80859375, "learning_rate": 0.00017273322259820856, "loss": 0.8839, "step": 15581 }, { "epoch": 0.40010206685378924, "grad_norm": 0.7421875, "learning_rate": 0.00017273015880869887, "loss": 0.9215, "step": 15582 }, { "epoch": 0.4001277440497111, "grad_norm": 0.7421875, "learning_rate": 0.00017272709487424452, "loss": 0.9427, "step": 15583 }, { "epoch": 0.40015342124563286, "grad_norm": 0.765625, "learning_rate": 0.00017272403079485164, "loss": 0.9004, "step": 15584 }, { "epoch": 0.4001790984415547, "grad_norm": 0.79296875, "learning_rate": 0.00017272096657052632, "loss": 1.0099, "step": 15585 }, { "epoch": 0.40020477563747653, "grad_norm": 0.82421875, "learning_rate": 0.0001727179022012747, "loss": 0.9528, "step": 15586 }, { "epoch": 0.4002304528333983, "grad_norm": 0.74609375, "learning_rate": 0.00017271483768710287, "loss": 0.9535, "step": 15587 }, { "epoch": 0.40025613002932015, "grad_norm": 0.7421875, "learning_rate": 0.00017271177302801696, "loss": 0.9586, "step": 15588 }, { "epoch": 0.400281807225242, "grad_norm": 0.7890625, "learning_rate": 0.000172708708224023, "loss": 0.9507, "step": 15589 }, { "epoch": 0.40030748442116376, "grad_norm": 0.81640625, "learning_rate": 0.0001727056432751272, "loss": 1.0125, "step": 15590 }, { "epoch": 0.4003331616170856, "grad_norm": 0.8046875, "learning_rate": 0.0001727025781813356, "loss": 0.9503, "step": 15591 }, { "epoch": 0.40035883881300743, "grad_norm": 0.79296875, "learning_rate": 0.00017269951294265432, "loss": 0.9581, "step": 15592 }, { "epoch": 0.40038451600892927, "grad_norm": 0.73828125, "learning_rate": 0.0001726964475590895, "loss": 0.9921, "step": 15593 }, { "epoch": 0.40041019320485105, "grad_norm": 0.7421875, "learning_rate": 0.0001726933820306472, "loss": 0.9764, "step": 15594 }, { "epoch": 0.4004358704007729, "grad_norm": 0.80859375, "learning_rate": 0.00017269031635733356, "loss": 0.8834, "step": 15595 }, { "epoch": 0.4004615475966947, "grad_norm": 0.828125, "learning_rate": 0.0001726872505391547, "loss": 1.0585, "step": 15596 }, { "epoch": 0.4004872247926165, "grad_norm": 0.8125, "learning_rate": 0.00017268418457611672, "loss": 0.7939, "step": 15597 }, { "epoch": 0.40051290198853834, "grad_norm": 0.796875, "learning_rate": 0.00017268111846822573, "loss": 0.912, "step": 15598 }, { "epoch": 0.4005385791844602, "grad_norm": 0.87890625, "learning_rate": 0.0001726780522154878, "loss": 0.9537, "step": 15599 }, { "epoch": 0.40056425638038196, "grad_norm": 0.77734375, "learning_rate": 0.0001726749858179091, "loss": 1.046, "step": 15600 }, { "epoch": 0.4005899335763038, "grad_norm": 0.76953125, "learning_rate": 0.0001726719192754957, "loss": 0.9424, "step": 15601 }, { "epoch": 0.4006156107722256, "grad_norm": 0.828125, "learning_rate": 0.00017266885258825378, "loss": 0.8608, "step": 15602 }, { "epoch": 0.40064128796814746, "grad_norm": 0.78515625, "learning_rate": 0.00017266578575618935, "loss": 0.9803, "step": 15603 }, { "epoch": 0.40066696516406924, "grad_norm": 0.7421875, "learning_rate": 0.0001726627187793086, "loss": 0.8221, "step": 15604 }, { "epoch": 0.4006926423599911, "grad_norm": 0.74609375, "learning_rate": 0.0001726596516576176, "loss": 0.8459, "step": 15605 }, { "epoch": 0.4007183195559129, "grad_norm": 0.75, "learning_rate": 0.0001726565843911225, "loss": 0.9265, "step": 15606 }, { "epoch": 0.4007439967518347, "grad_norm": 0.7890625, "learning_rate": 0.00017265351697982938, "loss": 0.8486, "step": 15607 }, { "epoch": 0.40076967394775653, "grad_norm": 0.8125, "learning_rate": 0.00017265044942374434, "loss": 0.9313, "step": 15608 }, { "epoch": 0.40079535114367837, "grad_norm": 0.75390625, "learning_rate": 0.00017264738172287353, "loss": 0.8566, "step": 15609 }, { "epoch": 0.40082102833960015, "grad_norm": 0.890625, "learning_rate": 0.00017264431387722305, "loss": 1.0683, "step": 15610 }, { "epoch": 0.400846705535522, "grad_norm": 0.83984375, "learning_rate": 0.00017264124588679903, "loss": 0.9914, "step": 15611 }, { "epoch": 0.4008723827314438, "grad_norm": 0.8203125, "learning_rate": 0.00017263817775160756, "loss": 0.9086, "step": 15612 }, { "epoch": 0.40089805992736566, "grad_norm": 0.84765625, "learning_rate": 0.00017263510947165475, "loss": 0.9465, "step": 15613 }, { "epoch": 0.40092373712328744, "grad_norm": 0.86328125, "learning_rate": 0.00017263204104694675, "loss": 0.9636, "step": 15614 }, { "epoch": 0.4009494143192093, "grad_norm": 0.8671875, "learning_rate": 0.00017262897247748962, "loss": 0.919, "step": 15615 }, { "epoch": 0.4009750915151311, "grad_norm": 0.8203125, "learning_rate": 0.00017262590376328953, "loss": 0.9192, "step": 15616 }, { "epoch": 0.4010007687110529, "grad_norm": 0.77734375, "learning_rate": 0.0001726228349043526, "loss": 0.992, "step": 15617 }, { "epoch": 0.4010264459069747, "grad_norm": 0.796875, "learning_rate": 0.00017261976590068488, "loss": 0.8589, "step": 15618 }, { "epoch": 0.40105212310289656, "grad_norm": 0.78515625, "learning_rate": 0.00017261669675229256, "loss": 1.0411, "step": 15619 }, { "epoch": 0.40107780029881834, "grad_norm": 0.84765625, "learning_rate": 0.00017261362745918168, "loss": 0.8318, "step": 15620 }, { "epoch": 0.4011034774947402, "grad_norm": 0.75390625, "learning_rate": 0.00017261055802135844, "loss": 0.9958, "step": 15621 }, { "epoch": 0.401129154690662, "grad_norm": 0.76953125, "learning_rate": 0.0001726074884388289, "loss": 1.0146, "step": 15622 }, { "epoch": 0.40115483188658385, "grad_norm": 0.734375, "learning_rate": 0.0001726044187115992, "loss": 1.0026, "step": 15623 }, { "epoch": 0.40118050908250563, "grad_norm": 0.77734375, "learning_rate": 0.00017260134883967542, "loss": 0.8872, "step": 15624 }, { "epoch": 0.40120618627842747, "grad_norm": 0.96484375, "learning_rate": 0.00017259827882306375, "loss": 1.0725, "step": 15625 }, { "epoch": 0.4012318634743493, "grad_norm": 0.7734375, "learning_rate": 0.00017259520866177023, "loss": 0.9083, "step": 15626 }, { "epoch": 0.4012575406702711, "grad_norm": 0.78125, "learning_rate": 0.00017259213835580105, "loss": 0.9217, "step": 15627 }, { "epoch": 0.4012832178661929, "grad_norm": 0.75, "learning_rate": 0.0001725890679051623, "loss": 0.8674, "step": 15628 }, { "epoch": 0.40130889506211476, "grad_norm": 0.70703125, "learning_rate": 0.00017258599730986008, "loss": 0.9057, "step": 15629 }, { "epoch": 0.40133457225803654, "grad_norm": 0.7890625, "learning_rate": 0.0001725829265699005, "loss": 0.8605, "step": 15630 }, { "epoch": 0.4013602494539584, "grad_norm": 0.76953125, "learning_rate": 0.00017257985568528973, "loss": 0.8271, "step": 15631 }, { "epoch": 0.4013859266498802, "grad_norm": 0.765625, "learning_rate": 0.00017257678465603386, "loss": 0.7503, "step": 15632 }, { "epoch": 0.40141160384580205, "grad_norm": 0.69921875, "learning_rate": 0.00017257371348213901, "loss": 0.8277, "step": 15633 }, { "epoch": 0.4014372810417238, "grad_norm": 0.78515625, "learning_rate": 0.00017257064216361133, "loss": 0.9864, "step": 15634 }, { "epoch": 0.40146295823764566, "grad_norm": 0.80859375, "learning_rate": 0.0001725675707004569, "loss": 0.899, "step": 15635 }, { "epoch": 0.4014886354335675, "grad_norm": 0.8046875, "learning_rate": 0.00017256449909268184, "loss": 1.0193, "step": 15636 }, { "epoch": 0.4015143126294893, "grad_norm": 0.734375, "learning_rate": 0.0001725614273402923, "loss": 0.8254, "step": 15637 }, { "epoch": 0.4015399898254111, "grad_norm": 0.74609375, "learning_rate": 0.00017255835544329438, "loss": 0.8946, "step": 15638 }, { "epoch": 0.40156566702133295, "grad_norm": 0.7734375, "learning_rate": 0.00017255528340169422, "loss": 0.9178, "step": 15639 }, { "epoch": 0.40159134421725473, "grad_norm": 0.70703125, "learning_rate": 0.00017255221121549797, "loss": 0.7947, "step": 15640 }, { "epoch": 0.40161702141317657, "grad_norm": 0.75390625, "learning_rate": 0.00017254913888471168, "loss": 1.0419, "step": 15641 }, { "epoch": 0.4016426986090984, "grad_norm": 0.78125, "learning_rate": 0.0001725460664093415, "loss": 0.9563, "step": 15642 }, { "epoch": 0.40166837580502024, "grad_norm": 0.7578125, "learning_rate": 0.00017254299378939358, "loss": 1.0264, "step": 15643 }, { "epoch": 0.401694053000942, "grad_norm": 0.78515625, "learning_rate": 0.00017253992102487403, "loss": 0.9478, "step": 15644 }, { "epoch": 0.40171973019686386, "grad_norm": 0.7265625, "learning_rate": 0.00017253684811578893, "loss": 0.9504, "step": 15645 }, { "epoch": 0.4017454073927857, "grad_norm": 0.7890625, "learning_rate": 0.0001725337750621445, "loss": 0.932, "step": 15646 }, { "epoch": 0.40177108458870747, "grad_norm": 0.8125, "learning_rate": 0.00017253070186394677, "loss": 0.9063, "step": 15647 }, { "epoch": 0.4017967617846293, "grad_norm": 0.78125, "learning_rate": 0.00017252762852120192, "loss": 0.8689, "step": 15648 }, { "epoch": 0.40182243898055114, "grad_norm": 0.71484375, "learning_rate": 0.00017252455503391603, "loss": 0.8743, "step": 15649 }, { "epoch": 0.4018481161764729, "grad_norm": 0.7578125, "learning_rate": 0.00017252148140209532, "loss": 0.9961, "step": 15650 }, { "epoch": 0.40187379337239476, "grad_norm": 0.78125, "learning_rate": 0.00017251840762574577, "loss": 1.0169, "step": 15651 }, { "epoch": 0.4018994705683166, "grad_norm": 0.80859375, "learning_rate": 0.00017251533370487364, "loss": 0.976, "step": 15652 }, { "epoch": 0.40192514776423843, "grad_norm": 0.80078125, "learning_rate": 0.00017251225963948498, "loss": 0.9222, "step": 15653 }, { "epoch": 0.4019508249601602, "grad_norm": 0.77734375, "learning_rate": 0.00017250918542958594, "loss": 0.9634, "step": 15654 }, { "epoch": 0.40197650215608205, "grad_norm": 0.78125, "learning_rate": 0.0001725061110751826, "loss": 1.0697, "step": 15655 }, { "epoch": 0.4020021793520039, "grad_norm": 0.76171875, "learning_rate": 0.0001725030365762812, "loss": 0.9513, "step": 15656 }, { "epoch": 0.40202785654792567, "grad_norm": 0.72265625, "learning_rate": 0.00017249996193288776, "loss": 0.8821, "step": 15657 }, { "epoch": 0.4020535337438475, "grad_norm": 0.80859375, "learning_rate": 0.00017249688714500848, "loss": 0.8172, "step": 15658 }, { "epoch": 0.40207921093976934, "grad_norm": 3.6875, "learning_rate": 0.0001724938122126494, "loss": 1.0589, "step": 15659 }, { "epoch": 0.4021048881356911, "grad_norm": 0.7890625, "learning_rate": 0.00017249073713581674, "loss": 0.8813, "step": 15660 }, { "epoch": 0.40213056533161295, "grad_norm": 0.8515625, "learning_rate": 0.00017248766191451656, "loss": 1.0557, "step": 15661 }, { "epoch": 0.4021562425275348, "grad_norm": 0.83984375, "learning_rate": 0.00017248458654875502, "loss": 1.0593, "step": 15662 }, { "epoch": 0.4021819197234566, "grad_norm": 0.80859375, "learning_rate": 0.00017248151103853827, "loss": 0.8838, "step": 15663 }, { "epoch": 0.4022075969193784, "grad_norm": 0.8125, "learning_rate": 0.00017247843538387237, "loss": 0.9466, "step": 15664 }, { "epoch": 0.40223327411530024, "grad_norm": 0.7890625, "learning_rate": 0.00017247535958476355, "loss": 1.008, "step": 15665 }, { "epoch": 0.4022589513112221, "grad_norm": 0.8046875, "learning_rate": 0.0001724722836412179, "loss": 0.9867, "step": 15666 }, { "epoch": 0.40228462850714386, "grad_norm": 0.8125, "learning_rate": 0.00017246920755324148, "loss": 0.9183, "step": 15667 }, { "epoch": 0.4023103057030657, "grad_norm": 0.76171875, "learning_rate": 0.00017246613132084048, "loss": 0.8761, "step": 15668 }, { "epoch": 0.40233598289898753, "grad_norm": 0.76953125, "learning_rate": 0.00017246305494402105, "loss": 1.0183, "step": 15669 }, { "epoch": 0.4023616600949093, "grad_norm": 0.7890625, "learning_rate": 0.0001724599784227893, "loss": 0.8875, "step": 15670 }, { "epoch": 0.40238733729083115, "grad_norm": 0.734375, "learning_rate": 0.00017245690175715132, "loss": 0.9542, "step": 15671 }, { "epoch": 0.402413014486753, "grad_norm": 0.8046875, "learning_rate": 0.00017245382494711332, "loss": 0.8922, "step": 15672 }, { "epoch": 0.4024386916826748, "grad_norm": 0.7578125, "learning_rate": 0.00017245074799268135, "loss": 0.8997, "step": 15673 }, { "epoch": 0.4024643688785966, "grad_norm": 0.76953125, "learning_rate": 0.00017244767089386162, "loss": 0.9377, "step": 15674 }, { "epoch": 0.40249004607451844, "grad_norm": 0.71875, "learning_rate": 0.00017244459365066024, "loss": 0.9426, "step": 15675 }, { "epoch": 0.4025157232704403, "grad_norm": 0.80859375, "learning_rate": 0.0001724415162630833, "loss": 0.839, "step": 15676 }, { "epoch": 0.40254140046636205, "grad_norm": 0.875, "learning_rate": 0.00017243843873113694, "loss": 0.9584, "step": 15677 }, { "epoch": 0.4025670776622839, "grad_norm": 0.78515625, "learning_rate": 0.00017243536105482736, "loss": 0.9481, "step": 15678 }, { "epoch": 0.4025927548582057, "grad_norm": 0.77734375, "learning_rate": 0.00017243228323416063, "loss": 0.9629, "step": 15679 }, { "epoch": 0.4026184320541275, "grad_norm": 0.79296875, "learning_rate": 0.0001724292052691429, "loss": 1.0906, "step": 15680 }, { "epoch": 0.40264410925004934, "grad_norm": 0.80078125, "learning_rate": 0.0001724261271597803, "loss": 0.9414, "step": 15681 }, { "epoch": 0.4026697864459712, "grad_norm": 0.8203125, "learning_rate": 0.000172423048906079, "loss": 0.9835, "step": 15682 }, { "epoch": 0.402695463641893, "grad_norm": 0.7109375, "learning_rate": 0.00017241997050804507, "loss": 0.8253, "step": 15683 }, { "epoch": 0.4027211408378148, "grad_norm": 0.796875, "learning_rate": 0.0001724168919656847, "loss": 0.9338, "step": 15684 }, { "epoch": 0.40274681803373663, "grad_norm": 0.765625, "learning_rate": 0.000172413813279004, "loss": 0.9776, "step": 15685 }, { "epoch": 0.40277249522965847, "grad_norm": 0.70703125, "learning_rate": 0.00017241073444800912, "loss": 0.9199, "step": 15686 }, { "epoch": 0.40279817242558025, "grad_norm": 0.7734375, "learning_rate": 0.0001724076554727062, "loss": 0.928, "step": 15687 }, { "epoch": 0.4028238496215021, "grad_norm": 0.71875, "learning_rate": 0.0001724045763531013, "loss": 0.7888, "step": 15688 }, { "epoch": 0.4028495268174239, "grad_norm": 0.7578125, "learning_rate": 0.00017240149708920067, "loss": 0.9506, "step": 15689 }, { "epoch": 0.4028752040133457, "grad_norm": 0.7890625, "learning_rate": 0.00017239841768101042, "loss": 1.0501, "step": 15690 }, { "epoch": 0.40290088120926754, "grad_norm": 0.796875, "learning_rate": 0.0001723953381285366, "loss": 1.0337, "step": 15691 }, { "epoch": 0.4029265584051894, "grad_norm": 0.80078125, "learning_rate": 0.00017239225843178546, "loss": 0.8763, "step": 15692 }, { "epoch": 0.4029522356011112, "grad_norm": 0.81640625, "learning_rate": 0.00017238917859076306, "loss": 1.0357, "step": 15693 }, { "epoch": 0.402977912797033, "grad_norm": 0.83984375, "learning_rate": 0.00017238609860547558, "loss": 0.9793, "step": 15694 }, { "epoch": 0.4030035899929548, "grad_norm": 0.8203125, "learning_rate": 0.00017238301847592914, "loss": 1.0335, "step": 15695 }, { "epoch": 0.40302926718887666, "grad_norm": 0.75390625, "learning_rate": 0.00017237993820212987, "loss": 1.0336, "step": 15696 }, { "epoch": 0.40305494438479844, "grad_norm": 0.75, "learning_rate": 0.00017237685778408393, "loss": 0.9268, "step": 15697 }, { "epoch": 0.4030806215807203, "grad_norm": 0.7734375, "learning_rate": 0.00017237377722179747, "loss": 0.9581, "step": 15698 }, { "epoch": 0.4031062987766421, "grad_norm": 0.734375, "learning_rate": 0.00017237069651527658, "loss": 0.9076, "step": 15699 }, { "epoch": 0.4031319759725639, "grad_norm": 0.8046875, "learning_rate": 0.00017236761566452745, "loss": 1.0126, "step": 15700 }, { "epoch": 0.40315765316848573, "grad_norm": 0.7578125, "learning_rate": 0.0001723645346695562, "loss": 0.9321, "step": 15701 }, { "epoch": 0.40318333036440757, "grad_norm": 0.7578125, "learning_rate": 0.00017236145353036897, "loss": 0.8628, "step": 15702 }, { "epoch": 0.40320900756032935, "grad_norm": 0.7578125, "learning_rate": 0.00017235837224697189, "loss": 0.845, "step": 15703 }, { "epoch": 0.4032346847562512, "grad_norm": 0.77734375, "learning_rate": 0.00017235529081937112, "loss": 0.9438, "step": 15704 }, { "epoch": 0.403260361952173, "grad_norm": 0.91796875, "learning_rate": 0.00017235220924757277, "loss": 0.9769, "step": 15705 }, { "epoch": 0.40328603914809485, "grad_norm": 0.80859375, "learning_rate": 0.000172349127531583, "loss": 0.8626, "step": 15706 }, { "epoch": 0.40331171634401664, "grad_norm": 0.81640625, "learning_rate": 0.000172346045671408, "loss": 0.9231, "step": 15707 }, { "epoch": 0.40333739353993847, "grad_norm": 0.734375, "learning_rate": 0.0001723429636670538, "loss": 0.9602, "step": 15708 }, { "epoch": 0.4033630707358603, "grad_norm": 0.83984375, "learning_rate": 0.00017233988151852664, "loss": 0.9968, "step": 15709 }, { "epoch": 0.4033887479317821, "grad_norm": 1.0078125, "learning_rate": 0.00017233679922583266, "loss": 1.0133, "step": 15710 }, { "epoch": 0.4034144251277039, "grad_norm": 0.828125, "learning_rate": 0.00017233371678897794, "loss": 1.0594, "step": 15711 }, { "epoch": 0.40344010232362576, "grad_norm": 0.82421875, "learning_rate": 0.00017233063420796866, "loss": 1.0415, "step": 15712 }, { "epoch": 0.40346577951954754, "grad_norm": 0.8203125, "learning_rate": 0.00017232755148281097, "loss": 1.0729, "step": 15713 }, { "epoch": 0.4034914567154694, "grad_norm": 0.77734375, "learning_rate": 0.000172324468613511, "loss": 0.9286, "step": 15714 }, { "epoch": 0.4035171339113912, "grad_norm": 1.421875, "learning_rate": 0.0001723213856000749, "loss": 0.9029, "step": 15715 }, { "epoch": 0.40354281110731305, "grad_norm": 0.8046875, "learning_rate": 0.0001723183024425088, "loss": 0.8453, "step": 15716 }, { "epoch": 0.40356848830323483, "grad_norm": 0.83984375, "learning_rate": 0.00017231521914081883, "loss": 0.9712, "step": 15717 }, { "epoch": 0.40359416549915667, "grad_norm": 0.94140625, "learning_rate": 0.0001723121356950112, "loss": 0.877, "step": 15718 }, { "epoch": 0.4036198426950785, "grad_norm": 0.8125, "learning_rate": 0.00017230905210509202, "loss": 0.8676, "step": 15719 }, { "epoch": 0.4036455198910003, "grad_norm": 0.796875, "learning_rate": 0.0001723059683710674, "loss": 0.8935, "step": 15720 }, { "epoch": 0.4036711970869221, "grad_norm": 0.76171875, "learning_rate": 0.00017230288449294353, "loss": 0.8809, "step": 15721 }, { "epoch": 0.40369687428284395, "grad_norm": 0.71875, "learning_rate": 0.00017229980047072657, "loss": 0.862, "step": 15722 }, { "epoch": 0.40372255147876573, "grad_norm": 0.7578125, "learning_rate": 0.0001722967163044226, "loss": 1.0688, "step": 15723 }, { "epoch": 0.40374822867468757, "grad_norm": 1.0390625, "learning_rate": 0.00017229363199403782, "loss": 0.9263, "step": 15724 }, { "epoch": 0.4037739058706094, "grad_norm": 0.8359375, "learning_rate": 0.00017229054753957837, "loss": 0.9377, "step": 15725 }, { "epoch": 0.40379958306653124, "grad_norm": 0.765625, "learning_rate": 0.00017228746294105037, "loss": 0.9131, "step": 15726 }, { "epoch": 0.403825260262453, "grad_norm": 0.74609375, "learning_rate": 0.00017228437819846, "loss": 0.9571, "step": 15727 }, { "epoch": 0.40385093745837486, "grad_norm": 0.8359375, "learning_rate": 0.00017228129331181338, "loss": 1.0129, "step": 15728 }, { "epoch": 0.4038766146542967, "grad_norm": 0.79296875, "learning_rate": 0.00017227820828111668, "loss": 0.9027, "step": 15729 }, { "epoch": 0.4039022918502185, "grad_norm": 0.76953125, "learning_rate": 0.00017227512310637603, "loss": 0.9413, "step": 15730 }, { "epoch": 0.4039279690461403, "grad_norm": 0.7421875, "learning_rate": 0.00017227203778759763, "loss": 0.9821, "step": 15731 }, { "epoch": 0.40395364624206215, "grad_norm": 0.7109375, "learning_rate": 0.00017226895232478755, "loss": 0.8555, "step": 15732 }, { "epoch": 0.40397932343798393, "grad_norm": 0.7421875, "learning_rate": 0.00017226586671795197, "loss": 0.9981, "step": 15733 }, { "epoch": 0.40400500063390576, "grad_norm": 0.8203125, "learning_rate": 0.00017226278096709705, "loss": 0.9741, "step": 15734 }, { "epoch": 0.4040306778298276, "grad_norm": 0.76953125, "learning_rate": 0.00017225969507222896, "loss": 0.9261, "step": 15735 }, { "epoch": 0.40405635502574944, "grad_norm": 1.296875, "learning_rate": 0.0001722566090333538, "loss": 0.8266, "step": 15736 }, { "epoch": 0.4040820322216712, "grad_norm": 0.80859375, "learning_rate": 0.00017225352285047776, "loss": 0.9208, "step": 15737 }, { "epoch": 0.40410770941759305, "grad_norm": 0.734375, "learning_rate": 0.00017225043652360696, "loss": 0.9068, "step": 15738 }, { "epoch": 0.4041333866135149, "grad_norm": 0.7578125, "learning_rate": 0.00017224735005274758, "loss": 0.9755, "step": 15739 }, { "epoch": 0.40415906380943667, "grad_norm": 0.74609375, "learning_rate": 0.00017224426343790574, "loss": 0.8908, "step": 15740 }, { "epoch": 0.4041847410053585, "grad_norm": 0.78125, "learning_rate": 0.0001722411766790876, "loss": 0.905, "step": 15741 }, { "epoch": 0.40421041820128034, "grad_norm": 0.6875, "learning_rate": 0.00017223808977629936, "loss": 0.8528, "step": 15742 }, { "epoch": 0.4042360953972021, "grad_norm": 0.78515625, "learning_rate": 0.0001722350027295471, "loss": 0.8118, "step": 15743 }, { "epoch": 0.40426177259312396, "grad_norm": 0.78125, "learning_rate": 0.00017223191553883703, "loss": 0.8353, "step": 15744 }, { "epoch": 0.4042874497890458, "grad_norm": 0.76171875, "learning_rate": 0.00017222882820417526, "loss": 0.9918, "step": 15745 }, { "epoch": 0.40431312698496763, "grad_norm": 0.76953125, "learning_rate": 0.00017222574072556796, "loss": 0.9588, "step": 15746 }, { "epoch": 0.4043388041808894, "grad_norm": 0.85546875, "learning_rate": 0.0001722226531030213, "loss": 0.9452, "step": 15747 }, { "epoch": 0.40436448137681125, "grad_norm": 0.77734375, "learning_rate": 0.0001722195653365414, "loss": 0.9191, "step": 15748 }, { "epoch": 0.4043901585727331, "grad_norm": 0.8359375, "learning_rate": 0.00017221647742613443, "loss": 0.8525, "step": 15749 }, { "epoch": 0.40441583576865486, "grad_norm": 0.8125, "learning_rate": 0.00017221338937180653, "loss": 0.9428, "step": 15750 }, { "epoch": 0.4044415129645767, "grad_norm": 0.80078125, "learning_rate": 0.0001722103011735639, "loss": 0.9126, "step": 15751 }, { "epoch": 0.40446719016049854, "grad_norm": 0.765625, "learning_rate": 0.00017220721283141266, "loss": 0.9734, "step": 15752 }, { "epoch": 0.4044928673564203, "grad_norm": 0.73046875, "learning_rate": 0.00017220412434535893, "loss": 0.9365, "step": 15753 }, { "epoch": 0.40451854455234215, "grad_norm": 0.7578125, "learning_rate": 0.00017220103571540896, "loss": 1.0596, "step": 15754 }, { "epoch": 0.404544221748264, "grad_norm": 0.7734375, "learning_rate": 0.0001721979469415688, "loss": 0.9646, "step": 15755 }, { "epoch": 0.4045698989441858, "grad_norm": 0.91015625, "learning_rate": 0.00017219485802384465, "loss": 0.9742, "step": 15756 }, { "epoch": 0.4045955761401076, "grad_norm": 0.80078125, "learning_rate": 0.0001721917689622427, "loss": 0.9418, "step": 15757 }, { "epoch": 0.40462125333602944, "grad_norm": 0.72265625, "learning_rate": 0.00017218867975676906, "loss": 0.9088, "step": 15758 }, { "epoch": 0.4046469305319513, "grad_norm": 0.82421875, "learning_rate": 0.00017218559040742991, "loss": 0.9715, "step": 15759 }, { "epoch": 0.40467260772787306, "grad_norm": 0.76953125, "learning_rate": 0.0001721825009142314, "loss": 0.9216, "step": 15760 }, { "epoch": 0.4046982849237949, "grad_norm": 0.78515625, "learning_rate": 0.00017217941127717966, "loss": 0.9894, "step": 15761 }, { "epoch": 0.40472396211971673, "grad_norm": 0.765625, "learning_rate": 0.00017217632149628092, "loss": 0.9293, "step": 15762 }, { "epoch": 0.4047496393156385, "grad_norm": 0.796875, "learning_rate": 0.00017217323157154123, "loss": 0.9103, "step": 15763 }, { "epoch": 0.40477531651156035, "grad_norm": 0.76171875, "learning_rate": 0.00017217014150296687, "loss": 0.9318, "step": 15764 }, { "epoch": 0.4048009937074822, "grad_norm": 0.80078125, "learning_rate": 0.0001721670512905639, "loss": 0.8332, "step": 15765 }, { "epoch": 0.404826670903404, "grad_norm": 0.76171875, "learning_rate": 0.00017216396093433853, "loss": 0.9376, "step": 15766 }, { "epoch": 0.4048523480993258, "grad_norm": 0.734375, "learning_rate": 0.00017216087043429688, "loss": 0.8988, "step": 15767 }, { "epoch": 0.40487802529524763, "grad_norm": 0.72265625, "learning_rate": 0.00017215777979044517, "loss": 0.8532, "step": 15768 }, { "epoch": 0.40490370249116947, "grad_norm": 0.78125, "learning_rate": 0.0001721546890027895, "loss": 0.875, "step": 15769 }, { "epoch": 0.40492937968709125, "grad_norm": 0.75390625, "learning_rate": 0.00017215159807133605, "loss": 0.902, "step": 15770 }, { "epoch": 0.4049550568830131, "grad_norm": 0.82421875, "learning_rate": 0.00017214850699609103, "loss": 1.0208, "step": 15771 }, { "epoch": 0.4049807340789349, "grad_norm": 0.77734375, "learning_rate": 0.0001721454157770605, "loss": 0.9756, "step": 15772 }, { "epoch": 0.4050064112748567, "grad_norm": 0.7421875, "learning_rate": 0.0001721423244142507, "loss": 0.9599, "step": 15773 }, { "epoch": 0.40503208847077854, "grad_norm": 0.7578125, "learning_rate": 0.00017213923290766773, "loss": 1.0642, "step": 15774 }, { "epoch": 0.4050577656667004, "grad_norm": 0.796875, "learning_rate": 0.0001721361412573178, "loss": 0.9136, "step": 15775 }, { "epoch": 0.4050834428626222, "grad_norm": 0.76171875, "learning_rate": 0.00017213304946320706, "loss": 0.7992, "step": 15776 }, { "epoch": 0.405109120058544, "grad_norm": 0.75390625, "learning_rate": 0.00017212995752534165, "loss": 0.8983, "step": 15777 }, { "epoch": 0.40513479725446583, "grad_norm": 0.859375, "learning_rate": 0.00017212686544372782, "loss": 1.0778, "step": 15778 }, { "epoch": 0.40516047445038766, "grad_norm": 0.85546875, "learning_rate": 0.0001721237732183716, "loss": 0.9907, "step": 15779 }, { "epoch": 0.40518615164630944, "grad_norm": 0.8359375, "learning_rate": 0.00017212068084927922, "loss": 0.9907, "step": 15780 }, { "epoch": 0.4052118288422313, "grad_norm": 0.765625, "learning_rate": 0.00017211758833645684, "loss": 0.9038, "step": 15781 }, { "epoch": 0.4052375060381531, "grad_norm": 0.89453125, "learning_rate": 0.00017211449567991065, "loss": 0.9566, "step": 15782 }, { "epoch": 0.4052631832340749, "grad_norm": 0.72265625, "learning_rate": 0.00017211140287964676, "loss": 0.8946, "step": 15783 }, { "epoch": 0.40528886042999673, "grad_norm": 0.8046875, "learning_rate": 0.00017210830993567134, "loss": 0.8855, "step": 15784 }, { "epoch": 0.40531453762591857, "grad_norm": 0.83203125, "learning_rate": 0.00017210521684799058, "loss": 0.8676, "step": 15785 }, { "epoch": 0.4053402148218404, "grad_norm": 0.7890625, "learning_rate": 0.00017210212361661067, "loss": 0.9823, "step": 15786 }, { "epoch": 0.4053658920177622, "grad_norm": 0.8359375, "learning_rate": 0.0001720990302415377, "loss": 0.967, "step": 15787 }, { "epoch": 0.405391569213684, "grad_norm": 0.79296875, "learning_rate": 0.00017209593672277792, "loss": 1.0506, "step": 15788 }, { "epoch": 0.40541724640960586, "grad_norm": 0.81640625, "learning_rate": 0.0001720928430603374, "loss": 0.9466, "step": 15789 }, { "epoch": 0.40544292360552764, "grad_norm": 0.76953125, "learning_rate": 0.00017208974925422238, "loss": 0.9758, "step": 15790 }, { "epoch": 0.4054686008014495, "grad_norm": 0.76171875, "learning_rate": 0.000172086655304439, "loss": 0.7924, "step": 15791 }, { "epoch": 0.4054942779973713, "grad_norm": 0.73828125, "learning_rate": 0.00017208356121099343, "loss": 0.8854, "step": 15792 }, { "epoch": 0.4055199551932931, "grad_norm": 0.76171875, "learning_rate": 0.00017208046697389184, "loss": 0.875, "step": 15793 }, { "epoch": 0.4055456323892149, "grad_norm": 0.76953125, "learning_rate": 0.0001720773725931404, "loss": 1.0175, "step": 15794 }, { "epoch": 0.40557130958513676, "grad_norm": 0.78125, "learning_rate": 0.00017207427806874523, "loss": 0.8875, "step": 15795 }, { "epoch": 0.4055969867810586, "grad_norm": 0.76953125, "learning_rate": 0.00017207118340071254, "loss": 0.9576, "step": 15796 }, { "epoch": 0.4056226639769804, "grad_norm": 0.84375, "learning_rate": 0.0001720680885890485, "loss": 0.8952, "step": 15797 }, { "epoch": 0.4056483411729022, "grad_norm": 0.74609375, "learning_rate": 0.00017206499363375927, "loss": 0.8454, "step": 15798 }, { "epoch": 0.40567401836882405, "grad_norm": 0.796875, "learning_rate": 0.00017206189853485103, "loss": 1.0158, "step": 15799 }, { "epoch": 0.40569969556474583, "grad_norm": 0.75390625, "learning_rate": 0.00017205880329232992, "loss": 0.8899, "step": 15800 }, { "epoch": 0.40572537276066767, "grad_norm": 0.7265625, "learning_rate": 0.00017205570790620214, "loss": 1.0885, "step": 15801 }, { "epoch": 0.4057510499565895, "grad_norm": 0.75390625, "learning_rate": 0.00017205261237647384, "loss": 0.882, "step": 15802 }, { "epoch": 0.4057767271525113, "grad_norm": 0.72265625, "learning_rate": 0.0001720495167031512, "loss": 0.8785, "step": 15803 }, { "epoch": 0.4058024043484331, "grad_norm": 0.8359375, "learning_rate": 0.00017204642088624036, "loss": 0.9354, "step": 15804 }, { "epoch": 0.40582808154435496, "grad_norm": 0.7265625, "learning_rate": 0.0001720433249257475, "loss": 0.8854, "step": 15805 }, { "epoch": 0.4058537587402768, "grad_norm": 0.82421875, "learning_rate": 0.00017204022882167882, "loss": 1.118, "step": 15806 }, { "epoch": 0.4058794359361986, "grad_norm": 0.7734375, "learning_rate": 0.00017203713257404047, "loss": 0.9112, "step": 15807 }, { "epoch": 0.4059051131321204, "grad_norm": 0.8671875, "learning_rate": 0.00017203403618283865, "loss": 1.0127, "step": 15808 }, { "epoch": 0.40593079032804225, "grad_norm": 0.75, "learning_rate": 0.00017203093964807947, "loss": 0.941, "step": 15809 }, { "epoch": 0.405956467523964, "grad_norm": 0.828125, "learning_rate": 0.00017202784296976913, "loss": 0.9779, "step": 15810 }, { "epoch": 0.40598214471988586, "grad_norm": 0.703125, "learning_rate": 0.0001720247461479138, "loss": 0.9295, "step": 15811 }, { "epoch": 0.4060078219158077, "grad_norm": 0.7734375, "learning_rate": 0.00017202164918251968, "loss": 0.9041, "step": 15812 }, { "epoch": 0.4060334991117295, "grad_norm": 0.8203125, "learning_rate": 0.00017201855207359291, "loss": 0.9254, "step": 15813 }, { "epoch": 0.4060591763076513, "grad_norm": 0.74609375, "learning_rate": 0.00017201545482113968, "loss": 0.862, "step": 15814 }, { "epoch": 0.40608485350357315, "grad_norm": 0.8515625, "learning_rate": 0.00017201235742516616, "loss": 0.9454, "step": 15815 }, { "epoch": 0.406110530699495, "grad_norm": 0.83203125, "learning_rate": 0.00017200925988567847, "loss": 0.933, "step": 15816 }, { "epoch": 0.40613620789541677, "grad_norm": 0.7890625, "learning_rate": 0.0001720061622026829, "loss": 0.8295, "step": 15817 }, { "epoch": 0.4061618850913386, "grad_norm": 0.8203125, "learning_rate": 0.0001720030643761855, "loss": 0.8936, "step": 15818 }, { "epoch": 0.40618756228726044, "grad_norm": 0.81640625, "learning_rate": 0.0001719999664061925, "loss": 0.9443, "step": 15819 }, { "epoch": 0.4062132394831822, "grad_norm": 0.734375, "learning_rate": 0.00017199686829271008, "loss": 0.8709, "step": 15820 }, { "epoch": 0.40623891667910406, "grad_norm": 0.83984375, "learning_rate": 0.00017199377003574443, "loss": 0.9583, "step": 15821 }, { "epoch": 0.4062645938750259, "grad_norm": 0.77734375, "learning_rate": 0.00017199067163530164, "loss": 0.9069, "step": 15822 }, { "epoch": 0.4062902710709477, "grad_norm": 0.7578125, "learning_rate": 0.000171987573091388, "loss": 0.9426, "step": 15823 }, { "epoch": 0.4063159482668695, "grad_norm": 0.80078125, "learning_rate": 0.00017198447440400962, "loss": 1.1052, "step": 15824 }, { "epoch": 0.40634162546279134, "grad_norm": 0.7578125, "learning_rate": 0.00017198137557317267, "loss": 0.9408, "step": 15825 }, { "epoch": 0.4063673026587132, "grad_norm": 0.7421875, "learning_rate": 0.00017197827659888332, "loss": 0.9551, "step": 15826 }, { "epoch": 0.40639297985463496, "grad_norm": 0.828125, "learning_rate": 0.00017197517748114783, "loss": 1.0029, "step": 15827 }, { "epoch": 0.4064186570505568, "grad_norm": 0.71875, "learning_rate": 0.00017197207821997225, "loss": 0.9832, "step": 15828 }, { "epoch": 0.40644433424647863, "grad_norm": 0.80859375, "learning_rate": 0.00017196897881536285, "loss": 0.8443, "step": 15829 }, { "epoch": 0.4064700114424004, "grad_norm": 0.81640625, "learning_rate": 0.00017196587926732578, "loss": 0.967, "step": 15830 }, { "epoch": 0.40649568863832225, "grad_norm": 0.91796875, "learning_rate": 0.0001719627795758672, "loss": 0.9945, "step": 15831 }, { "epoch": 0.4065213658342441, "grad_norm": 0.7578125, "learning_rate": 0.0001719596797409933, "loss": 0.8873, "step": 15832 }, { "epoch": 0.40654704303016587, "grad_norm": 0.77734375, "learning_rate": 0.00017195657976271024, "loss": 0.9542, "step": 15833 }, { "epoch": 0.4065727202260877, "grad_norm": 0.69140625, "learning_rate": 0.00017195347964102427, "loss": 0.8506, "step": 15834 }, { "epoch": 0.40659839742200954, "grad_norm": 0.796875, "learning_rate": 0.00017195037937594147, "loss": 0.9113, "step": 15835 }, { "epoch": 0.4066240746179314, "grad_norm": 0.73046875, "learning_rate": 0.0001719472789674681, "loss": 0.9697, "step": 15836 }, { "epoch": 0.40664975181385316, "grad_norm": 0.75, "learning_rate": 0.00017194417841561027, "loss": 0.8806, "step": 15837 }, { "epoch": 0.406675429009775, "grad_norm": 0.78125, "learning_rate": 0.0001719410777203742, "loss": 0.9362, "step": 15838 }, { "epoch": 0.4067011062056968, "grad_norm": 0.7734375, "learning_rate": 0.00017193797688176606, "loss": 0.8286, "step": 15839 }, { "epoch": 0.4067267834016186, "grad_norm": 0.828125, "learning_rate": 0.000171934875899792, "loss": 0.981, "step": 15840 }, { "epoch": 0.40675246059754044, "grad_norm": 0.796875, "learning_rate": 0.0001719317747744583, "loss": 0.8758, "step": 15841 }, { "epoch": 0.4067781377934623, "grad_norm": 0.82421875, "learning_rate": 0.00017192867350577102, "loss": 1.0148, "step": 15842 }, { "epoch": 0.40680381498938406, "grad_norm": 0.8359375, "learning_rate": 0.00017192557209373642, "loss": 0.8627, "step": 15843 }, { "epoch": 0.4068294921853059, "grad_norm": 0.8359375, "learning_rate": 0.0001719224705383606, "loss": 1.0877, "step": 15844 }, { "epoch": 0.40685516938122773, "grad_norm": 0.8203125, "learning_rate": 0.00017191936883964983, "loss": 0.8963, "step": 15845 }, { "epoch": 0.40688084657714957, "grad_norm": 0.8046875, "learning_rate": 0.00017191626699761026, "loss": 0.9137, "step": 15846 }, { "epoch": 0.40690652377307135, "grad_norm": 0.70703125, "learning_rate": 0.00017191316501224805, "loss": 0.9078, "step": 15847 }, { "epoch": 0.4069322009689932, "grad_norm": 0.79296875, "learning_rate": 0.00017191006288356942, "loss": 0.9435, "step": 15848 }, { "epoch": 0.406957878164915, "grad_norm": 0.98046875, "learning_rate": 0.0001719069606115805, "loss": 0.8673, "step": 15849 }, { "epoch": 0.4069835553608368, "grad_norm": 0.7421875, "learning_rate": 0.00017190385819628752, "loss": 0.8982, "step": 15850 }, { "epoch": 0.40700923255675864, "grad_norm": 0.83203125, "learning_rate": 0.00017190075563769662, "loss": 1.0634, "step": 15851 }, { "epoch": 0.4070349097526805, "grad_norm": 0.75, "learning_rate": 0.00017189765293581407, "loss": 0.7958, "step": 15852 }, { "epoch": 0.40706058694860225, "grad_norm": 0.7734375, "learning_rate": 0.00017189455009064596, "loss": 0.9332, "step": 15853 }, { "epoch": 0.4070862641445241, "grad_norm": 0.80078125, "learning_rate": 0.0001718914471021985, "loss": 0.9049, "step": 15854 }, { "epoch": 0.4071119413404459, "grad_norm": 0.73046875, "learning_rate": 0.00017188834397047787, "loss": 0.881, "step": 15855 }, { "epoch": 0.40713761853636776, "grad_norm": 0.78125, "learning_rate": 0.00017188524069549027, "loss": 0.9077, "step": 15856 }, { "epoch": 0.40716329573228954, "grad_norm": 0.6875, "learning_rate": 0.0001718821372772419, "loss": 0.7138, "step": 15857 }, { "epoch": 0.4071889729282114, "grad_norm": 0.7890625, "learning_rate": 0.0001718790337157389, "loss": 0.9776, "step": 15858 }, { "epoch": 0.4072146501241332, "grad_norm": 0.78515625, "learning_rate": 0.00017187593001098748, "loss": 0.9251, "step": 15859 }, { "epoch": 0.407240327320055, "grad_norm": 0.859375, "learning_rate": 0.00017187282616299384, "loss": 0.8868, "step": 15860 }, { "epoch": 0.40726600451597683, "grad_norm": 0.71484375, "learning_rate": 0.00017186972217176413, "loss": 0.8405, "step": 15861 }, { "epoch": 0.40729168171189867, "grad_norm": 0.83984375, "learning_rate": 0.00017186661803730456, "loss": 0.917, "step": 15862 }, { "epoch": 0.40731735890782045, "grad_norm": 0.86328125, "learning_rate": 0.00017186351375962132, "loss": 0.8573, "step": 15863 }, { "epoch": 0.4073430361037423, "grad_norm": 0.80078125, "learning_rate": 0.00017186040933872058, "loss": 0.8753, "step": 15864 }, { "epoch": 0.4073687132996641, "grad_norm": 0.8046875, "learning_rate": 0.00017185730477460855, "loss": 0.8991, "step": 15865 }, { "epoch": 0.40739439049558596, "grad_norm": 0.7421875, "learning_rate": 0.00017185420006729139, "loss": 0.9057, "step": 15866 }, { "epoch": 0.40742006769150774, "grad_norm": 0.7890625, "learning_rate": 0.0001718510952167753, "loss": 0.7857, "step": 15867 }, { "epoch": 0.4074457448874296, "grad_norm": 0.8046875, "learning_rate": 0.00017184799022306648, "loss": 0.936, "step": 15868 }, { "epoch": 0.4074714220833514, "grad_norm": 0.88671875, "learning_rate": 0.0001718448850861711, "loss": 0.912, "step": 15869 }, { "epoch": 0.4074970992792732, "grad_norm": 0.7890625, "learning_rate": 0.00017184177980609534, "loss": 0.9067, "step": 15870 }, { "epoch": 0.407522776475195, "grad_norm": 0.7890625, "learning_rate": 0.00017183867438284541, "loss": 0.7752, "step": 15871 }, { "epoch": 0.40754845367111686, "grad_norm": 0.83203125, "learning_rate": 0.00017183556881642748, "loss": 0.907, "step": 15872 }, { "epoch": 0.40757413086703864, "grad_norm": 0.80078125, "learning_rate": 0.0001718324631068478, "loss": 0.8925, "step": 15873 }, { "epoch": 0.4075998080629605, "grad_norm": 0.75, "learning_rate": 0.00017182935725411244, "loss": 0.9471, "step": 15874 }, { "epoch": 0.4076254852588823, "grad_norm": 0.8359375, "learning_rate": 0.0001718262512582277, "loss": 0.9788, "step": 15875 }, { "epoch": 0.40765116245480415, "grad_norm": 0.76953125, "learning_rate": 0.00017182314511919974, "loss": 1.0045, "step": 15876 }, { "epoch": 0.40767683965072593, "grad_norm": 0.875, "learning_rate": 0.0001718200388370347, "loss": 0.9555, "step": 15877 }, { "epoch": 0.40770251684664777, "grad_norm": 0.77734375, "learning_rate": 0.0001718169324117388, "loss": 0.8586, "step": 15878 }, { "epoch": 0.4077281940425696, "grad_norm": 0.69140625, "learning_rate": 0.0001718138258433183, "loss": 0.847, "step": 15879 }, { "epoch": 0.4077538712384914, "grad_norm": 0.82421875, "learning_rate": 0.0001718107191317793, "loss": 0.9472, "step": 15880 }, { "epoch": 0.4077795484344132, "grad_norm": 0.76171875, "learning_rate": 0.00017180761227712804, "loss": 0.9156, "step": 15881 }, { "epoch": 0.40780522563033506, "grad_norm": 0.72265625, "learning_rate": 0.00017180450527937068, "loss": 0.8057, "step": 15882 }, { "epoch": 0.40783090282625684, "grad_norm": 0.9765625, "learning_rate": 0.00017180139813851344, "loss": 1.0031, "step": 15883 }, { "epoch": 0.40785658002217867, "grad_norm": 0.83203125, "learning_rate": 0.00017179829085456246, "loss": 0.8952, "step": 15884 }, { "epoch": 0.4078822572181005, "grad_norm": 0.92578125, "learning_rate": 0.00017179518342752404, "loss": 0.8712, "step": 15885 }, { "epoch": 0.40790793441402234, "grad_norm": 0.73046875, "learning_rate": 0.00017179207585740425, "loss": 0.8456, "step": 15886 }, { "epoch": 0.4079336116099441, "grad_norm": 0.7734375, "learning_rate": 0.00017178896814420936, "loss": 0.9512, "step": 15887 }, { "epoch": 0.40795928880586596, "grad_norm": 0.76953125, "learning_rate": 0.00017178586028794552, "loss": 0.891, "step": 15888 }, { "epoch": 0.4079849660017878, "grad_norm": 0.8203125, "learning_rate": 0.00017178275228861898, "loss": 0.8961, "step": 15889 }, { "epoch": 0.4080106431977096, "grad_norm": 0.765625, "learning_rate": 0.00017177964414623586, "loss": 1.0799, "step": 15890 }, { "epoch": 0.4080363203936314, "grad_norm": 0.77734375, "learning_rate": 0.00017177653586080243, "loss": 1.0366, "step": 15891 }, { "epoch": 0.40806199758955325, "grad_norm": 0.79296875, "learning_rate": 0.00017177342743232481, "loss": 0.9678, "step": 15892 }, { "epoch": 0.40808767478547503, "grad_norm": 0.7265625, "learning_rate": 0.00017177031886080925, "loss": 0.8674, "step": 15893 }, { "epoch": 0.40811335198139687, "grad_norm": 0.72265625, "learning_rate": 0.0001717672101462619, "loss": 0.9833, "step": 15894 }, { "epoch": 0.4081390291773187, "grad_norm": 0.703125, "learning_rate": 0.00017176410128868903, "loss": 0.8281, "step": 15895 }, { "epoch": 0.40816470637324054, "grad_norm": 0.703125, "learning_rate": 0.0001717609922880968, "loss": 0.8543, "step": 15896 }, { "epoch": 0.4081903835691623, "grad_norm": 0.80078125, "learning_rate": 0.00017175788314449135, "loss": 0.8541, "step": 15897 }, { "epoch": 0.40821606076508415, "grad_norm": 0.78515625, "learning_rate": 0.00017175477385787894, "loss": 0.9245, "step": 15898 }, { "epoch": 0.408241737961006, "grad_norm": 0.79296875, "learning_rate": 0.00017175166442826573, "loss": 0.8987, "step": 15899 }, { "epoch": 0.40826741515692777, "grad_norm": 0.8125, "learning_rate": 0.00017174855485565794, "loss": 0.9413, "step": 15900 }, { "epoch": 0.4082930923528496, "grad_norm": 0.78515625, "learning_rate": 0.00017174544514006178, "loss": 1.0069, "step": 15901 }, { "epoch": 0.40831876954877144, "grad_norm": 0.77734375, "learning_rate": 0.00017174233528148341, "loss": 1.048, "step": 15902 }, { "epoch": 0.4083444467446932, "grad_norm": 0.7109375, "learning_rate": 0.00017173922527992906, "loss": 0.9029, "step": 15903 }, { "epoch": 0.40837012394061506, "grad_norm": 0.79296875, "learning_rate": 0.0001717361151354049, "loss": 0.8894, "step": 15904 }, { "epoch": 0.4083958011365369, "grad_norm": 0.81640625, "learning_rate": 0.00017173300484791716, "loss": 0.8021, "step": 15905 }, { "epoch": 0.4084214783324587, "grad_norm": 0.7578125, "learning_rate": 0.000171729894417472, "loss": 0.9386, "step": 15906 }, { "epoch": 0.4084471555283805, "grad_norm": 0.78515625, "learning_rate": 0.00017172678384407567, "loss": 0.9734, "step": 15907 }, { "epoch": 0.40847283272430235, "grad_norm": 0.75390625, "learning_rate": 0.0001717236731277343, "loss": 0.877, "step": 15908 }, { "epoch": 0.4084985099202242, "grad_norm": 0.76953125, "learning_rate": 0.00017172056226845414, "loss": 1.0307, "step": 15909 }, { "epoch": 0.40852418711614596, "grad_norm": 0.78125, "learning_rate": 0.00017171745126624138, "loss": 0.8003, "step": 15910 }, { "epoch": 0.4085498643120678, "grad_norm": 0.8359375, "learning_rate": 0.00017171434012110224, "loss": 1.0392, "step": 15911 }, { "epoch": 0.40857554150798964, "grad_norm": 0.73828125, "learning_rate": 0.00017171122883304286, "loss": 0.8729, "step": 15912 }, { "epoch": 0.4086012187039114, "grad_norm": 0.74609375, "learning_rate": 0.0001717081174020695, "loss": 0.9868, "step": 15913 }, { "epoch": 0.40862689589983325, "grad_norm": 0.703125, "learning_rate": 0.00017170500582818832, "loss": 0.8525, "step": 15914 }, { "epoch": 0.4086525730957551, "grad_norm": 0.765625, "learning_rate": 0.00017170189411140562, "loss": 0.9961, "step": 15915 }, { "epoch": 0.40867825029167687, "grad_norm": 0.765625, "learning_rate": 0.00017169878225172745, "loss": 0.9094, "step": 15916 }, { "epoch": 0.4087039274875987, "grad_norm": 0.8828125, "learning_rate": 0.00017169567024916008, "loss": 0.9998, "step": 15917 }, { "epoch": 0.40872960468352054, "grad_norm": 0.7890625, "learning_rate": 0.0001716925581037097, "loss": 0.8888, "step": 15918 }, { "epoch": 0.4087552818794424, "grad_norm": 0.82421875, "learning_rate": 0.0001716894458153826, "loss": 0.8504, "step": 15919 }, { "epoch": 0.40878095907536416, "grad_norm": 0.77734375, "learning_rate": 0.00017168633338418484, "loss": 0.9166, "step": 15920 }, { "epoch": 0.408806636271286, "grad_norm": 0.80859375, "learning_rate": 0.00017168322081012274, "loss": 0.8538, "step": 15921 }, { "epoch": 0.40883231346720783, "grad_norm": 0.82421875, "learning_rate": 0.00017168010809320242, "loss": 1.0419, "step": 15922 }, { "epoch": 0.4088579906631296, "grad_norm": 0.90234375, "learning_rate": 0.00017167699523343015, "loss": 0.8829, "step": 15923 }, { "epoch": 0.40888366785905145, "grad_norm": 0.7890625, "learning_rate": 0.0001716738822308121, "loss": 0.9302, "step": 15924 }, { "epoch": 0.4089093450549733, "grad_norm": 0.69140625, "learning_rate": 0.00017167076908535447, "loss": 0.939, "step": 15925 }, { "epoch": 0.40893502225089506, "grad_norm": 0.79296875, "learning_rate": 0.00017166765579706346, "loss": 0.9732, "step": 15926 }, { "epoch": 0.4089606994468169, "grad_norm": 0.72265625, "learning_rate": 0.00017166454236594526, "loss": 0.9364, "step": 15927 }, { "epoch": 0.40898637664273874, "grad_norm": 0.87890625, "learning_rate": 0.00017166142879200614, "loss": 0.8265, "step": 15928 }, { "epoch": 0.40901205383866057, "grad_norm": 0.796875, "learning_rate": 0.0001716583150752523, "loss": 0.9478, "step": 15929 }, { "epoch": 0.40903773103458235, "grad_norm": 0.73046875, "learning_rate": 0.00017165520121568983, "loss": 0.877, "step": 15930 }, { "epoch": 0.4090634082305042, "grad_norm": 0.75390625, "learning_rate": 0.00017165208721332505, "loss": 0.9316, "step": 15931 }, { "epoch": 0.409089085426426, "grad_norm": 0.83203125, "learning_rate": 0.00017164897306816415, "loss": 0.9395, "step": 15932 }, { "epoch": 0.4091147626223478, "grad_norm": 0.7421875, "learning_rate": 0.0001716458587802133, "loss": 0.8071, "step": 15933 }, { "epoch": 0.40914043981826964, "grad_norm": 0.71484375, "learning_rate": 0.00017164274434947876, "loss": 0.8557, "step": 15934 }, { "epoch": 0.4091661170141915, "grad_norm": 0.8046875, "learning_rate": 0.00017163962977596666, "loss": 0.8933, "step": 15935 }, { "epoch": 0.40919179421011326, "grad_norm": 0.83203125, "learning_rate": 0.00017163651505968327, "loss": 0.9573, "step": 15936 }, { "epoch": 0.4092174714060351, "grad_norm": 0.765625, "learning_rate": 0.00017163340020063479, "loss": 1.0228, "step": 15937 }, { "epoch": 0.40924314860195693, "grad_norm": 0.78515625, "learning_rate": 0.00017163028519882737, "loss": 0.8458, "step": 15938 }, { "epoch": 0.40926882579787877, "grad_norm": 0.75390625, "learning_rate": 0.0001716271700542673, "loss": 0.9461, "step": 15939 }, { "epoch": 0.40929450299380055, "grad_norm": 0.77734375, "learning_rate": 0.00017162405476696074, "loss": 0.831, "step": 15940 }, { "epoch": 0.4093201801897224, "grad_norm": 0.80859375, "learning_rate": 0.0001716209393369139, "loss": 0.8939, "step": 15941 }, { "epoch": 0.4093458573856442, "grad_norm": 0.8125, "learning_rate": 0.000171617823764133, "loss": 0.9252, "step": 15942 }, { "epoch": 0.409371534581566, "grad_norm": 0.83203125, "learning_rate": 0.00017161470804862426, "loss": 0.9608, "step": 15943 }, { "epoch": 0.40939721177748783, "grad_norm": 0.796875, "learning_rate": 0.00017161159219039384, "loss": 0.9336, "step": 15944 }, { "epoch": 0.40942288897340967, "grad_norm": 0.74609375, "learning_rate": 0.00017160847618944802, "loss": 0.8631, "step": 15945 }, { "epoch": 0.40944856616933145, "grad_norm": 0.8046875, "learning_rate": 0.00017160536004579298, "loss": 0.8029, "step": 15946 }, { "epoch": 0.4094742433652533, "grad_norm": 0.70703125, "learning_rate": 0.0001716022437594349, "loss": 0.7539, "step": 15947 }, { "epoch": 0.4094999205611751, "grad_norm": 0.734375, "learning_rate": 0.00017159912733038003, "loss": 0.9335, "step": 15948 }, { "epoch": 0.40952559775709696, "grad_norm": 0.7421875, "learning_rate": 0.00017159601075863456, "loss": 0.901, "step": 15949 }, { "epoch": 0.40955127495301874, "grad_norm": 1.1171875, "learning_rate": 0.00017159289404420472, "loss": 0.912, "step": 15950 }, { "epoch": 0.4095769521489406, "grad_norm": 0.8203125, "learning_rate": 0.00017158977718709667, "loss": 0.8474, "step": 15951 }, { "epoch": 0.4096026293448624, "grad_norm": 0.78515625, "learning_rate": 0.0001715866601873167, "loss": 1.0356, "step": 15952 }, { "epoch": 0.4096283065407842, "grad_norm": 0.75, "learning_rate": 0.00017158354304487098, "loss": 0.8548, "step": 15953 }, { "epoch": 0.40965398373670603, "grad_norm": 0.80078125, "learning_rate": 0.00017158042575976575, "loss": 0.9181, "step": 15954 }, { "epoch": 0.40967966093262786, "grad_norm": 0.76171875, "learning_rate": 0.00017157730833200713, "loss": 0.9183, "step": 15955 }, { "epoch": 0.40970533812854965, "grad_norm": 0.796875, "learning_rate": 0.00017157419076160144, "loss": 1.0153, "step": 15956 }, { "epoch": 0.4097310153244715, "grad_norm": 0.78515625, "learning_rate": 0.00017157107304855486, "loss": 0.7823, "step": 15957 }, { "epoch": 0.4097566925203933, "grad_norm": 0.77734375, "learning_rate": 0.00017156795519287357, "loss": 0.9495, "step": 15958 }, { "epoch": 0.40978236971631515, "grad_norm": 0.8203125, "learning_rate": 0.00017156483719456384, "loss": 1.0514, "step": 15959 }, { "epoch": 0.40980804691223693, "grad_norm": 0.80859375, "learning_rate": 0.00017156171905363183, "loss": 0.8749, "step": 15960 }, { "epoch": 0.40983372410815877, "grad_norm": 0.83203125, "learning_rate": 0.00017155860077008379, "loss": 0.9693, "step": 15961 }, { "epoch": 0.4098594013040806, "grad_norm": 0.8125, "learning_rate": 0.00017155548234392594, "loss": 1.1246, "step": 15962 }, { "epoch": 0.4098850785000024, "grad_norm": 0.8671875, "learning_rate": 0.00017155236377516445, "loss": 0.8993, "step": 15963 }, { "epoch": 0.4099107556959242, "grad_norm": 0.81640625, "learning_rate": 0.00017154924506380557, "loss": 0.8174, "step": 15964 }, { "epoch": 0.40993643289184606, "grad_norm": 0.75, "learning_rate": 0.00017154612620985551, "loss": 0.9098, "step": 15965 }, { "epoch": 0.40996211008776784, "grad_norm": 0.78125, "learning_rate": 0.00017154300721332048, "loss": 0.9332, "step": 15966 }, { "epoch": 0.4099877872836897, "grad_norm": 0.8125, "learning_rate": 0.0001715398880742067, "loss": 0.9743, "step": 15967 }, { "epoch": 0.4100134644796115, "grad_norm": 0.78125, "learning_rate": 0.00017153676879252036, "loss": 0.8469, "step": 15968 }, { "epoch": 0.41003914167553335, "grad_norm": 0.7734375, "learning_rate": 0.00017153364936826773, "loss": 0.8401, "step": 15969 }, { "epoch": 0.41006481887145513, "grad_norm": 0.8359375, "learning_rate": 0.00017153052980145502, "loss": 0.9248, "step": 15970 }, { "epoch": 0.41009049606737696, "grad_norm": 0.79296875, "learning_rate": 0.00017152741009208837, "loss": 0.97, "step": 15971 }, { "epoch": 0.4101161732632988, "grad_norm": 0.8046875, "learning_rate": 0.0001715242902401741, "loss": 1.0195, "step": 15972 }, { "epoch": 0.4101418504592206, "grad_norm": 0.8125, "learning_rate": 0.00017152117024571837, "loss": 0.872, "step": 15973 }, { "epoch": 0.4101675276551424, "grad_norm": 0.8125, "learning_rate": 0.0001715180501087274, "loss": 0.9162, "step": 15974 }, { "epoch": 0.41019320485106425, "grad_norm": 0.8125, "learning_rate": 0.00017151492982920742, "loss": 0.8966, "step": 15975 }, { "epoch": 0.41021888204698603, "grad_norm": 0.73828125, "learning_rate": 0.00017151180940716464, "loss": 0.9068, "step": 15976 }, { "epoch": 0.41024455924290787, "grad_norm": 0.88671875, "learning_rate": 0.00017150868884260528, "loss": 0.9385, "step": 15977 }, { "epoch": 0.4102702364388297, "grad_norm": 0.78125, "learning_rate": 0.00017150556813553557, "loss": 0.9048, "step": 15978 }, { "epoch": 0.41029591363475154, "grad_norm": 0.80078125, "learning_rate": 0.0001715024472859617, "loss": 1.048, "step": 15979 }, { "epoch": 0.4103215908306733, "grad_norm": 0.7734375, "learning_rate": 0.00017149932629388994, "loss": 0.9104, "step": 15980 }, { "epoch": 0.41034726802659516, "grad_norm": 0.78515625, "learning_rate": 0.00017149620515932647, "loss": 0.9683, "step": 15981 }, { "epoch": 0.410372945222517, "grad_norm": 0.76171875, "learning_rate": 0.00017149308388227755, "loss": 1.1276, "step": 15982 }, { "epoch": 0.4103986224184388, "grad_norm": 0.77734375, "learning_rate": 0.00017148996246274932, "loss": 0.8297, "step": 15983 }, { "epoch": 0.4104242996143606, "grad_norm": 0.859375, "learning_rate": 0.00017148684090074809, "loss": 1.089, "step": 15984 }, { "epoch": 0.41044997681028245, "grad_norm": 0.796875, "learning_rate": 0.00017148371919628006, "loss": 0.8109, "step": 15985 }, { "epoch": 0.4104756540062042, "grad_norm": 0.7890625, "learning_rate": 0.0001714805973493514, "loss": 1.0377, "step": 15986 }, { "epoch": 0.41050133120212606, "grad_norm": 0.83203125, "learning_rate": 0.00017147747535996833, "loss": 0.8798, "step": 15987 }, { "epoch": 0.4105270083980479, "grad_norm": 0.7421875, "learning_rate": 0.0001714743532281372, "loss": 0.8605, "step": 15988 }, { "epoch": 0.41055268559396974, "grad_norm": 0.73046875, "learning_rate": 0.00017147123095386405, "loss": 0.9379, "step": 15989 }, { "epoch": 0.4105783627898915, "grad_norm": 0.8359375, "learning_rate": 0.00017146810853715526, "loss": 0.7356, "step": 15990 }, { "epoch": 0.41060403998581335, "grad_norm": 0.8125, "learning_rate": 0.00017146498597801695, "loss": 1.011, "step": 15991 }, { "epoch": 0.4106297171817352, "grad_norm": 0.70703125, "learning_rate": 0.0001714618632764554, "loss": 0.9412, "step": 15992 }, { "epoch": 0.41065539437765697, "grad_norm": 0.7421875, "learning_rate": 0.0001714587404324768, "loss": 0.9424, "step": 15993 }, { "epoch": 0.4106810715735788, "grad_norm": 0.796875, "learning_rate": 0.00017145561744608739, "loss": 1.0277, "step": 15994 }, { "epoch": 0.41070674876950064, "grad_norm": 0.7734375, "learning_rate": 0.00017145249431729337, "loss": 0.9218, "step": 15995 }, { "epoch": 0.4107324259654224, "grad_norm": 0.8046875, "learning_rate": 0.000171449371046101, "loss": 0.973, "step": 15996 }, { "epoch": 0.41075810316134426, "grad_norm": 0.80078125, "learning_rate": 0.0001714462476325165, "loss": 0.9412, "step": 15997 }, { "epoch": 0.4107837803572661, "grad_norm": 0.85546875, "learning_rate": 0.00017144312407654607, "loss": 0.8902, "step": 15998 }, { "epoch": 0.41080945755318793, "grad_norm": 0.84765625, "learning_rate": 0.00017144000037819593, "loss": 0.8214, "step": 15999 }, { "epoch": 0.4108351347491097, "grad_norm": 0.796875, "learning_rate": 0.00017143687653747231, "loss": 0.9131, "step": 16000 }, { "epoch": 0.4108351347491097, "eval_loss": 0.9210696220397949, "eval_model_preparation_time": 0.0065, "eval_runtime": 407.7096, "eval_samples_per_second": 24.527, "eval_steps_per_second": 0.768, "step": 16000 }, { "epoch": 0.41086081194503155, "grad_norm": 0.70703125, "learning_rate": 0.00017143375255438147, "loss": 0.818, "step": 16001 }, { "epoch": 0.4108864891409534, "grad_norm": 0.84375, "learning_rate": 0.00017143062842892964, "loss": 1.0292, "step": 16002 }, { "epoch": 0.41091216633687516, "grad_norm": 0.8125, "learning_rate": 0.000171427504161123, "loss": 0.9602, "step": 16003 }, { "epoch": 0.410937843532797, "grad_norm": 0.71875, "learning_rate": 0.00017142437975096778, "loss": 0.9425, "step": 16004 }, { "epoch": 0.41096352072871883, "grad_norm": 0.7890625, "learning_rate": 0.00017142125519847026, "loss": 0.9418, "step": 16005 }, { "epoch": 0.4109891979246406, "grad_norm": 0.76171875, "learning_rate": 0.0001714181305036366, "loss": 0.8732, "step": 16006 }, { "epoch": 0.41101487512056245, "grad_norm": 0.85546875, "learning_rate": 0.00017141500566647303, "loss": 0.9561, "step": 16007 }, { "epoch": 0.4110405523164843, "grad_norm": 0.81640625, "learning_rate": 0.00017141188068698585, "loss": 0.9314, "step": 16008 }, { "epoch": 0.4110662295124061, "grad_norm": 0.78125, "learning_rate": 0.00017140875556518123, "loss": 0.9766, "step": 16009 }, { "epoch": 0.4110919067083279, "grad_norm": 0.77734375, "learning_rate": 0.00017140563030106543, "loss": 0.9489, "step": 16010 }, { "epoch": 0.41111758390424974, "grad_norm": 0.74609375, "learning_rate": 0.00017140250489464464, "loss": 0.8757, "step": 16011 }, { "epoch": 0.4111432611001716, "grad_norm": 0.90234375, "learning_rate": 0.00017139937934592512, "loss": 1.016, "step": 16012 }, { "epoch": 0.41116893829609336, "grad_norm": 0.828125, "learning_rate": 0.00017139625365491306, "loss": 1.0772, "step": 16013 }, { "epoch": 0.4111946154920152, "grad_norm": 0.78515625, "learning_rate": 0.00017139312782161472, "loss": 0.9634, "step": 16014 }, { "epoch": 0.41122029268793703, "grad_norm": 0.79296875, "learning_rate": 0.00017139000184603634, "loss": 0.8679, "step": 16015 }, { "epoch": 0.4112459698838588, "grad_norm": 0.71875, "learning_rate": 0.00017138687572818413, "loss": 0.8668, "step": 16016 }, { "epoch": 0.41127164707978064, "grad_norm": 0.8203125, "learning_rate": 0.00017138374946806432, "loss": 0.8706, "step": 16017 }, { "epoch": 0.4112973242757025, "grad_norm": 0.84375, "learning_rate": 0.00017138062306568317, "loss": 1.0397, "step": 16018 }, { "epoch": 0.4113230014716243, "grad_norm": 0.703125, "learning_rate": 0.00017137749652104683, "loss": 0.8859, "step": 16019 }, { "epoch": 0.4113486786675461, "grad_norm": 0.74609375, "learning_rate": 0.00017137436983416163, "loss": 0.9473, "step": 16020 }, { "epoch": 0.41137435586346793, "grad_norm": 0.74609375, "learning_rate": 0.0001713712430050338, "loss": 0.8688, "step": 16021 }, { "epoch": 0.41140003305938977, "grad_norm": 0.8515625, "learning_rate": 0.00017136811603366944, "loss": 0.9158, "step": 16022 }, { "epoch": 0.41142571025531155, "grad_norm": 0.73046875, "learning_rate": 0.00017136498892007492, "loss": 0.8576, "step": 16023 }, { "epoch": 0.4114513874512334, "grad_norm": 0.7734375, "learning_rate": 0.00017136186166425641, "loss": 0.9401, "step": 16024 }, { "epoch": 0.4114770646471552, "grad_norm": 0.79296875, "learning_rate": 0.00017135873426622018, "loss": 0.9505, "step": 16025 }, { "epoch": 0.411502741843077, "grad_norm": 0.75, "learning_rate": 0.00017135560672597243, "loss": 1.0871, "step": 16026 }, { "epoch": 0.41152841903899884, "grad_norm": 0.72265625, "learning_rate": 0.00017135247904351937, "loss": 0.923, "step": 16027 }, { "epoch": 0.4115540962349207, "grad_norm": 0.83984375, "learning_rate": 0.00017134935121886728, "loss": 0.783, "step": 16028 }, { "epoch": 0.4115797734308425, "grad_norm": 1.125, "learning_rate": 0.00017134622325202237, "loss": 0.9258, "step": 16029 }, { "epoch": 0.4116054506267643, "grad_norm": 0.87109375, "learning_rate": 0.0001713430951429909, "loss": 1.0261, "step": 16030 }, { "epoch": 0.4116311278226861, "grad_norm": 0.76953125, "learning_rate": 0.00017133996689177908, "loss": 0.9681, "step": 16031 }, { "epoch": 0.41165680501860796, "grad_norm": 0.7265625, "learning_rate": 0.00017133683849839316, "loss": 0.7858, "step": 16032 }, { "epoch": 0.41168248221452974, "grad_norm": 0.828125, "learning_rate": 0.00017133370996283938, "loss": 0.8503, "step": 16033 }, { "epoch": 0.4117081594104516, "grad_norm": 0.86328125, "learning_rate": 0.00017133058128512393, "loss": 0.9446, "step": 16034 }, { "epoch": 0.4117338366063734, "grad_norm": 0.6796875, "learning_rate": 0.00017132745246525307, "loss": 0.8603, "step": 16035 }, { "epoch": 0.4117595138022952, "grad_norm": 0.7734375, "learning_rate": 0.00017132432350323303, "loss": 0.9833, "step": 16036 }, { "epoch": 0.41178519099821703, "grad_norm": 0.76171875, "learning_rate": 0.0001713211943990701, "loss": 0.8951, "step": 16037 }, { "epoch": 0.41181086819413887, "grad_norm": 0.81640625, "learning_rate": 0.00017131806515277044, "loss": 0.8921, "step": 16038 }, { "epoch": 0.4118365453900607, "grad_norm": 0.7421875, "learning_rate": 0.00017131493576434033, "loss": 0.9089, "step": 16039 }, { "epoch": 0.4118622225859825, "grad_norm": 0.75, "learning_rate": 0.000171311806233786, "loss": 0.8762, "step": 16040 }, { "epoch": 0.4118878997819043, "grad_norm": 0.74609375, "learning_rate": 0.00017130867656111367, "loss": 0.9352, "step": 16041 }, { "epoch": 0.41191357697782616, "grad_norm": 0.83203125, "learning_rate": 0.0001713055467463296, "loss": 0.9976, "step": 16042 }, { "epoch": 0.41193925417374794, "grad_norm": 0.71875, "learning_rate": 0.00017130241678943998, "loss": 0.9949, "step": 16043 }, { "epoch": 0.4119649313696698, "grad_norm": 0.78515625, "learning_rate": 0.0001712992866904511, "loss": 0.9122, "step": 16044 }, { "epoch": 0.4119906085655916, "grad_norm": 0.7734375, "learning_rate": 0.00017129615644936923, "loss": 0.8223, "step": 16045 }, { "epoch": 0.4120162857615134, "grad_norm": 0.71875, "learning_rate": 0.0001712930260662005, "loss": 0.8629, "step": 16046 }, { "epoch": 0.4120419629574352, "grad_norm": 0.77734375, "learning_rate": 0.00017128989554095124, "loss": 0.905, "step": 16047 }, { "epoch": 0.41206764015335706, "grad_norm": 0.8046875, "learning_rate": 0.00017128676487362764, "loss": 0.9528, "step": 16048 }, { "epoch": 0.4120933173492789, "grad_norm": 0.7734375, "learning_rate": 0.000171283634064236, "loss": 1.1207, "step": 16049 }, { "epoch": 0.4121189945452007, "grad_norm": 0.73046875, "learning_rate": 0.00017128050311278247, "loss": 0.9347, "step": 16050 }, { "epoch": 0.4121446717411225, "grad_norm": 0.7734375, "learning_rate": 0.00017127737201927336, "loss": 0.9449, "step": 16051 }, { "epoch": 0.41217034893704435, "grad_norm": 0.78125, "learning_rate": 0.00017127424078371487, "loss": 0.9691, "step": 16052 }, { "epoch": 0.41219602613296613, "grad_norm": 0.80078125, "learning_rate": 0.00017127110940611329, "loss": 0.855, "step": 16053 }, { "epoch": 0.41222170332888797, "grad_norm": 0.81640625, "learning_rate": 0.0001712679778864748, "loss": 0.9485, "step": 16054 }, { "epoch": 0.4122473805248098, "grad_norm": 0.8046875, "learning_rate": 0.00017126484622480566, "loss": 0.9446, "step": 16055 }, { "epoch": 0.4122730577207316, "grad_norm": 0.7578125, "learning_rate": 0.0001712617144211121, "loss": 0.861, "step": 16056 }, { "epoch": 0.4122987349166534, "grad_norm": 0.796875, "learning_rate": 0.0001712585824754004, "loss": 0.9294, "step": 16057 }, { "epoch": 0.41232441211257526, "grad_norm": 0.8125, "learning_rate": 0.00017125545038767682, "loss": 0.7998, "step": 16058 }, { "epoch": 0.4123500893084971, "grad_norm": 0.83203125, "learning_rate": 0.00017125231815794752, "loss": 1.0298, "step": 16059 }, { "epoch": 0.4123757665044189, "grad_norm": 0.75390625, "learning_rate": 0.0001712491857862188, "loss": 0.8253, "step": 16060 }, { "epoch": 0.4124014437003407, "grad_norm": 0.82421875, "learning_rate": 0.00017124605327249688, "loss": 0.8824, "step": 16061 }, { "epoch": 0.41242712089626254, "grad_norm": 0.81640625, "learning_rate": 0.00017124292061678804, "loss": 1.0339, "step": 16062 }, { "epoch": 0.4124527980921843, "grad_norm": 0.75, "learning_rate": 0.00017123978781909846, "loss": 0.8808, "step": 16063 }, { "epoch": 0.41247847528810616, "grad_norm": 0.84765625, "learning_rate": 0.00017123665487943444, "loss": 1.0938, "step": 16064 }, { "epoch": 0.412504152484028, "grad_norm": 0.8125, "learning_rate": 0.0001712335217978022, "loss": 0.9511, "step": 16065 }, { "epoch": 0.4125298296799498, "grad_norm": 0.84765625, "learning_rate": 0.00017123038857420797, "loss": 0.9694, "step": 16066 }, { "epoch": 0.4125555068758716, "grad_norm": 0.80859375, "learning_rate": 0.000171227255208658, "loss": 0.9009, "step": 16067 }, { "epoch": 0.41258118407179345, "grad_norm": 0.75390625, "learning_rate": 0.00017122412170115858, "loss": 1.0711, "step": 16068 }, { "epoch": 0.4126068612677153, "grad_norm": 0.80859375, "learning_rate": 0.0001712209880517159, "loss": 1.1284, "step": 16069 }, { "epoch": 0.41263253846363707, "grad_norm": 0.765625, "learning_rate": 0.0001712178542603362, "loss": 1.0026, "step": 16070 }, { "epoch": 0.4126582156595589, "grad_norm": 0.80859375, "learning_rate": 0.0001712147203270258, "loss": 1.0843, "step": 16071 }, { "epoch": 0.41268389285548074, "grad_norm": 0.73828125, "learning_rate": 0.00017121158625179084, "loss": 0.78, "step": 16072 }, { "epoch": 0.4127095700514025, "grad_norm": 0.7578125, "learning_rate": 0.00017120845203463767, "loss": 0.915, "step": 16073 }, { "epoch": 0.41273524724732435, "grad_norm": 0.734375, "learning_rate": 0.0001712053176755725, "loss": 0.8991, "step": 16074 }, { "epoch": 0.4127609244432462, "grad_norm": 0.75, "learning_rate": 0.0001712021831746015, "loss": 0.9795, "step": 16075 }, { "epoch": 0.41278660163916797, "grad_norm": 0.8203125, "learning_rate": 0.00017119904853173101, "loss": 1.0876, "step": 16076 }, { "epoch": 0.4128122788350898, "grad_norm": 0.77734375, "learning_rate": 0.00017119591374696727, "loss": 0.9821, "step": 16077 }, { "epoch": 0.41283795603101164, "grad_norm": 0.8359375, "learning_rate": 0.00017119277882031648, "loss": 0.9794, "step": 16078 }, { "epoch": 0.4128636332269335, "grad_norm": 0.79296875, "learning_rate": 0.0001711896437517849, "loss": 0.8193, "step": 16079 }, { "epoch": 0.41288931042285526, "grad_norm": 0.78515625, "learning_rate": 0.00017118650854137883, "loss": 0.8596, "step": 16080 }, { "epoch": 0.4129149876187771, "grad_norm": 0.69140625, "learning_rate": 0.00017118337318910445, "loss": 0.8578, "step": 16081 }, { "epoch": 0.41294066481469893, "grad_norm": 0.91015625, "learning_rate": 0.00017118023769496804, "loss": 1.0478, "step": 16082 }, { "epoch": 0.4129663420106207, "grad_norm": 0.74609375, "learning_rate": 0.00017117710205897586, "loss": 0.8784, "step": 16083 }, { "epoch": 0.41299201920654255, "grad_norm": 0.75390625, "learning_rate": 0.00017117396628113412, "loss": 0.9363, "step": 16084 }, { "epoch": 0.4130176964024644, "grad_norm": 0.7578125, "learning_rate": 0.00017117083036144912, "loss": 1.0668, "step": 16085 }, { "epoch": 0.41304337359838617, "grad_norm": 0.7734375, "learning_rate": 0.0001711676942999271, "loss": 0.9981, "step": 16086 }, { "epoch": 0.413069050794308, "grad_norm": 0.82421875, "learning_rate": 0.00017116455809657426, "loss": 1.0425, "step": 16087 }, { "epoch": 0.41309472799022984, "grad_norm": 0.82421875, "learning_rate": 0.00017116142175139688, "loss": 1.0704, "step": 16088 }, { "epoch": 0.4131204051861517, "grad_norm": 0.8671875, "learning_rate": 0.00017115828526440124, "loss": 1.0226, "step": 16089 }, { "epoch": 0.41314608238207345, "grad_norm": 0.8203125, "learning_rate": 0.00017115514863559354, "loss": 0.8999, "step": 16090 }, { "epoch": 0.4131717595779953, "grad_norm": 0.75, "learning_rate": 0.00017115201186498007, "loss": 0.9384, "step": 16091 }, { "epoch": 0.4131974367739171, "grad_norm": 0.75390625, "learning_rate": 0.00017114887495256706, "loss": 1.0226, "step": 16092 }, { "epoch": 0.4132231139698389, "grad_norm": 0.7578125, "learning_rate": 0.00017114573789836078, "loss": 1.0186, "step": 16093 }, { "epoch": 0.41324879116576074, "grad_norm": 0.8515625, "learning_rate": 0.00017114260070236746, "loss": 0.8989, "step": 16094 }, { "epoch": 0.4132744683616826, "grad_norm": 0.83984375, "learning_rate": 0.00017113946336459336, "loss": 0.8497, "step": 16095 }, { "epoch": 0.41330014555760436, "grad_norm": 0.8515625, "learning_rate": 0.00017113632588504473, "loss": 0.8296, "step": 16096 }, { "epoch": 0.4133258227535262, "grad_norm": 0.8203125, "learning_rate": 0.00017113318826372787, "loss": 0.835, "step": 16097 }, { "epoch": 0.41335149994944803, "grad_norm": 0.796875, "learning_rate": 0.00017113005050064896, "loss": 0.917, "step": 16098 }, { "epoch": 0.41337717714536987, "grad_norm": 0.79296875, "learning_rate": 0.00017112691259581428, "loss": 0.8861, "step": 16099 }, { "epoch": 0.41340285434129165, "grad_norm": 0.8046875, "learning_rate": 0.0001711237745492301, "loss": 0.9019, "step": 16100 }, { "epoch": 0.4134285315372135, "grad_norm": 0.79296875, "learning_rate": 0.00017112063636090264, "loss": 0.9977, "step": 16101 }, { "epoch": 0.4134542087331353, "grad_norm": 0.74609375, "learning_rate": 0.00017111749803083823, "loss": 0.8398, "step": 16102 }, { "epoch": 0.4134798859290571, "grad_norm": 0.7265625, "learning_rate": 0.00017111435955904304, "loss": 0.9342, "step": 16103 }, { "epoch": 0.41350556312497894, "grad_norm": 0.6953125, "learning_rate": 0.00017111122094552332, "loss": 0.8213, "step": 16104 }, { "epoch": 0.4135312403209008, "grad_norm": 0.74609375, "learning_rate": 0.00017110808219028537, "loss": 0.8722, "step": 16105 }, { "epoch": 0.41355691751682255, "grad_norm": 0.75390625, "learning_rate": 0.00017110494329333547, "loss": 1.0607, "step": 16106 }, { "epoch": 0.4135825947127444, "grad_norm": 0.75, "learning_rate": 0.00017110180425467984, "loss": 0.8567, "step": 16107 }, { "epoch": 0.4136082719086662, "grad_norm": 0.7734375, "learning_rate": 0.0001710986650743247, "loss": 0.9433, "step": 16108 }, { "epoch": 0.41363394910458806, "grad_norm": 0.75, "learning_rate": 0.0001710955257522764, "loss": 0.9058, "step": 16109 }, { "epoch": 0.41365962630050984, "grad_norm": 0.765625, "learning_rate": 0.0001710923862885411, "loss": 0.9286, "step": 16110 }, { "epoch": 0.4136853034964317, "grad_norm": 0.73828125, "learning_rate": 0.0001710892466831251, "loss": 0.9396, "step": 16111 }, { "epoch": 0.4137109806923535, "grad_norm": 0.7734375, "learning_rate": 0.00017108610693603463, "loss": 0.8689, "step": 16112 }, { "epoch": 0.4137366578882753, "grad_norm": 0.75390625, "learning_rate": 0.000171082967047276, "loss": 0.8903, "step": 16113 }, { "epoch": 0.41376233508419713, "grad_norm": 0.8125, "learning_rate": 0.00017107982701685542, "loss": 1.0135, "step": 16114 }, { "epoch": 0.41378801228011897, "grad_norm": 0.73828125, "learning_rate": 0.0001710766868447792, "loss": 0.8305, "step": 16115 }, { "epoch": 0.41381368947604075, "grad_norm": 0.80078125, "learning_rate": 0.0001710735465310535, "loss": 0.9568, "step": 16116 }, { "epoch": 0.4138393666719626, "grad_norm": 0.73828125, "learning_rate": 0.00017107040607568472, "loss": 0.8417, "step": 16117 }, { "epoch": 0.4138650438678844, "grad_norm": 0.74609375, "learning_rate": 0.000171067265478679, "loss": 0.8208, "step": 16118 }, { "epoch": 0.4138907210638062, "grad_norm": 0.84375, "learning_rate": 0.00017106412474004264, "loss": 0.8605, "step": 16119 }, { "epoch": 0.41391639825972804, "grad_norm": 0.75, "learning_rate": 0.0001710609838597819, "loss": 0.8392, "step": 16120 }, { "epoch": 0.41394207545564987, "grad_norm": 0.75390625, "learning_rate": 0.00017105784283790305, "loss": 0.9425, "step": 16121 }, { "epoch": 0.4139677526515717, "grad_norm": 0.796875, "learning_rate": 0.0001710547016744123, "loss": 0.9375, "step": 16122 }, { "epoch": 0.4139934298474935, "grad_norm": 0.76953125, "learning_rate": 0.000171051560369316, "loss": 0.9275, "step": 16123 }, { "epoch": 0.4140191070434153, "grad_norm": 0.77734375, "learning_rate": 0.00017104841892262034, "loss": 1.0254, "step": 16124 }, { "epoch": 0.41404478423933716, "grad_norm": 0.71484375, "learning_rate": 0.00017104527733433157, "loss": 0.9007, "step": 16125 }, { "epoch": 0.41407046143525894, "grad_norm": 0.79296875, "learning_rate": 0.000171042135604456, "loss": 1.0191, "step": 16126 }, { "epoch": 0.4140961386311808, "grad_norm": 0.73046875, "learning_rate": 0.00017103899373299987, "loss": 0.7659, "step": 16127 }, { "epoch": 0.4141218158271026, "grad_norm": 0.7265625, "learning_rate": 0.00017103585171996945, "loss": 0.9327, "step": 16128 }, { "epoch": 0.4141474930230244, "grad_norm": 0.796875, "learning_rate": 0.00017103270956537098, "loss": 0.8996, "step": 16129 }, { "epoch": 0.41417317021894623, "grad_norm": 0.80078125, "learning_rate": 0.00017102956726921075, "loss": 0.921, "step": 16130 }, { "epoch": 0.41419884741486807, "grad_norm": 0.80078125, "learning_rate": 0.000171026424831495, "loss": 0.9544, "step": 16131 }, { "epoch": 0.4142245246107899, "grad_norm": 0.77734375, "learning_rate": 0.00017102328225223002, "loss": 0.9181, "step": 16132 }, { "epoch": 0.4142502018067117, "grad_norm": 0.79296875, "learning_rate": 0.000171020139531422, "loss": 0.8357, "step": 16133 }, { "epoch": 0.4142758790026335, "grad_norm": 0.78515625, "learning_rate": 0.00017101699666907734, "loss": 0.9893, "step": 16134 }, { "epoch": 0.41430155619855535, "grad_norm": 0.78515625, "learning_rate": 0.00017101385366520216, "loss": 0.845, "step": 16135 }, { "epoch": 0.41432723339447713, "grad_norm": 0.74609375, "learning_rate": 0.0001710107105198028, "loss": 0.935, "step": 16136 }, { "epoch": 0.41435291059039897, "grad_norm": 0.84765625, "learning_rate": 0.0001710075672328855, "loss": 0.9601, "step": 16137 }, { "epoch": 0.4143785877863208, "grad_norm": 0.75, "learning_rate": 0.00017100442380445653, "loss": 0.8883, "step": 16138 }, { "epoch": 0.4144042649822426, "grad_norm": 0.875, "learning_rate": 0.00017100128023452216, "loss": 0.9628, "step": 16139 }, { "epoch": 0.4144299421781644, "grad_norm": 0.78515625, "learning_rate": 0.00017099813652308868, "loss": 1.0172, "step": 16140 }, { "epoch": 0.41445561937408626, "grad_norm": 0.84765625, "learning_rate": 0.0001709949926701623, "loss": 0.8806, "step": 16141 }, { "epoch": 0.4144812965700081, "grad_norm": 0.81640625, "learning_rate": 0.00017099184867574934, "loss": 0.8761, "step": 16142 }, { "epoch": 0.4145069737659299, "grad_norm": 0.765625, "learning_rate": 0.00017098870453985602, "loss": 0.9843, "step": 16143 }, { "epoch": 0.4145326509618517, "grad_norm": 0.75390625, "learning_rate": 0.00017098556026248863, "loss": 1.1064, "step": 16144 }, { "epoch": 0.41455832815777355, "grad_norm": 1.1640625, "learning_rate": 0.0001709824158436534, "loss": 1.0375, "step": 16145 }, { "epoch": 0.41458400535369533, "grad_norm": 0.80078125, "learning_rate": 0.00017097927128335668, "loss": 0.8962, "step": 16146 }, { "epoch": 0.41460968254961716, "grad_norm": 0.796875, "learning_rate": 0.0001709761265816046, "loss": 0.9297, "step": 16147 }, { "epoch": 0.414635359745539, "grad_norm": 0.71875, "learning_rate": 0.0001709729817384036, "loss": 0.8679, "step": 16148 }, { "epoch": 0.4146610369414608, "grad_norm": 0.81640625, "learning_rate": 0.00017096983675375982, "loss": 1.0507, "step": 16149 }, { "epoch": 0.4146867141373826, "grad_norm": 0.77734375, "learning_rate": 0.00017096669162767957, "loss": 0.9662, "step": 16150 }, { "epoch": 0.41471239133330445, "grad_norm": 0.76171875, "learning_rate": 0.0001709635463601691, "loss": 0.8547, "step": 16151 }, { "epoch": 0.4147380685292263, "grad_norm": 0.76953125, "learning_rate": 0.0001709604009512347, "loss": 0.9935, "step": 16152 }, { "epoch": 0.41476374572514807, "grad_norm": 0.79296875, "learning_rate": 0.00017095725540088266, "loss": 0.8635, "step": 16153 }, { "epoch": 0.4147894229210699, "grad_norm": 0.76953125, "learning_rate": 0.0001709541097091192, "loss": 0.8696, "step": 16154 }, { "epoch": 0.41481510011699174, "grad_norm": 0.80859375, "learning_rate": 0.0001709509638759506, "loss": 1.1014, "step": 16155 }, { "epoch": 0.4148407773129135, "grad_norm": 0.78125, "learning_rate": 0.0001709478179013831, "loss": 0.8738, "step": 16156 }, { "epoch": 0.41486645450883536, "grad_norm": 0.75390625, "learning_rate": 0.00017094467178542305, "loss": 0.9092, "step": 16157 }, { "epoch": 0.4148921317047572, "grad_norm": 0.78125, "learning_rate": 0.0001709415255280767, "loss": 0.9047, "step": 16158 }, { "epoch": 0.414917808900679, "grad_norm": 0.84765625, "learning_rate": 0.00017093837912935026, "loss": 1.0007, "step": 16159 }, { "epoch": 0.4149434860966008, "grad_norm": 0.76171875, "learning_rate": 0.00017093523258925007, "loss": 0.9561, "step": 16160 }, { "epoch": 0.41496916329252265, "grad_norm": 0.73828125, "learning_rate": 0.00017093208590778235, "loss": 0.8559, "step": 16161 }, { "epoch": 0.4149948404884445, "grad_norm": 0.79296875, "learning_rate": 0.0001709289390849534, "loss": 0.8784, "step": 16162 }, { "epoch": 0.41502051768436626, "grad_norm": 0.80859375, "learning_rate": 0.00017092579212076947, "loss": 0.9363, "step": 16163 }, { "epoch": 0.4150461948802881, "grad_norm": 0.7734375, "learning_rate": 0.00017092264501523686, "loss": 0.8059, "step": 16164 }, { "epoch": 0.41507187207620994, "grad_norm": 0.77734375, "learning_rate": 0.00017091949776836177, "loss": 0.927, "step": 16165 }, { "epoch": 0.4150975492721317, "grad_norm": 0.76953125, "learning_rate": 0.00017091635038015058, "loss": 0.7972, "step": 16166 }, { "epoch": 0.41512322646805355, "grad_norm": 0.78125, "learning_rate": 0.00017091320285060951, "loss": 0.9207, "step": 16167 }, { "epoch": 0.4151489036639754, "grad_norm": 0.765625, "learning_rate": 0.0001709100551797448, "loss": 0.955, "step": 16168 }, { "epoch": 0.41517458085989717, "grad_norm": 0.734375, "learning_rate": 0.00017090690736756278, "loss": 0.8961, "step": 16169 }, { "epoch": 0.415200258055819, "grad_norm": 0.78125, "learning_rate": 0.00017090375941406971, "loss": 0.9837, "step": 16170 }, { "epoch": 0.41522593525174084, "grad_norm": 0.828125, "learning_rate": 0.00017090061131927185, "loss": 1.0988, "step": 16171 }, { "epoch": 0.4152516124476627, "grad_norm": 0.76171875, "learning_rate": 0.00017089746308317544, "loss": 0.845, "step": 16172 }, { "epoch": 0.41527728964358446, "grad_norm": 0.76953125, "learning_rate": 0.00017089431470578684, "loss": 0.9162, "step": 16173 }, { "epoch": 0.4153029668395063, "grad_norm": 0.77734375, "learning_rate": 0.00017089116618711225, "loss": 0.8885, "step": 16174 }, { "epoch": 0.41532864403542813, "grad_norm": 0.78515625, "learning_rate": 0.00017088801752715796, "loss": 0.8926, "step": 16175 }, { "epoch": 0.4153543212313499, "grad_norm": 0.7734375, "learning_rate": 0.00017088486872593025, "loss": 0.9657, "step": 16176 }, { "epoch": 0.41537999842727175, "grad_norm": 0.7890625, "learning_rate": 0.00017088171978343542, "loss": 0.8994, "step": 16177 }, { "epoch": 0.4154056756231936, "grad_norm": 0.7265625, "learning_rate": 0.0001708785706996797, "loss": 1.0695, "step": 16178 }, { "epoch": 0.41543135281911536, "grad_norm": 0.82421875, "learning_rate": 0.00017087542147466942, "loss": 0.9197, "step": 16179 }, { "epoch": 0.4154570300150372, "grad_norm": 0.83984375, "learning_rate": 0.0001708722721084108, "loss": 0.8588, "step": 16180 }, { "epoch": 0.41548270721095903, "grad_norm": 0.79296875, "learning_rate": 0.00017086912260091017, "loss": 0.8492, "step": 16181 }, { "epoch": 0.41550838440688087, "grad_norm": 0.796875, "learning_rate": 0.00017086597295217377, "loss": 0.7404, "step": 16182 }, { "epoch": 0.41553406160280265, "grad_norm": 0.76953125, "learning_rate": 0.0001708628231622079, "loss": 0.8539, "step": 16183 }, { "epoch": 0.4155597387987245, "grad_norm": 0.75, "learning_rate": 0.0001708596732310188, "loss": 0.9323, "step": 16184 }, { "epoch": 0.4155854159946463, "grad_norm": 0.71875, "learning_rate": 0.00017085652315861278, "loss": 0.9232, "step": 16185 }, { "epoch": 0.4156110931905681, "grad_norm": 0.765625, "learning_rate": 0.0001708533729449961, "loss": 0.9443, "step": 16186 }, { "epoch": 0.41563677038648994, "grad_norm": 0.76953125, "learning_rate": 0.00017085022259017506, "loss": 0.9253, "step": 16187 }, { "epoch": 0.4156624475824118, "grad_norm": 0.7421875, "learning_rate": 0.0001708470720941559, "loss": 0.8585, "step": 16188 }, { "epoch": 0.41568812477833356, "grad_norm": 0.7578125, "learning_rate": 0.00017084392145694496, "loss": 0.8634, "step": 16189 }, { "epoch": 0.4157138019742554, "grad_norm": 0.796875, "learning_rate": 0.0001708407706785485, "loss": 0.8498, "step": 16190 }, { "epoch": 0.41573947917017723, "grad_norm": 0.78515625, "learning_rate": 0.00017083761975897276, "loss": 0.9008, "step": 16191 }, { "epoch": 0.41576515636609906, "grad_norm": 0.83203125, "learning_rate": 0.00017083446869822403, "loss": 1.0089, "step": 16192 }, { "epoch": 0.41579083356202085, "grad_norm": 0.796875, "learning_rate": 0.0001708313174963086, "loss": 0.9306, "step": 16193 }, { "epoch": 0.4158165107579427, "grad_norm": 0.83984375, "learning_rate": 0.00017082816615323276, "loss": 0.9531, "step": 16194 }, { "epoch": 0.4158421879538645, "grad_norm": 0.7890625, "learning_rate": 0.0001708250146690028, "loss": 0.9214, "step": 16195 }, { "epoch": 0.4158678651497863, "grad_norm": 0.796875, "learning_rate": 0.00017082186304362497, "loss": 0.8394, "step": 16196 }, { "epoch": 0.41589354234570813, "grad_norm": 0.76953125, "learning_rate": 0.0001708187112771056, "loss": 0.9593, "step": 16197 }, { "epoch": 0.41591921954162997, "grad_norm": 0.80078125, "learning_rate": 0.00017081555936945087, "loss": 0.9792, "step": 16198 }, { "epoch": 0.41594489673755175, "grad_norm": 0.77734375, "learning_rate": 0.00017081240732066716, "loss": 0.9114, "step": 16199 }, { "epoch": 0.4159705739334736, "grad_norm": 0.7890625, "learning_rate": 0.0001708092551307607, "loss": 0.9424, "step": 16200 }, { "epoch": 0.4159962511293954, "grad_norm": 0.85546875, "learning_rate": 0.00017080610279973782, "loss": 0.8879, "step": 16201 }, { "epoch": 0.41602192832531726, "grad_norm": 0.7265625, "learning_rate": 0.00017080295032760476, "loss": 0.946, "step": 16202 }, { "epoch": 0.41604760552123904, "grad_norm": 0.69921875, "learning_rate": 0.00017079979771436783, "loss": 0.8442, "step": 16203 }, { "epoch": 0.4160732827171609, "grad_norm": 0.8125, "learning_rate": 0.00017079664496003327, "loss": 1.1063, "step": 16204 }, { "epoch": 0.4160989599130827, "grad_norm": 0.72265625, "learning_rate": 0.0001707934920646074, "loss": 0.9934, "step": 16205 }, { "epoch": 0.4161246371090045, "grad_norm": 0.7421875, "learning_rate": 0.00017079033902809649, "loss": 0.896, "step": 16206 }, { "epoch": 0.4161503143049263, "grad_norm": 0.8046875, "learning_rate": 0.00017078718585050685, "loss": 0.7789, "step": 16207 }, { "epoch": 0.41617599150084816, "grad_norm": 0.82421875, "learning_rate": 0.00017078403253184473, "loss": 0.9124, "step": 16208 }, { "epoch": 0.41620166869676994, "grad_norm": 0.80859375, "learning_rate": 0.0001707808790721164, "loss": 1.1648, "step": 16209 }, { "epoch": 0.4162273458926918, "grad_norm": 0.828125, "learning_rate": 0.00017077772547132822, "loss": 0.8719, "step": 16210 }, { "epoch": 0.4162530230886136, "grad_norm": 0.7890625, "learning_rate": 0.0001707745717294864, "loss": 0.8446, "step": 16211 }, { "epoch": 0.41627870028453545, "grad_norm": 0.78515625, "learning_rate": 0.00017077141784659728, "loss": 0.852, "step": 16212 }, { "epoch": 0.41630437748045723, "grad_norm": 0.7421875, "learning_rate": 0.0001707682638226671, "loss": 0.8275, "step": 16213 }, { "epoch": 0.41633005467637907, "grad_norm": 0.8046875, "learning_rate": 0.00017076510965770213, "loss": 1.0557, "step": 16214 }, { "epoch": 0.4163557318723009, "grad_norm": 0.77734375, "learning_rate": 0.00017076195535170875, "loss": 0.8325, "step": 16215 }, { "epoch": 0.4163814090682227, "grad_norm": 0.72265625, "learning_rate": 0.00017075880090469315, "loss": 0.8473, "step": 16216 }, { "epoch": 0.4164070862641445, "grad_norm": 0.75390625, "learning_rate": 0.00017075564631666165, "loss": 0.8985, "step": 16217 }, { "epoch": 0.41643276346006636, "grad_norm": 0.734375, "learning_rate": 0.00017075249158762055, "loss": 0.8212, "step": 16218 }, { "epoch": 0.41645844065598814, "grad_norm": 0.76171875, "learning_rate": 0.0001707493367175761, "loss": 0.9263, "step": 16219 }, { "epoch": 0.41648411785191, "grad_norm": 0.80859375, "learning_rate": 0.00017074618170653466, "loss": 0.8231, "step": 16220 }, { "epoch": 0.4165097950478318, "grad_norm": 0.7578125, "learning_rate": 0.00017074302655450241, "loss": 0.8739, "step": 16221 }, { "epoch": 0.41653547224375365, "grad_norm": 0.8203125, "learning_rate": 0.00017073987126148575, "loss": 0.9998, "step": 16222 }, { "epoch": 0.4165611494396754, "grad_norm": 0.84375, "learning_rate": 0.0001707367158274909, "loss": 1.061, "step": 16223 }, { "epoch": 0.41658682663559726, "grad_norm": 0.80859375, "learning_rate": 0.00017073356025252415, "loss": 0.9195, "step": 16224 }, { "epoch": 0.4166125038315191, "grad_norm": 0.81640625, "learning_rate": 0.0001707304045365918, "loss": 0.8843, "step": 16225 }, { "epoch": 0.4166381810274409, "grad_norm": 0.796875, "learning_rate": 0.00017072724867970017, "loss": 0.8945, "step": 16226 }, { "epoch": 0.4166638582233627, "grad_norm": 0.8046875, "learning_rate": 0.0001707240926818555, "loss": 0.8339, "step": 16227 }, { "epoch": 0.41668953541928455, "grad_norm": 0.87109375, "learning_rate": 0.00017072093654306412, "loss": 1.1761, "step": 16228 }, { "epoch": 0.41671521261520633, "grad_norm": 0.796875, "learning_rate": 0.00017071778026333227, "loss": 0.9309, "step": 16229 }, { "epoch": 0.41674088981112817, "grad_norm": 0.7578125, "learning_rate": 0.00017071462384266628, "loss": 0.9242, "step": 16230 }, { "epoch": 0.41676656700705, "grad_norm": 0.7578125, "learning_rate": 0.00017071146728107246, "loss": 1.0184, "step": 16231 }, { "epoch": 0.41679224420297184, "grad_norm": 0.7578125, "learning_rate": 0.00017070831057855706, "loss": 0.8149, "step": 16232 }, { "epoch": 0.4168179213988936, "grad_norm": 0.81640625, "learning_rate": 0.00017070515373512638, "loss": 0.8341, "step": 16233 }, { "epoch": 0.41684359859481546, "grad_norm": 0.7265625, "learning_rate": 0.00017070199675078672, "loss": 0.8152, "step": 16234 }, { "epoch": 0.4168692757907373, "grad_norm": 0.671875, "learning_rate": 0.00017069883962554435, "loss": 0.7701, "step": 16235 }, { "epoch": 0.4168949529866591, "grad_norm": 0.828125, "learning_rate": 0.0001706956823594056, "loss": 1.0161, "step": 16236 }, { "epoch": 0.4169206301825809, "grad_norm": 0.80078125, "learning_rate": 0.00017069252495237675, "loss": 0.8954, "step": 16237 }, { "epoch": 0.41694630737850275, "grad_norm": 0.7421875, "learning_rate": 0.00017068936740446408, "loss": 0.9201, "step": 16238 }, { "epoch": 0.4169719845744245, "grad_norm": 0.78515625, "learning_rate": 0.00017068620971567383, "loss": 0.9687, "step": 16239 }, { "epoch": 0.41699766177034636, "grad_norm": 0.72265625, "learning_rate": 0.00017068305188601245, "loss": 0.8583, "step": 16240 }, { "epoch": 0.4170233389662682, "grad_norm": 0.76171875, "learning_rate": 0.00017067989391548606, "loss": 0.8957, "step": 16241 }, { "epoch": 0.41704901616219003, "grad_norm": 0.765625, "learning_rate": 0.00017067673580410103, "loss": 0.9886, "step": 16242 }, { "epoch": 0.4170746933581118, "grad_norm": 0.796875, "learning_rate": 0.00017067357755186366, "loss": 1.0255, "step": 16243 }, { "epoch": 0.41710037055403365, "grad_norm": 0.75390625, "learning_rate": 0.00017067041915878024, "loss": 0.8297, "step": 16244 }, { "epoch": 0.4171260477499555, "grad_norm": 0.91796875, "learning_rate": 0.00017066726062485706, "loss": 0.8824, "step": 16245 }, { "epoch": 0.41715172494587727, "grad_norm": 0.796875, "learning_rate": 0.00017066410195010038, "loss": 0.8998, "step": 16246 }, { "epoch": 0.4171774021417991, "grad_norm": 0.74609375, "learning_rate": 0.00017066094313451658, "loss": 0.7964, "step": 16247 }, { "epoch": 0.41720307933772094, "grad_norm": 0.82421875, "learning_rate": 0.00017065778417811188, "loss": 0.8997, "step": 16248 }, { "epoch": 0.4172287565336427, "grad_norm": 0.765625, "learning_rate": 0.00017065462508089257, "loss": 0.8807, "step": 16249 }, { "epoch": 0.41725443372956456, "grad_norm": 0.7578125, "learning_rate": 0.00017065146584286503, "loss": 0.8463, "step": 16250 }, { "epoch": 0.4172801109254864, "grad_norm": 0.78515625, "learning_rate": 0.00017064830646403543, "loss": 0.9061, "step": 16251 }, { "epoch": 0.4173057881214082, "grad_norm": 0.8203125, "learning_rate": 0.0001706451469444102, "loss": 1.0123, "step": 16252 }, { "epoch": 0.41733146531733, "grad_norm": 0.86328125, "learning_rate": 0.00017064198728399557, "loss": 0.8982, "step": 16253 }, { "epoch": 0.41735714251325184, "grad_norm": 0.7265625, "learning_rate": 0.00017063882748279783, "loss": 0.8576, "step": 16254 }, { "epoch": 0.4173828197091737, "grad_norm": 0.7578125, "learning_rate": 0.00017063566754082325, "loss": 0.7831, "step": 16255 }, { "epoch": 0.41740849690509546, "grad_norm": 0.80078125, "learning_rate": 0.0001706325074580782, "loss": 0.7933, "step": 16256 }, { "epoch": 0.4174341741010173, "grad_norm": 0.8203125, "learning_rate": 0.00017062934723456895, "loss": 0.9551, "step": 16257 }, { "epoch": 0.41745985129693913, "grad_norm": 0.74609375, "learning_rate": 0.00017062618687030176, "loss": 0.9196, "step": 16258 }, { "epoch": 0.4174855284928609, "grad_norm": 0.78515625, "learning_rate": 0.00017062302636528299, "loss": 1.0146, "step": 16259 }, { "epoch": 0.41751120568878275, "grad_norm": 0.7421875, "learning_rate": 0.0001706198657195189, "loss": 1.0688, "step": 16260 }, { "epoch": 0.4175368828847046, "grad_norm": 0.78125, "learning_rate": 0.0001706167049330158, "loss": 0.9609, "step": 16261 }, { "epoch": 0.4175625600806264, "grad_norm": 0.84375, "learning_rate": 0.00017061354400577995, "loss": 1.0461, "step": 16262 }, { "epoch": 0.4175882372765482, "grad_norm": 0.8125, "learning_rate": 0.00017061038293781773, "loss": 0.8332, "step": 16263 }, { "epoch": 0.41761391447247004, "grad_norm": 0.796875, "learning_rate": 0.00017060722172913538, "loss": 1.0865, "step": 16264 }, { "epoch": 0.4176395916683919, "grad_norm": 0.78515625, "learning_rate": 0.0001706040603797392, "loss": 0.869, "step": 16265 }, { "epoch": 0.41766526886431365, "grad_norm": 0.84375, "learning_rate": 0.00017060089888963552, "loss": 1.0119, "step": 16266 }, { "epoch": 0.4176909460602355, "grad_norm": 0.81640625, "learning_rate": 0.00017059773725883063, "loss": 0.9383, "step": 16267 }, { "epoch": 0.4177166232561573, "grad_norm": 0.8828125, "learning_rate": 0.00017059457548733082, "loss": 0.8533, "step": 16268 }, { "epoch": 0.4177423004520791, "grad_norm": 0.88671875, "learning_rate": 0.0001705914135751424, "loss": 0.9971, "step": 16269 }, { "epoch": 0.41776797764800094, "grad_norm": 0.80078125, "learning_rate": 0.00017058825152227168, "loss": 0.9101, "step": 16270 }, { "epoch": 0.4177936548439228, "grad_norm": 0.78515625, "learning_rate": 0.00017058508932872493, "loss": 0.8646, "step": 16271 }, { "epoch": 0.4178193320398446, "grad_norm": 0.8125, "learning_rate": 0.00017058192699450847, "loss": 0.887, "step": 16272 }, { "epoch": 0.4178450092357664, "grad_norm": 0.83203125, "learning_rate": 0.00017057876451962863, "loss": 0.9185, "step": 16273 }, { "epoch": 0.41787068643168823, "grad_norm": 0.7578125, "learning_rate": 0.00017057560190409169, "loss": 0.9742, "step": 16274 }, { "epoch": 0.41789636362761007, "grad_norm": 0.77734375, "learning_rate": 0.0001705724391479039, "loss": 1.022, "step": 16275 }, { "epoch": 0.41792204082353185, "grad_norm": 0.7578125, "learning_rate": 0.00017056927625107166, "loss": 0.9581, "step": 16276 }, { "epoch": 0.4179477180194537, "grad_norm": 0.734375, "learning_rate": 0.0001705661132136012, "loss": 0.9773, "step": 16277 }, { "epoch": 0.4179733952153755, "grad_norm": 0.83984375, "learning_rate": 0.00017056295003549886, "loss": 0.9055, "step": 16278 }, { "epoch": 0.4179990724112973, "grad_norm": 0.65234375, "learning_rate": 0.00017055978671677097, "loss": 0.804, "step": 16279 }, { "epoch": 0.41802474960721914, "grad_norm": 0.75, "learning_rate": 0.0001705566232574238, "loss": 0.9766, "step": 16280 }, { "epoch": 0.418050426803141, "grad_norm": 0.9140625, "learning_rate": 0.0001705534596574636, "loss": 0.8525, "step": 16281 }, { "epoch": 0.4180761039990628, "grad_norm": 0.76171875, "learning_rate": 0.00017055029591689675, "loss": 0.8555, "step": 16282 }, { "epoch": 0.4181017811949846, "grad_norm": 0.7578125, "learning_rate": 0.00017054713203572952, "loss": 0.9648, "step": 16283 }, { "epoch": 0.4181274583909064, "grad_norm": 0.796875, "learning_rate": 0.00017054396801396827, "loss": 0.7494, "step": 16284 }, { "epoch": 0.41815313558682826, "grad_norm": 0.79296875, "learning_rate": 0.0001705408038516192, "loss": 0.9023, "step": 16285 }, { "epoch": 0.41817881278275004, "grad_norm": 0.80078125, "learning_rate": 0.00017053763954868873, "loss": 0.8743, "step": 16286 }, { "epoch": 0.4182044899786719, "grad_norm": 0.72265625, "learning_rate": 0.0001705344751051831, "loss": 0.7888, "step": 16287 }, { "epoch": 0.4182301671745937, "grad_norm": 0.78125, "learning_rate": 0.00017053131052110863, "loss": 0.9143, "step": 16288 }, { "epoch": 0.4182558443705155, "grad_norm": 0.74609375, "learning_rate": 0.00017052814579647163, "loss": 0.9353, "step": 16289 }, { "epoch": 0.41828152156643733, "grad_norm": 0.765625, "learning_rate": 0.00017052498093127842, "loss": 0.9602, "step": 16290 }, { "epoch": 0.41830719876235917, "grad_norm": 0.77734375, "learning_rate": 0.00017052181592553528, "loss": 0.9451, "step": 16291 }, { "epoch": 0.418332875958281, "grad_norm": 0.875, "learning_rate": 0.00017051865077924853, "loss": 0.9725, "step": 16292 }, { "epoch": 0.4183585531542028, "grad_norm": 0.8125, "learning_rate": 0.00017051548549242452, "loss": 0.9074, "step": 16293 }, { "epoch": 0.4183842303501246, "grad_norm": 0.8046875, "learning_rate": 0.00017051232006506944, "loss": 1.0478, "step": 16294 }, { "epoch": 0.41840990754604646, "grad_norm": 0.71875, "learning_rate": 0.00017050915449718973, "loss": 0.9295, "step": 16295 }, { "epoch": 0.41843558474196824, "grad_norm": 0.7890625, "learning_rate": 0.00017050598878879162, "loss": 0.977, "step": 16296 }, { "epoch": 0.41846126193789007, "grad_norm": 0.9375, "learning_rate": 0.00017050282293988147, "loss": 1.0752, "step": 16297 }, { "epoch": 0.4184869391338119, "grad_norm": 0.765625, "learning_rate": 0.00017049965695046553, "loss": 1.0662, "step": 16298 }, { "epoch": 0.4185126163297337, "grad_norm": 0.73046875, "learning_rate": 0.00017049649082055015, "loss": 0.9542, "step": 16299 }, { "epoch": 0.4185382935256555, "grad_norm": 0.765625, "learning_rate": 0.00017049332455014167, "loss": 0.8516, "step": 16300 }, { "epoch": 0.41856397072157736, "grad_norm": 0.6953125, "learning_rate": 0.0001704901581392463, "loss": 0.794, "step": 16301 }, { "epoch": 0.4185896479174992, "grad_norm": 0.7578125, "learning_rate": 0.00017048699158787046, "loss": 0.8321, "step": 16302 }, { "epoch": 0.418615325113421, "grad_norm": 0.96875, "learning_rate": 0.0001704838248960204, "loss": 0.8058, "step": 16303 }, { "epoch": 0.4186410023093428, "grad_norm": 0.88671875, "learning_rate": 0.00017048065806370244, "loss": 1.0012, "step": 16304 }, { "epoch": 0.41866667950526465, "grad_norm": 0.76953125, "learning_rate": 0.00017047749109092288, "loss": 0.9504, "step": 16305 }, { "epoch": 0.41869235670118643, "grad_norm": 0.80078125, "learning_rate": 0.00017047432397768807, "loss": 0.9336, "step": 16306 }, { "epoch": 0.41871803389710827, "grad_norm": 0.8671875, "learning_rate": 0.0001704711567240043, "loss": 1.0199, "step": 16307 }, { "epoch": 0.4187437110930301, "grad_norm": 0.796875, "learning_rate": 0.00017046798932987786, "loss": 0.849, "step": 16308 }, { "epoch": 0.4187693882889519, "grad_norm": 0.76171875, "learning_rate": 0.0001704648217953151, "loss": 0.8405, "step": 16309 }, { "epoch": 0.4187950654848737, "grad_norm": 0.78125, "learning_rate": 0.00017046165412032229, "loss": 0.9322, "step": 16310 }, { "epoch": 0.41882074268079555, "grad_norm": 0.76953125, "learning_rate": 0.00017045848630490578, "loss": 1.0038, "step": 16311 }, { "epoch": 0.4188464198767174, "grad_norm": 0.7890625, "learning_rate": 0.0001704553183490719, "loss": 0.8577, "step": 16312 }, { "epoch": 0.41887209707263917, "grad_norm": 0.84375, "learning_rate": 0.0001704521502528269, "loss": 1.0094, "step": 16313 }, { "epoch": 0.418897774268561, "grad_norm": 0.7421875, "learning_rate": 0.00017044898201617713, "loss": 0.9306, "step": 16314 }, { "epoch": 0.41892345146448284, "grad_norm": 0.82421875, "learning_rate": 0.00017044581363912892, "loss": 1.0088, "step": 16315 }, { "epoch": 0.4189491286604046, "grad_norm": 0.77734375, "learning_rate": 0.00017044264512168855, "loss": 0.8534, "step": 16316 }, { "epoch": 0.41897480585632646, "grad_norm": 0.765625, "learning_rate": 0.00017043947646386238, "loss": 1.0232, "step": 16317 }, { "epoch": 0.4190004830522483, "grad_norm": 0.8359375, "learning_rate": 0.00017043630766565667, "loss": 0.8033, "step": 16318 }, { "epoch": 0.4190261602481701, "grad_norm": 0.78515625, "learning_rate": 0.00017043313872707776, "loss": 0.975, "step": 16319 }, { "epoch": 0.4190518374440919, "grad_norm": 0.75, "learning_rate": 0.000170429969648132, "loss": 0.9726, "step": 16320 }, { "epoch": 0.41907751464001375, "grad_norm": 0.8671875, "learning_rate": 0.00017042680042882565, "loss": 1.0285, "step": 16321 }, { "epoch": 0.41910319183593553, "grad_norm": 0.75, "learning_rate": 0.000170423631069165, "loss": 0.742, "step": 16322 }, { "epoch": 0.41912886903185737, "grad_norm": 0.84375, "learning_rate": 0.00017042046156915647, "loss": 0.989, "step": 16323 }, { "epoch": 0.4191545462277792, "grad_norm": 0.734375, "learning_rate": 0.00017041729192880634, "loss": 0.8259, "step": 16324 }, { "epoch": 0.41918022342370104, "grad_norm": 0.75, "learning_rate": 0.00017041412214812088, "loss": 0.8441, "step": 16325 }, { "epoch": 0.4192059006196228, "grad_norm": 0.7578125, "learning_rate": 0.0001704109522271064, "loss": 0.9571, "step": 16326 }, { "epoch": 0.41923157781554465, "grad_norm": 0.77734375, "learning_rate": 0.0001704077821657693, "loss": 1.0343, "step": 16327 }, { "epoch": 0.4192572550114665, "grad_norm": 0.8046875, "learning_rate": 0.00017040461196411583, "loss": 0.9717, "step": 16328 }, { "epoch": 0.41928293220738827, "grad_norm": 0.8359375, "learning_rate": 0.00017040144162215235, "loss": 0.9599, "step": 16329 }, { "epoch": 0.4193086094033101, "grad_norm": 0.75, "learning_rate": 0.00017039827113988514, "loss": 0.8846, "step": 16330 }, { "epoch": 0.41933428659923194, "grad_norm": 0.73828125, "learning_rate": 0.00017039510051732054, "loss": 0.8195, "step": 16331 }, { "epoch": 0.4193599637951537, "grad_norm": 0.80078125, "learning_rate": 0.00017039192975446484, "loss": 0.7996, "step": 16332 }, { "epoch": 0.41938564099107556, "grad_norm": 0.7578125, "learning_rate": 0.0001703887588513244, "loss": 0.9156, "step": 16333 }, { "epoch": 0.4194113181869974, "grad_norm": 0.8359375, "learning_rate": 0.00017038558780790552, "loss": 0.9829, "step": 16334 }, { "epoch": 0.41943699538291923, "grad_norm": 0.6328125, "learning_rate": 0.00017038241662421455, "loss": 0.8682, "step": 16335 }, { "epoch": 0.419462672578841, "grad_norm": 0.78515625, "learning_rate": 0.00017037924530025772, "loss": 0.9526, "step": 16336 }, { "epoch": 0.41948834977476285, "grad_norm": 0.75390625, "learning_rate": 0.00017037607383604147, "loss": 0.8139, "step": 16337 }, { "epoch": 0.4195140269706847, "grad_norm": 0.80859375, "learning_rate": 0.00017037290223157202, "loss": 1.0748, "step": 16338 }, { "epoch": 0.41953970416660646, "grad_norm": 0.796875, "learning_rate": 0.00017036973048685576, "loss": 0.9637, "step": 16339 }, { "epoch": 0.4195653813625283, "grad_norm": 0.71875, "learning_rate": 0.00017036655860189898, "loss": 0.9085, "step": 16340 }, { "epoch": 0.41959105855845014, "grad_norm": 0.828125, "learning_rate": 0.00017036338657670802, "loss": 0.9306, "step": 16341 }, { "epoch": 0.4196167357543719, "grad_norm": 0.8046875, "learning_rate": 0.00017036021441128917, "loss": 0.8945, "step": 16342 }, { "epoch": 0.41964241295029375, "grad_norm": 0.82421875, "learning_rate": 0.00017035704210564877, "loss": 0.9583, "step": 16343 }, { "epoch": 0.4196680901462156, "grad_norm": 0.71484375, "learning_rate": 0.00017035386965979309, "loss": 0.7837, "step": 16344 }, { "epoch": 0.4196937673421374, "grad_norm": 0.73828125, "learning_rate": 0.00017035069707372857, "loss": 0.999, "step": 16345 }, { "epoch": 0.4197194445380592, "grad_norm": 0.77734375, "learning_rate": 0.00017034752434746145, "loss": 0.8521, "step": 16346 }, { "epoch": 0.41974512173398104, "grad_norm": 0.80078125, "learning_rate": 0.00017034435148099804, "loss": 0.9226, "step": 16347 }, { "epoch": 0.4197707989299029, "grad_norm": 0.80078125, "learning_rate": 0.00017034117847434472, "loss": 0.9088, "step": 16348 }, { "epoch": 0.41979647612582466, "grad_norm": 0.78515625, "learning_rate": 0.0001703380053275078, "loss": 0.974, "step": 16349 }, { "epoch": 0.4198221533217465, "grad_norm": 0.71875, "learning_rate": 0.00017033483204049356, "loss": 0.8413, "step": 16350 }, { "epoch": 0.41984783051766833, "grad_norm": 0.8046875, "learning_rate": 0.00017033165861330836, "loss": 0.9116, "step": 16351 }, { "epoch": 0.4198735077135901, "grad_norm": 0.73046875, "learning_rate": 0.00017032848504595853, "loss": 0.8678, "step": 16352 }, { "epoch": 0.41989918490951195, "grad_norm": 0.7265625, "learning_rate": 0.0001703253113384504, "loss": 0.7922, "step": 16353 }, { "epoch": 0.4199248621054338, "grad_norm": 0.75390625, "learning_rate": 0.00017032213749079022, "loss": 0.8479, "step": 16354 }, { "epoch": 0.4199505393013556, "grad_norm": 0.7734375, "learning_rate": 0.0001703189635029844, "loss": 0.8281, "step": 16355 }, { "epoch": 0.4199762164972774, "grad_norm": 0.8046875, "learning_rate": 0.00017031578937503922, "loss": 0.9747, "step": 16356 }, { "epoch": 0.42000189369319924, "grad_norm": 0.7734375, "learning_rate": 0.00017031261510696104, "loss": 0.9386, "step": 16357 }, { "epoch": 0.42002757088912107, "grad_norm": 0.77734375, "learning_rate": 0.0001703094406987562, "loss": 0.9127, "step": 16358 }, { "epoch": 0.42005324808504285, "grad_norm": 0.75, "learning_rate": 0.00017030626615043094, "loss": 0.9441, "step": 16359 }, { "epoch": 0.4200789252809647, "grad_norm": 0.828125, "learning_rate": 0.0001703030914619917, "loss": 0.8393, "step": 16360 }, { "epoch": 0.4201046024768865, "grad_norm": 0.81640625, "learning_rate": 0.0001702999166334447, "loss": 0.933, "step": 16361 }, { "epoch": 0.4201302796728083, "grad_norm": 0.75390625, "learning_rate": 0.00017029674166479632, "loss": 1.033, "step": 16362 }, { "epoch": 0.42015595686873014, "grad_norm": 0.90234375, "learning_rate": 0.0001702935665560529, "loss": 1.0426, "step": 16363 }, { "epoch": 0.420181634064652, "grad_norm": 0.70703125, "learning_rate": 0.00017029039130722073, "loss": 0.8302, "step": 16364 }, { "epoch": 0.4202073112605738, "grad_norm": 0.80859375, "learning_rate": 0.00017028721591830622, "loss": 0.8898, "step": 16365 }, { "epoch": 0.4202329884564956, "grad_norm": 0.87109375, "learning_rate": 0.00017028404038931557, "loss": 0.9895, "step": 16366 }, { "epoch": 0.42025866565241743, "grad_norm": 1.265625, "learning_rate": 0.00017028086472025523, "loss": 0.8168, "step": 16367 }, { "epoch": 0.42028434284833927, "grad_norm": 0.703125, "learning_rate": 0.00017027768891113144, "loss": 0.8203, "step": 16368 }, { "epoch": 0.42031002004426105, "grad_norm": 0.7734375, "learning_rate": 0.00017027451296195055, "loss": 1.0004, "step": 16369 }, { "epoch": 0.4203356972401829, "grad_norm": 0.78515625, "learning_rate": 0.00017027133687271893, "loss": 0.911, "step": 16370 }, { "epoch": 0.4203613744361047, "grad_norm": 0.734375, "learning_rate": 0.00017026816064344286, "loss": 0.8438, "step": 16371 }, { "epoch": 0.4203870516320265, "grad_norm": 0.8046875, "learning_rate": 0.00017026498427412872, "loss": 0.8828, "step": 16372 }, { "epoch": 0.42041272882794833, "grad_norm": 0.78125, "learning_rate": 0.0001702618077647828, "loss": 0.924, "step": 16373 }, { "epoch": 0.42043840602387017, "grad_norm": 0.7421875, "learning_rate": 0.00017025863111541146, "loss": 0.7622, "step": 16374 }, { "epoch": 0.420464083219792, "grad_norm": 0.8046875, "learning_rate": 0.000170255454326021, "loss": 0.9474, "step": 16375 }, { "epoch": 0.4204897604157138, "grad_norm": 0.7734375, "learning_rate": 0.00017025227739661774, "loss": 1.0141, "step": 16376 }, { "epoch": 0.4205154376116356, "grad_norm": 0.74609375, "learning_rate": 0.00017024910032720808, "loss": 0.9538, "step": 16377 }, { "epoch": 0.42054111480755746, "grad_norm": 0.82421875, "learning_rate": 0.00017024592311779831, "loss": 0.9882, "step": 16378 }, { "epoch": 0.42056679200347924, "grad_norm": 0.76171875, "learning_rate": 0.00017024274576839474, "loss": 0.9206, "step": 16379 }, { "epoch": 0.4205924691994011, "grad_norm": 0.82421875, "learning_rate": 0.00017023956827900372, "loss": 0.9522, "step": 16380 }, { "epoch": 0.4206181463953229, "grad_norm": 0.70703125, "learning_rate": 0.00017023639064963158, "loss": 0.8515, "step": 16381 }, { "epoch": 0.4206438235912447, "grad_norm": 0.87890625, "learning_rate": 0.00017023321288028468, "loss": 1.0007, "step": 16382 }, { "epoch": 0.42066950078716653, "grad_norm": 0.76953125, "learning_rate": 0.00017023003497096935, "loss": 0.831, "step": 16383 }, { "epoch": 0.42069517798308836, "grad_norm": 0.72265625, "learning_rate": 0.00017022685692169186, "loss": 0.8373, "step": 16384 }, { "epoch": 0.4207208551790102, "grad_norm": 0.77734375, "learning_rate": 0.0001702236787324586, "loss": 0.885, "step": 16385 }, { "epoch": 0.420746532374932, "grad_norm": 0.82421875, "learning_rate": 0.0001702205004032759, "loss": 0.8566, "step": 16386 }, { "epoch": 0.4207722095708538, "grad_norm": 0.8046875, "learning_rate": 0.0001702173219341501, "loss": 0.8579, "step": 16387 }, { "epoch": 0.42079788676677565, "grad_norm": 0.79296875, "learning_rate": 0.0001702141433250875, "loss": 0.9479, "step": 16388 }, { "epoch": 0.42082356396269743, "grad_norm": 0.890625, "learning_rate": 0.00017021096457609444, "loss": 0.8279, "step": 16389 }, { "epoch": 0.42084924115861927, "grad_norm": 0.8125, "learning_rate": 0.00017020778568717728, "loss": 0.9708, "step": 16390 }, { "epoch": 0.4208749183545411, "grad_norm": 0.80859375, "learning_rate": 0.00017020460665834235, "loss": 0.9783, "step": 16391 }, { "epoch": 0.4209005955504629, "grad_norm": 0.80859375, "learning_rate": 0.000170201427489596, "loss": 0.9018, "step": 16392 }, { "epoch": 0.4209262727463847, "grad_norm": 0.8046875, "learning_rate": 0.00017019824818094453, "loss": 0.9182, "step": 16393 }, { "epoch": 0.42095194994230656, "grad_norm": 0.7734375, "learning_rate": 0.00017019506873239428, "loss": 0.9086, "step": 16394 }, { "epoch": 0.4209776271382284, "grad_norm": 0.75, "learning_rate": 0.0001701918891439516, "loss": 0.9291, "step": 16395 }, { "epoch": 0.4210033043341502, "grad_norm": 0.7578125, "learning_rate": 0.00017018870941562284, "loss": 1.1117, "step": 16396 }, { "epoch": 0.421028981530072, "grad_norm": 0.875, "learning_rate": 0.00017018552954741433, "loss": 1.0541, "step": 16397 }, { "epoch": 0.42105465872599385, "grad_norm": 0.75, "learning_rate": 0.00017018234953933235, "loss": 0.839, "step": 16398 }, { "epoch": 0.4210803359219156, "grad_norm": 1.0390625, "learning_rate": 0.0001701791693913833, "loss": 1.0274, "step": 16399 }, { "epoch": 0.42110601311783746, "grad_norm": 0.8359375, "learning_rate": 0.00017017598910357354, "loss": 1.0958, "step": 16400 }, { "epoch": 0.4211316903137593, "grad_norm": 0.8046875, "learning_rate": 0.0001701728086759094, "loss": 1.0269, "step": 16401 }, { "epoch": 0.4211573675096811, "grad_norm": 0.77734375, "learning_rate": 0.00017016962810839715, "loss": 0.9001, "step": 16402 }, { "epoch": 0.4211830447056029, "grad_norm": 0.8515625, "learning_rate": 0.00017016644740104316, "loss": 0.9838, "step": 16403 }, { "epoch": 0.42120872190152475, "grad_norm": 0.7578125, "learning_rate": 0.00017016326655385378, "loss": 1.0876, "step": 16404 }, { "epoch": 0.4212343990974466, "grad_norm": 0.8515625, "learning_rate": 0.00017016008556683537, "loss": 0.8961, "step": 16405 }, { "epoch": 0.42126007629336837, "grad_norm": 0.71484375, "learning_rate": 0.00017015690443999423, "loss": 0.7484, "step": 16406 }, { "epoch": 0.4212857534892902, "grad_norm": 0.78125, "learning_rate": 0.00017015372317333668, "loss": 1.0077, "step": 16407 }, { "epoch": 0.42131143068521204, "grad_norm": 0.828125, "learning_rate": 0.00017015054176686918, "loss": 1.0773, "step": 16408 }, { "epoch": 0.4213371078811338, "grad_norm": 0.7421875, "learning_rate": 0.00017014736022059792, "loss": 0.806, "step": 16409 }, { "epoch": 0.42136278507705566, "grad_norm": 0.78125, "learning_rate": 0.00017014417853452934, "loss": 0.8399, "step": 16410 }, { "epoch": 0.4213884622729775, "grad_norm": 0.84375, "learning_rate": 0.00017014099670866975, "loss": 0.9112, "step": 16411 }, { "epoch": 0.4214141394688993, "grad_norm": 0.73828125, "learning_rate": 0.00017013781474302546, "loss": 0.9203, "step": 16412 }, { "epoch": 0.4214398166648211, "grad_norm": 0.76953125, "learning_rate": 0.00017013463263760285, "loss": 0.8776, "step": 16413 }, { "epoch": 0.42146549386074295, "grad_norm": 0.8984375, "learning_rate": 0.00017013145039240828, "loss": 1.0359, "step": 16414 }, { "epoch": 0.4214911710566648, "grad_norm": 0.8046875, "learning_rate": 0.00017012826800744804, "loss": 0.8343, "step": 16415 }, { "epoch": 0.42151684825258656, "grad_norm": 0.734375, "learning_rate": 0.0001701250854827285, "loss": 0.8234, "step": 16416 }, { "epoch": 0.4215425254485084, "grad_norm": 0.7734375, "learning_rate": 0.000170121902818256, "loss": 0.8829, "step": 16417 }, { "epoch": 0.42156820264443023, "grad_norm": 0.8359375, "learning_rate": 0.0001701187200140369, "loss": 0.9559, "step": 16418 }, { "epoch": 0.421593879840352, "grad_norm": 0.828125, "learning_rate": 0.0001701155370700775, "loss": 1.0021, "step": 16419 }, { "epoch": 0.42161955703627385, "grad_norm": 0.83984375, "learning_rate": 0.00017011235398638416, "loss": 0.8658, "step": 16420 }, { "epoch": 0.4216452342321957, "grad_norm": 0.71875, "learning_rate": 0.00017010917076296325, "loss": 0.8434, "step": 16421 }, { "epoch": 0.42167091142811747, "grad_norm": 0.80078125, "learning_rate": 0.0001701059873998211, "loss": 0.9269, "step": 16422 }, { "epoch": 0.4216965886240393, "grad_norm": 0.78515625, "learning_rate": 0.00017010280389696404, "loss": 1.0193, "step": 16423 }, { "epoch": 0.42172226581996114, "grad_norm": 0.77734375, "learning_rate": 0.00017009962025439843, "loss": 0.9271, "step": 16424 }, { "epoch": 0.421747943015883, "grad_norm": 1.3046875, "learning_rate": 0.0001700964364721306, "loss": 1.0153, "step": 16425 }, { "epoch": 0.42177362021180476, "grad_norm": 0.72265625, "learning_rate": 0.00017009325255016694, "loss": 0.9513, "step": 16426 }, { "epoch": 0.4217992974077266, "grad_norm": 0.74609375, "learning_rate": 0.0001700900684885137, "loss": 0.8593, "step": 16427 }, { "epoch": 0.42182497460364843, "grad_norm": 0.85546875, "learning_rate": 0.00017008688428717732, "loss": 0.9151, "step": 16428 }, { "epoch": 0.4218506517995702, "grad_norm": 0.76171875, "learning_rate": 0.0001700836999461641, "loss": 0.9912, "step": 16429 }, { "epoch": 0.42187632899549204, "grad_norm": 0.83203125, "learning_rate": 0.0001700805154654804, "loss": 1.0193, "step": 16430 }, { "epoch": 0.4219020061914139, "grad_norm": 0.79296875, "learning_rate": 0.00017007733084513257, "loss": 0.9992, "step": 16431 }, { "epoch": 0.42192768338733566, "grad_norm": 0.83984375, "learning_rate": 0.00017007414608512694, "loss": 1.0165, "step": 16432 }, { "epoch": 0.4219533605832575, "grad_norm": 0.75, "learning_rate": 0.00017007096118546987, "loss": 0.9825, "step": 16433 }, { "epoch": 0.42197903777917933, "grad_norm": 0.8046875, "learning_rate": 0.00017006777614616772, "loss": 0.8446, "step": 16434 }, { "epoch": 0.42200471497510117, "grad_norm": 0.71484375, "learning_rate": 0.0001700645909672268, "loss": 0.7806, "step": 16435 }, { "epoch": 0.42203039217102295, "grad_norm": 0.74609375, "learning_rate": 0.00017006140564865347, "loss": 0.831, "step": 16436 }, { "epoch": 0.4220560693669448, "grad_norm": 0.75, "learning_rate": 0.00017005822019045412, "loss": 0.9255, "step": 16437 }, { "epoch": 0.4220817465628666, "grad_norm": 0.7265625, "learning_rate": 0.00017005503459263506, "loss": 0.798, "step": 16438 }, { "epoch": 0.4221074237587884, "grad_norm": 0.85546875, "learning_rate": 0.0001700518488552026, "loss": 0.9074, "step": 16439 }, { "epoch": 0.42213310095471024, "grad_norm": 0.87109375, "learning_rate": 0.00017004866297816318, "loss": 0.8265, "step": 16440 }, { "epoch": 0.4221587781506321, "grad_norm": 0.7421875, "learning_rate": 0.00017004547696152308, "loss": 0.9809, "step": 16441 }, { "epoch": 0.42218445534655386, "grad_norm": 0.80078125, "learning_rate": 0.00017004229080528867, "loss": 0.9488, "step": 16442 }, { "epoch": 0.4222101325424757, "grad_norm": 0.78125, "learning_rate": 0.0001700391045094663, "loss": 0.937, "step": 16443 }, { "epoch": 0.4222358097383975, "grad_norm": 0.8203125, "learning_rate": 0.00017003591807406235, "loss": 0.9099, "step": 16444 }, { "epoch": 0.42226148693431936, "grad_norm": 0.8125, "learning_rate": 0.00017003273149908312, "loss": 0.888, "step": 16445 }, { "epoch": 0.42228716413024114, "grad_norm": 0.75390625, "learning_rate": 0.00017002954478453498, "loss": 0.8946, "step": 16446 }, { "epoch": 0.422312841326163, "grad_norm": 0.76953125, "learning_rate": 0.0001700263579304243, "loss": 0.9213, "step": 16447 }, { "epoch": 0.4223385185220848, "grad_norm": 0.7578125, "learning_rate": 0.0001700231709367574, "loss": 0.8609, "step": 16448 }, { "epoch": 0.4223641957180066, "grad_norm": 0.83984375, "learning_rate": 0.00017001998380354063, "loss": 1.0231, "step": 16449 }, { "epoch": 0.42238987291392843, "grad_norm": 0.80078125, "learning_rate": 0.0001700167965307804, "loss": 0.8795, "step": 16450 }, { "epoch": 0.42241555010985027, "grad_norm": 0.734375, "learning_rate": 0.00017001360911848297, "loss": 0.8929, "step": 16451 }, { "epoch": 0.42244122730577205, "grad_norm": 0.76171875, "learning_rate": 0.00017001042156665477, "loss": 0.822, "step": 16452 }, { "epoch": 0.4224669045016939, "grad_norm": 0.765625, "learning_rate": 0.00017000723387530214, "loss": 1.0431, "step": 16453 }, { "epoch": 0.4224925816976157, "grad_norm": 0.796875, "learning_rate": 0.0001700040460444314, "loss": 0.9592, "step": 16454 }, { "epoch": 0.42251825889353756, "grad_norm": 0.8125, "learning_rate": 0.0001700008580740489, "loss": 0.9941, "step": 16455 }, { "epoch": 0.42254393608945934, "grad_norm": 0.828125, "learning_rate": 0.000169997669964161, "loss": 0.962, "step": 16456 }, { "epoch": 0.4225696132853812, "grad_norm": 0.72265625, "learning_rate": 0.0001699944817147741, "loss": 0.8255, "step": 16457 }, { "epoch": 0.422595290481303, "grad_norm": 0.7109375, "learning_rate": 0.00016999129332589453, "loss": 0.8286, "step": 16458 }, { "epoch": 0.4226209676772248, "grad_norm": 0.71484375, "learning_rate": 0.0001699881047975286, "loss": 1.0007, "step": 16459 }, { "epoch": 0.4226466448731466, "grad_norm": 0.74609375, "learning_rate": 0.00016998491612968273, "loss": 0.7736, "step": 16460 }, { "epoch": 0.42267232206906846, "grad_norm": 0.85546875, "learning_rate": 0.00016998172732236324, "loss": 1.0264, "step": 16461 }, { "epoch": 0.42269799926499024, "grad_norm": 0.69140625, "learning_rate": 0.00016997853837557648, "loss": 0.7714, "step": 16462 }, { "epoch": 0.4227236764609121, "grad_norm": 0.75390625, "learning_rate": 0.0001699753492893288, "loss": 1.0705, "step": 16463 }, { "epoch": 0.4227493536568339, "grad_norm": 0.7265625, "learning_rate": 0.00016997216006362658, "loss": 0.8652, "step": 16464 }, { "epoch": 0.42277503085275575, "grad_norm": 0.78125, "learning_rate": 0.00016996897069847619, "loss": 0.9307, "step": 16465 }, { "epoch": 0.42280070804867753, "grad_norm": 0.77734375, "learning_rate": 0.00016996578119388394, "loss": 0.9459, "step": 16466 }, { "epoch": 0.42282638524459937, "grad_norm": 0.7578125, "learning_rate": 0.0001699625915498562, "loss": 0.9682, "step": 16467 }, { "epoch": 0.4228520624405212, "grad_norm": 0.74609375, "learning_rate": 0.00016995940176639935, "loss": 0.7914, "step": 16468 }, { "epoch": 0.422877739636443, "grad_norm": 0.79296875, "learning_rate": 0.00016995621184351972, "loss": 0.995, "step": 16469 }, { "epoch": 0.4229034168323648, "grad_norm": 0.8359375, "learning_rate": 0.0001699530217812237, "loss": 0.8662, "step": 16470 }, { "epoch": 0.42292909402828666, "grad_norm": 0.7421875, "learning_rate": 0.00016994983157951762, "loss": 0.8694, "step": 16471 }, { "epoch": 0.42295477122420844, "grad_norm": 0.76953125, "learning_rate": 0.00016994664123840783, "loss": 0.7724, "step": 16472 }, { "epoch": 0.4229804484201303, "grad_norm": 0.7265625, "learning_rate": 0.00016994345075790071, "loss": 0.7309, "step": 16473 }, { "epoch": 0.4230061256160521, "grad_norm": 0.828125, "learning_rate": 0.0001699402601380026, "loss": 0.9572, "step": 16474 }, { "epoch": 0.42303180281197394, "grad_norm": 0.80859375, "learning_rate": 0.0001699370693787199, "loss": 0.926, "step": 16475 }, { "epoch": 0.4230574800078957, "grad_norm": 0.73828125, "learning_rate": 0.0001699338784800589, "loss": 0.9315, "step": 16476 }, { "epoch": 0.42308315720381756, "grad_norm": 0.73828125, "learning_rate": 0.00016993068744202601, "loss": 0.7945, "step": 16477 }, { "epoch": 0.4231088343997394, "grad_norm": 0.7421875, "learning_rate": 0.00016992749626462762, "loss": 0.8391, "step": 16478 }, { "epoch": 0.4231345115956612, "grad_norm": 0.77734375, "learning_rate": 0.00016992430494786997, "loss": 0.9043, "step": 16479 }, { "epoch": 0.423160188791583, "grad_norm": 0.75, "learning_rate": 0.00016992111349175954, "loss": 0.9161, "step": 16480 }, { "epoch": 0.42318586598750485, "grad_norm": 0.78125, "learning_rate": 0.00016991792189630266, "loss": 1.0186, "step": 16481 }, { "epoch": 0.42321154318342663, "grad_norm": 0.75390625, "learning_rate": 0.00016991473016150563, "loss": 1.0235, "step": 16482 }, { "epoch": 0.42323722037934847, "grad_norm": 0.7578125, "learning_rate": 0.0001699115382873749, "loss": 0.9262, "step": 16483 }, { "epoch": 0.4232628975752703, "grad_norm": 0.703125, "learning_rate": 0.00016990834627391677, "loss": 0.8881, "step": 16484 }, { "epoch": 0.42328857477119214, "grad_norm": 0.72265625, "learning_rate": 0.0001699051541211376, "loss": 0.8849, "step": 16485 }, { "epoch": 0.4233142519671139, "grad_norm": 0.7421875, "learning_rate": 0.00016990196182904385, "loss": 0.876, "step": 16486 }, { "epoch": 0.42333992916303576, "grad_norm": 0.83203125, "learning_rate": 0.0001698987693976417, "loss": 1.0166, "step": 16487 }, { "epoch": 0.4233656063589576, "grad_norm": 0.84765625, "learning_rate": 0.00016989557682693768, "loss": 0.8897, "step": 16488 }, { "epoch": 0.42339128355487937, "grad_norm": 0.7578125, "learning_rate": 0.0001698923841169381, "loss": 0.985, "step": 16489 }, { "epoch": 0.4234169607508012, "grad_norm": 0.7734375, "learning_rate": 0.0001698891912676493, "loss": 0.9812, "step": 16490 }, { "epoch": 0.42344263794672304, "grad_norm": 0.79296875, "learning_rate": 0.00016988599827907764, "loss": 0.921, "step": 16491 }, { "epoch": 0.4234683151426448, "grad_norm": 0.828125, "learning_rate": 0.00016988280515122949, "loss": 0.8267, "step": 16492 }, { "epoch": 0.42349399233856666, "grad_norm": 0.8828125, "learning_rate": 0.00016987961188411123, "loss": 0.9553, "step": 16493 }, { "epoch": 0.4235196695344885, "grad_norm": 0.765625, "learning_rate": 0.0001698764184777292, "loss": 1.0768, "step": 16494 }, { "epoch": 0.42354534673041033, "grad_norm": 0.7578125, "learning_rate": 0.00016987322493208982, "loss": 0.8664, "step": 16495 }, { "epoch": 0.4235710239263321, "grad_norm": 0.73828125, "learning_rate": 0.0001698700312471994, "loss": 1.0457, "step": 16496 }, { "epoch": 0.42359670112225395, "grad_norm": 0.83203125, "learning_rate": 0.0001698668374230643, "loss": 0.9883, "step": 16497 }, { "epoch": 0.4236223783181758, "grad_norm": 0.72265625, "learning_rate": 0.00016986364345969094, "loss": 0.8231, "step": 16498 }, { "epoch": 0.42364805551409757, "grad_norm": 0.796875, "learning_rate": 0.00016986044935708562, "loss": 0.733, "step": 16499 }, { "epoch": 0.4236737327100194, "grad_norm": 0.734375, "learning_rate": 0.00016985725511525472, "loss": 0.9047, "step": 16500 }, { "epoch": 0.42369940990594124, "grad_norm": 0.72265625, "learning_rate": 0.00016985406073420466, "loss": 0.7388, "step": 16501 }, { "epoch": 0.423725087101863, "grad_norm": 0.73828125, "learning_rate": 0.00016985086621394176, "loss": 0.85, "step": 16502 }, { "epoch": 0.42375076429778485, "grad_norm": 0.69921875, "learning_rate": 0.0001698476715544724, "loss": 0.7339, "step": 16503 }, { "epoch": 0.4237764414937067, "grad_norm": 0.82421875, "learning_rate": 0.0001698444767558029, "loss": 0.8512, "step": 16504 }, { "epoch": 0.4238021186896285, "grad_norm": 0.765625, "learning_rate": 0.0001698412818179397, "loss": 0.8133, "step": 16505 }, { "epoch": 0.4238277958855503, "grad_norm": 0.71484375, "learning_rate": 0.0001698380867408891, "loss": 0.9459, "step": 16506 }, { "epoch": 0.42385347308147214, "grad_norm": 0.8359375, "learning_rate": 0.0001698348915246575, "loss": 0.9294, "step": 16507 }, { "epoch": 0.423879150277394, "grad_norm": 0.73828125, "learning_rate": 0.00016983169616925135, "loss": 0.9081, "step": 16508 }, { "epoch": 0.42390482747331576, "grad_norm": 0.79296875, "learning_rate": 0.00016982850067467686, "loss": 1.0489, "step": 16509 }, { "epoch": 0.4239305046692376, "grad_norm": 0.75390625, "learning_rate": 0.0001698253050409405, "loss": 0.9693, "step": 16510 }, { "epoch": 0.42395618186515943, "grad_norm": 0.7421875, "learning_rate": 0.00016982210926804863, "loss": 0.8291, "step": 16511 }, { "epoch": 0.4239818590610812, "grad_norm": 0.76953125, "learning_rate": 0.00016981891335600757, "loss": 0.988, "step": 16512 }, { "epoch": 0.42400753625700305, "grad_norm": 0.7109375, "learning_rate": 0.00016981571730482372, "loss": 0.9074, "step": 16513 }, { "epoch": 0.4240332134529249, "grad_norm": 0.7578125, "learning_rate": 0.00016981252111450347, "loss": 0.9201, "step": 16514 }, { "epoch": 0.4240588906488467, "grad_norm": 0.78125, "learning_rate": 0.00016980932478505317, "loss": 0.8825, "step": 16515 }, { "epoch": 0.4240845678447685, "grad_norm": 0.8203125, "learning_rate": 0.0001698061283164792, "loss": 0.9709, "step": 16516 }, { "epoch": 0.42411024504069034, "grad_norm": 0.81640625, "learning_rate": 0.0001698029317087879, "loss": 0.8919, "step": 16517 }, { "epoch": 0.4241359222366122, "grad_norm": 0.78515625, "learning_rate": 0.0001697997349619857, "loss": 1.03, "step": 16518 }, { "epoch": 0.42416159943253395, "grad_norm": 0.73828125, "learning_rate": 0.00016979653807607892, "loss": 0.8034, "step": 16519 }, { "epoch": 0.4241872766284558, "grad_norm": 0.79296875, "learning_rate": 0.00016979334105107392, "loss": 0.9204, "step": 16520 }, { "epoch": 0.4242129538243776, "grad_norm": 0.7734375, "learning_rate": 0.0001697901438869771, "loss": 0.9842, "step": 16521 }, { "epoch": 0.4242386310202994, "grad_norm": 0.81640625, "learning_rate": 0.00016978694658379486, "loss": 0.9223, "step": 16522 }, { "epoch": 0.42426430821622124, "grad_norm": 0.76171875, "learning_rate": 0.00016978374914153349, "loss": 0.9501, "step": 16523 }, { "epoch": 0.4242899854121431, "grad_norm": 0.78515625, "learning_rate": 0.00016978055156019944, "loss": 0.9591, "step": 16524 }, { "epoch": 0.42431566260806486, "grad_norm": 0.8359375, "learning_rate": 0.00016977735383979907, "loss": 1.1126, "step": 16525 }, { "epoch": 0.4243413398039867, "grad_norm": 0.7265625, "learning_rate": 0.0001697741559803387, "loss": 0.9079, "step": 16526 }, { "epoch": 0.42436701699990853, "grad_norm": 0.75, "learning_rate": 0.00016977095798182478, "loss": 0.8685, "step": 16527 }, { "epoch": 0.42439269419583037, "grad_norm": 0.83203125, "learning_rate": 0.0001697677598442636, "loss": 0.8397, "step": 16528 }, { "epoch": 0.42441837139175215, "grad_norm": 0.7421875, "learning_rate": 0.0001697645615676616, "loss": 0.859, "step": 16529 }, { "epoch": 0.424444048587674, "grad_norm": 0.80078125, "learning_rate": 0.00016976136315202515, "loss": 0.9997, "step": 16530 }, { "epoch": 0.4244697257835958, "grad_norm": 0.71875, "learning_rate": 0.00016975816459736059, "loss": 1.0516, "step": 16531 }, { "epoch": 0.4244954029795176, "grad_norm": 0.73828125, "learning_rate": 0.0001697549659036743, "loss": 0.8652, "step": 16532 }, { "epoch": 0.42452108017543944, "grad_norm": 0.7265625, "learning_rate": 0.00016975176707097265, "loss": 1.0186, "step": 16533 }, { "epoch": 0.42454675737136127, "grad_norm": 0.7734375, "learning_rate": 0.00016974856809926205, "loss": 0.9332, "step": 16534 }, { "epoch": 0.42457243456728305, "grad_norm": 0.7890625, "learning_rate": 0.00016974536898854884, "loss": 0.8746, "step": 16535 }, { "epoch": 0.4245981117632049, "grad_norm": 0.796875, "learning_rate": 0.00016974216973883945, "loss": 0.9941, "step": 16536 }, { "epoch": 0.4246237889591267, "grad_norm": 0.7109375, "learning_rate": 0.00016973897035014017, "loss": 0.8958, "step": 16537 }, { "epoch": 0.42464946615504856, "grad_norm": 0.7578125, "learning_rate": 0.00016973577082245745, "loss": 0.8641, "step": 16538 }, { "epoch": 0.42467514335097034, "grad_norm": 0.76171875, "learning_rate": 0.00016973257115579762, "loss": 0.9277, "step": 16539 }, { "epoch": 0.4247008205468922, "grad_norm": 0.76953125, "learning_rate": 0.00016972937135016706, "loss": 0.9299, "step": 16540 }, { "epoch": 0.424726497742814, "grad_norm": 0.80859375, "learning_rate": 0.0001697261714055722, "loss": 1.0229, "step": 16541 }, { "epoch": 0.4247521749387358, "grad_norm": 0.82421875, "learning_rate": 0.00016972297132201937, "loss": 0.9137, "step": 16542 }, { "epoch": 0.42477785213465763, "grad_norm": 0.7734375, "learning_rate": 0.00016971977109951495, "loss": 0.9848, "step": 16543 }, { "epoch": 0.42480352933057947, "grad_norm": 0.765625, "learning_rate": 0.00016971657073806527, "loss": 0.8387, "step": 16544 }, { "epoch": 0.42482920652650125, "grad_norm": 0.765625, "learning_rate": 0.00016971337023767683, "loss": 0.8411, "step": 16545 }, { "epoch": 0.4248548837224231, "grad_norm": 0.7578125, "learning_rate": 0.00016971016959835592, "loss": 0.8816, "step": 16546 }, { "epoch": 0.4248805609183449, "grad_norm": 0.91796875, "learning_rate": 0.00016970696882010895, "loss": 1.0007, "step": 16547 }, { "epoch": 0.42490623811426675, "grad_norm": 0.78125, "learning_rate": 0.00016970376790294227, "loss": 0.936, "step": 16548 }, { "epoch": 0.42493191531018853, "grad_norm": 0.7734375, "learning_rate": 0.00016970056684686231, "loss": 0.9817, "step": 16549 }, { "epoch": 0.42495759250611037, "grad_norm": 0.7421875, "learning_rate": 0.0001696973656518754, "loss": 0.836, "step": 16550 }, { "epoch": 0.4249832697020322, "grad_norm": 1.265625, "learning_rate": 0.0001696941643179879, "loss": 0.9467, "step": 16551 }, { "epoch": 0.425008946897954, "grad_norm": 0.80078125, "learning_rate": 0.00016969096284520627, "loss": 0.9114, "step": 16552 }, { "epoch": 0.4250346240938758, "grad_norm": 0.859375, "learning_rate": 0.00016968776123353683, "loss": 1.025, "step": 16553 }, { "epoch": 0.42506030128979766, "grad_norm": 0.88671875, "learning_rate": 0.00016968455948298595, "loss": 0.9381, "step": 16554 }, { "epoch": 0.42508597848571944, "grad_norm": 0.74609375, "learning_rate": 0.00016968135759356007, "loss": 0.9101, "step": 16555 }, { "epoch": 0.4251116556816413, "grad_norm": 0.77734375, "learning_rate": 0.00016967815556526557, "loss": 0.9118, "step": 16556 }, { "epoch": 0.4251373328775631, "grad_norm": 0.74609375, "learning_rate": 0.00016967495339810875, "loss": 0.8974, "step": 16557 }, { "epoch": 0.42516301007348495, "grad_norm": 0.765625, "learning_rate": 0.00016967175109209603, "loss": 0.862, "step": 16558 }, { "epoch": 0.42518868726940673, "grad_norm": 0.796875, "learning_rate": 0.00016966854864723386, "loss": 0.9194, "step": 16559 }, { "epoch": 0.42521436446532856, "grad_norm": 0.7734375, "learning_rate": 0.0001696653460635285, "loss": 0.9047, "step": 16560 }, { "epoch": 0.4252400416612504, "grad_norm": 0.79296875, "learning_rate": 0.00016966214334098644, "loss": 0.8528, "step": 16561 }, { "epoch": 0.4252657188571722, "grad_norm": 0.93359375, "learning_rate": 0.00016965894047961402, "loss": 0.913, "step": 16562 }, { "epoch": 0.425291396053094, "grad_norm": 0.91796875, "learning_rate": 0.0001696557374794176, "loss": 0.9978, "step": 16563 }, { "epoch": 0.42531707324901585, "grad_norm": 0.7734375, "learning_rate": 0.0001696525343404036, "loss": 0.8477, "step": 16564 }, { "epoch": 0.42534275044493763, "grad_norm": 0.74609375, "learning_rate": 0.0001696493310625784, "loss": 0.8822, "step": 16565 }, { "epoch": 0.42536842764085947, "grad_norm": 0.734375, "learning_rate": 0.00016964612764594839, "loss": 0.9514, "step": 16566 }, { "epoch": 0.4253941048367813, "grad_norm": 0.71875, "learning_rate": 0.0001696429240905199, "loss": 0.8769, "step": 16567 }, { "epoch": 0.42541978203270314, "grad_norm": 0.828125, "learning_rate": 0.00016963972039629938, "loss": 0.9244, "step": 16568 }, { "epoch": 0.4254454592286249, "grad_norm": 0.77734375, "learning_rate": 0.00016963651656329317, "loss": 0.9679, "step": 16569 }, { "epoch": 0.42547113642454676, "grad_norm": 0.71484375, "learning_rate": 0.0001696333125915077, "loss": 0.8667, "step": 16570 }, { "epoch": 0.4254968136204686, "grad_norm": 0.91015625, "learning_rate": 0.00016963010848094928, "loss": 1.0267, "step": 16571 }, { "epoch": 0.4255224908163904, "grad_norm": 0.83984375, "learning_rate": 0.0001696269042316244, "loss": 1.069, "step": 16572 }, { "epoch": 0.4255481680123122, "grad_norm": 0.734375, "learning_rate": 0.00016962369984353933, "loss": 0.7694, "step": 16573 }, { "epoch": 0.42557384520823405, "grad_norm": 0.8125, "learning_rate": 0.00016962049531670057, "loss": 0.8445, "step": 16574 }, { "epoch": 0.42559952240415583, "grad_norm": 0.8203125, "learning_rate": 0.00016961729065111442, "loss": 1.0251, "step": 16575 }, { "epoch": 0.42562519960007766, "grad_norm": 1.1015625, "learning_rate": 0.0001696140858467873, "loss": 0.8972, "step": 16576 }, { "epoch": 0.4256508767959995, "grad_norm": 0.77734375, "learning_rate": 0.0001696108809037256, "loss": 0.8285, "step": 16577 }, { "epoch": 0.42567655399192134, "grad_norm": 0.83984375, "learning_rate": 0.00016960767582193572, "loss": 0.9471, "step": 16578 }, { "epoch": 0.4257022311878431, "grad_norm": 0.8203125, "learning_rate": 0.00016960447060142402, "loss": 0.9341, "step": 16579 }, { "epoch": 0.42572790838376495, "grad_norm": 0.7421875, "learning_rate": 0.00016960126524219688, "loss": 0.9742, "step": 16580 }, { "epoch": 0.4257535855796868, "grad_norm": 0.80859375, "learning_rate": 0.00016959805974426072, "loss": 1.0294, "step": 16581 }, { "epoch": 0.42577926277560857, "grad_norm": 0.796875, "learning_rate": 0.00016959485410762194, "loss": 0.8285, "step": 16582 }, { "epoch": 0.4258049399715304, "grad_norm": 0.86328125, "learning_rate": 0.00016959164833228683, "loss": 0.8527, "step": 16583 }, { "epoch": 0.42583061716745224, "grad_norm": 0.7734375, "learning_rate": 0.00016958844241826194, "loss": 0.9971, "step": 16584 }, { "epoch": 0.425856294363374, "grad_norm": 0.74609375, "learning_rate": 0.0001695852363655535, "loss": 0.8203, "step": 16585 }, { "epoch": 0.42588197155929586, "grad_norm": 0.8046875, "learning_rate": 0.000169582030174168, "loss": 0.9344, "step": 16586 }, { "epoch": 0.4259076487552177, "grad_norm": 0.76953125, "learning_rate": 0.00016957882384411178, "loss": 0.9114, "step": 16587 }, { "epoch": 0.42593332595113953, "grad_norm": 0.734375, "learning_rate": 0.00016957561737539125, "loss": 0.8691, "step": 16588 }, { "epoch": 0.4259590031470613, "grad_norm": 0.77734375, "learning_rate": 0.00016957241076801283, "loss": 1.047, "step": 16589 }, { "epoch": 0.42598468034298315, "grad_norm": 0.8046875, "learning_rate": 0.00016956920402198288, "loss": 1.0524, "step": 16590 }, { "epoch": 0.426010357538905, "grad_norm": 0.734375, "learning_rate": 0.00016956599713730778, "loss": 0.9674, "step": 16591 }, { "epoch": 0.42603603473482676, "grad_norm": 0.7421875, "learning_rate": 0.0001695627901139939, "loss": 1.0212, "step": 16592 }, { "epoch": 0.4260617119307486, "grad_norm": 0.80078125, "learning_rate": 0.00016955958295204767, "loss": 0.8536, "step": 16593 }, { "epoch": 0.42608738912667043, "grad_norm": 0.81640625, "learning_rate": 0.0001695563756514755, "loss": 0.9306, "step": 16594 }, { "epoch": 0.4261130663225922, "grad_norm": 0.79296875, "learning_rate": 0.00016955316821228375, "loss": 0.9181, "step": 16595 }, { "epoch": 0.42613874351851405, "grad_norm": 0.94921875, "learning_rate": 0.00016954996063447882, "loss": 0.9397, "step": 16596 }, { "epoch": 0.4261644207144359, "grad_norm": 0.734375, "learning_rate": 0.00016954675291806708, "loss": 0.8586, "step": 16597 }, { "epoch": 0.4261900979103577, "grad_norm": 0.7734375, "learning_rate": 0.00016954354506305497, "loss": 0.8688, "step": 16598 }, { "epoch": 0.4262157751062795, "grad_norm": 0.70703125, "learning_rate": 0.00016954033706944883, "loss": 0.8943, "step": 16599 }, { "epoch": 0.42624145230220134, "grad_norm": 0.8515625, "learning_rate": 0.00016953712893725512, "loss": 0.8771, "step": 16600 }, { "epoch": 0.4262671294981232, "grad_norm": 0.7109375, "learning_rate": 0.00016953392066648015, "loss": 0.8733, "step": 16601 }, { "epoch": 0.42629280669404496, "grad_norm": 0.73828125, "learning_rate": 0.0001695307122571304, "loss": 0.8868, "step": 16602 }, { "epoch": 0.4263184838899668, "grad_norm": 0.7265625, "learning_rate": 0.0001695275037092122, "loss": 0.9743, "step": 16603 }, { "epoch": 0.42634416108588863, "grad_norm": 0.83984375, "learning_rate": 0.00016952429502273194, "loss": 0.8911, "step": 16604 }, { "epoch": 0.4263698382818104, "grad_norm": 0.74609375, "learning_rate": 0.00016952108619769607, "loss": 0.9243, "step": 16605 }, { "epoch": 0.42639551547773225, "grad_norm": 0.796875, "learning_rate": 0.00016951787723411098, "loss": 0.8246, "step": 16606 }, { "epoch": 0.4264211926736541, "grad_norm": 0.69921875, "learning_rate": 0.00016951466813198298, "loss": 0.9481, "step": 16607 }, { "epoch": 0.4264468698695759, "grad_norm": 0.703125, "learning_rate": 0.00016951145889131856, "loss": 0.8373, "step": 16608 }, { "epoch": 0.4264725470654977, "grad_norm": 0.78125, "learning_rate": 0.00016950824951212406, "loss": 0.9742, "step": 16609 }, { "epoch": 0.42649822426141953, "grad_norm": 0.8046875, "learning_rate": 0.00016950503999440595, "loss": 0.8196, "step": 16610 }, { "epoch": 0.42652390145734137, "grad_norm": 0.80859375, "learning_rate": 0.0001695018303381705, "loss": 0.8982, "step": 16611 }, { "epoch": 0.42654957865326315, "grad_norm": 0.80859375, "learning_rate": 0.00016949862054342423, "loss": 0.8764, "step": 16612 }, { "epoch": 0.426575255849185, "grad_norm": 0.74609375, "learning_rate": 0.00016949541061017347, "loss": 0.8767, "step": 16613 }, { "epoch": 0.4266009330451068, "grad_norm": 0.87890625, "learning_rate": 0.00016949220053842466, "loss": 0.9711, "step": 16614 }, { "epoch": 0.4266266102410286, "grad_norm": 0.8828125, "learning_rate": 0.00016948899032818412, "loss": 1.0237, "step": 16615 }, { "epoch": 0.42665228743695044, "grad_norm": 0.7421875, "learning_rate": 0.00016948577997945833, "loss": 0.9131, "step": 16616 }, { "epoch": 0.4266779646328723, "grad_norm": 0.78125, "learning_rate": 0.00016948256949225366, "loss": 0.9279, "step": 16617 }, { "epoch": 0.4267036418287941, "grad_norm": 0.78515625, "learning_rate": 0.0001694793588665765, "loss": 0.9213, "step": 16618 }, { "epoch": 0.4267293190247159, "grad_norm": 0.74609375, "learning_rate": 0.00016947614810243326, "loss": 0.9529, "step": 16619 }, { "epoch": 0.42675499622063773, "grad_norm": 0.7890625, "learning_rate": 0.0001694729371998303, "loss": 0.9584, "step": 16620 }, { "epoch": 0.42678067341655956, "grad_norm": 0.828125, "learning_rate": 0.0001694697261587741, "loss": 0.8829, "step": 16621 }, { "epoch": 0.42680635061248134, "grad_norm": 0.75, "learning_rate": 0.00016946651497927099, "loss": 1.0193, "step": 16622 }, { "epoch": 0.4268320278084032, "grad_norm": 0.7890625, "learning_rate": 0.00016946330366132738, "loss": 0.8842, "step": 16623 }, { "epoch": 0.426857705004325, "grad_norm": 0.8203125, "learning_rate": 0.00016946009220494972, "loss": 0.7777, "step": 16624 }, { "epoch": 0.4268833822002468, "grad_norm": 0.80078125, "learning_rate": 0.00016945688061014433, "loss": 0.9346, "step": 16625 }, { "epoch": 0.42690905939616863, "grad_norm": 0.80078125, "learning_rate": 0.00016945366887691766, "loss": 0.9667, "step": 16626 }, { "epoch": 0.42693473659209047, "grad_norm": 0.77734375, "learning_rate": 0.0001694504570052761, "loss": 0.937, "step": 16627 }, { "epoch": 0.4269604137880123, "grad_norm": 0.79296875, "learning_rate": 0.00016944724499522607, "loss": 0.8742, "step": 16628 }, { "epoch": 0.4269860909839341, "grad_norm": 0.83203125, "learning_rate": 0.00016944403284677395, "loss": 0.8671, "step": 16629 }, { "epoch": 0.4270117681798559, "grad_norm": 0.82421875, "learning_rate": 0.00016944082055992614, "loss": 0.9711, "step": 16630 }, { "epoch": 0.42703744537577776, "grad_norm": 0.765625, "learning_rate": 0.00016943760813468902, "loss": 0.8654, "step": 16631 }, { "epoch": 0.42706312257169954, "grad_norm": 0.74609375, "learning_rate": 0.00016943439557106903, "loss": 0.7781, "step": 16632 }, { "epoch": 0.4270887997676214, "grad_norm": 0.77734375, "learning_rate": 0.00016943118286907258, "loss": 1.0118, "step": 16633 }, { "epoch": 0.4271144769635432, "grad_norm": 0.76953125, "learning_rate": 0.00016942797002870606, "loss": 1.1045, "step": 16634 }, { "epoch": 0.427140154159465, "grad_norm": 0.828125, "learning_rate": 0.00016942475704997585, "loss": 1.0329, "step": 16635 }, { "epoch": 0.4271658313553868, "grad_norm": 0.703125, "learning_rate": 0.00016942154393288837, "loss": 0.8545, "step": 16636 }, { "epoch": 0.42719150855130866, "grad_norm": 0.80078125, "learning_rate": 0.00016941833067745003, "loss": 0.8873, "step": 16637 }, { "epoch": 0.4272171857472305, "grad_norm": 0.765625, "learning_rate": 0.00016941511728366722, "loss": 0.8914, "step": 16638 }, { "epoch": 0.4272428629431523, "grad_norm": 0.796875, "learning_rate": 0.00016941190375154637, "loss": 0.7977, "step": 16639 }, { "epoch": 0.4272685401390741, "grad_norm": 0.8125, "learning_rate": 0.00016940869008109385, "loss": 1.039, "step": 16640 }, { "epoch": 0.42729421733499595, "grad_norm": 0.82421875, "learning_rate": 0.00016940547627231612, "loss": 0.9252, "step": 16641 }, { "epoch": 0.42731989453091773, "grad_norm": 0.80078125, "learning_rate": 0.00016940226232521946, "loss": 1.0602, "step": 16642 }, { "epoch": 0.42734557172683957, "grad_norm": 0.83203125, "learning_rate": 0.00016939904823981046, "loss": 0.8344, "step": 16643 }, { "epoch": 0.4273712489227614, "grad_norm": 0.78515625, "learning_rate": 0.00016939583401609536, "loss": 0.9669, "step": 16644 }, { "epoch": 0.4273969261186832, "grad_norm": 0.80078125, "learning_rate": 0.00016939261965408063, "loss": 0.9059, "step": 16645 }, { "epoch": 0.427422603314605, "grad_norm": 0.8203125, "learning_rate": 0.0001693894051537727, "loss": 0.9644, "step": 16646 }, { "epoch": 0.42744828051052686, "grad_norm": 0.7421875, "learning_rate": 0.00016938619051517795, "loss": 0.998, "step": 16647 }, { "epoch": 0.4274739577064487, "grad_norm": 0.7890625, "learning_rate": 0.0001693829757383028, "loss": 0.9615, "step": 16648 }, { "epoch": 0.4274996349023705, "grad_norm": 0.7890625, "learning_rate": 0.00016937976082315363, "loss": 0.8985, "step": 16649 }, { "epoch": 0.4275253120982923, "grad_norm": 0.81640625, "learning_rate": 0.00016937654576973686, "loss": 0.9979, "step": 16650 }, { "epoch": 0.42755098929421415, "grad_norm": 0.75, "learning_rate": 0.0001693733305780589, "loss": 0.9622, "step": 16651 }, { "epoch": 0.4275766664901359, "grad_norm": 0.77734375, "learning_rate": 0.0001693701152481262, "loss": 0.9799, "step": 16652 }, { "epoch": 0.42760234368605776, "grad_norm": 0.70703125, "learning_rate": 0.00016936689977994505, "loss": 0.8166, "step": 16653 }, { "epoch": 0.4276280208819796, "grad_norm": 0.8125, "learning_rate": 0.000169363684173522, "loss": 0.9749, "step": 16654 }, { "epoch": 0.4276536980779014, "grad_norm": 0.734375, "learning_rate": 0.0001693604684288634, "loss": 0.8464, "step": 16655 }, { "epoch": 0.4276793752738232, "grad_norm": 0.8359375, "learning_rate": 0.00016935725254597559, "loss": 0.8967, "step": 16656 }, { "epoch": 0.42770505246974505, "grad_norm": 0.73046875, "learning_rate": 0.0001693540365248651, "loss": 1.0252, "step": 16657 }, { "epoch": 0.4277307296656669, "grad_norm": 0.72265625, "learning_rate": 0.00016935082036553825, "loss": 0.8411, "step": 16658 }, { "epoch": 0.42775640686158867, "grad_norm": 0.72265625, "learning_rate": 0.00016934760406800146, "loss": 0.8063, "step": 16659 }, { "epoch": 0.4277820840575105, "grad_norm": 0.89453125, "learning_rate": 0.00016934438763226118, "loss": 0.9594, "step": 16660 }, { "epoch": 0.42780776125343234, "grad_norm": 0.83203125, "learning_rate": 0.00016934117105832383, "loss": 1.0534, "step": 16661 }, { "epoch": 0.4278334384493541, "grad_norm": 0.7265625, "learning_rate": 0.00016933795434619575, "loss": 0.9025, "step": 16662 }, { "epoch": 0.42785911564527596, "grad_norm": 0.71484375, "learning_rate": 0.0001693347374958834, "loss": 0.8231, "step": 16663 }, { "epoch": 0.4278847928411978, "grad_norm": 0.765625, "learning_rate": 0.00016933152050739318, "loss": 0.9605, "step": 16664 }, { "epoch": 0.4279104700371196, "grad_norm": 0.84765625, "learning_rate": 0.00016932830338073148, "loss": 0.8966, "step": 16665 }, { "epoch": 0.4279361472330414, "grad_norm": 0.78515625, "learning_rate": 0.00016932508611590478, "loss": 0.8325, "step": 16666 }, { "epoch": 0.42796182442896324, "grad_norm": 0.78515625, "learning_rate": 0.00016932186871291944, "loss": 0.9062, "step": 16667 }, { "epoch": 0.4279875016248851, "grad_norm": 0.73828125, "learning_rate": 0.00016931865117178187, "loss": 0.9238, "step": 16668 }, { "epoch": 0.42801317882080686, "grad_norm": 0.78515625, "learning_rate": 0.00016931543349249845, "loss": 0.8736, "step": 16669 }, { "epoch": 0.4280388560167287, "grad_norm": 0.7734375, "learning_rate": 0.00016931221567507567, "loss": 0.9695, "step": 16670 }, { "epoch": 0.42806453321265053, "grad_norm": 0.73828125, "learning_rate": 0.00016930899771951992, "loss": 0.7732, "step": 16671 }, { "epoch": 0.4280902104085723, "grad_norm": 0.875, "learning_rate": 0.00016930577962583757, "loss": 0.9426, "step": 16672 }, { "epoch": 0.42811588760449415, "grad_norm": 0.7734375, "learning_rate": 0.00016930256139403507, "loss": 0.9211, "step": 16673 }, { "epoch": 0.428141564800416, "grad_norm": 0.8125, "learning_rate": 0.0001692993430241188, "loss": 0.9384, "step": 16674 }, { "epoch": 0.42816724199633777, "grad_norm": 0.75, "learning_rate": 0.00016929612451609526, "loss": 0.9188, "step": 16675 }, { "epoch": 0.4281929191922596, "grad_norm": 0.74609375, "learning_rate": 0.00016929290586997076, "loss": 0.8294, "step": 16676 }, { "epoch": 0.42821859638818144, "grad_norm": 0.921875, "learning_rate": 0.00016928968708575175, "loss": 0.9248, "step": 16677 }, { "epoch": 0.4282442735841033, "grad_norm": 0.7578125, "learning_rate": 0.00016928646816344466, "loss": 0.913, "step": 16678 }, { "epoch": 0.42826995078002505, "grad_norm": 0.80859375, "learning_rate": 0.00016928324910305592, "loss": 1.0417, "step": 16679 }, { "epoch": 0.4282956279759469, "grad_norm": 0.79296875, "learning_rate": 0.00016928002990459188, "loss": 0.9, "step": 16680 }, { "epoch": 0.4283213051718687, "grad_norm": 0.75, "learning_rate": 0.00016927681056805907, "loss": 0.9167, "step": 16681 }, { "epoch": 0.4283469823677905, "grad_norm": 0.81640625, "learning_rate": 0.00016927359109346376, "loss": 0.8802, "step": 16682 }, { "epoch": 0.42837265956371234, "grad_norm": 0.94140625, "learning_rate": 0.00016927037148081248, "loss": 0.9614, "step": 16683 }, { "epoch": 0.4283983367596342, "grad_norm": 0.75, "learning_rate": 0.0001692671517301116, "loss": 0.7906, "step": 16684 }, { "epoch": 0.42842401395555596, "grad_norm": 0.7734375, "learning_rate": 0.00016926393184136753, "loss": 0.9415, "step": 16685 }, { "epoch": 0.4284496911514778, "grad_norm": 0.81640625, "learning_rate": 0.00016926071181458673, "loss": 0.8655, "step": 16686 }, { "epoch": 0.42847536834739963, "grad_norm": 0.828125, "learning_rate": 0.00016925749164977555, "loss": 0.9889, "step": 16687 }, { "epoch": 0.42850104554332147, "grad_norm": 0.7421875, "learning_rate": 0.00016925427134694047, "loss": 1.0363, "step": 16688 }, { "epoch": 0.42852672273924325, "grad_norm": 0.78125, "learning_rate": 0.0001692510509060879, "loss": 0.8018, "step": 16689 }, { "epoch": 0.4285523999351651, "grad_norm": 0.78515625, "learning_rate": 0.0001692478303272242, "loss": 1.0788, "step": 16690 }, { "epoch": 0.4285780771310869, "grad_norm": 0.83203125, "learning_rate": 0.00016924460961035586, "loss": 0.9453, "step": 16691 }, { "epoch": 0.4286037543270087, "grad_norm": 0.765625, "learning_rate": 0.00016924138875548924, "loss": 0.8669, "step": 16692 }, { "epoch": 0.42862943152293054, "grad_norm": 0.75, "learning_rate": 0.0001692381677626308, "loss": 0.874, "step": 16693 }, { "epoch": 0.4286551087188524, "grad_norm": 0.81640625, "learning_rate": 0.00016923494663178694, "loss": 0.9958, "step": 16694 }, { "epoch": 0.42868078591477415, "grad_norm": 0.6875, "learning_rate": 0.0001692317253629641, "loss": 0.8699, "step": 16695 }, { "epoch": 0.428706463110696, "grad_norm": 0.765625, "learning_rate": 0.0001692285039561687, "loss": 0.8698, "step": 16696 }, { "epoch": 0.4287321403066178, "grad_norm": 0.8671875, "learning_rate": 0.00016922528241140712, "loss": 1.018, "step": 16697 }, { "epoch": 0.42875781750253966, "grad_norm": 0.80078125, "learning_rate": 0.0001692220607286858, "loss": 0.8549, "step": 16698 }, { "epoch": 0.42878349469846144, "grad_norm": 0.7890625, "learning_rate": 0.00016921883890801115, "loss": 0.9155, "step": 16699 }, { "epoch": 0.4288091718943833, "grad_norm": 0.76953125, "learning_rate": 0.00016921561694938965, "loss": 0.9676, "step": 16700 }, { "epoch": 0.4288348490903051, "grad_norm": 0.75390625, "learning_rate": 0.00016921239485282765, "loss": 0.8066, "step": 16701 }, { "epoch": 0.4288605262862269, "grad_norm": 0.73046875, "learning_rate": 0.00016920917261833163, "loss": 0.9576, "step": 16702 }, { "epoch": 0.42888620348214873, "grad_norm": 0.8359375, "learning_rate": 0.00016920595024590796, "loss": 0.9942, "step": 16703 }, { "epoch": 0.42891188067807057, "grad_norm": 0.78125, "learning_rate": 0.00016920272773556305, "loss": 0.9728, "step": 16704 }, { "epoch": 0.42893755787399235, "grad_norm": 0.80859375, "learning_rate": 0.0001691995050873034, "loss": 0.9985, "step": 16705 }, { "epoch": 0.4289632350699142, "grad_norm": 0.92578125, "learning_rate": 0.00016919628230113536, "loss": 0.8853, "step": 16706 }, { "epoch": 0.428988912265836, "grad_norm": 0.73828125, "learning_rate": 0.00016919305937706543, "loss": 0.8295, "step": 16707 }, { "epoch": 0.42901458946175786, "grad_norm": 0.80078125, "learning_rate": 0.00016918983631509992, "loss": 1.0484, "step": 16708 }, { "epoch": 0.42904026665767964, "grad_norm": 0.8203125, "learning_rate": 0.00016918661311524536, "loss": 0.9223, "step": 16709 }, { "epoch": 0.4290659438536015, "grad_norm": 0.71875, "learning_rate": 0.0001691833897775081, "loss": 0.8496, "step": 16710 }, { "epoch": 0.4290916210495233, "grad_norm": 0.80078125, "learning_rate": 0.0001691801663018946, "loss": 0.9599, "step": 16711 }, { "epoch": 0.4291172982454451, "grad_norm": 0.6953125, "learning_rate": 0.0001691769426884113, "loss": 0.7035, "step": 16712 }, { "epoch": 0.4291429754413669, "grad_norm": 0.82421875, "learning_rate": 0.00016917371893706459, "loss": 0.8136, "step": 16713 }, { "epoch": 0.42916865263728876, "grad_norm": 0.75, "learning_rate": 0.0001691704950478609, "loss": 0.9578, "step": 16714 }, { "epoch": 0.42919432983321054, "grad_norm": 0.76171875, "learning_rate": 0.00016916727102080662, "loss": 0.9796, "step": 16715 }, { "epoch": 0.4292200070291324, "grad_norm": 0.8046875, "learning_rate": 0.00016916404685590828, "loss": 0.9885, "step": 16716 }, { "epoch": 0.4292456842250542, "grad_norm": 0.7578125, "learning_rate": 0.00016916082255317224, "loss": 0.8479, "step": 16717 }, { "epoch": 0.42927136142097605, "grad_norm": 0.83203125, "learning_rate": 0.00016915759811260487, "loss": 0.8625, "step": 16718 }, { "epoch": 0.42929703861689783, "grad_norm": 0.7890625, "learning_rate": 0.0001691543735342127, "loss": 0.9286, "step": 16719 }, { "epoch": 0.42932271581281967, "grad_norm": 0.8984375, "learning_rate": 0.00016915114881800212, "loss": 0.849, "step": 16720 }, { "epoch": 0.4293483930087415, "grad_norm": 0.796875, "learning_rate": 0.00016914792396397954, "loss": 0.9751, "step": 16721 }, { "epoch": 0.4293740702046633, "grad_norm": 0.81640625, "learning_rate": 0.00016914469897215138, "loss": 0.8649, "step": 16722 }, { "epoch": 0.4293997474005851, "grad_norm": 0.8828125, "learning_rate": 0.00016914147384252407, "loss": 0.9483, "step": 16723 }, { "epoch": 0.42942542459650695, "grad_norm": 0.74609375, "learning_rate": 0.00016913824857510407, "loss": 1.0217, "step": 16724 }, { "epoch": 0.42945110179242874, "grad_norm": 0.7734375, "learning_rate": 0.00016913502316989778, "loss": 0.8814, "step": 16725 }, { "epoch": 0.42947677898835057, "grad_norm": 0.765625, "learning_rate": 0.0001691317976269116, "loss": 0.9227, "step": 16726 }, { "epoch": 0.4295024561842724, "grad_norm": 0.80859375, "learning_rate": 0.00016912857194615206, "loss": 1.007, "step": 16727 }, { "epoch": 0.42952813338019424, "grad_norm": 0.7890625, "learning_rate": 0.00016912534612762548, "loss": 0.8994, "step": 16728 }, { "epoch": 0.429553810576116, "grad_norm": 0.8984375, "learning_rate": 0.00016912212017133833, "loss": 0.9127, "step": 16729 }, { "epoch": 0.42957948777203786, "grad_norm": 0.7734375, "learning_rate": 0.000169118894077297, "loss": 0.985, "step": 16730 }, { "epoch": 0.4296051649679597, "grad_norm": 0.75390625, "learning_rate": 0.00016911566784550802, "loss": 0.7658, "step": 16731 }, { "epoch": 0.4296308421638815, "grad_norm": 0.75, "learning_rate": 0.00016911244147597775, "loss": 1.0108, "step": 16732 }, { "epoch": 0.4296565193598033, "grad_norm": 0.76953125, "learning_rate": 0.0001691092149687126, "loss": 0.9212, "step": 16733 }, { "epoch": 0.42968219655572515, "grad_norm": 0.73046875, "learning_rate": 0.00016910598832371908, "loss": 0.8642, "step": 16734 }, { "epoch": 0.42970787375164693, "grad_norm": 0.828125, "learning_rate": 0.0001691027615410035, "loss": 0.9335, "step": 16735 }, { "epoch": 0.42973355094756877, "grad_norm": 0.75390625, "learning_rate": 0.0001690995346205724, "loss": 0.8955, "step": 16736 }, { "epoch": 0.4297592281434906, "grad_norm": 0.71484375, "learning_rate": 0.00016909630756243216, "loss": 0.9423, "step": 16737 }, { "epoch": 0.4297849053394124, "grad_norm": 0.93359375, "learning_rate": 0.00016909308036658919, "loss": 0.9223, "step": 16738 }, { "epoch": 0.4298105825353342, "grad_norm": 0.78125, "learning_rate": 0.00016908985303305, "loss": 0.8785, "step": 16739 }, { "epoch": 0.42983625973125605, "grad_norm": 0.7421875, "learning_rate": 0.00016908662556182094, "loss": 1.0263, "step": 16740 }, { "epoch": 0.4298619369271779, "grad_norm": 0.7734375, "learning_rate": 0.0001690833979529085, "loss": 0.8513, "step": 16741 }, { "epoch": 0.42988761412309967, "grad_norm": 1.40625, "learning_rate": 0.00016908017020631906, "loss": 0.9441, "step": 16742 }, { "epoch": 0.4299132913190215, "grad_norm": 0.83203125, "learning_rate": 0.0001690769423220591, "loss": 0.9899, "step": 16743 }, { "epoch": 0.42993896851494334, "grad_norm": 0.73046875, "learning_rate": 0.00016907371430013504, "loss": 0.7652, "step": 16744 }, { "epoch": 0.4299646457108651, "grad_norm": 0.73046875, "learning_rate": 0.0001690704861405533, "loss": 0.8923, "step": 16745 }, { "epoch": 0.42999032290678696, "grad_norm": 0.7421875, "learning_rate": 0.0001690672578433203, "loss": 0.8053, "step": 16746 }, { "epoch": 0.4300160001027088, "grad_norm": 0.7890625, "learning_rate": 0.00016906402940844253, "loss": 1.0669, "step": 16747 }, { "epoch": 0.4300416772986306, "grad_norm": 0.73828125, "learning_rate": 0.00016906080083592637, "loss": 0.7943, "step": 16748 }, { "epoch": 0.4300673544945524, "grad_norm": 0.79296875, "learning_rate": 0.00016905757212577828, "loss": 0.861, "step": 16749 }, { "epoch": 0.43009303169047425, "grad_norm": 0.76171875, "learning_rate": 0.00016905434327800465, "loss": 0.9103, "step": 16750 }, { "epoch": 0.4301187088863961, "grad_norm": 0.7421875, "learning_rate": 0.00016905111429261197, "loss": 0.8978, "step": 16751 }, { "epoch": 0.43014438608231786, "grad_norm": 0.83203125, "learning_rate": 0.0001690478851696067, "loss": 0.8962, "step": 16752 }, { "epoch": 0.4301700632782397, "grad_norm": 0.76171875, "learning_rate": 0.0001690446559089952, "loss": 0.8843, "step": 16753 }, { "epoch": 0.43019574047416154, "grad_norm": 0.87890625, "learning_rate": 0.00016904142651078395, "loss": 0.935, "step": 16754 }, { "epoch": 0.4302214176700833, "grad_norm": 0.80859375, "learning_rate": 0.00016903819697497934, "loss": 1.0851, "step": 16755 }, { "epoch": 0.43024709486600515, "grad_norm": 0.85546875, "learning_rate": 0.00016903496730158785, "loss": 0.8791, "step": 16756 }, { "epoch": 0.430272772061927, "grad_norm": 0.796875, "learning_rate": 0.00016903173749061593, "loss": 0.9683, "step": 16757 }, { "epoch": 0.43029844925784877, "grad_norm": 0.7734375, "learning_rate": 0.00016902850754206997, "loss": 1.0201, "step": 16758 }, { "epoch": 0.4303241264537706, "grad_norm": 0.81640625, "learning_rate": 0.00016902527745595643, "loss": 0.9578, "step": 16759 }, { "epoch": 0.43034980364969244, "grad_norm": 0.6875, "learning_rate": 0.00016902204723228176, "loss": 0.8486, "step": 16760 }, { "epoch": 0.4303754808456143, "grad_norm": 0.70703125, "learning_rate": 0.00016901881687105238, "loss": 0.8102, "step": 16761 }, { "epoch": 0.43040115804153606, "grad_norm": 0.72265625, "learning_rate": 0.00016901558637227472, "loss": 0.8282, "step": 16762 }, { "epoch": 0.4304268352374579, "grad_norm": 0.75, "learning_rate": 0.00016901235573595527, "loss": 0.9272, "step": 16763 }, { "epoch": 0.43045251243337973, "grad_norm": 0.80859375, "learning_rate": 0.00016900912496210037, "loss": 1.0135, "step": 16764 }, { "epoch": 0.4304781896293015, "grad_norm": 0.7421875, "learning_rate": 0.00016900589405071656, "loss": 0.8399, "step": 16765 }, { "epoch": 0.43050386682522335, "grad_norm": 0.6953125, "learning_rate": 0.00016900266300181024, "loss": 0.8232, "step": 16766 }, { "epoch": 0.4305295440211452, "grad_norm": 0.88671875, "learning_rate": 0.0001689994318153878, "loss": 1.184, "step": 16767 }, { "epoch": 0.43055522121706696, "grad_norm": 0.75, "learning_rate": 0.00016899620049145577, "loss": 0.788, "step": 16768 }, { "epoch": 0.4305808984129888, "grad_norm": 0.8515625, "learning_rate": 0.00016899296903002053, "loss": 1.0246, "step": 16769 }, { "epoch": 0.43060657560891064, "grad_norm": 0.88671875, "learning_rate": 0.00016898973743108853, "loss": 0.9788, "step": 16770 }, { "epoch": 0.43063225280483247, "grad_norm": 0.8515625, "learning_rate": 0.00016898650569466622, "loss": 0.9243, "step": 16771 }, { "epoch": 0.43065793000075425, "grad_norm": 0.7734375, "learning_rate": 0.00016898327382076006, "loss": 0.9865, "step": 16772 }, { "epoch": 0.4306836071966761, "grad_norm": 0.77734375, "learning_rate": 0.00016898004180937643, "loss": 1.0195, "step": 16773 }, { "epoch": 0.4307092843925979, "grad_norm": 0.8359375, "learning_rate": 0.0001689768096605218, "loss": 1.0096, "step": 16774 }, { "epoch": 0.4307349615885197, "grad_norm": 0.8203125, "learning_rate": 0.00016897357737420267, "loss": 1.0771, "step": 16775 }, { "epoch": 0.43076063878444154, "grad_norm": 0.73046875, "learning_rate": 0.00016897034495042538, "loss": 0.9264, "step": 16776 }, { "epoch": 0.4307863159803634, "grad_norm": 0.83984375, "learning_rate": 0.00016896711238919646, "loss": 0.9389, "step": 16777 }, { "epoch": 0.43081199317628516, "grad_norm": 0.83984375, "learning_rate": 0.00016896387969052228, "loss": 0.9277, "step": 16778 }, { "epoch": 0.430837670372207, "grad_norm": 0.7578125, "learning_rate": 0.00016896064685440932, "loss": 0.8899, "step": 16779 }, { "epoch": 0.43086334756812883, "grad_norm": 0.7421875, "learning_rate": 0.00016895741388086406, "loss": 0.7609, "step": 16780 }, { "epoch": 0.43088902476405067, "grad_norm": 0.76171875, "learning_rate": 0.0001689541807698929, "loss": 0.9077, "step": 16781 }, { "epoch": 0.43091470195997245, "grad_norm": 0.80859375, "learning_rate": 0.00016895094752150222, "loss": 1.0722, "step": 16782 }, { "epoch": 0.4309403791558943, "grad_norm": 0.76171875, "learning_rate": 0.0001689477141356986, "loss": 0.7983, "step": 16783 }, { "epoch": 0.4309660563518161, "grad_norm": 0.7890625, "learning_rate": 0.00016894448061248837, "loss": 1.0713, "step": 16784 }, { "epoch": 0.4309917335477379, "grad_norm": 0.7890625, "learning_rate": 0.00016894124695187804, "loss": 0.8106, "step": 16785 }, { "epoch": 0.43101741074365973, "grad_norm": 0.734375, "learning_rate": 0.00016893801315387403, "loss": 0.8527, "step": 16786 }, { "epoch": 0.43104308793958157, "grad_norm": 0.875, "learning_rate": 0.00016893477921848277, "loss": 0.8715, "step": 16787 }, { "epoch": 0.43106876513550335, "grad_norm": 0.81640625, "learning_rate": 0.00016893154514571076, "loss": 0.9273, "step": 16788 }, { "epoch": 0.4310944423314252, "grad_norm": 0.82421875, "learning_rate": 0.00016892831093556437, "loss": 0.8805, "step": 16789 }, { "epoch": 0.431120119527347, "grad_norm": 0.73828125, "learning_rate": 0.0001689250765880501, "loss": 0.9147, "step": 16790 }, { "epoch": 0.43114579672326886, "grad_norm": 0.87109375, "learning_rate": 0.00016892184210317437, "loss": 1.0132, "step": 16791 }, { "epoch": 0.43117147391919064, "grad_norm": 0.75390625, "learning_rate": 0.00016891860748094367, "loss": 0.9621, "step": 16792 }, { "epoch": 0.4311971511151125, "grad_norm": 0.8046875, "learning_rate": 0.00016891537272136438, "loss": 0.965, "step": 16793 }, { "epoch": 0.4312228283110343, "grad_norm": 0.828125, "learning_rate": 0.00016891213782444295, "loss": 0.8118, "step": 16794 }, { "epoch": 0.4312485055069561, "grad_norm": 0.8203125, "learning_rate": 0.00016890890279018587, "loss": 1.0129, "step": 16795 }, { "epoch": 0.43127418270287793, "grad_norm": 0.84375, "learning_rate": 0.0001689056676185996, "loss": 0.9755, "step": 16796 }, { "epoch": 0.43129985989879976, "grad_norm": 0.83203125, "learning_rate": 0.0001689024323096905, "loss": 1.0287, "step": 16797 }, { "epoch": 0.43132553709472155, "grad_norm": 0.78515625, "learning_rate": 0.00016889919686346514, "loss": 0.9948, "step": 16798 }, { "epoch": 0.4313512142906434, "grad_norm": 0.81640625, "learning_rate": 0.0001688959612799299, "loss": 0.8888, "step": 16799 }, { "epoch": 0.4313768914865652, "grad_norm": 0.83984375, "learning_rate": 0.0001688927255590912, "loss": 0.9152, "step": 16800 }, { "epoch": 0.43140256868248705, "grad_norm": 0.71484375, "learning_rate": 0.00016888948970095554, "loss": 0.8876, "step": 16801 }, { "epoch": 0.43142824587840883, "grad_norm": 0.77734375, "learning_rate": 0.00016888625370552936, "loss": 0.8423, "step": 16802 }, { "epoch": 0.43145392307433067, "grad_norm": 0.73828125, "learning_rate": 0.00016888301757281906, "loss": 0.9112, "step": 16803 }, { "epoch": 0.4314796002702525, "grad_norm": 0.7578125, "learning_rate": 0.00016887978130283114, "loss": 1.0364, "step": 16804 }, { "epoch": 0.4315052774661743, "grad_norm": 0.82421875, "learning_rate": 0.00016887654489557206, "loss": 0.9901, "step": 16805 }, { "epoch": 0.4315309546620961, "grad_norm": 0.74609375, "learning_rate": 0.00016887330835104828, "loss": 0.9191, "step": 16806 }, { "epoch": 0.43155663185801796, "grad_norm": 0.8515625, "learning_rate": 0.00016887007166926617, "loss": 0.9597, "step": 16807 }, { "epoch": 0.43158230905393974, "grad_norm": 0.89453125, "learning_rate": 0.0001688668348502322, "loss": 1.0607, "step": 16808 }, { "epoch": 0.4316079862498616, "grad_norm": 0.90234375, "learning_rate": 0.00016886359789395288, "loss": 0.8777, "step": 16809 }, { "epoch": 0.4316336634457834, "grad_norm": 0.79296875, "learning_rate": 0.00016886036080043465, "loss": 0.8638, "step": 16810 }, { "epoch": 0.43165934064170525, "grad_norm": 0.7890625, "learning_rate": 0.0001688571235696839, "loss": 0.9785, "step": 16811 }, { "epoch": 0.431685017837627, "grad_norm": 0.76953125, "learning_rate": 0.00016885388620170716, "loss": 0.9161, "step": 16812 }, { "epoch": 0.43171069503354886, "grad_norm": 0.80859375, "learning_rate": 0.00016885064869651083, "loss": 0.9671, "step": 16813 }, { "epoch": 0.4317363722294707, "grad_norm": 0.7265625, "learning_rate": 0.00016884741105410137, "loss": 0.8577, "step": 16814 }, { "epoch": 0.4317620494253925, "grad_norm": 0.74609375, "learning_rate": 0.00016884417327448525, "loss": 0.8918, "step": 16815 }, { "epoch": 0.4317877266213143, "grad_norm": 0.78125, "learning_rate": 0.00016884093535766889, "loss": 0.9099, "step": 16816 }, { "epoch": 0.43181340381723615, "grad_norm": 0.74609375, "learning_rate": 0.0001688376973036588, "loss": 0.8116, "step": 16817 }, { "epoch": 0.43183908101315793, "grad_norm": 0.77734375, "learning_rate": 0.00016883445911246135, "loss": 0.8083, "step": 16818 }, { "epoch": 0.43186475820907977, "grad_norm": 0.81640625, "learning_rate": 0.0001688312207840831, "loss": 1.0506, "step": 16819 }, { "epoch": 0.4318904354050016, "grad_norm": 0.828125, "learning_rate": 0.00016882798231853042, "loss": 0.8986, "step": 16820 }, { "epoch": 0.43191611260092344, "grad_norm": 0.8515625, "learning_rate": 0.00016882474371580978, "loss": 0.8886, "step": 16821 }, { "epoch": 0.4319417897968452, "grad_norm": 0.8125, "learning_rate": 0.00016882150497592763, "loss": 0.8983, "step": 16822 }, { "epoch": 0.43196746699276706, "grad_norm": 0.76171875, "learning_rate": 0.00016881826609889045, "loss": 0.8809, "step": 16823 }, { "epoch": 0.4319931441886889, "grad_norm": 0.875, "learning_rate": 0.00016881502708470468, "loss": 1.0035, "step": 16824 }, { "epoch": 0.4320188213846107, "grad_norm": 0.8046875, "learning_rate": 0.0001688117879333768, "loss": 1.0167, "step": 16825 }, { "epoch": 0.4320444985805325, "grad_norm": 0.76953125, "learning_rate": 0.00016880854864491322, "loss": 0.802, "step": 16826 }, { "epoch": 0.43207017577645435, "grad_norm": 0.71875, "learning_rate": 0.0001688053092193204, "loss": 0.8124, "step": 16827 }, { "epoch": 0.4320958529723761, "grad_norm": 0.76953125, "learning_rate": 0.00016880206965660483, "loss": 0.8108, "step": 16828 }, { "epoch": 0.43212153016829796, "grad_norm": 0.75, "learning_rate": 0.00016879882995677296, "loss": 0.9463, "step": 16829 }, { "epoch": 0.4321472073642198, "grad_norm": 0.7421875, "learning_rate": 0.00016879559011983124, "loss": 0.7386, "step": 16830 }, { "epoch": 0.43217288456014163, "grad_norm": 0.796875, "learning_rate": 0.0001687923501457861, "loss": 0.8343, "step": 16831 }, { "epoch": 0.4321985617560634, "grad_norm": 0.7734375, "learning_rate": 0.00016878911003464405, "loss": 0.8317, "step": 16832 }, { "epoch": 0.43222423895198525, "grad_norm": 0.84375, "learning_rate": 0.0001687858697864115, "loss": 0.9566, "step": 16833 }, { "epoch": 0.4322499161479071, "grad_norm": 0.78125, "learning_rate": 0.00016878262940109493, "loss": 0.9926, "step": 16834 }, { "epoch": 0.43227559334382887, "grad_norm": 0.76171875, "learning_rate": 0.00016877938887870078, "loss": 0.9064, "step": 16835 }, { "epoch": 0.4323012705397507, "grad_norm": 0.78125, "learning_rate": 0.00016877614821923556, "loss": 0.9173, "step": 16836 }, { "epoch": 0.43232694773567254, "grad_norm": 0.796875, "learning_rate": 0.0001687729074227056, "loss": 0.8219, "step": 16837 }, { "epoch": 0.4323526249315943, "grad_norm": 0.81640625, "learning_rate": 0.00016876966648911754, "loss": 0.8606, "step": 16838 }, { "epoch": 0.43237830212751616, "grad_norm": 0.79296875, "learning_rate": 0.00016876642541847772, "loss": 0.8728, "step": 16839 }, { "epoch": 0.432403979323438, "grad_norm": 0.8203125, "learning_rate": 0.0001687631842107926, "loss": 0.9514, "step": 16840 }, { "epoch": 0.43242965651935983, "grad_norm": 0.82421875, "learning_rate": 0.0001687599428660687, "loss": 0.9601, "step": 16841 }, { "epoch": 0.4324553337152816, "grad_norm": 0.859375, "learning_rate": 0.00016875670138431244, "loss": 0.9158, "step": 16842 }, { "epoch": 0.43248101091120345, "grad_norm": 0.75390625, "learning_rate": 0.00016875345976553026, "loss": 0.8651, "step": 16843 }, { "epoch": 0.4325066881071253, "grad_norm": 0.765625, "learning_rate": 0.0001687502180097287, "loss": 0.8528, "step": 16844 }, { "epoch": 0.43253236530304706, "grad_norm": 0.75390625, "learning_rate": 0.0001687469761169141, "loss": 0.9054, "step": 16845 }, { "epoch": 0.4325580424989689, "grad_norm": 0.8671875, "learning_rate": 0.000168743734087093, "loss": 0.9229, "step": 16846 }, { "epoch": 0.43258371969489073, "grad_norm": 0.73828125, "learning_rate": 0.00016874049192027186, "loss": 0.8557, "step": 16847 }, { "epoch": 0.4326093968908125, "grad_norm": 0.75390625, "learning_rate": 0.00016873724961645714, "loss": 0.7639, "step": 16848 }, { "epoch": 0.43263507408673435, "grad_norm": 0.84375, "learning_rate": 0.00016873400717565525, "loss": 0.8733, "step": 16849 }, { "epoch": 0.4326607512826562, "grad_norm": 0.91796875, "learning_rate": 0.00016873076459787275, "loss": 0.9249, "step": 16850 }, { "epoch": 0.432686428478578, "grad_norm": 0.875, "learning_rate": 0.000168727521883116, "loss": 1.079, "step": 16851 }, { "epoch": 0.4327121056744998, "grad_norm": 0.7578125, "learning_rate": 0.00016872427903139153, "loss": 0.9418, "step": 16852 }, { "epoch": 0.43273778287042164, "grad_norm": 0.83203125, "learning_rate": 0.0001687210360427058, "loss": 0.9333, "step": 16853 }, { "epoch": 0.4327634600663435, "grad_norm": 0.81640625, "learning_rate": 0.0001687177929170652, "loss": 0.8755, "step": 16854 }, { "epoch": 0.43278913726226526, "grad_norm": 0.8125, "learning_rate": 0.00016871454965447627, "loss": 0.9596, "step": 16855 }, { "epoch": 0.4328148144581871, "grad_norm": 0.875, "learning_rate": 0.00016871130625494544, "loss": 0.8178, "step": 16856 }, { "epoch": 0.4328404916541089, "grad_norm": 0.7890625, "learning_rate": 0.0001687080627184792, "loss": 0.9119, "step": 16857 }, { "epoch": 0.4328661688500307, "grad_norm": 0.78515625, "learning_rate": 0.000168704819045084, "loss": 0.9363, "step": 16858 }, { "epoch": 0.43289184604595254, "grad_norm": 0.76171875, "learning_rate": 0.0001687015752347663, "loss": 0.804, "step": 16859 }, { "epoch": 0.4329175232418744, "grad_norm": 0.875, "learning_rate": 0.00016869833128753254, "loss": 0.9407, "step": 16860 }, { "epoch": 0.4329432004377962, "grad_norm": 0.78125, "learning_rate": 0.00016869508720338921, "loss": 0.9749, "step": 16861 }, { "epoch": 0.432968877633718, "grad_norm": 0.83203125, "learning_rate": 0.0001686918429823428, "loss": 0.9518, "step": 16862 }, { "epoch": 0.43299455482963983, "grad_norm": 0.84765625, "learning_rate": 0.0001686885986243998, "loss": 0.9914, "step": 16863 }, { "epoch": 0.43302023202556167, "grad_norm": 0.76171875, "learning_rate": 0.00016868535412956653, "loss": 0.9619, "step": 16864 }, { "epoch": 0.43304590922148345, "grad_norm": 0.69140625, "learning_rate": 0.00016868210949784964, "loss": 0.8389, "step": 16865 }, { "epoch": 0.4330715864174053, "grad_norm": 0.8515625, "learning_rate": 0.00016867886472925547, "loss": 0.9243, "step": 16866 }, { "epoch": 0.4330972636133271, "grad_norm": 1.015625, "learning_rate": 0.0001686756198237905, "loss": 1.064, "step": 16867 }, { "epoch": 0.4331229408092489, "grad_norm": 0.8046875, "learning_rate": 0.00016867237478146126, "loss": 0.9906, "step": 16868 }, { "epoch": 0.43314861800517074, "grad_norm": 0.78515625, "learning_rate": 0.00016866912960227418, "loss": 0.8224, "step": 16869 }, { "epoch": 0.4331742952010926, "grad_norm": 0.80859375, "learning_rate": 0.00016866588428623568, "loss": 1.0311, "step": 16870 }, { "epoch": 0.4331999723970144, "grad_norm": 0.765625, "learning_rate": 0.00016866263883335233, "loss": 0.9099, "step": 16871 }, { "epoch": 0.4332256495929362, "grad_norm": 0.77734375, "learning_rate": 0.00016865939324363054, "loss": 0.8875, "step": 16872 }, { "epoch": 0.433251326788858, "grad_norm": 0.77734375, "learning_rate": 0.00016865614751707678, "loss": 0.9233, "step": 16873 }, { "epoch": 0.43327700398477986, "grad_norm": 1.0, "learning_rate": 0.00016865290165369746, "loss": 0.914, "step": 16874 }, { "epoch": 0.43330268118070164, "grad_norm": 0.70703125, "learning_rate": 0.00016864965565349918, "loss": 0.845, "step": 16875 }, { "epoch": 0.4333283583766235, "grad_norm": 0.828125, "learning_rate": 0.00016864640951648832, "loss": 0.991, "step": 16876 }, { "epoch": 0.4333540355725453, "grad_norm": 0.80859375, "learning_rate": 0.00016864316324267137, "loss": 0.8934, "step": 16877 }, { "epoch": 0.4333797127684671, "grad_norm": 0.81640625, "learning_rate": 0.00016863991683205474, "loss": 0.987, "step": 16878 }, { "epoch": 0.43340538996438893, "grad_norm": 0.765625, "learning_rate": 0.00016863667028464504, "loss": 0.9939, "step": 16879 }, { "epoch": 0.43343106716031077, "grad_norm": 0.73046875, "learning_rate": 0.0001686334236004486, "loss": 0.869, "step": 16880 }, { "epoch": 0.4334567443562326, "grad_norm": 0.74609375, "learning_rate": 0.00016863017677947196, "loss": 0.8455, "step": 16881 }, { "epoch": 0.4334824215521544, "grad_norm": 0.78125, "learning_rate": 0.00016862692982172158, "loss": 0.899, "step": 16882 }, { "epoch": 0.4335080987480762, "grad_norm": 0.75, "learning_rate": 0.00016862368272720391, "loss": 0.8795, "step": 16883 }, { "epoch": 0.43353377594399806, "grad_norm": 0.734375, "learning_rate": 0.0001686204354959255, "loss": 0.824, "step": 16884 }, { "epoch": 0.43355945313991984, "grad_norm": 0.76953125, "learning_rate": 0.00016861718812789271, "loss": 0.9431, "step": 16885 }, { "epoch": 0.4335851303358417, "grad_norm": 0.765625, "learning_rate": 0.00016861394062311205, "loss": 1.0174, "step": 16886 }, { "epoch": 0.4336108075317635, "grad_norm": 0.8125, "learning_rate": 0.00016861069298159005, "loss": 0.9821, "step": 16887 }, { "epoch": 0.4336364847276853, "grad_norm": 0.75390625, "learning_rate": 0.00016860744520333308, "loss": 0.8055, "step": 16888 }, { "epoch": 0.4336621619236071, "grad_norm": 0.75390625, "learning_rate": 0.0001686041972883477, "loss": 0.9331, "step": 16889 }, { "epoch": 0.43368783911952896, "grad_norm": 0.75390625, "learning_rate": 0.00016860094923664036, "loss": 0.824, "step": 16890 }, { "epoch": 0.4337135163154508, "grad_norm": 0.78125, "learning_rate": 0.00016859770104821754, "loss": 0.7601, "step": 16891 }, { "epoch": 0.4337391935113726, "grad_norm": 0.76953125, "learning_rate": 0.00016859445272308566, "loss": 0.8323, "step": 16892 }, { "epoch": 0.4337648707072944, "grad_norm": 0.83203125, "learning_rate": 0.00016859120426125122, "loss": 0.8326, "step": 16893 }, { "epoch": 0.43379054790321625, "grad_norm": 0.69921875, "learning_rate": 0.00016858795566272074, "loss": 0.7223, "step": 16894 }, { "epoch": 0.43381622509913803, "grad_norm": 0.78515625, "learning_rate": 0.00016858470692750067, "loss": 0.7829, "step": 16895 }, { "epoch": 0.43384190229505987, "grad_norm": 0.8515625, "learning_rate": 0.00016858145805559747, "loss": 0.9206, "step": 16896 }, { "epoch": 0.4338675794909817, "grad_norm": 0.73046875, "learning_rate": 0.0001685782090470176, "loss": 0.8579, "step": 16897 }, { "epoch": 0.4338932566869035, "grad_norm": 0.83203125, "learning_rate": 0.00016857495990176756, "loss": 0.9937, "step": 16898 }, { "epoch": 0.4339189338828253, "grad_norm": 0.76171875, "learning_rate": 0.0001685717106198538, "loss": 0.8606, "step": 16899 }, { "epoch": 0.43394461107874716, "grad_norm": 0.83984375, "learning_rate": 0.00016856846120128286, "loss": 1.0185, "step": 16900 }, { "epoch": 0.433970288274669, "grad_norm": 0.74609375, "learning_rate": 0.0001685652116460611, "loss": 0.8989, "step": 16901 }, { "epoch": 0.43399596547059077, "grad_norm": 0.796875, "learning_rate": 0.00016856196195419513, "loss": 0.8862, "step": 16902 }, { "epoch": 0.4340216426665126, "grad_norm": 0.7734375, "learning_rate": 0.00016855871212569134, "loss": 1.0607, "step": 16903 }, { "epoch": 0.43404731986243444, "grad_norm": 0.77734375, "learning_rate": 0.00016855546216055625, "loss": 0.9837, "step": 16904 }, { "epoch": 0.4340729970583562, "grad_norm": 0.7421875, "learning_rate": 0.0001685522120587963, "loss": 0.8601, "step": 16905 }, { "epoch": 0.43409867425427806, "grad_norm": 0.765625, "learning_rate": 0.000168548961820418, "loss": 0.8672, "step": 16906 }, { "epoch": 0.4341243514501999, "grad_norm": 0.8046875, "learning_rate": 0.00016854571144542778, "loss": 1.0602, "step": 16907 }, { "epoch": 0.4341500286461217, "grad_norm": 0.8671875, "learning_rate": 0.00016854246093383216, "loss": 0.8504, "step": 16908 }, { "epoch": 0.4341757058420435, "grad_norm": 0.765625, "learning_rate": 0.0001685392102856376, "loss": 0.9512, "step": 16909 }, { "epoch": 0.43420138303796535, "grad_norm": 0.8515625, "learning_rate": 0.00016853595950085058, "loss": 0.8887, "step": 16910 }, { "epoch": 0.4342270602338872, "grad_norm": 0.80078125, "learning_rate": 0.00016853270857947764, "loss": 0.9192, "step": 16911 }, { "epoch": 0.43425273742980897, "grad_norm": 0.78125, "learning_rate": 0.00016852945752152514, "loss": 1.067, "step": 16912 }, { "epoch": 0.4342784146257308, "grad_norm": 0.7421875, "learning_rate": 0.00016852620632699965, "loss": 0.8415, "step": 16913 }, { "epoch": 0.43430409182165264, "grad_norm": 0.7578125, "learning_rate": 0.0001685229549959076, "loss": 0.7675, "step": 16914 }, { "epoch": 0.4343297690175744, "grad_norm": 0.828125, "learning_rate": 0.0001685197035282555, "loss": 0.8782, "step": 16915 }, { "epoch": 0.43435544621349625, "grad_norm": 0.75390625, "learning_rate": 0.00016851645192404984, "loss": 0.8961, "step": 16916 }, { "epoch": 0.4343811234094181, "grad_norm": 0.73828125, "learning_rate": 0.00016851320018329707, "loss": 0.8094, "step": 16917 }, { "epoch": 0.43440680060533987, "grad_norm": 0.75390625, "learning_rate": 0.00016850994830600366, "loss": 0.8459, "step": 16918 }, { "epoch": 0.4344324778012617, "grad_norm": 0.76171875, "learning_rate": 0.00016850669629217611, "loss": 1.0457, "step": 16919 }, { "epoch": 0.43445815499718354, "grad_norm": 0.8203125, "learning_rate": 0.00016850344414182092, "loss": 0.8557, "step": 16920 }, { "epoch": 0.4344838321931054, "grad_norm": 0.70703125, "learning_rate": 0.00016850019185494456, "loss": 0.8304, "step": 16921 }, { "epoch": 0.43450950938902716, "grad_norm": 0.8046875, "learning_rate": 0.0001684969394315535, "loss": 0.8756, "step": 16922 }, { "epoch": 0.434535186584949, "grad_norm": 0.80078125, "learning_rate": 0.00016849368687165423, "loss": 0.9763, "step": 16923 }, { "epoch": 0.43456086378087083, "grad_norm": 0.70703125, "learning_rate": 0.00016849043417525324, "loss": 0.8833, "step": 16924 }, { "epoch": 0.4345865409767926, "grad_norm": 0.7578125, "learning_rate": 0.00016848718134235697, "loss": 0.9636, "step": 16925 }, { "epoch": 0.43461221817271445, "grad_norm": 0.83203125, "learning_rate": 0.00016848392837297196, "loss": 0.9609, "step": 16926 }, { "epoch": 0.4346378953686363, "grad_norm": 0.7890625, "learning_rate": 0.00016848067526710464, "loss": 0.9328, "step": 16927 }, { "epoch": 0.43466357256455807, "grad_norm": 0.7578125, "learning_rate": 0.00016847742202476154, "loss": 0.8368, "step": 16928 }, { "epoch": 0.4346892497604799, "grad_norm": 0.73046875, "learning_rate": 0.00016847416864594912, "loss": 0.8404, "step": 16929 }, { "epoch": 0.43471492695640174, "grad_norm": 0.87109375, "learning_rate": 0.00016847091513067387, "loss": 1.0537, "step": 16930 }, { "epoch": 0.4347406041523236, "grad_norm": 0.7890625, "learning_rate": 0.00016846766147894226, "loss": 1.0696, "step": 16931 }, { "epoch": 0.43476628134824535, "grad_norm": 0.7421875, "learning_rate": 0.0001684644076907608, "loss": 1.0723, "step": 16932 }, { "epoch": 0.4347919585441672, "grad_norm": 0.79296875, "learning_rate": 0.00016846115376613596, "loss": 0.8841, "step": 16933 }, { "epoch": 0.434817635740089, "grad_norm": 0.80859375, "learning_rate": 0.00016845789970507423, "loss": 0.9333, "step": 16934 }, { "epoch": 0.4348433129360108, "grad_norm": 0.7265625, "learning_rate": 0.00016845464550758205, "loss": 0.814, "step": 16935 }, { "epoch": 0.43486899013193264, "grad_norm": 0.82421875, "learning_rate": 0.00016845139117366599, "loss": 0.9653, "step": 16936 }, { "epoch": 0.4348946673278545, "grad_norm": 0.734375, "learning_rate": 0.00016844813670333246, "loss": 0.9792, "step": 16937 }, { "epoch": 0.43492034452377626, "grad_norm": 0.765625, "learning_rate": 0.000168444882096588, "loss": 0.9597, "step": 16938 }, { "epoch": 0.4349460217196981, "grad_norm": 0.796875, "learning_rate": 0.00016844162735343905, "loss": 1.0356, "step": 16939 }, { "epoch": 0.43497169891561993, "grad_norm": 0.83984375, "learning_rate": 0.00016843837247389215, "loss": 0.8627, "step": 16940 }, { "epoch": 0.4349973761115417, "grad_norm": 0.8046875, "learning_rate": 0.00016843511745795372, "loss": 0.9067, "step": 16941 }, { "epoch": 0.43502305330746355, "grad_norm": 0.6953125, "learning_rate": 0.00016843186230563028, "loss": 0.9088, "step": 16942 }, { "epoch": 0.4350487305033854, "grad_norm": 0.7421875, "learning_rate": 0.00016842860701692835, "loss": 0.9159, "step": 16943 }, { "epoch": 0.4350744076993072, "grad_norm": 0.8203125, "learning_rate": 0.0001684253515918544, "loss": 0.9289, "step": 16944 }, { "epoch": 0.435100084895229, "grad_norm": 0.78125, "learning_rate": 0.00016842209603041487, "loss": 0.8682, "step": 16945 }, { "epoch": 0.43512576209115084, "grad_norm": 0.765625, "learning_rate": 0.0001684188403326163, "loss": 0.836, "step": 16946 }, { "epoch": 0.43515143928707267, "grad_norm": 0.7734375, "learning_rate": 0.00016841558449846516, "loss": 0.9743, "step": 16947 }, { "epoch": 0.43517711648299445, "grad_norm": 0.8203125, "learning_rate": 0.0001684123285279679, "loss": 0.8092, "step": 16948 }, { "epoch": 0.4352027936789163, "grad_norm": 0.79296875, "learning_rate": 0.0001684090724211311, "loss": 0.9933, "step": 16949 }, { "epoch": 0.4352284708748381, "grad_norm": 0.75, "learning_rate": 0.0001684058161779612, "loss": 1.0388, "step": 16950 }, { "epoch": 0.4352541480707599, "grad_norm": 0.765625, "learning_rate": 0.00016840255979846467, "loss": 0.9153, "step": 16951 }, { "epoch": 0.43527982526668174, "grad_norm": 0.72265625, "learning_rate": 0.000168399303282648, "loss": 0.8935, "step": 16952 }, { "epoch": 0.4353055024626036, "grad_norm": 0.75390625, "learning_rate": 0.00016839604663051774, "loss": 0.9953, "step": 16953 }, { "epoch": 0.4353311796585254, "grad_norm": 0.76171875, "learning_rate": 0.00016839278984208029, "loss": 0.9777, "step": 16954 }, { "epoch": 0.4353568568544472, "grad_norm": 0.7734375, "learning_rate": 0.00016838953291734222, "loss": 0.9672, "step": 16955 }, { "epoch": 0.43538253405036903, "grad_norm": 0.71875, "learning_rate": 0.00016838627585630996, "loss": 0.932, "step": 16956 }, { "epoch": 0.43540821124629087, "grad_norm": 0.73828125, "learning_rate": 0.0001683830186589901, "loss": 0.9576, "step": 16957 }, { "epoch": 0.43543388844221265, "grad_norm": 0.8359375, "learning_rate": 0.00016837976132538898, "loss": 0.9509, "step": 16958 }, { "epoch": 0.4354595656381345, "grad_norm": 0.80859375, "learning_rate": 0.00016837650385551317, "loss": 0.7966, "step": 16959 }, { "epoch": 0.4354852428340563, "grad_norm": 0.83203125, "learning_rate": 0.0001683732462493692, "loss": 0.8598, "step": 16960 }, { "epoch": 0.4355109200299781, "grad_norm": 0.7890625, "learning_rate": 0.00016836998850696354, "loss": 0.8106, "step": 16961 }, { "epoch": 0.43553659722589994, "grad_norm": 0.75390625, "learning_rate": 0.00016836673062830263, "loss": 0.9791, "step": 16962 }, { "epoch": 0.43556227442182177, "grad_norm": 0.76953125, "learning_rate": 0.00016836347261339304, "loss": 0.9906, "step": 16963 }, { "epoch": 0.4355879516177436, "grad_norm": 0.76953125, "learning_rate": 0.0001683602144622412, "loss": 0.8569, "step": 16964 }, { "epoch": 0.4356136288136654, "grad_norm": 0.77734375, "learning_rate": 0.00016835695617485366, "loss": 0.8778, "step": 16965 }, { "epoch": 0.4356393060095872, "grad_norm": 0.71484375, "learning_rate": 0.00016835369775123683, "loss": 0.8155, "step": 16966 }, { "epoch": 0.43566498320550906, "grad_norm": 0.88671875, "learning_rate": 0.00016835043919139728, "loss": 0.991, "step": 16967 }, { "epoch": 0.43569066040143084, "grad_norm": 0.7578125, "learning_rate": 0.00016834718049534147, "loss": 1.0084, "step": 16968 }, { "epoch": 0.4357163375973527, "grad_norm": 0.7265625, "learning_rate": 0.0001683439216630759, "loss": 0.9186, "step": 16969 }, { "epoch": 0.4357420147932745, "grad_norm": 0.9375, "learning_rate": 0.00016834066269460712, "loss": 0.8004, "step": 16970 }, { "epoch": 0.4357676919891963, "grad_norm": 0.75, "learning_rate": 0.00016833740358994153, "loss": 0.859, "step": 16971 }, { "epoch": 0.43579336918511813, "grad_norm": 0.7578125, "learning_rate": 0.00016833414434908565, "loss": 0.8728, "step": 16972 }, { "epoch": 0.43581904638103997, "grad_norm": 0.765625, "learning_rate": 0.000168330884972046, "loss": 0.7675, "step": 16973 }, { "epoch": 0.4358447235769618, "grad_norm": 1.0859375, "learning_rate": 0.0001683276254588291, "loss": 0.9249, "step": 16974 }, { "epoch": 0.4358704007728836, "grad_norm": 0.765625, "learning_rate": 0.00016832436580944138, "loss": 1.013, "step": 16975 }, { "epoch": 0.4358960779688054, "grad_norm": 0.8203125, "learning_rate": 0.0001683211060238894, "loss": 0.957, "step": 16976 }, { "epoch": 0.43592175516472725, "grad_norm": 0.83984375, "learning_rate": 0.00016831784610217963, "loss": 0.9492, "step": 16977 }, { "epoch": 0.43594743236064903, "grad_norm": 0.71484375, "learning_rate": 0.00016831458604431854, "loss": 0.8508, "step": 16978 }, { "epoch": 0.43597310955657087, "grad_norm": 0.73046875, "learning_rate": 0.00016831132585031264, "loss": 0.8345, "step": 16979 }, { "epoch": 0.4359987867524927, "grad_norm": 0.82421875, "learning_rate": 0.00016830806552016843, "loss": 1.0591, "step": 16980 }, { "epoch": 0.4360244639484145, "grad_norm": 0.7578125, "learning_rate": 0.00016830480505389245, "loss": 0.9604, "step": 16981 }, { "epoch": 0.4360501411443363, "grad_norm": 0.69140625, "learning_rate": 0.00016830154445149117, "loss": 0.8705, "step": 16982 }, { "epoch": 0.43607581834025816, "grad_norm": 0.73828125, "learning_rate": 0.00016829828371297108, "loss": 0.8612, "step": 16983 }, { "epoch": 0.43610149553618, "grad_norm": 0.8203125, "learning_rate": 0.00016829502283833864, "loss": 0.8991, "step": 16984 }, { "epoch": 0.4361271727321018, "grad_norm": 0.85546875, "learning_rate": 0.00016829176182760045, "loss": 0.9348, "step": 16985 }, { "epoch": 0.4361528499280236, "grad_norm": 0.7578125, "learning_rate": 0.0001682885006807629, "loss": 0.7796, "step": 16986 }, { "epoch": 0.43617852712394545, "grad_norm": 0.80078125, "learning_rate": 0.0001682852393978325, "loss": 1.0738, "step": 16987 }, { "epoch": 0.43620420431986723, "grad_norm": 0.7734375, "learning_rate": 0.00016828197797881587, "loss": 1.0294, "step": 16988 }, { "epoch": 0.43622988151578906, "grad_norm": 0.76953125, "learning_rate": 0.0001682787164237194, "loss": 0.9295, "step": 16989 }, { "epoch": 0.4362555587117109, "grad_norm": 0.8828125, "learning_rate": 0.0001682754547325496, "loss": 0.8956, "step": 16990 }, { "epoch": 0.4362812359076327, "grad_norm": 0.75390625, "learning_rate": 0.000168272192905313, "loss": 0.8176, "step": 16991 }, { "epoch": 0.4363069131035545, "grad_norm": 0.8125, "learning_rate": 0.00016826893094201606, "loss": 0.9158, "step": 16992 }, { "epoch": 0.43633259029947635, "grad_norm": 0.79296875, "learning_rate": 0.00016826566884266532, "loss": 0.8545, "step": 16993 }, { "epoch": 0.4363582674953982, "grad_norm": 0.69921875, "learning_rate": 0.00016826240660726727, "loss": 0.8819, "step": 16994 }, { "epoch": 0.43638394469131997, "grad_norm": 0.79296875, "learning_rate": 0.00016825914423582842, "loss": 0.8702, "step": 16995 }, { "epoch": 0.4364096218872418, "grad_norm": 0.765625, "learning_rate": 0.00016825588172835528, "loss": 0.8177, "step": 16996 }, { "epoch": 0.43643529908316364, "grad_norm": 0.7421875, "learning_rate": 0.0001682526190848543, "loss": 0.9477, "step": 16997 }, { "epoch": 0.4364609762790854, "grad_norm": 0.7734375, "learning_rate": 0.000168249356305332, "loss": 0.9385, "step": 16998 }, { "epoch": 0.43648665347500726, "grad_norm": 0.7890625, "learning_rate": 0.00016824609338979496, "loss": 0.9709, "step": 16999 }, { "epoch": 0.4365123306709291, "grad_norm": 0.78125, "learning_rate": 0.00016824283033824955, "loss": 0.9649, "step": 17000 }, { "epoch": 0.4365123306709291, "eval_loss": 0.9112228751182556, "eval_model_preparation_time": 0.0065, "eval_runtime": 404.9315, "eval_samples_per_second": 24.696, "eval_steps_per_second": 0.773, "step": 17000 }, { "epoch": 0.4365380078668509, "grad_norm": 0.8203125, "learning_rate": 0.0001682395671507024, "loss": 0.9291, "step": 17001 }, { "epoch": 0.4365636850627727, "grad_norm": 0.81640625, "learning_rate": 0.0001682363038271599, "loss": 0.9989, "step": 17002 }, { "epoch": 0.43658936225869455, "grad_norm": 0.76171875, "learning_rate": 0.00016823304036762865, "loss": 0.8449, "step": 17003 }, { "epoch": 0.4366150394546164, "grad_norm": 0.78515625, "learning_rate": 0.00016822977677211513, "loss": 0.9396, "step": 17004 }, { "epoch": 0.43664071665053816, "grad_norm": 0.78125, "learning_rate": 0.0001682265130406258, "loss": 0.8723, "step": 17005 }, { "epoch": 0.43666639384646, "grad_norm": 0.8984375, "learning_rate": 0.00016822324917316719, "loss": 1.0017, "step": 17006 }, { "epoch": 0.43669207104238184, "grad_norm": 0.828125, "learning_rate": 0.00016821998516974583, "loss": 0.9312, "step": 17007 }, { "epoch": 0.4367177482383036, "grad_norm": 0.75, "learning_rate": 0.00016821672103036817, "loss": 0.8643, "step": 17008 }, { "epoch": 0.43674342543422545, "grad_norm": 0.75390625, "learning_rate": 0.00016821345675504076, "loss": 0.8738, "step": 17009 }, { "epoch": 0.4367691026301473, "grad_norm": 0.7578125, "learning_rate": 0.0001682101923437701, "loss": 0.9244, "step": 17010 }, { "epoch": 0.43679477982606907, "grad_norm": 0.7421875, "learning_rate": 0.00016820692779656268, "loss": 0.8961, "step": 17011 }, { "epoch": 0.4368204570219909, "grad_norm": 0.73828125, "learning_rate": 0.00016820366311342503, "loss": 0.8609, "step": 17012 }, { "epoch": 0.43684613421791274, "grad_norm": 0.79296875, "learning_rate": 0.00016820039829436363, "loss": 0.9416, "step": 17013 }, { "epoch": 0.4368718114138346, "grad_norm": 0.76171875, "learning_rate": 0.00016819713333938497, "loss": 0.9539, "step": 17014 }, { "epoch": 0.43689748860975636, "grad_norm": 0.7578125, "learning_rate": 0.0001681938682484956, "loss": 0.9313, "step": 17015 }, { "epoch": 0.4369231658056782, "grad_norm": 0.828125, "learning_rate": 0.00016819060302170204, "loss": 0.929, "step": 17016 }, { "epoch": 0.43694884300160003, "grad_norm": 0.75, "learning_rate": 0.00016818733765901076, "loss": 0.8927, "step": 17017 }, { "epoch": 0.4369745201975218, "grad_norm": 0.7734375, "learning_rate": 0.00016818407216042823, "loss": 1.0019, "step": 17018 }, { "epoch": 0.43700019739344365, "grad_norm": 0.84765625, "learning_rate": 0.00016818080652596104, "loss": 0.9869, "step": 17019 }, { "epoch": 0.4370258745893655, "grad_norm": 0.82421875, "learning_rate": 0.00016817754075561565, "loss": 0.7201, "step": 17020 }, { "epoch": 0.43705155178528726, "grad_norm": 0.74609375, "learning_rate": 0.0001681742748493986, "loss": 0.8067, "step": 17021 }, { "epoch": 0.4370772289812091, "grad_norm": 0.82421875, "learning_rate": 0.00016817100880731637, "loss": 1.006, "step": 17022 }, { "epoch": 0.43710290617713093, "grad_norm": 0.74609375, "learning_rate": 0.00016816774262937547, "loss": 0.8939, "step": 17023 }, { "epoch": 0.43712858337305277, "grad_norm": 0.83203125, "learning_rate": 0.00016816447631558243, "loss": 0.9775, "step": 17024 }, { "epoch": 0.43715426056897455, "grad_norm": 0.78125, "learning_rate": 0.00016816120986594372, "loss": 0.9215, "step": 17025 }, { "epoch": 0.4371799377648964, "grad_norm": 0.734375, "learning_rate": 0.0001681579432804659, "loss": 0.8514, "step": 17026 }, { "epoch": 0.4372056149608182, "grad_norm": 0.81640625, "learning_rate": 0.0001681546765591554, "loss": 0.8136, "step": 17027 }, { "epoch": 0.43723129215674, "grad_norm": 0.765625, "learning_rate": 0.00016815140970201885, "loss": 0.9358, "step": 17028 }, { "epoch": 0.43725696935266184, "grad_norm": 0.85546875, "learning_rate": 0.00016814814270906268, "loss": 1.0639, "step": 17029 }, { "epoch": 0.4372826465485837, "grad_norm": 0.77734375, "learning_rate": 0.00016814487558029344, "loss": 0.9375, "step": 17030 }, { "epoch": 0.43730832374450546, "grad_norm": 0.85546875, "learning_rate": 0.00016814160831571758, "loss": 0.8739, "step": 17031 }, { "epoch": 0.4373340009404273, "grad_norm": 0.78125, "learning_rate": 0.00016813834091534166, "loss": 0.867, "step": 17032 }, { "epoch": 0.43735967813634913, "grad_norm": 0.734375, "learning_rate": 0.00016813507337917218, "loss": 0.8048, "step": 17033 }, { "epoch": 0.43738535533227096, "grad_norm": 0.79296875, "learning_rate": 0.00016813180570721567, "loss": 0.997, "step": 17034 }, { "epoch": 0.43741103252819274, "grad_norm": 0.88671875, "learning_rate": 0.00016812853789947862, "loss": 0.9823, "step": 17035 }, { "epoch": 0.4374367097241146, "grad_norm": 0.71875, "learning_rate": 0.00016812526995596753, "loss": 0.8363, "step": 17036 }, { "epoch": 0.4374623869200364, "grad_norm": 0.7578125, "learning_rate": 0.00016812200187668895, "loss": 1.0766, "step": 17037 }, { "epoch": 0.4374880641159582, "grad_norm": 0.76953125, "learning_rate": 0.0001681187336616494, "loss": 0.8654, "step": 17038 }, { "epoch": 0.43751374131188003, "grad_norm": 0.79296875, "learning_rate": 0.0001681154653108553, "loss": 0.8666, "step": 17039 }, { "epoch": 0.43753941850780187, "grad_norm": 0.78515625, "learning_rate": 0.00016811219682431324, "loss": 0.9249, "step": 17040 }, { "epoch": 0.43756509570372365, "grad_norm": 0.81640625, "learning_rate": 0.00016810892820202976, "loss": 0.9079, "step": 17041 }, { "epoch": 0.4375907728996455, "grad_norm": 0.86328125, "learning_rate": 0.00016810565944401135, "loss": 0.9807, "step": 17042 }, { "epoch": 0.4376164500955673, "grad_norm": 0.7578125, "learning_rate": 0.0001681023905502645, "loss": 0.7954, "step": 17043 }, { "epoch": 0.43764212729148916, "grad_norm": 0.80859375, "learning_rate": 0.0001680991215207957, "loss": 0.9567, "step": 17044 }, { "epoch": 0.43766780448741094, "grad_norm": 0.84375, "learning_rate": 0.00016809585235561154, "loss": 0.8233, "step": 17045 }, { "epoch": 0.4376934816833328, "grad_norm": 0.80859375, "learning_rate": 0.00016809258305471847, "loss": 0.8878, "step": 17046 }, { "epoch": 0.4377191588792546, "grad_norm": 0.80078125, "learning_rate": 0.00016808931361812304, "loss": 0.8603, "step": 17047 }, { "epoch": 0.4377448360751764, "grad_norm": 0.76171875, "learning_rate": 0.0001680860440458318, "loss": 0.9475, "step": 17048 }, { "epoch": 0.4377705132710982, "grad_norm": 0.81640625, "learning_rate": 0.00016808277433785117, "loss": 0.8072, "step": 17049 }, { "epoch": 0.43779619046702006, "grad_norm": 0.77734375, "learning_rate": 0.00016807950449418776, "loss": 0.9829, "step": 17050 }, { "epoch": 0.43782186766294184, "grad_norm": 0.734375, "learning_rate": 0.00016807623451484802, "loss": 0.8337, "step": 17051 }, { "epoch": 0.4378475448588637, "grad_norm": 0.83984375, "learning_rate": 0.0001680729643998385, "loss": 1.0066, "step": 17052 }, { "epoch": 0.4378732220547855, "grad_norm": 0.8203125, "learning_rate": 0.00016806969414916574, "loss": 0.9453, "step": 17053 }, { "epoch": 0.43789889925070735, "grad_norm": 0.7265625, "learning_rate": 0.00016806642376283615, "loss": 0.8675, "step": 17054 }, { "epoch": 0.43792457644662913, "grad_norm": 0.76953125, "learning_rate": 0.00016806315324085643, "loss": 1.021, "step": 17055 }, { "epoch": 0.43795025364255097, "grad_norm": 0.88671875, "learning_rate": 0.0001680598825832329, "loss": 0.9774, "step": 17056 }, { "epoch": 0.4379759308384728, "grad_norm": 0.8203125, "learning_rate": 0.00016805661178997223, "loss": 0.9454, "step": 17057 }, { "epoch": 0.4380016080343946, "grad_norm": 0.7421875, "learning_rate": 0.00016805334086108088, "loss": 0.8654, "step": 17058 }, { "epoch": 0.4380272852303164, "grad_norm": 0.765625, "learning_rate": 0.00016805006979656533, "loss": 0.9184, "step": 17059 }, { "epoch": 0.43805296242623826, "grad_norm": 0.7109375, "learning_rate": 0.00016804679859643216, "loss": 0.8739, "step": 17060 }, { "epoch": 0.43807863962216004, "grad_norm": 0.7734375, "learning_rate": 0.00016804352726068787, "loss": 0.9952, "step": 17061 }, { "epoch": 0.4381043168180819, "grad_norm": 0.78515625, "learning_rate": 0.000168040255789339, "loss": 0.8442, "step": 17062 }, { "epoch": 0.4381299940140037, "grad_norm": 0.80078125, "learning_rate": 0.000168036984182392, "loss": 0.9348, "step": 17063 }, { "epoch": 0.43815567120992555, "grad_norm": 0.734375, "learning_rate": 0.00016803371243985347, "loss": 0.9491, "step": 17064 }, { "epoch": 0.4381813484058473, "grad_norm": 0.76953125, "learning_rate": 0.00016803044056172987, "loss": 0.8486, "step": 17065 }, { "epoch": 0.43820702560176916, "grad_norm": 0.74609375, "learning_rate": 0.00016802716854802778, "loss": 0.9091, "step": 17066 }, { "epoch": 0.438232702797691, "grad_norm": 0.76171875, "learning_rate": 0.00016802389639875367, "loss": 0.8254, "step": 17067 }, { "epoch": 0.4382583799936128, "grad_norm": 0.7421875, "learning_rate": 0.00016802062411391404, "loss": 0.9901, "step": 17068 }, { "epoch": 0.4382840571895346, "grad_norm": 0.859375, "learning_rate": 0.00016801735169351552, "loss": 0.9878, "step": 17069 }, { "epoch": 0.43830973438545645, "grad_norm": 0.828125, "learning_rate": 0.00016801407913756452, "loss": 1.0522, "step": 17070 }, { "epoch": 0.43833541158137823, "grad_norm": 0.77734375, "learning_rate": 0.00016801080644606763, "loss": 0.8986, "step": 17071 }, { "epoch": 0.43836108877730007, "grad_norm": 0.71484375, "learning_rate": 0.0001680075336190313, "loss": 0.848, "step": 17072 }, { "epoch": 0.4383867659732219, "grad_norm": 0.734375, "learning_rate": 0.00016800426065646215, "loss": 0.8603, "step": 17073 }, { "epoch": 0.43841244316914374, "grad_norm": 0.71484375, "learning_rate": 0.00016800098755836662, "loss": 0.8366, "step": 17074 }, { "epoch": 0.4384381203650655, "grad_norm": 0.7265625, "learning_rate": 0.00016799771432475128, "loss": 0.8849, "step": 17075 }, { "epoch": 0.43846379756098736, "grad_norm": 0.80078125, "learning_rate": 0.00016799444095562263, "loss": 1.0509, "step": 17076 }, { "epoch": 0.4384894747569092, "grad_norm": 0.77734375, "learning_rate": 0.0001679911674509872, "loss": 0.9412, "step": 17077 }, { "epoch": 0.438515151952831, "grad_norm": 0.78125, "learning_rate": 0.00016798789381085153, "loss": 0.9282, "step": 17078 }, { "epoch": 0.4385408291487528, "grad_norm": 0.765625, "learning_rate": 0.0001679846200352221, "loss": 0.9472, "step": 17079 }, { "epoch": 0.43856650634467464, "grad_norm": 0.8984375, "learning_rate": 0.00016798134612410547, "loss": 0.9632, "step": 17080 }, { "epoch": 0.4385921835405964, "grad_norm": 0.8203125, "learning_rate": 0.00016797807207750818, "loss": 0.8444, "step": 17081 }, { "epoch": 0.43861786073651826, "grad_norm": 0.765625, "learning_rate": 0.00016797479789543672, "loss": 1.0118, "step": 17082 }, { "epoch": 0.4386435379324401, "grad_norm": 0.71484375, "learning_rate": 0.00016797152357789764, "loss": 0.8404, "step": 17083 }, { "epoch": 0.43866921512836193, "grad_norm": 0.8125, "learning_rate": 0.00016796824912489744, "loss": 0.988, "step": 17084 }, { "epoch": 0.4386948923242837, "grad_norm": 0.7578125, "learning_rate": 0.00016796497453644267, "loss": 0.8301, "step": 17085 }, { "epoch": 0.43872056952020555, "grad_norm": 0.80859375, "learning_rate": 0.0001679616998125398, "loss": 0.9584, "step": 17086 }, { "epoch": 0.4387462467161274, "grad_norm": 0.78515625, "learning_rate": 0.00016795842495319544, "loss": 0.9937, "step": 17087 }, { "epoch": 0.43877192391204917, "grad_norm": 0.78125, "learning_rate": 0.00016795514995841611, "loss": 0.8153, "step": 17088 }, { "epoch": 0.438797601107971, "grad_norm": 0.75390625, "learning_rate": 0.00016795187482820825, "loss": 0.9158, "step": 17089 }, { "epoch": 0.43882327830389284, "grad_norm": 0.76171875, "learning_rate": 0.00016794859956257847, "loss": 0.9281, "step": 17090 }, { "epoch": 0.4388489554998146, "grad_norm": 0.7578125, "learning_rate": 0.00016794532416153325, "loss": 0.7848, "step": 17091 }, { "epoch": 0.43887463269573646, "grad_norm": 0.76171875, "learning_rate": 0.00016794204862507917, "loss": 0.8342, "step": 17092 }, { "epoch": 0.4389003098916583, "grad_norm": 0.75390625, "learning_rate": 0.00016793877295322269, "loss": 0.9435, "step": 17093 }, { "epoch": 0.4389259870875801, "grad_norm": 0.7109375, "learning_rate": 0.00016793549714597038, "loss": 0.8964, "step": 17094 }, { "epoch": 0.4389516642835019, "grad_norm": 0.76171875, "learning_rate": 0.00016793222120332877, "loss": 0.9988, "step": 17095 }, { "epoch": 0.43897734147942374, "grad_norm": 0.796875, "learning_rate": 0.00016792894512530438, "loss": 0.882, "step": 17096 }, { "epoch": 0.4390030186753456, "grad_norm": 0.8359375, "learning_rate": 0.0001679256689119037, "loss": 0.9524, "step": 17097 }, { "epoch": 0.43902869587126736, "grad_norm": 0.76953125, "learning_rate": 0.00016792239256313332, "loss": 0.9295, "step": 17098 }, { "epoch": 0.4390543730671892, "grad_norm": 1.9375, "learning_rate": 0.00016791911607899978, "loss": 0.9072, "step": 17099 }, { "epoch": 0.43908005026311103, "grad_norm": 0.76953125, "learning_rate": 0.00016791583945950956, "loss": 0.905, "step": 17100 }, { "epoch": 0.4391057274590328, "grad_norm": 0.7578125, "learning_rate": 0.0001679125627046692, "loss": 0.8261, "step": 17101 }, { "epoch": 0.43913140465495465, "grad_norm": 0.79296875, "learning_rate": 0.00016790928581448525, "loss": 0.8548, "step": 17102 }, { "epoch": 0.4391570818508765, "grad_norm": 0.75390625, "learning_rate": 0.00016790600878896421, "loss": 0.9309, "step": 17103 }, { "epoch": 0.4391827590467983, "grad_norm": 0.80078125, "learning_rate": 0.00016790273162811263, "loss": 0.9295, "step": 17104 }, { "epoch": 0.4392084362427201, "grad_norm": 0.75, "learning_rate": 0.00016789945433193703, "loss": 0.8222, "step": 17105 }, { "epoch": 0.43923411343864194, "grad_norm": 0.78515625, "learning_rate": 0.00016789617690044398, "loss": 0.9128, "step": 17106 }, { "epoch": 0.4392597906345638, "grad_norm": 0.76953125, "learning_rate": 0.00016789289933363995, "loss": 0.8798, "step": 17107 }, { "epoch": 0.43928546783048555, "grad_norm": 0.98046875, "learning_rate": 0.00016788962163153152, "loss": 1.0011, "step": 17108 }, { "epoch": 0.4393111450264074, "grad_norm": 0.8671875, "learning_rate": 0.0001678863437941252, "loss": 0.8786, "step": 17109 }, { "epoch": 0.4393368222223292, "grad_norm": 0.79296875, "learning_rate": 0.00016788306582142754, "loss": 0.9849, "step": 17110 }, { "epoch": 0.439362499418251, "grad_norm": 0.796875, "learning_rate": 0.00016787978771344505, "loss": 0.9791, "step": 17111 }, { "epoch": 0.43938817661417284, "grad_norm": 0.75, "learning_rate": 0.00016787650947018426, "loss": 0.8678, "step": 17112 }, { "epoch": 0.4394138538100947, "grad_norm": 0.765625, "learning_rate": 0.00016787323109165175, "loss": 0.9783, "step": 17113 }, { "epoch": 0.4394395310060165, "grad_norm": 0.71875, "learning_rate": 0.00016786995257785404, "loss": 0.8379, "step": 17114 }, { "epoch": 0.4394652082019383, "grad_norm": 0.77734375, "learning_rate": 0.0001678666739287976, "loss": 0.8896, "step": 17115 }, { "epoch": 0.43949088539786013, "grad_norm": 0.7890625, "learning_rate": 0.00016786339514448904, "loss": 1.0434, "step": 17116 }, { "epoch": 0.43951656259378197, "grad_norm": 0.875, "learning_rate": 0.00016786011622493482, "loss": 1.0044, "step": 17117 }, { "epoch": 0.43954223978970375, "grad_norm": 0.74609375, "learning_rate": 0.0001678568371701416, "loss": 0.9171, "step": 17118 }, { "epoch": 0.4395679169856256, "grad_norm": 0.73046875, "learning_rate": 0.00016785355798011576, "loss": 0.7939, "step": 17119 }, { "epoch": 0.4395935941815474, "grad_norm": 0.8046875, "learning_rate": 0.0001678502786548639, "loss": 0.9767, "step": 17120 }, { "epoch": 0.4396192713774692, "grad_norm": 0.7421875, "learning_rate": 0.00016784699919439262, "loss": 0.8453, "step": 17121 }, { "epoch": 0.43964494857339104, "grad_norm": 0.81640625, "learning_rate": 0.00016784371959870838, "loss": 0.9395, "step": 17122 }, { "epoch": 0.4396706257693129, "grad_norm": 0.7890625, "learning_rate": 0.00016784043986781773, "loss": 0.9116, "step": 17123 }, { "epoch": 0.4396963029652347, "grad_norm": 0.80859375, "learning_rate": 0.00016783716000172717, "loss": 0.8355, "step": 17124 }, { "epoch": 0.4397219801611565, "grad_norm": 0.72265625, "learning_rate": 0.0001678338800004433, "loss": 1.0744, "step": 17125 }, { "epoch": 0.4397476573570783, "grad_norm": 0.78515625, "learning_rate": 0.0001678305998639727, "loss": 0.9893, "step": 17126 }, { "epoch": 0.43977333455300016, "grad_norm": 0.81640625, "learning_rate": 0.00016782731959232177, "loss": 0.9236, "step": 17127 }, { "epoch": 0.43979901174892194, "grad_norm": 0.79296875, "learning_rate": 0.00016782403918549714, "loss": 0.9533, "step": 17128 }, { "epoch": 0.4398246889448438, "grad_norm": 0.75, "learning_rate": 0.00016782075864350533, "loss": 0.9606, "step": 17129 }, { "epoch": 0.4398503661407656, "grad_norm": 0.8125, "learning_rate": 0.00016781747796635287, "loss": 0.9511, "step": 17130 }, { "epoch": 0.4398760433366874, "grad_norm": 0.76953125, "learning_rate": 0.0001678141971540463, "loss": 0.8393, "step": 17131 }, { "epoch": 0.43990172053260923, "grad_norm": 0.8359375, "learning_rate": 0.00016781091620659218, "loss": 1.0341, "step": 17132 }, { "epoch": 0.43992739772853107, "grad_norm": 3.875, "learning_rate": 0.000167807635123997, "loss": 0.8898, "step": 17133 }, { "epoch": 0.4399530749244529, "grad_norm": 0.8125, "learning_rate": 0.00016780435390626735, "loss": 1.0084, "step": 17134 }, { "epoch": 0.4399787521203747, "grad_norm": 0.8125, "learning_rate": 0.00016780107255340973, "loss": 1.0133, "step": 17135 }, { "epoch": 0.4400044293162965, "grad_norm": 0.80078125, "learning_rate": 0.0001677977910654307, "loss": 0.9756, "step": 17136 }, { "epoch": 0.44003010651221836, "grad_norm": 0.77734375, "learning_rate": 0.0001677945094423368, "loss": 0.9738, "step": 17137 }, { "epoch": 0.44005578370814014, "grad_norm": 0.7578125, "learning_rate": 0.00016779122768413457, "loss": 0.8384, "step": 17138 }, { "epoch": 0.44008146090406197, "grad_norm": 0.76953125, "learning_rate": 0.00016778794579083055, "loss": 0.8409, "step": 17139 }, { "epoch": 0.4401071380999838, "grad_norm": 0.84765625, "learning_rate": 0.00016778466376243128, "loss": 0.8991, "step": 17140 }, { "epoch": 0.4401328152959056, "grad_norm": 0.8046875, "learning_rate": 0.0001677813815989433, "loss": 0.9999, "step": 17141 }, { "epoch": 0.4401584924918274, "grad_norm": 0.87109375, "learning_rate": 0.00016777809930037312, "loss": 0.7909, "step": 17142 }, { "epoch": 0.44018416968774926, "grad_norm": 0.82421875, "learning_rate": 0.00016777481686672735, "loss": 0.9762, "step": 17143 }, { "epoch": 0.44020984688367104, "grad_norm": 0.796875, "learning_rate": 0.00016777153429801245, "loss": 0.8519, "step": 17144 }, { "epoch": 0.4402355240795929, "grad_norm": 0.83203125, "learning_rate": 0.00016776825159423503, "loss": 0.8966, "step": 17145 }, { "epoch": 0.4402612012755147, "grad_norm": 0.76171875, "learning_rate": 0.00016776496875540162, "loss": 0.7511, "step": 17146 }, { "epoch": 0.44028687847143655, "grad_norm": 0.76953125, "learning_rate": 0.00016776168578151875, "loss": 0.8987, "step": 17147 }, { "epoch": 0.44031255566735833, "grad_norm": 0.765625, "learning_rate": 0.00016775840267259292, "loss": 0.8557, "step": 17148 }, { "epoch": 0.44033823286328017, "grad_norm": 0.71484375, "learning_rate": 0.00016775511942863073, "loss": 0.9744, "step": 17149 }, { "epoch": 0.440363910059202, "grad_norm": 0.6953125, "learning_rate": 0.0001677518360496387, "loss": 0.8747, "step": 17150 }, { "epoch": 0.4403895872551238, "grad_norm": 0.73828125, "learning_rate": 0.0001677485525356234, "loss": 0.9502, "step": 17151 }, { "epoch": 0.4404152644510456, "grad_norm": 0.79296875, "learning_rate": 0.00016774526888659134, "loss": 0.9227, "step": 17152 }, { "epoch": 0.44044094164696745, "grad_norm": 0.8359375, "learning_rate": 0.0001677419851025491, "loss": 0.9199, "step": 17153 }, { "epoch": 0.44046661884288923, "grad_norm": 0.7734375, "learning_rate": 0.0001677387011835032, "loss": 0.8384, "step": 17154 }, { "epoch": 0.44049229603881107, "grad_norm": 0.7109375, "learning_rate": 0.0001677354171294602, "loss": 0.8136, "step": 17155 }, { "epoch": 0.4405179732347329, "grad_norm": 0.68359375, "learning_rate": 0.00016773213294042657, "loss": 0.8244, "step": 17156 }, { "epoch": 0.44054365043065474, "grad_norm": 0.7578125, "learning_rate": 0.000167728848616409, "loss": 0.8826, "step": 17157 }, { "epoch": 0.4405693276265765, "grad_norm": 0.73828125, "learning_rate": 0.0001677255641574139, "loss": 0.7737, "step": 17158 }, { "epoch": 0.44059500482249836, "grad_norm": 0.8125, "learning_rate": 0.00016772227956344786, "loss": 0.9255, "step": 17159 }, { "epoch": 0.4406206820184202, "grad_norm": 0.69140625, "learning_rate": 0.00016771899483451744, "loss": 0.8398, "step": 17160 }, { "epoch": 0.440646359214342, "grad_norm": 0.890625, "learning_rate": 0.0001677157099706292, "loss": 0.892, "step": 17161 }, { "epoch": 0.4406720364102638, "grad_norm": 0.83203125, "learning_rate": 0.00016771242497178967, "loss": 0.9561, "step": 17162 }, { "epoch": 0.44069771360618565, "grad_norm": 0.75390625, "learning_rate": 0.00016770913983800537, "loss": 0.8749, "step": 17163 }, { "epoch": 0.44072339080210743, "grad_norm": 0.82421875, "learning_rate": 0.00016770585456928289, "loss": 0.8873, "step": 17164 }, { "epoch": 0.44074906799802926, "grad_norm": 0.8125, "learning_rate": 0.00016770256916562873, "loss": 0.9188, "step": 17165 }, { "epoch": 0.4407747451939511, "grad_norm": 0.78125, "learning_rate": 0.0001676992836270495, "loss": 0.961, "step": 17166 }, { "epoch": 0.44080042238987294, "grad_norm": 0.828125, "learning_rate": 0.0001676959979535517, "loss": 0.8851, "step": 17167 }, { "epoch": 0.4408260995857947, "grad_norm": 0.8125, "learning_rate": 0.0001676927121451419, "loss": 0.8638, "step": 17168 }, { "epoch": 0.44085177678171655, "grad_norm": 0.76171875, "learning_rate": 0.00016768942620182663, "loss": 0.9167, "step": 17169 }, { "epoch": 0.4408774539776384, "grad_norm": 0.734375, "learning_rate": 0.00016768614012361243, "loss": 0.8622, "step": 17170 }, { "epoch": 0.44090313117356017, "grad_norm": 0.74609375, "learning_rate": 0.0001676828539105059, "loss": 0.8695, "step": 17171 }, { "epoch": 0.440928808369482, "grad_norm": 0.78125, "learning_rate": 0.00016767956756251354, "loss": 0.9693, "step": 17172 }, { "epoch": 0.44095448556540384, "grad_norm": 0.83203125, "learning_rate": 0.00016767628107964191, "loss": 0.8575, "step": 17173 }, { "epoch": 0.4409801627613256, "grad_norm": 0.828125, "learning_rate": 0.00016767299446189757, "loss": 0.8477, "step": 17174 }, { "epoch": 0.44100583995724746, "grad_norm": 0.80859375, "learning_rate": 0.00016766970770928707, "loss": 0.9768, "step": 17175 }, { "epoch": 0.4410315171531693, "grad_norm": 0.828125, "learning_rate": 0.00016766642082181697, "loss": 0.8334, "step": 17176 }, { "epoch": 0.44105719434909113, "grad_norm": 0.80078125, "learning_rate": 0.00016766313379949375, "loss": 0.8976, "step": 17177 }, { "epoch": 0.4410828715450129, "grad_norm": 0.83203125, "learning_rate": 0.00016765984664232405, "loss": 0.9318, "step": 17178 }, { "epoch": 0.44110854874093475, "grad_norm": 0.75, "learning_rate": 0.0001676565593503144, "loss": 0.8377, "step": 17179 }, { "epoch": 0.4411342259368566, "grad_norm": 0.91796875, "learning_rate": 0.00016765327192347135, "loss": 0.9671, "step": 17180 }, { "epoch": 0.44115990313277836, "grad_norm": 0.7734375, "learning_rate": 0.0001676499843618014, "loss": 1.0423, "step": 17181 }, { "epoch": 0.4411855803287002, "grad_norm": 0.7265625, "learning_rate": 0.00016764669666531113, "loss": 0.9996, "step": 17182 }, { "epoch": 0.44121125752462204, "grad_norm": 0.8125, "learning_rate": 0.00016764340883400718, "loss": 0.9728, "step": 17183 }, { "epoch": 0.4412369347205438, "grad_norm": 0.78125, "learning_rate": 0.00016764012086789597, "loss": 0.9017, "step": 17184 }, { "epoch": 0.44126261191646565, "grad_norm": 0.796875, "learning_rate": 0.00016763683276698412, "loss": 0.9303, "step": 17185 }, { "epoch": 0.4412882891123875, "grad_norm": 0.75390625, "learning_rate": 0.00016763354453127818, "loss": 0.7796, "step": 17186 }, { "epoch": 0.4413139663083093, "grad_norm": 0.78515625, "learning_rate": 0.00016763025616078466, "loss": 0.8702, "step": 17187 }, { "epoch": 0.4413396435042311, "grad_norm": 0.94140625, "learning_rate": 0.00016762696765551017, "loss": 0.8639, "step": 17188 }, { "epoch": 0.44136532070015294, "grad_norm": 0.73828125, "learning_rate": 0.00016762367901546128, "loss": 0.7987, "step": 17189 }, { "epoch": 0.4413909978960748, "grad_norm": 0.71875, "learning_rate": 0.00016762039024064443, "loss": 0.8769, "step": 17190 }, { "epoch": 0.44141667509199656, "grad_norm": 0.77734375, "learning_rate": 0.00016761710133106632, "loss": 0.8446, "step": 17191 }, { "epoch": 0.4414423522879184, "grad_norm": 0.828125, "learning_rate": 0.0001676138122867334, "loss": 0.931, "step": 17192 }, { "epoch": 0.44146802948384023, "grad_norm": 0.7890625, "learning_rate": 0.00016761052310765225, "loss": 0.8252, "step": 17193 }, { "epoch": 0.441493706679762, "grad_norm": 0.69140625, "learning_rate": 0.00016760723379382947, "loss": 0.809, "step": 17194 }, { "epoch": 0.44151938387568385, "grad_norm": 0.8671875, "learning_rate": 0.00016760394434527158, "loss": 0.9942, "step": 17195 }, { "epoch": 0.4415450610716057, "grad_norm": 0.8828125, "learning_rate": 0.0001676006547619851, "loss": 0.9635, "step": 17196 }, { "epoch": 0.4415707382675275, "grad_norm": 0.875, "learning_rate": 0.00016759736504397664, "loss": 0.9979, "step": 17197 }, { "epoch": 0.4415964154634493, "grad_norm": 0.8125, "learning_rate": 0.00016759407519125277, "loss": 0.985, "step": 17198 }, { "epoch": 0.44162209265937113, "grad_norm": 0.83203125, "learning_rate": 0.00016759078520381997, "loss": 0.9394, "step": 17199 }, { "epoch": 0.44164776985529297, "grad_norm": 0.796875, "learning_rate": 0.00016758749508168484, "loss": 0.975, "step": 17200 }, { "epoch": 0.44167344705121475, "grad_norm": 0.82421875, "learning_rate": 0.00016758420482485396, "loss": 0.9713, "step": 17201 }, { "epoch": 0.4416991242471366, "grad_norm": 0.7109375, "learning_rate": 0.00016758091443333384, "loss": 0.8996, "step": 17202 }, { "epoch": 0.4417248014430584, "grad_norm": 0.83203125, "learning_rate": 0.0001675776239071311, "loss": 0.8831, "step": 17203 }, { "epoch": 0.4417504786389802, "grad_norm": 0.765625, "learning_rate": 0.0001675743332462522, "loss": 0.6599, "step": 17204 }, { "epoch": 0.44177615583490204, "grad_norm": 0.796875, "learning_rate": 0.0001675710424507038, "loss": 0.9759, "step": 17205 }, { "epoch": 0.4418018330308239, "grad_norm": 0.7578125, "learning_rate": 0.00016756775152049243, "loss": 0.9987, "step": 17206 }, { "epoch": 0.4418275102267457, "grad_norm": 0.85546875, "learning_rate": 0.00016756446045562462, "loss": 1.0185, "step": 17207 }, { "epoch": 0.4418531874226675, "grad_norm": 0.75390625, "learning_rate": 0.00016756116925610695, "loss": 1.0184, "step": 17208 }, { "epoch": 0.44187886461858933, "grad_norm": 0.73046875, "learning_rate": 0.00016755787792194595, "loss": 0.8006, "step": 17209 }, { "epoch": 0.44190454181451116, "grad_norm": 0.79296875, "learning_rate": 0.00016755458645314818, "loss": 1.0653, "step": 17210 }, { "epoch": 0.44193021901043295, "grad_norm": 0.85546875, "learning_rate": 0.00016755129484972026, "loss": 1.0267, "step": 17211 }, { "epoch": 0.4419558962063548, "grad_norm": 0.8046875, "learning_rate": 0.0001675480031116687, "loss": 0.7868, "step": 17212 }, { "epoch": 0.4419815734022766, "grad_norm": 0.7890625, "learning_rate": 0.00016754471123900012, "loss": 0.7981, "step": 17213 }, { "epoch": 0.4420072505981984, "grad_norm": 0.81640625, "learning_rate": 0.00016754141923172098, "loss": 1.0445, "step": 17214 }, { "epoch": 0.44203292779412023, "grad_norm": 0.76171875, "learning_rate": 0.00016753812708983787, "loss": 0.8844, "step": 17215 }, { "epoch": 0.44205860499004207, "grad_norm": 0.796875, "learning_rate": 0.0001675348348133574, "loss": 0.8907, "step": 17216 }, { "epoch": 0.4420842821859639, "grad_norm": 0.8125, "learning_rate": 0.00016753154240228612, "loss": 0.9581, "step": 17217 }, { "epoch": 0.4421099593818857, "grad_norm": 0.7890625, "learning_rate": 0.00016752824985663057, "loss": 0.8198, "step": 17218 }, { "epoch": 0.4421356365778075, "grad_norm": 0.84375, "learning_rate": 0.0001675249571763973, "loss": 0.8742, "step": 17219 }, { "epoch": 0.44216131377372936, "grad_norm": 0.7265625, "learning_rate": 0.0001675216643615929, "loss": 0.901, "step": 17220 }, { "epoch": 0.44218699096965114, "grad_norm": 0.796875, "learning_rate": 0.00016751837141222394, "loss": 0.9135, "step": 17221 }, { "epoch": 0.442212668165573, "grad_norm": 0.71875, "learning_rate": 0.00016751507832829694, "loss": 0.9164, "step": 17222 }, { "epoch": 0.4422383453614948, "grad_norm": 0.8203125, "learning_rate": 0.0001675117851098185, "loss": 0.7623, "step": 17223 }, { "epoch": 0.4422640225574166, "grad_norm": 0.80859375, "learning_rate": 0.00016750849175679514, "loss": 1.031, "step": 17224 }, { "epoch": 0.44228969975333843, "grad_norm": 0.78125, "learning_rate": 0.0001675051982692335, "loss": 1.0335, "step": 17225 }, { "epoch": 0.44231537694926026, "grad_norm": 0.73046875, "learning_rate": 0.0001675019046471401, "loss": 0.859, "step": 17226 }, { "epoch": 0.4423410541451821, "grad_norm": 0.70703125, "learning_rate": 0.0001674986108905215, "loss": 0.8526, "step": 17227 }, { "epoch": 0.4423667313411039, "grad_norm": 0.76171875, "learning_rate": 0.00016749531699938422, "loss": 0.8303, "step": 17228 }, { "epoch": 0.4423924085370257, "grad_norm": 0.74609375, "learning_rate": 0.0001674920229737349, "loss": 0.8915, "step": 17229 }, { "epoch": 0.44241808573294755, "grad_norm": 0.76953125, "learning_rate": 0.00016748872881358008, "loss": 1.0319, "step": 17230 }, { "epoch": 0.44244376292886933, "grad_norm": 0.7890625, "learning_rate": 0.00016748543451892628, "loss": 0.9147, "step": 17231 }, { "epoch": 0.44246944012479117, "grad_norm": 0.7734375, "learning_rate": 0.00016748214008978013, "loss": 1.0282, "step": 17232 }, { "epoch": 0.442495117320713, "grad_norm": 0.71875, "learning_rate": 0.00016747884552614819, "loss": 0.8927, "step": 17233 }, { "epoch": 0.4425207945166348, "grad_norm": 0.71484375, "learning_rate": 0.00016747555082803698, "loss": 0.8473, "step": 17234 }, { "epoch": 0.4425464717125566, "grad_norm": 0.7421875, "learning_rate": 0.00016747225599545313, "loss": 0.8897, "step": 17235 }, { "epoch": 0.44257214890847846, "grad_norm": 0.76953125, "learning_rate": 0.00016746896102840311, "loss": 0.9123, "step": 17236 }, { "epoch": 0.4425978261044003, "grad_norm": 0.8359375, "learning_rate": 0.0001674656659268936, "loss": 0.9983, "step": 17237 }, { "epoch": 0.4426235033003221, "grad_norm": 0.8046875, "learning_rate": 0.00016746237069093108, "loss": 0.9623, "step": 17238 }, { "epoch": 0.4426491804962439, "grad_norm": 0.80859375, "learning_rate": 0.00016745907532052216, "loss": 1.025, "step": 17239 }, { "epoch": 0.44267485769216575, "grad_norm": 0.7109375, "learning_rate": 0.0001674557798156734, "loss": 0.7626, "step": 17240 }, { "epoch": 0.4427005348880875, "grad_norm": 0.76171875, "learning_rate": 0.00016745248417639137, "loss": 0.9369, "step": 17241 }, { "epoch": 0.44272621208400936, "grad_norm": 0.77734375, "learning_rate": 0.0001674491884026826, "loss": 1.0098, "step": 17242 }, { "epoch": 0.4427518892799312, "grad_norm": 0.7109375, "learning_rate": 0.00016744589249455372, "loss": 0.853, "step": 17243 }, { "epoch": 0.442777566475853, "grad_norm": 0.78515625, "learning_rate": 0.00016744259645201125, "loss": 0.9535, "step": 17244 }, { "epoch": 0.4428032436717748, "grad_norm": 0.84375, "learning_rate": 0.00016743930027506178, "loss": 1.0004, "step": 17245 }, { "epoch": 0.44282892086769665, "grad_norm": 0.7890625, "learning_rate": 0.0001674360039637119, "loss": 0.881, "step": 17246 }, { "epoch": 0.4428545980636185, "grad_norm": 0.74609375, "learning_rate": 0.00016743270751796814, "loss": 0.8982, "step": 17247 }, { "epoch": 0.44288027525954027, "grad_norm": 0.79296875, "learning_rate": 0.0001674294109378371, "loss": 0.9669, "step": 17248 }, { "epoch": 0.4429059524554621, "grad_norm": 0.859375, "learning_rate": 0.00016742611422332528, "loss": 0.989, "step": 17249 }, { "epoch": 0.44293162965138394, "grad_norm": 0.81640625, "learning_rate": 0.00016742281737443934, "loss": 0.9385, "step": 17250 }, { "epoch": 0.4429573068473057, "grad_norm": 0.734375, "learning_rate": 0.00016741952039118582, "loss": 0.8678, "step": 17251 }, { "epoch": 0.44298298404322756, "grad_norm": 0.7109375, "learning_rate": 0.0001674162232735713, "loss": 0.8115, "step": 17252 }, { "epoch": 0.4430086612391494, "grad_norm": 0.80078125, "learning_rate": 0.0001674129260216023, "loss": 0.9003, "step": 17253 }, { "epoch": 0.4430343384350712, "grad_norm": 0.77734375, "learning_rate": 0.0001674096286352855, "loss": 0.8919, "step": 17254 }, { "epoch": 0.443060015630993, "grad_norm": 0.77734375, "learning_rate": 0.00016740633111462732, "loss": 0.9955, "step": 17255 }, { "epoch": 0.44308569282691485, "grad_norm": 0.8125, "learning_rate": 0.00016740303345963445, "loss": 0.9621, "step": 17256 }, { "epoch": 0.4431113700228367, "grad_norm": 0.80078125, "learning_rate": 0.00016739973567031342, "loss": 0.9213, "step": 17257 }, { "epoch": 0.44313704721875846, "grad_norm": 0.8046875, "learning_rate": 0.00016739643774667077, "loss": 1.0423, "step": 17258 }, { "epoch": 0.4431627244146803, "grad_norm": 0.8046875, "learning_rate": 0.00016739313968871315, "loss": 0.843, "step": 17259 }, { "epoch": 0.44318840161060213, "grad_norm": 0.734375, "learning_rate": 0.0001673898414964471, "loss": 0.8329, "step": 17260 }, { "epoch": 0.4432140788065239, "grad_norm": 0.78125, "learning_rate": 0.00016738654316987914, "loss": 0.951, "step": 17261 }, { "epoch": 0.44323975600244575, "grad_norm": 0.7578125, "learning_rate": 0.0001673832447090159, "loss": 0.7792, "step": 17262 }, { "epoch": 0.4432654331983676, "grad_norm": 0.83984375, "learning_rate": 0.00016737994611386393, "loss": 1.0284, "step": 17263 }, { "epoch": 0.44329111039428937, "grad_norm": 0.7578125, "learning_rate": 0.00016737664738442984, "loss": 0.8601, "step": 17264 }, { "epoch": 0.4433167875902112, "grad_norm": 0.73046875, "learning_rate": 0.00016737334852072016, "loss": 0.8641, "step": 17265 }, { "epoch": 0.44334246478613304, "grad_norm": 0.76953125, "learning_rate": 0.00016737004952274148, "loss": 0.9196, "step": 17266 }, { "epoch": 0.4433681419820549, "grad_norm": 0.8515625, "learning_rate": 0.0001673667503905004, "loss": 1.0341, "step": 17267 }, { "epoch": 0.44339381917797666, "grad_norm": 0.796875, "learning_rate": 0.00016736345112400346, "loss": 0.9795, "step": 17268 }, { "epoch": 0.4434194963738985, "grad_norm": 0.78125, "learning_rate": 0.00016736015172325725, "loss": 0.9841, "step": 17269 }, { "epoch": 0.44344517356982033, "grad_norm": 0.78515625, "learning_rate": 0.00016735685218826835, "loss": 0.911, "step": 17270 }, { "epoch": 0.4434708507657421, "grad_norm": 0.796875, "learning_rate": 0.0001673535525190433, "loss": 0.9108, "step": 17271 }, { "epoch": 0.44349652796166394, "grad_norm": 0.7890625, "learning_rate": 0.00016735025271558872, "loss": 0.858, "step": 17272 }, { "epoch": 0.4435222051575858, "grad_norm": 0.921875, "learning_rate": 0.00016734695277791114, "loss": 1.0643, "step": 17273 }, { "epoch": 0.44354788235350756, "grad_norm": 0.7578125, "learning_rate": 0.00016734365270601722, "loss": 0.8611, "step": 17274 }, { "epoch": 0.4435735595494294, "grad_norm": 0.8359375, "learning_rate": 0.00016734035249991343, "loss": 0.9345, "step": 17275 }, { "epoch": 0.44359923674535123, "grad_norm": 0.78515625, "learning_rate": 0.00016733705215960646, "loss": 0.8506, "step": 17276 }, { "epoch": 0.44362491394127307, "grad_norm": 0.76953125, "learning_rate": 0.00016733375168510279, "loss": 0.9892, "step": 17277 }, { "epoch": 0.44365059113719485, "grad_norm": 0.78515625, "learning_rate": 0.000167330451076409, "loss": 0.8234, "step": 17278 }, { "epoch": 0.4436762683331167, "grad_norm": 0.8515625, "learning_rate": 0.00016732715033353178, "loss": 0.8191, "step": 17279 }, { "epoch": 0.4437019455290385, "grad_norm": 0.80859375, "learning_rate": 0.00016732384945647758, "loss": 0.9311, "step": 17280 }, { "epoch": 0.4437276227249603, "grad_norm": 0.80859375, "learning_rate": 0.00016732054844525304, "loss": 0.9857, "step": 17281 }, { "epoch": 0.44375329992088214, "grad_norm": 0.86328125, "learning_rate": 0.00016731724729986475, "loss": 1.0004, "step": 17282 }, { "epoch": 0.443778977116804, "grad_norm": 0.8125, "learning_rate": 0.00016731394602031925, "loss": 0.9129, "step": 17283 }, { "epoch": 0.44380465431272575, "grad_norm": 0.88671875, "learning_rate": 0.00016731064460662315, "loss": 0.963, "step": 17284 }, { "epoch": 0.4438303315086476, "grad_norm": 0.83984375, "learning_rate": 0.00016730734305878296, "loss": 1.0104, "step": 17285 }, { "epoch": 0.4438560087045694, "grad_norm": 0.8671875, "learning_rate": 0.0001673040413768054, "loss": 0.9615, "step": 17286 }, { "epoch": 0.44388168590049126, "grad_norm": 0.84375, "learning_rate": 0.0001673007395606969, "loss": 0.8953, "step": 17287 }, { "epoch": 0.44390736309641304, "grad_norm": 0.76171875, "learning_rate": 0.00016729743761046412, "loss": 1.0417, "step": 17288 }, { "epoch": 0.4439330402923349, "grad_norm": 0.765625, "learning_rate": 0.00016729413552611365, "loss": 0.8521, "step": 17289 }, { "epoch": 0.4439587174882567, "grad_norm": 0.796875, "learning_rate": 0.000167290833307652, "loss": 1.0041, "step": 17290 }, { "epoch": 0.4439843946841785, "grad_norm": 0.765625, "learning_rate": 0.00016728753095508582, "loss": 0.801, "step": 17291 }, { "epoch": 0.44401007188010033, "grad_norm": 0.8671875, "learning_rate": 0.00016728422846842172, "loss": 0.9387, "step": 17292 }, { "epoch": 0.44403574907602217, "grad_norm": 0.7734375, "learning_rate": 0.00016728092584766618, "loss": 0.9187, "step": 17293 }, { "epoch": 0.44406142627194395, "grad_norm": 0.7578125, "learning_rate": 0.0001672776230928258, "loss": 0.9093, "step": 17294 }, { "epoch": 0.4440871034678658, "grad_norm": 0.8671875, "learning_rate": 0.00016727432020390722, "loss": 0.9523, "step": 17295 }, { "epoch": 0.4441127806637876, "grad_norm": 0.73046875, "learning_rate": 0.00016727101718091704, "loss": 0.9077, "step": 17296 }, { "epoch": 0.44413845785970946, "grad_norm": 0.91796875, "learning_rate": 0.00016726771402386176, "loss": 1.0164, "step": 17297 }, { "epoch": 0.44416413505563124, "grad_norm": 0.7578125, "learning_rate": 0.000167264410732748, "loss": 0.9156, "step": 17298 }, { "epoch": 0.4441898122515531, "grad_norm": 0.73046875, "learning_rate": 0.00016726110730758237, "loss": 0.9268, "step": 17299 }, { "epoch": 0.4442154894474749, "grad_norm": 0.75, "learning_rate": 0.0001672578037483714, "loss": 0.8873, "step": 17300 }, { "epoch": 0.4442411666433967, "grad_norm": 0.7265625, "learning_rate": 0.00016725450005512172, "loss": 0.8434, "step": 17301 }, { "epoch": 0.4442668438393185, "grad_norm": 0.77734375, "learning_rate": 0.0001672511962278399, "loss": 1.0363, "step": 17302 }, { "epoch": 0.44429252103524036, "grad_norm": 0.79296875, "learning_rate": 0.00016724789226653253, "loss": 0.8893, "step": 17303 }, { "epoch": 0.44431819823116214, "grad_norm": 0.765625, "learning_rate": 0.00016724458817120614, "loss": 0.9905, "step": 17304 }, { "epoch": 0.444343875427084, "grad_norm": 0.89453125, "learning_rate": 0.00016724128394186743, "loss": 0.9289, "step": 17305 }, { "epoch": 0.4443695526230058, "grad_norm": 0.765625, "learning_rate": 0.00016723797957852286, "loss": 0.8989, "step": 17306 }, { "epoch": 0.44439522981892765, "grad_norm": 0.703125, "learning_rate": 0.00016723467508117907, "loss": 0.8999, "step": 17307 }, { "epoch": 0.44442090701484943, "grad_norm": 0.76171875, "learning_rate": 0.00016723137044984268, "loss": 0.8439, "step": 17308 }, { "epoch": 0.44444658421077127, "grad_norm": 0.76953125, "learning_rate": 0.00016722806568452022, "loss": 0.8375, "step": 17309 }, { "epoch": 0.4444722614066931, "grad_norm": 0.8046875, "learning_rate": 0.0001672247607852183, "loss": 0.8895, "step": 17310 }, { "epoch": 0.4444979386026149, "grad_norm": 0.7734375, "learning_rate": 0.0001672214557519435, "loss": 0.9395, "step": 17311 }, { "epoch": 0.4445236157985367, "grad_norm": 0.77734375, "learning_rate": 0.0001672181505847024, "loss": 0.8388, "step": 17312 }, { "epoch": 0.44454929299445856, "grad_norm": 0.7421875, "learning_rate": 0.00016721484528350162, "loss": 0.9817, "step": 17313 }, { "epoch": 0.44457497019038034, "grad_norm": 0.84765625, "learning_rate": 0.00016721153984834773, "loss": 0.9663, "step": 17314 }, { "epoch": 0.4446006473863022, "grad_norm": 0.76171875, "learning_rate": 0.00016720823427924728, "loss": 0.8267, "step": 17315 }, { "epoch": 0.444626324582224, "grad_norm": 0.7109375, "learning_rate": 0.0001672049285762069, "loss": 0.9395, "step": 17316 }, { "epoch": 0.44465200177814584, "grad_norm": 0.734375, "learning_rate": 0.0001672016227392332, "loss": 0.9841, "step": 17317 }, { "epoch": 0.4446776789740676, "grad_norm": 0.7890625, "learning_rate": 0.0001671983167683327, "loss": 0.9045, "step": 17318 }, { "epoch": 0.44470335616998946, "grad_norm": 0.81640625, "learning_rate": 0.00016719501066351204, "loss": 1.0052, "step": 17319 }, { "epoch": 0.4447290333659113, "grad_norm": 0.7265625, "learning_rate": 0.0001671917044247778, "loss": 0.8776, "step": 17320 }, { "epoch": 0.4447547105618331, "grad_norm": 0.80078125, "learning_rate": 0.00016718839805213653, "loss": 0.899, "step": 17321 }, { "epoch": 0.4447803877577549, "grad_norm": 0.82421875, "learning_rate": 0.0001671850915455949, "loss": 1.0239, "step": 17322 }, { "epoch": 0.44480606495367675, "grad_norm": 0.8828125, "learning_rate": 0.00016718178490515942, "loss": 0.9659, "step": 17323 }, { "epoch": 0.44483174214959853, "grad_norm": 0.81640625, "learning_rate": 0.0001671784781308367, "loss": 0.9451, "step": 17324 }, { "epoch": 0.44485741934552037, "grad_norm": 0.8203125, "learning_rate": 0.0001671751712226334, "loss": 0.9423, "step": 17325 }, { "epoch": 0.4448830965414422, "grad_norm": 0.74609375, "learning_rate": 0.000167171864180556, "loss": 0.8297, "step": 17326 }, { "epoch": 0.44490877373736404, "grad_norm": 0.75, "learning_rate": 0.00016716855700461115, "loss": 0.82, "step": 17327 }, { "epoch": 0.4449344509332858, "grad_norm": 0.8125, "learning_rate": 0.00016716524969480543, "loss": 0.9862, "step": 17328 }, { "epoch": 0.44496012812920765, "grad_norm": 0.72265625, "learning_rate": 0.00016716194225114544, "loss": 0.8635, "step": 17329 }, { "epoch": 0.4449858053251295, "grad_norm": 0.81640625, "learning_rate": 0.00016715863467363774, "loss": 1.0632, "step": 17330 }, { "epoch": 0.44501148252105127, "grad_norm": 0.7265625, "learning_rate": 0.000167155326962289, "loss": 0.8672, "step": 17331 }, { "epoch": 0.4450371597169731, "grad_norm": 0.73828125, "learning_rate": 0.00016715201911710574, "loss": 0.9999, "step": 17332 }, { "epoch": 0.44506283691289494, "grad_norm": 0.82421875, "learning_rate": 0.00016714871113809456, "loss": 0.8877, "step": 17333 }, { "epoch": 0.4450885141088167, "grad_norm": 0.78125, "learning_rate": 0.00016714540302526206, "loss": 0.8613, "step": 17334 }, { "epoch": 0.44511419130473856, "grad_norm": 0.765625, "learning_rate": 0.00016714209477861487, "loss": 0.9514, "step": 17335 }, { "epoch": 0.4451398685006604, "grad_norm": 0.75390625, "learning_rate": 0.0001671387863981595, "loss": 1.0146, "step": 17336 }, { "epoch": 0.44516554569658223, "grad_norm": 0.74609375, "learning_rate": 0.00016713547788390262, "loss": 0.9381, "step": 17337 }, { "epoch": 0.445191222892504, "grad_norm": 0.75390625, "learning_rate": 0.00016713216923585082, "loss": 0.8683, "step": 17338 }, { "epoch": 0.44521690008842585, "grad_norm": 0.7890625, "learning_rate": 0.00016712886045401064, "loss": 0.9629, "step": 17339 }, { "epoch": 0.4452425772843477, "grad_norm": 0.7578125, "learning_rate": 0.0001671255515383887, "loss": 0.918, "step": 17340 }, { "epoch": 0.44526825448026947, "grad_norm": 0.796875, "learning_rate": 0.00016712224248899163, "loss": 1.016, "step": 17341 }, { "epoch": 0.4452939316761913, "grad_norm": 0.86328125, "learning_rate": 0.00016711893330582597, "loss": 1.0301, "step": 17342 }, { "epoch": 0.44531960887211314, "grad_norm": 0.7578125, "learning_rate": 0.00016711562398889833, "loss": 0.9162, "step": 17343 }, { "epoch": 0.4453452860680349, "grad_norm": 0.76953125, "learning_rate": 0.00016711231453821532, "loss": 0.9773, "step": 17344 }, { "epoch": 0.44537096326395675, "grad_norm": 0.7421875, "learning_rate": 0.00016710900495378356, "loss": 0.8804, "step": 17345 }, { "epoch": 0.4453966404598786, "grad_norm": 0.76953125, "learning_rate": 0.00016710569523560958, "loss": 0.8832, "step": 17346 }, { "epoch": 0.4454223176558004, "grad_norm": 0.97265625, "learning_rate": 0.00016710238538370002, "loss": 0.9355, "step": 17347 }, { "epoch": 0.4454479948517222, "grad_norm": 0.78125, "learning_rate": 0.0001670990753980615, "loss": 0.8825, "step": 17348 }, { "epoch": 0.44547367204764404, "grad_norm": 0.7265625, "learning_rate": 0.00016709576527870054, "loss": 0.9392, "step": 17349 }, { "epoch": 0.4454993492435659, "grad_norm": 0.8125, "learning_rate": 0.0001670924550256238, "loss": 0.813, "step": 17350 }, { "epoch": 0.44552502643948766, "grad_norm": 0.76953125, "learning_rate": 0.00016708914463883784, "loss": 0.8839, "step": 17351 }, { "epoch": 0.4455507036354095, "grad_norm": 0.828125, "learning_rate": 0.0001670858341183493, "loss": 1.016, "step": 17352 }, { "epoch": 0.44557638083133133, "grad_norm": 0.76171875, "learning_rate": 0.00016708252346416474, "loss": 0.919, "step": 17353 }, { "epoch": 0.4456020580272531, "grad_norm": 0.8828125, "learning_rate": 0.00016707921267629077, "loss": 0.9729, "step": 17354 }, { "epoch": 0.44562773522317495, "grad_norm": 0.80078125, "learning_rate": 0.000167075901754734, "loss": 1.0135, "step": 17355 }, { "epoch": 0.4456534124190968, "grad_norm": 0.76953125, "learning_rate": 0.000167072590699501, "loss": 0.9641, "step": 17356 }, { "epoch": 0.44567908961501856, "grad_norm": 0.83984375, "learning_rate": 0.00016706927951059836, "loss": 1.0827, "step": 17357 }, { "epoch": 0.4457047668109404, "grad_norm": 0.80859375, "learning_rate": 0.00016706596818803274, "loss": 0.9224, "step": 17358 }, { "epoch": 0.44573044400686224, "grad_norm": 0.88671875, "learning_rate": 0.00016706265673181072, "loss": 0.943, "step": 17359 }, { "epoch": 0.4457561212027841, "grad_norm": 0.72265625, "learning_rate": 0.00016705934514193887, "loss": 0.8408, "step": 17360 }, { "epoch": 0.44578179839870585, "grad_norm": 0.76171875, "learning_rate": 0.00016705603341842377, "loss": 0.939, "step": 17361 }, { "epoch": 0.4458074755946277, "grad_norm": 0.98828125, "learning_rate": 0.0001670527215612721, "loss": 0.9136, "step": 17362 }, { "epoch": 0.4458331527905495, "grad_norm": 0.8046875, "learning_rate": 0.00016704940957049038, "loss": 0.9161, "step": 17363 }, { "epoch": 0.4458588299864713, "grad_norm": 0.83203125, "learning_rate": 0.00016704609744608523, "loss": 0.7499, "step": 17364 }, { "epoch": 0.44588450718239314, "grad_norm": 0.73828125, "learning_rate": 0.0001670427851880633, "loss": 0.8825, "step": 17365 }, { "epoch": 0.445910184378315, "grad_norm": 0.76953125, "learning_rate": 0.00016703947279643114, "loss": 0.8898, "step": 17366 }, { "epoch": 0.44593586157423676, "grad_norm": 0.84375, "learning_rate": 0.00016703616027119537, "loss": 0.8526, "step": 17367 }, { "epoch": 0.4459615387701586, "grad_norm": 0.73046875, "learning_rate": 0.0001670328476123626, "loss": 0.8046, "step": 17368 }, { "epoch": 0.44598721596608043, "grad_norm": 0.79296875, "learning_rate": 0.00016702953481993938, "loss": 0.9355, "step": 17369 }, { "epoch": 0.44601289316200227, "grad_norm": 0.765625, "learning_rate": 0.00016702622189393237, "loss": 1.0455, "step": 17370 }, { "epoch": 0.44603857035792405, "grad_norm": 0.82421875, "learning_rate": 0.00016702290883434818, "loss": 0.847, "step": 17371 }, { "epoch": 0.4460642475538459, "grad_norm": 0.76953125, "learning_rate": 0.00016701959564119337, "loss": 0.89, "step": 17372 }, { "epoch": 0.4460899247497677, "grad_norm": 0.80859375, "learning_rate": 0.00016701628231447456, "loss": 0.8286, "step": 17373 }, { "epoch": 0.4461156019456895, "grad_norm": 0.8671875, "learning_rate": 0.00016701296885419836, "loss": 1.0059, "step": 17374 }, { "epoch": 0.44614127914161134, "grad_norm": 0.78515625, "learning_rate": 0.00016700965526037135, "loss": 0.8661, "step": 17375 }, { "epoch": 0.44616695633753317, "grad_norm": 0.78515625, "learning_rate": 0.00016700634153300016, "loss": 0.9797, "step": 17376 }, { "epoch": 0.44619263353345495, "grad_norm": 0.73828125, "learning_rate": 0.00016700302767209138, "loss": 0.9698, "step": 17377 }, { "epoch": 0.4462183107293768, "grad_norm": 0.75390625, "learning_rate": 0.00016699971367765165, "loss": 0.818, "step": 17378 }, { "epoch": 0.4462439879252986, "grad_norm": 0.7421875, "learning_rate": 0.00016699639954968748, "loss": 0.8321, "step": 17379 }, { "epoch": 0.44626966512122046, "grad_norm": 0.76171875, "learning_rate": 0.00016699308528820558, "loss": 0.894, "step": 17380 }, { "epoch": 0.44629534231714224, "grad_norm": 0.734375, "learning_rate": 0.00016698977089321247, "loss": 0.8879, "step": 17381 }, { "epoch": 0.4463210195130641, "grad_norm": 0.71875, "learning_rate": 0.00016698645636471486, "loss": 0.884, "step": 17382 }, { "epoch": 0.4463466967089859, "grad_norm": 0.890625, "learning_rate": 0.00016698314170271927, "loss": 0.8765, "step": 17383 }, { "epoch": 0.4463723739049077, "grad_norm": 0.73828125, "learning_rate": 0.00016697982690723232, "loss": 0.8216, "step": 17384 }, { "epoch": 0.44639805110082953, "grad_norm": 0.72265625, "learning_rate": 0.00016697651197826062, "loss": 0.8121, "step": 17385 }, { "epoch": 0.44642372829675137, "grad_norm": 0.7578125, "learning_rate": 0.00016697319691581078, "loss": 0.9128, "step": 17386 }, { "epoch": 0.44644940549267315, "grad_norm": 0.75, "learning_rate": 0.00016696988171988942, "loss": 0.9747, "step": 17387 }, { "epoch": 0.446475082688595, "grad_norm": 0.8984375, "learning_rate": 0.00016696656639050316, "loss": 0.9878, "step": 17388 }, { "epoch": 0.4465007598845168, "grad_norm": 0.75, "learning_rate": 0.00016696325092765854, "loss": 0.921, "step": 17389 }, { "epoch": 0.44652643708043865, "grad_norm": 0.91015625, "learning_rate": 0.00016695993533136222, "loss": 1.0408, "step": 17390 }, { "epoch": 0.44655211427636043, "grad_norm": 0.73046875, "learning_rate": 0.0001669566196016208, "loss": 0.8997, "step": 17391 }, { "epoch": 0.44657779147228227, "grad_norm": 0.7890625, "learning_rate": 0.00016695330373844088, "loss": 0.8817, "step": 17392 }, { "epoch": 0.4466034686682041, "grad_norm": 0.79296875, "learning_rate": 0.00016694998774182911, "loss": 0.8232, "step": 17393 }, { "epoch": 0.4466291458641259, "grad_norm": 0.8203125, "learning_rate": 0.00016694667161179202, "loss": 0.796, "step": 17394 }, { "epoch": 0.4466548230600477, "grad_norm": 0.87109375, "learning_rate": 0.00016694335534833625, "loss": 0.9811, "step": 17395 }, { "epoch": 0.44668050025596956, "grad_norm": 0.890625, "learning_rate": 0.00016694003895146846, "loss": 0.9327, "step": 17396 }, { "epoch": 0.44670617745189134, "grad_norm": 0.71875, "learning_rate": 0.0001669367224211952, "loss": 0.8569, "step": 17397 }, { "epoch": 0.4467318546478132, "grad_norm": 0.78515625, "learning_rate": 0.0001669334057575231, "loss": 0.911, "step": 17398 }, { "epoch": 0.446757531843735, "grad_norm": 0.765625, "learning_rate": 0.00016693008896045877, "loss": 0.9059, "step": 17399 }, { "epoch": 0.44678320903965685, "grad_norm": 0.77734375, "learning_rate": 0.00016692677203000882, "loss": 1.0439, "step": 17400 }, { "epoch": 0.44680888623557863, "grad_norm": 0.828125, "learning_rate": 0.00016692345496617988, "loss": 1.0366, "step": 17401 }, { "epoch": 0.44683456343150046, "grad_norm": 0.75390625, "learning_rate": 0.0001669201377689785, "loss": 0.9302, "step": 17402 }, { "epoch": 0.4468602406274223, "grad_norm": 0.8203125, "learning_rate": 0.00016691682043841133, "loss": 1.0343, "step": 17403 }, { "epoch": 0.4468859178233441, "grad_norm": 0.76171875, "learning_rate": 0.000166913502974485, "loss": 0.8937, "step": 17404 }, { "epoch": 0.4469115950192659, "grad_norm": 0.75390625, "learning_rate": 0.0001669101853772061, "loss": 0.9834, "step": 17405 }, { "epoch": 0.44693727221518775, "grad_norm": 0.76953125, "learning_rate": 0.00016690686764658127, "loss": 1.0659, "step": 17406 }, { "epoch": 0.44696294941110953, "grad_norm": 0.76953125, "learning_rate": 0.00016690354978261707, "loss": 0.9534, "step": 17407 }, { "epoch": 0.44698862660703137, "grad_norm": 0.80859375, "learning_rate": 0.00016690023178532015, "loss": 0.8759, "step": 17408 }, { "epoch": 0.4470143038029532, "grad_norm": 0.78515625, "learning_rate": 0.00016689691365469707, "loss": 0.8298, "step": 17409 }, { "epoch": 0.44703998099887504, "grad_norm": 0.83984375, "learning_rate": 0.00016689359539075454, "loss": 0.9914, "step": 17410 }, { "epoch": 0.4470656581947968, "grad_norm": 0.7890625, "learning_rate": 0.0001668902769934991, "loss": 0.924, "step": 17411 }, { "epoch": 0.44709133539071866, "grad_norm": 0.82421875, "learning_rate": 0.00016688695846293734, "loss": 1.0692, "step": 17412 }, { "epoch": 0.4471170125866405, "grad_norm": 0.765625, "learning_rate": 0.00016688363979907596, "loss": 1.0369, "step": 17413 }, { "epoch": 0.4471426897825623, "grad_norm": 0.78515625, "learning_rate": 0.0001668803210019215, "loss": 0.94, "step": 17414 }, { "epoch": 0.4471683669784841, "grad_norm": 0.73828125, "learning_rate": 0.00016687700207148064, "loss": 0.8941, "step": 17415 }, { "epoch": 0.44719404417440595, "grad_norm": 0.77734375, "learning_rate": 0.00016687368300775991, "loss": 0.8551, "step": 17416 }, { "epoch": 0.4472197213703277, "grad_norm": 0.72265625, "learning_rate": 0.000166870363810766, "loss": 0.8384, "step": 17417 }, { "epoch": 0.44724539856624956, "grad_norm": 0.73828125, "learning_rate": 0.0001668670444805055, "loss": 0.9556, "step": 17418 }, { "epoch": 0.4472710757621714, "grad_norm": 0.8203125, "learning_rate": 0.00016686372501698498, "loss": 0.9915, "step": 17419 }, { "epoch": 0.44729675295809324, "grad_norm": 0.7734375, "learning_rate": 0.00016686040542021114, "loss": 0.8888, "step": 17420 }, { "epoch": 0.447322430154015, "grad_norm": 0.75390625, "learning_rate": 0.00016685708569019053, "loss": 0.9015, "step": 17421 }, { "epoch": 0.44734810734993685, "grad_norm": 0.84375, "learning_rate": 0.0001668537658269298, "loss": 0.9643, "step": 17422 }, { "epoch": 0.4473737845458587, "grad_norm": 0.8125, "learning_rate": 0.00016685044583043555, "loss": 0.9482, "step": 17423 }, { "epoch": 0.44739946174178047, "grad_norm": 0.828125, "learning_rate": 0.0001668471257007144, "loss": 1.2293, "step": 17424 }, { "epoch": 0.4474251389377023, "grad_norm": 0.75, "learning_rate": 0.00016684380543777297, "loss": 0.8321, "step": 17425 }, { "epoch": 0.44745081613362414, "grad_norm": 0.7265625, "learning_rate": 0.00016684048504161784, "loss": 0.7866, "step": 17426 }, { "epoch": 0.4474764933295459, "grad_norm": 0.76171875, "learning_rate": 0.00016683716451225568, "loss": 0.8977, "step": 17427 }, { "epoch": 0.44750217052546776, "grad_norm": 0.7265625, "learning_rate": 0.0001668338438496931, "loss": 0.8961, "step": 17428 }, { "epoch": 0.4475278477213896, "grad_norm": 0.77734375, "learning_rate": 0.0001668305230539367, "loss": 0.9631, "step": 17429 }, { "epoch": 0.44755352491731143, "grad_norm": 0.7421875, "learning_rate": 0.00016682720212499313, "loss": 0.9392, "step": 17430 }, { "epoch": 0.4475792021132332, "grad_norm": 0.7578125, "learning_rate": 0.00016682388106286896, "loss": 0.859, "step": 17431 }, { "epoch": 0.44760487930915505, "grad_norm": 0.90234375, "learning_rate": 0.00016682055986757082, "loss": 0.9555, "step": 17432 }, { "epoch": 0.4476305565050769, "grad_norm": 0.91015625, "learning_rate": 0.00016681723853910536, "loss": 0.7861, "step": 17433 }, { "epoch": 0.44765623370099866, "grad_norm": 0.734375, "learning_rate": 0.00016681391707747912, "loss": 1.0315, "step": 17434 }, { "epoch": 0.4476819108969205, "grad_norm": 0.8046875, "learning_rate": 0.00016681059548269883, "loss": 0.9689, "step": 17435 }, { "epoch": 0.44770758809284233, "grad_norm": 0.73828125, "learning_rate": 0.00016680727375477107, "loss": 0.8198, "step": 17436 }, { "epoch": 0.4477332652887641, "grad_norm": 0.8515625, "learning_rate": 0.00016680395189370246, "loss": 1.0193, "step": 17437 }, { "epoch": 0.44775894248468595, "grad_norm": 0.72265625, "learning_rate": 0.00016680062989949957, "loss": 0.8962, "step": 17438 }, { "epoch": 0.4477846196806078, "grad_norm": 0.765625, "learning_rate": 0.00016679730777216907, "loss": 0.8146, "step": 17439 }, { "epoch": 0.4478102968765296, "grad_norm": 0.765625, "learning_rate": 0.00016679398551171758, "loss": 0.8504, "step": 17440 }, { "epoch": 0.4478359740724514, "grad_norm": 0.77734375, "learning_rate": 0.0001667906631181517, "loss": 1.0039, "step": 17441 }, { "epoch": 0.44786165126837324, "grad_norm": 0.78515625, "learning_rate": 0.00016678734059147804, "loss": 0.9287, "step": 17442 }, { "epoch": 0.4478873284642951, "grad_norm": 0.765625, "learning_rate": 0.00016678401793170328, "loss": 0.8571, "step": 17443 }, { "epoch": 0.44791300566021686, "grad_norm": 0.8203125, "learning_rate": 0.000166780695138834, "loss": 0.9256, "step": 17444 }, { "epoch": 0.4479386828561387, "grad_norm": 0.7109375, "learning_rate": 0.00016677737221287682, "loss": 0.898, "step": 17445 }, { "epoch": 0.44796436005206053, "grad_norm": 0.8203125, "learning_rate": 0.00016677404915383837, "loss": 0.9078, "step": 17446 }, { "epoch": 0.4479900372479823, "grad_norm": 0.80078125, "learning_rate": 0.00016677072596172525, "loss": 0.8905, "step": 17447 }, { "epoch": 0.44801571444390415, "grad_norm": 0.89453125, "learning_rate": 0.00016676740263654413, "loss": 0.8837, "step": 17448 }, { "epoch": 0.448041391639826, "grad_norm": 0.76953125, "learning_rate": 0.00016676407917830157, "loss": 0.851, "step": 17449 }, { "epoch": 0.4480670688357478, "grad_norm": 0.74609375, "learning_rate": 0.00016676075558700427, "loss": 0.8816, "step": 17450 }, { "epoch": 0.4480927460316696, "grad_norm": 0.73828125, "learning_rate": 0.00016675743186265882, "loss": 0.8752, "step": 17451 }, { "epoch": 0.44811842322759143, "grad_norm": 0.7890625, "learning_rate": 0.00016675410800527182, "loss": 1.0133, "step": 17452 }, { "epoch": 0.44814410042351327, "grad_norm": 0.83203125, "learning_rate": 0.0001667507840148499, "loss": 1.0337, "step": 17453 }, { "epoch": 0.44816977761943505, "grad_norm": 0.8046875, "learning_rate": 0.00016674745989139972, "loss": 0.9074, "step": 17454 }, { "epoch": 0.4481954548153569, "grad_norm": 0.74609375, "learning_rate": 0.00016674413563492788, "loss": 1.0401, "step": 17455 }, { "epoch": 0.4482211320112787, "grad_norm": 0.83203125, "learning_rate": 0.00016674081124544101, "loss": 0.8992, "step": 17456 }, { "epoch": 0.4482468092072005, "grad_norm": 0.79296875, "learning_rate": 0.00016673748672294575, "loss": 0.9206, "step": 17457 }, { "epoch": 0.44827248640312234, "grad_norm": 0.76953125, "learning_rate": 0.00016673416206744866, "loss": 0.8445, "step": 17458 }, { "epoch": 0.4482981635990442, "grad_norm": 0.81640625, "learning_rate": 0.00016673083727895642, "loss": 0.9723, "step": 17459 }, { "epoch": 0.448323840794966, "grad_norm": 0.78125, "learning_rate": 0.00016672751235747568, "loss": 0.9823, "step": 17460 }, { "epoch": 0.4483495179908878, "grad_norm": 0.80078125, "learning_rate": 0.000166724187303013, "loss": 0.9295, "step": 17461 }, { "epoch": 0.4483751951868096, "grad_norm": 0.79296875, "learning_rate": 0.00016672086211557506, "loss": 0.815, "step": 17462 }, { "epoch": 0.44840087238273146, "grad_norm": 0.80078125, "learning_rate": 0.0001667175367951685, "loss": 0.8818, "step": 17463 }, { "epoch": 0.44842654957865324, "grad_norm": 0.76171875, "learning_rate": 0.00016671421134179987, "loss": 0.9444, "step": 17464 }, { "epoch": 0.4484522267745751, "grad_norm": 0.77734375, "learning_rate": 0.00016671088575547586, "loss": 1.0462, "step": 17465 }, { "epoch": 0.4484779039704969, "grad_norm": 0.8125, "learning_rate": 0.00016670756003620308, "loss": 0.8652, "step": 17466 }, { "epoch": 0.4485035811664187, "grad_norm": 0.75, "learning_rate": 0.00016670423418398814, "loss": 0.9186, "step": 17467 }, { "epoch": 0.44852925836234053, "grad_norm": 0.69921875, "learning_rate": 0.00016670090819883774, "loss": 0.9146, "step": 17468 }, { "epoch": 0.44855493555826237, "grad_norm": 0.765625, "learning_rate": 0.0001666975820807584, "loss": 0.9472, "step": 17469 }, { "epoch": 0.4485806127541842, "grad_norm": 0.8359375, "learning_rate": 0.00016669425582975682, "loss": 0.9897, "step": 17470 }, { "epoch": 0.448606289950106, "grad_norm": 0.8515625, "learning_rate": 0.0001666909294458396, "loss": 0.961, "step": 17471 }, { "epoch": 0.4486319671460278, "grad_norm": 0.79296875, "learning_rate": 0.0001666876029290134, "loss": 1.0614, "step": 17472 }, { "epoch": 0.44865764434194966, "grad_norm": 0.76171875, "learning_rate": 0.00016668427627928486, "loss": 0.857, "step": 17473 }, { "epoch": 0.44868332153787144, "grad_norm": 0.8046875, "learning_rate": 0.00016668094949666052, "loss": 0.899, "step": 17474 }, { "epoch": 0.4487089987337933, "grad_norm": 0.8828125, "learning_rate": 0.0001666776225811471, "loss": 0.9016, "step": 17475 }, { "epoch": 0.4487346759297151, "grad_norm": 0.8046875, "learning_rate": 0.0001666742955327512, "loss": 0.9146, "step": 17476 }, { "epoch": 0.4487603531256369, "grad_norm": 0.7890625, "learning_rate": 0.00016667096835147946, "loss": 0.8966, "step": 17477 }, { "epoch": 0.4487860303215587, "grad_norm": 0.8515625, "learning_rate": 0.0001666676410373385, "loss": 0.9619, "step": 17478 }, { "epoch": 0.44881170751748056, "grad_norm": 0.8359375, "learning_rate": 0.00016666431359033492, "loss": 0.9453, "step": 17479 }, { "epoch": 0.4488373847134024, "grad_norm": 0.80859375, "learning_rate": 0.00016666098601047545, "loss": 0.9385, "step": 17480 }, { "epoch": 0.4488630619093242, "grad_norm": 0.83984375, "learning_rate": 0.00016665765829776657, "loss": 1.0333, "step": 17481 }, { "epoch": 0.448888739105246, "grad_norm": 0.70703125, "learning_rate": 0.00016665433045221506, "loss": 0.9421, "step": 17482 }, { "epoch": 0.44891441630116785, "grad_norm": 0.73046875, "learning_rate": 0.0001666510024738275, "loss": 0.9245, "step": 17483 }, { "epoch": 0.44894009349708963, "grad_norm": 0.78515625, "learning_rate": 0.00016664767436261048, "loss": 1.0749, "step": 17484 }, { "epoch": 0.44896577069301147, "grad_norm": 0.77734375, "learning_rate": 0.00016664434611857068, "loss": 0.9961, "step": 17485 }, { "epoch": 0.4489914478889333, "grad_norm": 0.7265625, "learning_rate": 0.00016664101774171473, "loss": 0.8454, "step": 17486 }, { "epoch": 0.4490171250848551, "grad_norm": 0.78515625, "learning_rate": 0.00016663768923204922, "loss": 0.9439, "step": 17487 }, { "epoch": 0.4490428022807769, "grad_norm": 0.82421875, "learning_rate": 0.00016663436058958082, "loss": 0.9502, "step": 17488 }, { "epoch": 0.44906847947669876, "grad_norm": 0.7734375, "learning_rate": 0.0001666310318143162, "loss": 0.8834, "step": 17489 }, { "epoch": 0.4490941566726206, "grad_norm": 0.71484375, "learning_rate": 0.0001666277029062619, "loss": 0.798, "step": 17490 }, { "epoch": 0.4491198338685424, "grad_norm": 0.87890625, "learning_rate": 0.00016662437386542463, "loss": 0.8848, "step": 17491 }, { "epoch": 0.4491455110644642, "grad_norm": 0.7734375, "learning_rate": 0.000166621044691811, "loss": 0.837, "step": 17492 }, { "epoch": 0.44917118826038605, "grad_norm": 0.77734375, "learning_rate": 0.00016661771538542763, "loss": 0.9056, "step": 17493 }, { "epoch": 0.4491968654563078, "grad_norm": 0.83203125, "learning_rate": 0.00016661438594628121, "loss": 0.836, "step": 17494 }, { "epoch": 0.44922254265222966, "grad_norm": 0.78125, "learning_rate": 0.0001666110563743783, "loss": 0.8096, "step": 17495 }, { "epoch": 0.4492482198481515, "grad_norm": 0.88671875, "learning_rate": 0.0001666077266697256, "loss": 1.0932, "step": 17496 }, { "epoch": 0.4492738970440733, "grad_norm": 0.765625, "learning_rate": 0.00016660439683232968, "loss": 0.9332, "step": 17497 }, { "epoch": 0.4492995742399951, "grad_norm": 0.6640625, "learning_rate": 0.00016660106686219726, "loss": 0.8195, "step": 17498 }, { "epoch": 0.44932525143591695, "grad_norm": 0.77734375, "learning_rate": 0.00016659773675933487, "loss": 0.9367, "step": 17499 }, { "epoch": 0.4493509286318388, "grad_norm": 0.765625, "learning_rate": 0.00016659440652374926, "loss": 0.835, "step": 17500 }, { "epoch": 0.44937660582776057, "grad_norm": 0.734375, "learning_rate": 0.00016659107615544697, "loss": 1.0437, "step": 17501 }, { "epoch": 0.4494022830236824, "grad_norm": 0.7421875, "learning_rate": 0.0001665877456544347, "loss": 0.8661, "step": 17502 }, { "epoch": 0.44942796021960424, "grad_norm": 0.796875, "learning_rate": 0.00016658441502071907, "loss": 0.8886, "step": 17503 }, { "epoch": 0.449453637415526, "grad_norm": 0.78125, "learning_rate": 0.00016658108425430675, "loss": 0.8452, "step": 17504 }, { "epoch": 0.44947931461144786, "grad_norm": 0.80859375, "learning_rate": 0.00016657775335520428, "loss": 0.9821, "step": 17505 }, { "epoch": 0.4495049918073697, "grad_norm": 0.83203125, "learning_rate": 0.0001665744223234184, "loss": 0.8507, "step": 17506 }, { "epoch": 0.44953066900329147, "grad_norm": 0.7734375, "learning_rate": 0.0001665710911589557, "loss": 0.9675, "step": 17507 }, { "epoch": 0.4495563461992133, "grad_norm": 0.828125, "learning_rate": 0.00016656775986182283, "loss": 0.9761, "step": 17508 }, { "epoch": 0.44958202339513514, "grad_norm": 0.7890625, "learning_rate": 0.00016656442843202641, "loss": 0.8624, "step": 17509 }, { "epoch": 0.449607700591057, "grad_norm": 0.7890625, "learning_rate": 0.00016656109686957312, "loss": 0.8585, "step": 17510 }, { "epoch": 0.44963337778697876, "grad_norm": 0.77734375, "learning_rate": 0.0001665577651744696, "loss": 0.8462, "step": 17511 }, { "epoch": 0.4496590549829006, "grad_norm": 0.8203125, "learning_rate": 0.00016655443334672243, "loss": 0.8875, "step": 17512 }, { "epoch": 0.44968473217882243, "grad_norm": 0.78125, "learning_rate": 0.0001665511013863383, "loss": 0.9251, "step": 17513 }, { "epoch": 0.4497104093747442, "grad_norm": 0.79296875, "learning_rate": 0.0001665477692933238, "loss": 0.8987, "step": 17514 }, { "epoch": 0.44973608657066605, "grad_norm": 0.78125, "learning_rate": 0.00016654443706768564, "loss": 0.8962, "step": 17515 }, { "epoch": 0.4497617637665879, "grad_norm": 0.78125, "learning_rate": 0.00016654110470943042, "loss": 0.8651, "step": 17516 }, { "epoch": 0.44978744096250967, "grad_norm": 0.76953125, "learning_rate": 0.0001665377722185648, "loss": 0.8838, "step": 17517 }, { "epoch": 0.4498131181584315, "grad_norm": 0.80078125, "learning_rate": 0.00016653443959509543, "loss": 0.8693, "step": 17518 }, { "epoch": 0.44983879535435334, "grad_norm": 0.828125, "learning_rate": 0.0001665311068390289, "loss": 0.8701, "step": 17519 }, { "epoch": 0.4498644725502752, "grad_norm": 0.8671875, "learning_rate": 0.0001665277739503719, "loss": 0.9804, "step": 17520 }, { "epoch": 0.44989014974619695, "grad_norm": 0.7265625, "learning_rate": 0.00016652444092913102, "loss": 0.8672, "step": 17521 }, { "epoch": 0.4499158269421188, "grad_norm": 0.87109375, "learning_rate": 0.00016652110777531297, "loss": 0.9107, "step": 17522 }, { "epoch": 0.4499415041380406, "grad_norm": 0.79296875, "learning_rate": 0.00016651777448892436, "loss": 0.835, "step": 17523 }, { "epoch": 0.4499671813339624, "grad_norm": 0.70703125, "learning_rate": 0.00016651444106997182, "loss": 0.9473, "step": 17524 }, { "epoch": 0.44999285852988424, "grad_norm": 0.7265625, "learning_rate": 0.000166511107518462, "loss": 0.9799, "step": 17525 }, { "epoch": 0.4500185357258061, "grad_norm": 0.7265625, "learning_rate": 0.0001665077738344016, "loss": 0.8873, "step": 17526 }, { "epoch": 0.45004421292172786, "grad_norm": 0.7734375, "learning_rate": 0.00016650444001779717, "loss": 0.7874, "step": 17527 }, { "epoch": 0.4500698901176497, "grad_norm": 0.75390625, "learning_rate": 0.00016650110606865538, "loss": 1.0092, "step": 17528 }, { "epoch": 0.45009556731357153, "grad_norm": 0.7265625, "learning_rate": 0.00016649777198698295, "loss": 0.8894, "step": 17529 }, { "epoch": 0.45012124450949337, "grad_norm": 0.79296875, "learning_rate": 0.00016649443777278644, "loss": 0.9421, "step": 17530 }, { "epoch": 0.45014692170541515, "grad_norm": 0.7421875, "learning_rate": 0.00016649110342607253, "loss": 0.8591, "step": 17531 }, { "epoch": 0.450172598901337, "grad_norm": 0.7890625, "learning_rate": 0.00016648776894684784, "loss": 0.9895, "step": 17532 }, { "epoch": 0.4501982760972588, "grad_norm": 0.94140625, "learning_rate": 0.00016648443433511903, "loss": 1.0488, "step": 17533 }, { "epoch": 0.4502239532931806, "grad_norm": 0.76171875, "learning_rate": 0.00016648109959089275, "loss": 0.818, "step": 17534 }, { "epoch": 0.45024963048910244, "grad_norm": 0.8046875, "learning_rate": 0.00016647776471417566, "loss": 0.9581, "step": 17535 }, { "epoch": 0.4502753076850243, "grad_norm": 0.79296875, "learning_rate": 0.0001664744297049744, "loss": 0.9791, "step": 17536 }, { "epoch": 0.45030098488094605, "grad_norm": 0.76953125, "learning_rate": 0.0001664710945632956, "loss": 1.0532, "step": 17537 }, { "epoch": 0.4503266620768679, "grad_norm": 0.83984375, "learning_rate": 0.00016646775928914588, "loss": 0.8672, "step": 17538 }, { "epoch": 0.4503523392727897, "grad_norm": 0.90234375, "learning_rate": 0.00016646442388253195, "loss": 0.914, "step": 17539 }, { "epoch": 0.45037801646871156, "grad_norm": 0.859375, "learning_rate": 0.00016646108834346039, "loss": 0.8909, "step": 17540 }, { "epoch": 0.45040369366463334, "grad_norm": 0.78125, "learning_rate": 0.00016645775267193791, "loss": 0.7857, "step": 17541 }, { "epoch": 0.4504293708605552, "grad_norm": 0.80859375, "learning_rate": 0.00016645441686797118, "loss": 0.9525, "step": 17542 }, { "epoch": 0.450455048056477, "grad_norm": 0.73828125, "learning_rate": 0.00016645108093156673, "loss": 0.8472, "step": 17543 }, { "epoch": 0.4504807252523988, "grad_norm": 0.80078125, "learning_rate": 0.0001664477448627313, "loss": 1.0138, "step": 17544 }, { "epoch": 0.45050640244832063, "grad_norm": 0.8046875, "learning_rate": 0.00016644440866147152, "loss": 0.9353, "step": 17545 }, { "epoch": 0.45053207964424247, "grad_norm": 0.7421875, "learning_rate": 0.00016644107232779404, "loss": 0.8364, "step": 17546 }, { "epoch": 0.45055775684016425, "grad_norm": 0.8046875, "learning_rate": 0.0001664377358617055, "loss": 0.9201, "step": 17547 }, { "epoch": 0.4505834340360861, "grad_norm": 0.859375, "learning_rate": 0.00016643439926321252, "loss": 0.8449, "step": 17548 }, { "epoch": 0.4506091112320079, "grad_norm": 0.78125, "learning_rate": 0.00016643106253232186, "loss": 0.8976, "step": 17549 }, { "epoch": 0.45063478842792976, "grad_norm": 0.765625, "learning_rate": 0.00016642772566904005, "loss": 0.9293, "step": 17550 }, { "epoch": 0.45066046562385154, "grad_norm": 0.81640625, "learning_rate": 0.00016642438867337376, "loss": 1.0241, "step": 17551 }, { "epoch": 0.45068614281977337, "grad_norm": 0.77734375, "learning_rate": 0.00016642105154532968, "loss": 0.9351, "step": 17552 }, { "epoch": 0.4507118200156952, "grad_norm": 0.83984375, "learning_rate": 0.00016641771428491445, "loss": 1.0073, "step": 17553 }, { "epoch": 0.450737497211617, "grad_norm": 0.80859375, "learning_rate": 0.00016641437689213473, "loss": 0.914, "step": 17554 }, { "epoch": 0.4507631744075388, "grad_norm": 0.796875, "learning_rate": 0.00016641103936699715, "loss": 0.8835, "step": 17555 }, { "epoch": 0.45078885160346066, "grad_norm": 0.81640625, "learning_rate": 0.00016640770170950835, "loss": 0.9406, "step": 17556 }, { "epoch": 0.45081452879938244, "grad_norm": 0.78515625, "learning_rate": 0.000166404363919675, "loss": 0.8535, "step": 17557 }, { "epoch": 0.4508402059953043, "grad_norm": 0.75390625, "learning_rate": 0.00016640102599750376, "loss": 0.8612, "step": 17558 }, { "epoch": 0.4508658831912261, "grad_norm": 0.76171875, "learning_rate": 0.00016639768794300127, "loss": 0.8882, "step": 17559 }, { "epoch": 0.4508915603871479, "grad_norm": 0.79296875, "learning_rate": 0.00016639434975617418, "loss": 0.9975, "step": 17560 }, { "epoch": 0.45091723758306973, "grad_norm": 0.8203125, "learning_rate": 0.00016639101143702917, "loss": 0.8288, "step": 17561 }, { "epoch": 0.45094291477899157, "grad_norm": 0.69140625, "learning_rate": 0.00016638767298557286, "loss": 0.852, "step": 17562 }, { "epoch": 0.4509685919749134, "grad_norm": 0.74609375, "learning_rate": 0.0001663843344018119, "loss": 0.8584, "step": 17563 }, { "epoch": 0.4509942691708352, "grad_norm": 0.76171875, "learning_rate": 0.00016638099568575297, "loss": 0.8696, "step": 17564 }, { "epoch": 0.451019946366757, "grad_norm": 0.76953125, "learning_rate": 0.0001663776568374027, "loss": 0.8608, "step": 17565 }, { "epoch": 0.45104562356267885, "grad_norm": 0.83203125, "learning_rate": 0.0001663743178567678, "loss": 0.8933, "step": 17566 }, { "epoch": 0.45107130075860064, "grad_norm": 0.75390625, "learning_rate": 0.0001663709787438548, "loss": 1.0395, "step": 17567 }, { "epoch": 0.45109697795452247, "grad_norm": 0.8203125, "learning_rate": 0.0001663676394986705, "loss": 0.9897, "step": 17568 }, { "epoch": 0.4511226551504443, "grad_norm": 0.8359375, "learning_rate": 0.00016636430012122148, "loss": 0.8714, "step": 17569 }, { "epoch": 0.4511483323463661, "grad_norm": 0.8359375, "learning_rate": 0.00016636096061151438, "loss": 0.9764, "step": 17570 }, { "epoch": 0.4511740095422879, "grad_norm": 0.91015625, "learning_rate": 0.0001663576209695559, "loss": 1.0549, "step": 17571 }, { "epoch": 0.45119968673820976, "grad_norm": 0.765625, "learning_rate": 0.00016635428119535266, "loss": 0.9548, "step": 17572 }, { "epoch": 0.4512253639341316, "grad_norm": 0.8046875, "learning_rate": 0.00016635094128891135, "loss": 0.9467, "step": 17573 }, { "epoch": 0.4512510411300534, "grad_norm": 0.80859375, "learning_rate": 0.0001663476012502386, "loss": 0.9165, "step": 17574 }, { "epoch": 0.4512767183259752, "grad_norm": 0.828125, "learning_rate": 0.00016634426107934108, "loss": 0.9375, "step": 17575 }, { "epoch": 0.45130239552189705, "grad_norm": 0.76953125, "learning_rate": 0.00016634092077622543, "loss": 0.9016, "step": 17576 }, { "epoch": 0.45132807271781883, "grad_norm": 0.83984375, "learning_rate": 0.00016633758034089832, "loss": 1.0317, "step": 17577 }, { "epoch": 0.45135374991374067, "grad_norm": 0.796875, "learning_rate": 0.0001663342397733664, "loss": 0.8778, "step": 17578 }, { "epoch": 0.4513794271096625, "grad_norm": 0.796875, "learning_rate": 0.00016633089907363636, "loss": 1.0491, "step": 17579 }, { "epoch": 0.4514051043055843, "grad_norm": 0.71875, "learning_rate": 0.00016632755824171479, "loss": 0.9528, "step": 17580 }, { "epoch": 0.4514307815015061, "grad_norm": 0.6953125, "learning_rate": 0.0001663242172776084, "loss": 0.8868, "step": 17581 }, { "epoch": 0.45145645869742795, "grad_norm": 0.71484375, "learning_rate": 0.00016632087618132383, "loss": 0.8317, "step": 17582 }, { "epoch": 0.4514821358933498, "grad_norm": 0.80078125, "learning_rate": 0.00016631753495286778, "loss": 0.9383, "step": 17583 }, { "epoch": 0.45150781308927157, "grad_norm": 0.84375, "learning_rate": 0.00016631419359224684, "loss": 1.1327, "step": 17584 }, { "epoch": 0.4515334902851934, "grad_norm": 0.75, "learning_rate": 0.0001663108520994677, "loss": 0.8643, "step": 17585 }, { "epoch": 0.45155916748111524, "grad_norm": 0.7734375, "learning_rate": 0.00016630751047453702, "loss": 0.9352, "step": 17586 }, { "epoch": 0.451584844677037, "grad_norm": 0.76171875, "learning_rate": 0.00016630416871746148, "loss": 0.9882, "step": 17587 }, { "epoch": 0.45161052187295886, "grad_norm": 0.7421875, "learning_rate": 0.00016630082682824772, "loss": 0.948, "step": 17588 }, { "epoch": 0.4516361990688807, "grad_norm": 0.71484375, "learning_rate": 0.00016629748480690242, "loss": 0.8637, "step": 17589 }, { "epoch": 0.4516618762648025, "grad_norm": 0.79296875, "learning_rate": 0.00016629414265343216, "loss": 1.0284, "step": 17590 }, { "epoch": 0.4516875534607243, "grad_norm": 0.7421875, "learning_rate": 0.00016629080036784373, "loss": 0.9123, "step": 17591 }, { "epoch": 0.45171323065664615, "grad_norm": 0.76171875, "learning_rate": 0.00016628745795014367, "loss": 0.7366, "step": 17592 }, { "epoch": 0.451738907852568, "grad_norm": 0.80078125, "learning_rate": 0.00016628411540033872, "loss": 0.9038, "step": 17593 }, { "epoch": 0.45176458504848976, "grad_norm": 0.734375, "learning_rate": 0.00016628077271843553, "loss": 0.9907, "step": 17594 }, { "epoch": 0.4517902622444116, "grad_norm": 0.73046875, "learning_rate": 0.00016627742990444073, "loss": 0.8683, "step": 17595 }, { "epoch": 0.45181593944033344, "grad_norm": 0.8828125, "learning_rate": 0.000166274086958361, "loss": 1.0556, "step": 17596 }, { "epoch": 0.4518416166362552, "grad_norm": 0.76953125, "learning_rate": 0.00016627074388020298, "loss": 0.9242, "step": 17597 }, { "epoch": 0.45186729383217705, "grad_norm": 0.8125, "learning_rate": 0.00016626740066997338, "loss": 0.9072, "step": 17598 }, { "epoch": 0.4518929710280989, "grad_norm": 0.77734375, "learning_rate": 0.00016626405732767882, "loss": 0.8659, "step": 17599 }, { "epoch": 0.45191864822402067, "grad_norm": 0.828125, "learning_rate": 0.000166260713853326, "loss": 0.9653, "step": 17600 }, { "epoch": 0.4519443254199425, "grad_norm": 0.84375, "learning_rate": 0.00016625737024692153, "loss": 0.8947, "step": 17601 }, { "epoch": 0.45197000261586434, "grad_norm": 0.828125, "learning_rate": 0.00016625402650847216, "loss": 0.8347, "step": 17602 }, { "epoch": 0.4519956798117862, "grad_norm": 0.8125, "learning_rate": 0.00016625068263798447, "loss": 0.999, "step": 17603 }, { "epoch": 0.45202135700770796, "grad_norm": 0.78125, "learning_rate": 0.00016624733863546514, "loss": 0.8652, "step": 17604 }, { "epoch": 0.4520470342036298, "grad_norm": 0.8359375, "learning_rate": 0.00016624399450092085, "loss": 0.9183, "step": 17605 }, { "epoch": 0.45207271139955163, "grad_norm": 0.76953125, "learning_rate": 0.00016624065023435825, "loss": 0.9203, "step": 17606 }, { "epoch": 0.4520983885954734, "grad_norm": 0.73828125, "learning_rate": 0.00016623730583578404, "loss": 0.923, "step": 17607 }, { "epoch": 0.45212406579139525, "grad_norm": 0.7734375, "learning_rate": 0.0001662339613052049, "loss": 0.8849, "step": 17608 }, { "epoch": 0.4521497429873171, "grad_norm": 0.94921875, "learning_rate": 0.00016623061664262744, "loss": 0.8679, "step": 17609 }, { "epoch": 0.45217542018323886, "grad_norm": 0.765625, "learning_rate": 0.0001662272718480583, "loss": 0.8337, "step": 17610 }, { "epoch": 0.4522010973791607, "grad_norm": 0.7890625, "learning_rate": 0.0001662239269215042, "loss": 0.8895, "step": 17611 }, { "epoch": 0.45222677457508254, "grad_norm": 0.79296875, "learning_rate": 0.0001662205818629718, "loss": 1.0044, "step": 17612 }, { "epoch": 0.45225245177100437, "grad_norm": 0.8046875, "learning_rate": 0.00016621723667246778, "loss": 1.0143, "step": 17613 }, { "epoch": 0.45227812896692615, "grad_norm": 0.80859375, "learning_rate": 0.0001662138913499988, "loss": 0.9538, "step": 17614 }, { "epoch": 0.452303806162848, "grad_norm": 0.71875, "learning_rate": 0.00016621054589557147, "loss": 0.7906, "step": 17615 }, { "epoch": 0.4523294833587698, "grad_norm": 0.8125, "learning_rate": 0.00016620720030919252, "loss": 0.9591, "step": 17616 }, { "epoch": 0.4523551605546916, "grad_norm": 0.79296875, "learning_rate": 0.00016620385459086863, "loss": 1.0174, "step": 17617 }, { "epoch": 0.45238083775061344, "grad_norm": 0.84375, "learning_rate": 0.0001662005087406064, "loss": 1.0123, "step": 17618 }, { "epoch": 0.4524065149465353, "grad_norm": 0.87890625, "learning_rate": 0.00016619716275841255, "loss": 0.9325, "step": 17619 }, { "epoch": 0.45243219214245706, "grad_norm": 0.78125, "learning_rate": 0.00016619381664429375, "loss": 0.9292, "step": 17620 }, { "epoch": 0.4524578693383789, "grad_norm": 0.71875, "learning_rate": 0.00016619047039825665, "loss": 0.9629, "step": 17621 }, { "epoch": 0.45248354653430073, "grad_norm": 0.8359375, "learning_rate": 0.0001661871240203079, "loss": 0.919, "step": 17622 }, { "epoch": 0.45250922373022257, "grad_norm": 0.84765625, "learning_rate": 0.00016618377751045422, "loss": 0.9983, "step": 17623 }, { "epoch": 0.45253490092614435, "grad_norm": 0.75390625, "learning_rate": 0.00016618043086870222, "loss": 0.906, "step": 17624 }, { "epoch": 0.4525605781220662, "grad_norm": 0.79296875, "learning_rate": 0.0001661770840950586, "loss": 0.8829, "step": 17625 }, { "epoch": 0.452586255317988, "grad_norm": 0.84375, "learning_rate": 0.00016617373718953005, "loss": 0.9954, "step": 17626 }, { "epoch": 0.4526119325139098, "grad_norm": 0.79296875, "learning_rate": 0.0001661703901521232, "loss": 0.9228, "step": 17627 }, { "epoch": 0.45263760970983163, "grad_norm": 0.83984375, "learning_rate": 0.00016616704298284476, "loss": 0.9233, "step": 17628 }, { "epoch": 0.45266328690575347, "grad_norm": 0.734375, "learning_rate": 0.00016616369568170136, "loss": 0.9298, "step": 17629 }, { "epoch": 0.45268896410167525, "grad_norm": 0.8046875, "learning_rate": 0.0001661603482486997, "loss": 0.8046, "step": 17630 }, { "epoch": 0.4527146412975971, "grad_norm": 0.97265625, "learning_rate": 0.00016615700068384643, "loss": 0.8605, "step": 17631 }, { "epoch": 0.4527403184935189, "grad_norm": 0.8359375, "learning_rate": 0.00016615365298714823, "loss": 0.782, "step": 17632 }, { "epoch": 0.45276599568944076, "grad_norm": 0.734375, "learning_rate": 0.00016615030515861179, "loss": 0.9847, "step": 17633 }, { "epoch": 0.45279167288536254, "grad_norm": 0.8203125, "learning_rate": 0.00016614695719824378, "loss": 0.8553, "step": 17634 }, { "epoch": 0.4528173500812844, "grad_norm": 0.828125, "learning_rate": 0.00016614360910605086, "loss": 0.9801, "step": 17635 }, { "epoch": 0.4528430272772062, "grad_norm": 0.78125, "learning_rate": 0.0001661402608820397, "loss": 1.0304, "step": 17636 }, { "epoch": 0.452868704473128, "grad_norm": 0.76953125, "learning_rate": 0.00016613691252621694, "loss": 0.9315, "step": 17637 }, { "epoch": 0.45289438166904983, "grad_norm": 0.76171875, "learning_rate": 0.0001661335640385893, "loss": 0.9706, "step": 17638 }, { "epoch": 0.45292005886497166, "grad_norm": 0.8203125, "learning_rate": 0.00016613021541916347, "loss": 0.9203, "step": 17639 }, { "epoch": 0.45294573606089344, "grad_norm": 0.79296875, "learning_rate": 0.00016612686666794607, "loss": 0.8858, "step": 17640 }, { "epoch": 0.4529714132568153, "grad_norm": 0.80859375, "learning_rate": 0.00016612351778494378, "loss": 0.9791, "step": 17641 }, { "epoch": 0.4529970904527371, "grad_norm": 0.92578125, "learning_rate": 0.00016612016877016333, "loss": 0.9869, "step": 17642 }, { "epoch": 0.45302276764865895, "grad_norm": 0.796875, "learning_rate": 0.00016611681962361137, "loss": 0.7944, "step": 17643 }, { "epoch": 0.45304844484458073, "grad_norm": 0.734375, "learning_rate": 0.0001661134703452945, "loss": 0.9157, "step": 17644 }, { "epoch": 0.45307412204050257, "grad_norm": 0.78125, "learning_rate": 0.0001661101209352195, "loss": 0.987, "step": 17645 }, { "epoch": 0.4530997992364244, "grad_norm": 0.83203125, "learning_rate": 0.000166106771393393, "loss": 0.9277, "step": 17646 }, { "epoch": 0.4531254764323462, "grad_norm": 0.80078125, "learning_rate": 0.00016610342171982168, "loss": 0.915, "step": 17647 }, { "epoch": 0.453151153628268, "grad_norm": 0.78515625, "learning_rate": 0.00016610007191451219, "loss": 0.8768, "step": 17648 }, { "epoch": 0.45317683082418986, "grad_norm": 0.8046875, "learning_rate": 0.0001660967219774712, "loss": 0.9943, "step": 17649 }, { "epoch": 0.45320250802011164, "grad_norm": 0.765625, "learning_rate": 0.00016609337190870546, "loss": 0.8894, "step": 17650 }, { "epoch": 0.4532281852160335, "grad_norm": 0.7734375, "learning_rate": 0.0001660900217082216, "loss": 0.9849, "step": 17651 }, { "epoch": 0.4532538624119553, "grad_norm": 0.81640625, "learning_rate": 0.00016608667137602627, "loss": 0.9042, "step": 17652 }, { "epoch": 0.45327953960787715, "grad_norm": 0.7421875, "learning_rate": 0.0001660833209121262, "loss": 1.0515, "step": 17653 }, { "epoch": 0.4533052168037989, "grad_norm": 0.75390625, "learning_rate": 0.00016607997031652802, "loss": 0.8035, "step": 17654 }, { "epoch": 0.45333089399972076, "grad_norm": 0.76171875, "learning_rate": 0.00016607661958923842, "loss": 0.9419, "step": 17655 }, { "epoch": 0.4533565711956426, "grad_norm": 0.8359375, "learning_rate": 0.00016607326873026412, "loss": 0.893, "step": 17656 }, { "epoch": 0.4533822483915644, "grad_norm": 0.7109375, "learning_rate": 0.00016606991773961173, "loss": 0.875, "step": 17657 }, { "epoch": 0.4534079255874862, "grad_norm": 0.80078125, "learning_rate": 0.00016606656661728798, "loss": 1.0044, "step": 17658 }, { "epoch": 0.45343360278340805, "grad_norm": 0.8203125, "learning_rate": 0.00016606321536329955, "loss": 0.9058, "step": 17659 }, { "epoch": 0.45345927997932983, "grad_norm": 0.75390625, "learning_rate": 0.00016605986397765307, "loss": 0.8361, "step": 17660 }, { "epoch": 0.45348495717525167, "grad_norm": 0.7734375, "learning_rate": 0.00016605651246035525, "loss": 0.8655, "step": 17661 }, { "epoch": 0.4535106343711735, "grad_norm": 0.796875, "learning_rate": 0.0001660531608114128, "loss": 0.9589, "step": 17662 }, { "epoch": 0.45353631156709534, "grad_norm": 0.78125, "learning_rate": 0.00016604980903083233, "loss": 0.9275, "step": 17663 }, { "epoch": 0.4535619887630171, "grad_norm": 0.88671875, "learning_rate": 0.00016604645711862056, "loss": 0.851, "step": 17664 }, { "epoch": 0.45358766595893896, "grad_norm": 0.86328125, "learning_rate": 0.00016604310507478416, "loss": 0.8973, "step": 17665 }, { "epoch": 0.4536133431548608, "grad_norm": 0.79296875, "learning_rate": 0.00016603975289932984, "loss": 0.9417, "step": 17666 }, { "epoch": 0.4536390203507826, "grad_norm": 0.765625, "learning_rate": 0.00016603640059226427, "loss": 0.9225, "step": 17667 }, { "epoch": 0.4536646975467044, "grad_norm": 0.8359375, "learning_rate": 0.0001660330481535941, "loss": 0.937, "step": 17668 }, { "epoch": 0.45369037474262625, "grad_norm": 0.7890625, "learning_rate": 0.00016602969558332598, "loss": 0.7995, "step": 17669 }, { "epoch": 0.453716051938548, "grad_norm": 0.73828125, "learning_rate": 0.0001660263428814667, "loss": 0.8926, "step": 17670 }, { "epoch": 0.45374172913446986, "grad_norm": 0.80078125, "learning_rate": 0.00016602299004802288, "loss": 0.9453, "step": 17671 }, { "epoch": 0.4537674063303917, "grad_norm": 0.77734375, "learning_rate": 0.0001660196370830012, "loss": 0.925, "step": 17672 }, { "epoch": 0.45379308352631353, "grad_norm": 0.8125, "learning_rate": 0.00016601628398640834, "loss": 0.8556, "step": 17673 }, { "epoch": 0.4538187607222353, "grad_norm": 0.7109375, "learning_rate": 0.00016601293075825099, "loss": 0.6541, "step": 17674 }, { "epoch": 0.45384443791815715, "grad_norm": 0.80859375, "learning_rate": 0.00016600957739853583, "loss": 0.9884, "step": 17675 }, { "epoch": 0.453870115114079, "grad_norm": 0.82421875, "learning_rate": 0.00016600622390726955, "loss": 0.8685, "step": 17676 }, { "epoch": 0.45389579231000077, "grad_norm": 0.73046875, "learning_rate": 0.00016600287028445884, "loss": 0.7945, "step": 17677 }, { "epoch": 0.4539214695059226, "grad_norm": 0.76953125, "learning_rate": 0.00016599951653011033, "loss": 0.9562, "step": 17678 }, { "epoch": 0.45394714670184444, "grad_norm": 0.75, "learning_rate": 0.00016599616264423078, "loss": 0.9194, "step": 17679 }, { "epoch": 0.4539728238977662, "grad_norm": 0.78125, "learning_rate": 0.00016599280862682685, "loss": 0.9491, "step": 17680 }, { "epoch": 0.45399850109368806, "grad_norm": 0.765625, "learning_rate": 0.00016598945447790522, "loss": 0.8194, "step": 17681 }, { "epoch": 0.4540241782896099, "grad_norm": 0.76171875, "learning_rate": 0.00016598610019747252, "loss": 0.8114, "step": 17682 }, { "epoch": 0.45404985548553173, "grad_norm": 0.7734375, "learning_rate": 0.0001659827457855355, "loss": 0.8908, "step": 17683 }, { "epoch": 0.4540755326814535, "grad_norm": 0.8125, "learning_rate": 0.00016597939124210085, "loss": 0.8967, "step": 17684 }, { "epoch": 0.45410120987737534, "grad_norm": 0.796875, "learning_rate": 0.0001659760365671752, "loss": 0.8304, "step": 17685 }, { "epoch": 0.4541268870732972, "grad_norm": 0.796875, "learning_rate": 0.0001659726817607653, "loss": 0.897, "step": 17686 }, { "epoch": 0.45415256426921896, "grad_norm": 0.73046875, "learning_rate": 0.00016596932682287778, "loss": 0.8723, "step": 17687 }, { "epoch": 0.4541782414651408, "grad_norm": 0.80078125, "learning_rate": 0.00016596597175351938, "loss": 0.9007, "step": 17688 }, { "epoch": 0.45420391866106263, "grad_norm": 0.7734375, "learning_rate": 0.00016596261655269675, "loss": 0.7929, "step": 17689 }, { "epoch": 0.4542295958569844, "grad_norm": 0.796875, "learning_rate": 0.00016595926122041656, "loss": 0.9884, "step": 17690 }, { "epoch": 0.45425527305290625, "grad_norm": 0.78125, "learning_rate": 0.00016595590575668555, "loss": 0.9064, "step": 17691 }, { "epoch": 0.4542809502488281, "grad_norm": 0.78125, "learning_rate": 0.00016595255016151032, "loss": 0.7991, "step": 17692 }, { "epoch": 0.4543066274447499, "grad_norm": 0.80859375, "learning_rate": 0.00016594919443489769, "loss": 0.9565, "step": 17693 }, { "epoch": 0.4543323046406717, "grad_norm": 0.78125, "learning_rate": 0.00016594583857685425, "loss": 0.9329, "step": 17694 }, { "epoch": 0.45435798183659354, "grad_norm": 0.81640625, "learning_rate": 0.00016594248258738667, "loss": 0.8918, "step": 17695 }, { "epoch": 0.4543836590325154, "grad_norm": 0.8125, "learning_rate": 0.00016593912646650172, "loss": 0.9595, "step": 17696 }, { "epoch": 0.45440933622843716, "grad_norm": 0.73828125, "learning_rate": 0.00016593577021420603, "loss": 0.8795, "step": 17697 }, { "epoch": 0.454435013424359, "grad_norm": 0.8515625, "learning_rate": 0.00016593241383050633, "loss": 0.901, "step": 17698 }, { "epoch": 0.4544606906202808, "grad_norm": 0.71484375, "learning_rate": 0.00016592905731540924, "loss": 0.9016, "step": 17699 }, { "epoch": 0.4544863678162026, "grad_norm": 0.76171875, "learning_rate": 0.00016592570066892152, "loss": 0.8478, "step": 17700 }, { "epoch": 0.45451204501212444, "grad_norm": 0.796875, "learning_rate": 0.00016592234389104985, "loss": 0.8307, "step": 17701 }, { "epoch": 0.4545377222080463, "grad_norm": 0.76953125, "learning_rate": 0.00016591898698180087, "loss": 1.0105, "step": 17702 }, { "epoch": 0.4545633994039681, "grad_norm": 0.8046875, "learning_rate": 0.00016591562994118132, "loss": 0.9991, "step": 17703 }, { "epoch": 0.4545890765998899, "grad_norm": 0.77734375, "learning_rate": 0.00016591227276919787, "loss": 0.9578, "step": 17704 }, { "epoch": 0.45461475379581173, "grad_norm": 0.7421875, "learning_rate": 0.00016590891546585724, "loss": 1.1164, "step": 17705 }, { "epoch": 0.45464043099173357, "grad_norm": 0.80078125, "learning_rate": 0.00016590555803116608, "loss": 0.9447, "step": 17706 }, { "epoch": 0.45466610818765535, "grad_norm": 0.81640625, "learning_rate": 0.00016590220046513107, "loss": 0.956, "step": 17707 }, { "epoch": 0.4546917853835772, "grad_norm": 0.80859375, "learning_rate": 0.00016589884276775894, "loss": 0.8847, "step": 17708 }, { "epoch": 0.454717462579499, "grad_norm": 0.8203125, "learning_rate": 0.00016589548493905638, "loss": 0.9818, "step": 17709 }, { "epoch": 0.4547431397754208, "grad_norm": 0.796875, "learning_rate": 0.00016589212697903008, "loss": 0.8524, "step": 17710 }, { "epoch": 0.45476881697134264, "grad_norm": 0.7578125, "learning_rate": 0.00016588876888768672, "loss": 0.9588, "step": 17711 }, { "epoch": 0.4547944941672645, "grad_norm": 0.765625, "learning_rate": 0.00016588541066503296, "loss": 0.9097, "step": 17712 }, { "epoch": 0.4548201713631863, "grad_norm": 0.71484375, "learning_rate": 0.00016588205231107556, "loss": 0.9404, "step": 17713 }, { "epoch": 0.4548458485591081, "grad_norm": 0.8125, "learning_rate": 0.00016587869382582118, "loss": 0.948, "step": 17714 }, { "epoch": 0.4548715257550299, "grad_norm": 0.7578125, "learning_rate": 0.00016587533520927652, "loss": 0.8005, "step": 17715 }, { "epoch": 0.45489720295095176, "grad_norm": 0.80078125, "learning_rate": 0.00016587197646144826, "loss": 0.9651, "step": 17716 }, { "epoch": 0.45492288014687354, "grad_norm": 0.77734375, "learning_rate": 0.00016586861758234307, "loss": 0.9887, "step": 17717 }, { "epoch": 0.4549485573427954, "grad_norm": 0.75, "learning_rate": 0.0001658652585719677, "loss": 0.8343, "step": 17718 }, { "epoch": 0.4549742345387172, "grad_norm": 0.77734375, "learning_rate": 0.00016586189943032883, "loss": 0.9223, "step": 17719 }, { "epoch": 0.454999911734639, "grad_norm": 0.83203125, "learning_rate": 0.00016585854015743314, "loss": 0.981, "step": 17720 }, { "epoch": 0.45502558893056083, "grad_norm": 0.8125, "learning_rate": 0.00016585518075328733, "loss": 0.9646, "step": 17721 }, { "epoch": 0.45505126612648267, "grad_norm": 0.8046875, "learning_rate": 0.00016585182121789805, "loss": 1.1557, "step": 17722 }, { "epoch": 0.4550769433224045, "grad_norm": 0.8515625, "learning_rate": 0.0001658484615512721, "loss": 0.9063, "step": 17723 }, { "epoch": 0.4551026205183263, "grad_norm": 0.78515625, "learning_rate": 0.00016584510175341607, "loss": 0.9715, "step": 17724 }, { "epoch": 0.4551282977142481, "grad_norm": 0.74609375, "learning_rate": 0.00016584174182433673, "loss": 0.7553, "step": 17725 }, { "epoch": 0.45515397491016996, "grad_norm": 0.8359375, "learning_rate": 0.00016583838176404073, "loss": 0.9823, "step": 17726 }, { "epoch": 0.45517965210609174, "grad_norm": 0.94140625, "learning_rate": 0.00016583502157253477, "loss": 0.9152, "step": 17727 }, { "epoch": 0.4552053293020136, "grad_norm": 0.78125, "learning_rate": 0.00016583166124982558, "loss": 1.0028, "step": 17728 }, { "epoch": 0.4552310064979354, "grad_norm": 0.82421875, "learning_rate": 0.00016582830079591985, "loss": 0.8479, "step": 17729 }, { "epoch": 0.4552566836938572, "grad_norm": 0.828125, "learning_rate": 0.00016582494021082425, "loss": 0.9156, "step": 17730 }, { "epoch": 0.455282360889779, "grad_norm": 0.71484375, "learning_rate": 0.00016582157949454547, "loss": 0.7595, "step": 17731 }, { "epoch": 0.45530803808570086, "grad_norm": 0.81640625, "learning_rate": 0.00016581821864709024, "loss": 0.7455, "step": 17732 }, { "epoch": 0.4553337152816227, "grad_norm": 0.75, "learning_rate": 0.00016581485766846527, "loss": 1.0259, "step": 17733 }, { "epoch": 0.4553593924775445, "grad_norm": 0.83203125, "learning_rate": 0.0001658114965586772, "loss": 1.0352, "step": 17734 }, { "epoch": 0.4553850696734663, "grad_norm": 0.6796875, "learning_rate": 0.00016580813531773278, "loss": 0.946, "step": 17735 }, { "epoch": 0.45541074686938815, "grad_norm": 0.71484375, "learning_rate": 0.00016580477394563867, "loss": 0.8418, "step": 17736 }, { "epoch": 0.45543642406530993, "grad_norm": 0.71875, "learning_rate": 0.0001658014124424016, "loss": 0.7802, "step": 17737 }, { "epoch": 0.45546210126123177, "grad_norm": 0.72265625, "learning_rate": 0.00016579805080802826, "loss": 0.892, "step": 17738 }, { "epoch": 0.4554877784571536, "grad_norm": 0.79296875, "learning_rate": 0.00016579468904252538, "loss": 0.975, "step": 17739 }, { "epoch": 0.4555134556530754, "grad_norm": 0.74609375, "learning_rate": 0.0001657913271458996, "loss": 0.8112, "step": 17740 }, { "epoch": 0.4555391328489972, "grad_norm": 0.8046875, "learning_rate": 0.00016578796511815765, "loss": 0.935, "step": 17741 }, { "epoch": 0.45556481004491906, "grad_norm": 0.75, "learning_rate": 0.0001657846029593062, "loss": 0.8378, "step": 17742 }, { "epoch": 0.4555904872408409, "grad_norm": 0.7421875, "learning_rate": 0.000165781240669352, "loss": 0.864, "step": 17743 }, { "epoch": 0.45561616443676267, "grad_norm": 0.78125, "learning_rate": 0.00016577787824830176, "loss": 0.8278, "step": 17744 }, { "epoch": 0.4556418416326845, "grad_norm": 0.7734375, "learning_rate": 0.00016577451569616209, "loss": 0.9473, "step": 17745 }, { "epoch": 0.45566751882860634, "grad_norm": 0.80859375, "learning_rate": 0.00016577115301293983, "loss": 0.9229, "step": 17746 }, { "epoch": 0.4556931960245281, "grad_norm": 0.796875, "learning_rate": 0.00016576779019864156, "loss": 1.0282, "step": 17747 }, { "epoch": 0.45571887322044996, "grad_norm": 0.89453125, "learning_rate": 0.000165764427253274, "loss": 0.8987, "step": 17748 }, { "epoch": 0.4557445504163718, "grad_norm": 0.84375, "learning_rate": 0.0001657610641768439, "loss": 1.0006, "step": 17749 }, { "epoch": 0.4557702276122936, "grad_norm": 0.75390625, "learning_rate": 0.0001657577009693579, "loss": 0.9168, "step": 17750 }, { "epoch": 0.4557959048082154, "grad_norm": 0.6875, "learning_rate": 0.00016575433763082282, "loss": 0.8734, "step": 17751 }, { "epoch": 0.45582158200413725, "grad_norm": 0.74609375, "learning_rate": 0.00016575097416124523, "loss": 0.8956, "step": 17752 }, { "epoch": 0.4558472592000591, "grad_norm": 0.8125, "learning_rate": 0.00016574761056063192, "loss": 0.8244, "step": 17753 }, { "epoch": 0.45587293639598087, "grad_norm": 0.78125, "learning_rate": 0.00016574424682898954, "loss": 0.8471, "step": 17754 }, { "epoch": 0.4558986135919027, "grad_norm": 0.765625, "learning_rate": 0.0001657408829663248, "loss": 0.874, "step": 17755 }, { "epoch": 0.45592429078782454, "grad_norm": 0.79296875, "learning_rate": 0.00016573751897264446, "loss": 1.1302, "step": 17756 }, { "epoch": 0.4559499679837463, "grad_norm": 0.76171875, "learning_rate": 0.00016573415484795516, "loss": 0.9249, "step": 17757 }, { "epoch": 0.45597564517966815, "grad_norm": 0.84765625, "learning_rate": 0.0001657307905922636, "loss": 1.1425, "step": 17758 }, { "epoch": 0.45600132237559, "grad_norm": 0.76953125, "learning_rate": 0.00016572742620557656, "loss": 0.8852, "step": 17759 }, { "epoch": 0.45602699957151177, "grad_norm": 0.84765625, "learning_rate": 0.00016572406168790068, "loss": 0.921, "step": 17760 }, { "epoch": 0.4560526767674336, "grad_norm": 0.69921875, "learning_rate": 0.00016572069703924268, "loss": 0.8515, "step": 17761 }, { "epoch": 0.45607835396335544, "grad_norm": 0.71875, "learning_rate": 0.00016571733225960927, "loss": 0.8323, "step": 17762 }, { "epoch": 0.4561040311592772, "grad_norm": 0.7734375, "learning_rate": 0.00016571396734900716, "loss": 0.897, "step": 17763 }, { "epoch": 0.45612970835519906, "grad_norm": 0.7421875, "learning_rate": 0.00016571060230744303, "loss": 0.9406, "step": 17764 }, { "epoch": 0.4561553855511209, "grad_norm": 0.875, "learning_rate": 0.00016570723713492359, "loss": 0.889, "step": 17765 }, { "epoch": 0.45618106274704273, "grad_norm": 0.73828125, "learning_rate": 0.0001657038718314556, "loss": 0.8834, "step": 17766 }, { "epoch": 0.4562067399429645, "grad_norm": 0.73046875, "learning_rate": 0.0001657005063970457, "loss": 0.7988, "step": 17767 }, { "epoch": 0.45623241713888635, "grad_norm": 0.7890625, "learning_rate": 0.00016569714083170065, "loss": 0.8573, "step": 17768 }, { "epoch": 0.4562580943348082, "grad_norm": 0.8671875, "learning_rate": 0.00016569377513542713, "loss": 0.9912, "step": 17769 }, { "epoch": 0.45628377153072996, "grad_norm": 0.6953125, "learning_rate": 0.00016569040930823183, "loss": 0.8179, "step": 17770 }, { "epoch": 0.4563094487266518, "grad_norm": 0.73828125, "learning_rate": 0.00016568704335012148, "loss": 0.9266, "step": 17771 }, { "epoch": 0.45633512592257364, "grad_norm": 0.796875, "learning_rate": 0.00016568367726110282, "loss": 0.9526, "step": 17772 }, { "epoch": 0.4563608031184954, "grad_norm": 0.76953125, "learning_rate": 0.0001656803110411825, "loss": 0.9393, "step": 17773 }, { "epoch": 0.45638648031441725, "grad_norm": 0.84765625, "learning_rate": 0.00016567694469036726, "loss": 0.8629, "step": 17774 }, { "epoch": 0.4564121575103391, "grad_norm": 0.78515625, "learning_rate": 0.00016567357820866375, "loss": 0.9915, "step": 17775 }, { "epoch": 0.4564378347062609, "grad_norm": 0.8359375, "learning_rate": 0.00016567021159607877, "loss": 0.8915, "step": 17776 }, { "epoch": 0.4564635119021827, "grad_norm": 0.74609375, "learning_rate": 0.00016566684485261904, "loss": 0.9645, "step": 17777 }, { "epoch": 0.45648918909810454, "grad_norm": 0.80078125, "learning_rate": 0.00016566347797829117, "loss": 0.7892, "step": 17778 }, { "epoch": 0.4565148662940264, "grad_norm": 0.74609375, "learning_rate": 0.00016566011097310192, "loss": 0.8049, "step": 17779 }, { "epoch": 0.45654054348994816, "grad_norm": 0.796875, "learning_rate": 0.000165656743837058, "loss": 0.8232, "step": 17780 }, { "epoch": 0.45656622068587, "grad_norm": 0.76953125, "learning_rate": 0.00016565337657016612, "loss": 0.8457, "step": 17781 }, { "epoch": 0.45659189788179183, "grad_norm": 0.8125, "learning_rate": 0.00016565000917243298, "loss": 0.8451, "step": 17782 }, { "epoch": 0.4566175750777136, "grad_norm": 0.7421875, "learning_rate": 0.00016564664164386535, "loss": 0.7312, "step": 17783 }, { "epoch": 0.45664325227363545, "grad_norm": 0.78515625, "learning_rate": 0.00016564327398446986, "loss": 0.8069, "step": 17784 }, { "epoch": 0.4566689294695573, "grad_norm": 0.7734375, "learning_rate": 0.00016563990619425324, "loss": 0.8017, "step": 17785 }, { "epoch": 0.4566946066654791, "grad_norm": 0.77734375, "learning_rate": 0.00016563653827322226, "loss": 0.8624, "step": 17786 }, { "epoch": 0.4567202838614009, "grad_norm": 0.76171875, "learning_rate": 0.00016563317022138353, "loss": 0.9189, "step": 17787 }, { "epoch": 0.45674596105732274, "grad_norm": 0.75, "learning_rate": 0.00016562980203874386, "loss": 0.8243, "step": 17788 }, { "epoch": 0.45677163825324457, "grad_norm": 0.80078125, "learning_rate": 0.00016562643372530992, "loss": 0.8819, "step": 17789 }, { "epoch": 0.45679731544916635, "grad_norm": 0.8046875, "learning_rate": 0.0001656230652810884, "loss": 0.87, "step": 17790 }, { "epoch": 0.4568229926450882, "grad_norm": 0.8046875, "learning_rate": 0.0001656196967060861, "loss": 0.9404, "step": 17791 }, { "epoch": 0.45684866984101, "grad_norm": 0.84765625, "learning_rate": 0.0001656163280003096, "loss": 0.8973, "step": 17792 }, { "epoch": 0.4568743470369318, "grad_norm": 0.8203125, "learning_rate": 0.0001656129591637657, "loss": 0.8742, "step": 17793 }, { "epoch": 0.45690002423285364, "grad_norm": 0.734375, "learning_rate": 0.0001656095901964611, "loss": 0.7816, "step": 17794 }, { "epoch": 0.4569257014287755, "grad_norm": 0.71484375, "learning_rate": 0.00016560622109840255, "loss": 0.882, "step": 17795 }, { "epoch": 0.4569513786246973, "grad_norm": 0.7890625, "learning_rate": 0.00016560285186959672, "loss": 0.8807, "step": 17796 }, { "epoch": 0.4569770558206191, "grad_norm": 0.6875, "learning_rate": 0.0001655994825100503, "loss": 0.8162, "step": 17797 }, { "epoch": 0.45700273301654093, "grad_norm": 0.71875, "learning_rate": 0.00016559611301977006, "loss": 0.9372, "step": 17798 }, { "epoch": 0.45702841021246277, "grad_norm": 0.7890625, "learning_rate": 0.00016559274339876268, "loss": 0.881, "step": 17799 }, { "epoch": 0.45705408740838455, "grad_norm": 0.76171875, "learning_rate": 0.00016558937364703492, "loss": 0.8379, "step": 17800 }, { "epoch": 0.4570797646043064, "grad_norm": 0.7421875, "learning_rate": 0.0001655860037645934, "loss": 0.8602, "step": 17801 }, { "epoch": 0.4571054418002282, "grad_norm": 0.73828125, "learning_rate": 0.00016558263375144496, "loss": 0.8115, "step": 17802 }, { "epoch": 0.45713111899615, "grad_norm": 0.890625, "learning_rate": 0.0001655792636075962, "loss": 0.8122, "step": 17803 }, { "epoch": 0.45715679619207183, "grad_norm": 0.69140625, "learning_rate": 0.00016557589333305393, "loss": 0.9976, "step": 17804 }, { "epoch": 0.45718247338799367, "grad_norm": 0.76953125, "learning_rate": 0.00016557252292782483, "loss": 0.9123, "step": 17805 }, { "epoch": 0.4572081505839155, "grad_norm": 0.73828125, "learning_rate": 0.00016556915239191561, "loss": 0.8006, "step": 17806 }, { "epoch": 0.4572338277798373, "grad_norm": 0.73828125, "learning_rate": 0.00016556578172533296, "loss": 0.8327, "step": 17807 }, { "epoch": 0.4572595049757591, "grad_norm": 0.74609375, "learning_rate": 0.0001655624109280837, "loss": 0.8119, "step": 17808 }, { "epoch": 0.45728518217168096, "grad_norm": 0.74609375, "learning_rate": 0.00016555904000017442, "loss": 0.7957, "step": 17809 }, { "epoch": 0.45731085936760274, "grad_norm": 0.83984375, "learning_rate": 0.00016555566894161188, "loss": 0.9482, "step": 17810 }, { "epoch": 0.4573365365635246, "grad_norm": 0.73828125, "learning_rate": 0.00016555229775240286, "loss": 1.055, "step": 17811 }, { "epoch": 0.4573622137594464, "grad_norm": 0.68359375, "learning_rate": 0.00016554892643255402, "loss": 0.8187, "step": 17812 }, { "epoch": 0.4573878909553682, "grad_norm": 0.734375, "learning_rate": 0.0001655455549820721, "loss": 0.9121, "step": 17813 }, { "epoch": 0.45741356815129003, "grad_norm": 0.765625, "learning_rate": 0.0001655421834009638, "loss": 0.9385, "step": 17814 }, { "epoch": 0.45743924534721186, "grad_norm": 0.76171875, "learning_rate": 0.00016553881168923583, "loss": 0.8473, "step": 17815 }, { "epoch": 0.4574649225431337, "grad_norm": 0.74609375, "learning_rate": 0.00016553543984689496, "loss": 0.9228, "step": 17816 }, { "epoch": 0.4574905997390555, "grad_norm": 0.7265625, "learning_rate": 0.0001655320678739478, "loss": 0.9116, "step": 17817 }, { "epoch": 0.4575162769349773, "grad_norm": 0.79296875, "learning_rate": 0.00016552869577040125, "loss": 0.845, "step": 17818 }, { "epoch": 0.45754195413089915, "grad_norm": 0.84765625, "learning_rate": 0.00016552532353626188, "loss": 0.9203, "step": 17819 }, { "epoch": 0.45756763132682093, "grad_norm": 0.76953125, "learning_rate": 0.00016552195117153646, "loss": 0.8943, "step": 17820 }, { "epoch": 0.45759330852274277, "grad_norm": 0.7421875, "learning_rate": 0.0001655185786762317, "loss": 0.8085, "step": 17821 }, { "epoch": 0.4576189857186646, "grad_norm": 0.796875, "learning_rate": 0.00016551520605035435, "loss": 0.9294, "step": 17822 }, { "epoch": 0.4576446629145864, "grad_norm": 0.79296875, "learning_rate": 0.00016551183329391114, "loss": 0.9603, "step": 17823 }, { "epoch": 0.4576703401105082, "grad_norm": 0.8828125, "learning_rate": 0.0001655084604069087, "loss": 0.8494, "step": 17824 }, { "epoch": 0.45769601730643006, "grad_norm": 0.7578125, "learning_rate": 0.00016550508738935387, "loss": 1.0737, "step": 17825 }, { "epoch": 0.4577216945023519, "grad_norm": 0.75, "learning_rate": 0.0001655017142412533, "loss": 0.8271, "step": 17826 }, { "epoch": 0.4577473716982737, "grad_norm": 0.765625, "learning_rate": 0.00016549834096261373, "loss": 0.9228, "step": 17827 }, { "epoch": 0.4577730488941955, "grad_norm": 0.80859375, "learning_rate": 0.00016549496755344185, "loss": 0.8701, "step": 17828 }, { "epoch": 0.45779872609011735, "grad_norm": 0.81640625, "learning_rate": 0.00016549159401374445, "loss": 0.9038, "step": 17829 }, { "epoch": 0.4578244032860391, "grad_norm": 0.79296875, "learning_rate": 0.0001654882203435282, "loss": 0.9453, "step": 17830 }, { "epoch": 0.45785008048196096, "grad_norm": 0.75, "learning_rate": 0.00016548484654279987, "loss": 0.9036, "step": 17831 }, { "epoch": 0.4578757576778828, "grad_norm": 0.76171875, "learning_rate": 0.00016548147261156616, "loss": 0.9599, "step": 17832 }, { "epoch": 0.4579014348738046, "grad_norm": 0.7578125, "learning_rate": 0.00016547809854983378, "loss": 0.9144, "step": 17833 }, { "epoch": 0.4579271120697264, "grad_norm": 0.74609375, "learning_rate": 0.00016547472435760944, "loss": 0.9466, "step": 17834 }, { "epoch": 0.45795278926564825, "grad_norm": 0.7265625, "learning_rate": 0.0001654713500348999, "loss": 0.9029, "step": 17835 }, { "epoch": 0.4579784664615701, "grad_norm": 0.78515625, "learning_rate": 0.0001654679755817119, "loss": 0.9025, "step": 17836 }, { "epoch": 0.45800414365749187, "grad_norm": 0.80859375, "learning_rate": 0.0001654646009980521, "loss": 0.7985, "step": 17837 }, { "epoch": 0.4580298208534137, "grad_norm": 0.83984375, "learning_rate": 0.00016546122628392727, "loss": 0.9154, "step": 17838 }, { "epoch": 0.45805549804933554, "grad_norm": 0.765625, "learning_rate": 0.00016545785143934417, "loss": 0.9206, "step": 17839 }, { "epoch": 0.4580811752452573, "grad_norm": 0.796875, "learning_rate": 0.00016545447646430943, "loss": 0.8886, "step": 17840 }, { "epoch": 0.45810685244117916, "grad_norm": 0.76953125, "learning_rate": 0.00016545110135882986, "loss": 0.9246, "step": 17841 }, { "epoch": 0.458132529637101, "grad_norm": 0.79296875, "learning_rate": 0.00016544772612291214, "loss": 0.9432, "step": 17842 }, { "epoch": 0.4581582068330228, "grad_norm": 0.78125, "learning_rate": 0.000165444350756563, "loss": 0.9395, "step": 17843 }, { "epoch": 0.4581838840289446, "grad_norm": 0.7734375, "learning_rate": 0.00016544097525978923, "loss": 0.9817, "step": 17844 }, { "epoch": 0.45820956122486645, "grad_norm": 0.79296875, "learning_rate": 0.0001654375996325975, "loss": 0.8572, "step": 17845 }, { "epoch": 0.4582352384207883, "grad_norm": 0.80078125, "learning_rate": 0.00016543422387499455, "loss": 0.9621, "step": 17846 }, { "epoch": 0.45826091561671006, "grad_norm": 0.7421875, "learning_rate": 0.00016543084798698705, "loss": 0.9254, "step": 17847 }, { "epoch": 0.4582865928126319, "grad_norm": 0.76953125, "learning_rate": 0.0001654274719685818, "loss": 0.9293, "step": 17848 }, { "epoch": 0.45831227000855373, "grad_norm": 0.79296875, "learning_rate": 0.00016542409581978554, "loss": 1.0537, "step": 17849 }, { "epoch": 0.4583379472044755, "grad_norm": 0.734375, "learning_rate": 0.00016542071954060494, "loss": 0.9566, "step": 17850 }, { "epoch": 0.45836362440039735, "grad_norm": 0.75390625, "learning_rate": 0.00016541734313104677, "loss": 0.8668, "step": 17851 }, { "epoch": 0.4583893015963192, "grad_norm": 0.7890625, "learning_rate": 0.00016541396659111774, "loss": 0.8406, "step": 17852 }, { "epoch": 0.45841497879224097, "grad_norm": 0.8671875, "learning_rate": 0.0001654105899208246, "loss": 0.9677, "step": 17853 }, { "epoch": 0.4584406559881628, "grad_norm": 0.78515625, "learning_rate": 0.00016540721312017403, "loss": 0.9777, "step": 17854 }, { "epoch": 0.45846633318408464, "grad_norm": 0.8046875, "learning_rate": 0.00016540383618917278, "loss": 0.7919, "step": 17855 }, { "epoch": 0.4584920103800065, "grad_norm": 0.77734375, "learning_rate": 0.00016540045912782762, "loss": 0.956, "step": 17856 }, { "epoch": 0.45851768757592826, "grad_norm": 0.875, "learning_rate": 0.00016539708193614528, "loss": 0.9353, "step": 17857 }, { "epoch": 0.4585433647718501, "grad_norm": 0.79296875, "learning_rate": 0.00016539370461413243, "loss": 0.9994, "step": 17858 }, { "epoch": 0.45856904196777193, "grad_norm": 0.73046875, "learning_rate": 0.00016539032716179582, "loss": 0.8414, "step": 17859 }, { "epoch": 0.4585947191636937, "grad_norm": 0.85546875, "learning_rate": 0.00016538694957914224, "loss": 1.029, "step": 17860 }, { "epoch": 0.45862039635961555, "grad_norm": 0.859375, "learning_rate": 0.00016538357186617834, "loss": 0.8725, "step": 17861 }, { "epoch": 0.4586460735555374, "grad_norm": 0.80078125, "learning_rate": 0.0001653801940229109, "loss": 1.0525, "step": 17862 }, { "epoch": 0.45867175075145916, "grad_norm": 0.734375, "learning_rate": 0.0001653768160493466, "loss": 0.8923, "step": 17863 }, { "epoch": 0.458697427947381, "grad_norm": 0.79296875, "learning_rate": 0.00016537343794549228, "loss": 0.8077, "step": 17864 }, { "epoch": 0.45872310514330283, "grad_norm": 0.75, "learning_rate": 0.00016537005971135456, "loss": 0.7582, "step": 17865 }, { "epoch": 0.45874878233922467, "grad_norm": 0.80859375, "learning_rate": 0.00016536668134694023, "loss": 1.0169, "step": 17866 }, { "epoch": 0.45877445953514645, "grad_norm": 0.76171875, "learning_rate": 0.000165363302852256, "loss": 0.7511, "step": 17867 }, { "epoch": 0.4588001367310683, "grad_norm": 0.73828125, "learning_rate": 0.0001653599242273086, "loss": 0.701, "step": 17868 }, { "epoch": 0.4588258139269901, "grad_norm": 0.765625, "learning_rate": 0.0001653565454721048, "loss": 1.0287, "step": 17869 }, { "epoch": 0.4588514911229119, "grad_norm": 0.78125, "learning_rate": 0.00016535316658665127, "loss": 0.8653, "step": 17870 }, { "epoch": 0.45887716831883374, "grad_norm": 0.8203125, "learning_rate": 0.00016534978757095484, "loss": 1.0869, "step": 17871 }, { "epoch": 0.4589028455147556, "grad_norm": 0.74609375, "learning_rate": 0.00016534640842502215, "loss": 0.8168, "step": 17872 }, { "epoch": 0.45892852271067736, "grad_norm": 0.91796875, "learning_rate": 0.00016534302914885998, "loss": 1.047, "step": 17873 }, { "epoch": 0.4589541999065992, "grad_norm": 0.78515625, "learning_rate": 0.00016533964974247504, "loss": 0.9826, "step": 17874 }, { "epoch": 0.45897987710252103, "grad_norm": 0.8828125, "learning_rate": 0.0001653362702058741, "loss": 0.8321, "step": 17875 }, { "epoch": 0.45900555429844286, "grad_norm": 0.75390625, "learning_rate": 0.00016533289053906387, "loss": 1.1268, "step": 17876 }, { "epoch": 0.45903123149436464, "grad_norm": 0.83984375, "learning_rate": 0.00016532951074205108, "loss": 0.9456, "step": 17877 }, { "epoch": 0.4590569086902865, "grad_norm": 0.890625, "learning_rate": 0.0001653261308148425, "loss": 0.9215, "step": 17878 }, { "epoch": 0.4590825858862083, "grad_norm": 0.76171875, "learning_rate": 0.00016532275075744482, "loss": 0.9854, "step": 17879 }, { "epoch": 0.4591082630821301, "grad_norm": 0.8515625, "learning_rate": 0.00016531937056986479, "loss": 0.9874, "step": 17880 }, { "epoch": 0.45913394027805193, "grad_norm": 0.7109375, "learning_rate": 0.00016531599025210918, "loss": 0.804, "step": 17881 }, { "epoch": 0.45915961747397377, "grad_norm": 0.81640625, "learning_rate": 0.0001653126098041847, "loss": 0.7791, "step": 17882 }, { "epoch": 0.45918529466989555, "grad_norm": 0.8046875, "learning_rate": 0.00016530922922609808, "loss": 0.9407, "step": 17883 }, { "epoch": 0.4592109718658174, "grad_norm": 0.78125, "learning_rate": 0.00016530584851785605, "loss": 0.7851, "step": 17884 }, { "epoch": 0.4592366490617392, "grad_norm": 0.8203125, "learning_rate": 0.0001653024676794654, "loss": 0.9533, "step": 17885 }, { "epoch": 0.45926232625766106, "grad_norm": 0.76953125, "learning_rate": 0.0001652990867109328, "loss": 0.9941, "step": 17886 }, { "epoch": 0.45928800345358284, "grad_norm": 0.80078125, "learning_rate": 0.00016529570561226505, "loss": 0.9576, "step": 17887 }, { "epoch": 0.4593136806495047, "grad_norm": 0.76171875, "learning_rate": 0.00016529232438346883, "loss": 0.8498, "step": 17888 }, { "epoch": 0.4593393578454265, "grad_norm": 0.81640625, "learning_rate": 0.0001652889430245509, "loss": 0.9769, "step": 17889 }, { "epoch": 0.4593650350413483, "grad_norm": 0.77734375, "learning_rate": 0.000165285561535518, "loss": 0.9531, "step": 17890 }, { "epoch": 0.4593907122372701, "grad_norm": 0.8671875, "learning_rate": 0.0001652821799163769, "loss": 0.9977, "step": 17891 }, { "epoch": 0.45941638943319196, "grad_norm": 0.91796875, "learning_rate": 0.00016527879816713429, "loss": 0.7913, "step": 17892 }, { "epoch": 0.45944206662911374, "grad_norm": 0.71484375, "learning_rate": 0.00016527541628779695, "loss": 1.0295, "step": 17893 }, { "epoch": 0.4594677438250356, "grad_norm": 0.71484375, "learning_rate": 0.0001652720342783716, "loss": 0.8985, "step": 17894 }, { "epoch": 0.4594934210209574, "grad_norm": 0.76171875, "learning_rate": 0.00016526865213886495, "loss": 0.8889, "step": 17895 }, { "epoch": 0.45951909821687925, "grad_norm": 0.7421875, "learning_rate": 0.00016526526986928382, "loss": 0.9138, "step": 17896 }, { "epoch": 0.45954477541280103, "grad_norm": 0.8046875, "learning_rate": 0.00016526188746963488, "loss": 1.0414, "step": 17897 }, { "epoch": 0.45957045260872287, "grad_norm": 0.7109375, "learning_rate": 0.00016525850493992488, "loss": 0.9058, "step": 17898 }, { "epoch": 0.4595961298046447, "grad_norm": 0.77734375, "learning_rate": 0.0001652551222801606, "loss": 0.8017, "step": 17899 }, { "epoch": 0.4596218070005665, "grad_norm": 0.74609375, "learning_rate": 0.00016525173949034872, "loss": 0.7997, "step": 17900 }, { "epoch": 0.4596474841964883, "grad_norm": 0.81640625, "learning_rate": 0.00016524835657049603, "loss": 1.0064, "step": 17901 }, { "epoch": 0.45967316139241016, "grad_norm": 0.74609375, "learning_rate": 0.00016524497352060928, "loss": 0.7831, "step": 17902 }, { "epoch": 0.45969883858833194, "grad_norm": 0.70703125, "learning_rate": 0.0001652415903406952, "loss": 0.8358, "step": 17903 }, { "epoch": 0.4597245157842538, "grad_norm": 0.828125, "learning_rate": 0.0001652382070307605, "loss": 0.8608, "step": 17904 }, { "epoch": 0.4597501929801756, "grad_norm": 0.86328125, "learning_rate": 0.00016523482359081197, "loss": 1.036, "step": 17905 }, { "epoch": 0.45977587017609745, "grad_norm": 0.79296875, "learning_rate": 0.0001652314400208563, "loss": 0.8436, "step": 17906 }, { "epoch": 0.4598015473720192, "grad_norm": 0.8203125, "learning_rate": 0.00016522805632090027, "loss": 0.7545, "step": 17907 }, { "epoch": 0.45982722456794106, "grad_norm": 0.77734375, "learning_rate": 0.00016522467249095062, "loss": 0.8115, "step": 17908 }, { "epoch": 0.4598529017638629, "grad_norm": 0.75, "learning_rate": 0.00016522128853101409, "loss": 0.893, "step": 17909 }, { "epoch": 0.4598785789597847, "grad_norm": 0.7421875, "learning_rate": 0.00016521790444109743, "loss": 0.7963, "step": 17910 }, { "epoch": 0.4599042561557065, "grad_norm": 0.91015625, "learning_rate": 0.00016521452022120735, "loss": 0.9971, "step": 17911 }, { "epoch": 0.45992993335162835, "grad_norm": 0.78125, "learning_rate": 0.00016521113587135062, "loss": 0.9178, "step": 17912 }, { "epoch": 0.45995561054755013, "grad_norm": 0.7421875, "learning_rate": 0.00016520775139153404, "loss": 0.7967, "step": 17913 }, { "epoch": 0.45998128774347197, "grad_norm": 0.73046875, "learning_rate": 0.00016520436678176426, "loss": 0.8306, "step": 17914 }, { "epoch": 0.4600069649393938, "grad_norm": 0.76171875, "learning_rate": 0.00016520098204204805, "loss": 0.8974, "step": 17915 }, { "epoch": 0.46003264213531564, "grad_norm": 0.74609375, "learning_rate": 0.0001651975971723922, "loss": 0.957, "step": 17916 }, { "epoch": 0.4600583193312374, "grad_norm": 0.82421875, "learning_rate": 0.00016519421217280343, "loss": 0.8568, "step": 17917 }, { "epoch": 0.46008399652715926, "grad_norm": 0.78125, "learning_rate": 0.0001651908270432885, "loss": 0.9001, "step": 17918 }, { "epoch": 0.4601096737230811, "grad_norm": 0.77734375, "learning_rate": 0.0001651874417838541, "loss": 0.7582, "step": 17919 }, { "epoch": 0.4601353509190029, "grad_norm": 0.78515625, "learning_rate": 0.00016518405639450703, "loss": 0.9293, "step": 17920 }, { "epoch": 0.4601610281149247, "grad_norm": 0.7578125, "learning_rate": 0.00016518067087525405, "loss": 0.719, "step": 17921 }, { "epoch": 0.46018670531084654, "grad_norm": 0.80078125, "learning_rate": 0.00016517728522610185, "loss": 0.8259, "step": 17922 }, { "epoch": 0.4602123825067683, "grad_norm": 0.76953125, "learning_rate": 0.0001651738994470572, "loss": 1.0, "step": 17923 }, { "epoch": 0.46023805970269016, "grad_norm": 0.76953125, "learning_rate": 0.00016517051353812692, "loss": 0.8132, "step": 17924 }, { "epoch": 0.460263736898612, "grad_norm": 0.77734375, "learning_rate": 0.00016516712749931764, "loss": 0.9548, "step": 17925 }, { "epoch": 0.46028941409453383, "grad_norm": 0.765625, "learning_rate": 0.00016516374133063616, "loss": 0.9214, "step": 17926 }, { "epoch": 0.4603150912904556, "grad_norm": 0.7734375, "learning_rate": 0.00016516035503208926, "loss": 0.8892, "step": 17927 }, { "epoch": 0.46034076848637745, "grad_norm": 0.87109375, "learning_rate": 0.0001651569686036836, "loss": 0.9734, "step": 17928 }, { "epoch": 0.4603664456822993, "grad_norm": 0.76171875, "learning_rate": 0.00016515358204542607, "loss": 0.8857, "step": 17929 }, { "epoch": 0.46039212287822107, "grad_norm": 0.71484375, "learning_rate": 0.00016515019535732328, "loss": 0.8531, "step": 17930 }, { "epoch": 0.4604178000741429, "grad_norm": 0.73046875, "learning_rate": 0.00016514680853938206, "loss": 0.7684, "step": 17931 }, { "epoch": 0.46044347727006474, "grad_norm": 0.75, "learning_rate": 0.0001651434215916091, "loss": 0.8947, "step": 17932 }, { "epoch": 0.4604691544659865, "grad_norm": 0.78125, "learning_rate": 0.0001651400345140112, "loss": 1.0361, "step": 17933 }, { "epoch": 0.46049483166190835, "grad_norm": 0.84765625, "learning_rate": 0.00016513664730659512, "loss": 1.1593, "step": 17934 }, { "epoch": 0.4605205088578302, "grad_norm": 0.8046875, "learning_rate": 0.00016513325996936757, "loss": 0.9719, "step": 17935 }, { "epoch": 0.460546186053752, "grad_norm": 0.74609375, "learning_rate": 0.00016512987250233532, "loss": 0.8425, "step": 17936 }, { "epoch": 0.4605718632496738, "grad_norm": 0.7890625, "learning_rate": 0.0001651264849055051, "loss": 0.8254, "step": 17937 }, { "epoch": 0.46059754044559564, "grad_norm": 0.81640625, "learning_rate": 0.0001651230971788837, "loss": 0.9373, "step": 17938 }, { "epoch": 0.4606232176415175, "grad_norm": 0.73046875, "learning_rate": 0.00016511970932247785, "loss": 1.0185, "step": 17939 }, { "epoch": 0.46064889483743926, "grad_norm": 0.7578125, "learning_rate": 0.00016511632133629425, "loss": 0.8814, "step": 17940 }, { "epoch": 0.4606745720333611, "grad_norm": 0.6953125, "learning_rate": 0.0001651129332203398, "loss": 0.7815, "step": 17941 }, { "epoch": 0.46070024922928293, "grad_norm": 0.71484375, "learning_rate": 0.00016510954497462108, "loss": 0.8708, "step": 17942 }, { "epoch": 0.4607259264252047, "grad_norm": 0.79296875, "learning_rate": 0.00016510615659914493, "loss": 0.9325, "step": 17943 }, { "epoch": 0.46075160362112655, "grad_norm": 0.7734375, "learning_rate": 0.00016510276809391812, "loss": 0.8821, "step": 17944 }, { "epoch": 0.4607772808170484, "grad_norm": 0.73828125, "learning_rate": 0.00016509937945894735, "loss": 0.9566, "step": 17945 }, { "epoch": 0.4608029580129702, "grad_norm": 0.80078125, "learning_rate": 0.0001650959906942394, "loss": 0.9602, "step": 17946 }, { "epoch": 0.460828635208892, "grad_norm": 0.75390625, "learning_rate": 0.000165092601799801, "loss": 0.8612, "step": 17947 }, { "epoch": 0.46085431240481384, "grad_norm": 0.8046875, "learning_rate": 0.00016508921277563892, "loss": 0.8175, "step": 17948 }, { "epoch": 0.4608799896007357, "grad_norm": 0.76171875, "learning_rate": 0.00016508582362175997, "loss": 0.8625, "step": 17949 }, { "epoch": 0.46090566679665745, "grad_norm": 0.70703125, "learning_rate": 0.00016508243433817086, "loss": 0.8458, "step": 17950 }, { "epoch": 0.4609313439925793, "grad_norm": 0.7734375, "learning_rate": 0.0001650790449248783, "loss": 0.8174, "step": 17951 }, { "epoch": 0.4609570211885011, "grad_norm": 0.734375, "learning_rate": 0.00016507565538188907, "loss": 0.7805, "step": 17952 }, { "epoch": 0.4609826983844229, "grad_norm": 0.796875, "learning_rate": 0.00016507226570920995, "loss": 0.8451, "step": 17953 }, { "epoch": 0.46100837558034474, "grad_norm": 0.76171875, "learning_rate": 0.00016506887590684766, "loss": 0.9116, "step": 17954 }, { "epoch": 0.4610340527762666, "grad_norm": 0.796875, "learning_rate": 0.00016506548597480905, "loss": 0.7175, "step": 17955 }, { "epoch": 0.4610597299721884, "grad_norm": 0.7578125, "learning_rate": 0.00016506209591310076, "loss": 0.9481, "step": 17956 }, { "epoch": 0.4610854071681102, "grad_norm": 0.8125, "learning_rate": 0.00016505870572172958, "loss": 0.7908, "step": 17957 }, { "epoch": 0.46111108436403203, "grad_norm": 0.796875, "learning_rate": 0.00016505531540070227, "loss": 1.0309, "step": 17958 }, { "epoch": 0.46113676155995387, "grad_norm": 0.83203125, "learning_rate": 0.0001650519249500256, "loss": 0.8347, "step": 17959 }, { "epoch": 0.46116243875587565, "grad_norm": 0.8046875, "learning_rate": 0.00016504853436970633, "loss": 0.8298, "step": 17960 }, { "epoch": 0.4611881159517975, "grad_norm": 0.7578125, "learning_rate": 0.0001650451436597512, "loss": 0.8176, "step": 17961 }, { "epoch": 0.4612137931477193, "grad_norm": 0.69140625, "learning_rate": 0.00016504175282016698, "loss": 0.7189, "step": 17962 }, { "epoch": 0.4612394703436411, "grad_norm": 0.7890625, "learning_rate": 0.00016503836185096044, "loss": 0.9168, "step": 17963 }, { "epoch": 0.46126514753956294, "grad_norm": 0.79296875, "learning_rate": 0.0001650349707521383, "loss": 0.7773, "step": 17964 }, { "epoch": 0.4612908247354848, "grad_norm": 0.8359375, "learning_rate": 0.00016503157952370734, "loss": 1.1432, "step": 17965 }, { "epoch": 0.4613165019314066, "grad_norm": 0.84375, "learning_rate": 0.0001650281881656743, "loss": 0.9784, "step": 17966 }, { "epoch": 0.4613421791273284, "grad_norm": 0.80078125, "learning_rate": 0.00016502479667804596, "loss": 0.7573, "step": 17967 }, { "epoch": 0.4613678563232502, "grad_norm": 0.73046875, "learning_rate": 0.0001650214050608291, "loss": 0.8594, "step": 17968 }, { "epoch": 0.46139353351917206, "grad_norm": 0.8046875, "learning_rate": 0.00016501801331403045, "loss": 0.9068, "step": 17969 }, { "epoch": 0.46141921071509384, "grad_norm": 0.765625, "learning_rate": 0.00016501462143765676, "loss": 0.9802, "step": 17970 }, { "epoch": 0.4614448879110157, "grad_norm": 0.7890625, "learning_rate": 0.00016501122943171477, "loss": 0.873, "step": 17971 }, { "epoch": 0.4614705651069375, "grad_norm": 0.80078125, "learning_rate": 0.0001650078372962113, "loss": 1.0033, "step": 17972 }, { "epoch": 0.4614962423028593, "grad_norm": 0.79296875, "learning_rate": 0.0001650044450311531, "loss": 0.958, "step": 17973 }, { "epoch": 0.46152191949878113, "grad_norm": 0.81640625, "learning_rate": 0.0001650010526365469, "loss": 0.9487, "step": 17974 }, { "epoch": 0.46154759669470297, "grad_norm": 0.7578125, "learning_rate": 0.00016499766011239945, "loss": 0.9409, "step": 17975 }, { "epoch": 0.46157327389062475, "grad_norm": 0.79296875, "learning_rate": 0.00016499426745871757, "loss": 0.9531, "step": 17976 }, { "epoch": 0.4615989510865466, "grad_norm": 0.7890625, "learning_rate": 0.00016499087467550794, "loss": 0.89, "step": 17977 }, { "epoch": 0.4616246282824684, "grad_norm": 0.76953125, "learning_rate": 0.00016498748176277744, "loss": 0.9588, "step": 17978 }, { "epoch": 0.46165030547839025, "grad_norm": 0.75, "learning_rate": 0.0001649840887205327, "loss": 0.9199, "step": 17979 }, { "epoch": 0.46167598267431204, "grad_norm": 0.82421875, "learning_rate": 0.00016498069554878058, "loss": 0.933, "step": 17980 }, { "epoch": 0.46170165987023387, "grad_norm": 0.734375, "learning_rate": 0.00016497730224752778, "loss": 0.8637, "step": 17981 }, { "epoch": 0.4617273370661557, "grad_norm": 0.8125, "learning_rate": 0.00016497390881678106, "loss": 1.0247, "step": 17982 }, { "epoch": 0.4617530142620775, "grad_norm": 0.78515625, "learning_rate": 0.00016497051525654727, "loss": 0.8315, "step": 17983 }, { "epoch": 0.4617786914579993, "grad_norm": 0.765625, "learning_rate": 0.00016496712156683306, "loss": 0.9005, "step": 17984 }, { "epoch": 0.46180436865392116, "grad_norm": 0.8046875, "learning_rate": 0.00016496372774764528, "loss": 0.8285, "step": 17985 }, { "epoch": 0.46183004584984294, "grad_norm": 0.734375, "learning_rate": 0.00016496033379899064, "loss": 0.847, "step": 17986 }, { "epoch": 0.4618557230457648, "grad_norm": 0.8515625, "learning_rate": 0.00016495693972087591, "loss": 0.9094, "step": 17987 }, { "epoch": 0.4618814002416866, "grad_norm": 0.77734375, "learning_rate": 0.00016495354551330788, "loss": 0.8302, "step": 17988 }, { "epoch": 0.46190707743760845, "grad_norm": 0.765625, "learning_rate": 0.00016495015117629332, "loss": 0.8889, "step": 17989 }, { "epoch": 0.46193275463353023, "grad_norm": 0.78125, "learning_rate": 0.00016494675670983896, "loss": 0.9579, "step": 17990 }, { "epoch": 0.46195843182945207, "grad_norm": 0.8046875, "learning_rate": 0.00016494336211395159, "loss": 0.9271, "step": 17991 }, { "epoch": 0.4619841090253739, "grad_norm": 0.8828125, "learning_rate": 0.00016493996738863796, "loss": 1.0151, "step": 17992 }, { "epoch": 0.4620097862212957, "grad_norm": 0.70703125, "learning_rate": 0.00016493657253390483, "loss": 0.8145, "step": 17993 }, { "epoch": 0.4620354634172175, "grad_norm": 0.72265625, "learning_rate": 0.00016493317754975897, "loss": 0.9059, "step": 17994 }, { "epoch": 0.46206114061313935, "grad_norm": 0.7421875, "learning_rate": 0.00016492978243620715, "loss": 1.0877, "step": 17995 }, { "epoch": 0.46208681780906113, "grad_norm": 0.734375, "learning_rate": 0.00016492638719325616, "loss": 0.8735, "step": 17996 }, { "epoch": 0.46211249500498297, "grad_norm": 0.74609375, "learning_rate": 0.00016492299182091274, "loss": 0.9088, "step": 17997 }, { "epoch": 0.4621381722009048, "grad_norm": 0.828125, "learning_rate": 0.00016491959631918367, "loss": 0.8969, "step": 17998 }, { "epoch": 0.46216384939682664, "grad_norm": 0.73046875, "learning_rate": 0.0001649162006880757, "loss": 0.834, "step": 17999 }, { "epoch": 0.4621895265927484, "grad_norm": 0.8359375, "learning_rate": 0.0001649128049275956, "loss": 0.8521, "step": 18000 }, { "epoch": 0.4621895265927484, "eval_loss": 0.8996534943580627, "eval_model_preparation_time": 0.0065, "eval_runtime": 403.3605, "eval_samples_per_second": 24.792, "eval_steps_per_second": 0.776, "step": 18000 }, { "epoch": 0.46221520378867026, "grad_norm": 0.78515625, "learning_rate": 0.00016490940903775015, "loss": 1.0669, "step": 18001 }, { "epoch": 0.4622408809845921, "grad_norm": 0.7265625, "learning_rate": 0.0001649060130185461, "loss": 0.8496, "step": 18002 }, { "epoch": 0.4622665581805139, "grad_norm": 0.86328125, "learning_rate": 0.00016490261686999026, "loss": 0.9214, "step": 18003 }, { "epoch": 0.4622922353764357, "grad_norm": 0.78125, "learning_rate": 0.00016489922059208936, "loss": 0.7453, "step": 18004 }, { "epoch": 0.46231791257235755, "grad_norm": 0.7890625, "learning_rate": 0.00016489582418485014, "loss": 0.8727, "step": 18005 }, { "epoch": 0.46234358976827933, "grad_norm": 0.73046875, "learning_rate": 0.00016489242764827942, "loss": 0.7963, "step": 18006 }, { "epoch": 0.46236926696420116, "grad_norm": 0.73828125, "learning_rate": 0.00016488903098238396, "loss": 0.8651, "step": 18007 }, { "epoch": 0.462394944160123, "grad_norm": 0.7734375, "learning_rate": 0.0001648856341871705, "loss": 0.8404, "step": 18008 }, { "epoch": 0.46242062135604484, "grad_norm": 0.83984375, "learning_rate": 0.0001648822372626459, "loss": 0.8374, "step": 18009 }, { "epoch": 0.4624462985519666, "grad_norm": 0.76953125, "learning_rate": 0.0001648788402088168, "loss": 0.9875, "step": 18010 }, { "epoch": 0.46247197574788845, "grad_norm": 0.7734375, "learning_rate": 0.00016487544302569006, "loss": 0.8843, "step": 18011 }, { "epoch": 0.4624976529438103, "grad_norm": 0.78515625, "learning_rate": 0.0001648720457132724, "loss": 1.0226, "step": 18012 }, { "epoch": 0.46252333013973207, "grad_norm": 0.7265625, "learning_rate": 0.00016486864827157063, "loss": 0.8666, "step": 18013 }, { "epoch": 0.4625490073356539, "grad_norm": 0.76953125, "learning_rate": 0.00016486525070059149, "loss": 0.9014, "step": 18014 }, { "epoch": 0.46257468453157574, "grad_norm": 0.7265625, "learning_rate": 0.00016486185300034175, "loss": 0.8226, "step": 18015 }, { "epoch": 0.4626003617274975, "grad_norm": 0.83203125, "learning_rate": 0.00016485845517082823, "loss": 0.8525, "step": 18016 }, { "epoch": 0.46262603892341936, "grad_norm": 0.8125, "learning_rate": 0.00016485505721205765, "loss": 0.9167, "step": 18017 }, { "epoch": 0.4626517161193412, "grad_norm": 0.83984375, "learning_rate": 0.00016485165912403683, "loss": 0.9242, "step": 18018 }, { "epoch": 0.46267739331526303, "grad_norm": 0.796875, "learning_rate": 0.00016484826090677247, "loss": 0.9163, "step": 18019 }, { "epoch": 0.4627030705111848, "grad_norm": 0.7421875, "learning_rate": 0.00016484486256027137, "loss": 0.7344, "step": 18020 }, { "epoch": 0.46272874770710665, "grad_norm": 0.74609375, "learning_rate": 0.00016484146408454037, "loss": 0.9351, "step": 18021 }, { "epoch": 0.4627544249030285, "grad_norm": 0.73046875, "learning_rate": 0.0001648380654795862, "loss": 0.7779, "step": 18022 }, { "epoch": 0.46278010209895026, "grad_norm": 0.7734375, "learning_rate": 0.00016483466674541557, "loss": 0.8924, "step": 18023 }, { "epoch": 0.4628057792948721, "grad_norm": 0.765625, "learning_rate": 0.00016483126788203531, "loss": 0.8394, "step": 18024 }, { "epoch": 0.46283145649079394, "grad_norm": 0.8203125, "learning_rate": 0.0001648278688894522, "loss": 0.882, "step": 18025 }, { "epoch": 0.4628571336867157, "grad_norm": 0.80078125, "learning_rate": 0.00016482446976767302, "loss": 0.9615, "step": 18026 }, { "epoch": 0.46288281088263755, "grad_norm": 0.93359375, "learning_rate": 0.0001648210705167045, "loss": 0.8776, "step": 18027 }, { "epoch": 0.4629084880785594, "grad_norm": 0.81640625, "learning_rate": 0.00016481767113655344, "loss": 0.9761, "step": 18028 }, { "epoch": 0.4629341652744812, "grad_norm": 0.80859375, "learning_rate": 0.00016481427162722665, "loss": 0.9153, "step": 18029 }, { "epoch": 0.462959842470403, "grad_norm": 0.76171875, "learning_rate": 0.00016481087198873086, "loss": 0.8487, "step": 18030 }, { "epoch": 0.46298551966632484, "grad_norm": 0.72265625, "learning_rate": 0.00016480747222107284, "loss": 0.8233, "step": 18031 }, { "epoch": 0.4630111968622467, "grad_norm": 0.78125, "learning_rate": 0.00016480407232425938, "loss": 0.8452, "step": 18032 }, { "epoch": 0.46303687405816846, "grad_norm": 0.78125, "learning_rate": 0.00016480067229829727, "loss": 0.8862, "step": 18033 }, { "epoch": 0.4630625512540903, "grad_norm": 0.78515625, "learning_rate": 0.00016479727214319326, "loss": 1.0275, "step": 18034 }, { "epoch": 0.46308822845001213, "grad_norm": 0.8359375, "learning_rate": 0.0001647938718589542, "loss": 1.1197, "step": 18035 }, { "epoch": 0.4631139056459339, "grad_norm": 0.77734375, "learning_rate": 0.00016479047144558675, "loss": 0.7652, "step": 18036 }, { "epoch": 0.46313958284185575, "grad_norm": 0.85546875, "learning_rate": 0.00016478707090309772, "loss": 1.0769, "step": 18037 }, { "epoch": 0.4631652600377776, "grad_norm": 0.765625, "learning_rate": 0.00016478367023149395, "loss": 0.965, "step": 18038 }, { "epoch": 0.4631909372336994, "grad_norm": 0.76953125, "learning_rate": 0.00016478026943078216, "loss": 0.9382, "step": 18039 }, { "epoch": 0.4632166144296212, "grad_norm": 0.796875, "learning_rate": 0.00016477686850096916, "loss": 0.8995, "step": 18040 }, { "epoch": 0.46324229162554303, "grad_norm": 0.78125, "learning_rate": 0.0001647734674420617, "loss": 0.9453, "step": 18041 }, { "epoch": 0.46326796882146487, "grad_norm": 0.7734375, "learning_rate": 0.0001647700662540666, "loss": 0.858, "step": 18042 }, { "epoch": 0.46329364601738665, "grad_norm": 0.94140625, "learning_rate": 0.00016476666493699056, "loss": 0.975, "step": 18043 }, { "epoch": 0.4633193232133085, "grad_norm": 0.75390625, "learning_rate": 0.00016476326349084043, "loss": 0.8761, "step": 18044 }, { "epoch": 0.4633450004092303, "grad_norm": 0.796875, "learning_rate": 0.000164759861915623, "loss": 0.801, "step": 18045 }, { "epoch": 0.4633706776051521, "grad_norm": 0.76953125, "learning_rate": 0.00016475646021134495, "loss": 0.8625, "step": 18046 }, { "epoch": 0.46339635480107394, "grad_norm": 0.71875, "learning_rate": 0.00016475305837801316, "loss": 0.912, "step": 18047 }, { "epoch": 0.4634220319969958, "grad_norm": 0.80078125, "learning_rate": 0.0001647496564156344, "loss": 0.9135, "step": 18048 }, { "epoch": 0.4634477091929176, "grad_norm": 0.7890625, "learning_rate": 0.00016474625432421534, "loss": 0.7802, "step": 18049 }, { "epoch": 0.4634733863888394, "grad_norm": 0.828125, "learning_rate": 0.00016474285210376293, "loss": 0.8749, "step": 18050 }, { "epoch": 0.46349906358476123, "grad_norm": 0.8125, "learning_rate": 0.00016473944975428382, "loss": 0.8955, "step": 18051 }, { "epoch": 0.46352474078068306, "grad_norm": 0.8046875, "learning_rate": 0.00016473604727578484, "loss": 0.863, "step": 18052 }, { "epoch": 0.46355041797660484, "grad_norm": 0.7578125, "learning_rate": 0.0001647326446682728, "loss": 0.8853, "step": 18053 }, { "epoch": 0.4635760951725267, "grad_norm": 0.78515625, "learning_rate": 0.00016472924193175438, "loss": 0.8593, "step": 18054 }, { "epoch": 0.4636017723684485, "grad_norm": 0.81640625, "learning_rate": 0.0001647258390662365, "loss": 0.9053, "step": 18055 }, { "epoch": 0.4636274495643703, "grad_norm": 0.73046875, "learning_rate": 0.0001647224360717258, "loss": 0.8982, "step": 18056 }, { "epoch": 0.46365312676029213, "grad_norm": 0.73828125, "learning_rate": 0.00016471903294822918, "loss": 0.9322, "step": 18057 }, { "epoch": 0.46367880395621397, "grad_norm": 0.73828125, "learning_rate": 0.00016471562969575337, "loss": 1.0279, "step": 18058 }, { "epoch": 0.4637044811521358, "grad_norm": 0.87890625, "learning_rate": 0.00016471222631430515, "loss": 0.9318, "step": 18059 }, { "epoch": 0.4637301583480576, "grad_norm": 0.765625, "learning_rate": 0.00016470882280389127, "loss": 0.8655, "step": 18060 }, { "epoch": 0.4637558355439794, "grad_norm": 0.79296875, "learning_rate": 0.0001647054191645186, "loss": 0.8997, "step": 18061 }, { "epoch": 0.46378151273990126, "grad_norm": 0.80078125, "learning_rate": 0.00016470201539619386, "loss": 0.8834, "step": 18062 }, { "epoch": 0.46380718993582304, "grad_norm": 0.80859375, "learning_rate": 0.00016469861149892383, "loss": 0.9575, "step": 18063 }, { "epoch": 0.4638328671317449, "grad_norm": 0.73046875, "learning_rate": 0.00016469520747271535, "loss": 0.8295, "step": 18064 }, { "epoch": 0.4638585443276667, "grad_norm": 0.6953125, "learning_rate": 0.00016469180331757512, "loss": 0.8422, "step": 18065 }, { "epoch": 0.4638842215235885, "grad_norm": 0.84375, "learning_rate": 0.00016468839903351, "loss": 0.9773, "step": 18066 }, { "epoch": 0.4639098987195103, "grad_norm": 0.81640625, "learning_rate": 0.00016468499462052674, "loss": 0.8739, "step": 18067 }, { "epoch": 0.46393557591543216, "grad_norm": 0.7578125, "learning_rate": 0.00016468159007863213, "loss": 0.7716, "step": 18068 }, { "epoch": 0.463961253111354, "grad_norm": 0.734375, "learning_rate": 0.00016467818540783294, "loss": 0.9163, "step": 18069 }, { "epoch": 0.4639869303072758, "grad_norm": 0.78125, "learning_rate": 0.00016467478060813598, "loss": 0.831, "step": 18070 }, { "epoch": 0.4640126075031976, "grad_norm": 0.71484375, "learning_rate": 0.000164671375679548, "loss": 0.9326, "step": 18071 }, { "epoch": 0.46403828469911945, "grad_norm": 0.78515625, "learning_rate": 0.00016466797062207586, "loss": 0.9521, "step": 18072 }, { "epoch": 0.46406396189504123, "grad_norm": 0.79296875, "learning_rate": 0.00016466456543572626, "loss": 0.9053, "step": 18073 }, { "epoch": 0.46408963909096307, "grad_norm": 0.76953125, "learning_rate": 0.00016466116012050602, "loss": 0.8601, "step": 18074 }, { "epoch": 0.4641153162868849, "grad_norm": 0.7265625, "learning_rate": 0.00016465775467642196, "loss": 0.8376, "step": 18075 }, { "epoch": 0.4641409934828067, "grad_norm": 0.78515625, "learning_rate": 0.0001646543491034808, "loss": 0.9507, "step": 18076 }, { "epoch": 0.4641666706787285, "grad_norm": 0.796875, "learning_rate": 0.0001646509434016894, "loss": 0.8382, "step": 18077 }, { "epoch": 0.46419234787465036, "grad_norm": 0.82421875, "learning_rate": 0.00016464753757105445, "loss": 0.8752, "step": 18078 }, { "epoch": 0.4642180250705722, "grad_norm": 0.74609375, "learning_rate": 0.00016464413161158286, "loss": 0.8721, "step": 18079 }, { "epoch": 0.464243702266494, "grad_norm": 0.765625, "learning_rate": 0.00016464072552328131, "loss": 0.8792, "step": 18080 }, { "epoch": 0.4642693794624158, "grad_norm": 0.7578125, "learning_rate": 0.00016463731930615666, "loss": 0.8639, "step": 18081 }, { "epoch": 0.46429505665833765, "grad_norm": 0.8125, "learning_rate": 0.00016463391296021567, "loss": 0.9279, "step": 18082 }, { "epoch": 0.4643207338542594, "grad_norm": 0.84765625, "learning_rate": 0.00016463050648546514, "loss": 0.9501, "step": 18083 }, { "epoch": 0.46434641105018126, "grad_norm": 0.77734375, "learning_rate": 0.0001646270998819118, "loss": 0.8808, "step": 18084 }, { "epoch": 0.4643720882461031, "grad_norm": 0.7421875, "learning_rate": 0.0001646236931495625, "loss": 0.8428, "step": 18085 }, { "epoch": 0.4643977654420249, "grad_norm": 0.79296875, "learning_rate": 0.00016462028628842407, "loss": 0.9196, "step": 18086 }, { "epoch": 0.4644234426379467, "grad_norm": 0.69140625, "learning_rate": 0.0001646168792985032, "loss": 0.8252, "step": 18087 }, { "epoch": 0.46444911983386855, "grad_norm": 0.76171875, "learning_rate": 0.00016461347217980676, "loss": 1.044, "step": 18088 }, { "epoch": 0.4644747970297904, "grad_norm": 0.734375, "learning_rate": 0.00016461006493234147, "loss": 0.9556, "step": 18089 }, { "epoch": 0.46450047422571217, "grad_norm": 0.79296875, "learning_rate": 0.00016460665755611416, "loss": 0.9427, "step": 18090 }, { "epoch": 0.464526151421634, "grad_norm": 0.84375, "learning_rate": 0.00016460325005113164, "loss": 0.9915, "step": 18091 }, { "epoch": 0.46455182861755584, "grad_norm": 0.734375, "learning_rate": 0.00016459984241740068, "loss": 0.861, "step": 18092 }, { "epoch": 0.4645775058134776, "grad_norm": 0.71484375, "learning_rate": 0.00016459643465492805, "loss": 0.7977, "step": 18093 }, { "epoch": 0.46460318300939946, "grad_norm": 0.7578125, "learning_rate": 0.00016459302676372057, "loss": 0.9747, "step": 18094 }, { "epoch": 0.4646288602053213, "grad_norm": 0.7421875, "learning_rate": 0.00016458961874378503, "loss": 1.0541, "step": 18095 }, { "epoch": 0.4646545374012431, "grad_norm": 0.765625, "learning_rate": 0.0001645862105951282, "loss": 0.8711, "step": 18096 }, { "epoch": 0.4646802145971649, "grad_norm": 0.68359375, "learning_rate": 0.00016458280231775689, "loss": 0.8658, "step": 18097 }, { "epoch": 0.46470589179308674, "grad_norm": 0.75, "learning_rate": 0.0001645793939116779, "loss": 0.7941, "step": 18098 }, { "epoch": 0.4647315689890086, "grad_norm": 0.7890625, "learning_rate": 0.000164575985376898, "loss": 0.8914, "step": 18099 }, { "epoch": 0.46475724618493036, "grad_norm": 0.90625, "learning_rate": 0.000164572576713424, "loss": 0.935, "step": 18100 }, { "epoch": 0.4647829233808522, "grad_norm": 0.78515625, "learning_rate": 0.00016456916792126266, "loss": 1.0561, "step": 18101 }, { "epoch": 0.46480860057677403, "grad_norm": 0.80078125, "learning_rate": 0.00016456575900042082, "loss": 0.8422, "step": 18102 }, { "epoch": 0.4648342777726958, "grad_norm": 0.8515625, "learning_rate": 0.00016456234995090526, "loss": 0.9906, "step": 18103 }, { "epoch": 0.46485995496861765, "grad_norm": 0.9375, "learning_rate": 0.00016455894077272278, "loss": 0.963, "step": 18104 }, { "epoch": 0.4648856321645395, "grad_norm": 0.72265625, "learning_rate": 0.00016455553146588013, "loss": 0.8826, "step": 18105 }, { "epoch": 0.46491130936046127, "grad_norm": 0.73828125, "learning_rate": 0.00016455212203038416, "loss": 0.7902, "step": 18106 }, { "epoch": 0.4649369865563831, "grad_norm": 0.73046875, "learning_rate": 0.00016454871246624164, "loss": 0.9971, "step": 18107 }, { "epoch": 0.46496266375230494, "grad_norm": 0.75, "learning_rate": 0.00016454530277345937, "loss": 0.9435, "step": 18108 }, { "epoch": 0.4649883409482268, "grad_norm": 0.74609375, "learning_rate": 0.00016454189295204413, "loss": 0.8876, "step": 18109 }, { "epoch": 0.46501401814414856, "grad_norm": 0.75, "learning_rate": 0.00016453848300200275, "loss": 0.9845, "step": 18110 }, { "epoch": 0.4650396953400704, "grad_norm": 0.84765625, "learning_rate": 0.00016453507292334198, "loss": 0.9927, "step": 18111 }, { "epoch": 0.4650653725359922, "grad_norm": 0.77734375, "learning_rate": 0.00016453166271606864, "loss": 0.84, "step": 18112 }, { "epoch": 0.465091049731914, "grad_norm": 0.79296875, "learning_rate": 0.00016452825238018952, "loss": 1.0215, "step": 18113 }, { "epoch": 0.46511672692783584, "grad_norm": 0.796875, "learning_rate": 0.00016452484191571145, "loss": 0.7724, "step": 18114 }, { "epoch": 0.4651424041237577, "grad_norm": 0.79296875, "learning_rate": 0.0001645214313226412, "loss": 0.8763, "step": 18115 }, { "epoch": 0.46516808131967946, "grad_norm": 0.80078125, "learning_rate": 0.00016451802060098554, "loss": 0.9301, "step": 18116 }, { "epoch": 0.4651937585156013, "grad_norm": 0.76171875, "learning_rate": 0.0001645146097507513, "loss": 1.0008, "step": 18117 }, { "epoch": 0.46521943571152313, "grad_norm": 0.765625, "learning_rate": 0.00016451119877194524, "loss": 0.8572, "step": 18118 }, { "epoch": 0.46524511290744497, "grad_norm": 0.76171875, "learning_rate": 0.00016450778766457423, "loss": 0.9262, "step": 18119 }, { "epoch": 0.46527079010336675, "grad_norm": 0.7734375, "learning_rate": 0.000164504376428645, "loss": 0.7941, "step": 18120 }, { "epoch": 0.4652964672992886, "grad_norm": 0.83984375, "learning_rate": 0.00016450096506416442, "loss": 0.9856, "step": 18121 }, { "epoch": 0.4653221444952104, "grad_norm": 0.77734375, "learning_rate": 0.0001644975535711392, "loss": 0.9265, "step": 18122 }, { "epoch": 0.4653478216911322, "grad_norm": 0.8046875, "learning_rate": 0.0001644941419495762, "loss": 0.8952, "step": 18123 }, { "epoch": 0.46537349888705404, "grad_norm": 0.734375, "learning_rate": 0.0001644907301994822, "loss": 0.827, "step": 18124 }, { "epoch": 0.4653991760829759, "grad_norm": 0.78515625, "learning_rate": 0.000164487318320864, "loss": 0.8776, "step": 18125 }, { "epoch": 0.46542485327889765, "grad_norm": 0.89453125, "learning_rate": 0.00016448390631372837, "loss": 0.9737, "step": 18126 }, { "epoch": 0.4654505304748195, "grad_norm": 0.7578125, "learning_rate": 0.0001644804941780822, "loss": 0.8338, "step": 18127 }, { "epoch": 0.4654762076707413, "grad_norm": 0.82421875, "learning_rate": 0.00016447708191393218, "loss": 0.9375, "step": 18128 }, { "epoch": 0.46550188486666316, "grad_norm": 0.8203125, "learning_rate": 0.00016447366952128517, "loss": 0.9855, "step": 18129 }, { "epoch": 0.46552756206258494, "grad_norm": 0.796875, "learning_rate": 0.00016447025700014794, "loss": 0.8919, "step": 18130 }, { "epoch": 0.4655532392585068, "grad_norm": 0.83203125, "learning_rate": 0.00016446684435052737, "loss": 0.9401, "step": 18131 }, { "epoch": 0.4655789164544286, "grad_norm": 0.734375, "learning_rate": 0.00016446343157243014, "loss": 0.9231, "step": 18132 }, { "epoch": 0.4656045936503504, "grad_norm": 0.7890625, "learning_rate": 0.00016446001866586313, "loss": 0.8802, "step": 18133 }, { "epoch": 0.46563027084627223, "grad_norm": 0.734375, "learning_rate": 0.00016445660563083316, "loss": 0.9415, "step": 18134 }, { "epoch": 0.46565594804219407, "grad_norm": 0.81640625, "learning_rate": 0.00016445319246734697, "loss": 0.9305, "step": 18135 }, { "epoch": 0.46568162523811585, "grad_norm": 0.796875, "learning_rate": 0.00016444977917541142, "loss": 0.8179, "step": 18136 }, { "epoch": 0.4657073024340377, "grad_norm": 0.80078125, "learning_rate": 0.00016444636575503325, "loss": 0.8899, "step": 18137 }, { "epoch": 0.4657329796299595, "grad_norm": 0.703125, "learning_rate": 0.00016444295220621928, "loss": 0.7807, "step": 18138 }, { "epoch": 0.46575865682588136, "grad_norm": 0.73046875, "learning_rate": 0.00016443953852897635, "loss": 0.8714, "step": 18139 }, { "epoch": 0.46578433402180314, "grad_norm": 0.73828125, "learning_rate": 0.00016443612472331122, "loss": 0.9347, "step": 18140 }, { "epoch": 0.465810011217725, "grad_norm": 0.76953125, "learning_rate": 0.00016443271078923078, "loss": 0.9293, "step": 18141 }, { "epoch": 0.4658356884136468, "grad_norm": 0.77734375, "learning_rate": 0.0001644292967267417, "loss": 0.9679, "step": 18142 }, { "epoch": 0.4658613656095686, "grad_norm": 0.734375, "learning_rate": 0.00016442588253585088, "loss": 0.8686, "step": 18143 }, { "epoch": 0.4658870428054904, "grad_norm": 0.6875, "learning_rate": 0.00016442246821656505, "loss": 0.7812, "step": 18144 }, { "epoch": 0.46591272000141226, "grad_norm": 0.7734375, "learning_rate": 0.00016441905376889113, "loss": 0.8333, "step": 18145 }, { "epoch": 0.46593839719733404, "grad_norm": 0.73046875, "learning_rate": 0.0001644156391928358, "loss": 0.9852, "step": 18146 }, { "epoch": 0.4659640743932559, "grad_norm": 0.84375, "learning_rate": 0.00016441222448840596, "loss": 0.8879, "step": 18147 }, { "epoch": 0.4659897515891777, "grad_norm": 0.76953125, "learning_rate": 0.00016440880965560838, "loss": 0.9062, "step": 18148 }, { "epoch": 0.46601542878509955, "grad_norm": 0.796875, "learning_rate": 0.0001644053946944498, "loss": 0.9784, "step": 18149 }, { "epoch": 0.46604110598102133, "grad_norm": 0.7734375, "learning_rate": 0.00016440197960493713, "loss": 0.9427, "step": 18150 }, { "epoch": 0.46606678317694317, "grad_norm": 0.75, "learning_rate": 0.00016439856438707712, "loss": 0.8271, "step": 18151 }, { "epoch": 0.466092460372865, "grad_norm": 0.73828125, "learning_rate": 0.0001643951490408766, "loss": 0.7981, "step": 18152 }, { "epoch": 0.4661181375687868, "grad_norm": 0.80078125, "learning_rate": 0.00016439173356634233, "loss": 0.8732, "step": 18153 }, { "epoch": 0.4661438147647086, "grad_norm": 0.71875, "learning_rate": 0.0001643883179634812, "loss": 0.8141, "step": 18154 }, { "epoch": 0.46616949196063046, "grad_norm": 0.73828125, "learning_rate": 0.00016438490223229997, "loss": 0.8164, "step": 18155 }, { "epoch": 0.46619516915655224, "grad_norm": 0.81640625, "learning_rate": 0.0001643814863728054, "loss": 0.8745, "step": 18156 }, { "epoch": 0.46622084635247407, "grad_norm": 0.76171875, "learning_rate": 0.00016437807038500437, "loss": 0.7466, "step": 18157 }, { "epoch": 0.4662465235483959, "grad_norm": 0.73828125, "learning_rate": 0.00016437465426890366, "loss": 0.8393, "step": 18158 }, { "epoch": 0.46627220074431774, "grad_norm": 0.875, "learning_rate": 0.00016437123802451006, "loss": 0.9008, "step": 18159 }, { "epoch": 0.4662978779402395, "grad_norm": 0.79296875, "learning_rate": 0.00016436782165183046, "loss": 0.8108, "step": 18160 }, { "epoch": 0.46632355513616136, "grad_norm": 0.79296875, "learning_rate": 0.00016436440515087155, "loss": 0.8179, "step": 18161 }, { "epoch": 0.4663492323320832, "grad_norm": 0.84765625, "learning_rate": 0.0001643609885216402, "loss": 1.0766, "step": 18162 }, { "epoch": 0.466374909528005, "grad_norm": 0.83203125, "learning_rate": 0.0001643575717641432, "loss": 0.8833, "step": 18163 }, { "epoch": 0.4664005867239268, "grad_norm": 0.80078125, "learning_rate": 0.0001643541548783874, "loss": 1.0352, "step": 18164 }, { "epoch": 0.46642626391984865, "grad_norm": 0.765625, "learning_rate": 0.00016435073786437955, "loss": 0.9079, "step": 18165 }, { "epoch": 0.46645194111577043, "grad_norm": 0.765625, "learning_rate": 0.00016434732072212653, "loss": 1.0371, "step": 18166 }, { "epoch": 0.46647761831169227, "grad_norm": 0.74609375, "learning_rate": 0.0001643439034516351, "loss": 0.79, "step": 18167 }, { "epoch": 0.4665032955076141, "grad_norm": 0.7578125, "learning_rate": 0.00016434048605291207, "loss": 1.0019, "step": 18168 }, { "epoch": 0.46652897270353594, "grad_norm": 0.81640625, "learning_rate": 0.00016433706852596428, "loss": 0.9138, "step": 18169 }, { "epoch": 0.4665546498994577, "grad_norm": 0.74609375, "learning_rate": 0.00016433365087079851, "loss": 0.8635, "step": 18170 }, { "epoch": 0.46658032709537955, "grad_norm": 0.7265625, "learning_rate": 0.00016433023308742162, "loss": 0.8239, "step": 18171 }, { "epoch": 0.4666060042913014, "grad_norm": 0.71875, "learning_rate": 0.00016432681517584033, "loss": 0.8674, "step": 18172 }, { "epoch": 0.46663168148722317, "grad_norm": 0.8515625, "learning_rate": 0.00016432339713606155, "loss": 0.8205, "step": 18173 }, { "epoch": 0.466657358683145, "grad_norm": 0.74609375, "learning_rate": 0.00016431997896809204, "loss": 0.8345, "step": 18174 }, { "epoch": 0.46668303587906684, "grad_norm": 0.80078125, "learning_rate": 0.00016431656067193864, "loss": 0.9925, "step": 18175 }, { "epoch": 0.4667087130749886, "grad_norm": 0.7578125, "learning_rate": 0.0001643131422476081, "loss": 0.8824, "step": 18176 }, { "epoch": 0.46673439027091046, "grad_norm": 0.74609375, "learning_rate": 0.0001643097236951073, "loss": 0.9236, "step": 18177 }, { "epoch": 0.4667600674668323, "grad_norm": 0.796875, "learning_rate": 0.00016430630501444303, "loss": 0.8989, "step": 18178 }, { "epoch": 0.4667857446627541, "grad_norm": 0.72265625, "learning_rate": 0.0001643028862056221, "loss": 0.8555, "step": 18179 }, { "epoch": 0.4668114218586759, "grad_norm": 0.72265625, "learning_rate": 0.00016429946726865136, "loss": 0.9241, "step": 18180 }, { "epoch": 0.46683709905459775, "grad_norm": 0.77734375, "learning_rate": 0.00016429604820353754, "loss": 0.8066, "step": 18181 }, { "epoch": 0.4668627762505196, "grad_norm": 0.7734375, "learning_rate": 0.00016429262901028753, "loss": 0.944, "step": 18182 }, { "epoch": 0.46688845344644136, "grad_norm": 0.77734375, "learning_rate": 0.0001642892096889081, "loss": 0.9775, "step": 18183 }, { "epoch": 0.4669141306423632, "grad_norm": 0.80859375, "learning_rate": 0.0001642857902394061, "loss": 0.9306, "step": 18184 }, { "epoch": 0.46693980783828504, "grad_norm": 0.7109375, "learning_rate": 0.00016428237066178831, "loss": 0.9415, "step": 18185 }, { "epoch": 0.4669654850342068, "grad_norm": 0.72265625, "learning_rate": 0.00016427895095606155, "loss": 0.8781, "step": 18186 }, { "epoch": 0.46699116223012865, "grad_norm": 0.7578125, "learning_rate": 0.0001642755311222327, "loss": 1.0455, "step": 18187 }, { "epoch": 0.4670168394260505, "grad_norm": 0.8203125, "learning_rate": 0.0001642721111603085, "loss": 0.9122, "step": 18188 }, { "epoch": 0.46704251662197227, "grad_norm": 0.765625, "learning_rate": 0.00016426869107029576, "loss": 0.9074, "step": 18189 }, { "epoch": 0.4670681938178941, "grad_norm": 0.7578125, "learning_rate": 0.00016426527085220134, "loss": 0.8557, "step": 18190 }, { "epoch": 0.46709387101381594, "grad_norm": 0.73828125, "learning_rate": 0.00016426185050603205, "loss": 0.8529, "step": 18191 }, { "epoch": 0.4671195482097378, "grad_norm": 0.80078125, "learning_rate": 0.00016425843003179468, "loss": 0.9157, "step": 18192 }, { "epoch": 0.46714522540565956, "grad_norm": 0.8046875, "learning_rate": 0.0001642550094294961, "loss": 0.9725, "step": 18193 }, { "epoch": 0.4671709026015814, "grad_norm": 0.7265625, "learning_rate": 0.00016425158869914306, "loss": 0.9036, "step": 18194 }, { "epoch": 0.46719657979750323, "grad_norm": 0.828125, "learning_rate": 0.00016424816784074242, "loss": 1.0405, "step": 18195 }, { "epoch": 0.467222256993425, "grad_norm": 0.7578125, "learning_rate": 0.00016424474685430095, "loss": 0.8863, "step": 18196 }, { "epoch": 0.46724793418934685, "grad_norm": 0.8046875, "learning_rate": 0.00016424132573982552, "loss": 0.8502, "step": 18197 }, { "epoch": 0.4672736113852687, "grad_norm": 0.72265625, "learning_rate": 0.00016423790449732295, "loss": 0.7584, "step": 18198 }, { "epoch": 0.46729928858119046, "grad_norm": 0.73046875, "learning_rate": 0.00016423448312680004, "loss": 0.8713, "step": 18199 }, { "epoch": 0.4673249657771123, "grad_norm": 0.72265625, "learning_rate": 0.0001642310616282636, "loss": 0.8802, "step": 18200 }, { "epoch": 0.46735064297303414, "grad_norm": 0.77734375, "learning_rate": 0.00016422764000172043, "loss": 0.815, "step": 18201 }, { "epoch": 0.46737632016895597, "grad_norm": 0.8359375, "learning_rate": 0.00016422421824717742, "loss": 0.841, "step": 18202 }, { "epoch": 0.46740199736487775, "grad_norm": 0.703125, "learning_rate": 0.0001642207963646413, "loss": 0.8054, "step": 18203 }, { "epoch": 0.4674276745607996, "grad_norm": 0.7890625, "learning_rate": 0.00016421737435411897, "loss": 0.9657, "step": 18204 }, { "epoch": 0.4674533517567214, "grad_norm": 0.765625, "learning_rate": 0.0001642139522156172, "loss": 0.9076, "step": 18205 }, { "epoch": 0.4674790289526432, "grad_norm": 0.76953125, "learning_rate": 0.0001642105299491428, "loss": 0.9166, "step": 18206 }, { "epoch": 0.46750470614856504, "grad_norm": 0.828125, "learning_rate": 0.00016420710755470265, "loss": 0.8625, "step": 18207 }, { "epoch": 0.4675303833444869, "grad_norm": 0.7265625, "learning_rate": 0.00016420368503230353, "loss": 0.8904, "step": 18208 }, { "epoch": 0.46755606054040866, "grad_norm": 0.74609375, "learning_rate": 0.00016420026238195225, "loss": 0.8691, "step": 18209 }, { "epoch": 0.4675817377363305, "grad_norm": 0.8203125, "learning_rate": 0.00016419683960365564, "loss": 0.9276, "step": 18210 }, { "epoch": 0.46760741493225233, "grad_norm": 0.7421875, "learning_rate": 0.00016419341669742058, "loss": 0.8331, "step": 18211 }, { "epoch": 0.46763309212817417, "grad_norm": 0.76953125, "learning_rate": 0.00016418999366325375, "loss": 1.005, "step": 18212 }, { "epoch": 0.46765876932409595, "grad_norm": 0.82421875, "learning_rate": 0.00016418657050116213, "loss": 0.9696, "step": 18213 }, { "epoch": 0.4676844465200178, "grad_norm": 0.7734375, "learning_rate": 0.00016418314721115243, "loss": 1.0013, "step": 18214 }, { "epoch": 0.4677101237159396, "grad_norm": 0.75, "learning_rate": 0.00016417972379323157, "loss": 0.7605, "step": 18215 }, { "epoch": 0.4677358009118614, "grad_norm": 0.7109375, "learning_rate": 0.0001641763002474063, "loss": 0.8741, "step": 18216 }, { "epoch": 0.46776147810778324, "grad_norm": 0.90234375, "learning_rate": 0.0001641728765736834, "loss": 1.0394, "step": 18217 }, { "epoch": 0.46778715530370507, "grad_norm": 0.85546875, "learning_rate": 0.00016416945277206982, "loss": 0.7781, "step": 18218 }, { "epoch": 0.46781283249962685, "grad_norm": 0.8046875, "learning_rate": 0.0001641660288425723, "loss": 0.8631, "step": 18219 }, { "epoch": 0.4678385096955487, "grad_norm": 0.79296875, "learning_rate": 0.0001641626047851977, "loss": 0.8873, "step": 18220 }, { "epoch": 0.4678641868914705, "grad_norm": 0.765625, "learning_rate": 0.00016415918059995278, "loss": 0.8804, "step": 18221 }, { "epoch": 0.46788986408739236, "grad_norm": 0.7734375, "learning_rate": 0.00016415575628684443, "loss": 0.8301, "step": 18222 }, { "epoch": 0.46791554128331414, "grad_norm": 0.7734375, "learning_rate": 0.00016415233184587945, "loss": 0.8839, "step": 18223 }, { "epoch": 0.467941218479236, "grad_norm": 0.78125, "learning_rate": 0.0001641489072770647, "loss": 0.893, "step": 18224 }, { "epoch": 0.4679668956751578, "grad_norm": 0.80078125, "learning_rate": 0.0001641454825804069, "loss": 0.9018, "step": 18225 }, { "epoch": 0.4679925728710796, "grad_norm": 0.8125, "learning_rate": 0.00016414205775591302, "loss": 0.9606, "step": 18226 }, { "epoch": 0.46801825006700143, "grad_norm": 0.72265625, "learning_rate": 0.00016413863280358978, "loss": 0.8307, "step": 18227 }, { "epoch": 0.46804392726292326, "grad_norm": 0.7890625, "learning_rate": 0.00016413520772344406, "loss": 0.9598, "step": 18228 }, { "epoch": 0.46806960445884505, "grad_norm": 0.7734375, "learning_rate": 0.00016413178251548262, "loss": 0.8839, "step": 18229 }, { "epoch": 0.4680952816547669, "grad_norm": 0.87109375, "learning_rate": 0.00016412835717971236, "loss": 1.0321, "step": 18230 }, { "epoch": 0.4681209588506887, "grad_norm": 0.8203125, "learning_rate": 0.0001641249317161401, "loss": 0.8081, "step": 18231 }, { "epoch": 0.46814663604661055, "grad_norm": 0.75, "learning_rate": 0.00016412150612477258, "loss": 0.8285, "step": 18232 }, { "epoch": 0.46817231324253233, "grad_norm": 0.8203125, "learning_rate": 0.00016411808040561675, "loss": 0.9497, "step": 18233 }, { "epoch": 0.46819799043845417, "grad_norm": 0.74609375, "learning_rate": 0.00016411465455867935, "loss": 0.8211, "step": 18234 }, { "epoch": 0.468223667634376, "grad_norm": 0.83203125, "learning_rate": 0.00016411122858396723, "loss": 1.0052, "step": 18235 }, { "epoch": 0.4682493448302978, "grad_norm": 0.7734375, "learning_rate": 0.00016410780248148725, "loss": 0.9082, "step": 18236 }, { "epoch": 0.4682750220262196, "grad_norm": 0.7734375, "learning_rate": 0.00016410437625124618, "loss": 0.8063, "step": 18237 }, { "epoch": 0.46830069922214146, "grad_norm": 0.77734375, "learning_rate": 0.0001641009498932509, "loss": 0.8908, "step": 18238 }, { "epoch": 0.46832637641806324, "grad_norm": 0.80859375, "learning_rate": 0.0001640975234075082, "loss": 0.8169, "step": 18239 }, { "epoch": 0.4683520536139851, "grad_norm": 0.796875, "learning_rate": 0.00016409409679402493, "loss": 0.8806, "step": 18240 }, { "epoch": 0.4683777308099069, "grad_norm": 0.7578125, "learning_rate": 0.00016409067005280793, "loss": 0.93, "step": 18241 }, { "epoch": 0.46840340800582875, "grad_norm": 0.84375, "learning_rate": 0.00016408724318386399, "loss": 0.9285, "step": 18242 }, { "epoch": 0.46842908520175053, "grad_norm": 0.70703125, "learning_rate": 0.0001640838161872, "loss": 0.9835, "step": 18243 }, { "epoch": 0.46845476239767236, "grad_norm": 0.78515625, "learning_rate": 0.00016408038906282272, "loss": 0.956, "step": 18244 }, { "epoch": 0.4684804395935942, "grad_norm": 0.73828125, "learning_rate": 0.00016407696181073905, "loss": 0.8501, "step": 18245 }, { "epoch": 0.468506116789516, "grad_norm": 0.8125, "learning_rate": 0.00016407353443095575, "loss": 0.971, "step": 18246 }, { "epoch": 0.4685317939854378, "grad_norm": 0.765625, "learning_rate": 0.00016407010692347967, "loss": 0.8253, "step": 18247 }, { "epoch": 0.46855747118135965, "grad_norm": 0.82421875, "learning_rate": 0.0001640666792883177, "loss": 1.0228, "step": 18248 }, { "epoch": 0.46858314837728143, "grad_norm": 0.8125, "learning_rate": 0.00016406325152547657, "loss": 0.9217, "step": 18249 }, { "epoch": 0.46860882557320327, "grad_norm": 0.8203125, "learning_rate": 0.0001640598236349632, "loss": 0.9825, "step": 18250 }, { "epoch": 0.4686345027691251, "grad_norm": 0.80078125, "learning_rate": 0.00016405639561678438, "loss": 0.8162, "step": 18251 }, { "epoch": 0.46866017996504694, "grad_norm": 0.7109375, "learning_rate": 0.00016405296747094695, "loss": 0.8445, "step": 18252 }, { "epoch": 0.4686858571609687, "grad_norm": 0.8046875, "learning_rate": 0.00016404953919745775, "loss": 1.0191, "step": 18253 }, { "epoch": 0.46871153435689056, "grad_norm": 0.8046875, "learning_rate": 0.00016404611079632358, "loss": 0.9024, "step": 18254 }, { "epoch": 0.4687372115528124, "grad_norm": 0.75, "learning_rate": 0.00016404268226755133, "loss": 0.8574, "step": 18255 }, { "epoch": 0.4687628887487342, "grad_norm": 0.78125, "learning_rate": 0.00016403925361114778, "loss": 0.9753, "step": 18256 }, { "epoch": 0.468788565944656, "grad_norm": 0.7890625, "learning_rate": 0.00016403582482711978, "loss": 0.8604, "step": 18257 }, { "epoch": 0.46881424314057785, "grad_norm": 0.8046875, "learning_rate": 0.0001640323959154742, "loss": 0.8164, "step": 18258 }, { "epoch": 0.4688399203364996, "grad_norm": 0.8203125, "learning_rate": 0.0001640289668762178, "loss": 0.778, "step": 18259 }, { "epoch": 0.46886559753242146, "grad_norm": 0.80078125, "learning_rate": 0.0001640255377093575, "loss": 0.961, "step": 18260 }, { "epoch": 0.4688912747283433, "grad_norm": 0.73046875, "learning_rate": 0.00016402210841490002, "loss": 0.8869, "step": 18261 }, { "epoch": 0.46891695192426514, "grad_norm": 0.7734375, "learning_rate": 0.0001640186789928523, "loss": 0.9184, "step": 18262 }, { "epoch": 0.4689426291201869, "grad_norm": 0.8203125, "learning_rate": 0.00016401524944322112, "loss": 0.8196, "step": 18263 }, { "epoch": 0.46896830631610875, "grad_norm": 0.78515625, "learning_rate": 0.00016401181976601333, "loss": 0.8737, "step": 18264 }, { "epoch": 0.4689939835120306, "grad_norm": 0.8828125, "learning_rate": 0.00016400838996123577, "loss": 1.0503, "step": 18265 }, { "epoch": 0.46901966070795237, "grad_norm": 0.71484375, "learning_rate": 0.00016400496002889529, "loss": 0.9362, "step": 18266 }, { "epoch": 0.4690453379038742, "grad_norm": 0.8046875, "learning_rate": 0.00016400152996899866, "loss": 0.9589, "step": 18267 }, { "epoch": 0.46907101509979604, "grad_norm": 0.8203125, "learning_rate": 0.0001639980997815528, "loss": 1.0345, "step": 18268 }, { "epoch": 0.4690966922957178, "grad_norm": 0.78125, "learning_rate": 0.00016399466946656453, "loss": 1.004, "step": 18269 }, { "epoch": 0.46912236949163966, "grad_norm": 0.828125, "learning_rate": 0.0001639912390240406, "loss": 0.935, "step": 18270 }, { "epoch": 0.4691480466875615, "grad_norm": 0.8203125, "learning_rate": 0.00016398780845398796, "loss": 1.0321, "step": 18271 }, { "epoch": 0.46917372388348333, "grad_norm": 0.74609375, "learning_rate": 0.0001639843777564134, "loss": 0.8497, "step": 18272 }, { "epoch": 0.4691994010794051, "grad_norm": 0.7578125, "learning_rate": 0.00016398094693132373, "loss": 0.8701, "step": 18273 }, { "epoch": 0.46922507827532695, "grad_norm": 0.73828125, "learning_rate": 0.0001639775159787258, "loss": 0.9171, "step": 18274 }, { "epoch": 0.4692507554712488, "grad_norm": 0.86328125, "learning_rate": 0.00016397408489862649, "loss": 1.0069, "step": 18275 }, { "epoch": 0.46927643266717056, "grad_norm": 0.76953125, "learning_rate": 0.00016397065369103258, "loss": 0.9305, "step": 18276 }, { "epoch": 0.4693021098630924, "grad_norm": 0.76171875, "learning_rate": 0.00016396722235595098, "loss": 0.8711, "step": 18277 }, { "epoch": 0.46932778705901423, "grad_norm": 0.7578125, "learning_rate": 0.00016396379089338845, "loss": 0.8361, "step": 18278 }, { "epoch": 0.469353464254936, "grad_norm": 0.73828125, "learning_rate": 0.00016396035930335188, "loss": 0.8897, "step": 18279 }, { "epoch": 0.46937914145085785, "grad_norm": 0.77734375, "learning_rate": 0.0001639569275858481, "loss": 0.8794, "step": 18280 }, { "epoch": 0.4694048186467797, "grad_norm": 0.74609375, "learning_rate": 0.0001639534957408839, "loss": 0.999, "step": 18281 }, { "epoch": 0.4694304958427015, "grad_norm": 0.75, "learning_rate": 0.00016395006376846618, "loss": 0.7923, "step": 18282 }, { "epoch": 0.4694561730386233, "grad_norm": 0.734375, "learning_rate": 0.00016394663166860175, "loss": 0.9148, "step": 18283 }, { "epoch": 0.46948185023454514, "grad_norm": 0.75390625, "learning_rate": 0.0001639431994412975, "loss": 0.9605, "step": 18284 }, { "epoch": 0.469507527430467, "grad_norm": 0.7734375, "learning_rate": 0.0001639397670865602, "loss": 0.8864, "step": 18285 }, { "epoch": 0.46953320462638876, "grad_norm": 0.87109375, "learning_rate": 0.0001639363346043967, "loss": 0.9418, "step": 18286 }, { "epoch": 0.4695588818223106, "grad_norm": 0.76953125, "learning_rate": 0.00016393290199481387, "loss": 0.9065, "step": 18287 }, { "epoch": 0.46958455901823243, "grad_norm": 0.71875, "learning_rate": 0.00016392946925781855, "loss": 0.7435, "step": 18288 }, { "epoch": 0.4696102362141542, "grad_norm": 0.734375, "learning_rate": 0.0001639260363934176, "loss": 0.9289, "step": 18289 }, { "epoch": 0.46963591341007604, "grad_norm": 0.76171875, "learning_rate": 0.00016392260340161778, "loss": 0.846, "step": 18290 }, { "epoch": 0.4696615906059979, "grad_norm": 0.76171875, "learning_rate": 0.00016391917028242602, "loss": 1.009, "step": 18291 }, { "epoch": 0.4696872678019197, "grad_norm": 0.79296875, "learning_rate": 0.00016391573703584912, "loss": 0.8968, "step": 18292 }, { "epoch": 0.4697129449978415, "grad_norm": 0.796875, "learning_rate": 0.00016391230366189394, "loss": 0.8347, "step": 18293 }, { "epoch": 0.46973862219376333, "grad_norm": 0.75390625, "learning_rate": 0.0001639088701605673, "loss": 0.8078, "step": 18294 }, { "epoch": 0.46976429938968517, "grad_norm": 0.828125, "learning_rate": 0.00016390543653187605, "loss": 0.9267, "step": 18295 }, { "epoch": 0.46978997658560695, "grad_norm": 0.75, "learning_rate": 0.00016390200277582704, "loss": 0.9449, "step": 18296 }, { "epoch": 0.4698156537815288, "grad_norm": 0.78125, "learning_rate": 0.0001638985688924271, "loss": 0.9881, "step": 18297 }, { "epoch": 0.4698413309774506, "grad_norm": 0.7578125, "learning_rate": 0.00016389513488168314, "loss": 0.8673, "step": 18298 }, { "epoch": 0.4698670081733724, "grad_norm": 0.79296875, "learning_rate": 0.0001638917007436019, "loss": 0.9854, "step": 18299 }, { "epoch": 0.46989268536929424, "grad_norm": 0.76953125, "learning_rate": 0.00016388826647819026, "loss": 0.8956, "step": 18300 }, { "epoch": 0.4699183625652161, "grad_norm": 0.6953125, "learning_rate": 0.0001638848320854551, "loss": 0.906, "step": 18301 }, { "epoch": 0.4699440397611379, "grad_norm": 0.91015625, "learning_rate": 0.00016388139756540323, "loss": 0.8845, "step": 18302 }, { "epoch": 0.4699697169570597, "grad_norm": 0.7421875, "learning_rate": 0.00016387796291804152, "loss": 0.8985, "step": 18303 }, { "epoch": 0.4699953941529815, "grad_norm": 0.76171875, "learning_rate": 0.0001638745281433768, "loss": 0.9938, "step": 18304 }, { "epoch": 0.47002107134890336, "grad_norm": 0.80859375, "learning_rate": 0.0001638710932414159, "loss": 0.9424, "step": 18305 }, { "epoch": 0.47004674854482514, "grad_norm": 0.80859375, "learning_rate": 0.0001638676582121657, "loss": 1.0574, "step": 18306 }, { "epoch": 0.470072425740747, "grad_norm": 0.80078125, "learning_rate": 0.000163864223055633, "loss": 0.8722, "step": 18307 }, { "epoch": 0.4700981029366688, "grad_norm": 0.7421875, "learning_rate": 0.00016386078777182468, "loss": 0.8121, "step": 18308 }, { "epoch": 0.4701237801325906, "grad_norm": 0.76953125, "learning_rate": 0.00016385735236074757, "loss": 0.9737, "step": 18309 }, { "epoch": 0.47014945732851243, "grad_norm": 0.828125, "learning_rate": 0.00016385391682240854, "loss": 0.9401, "step": 18310 }, { "epoch": 0.47017513452443427, "grad_norm": 0.7890625, "learning_rate": 0.0001638504811568144, "loss": 0.9305, "step": 18311 }, { "epoch": 0.4702008117203561, "grad_norm": 0.81640625, "learning_rate": 0.00016384704536397205, "loss": 0.792, "step": 18312 }, { "epoch": 0.4702264889162779, "grad_norm": 0.79296875, "learning_rate": 0.0001638436094438883, "loss": 0.9786, "step": 18313 }, { "epoch": 0.4702521661121997, "grad_norm": 0.74609375, "learning_rate": 0.00016384017339657, "loss": 0.9694, "step": 18314 }, { "epoch": 0.47027784330812156, "grad_norm": 0.796875, "learning_rate": 0.00016383673722202398, "loss": 0.9182, "step": 18315 }, { "epoch": 0.47030352050404334, "grad_norm": 0.8671875, "learning_rate": 0.00016383330092025714, "loss": 0.9574, "step": 18316 }, { "epoch": 0.4703291976999652, "grad_norm": 0.7734375, "learning_rate": 0.00016382986449127627, "loss": 0.9159, "step": 18317 }, { "epoch": 0.470354874895887, "grad_norm": 0.796875, "learning_rate": 0.0001638264279350883, "loss": 0.8755, "step": 18318 }, { "epoch": 0.4703805520918088, "grad_norm": 0.76171875, "learning_rate": 0.00016382299125169996, "loss": 0.9326, "step": 18319 }, { "epoch": 0.4704062292877306, "grad_norm": 0.734375, "learning_rate": 0.0001638195544411182, "loss": 1.0579, "step": 18320 }, { "epoch": 0.47043190648365246, "grad_norm": 0.73828125, "learning_rate": 0.00016381611750334983, "loss": 0.879, "step": 18321 }, { "epoch": 0.4704575836795743, "grad_norm": 0.7734375, "learning_rate": 0.00016381268043840166, "loss": 0.9924, "step": 18322 }, { "epoch": 0.4704832608754961, "grad_norm": 0.7421875, "learning_rate": 0.00016380924324628065, "loss": 0.8795, "step": 18323 }, { "epoch": 0.4705089380714179, "grad_norm": 0.734375, "learning_rate": 0.00016380580592699354, "loss": 0.8242, "step": 18324 }, { "epoch": 0.47053461526733975, "grad_norm": 0.76171875, "learning_rate": 0.00016380236848054724, "loss": 0.88, "step": 18325 }, { "epoch": 0.47056029246326153, "grad_norm": 0.7890625, "learning_rate": 0.00016379893090694858, "loss": 0.9188, "step": 18326 }, { "epoch": 0.47058596965918337, "grad_norm": 0.765625, "learning_rate": 0.00016379549320620437, "loss": 0.8856, "step": 18327 }, { "epoch": 0.4706116468551052, "grad_norm": 0.8125, "learning_rate": 0.00016379205537832156, "loss": 0.9191, "step": 18328 }, { "epoch": 0.470637324051027, "grad_norm": 0.7578125, "learning_rate": 0.00016378861742330692, "loss": 0.8685, "step": 18329 }, { "epoch": 0.4706630012469488, "grad_norm": 0.8046875, "learning_rate": 0.00016378517934116733, "loss": 0.9791, "step": 18330 }, { "epoch": 0.47068867844287066, "grad_norm": 0.72265625, "learning_rate": 0.00016378174113190966, "loss": 0.7475, "step": 18331 }, { "epoch": 0.4707143556387925, "grad_norm": 0.73828125, "learning_rate": 0.0001637783027955407, "loss": 0.7995, "step": 18332 }, { "epoch": 0.4707400328347143, "grad_norm": 0.76953125, "learning_rate": 0.0001637748643320674, "loss": 0.9923, "step": 18333 }, { "epoch": 0.4707657100306361, "grad_norm": 0.765625, "learning_rate": 0.00016377142574149653, "loss": 0.7963, "step": 18334 }, { "epoch": 0.47079138722655794, "grad_norm": 0.78125, "learning_rate": 0.00016376798702383494, "loss": 0.8337, "step": 18335 }, { "epoch": 0.4708170644224797, "grad_norm": 0.80078125, "learning_rate": 0.00016376454817908955, "loss": 0.8614, "step": 18336 }, { "epoch": 0.47084274161840156, "grad_norm": 0.79296875, "learning_rate": 0.00016376110920726715, "loss": 0.8244, "step": 18337 }, { "epoch": 0.4708684188143234, "grad_norm": 0.76171875, "learning_rate": 0.00016375767010837463, "loss": 0.8188, "step": 18338 }, { "epoch": 0.4708940960102452, "grad_norm": 0.765625, "learning_rate": 0.00016375423088241885, "loss": 0.8874, "step": 18339 }, { "epoch": 0.470919773206167, "grad_norm": 0.80859375, "learning_rate": 0.00016375079152940663, "loss": 0.989, "step": 18340 }, { "epoch": 0.47094545040208885, "grad_norm": 0.7421875, "learning_rate": 0.00016374735204934484, "loss": 0.9742, "step": 18341 }, { "epoch": 0.4709711275980107, "grad_norm": 0.7265625, "learning_rate": 0.00016374391244224033, "loss": 0.8973, "step": 18342 }, { "epoch": 0.47099680479393247, "grad_norm": 0.7421875, "learning_rate": 0.00016374047270809996, "loss": 0.8426, "step": 18343 }, { "epoch": 0.4710224819898543, "grad_norm": 0.76171875, "learning_rate": 0.00016373703284693061, "loss": 0.7794, "step": 18344 }, { "epoch": 0.47104815918577614, "grad_norm": 0.77734375, "learning_rate": 0.00016373359285873907, "loss": 0.8141, "step": 18345 }, { "epoch": 0.4710738363816979, "grad_norm": 0.8359375, "learning_rate": 0.00016373015274353227, "loss": 1.053, "step": 18346 }, { "epoch": 0.47109951357761976, "grad_norm": 0.77734375, "learning_rate": 0.000163726712501317, "loss": 0.9716, "step": 18347 }, { "epoch": 0.4711251907735416, "grad_norm": 0.78125, "learning_rate": 0.0001637232721321002, "loss": 0.9685, "step": 18348 }, { "epoch": 0.47115086796946337, "grad_norm": 0.734375, "learning_rate": 0.00016371983163588865, "loss": 0.891, "step": 18349 }, { "epoch": 0.4711765451653852, "grad_norm": 0.8046875, "learning_rate": 0.00016371639101268922, "loss": 0.8149, "step": 18350 }, { "epoch": 0.47120222236130704, "grad_norm": 0.79296875, "learning_rate": 0.00016371295026250876, "loss": 0.8886, "step": 18351 }, { "epoch": 0.4712278995572289, "grad_norm": 0.8046875, "learning_rate": 0.00016370950938535418, "loss": 0.815, "step": 18352 }, { "epoch": 0.47125357675315066, "grad_norm": 0.796875, "learning_rate": 0.0001637060683812323, "loss": 1.0043, "step": 18353 }, { "epoch": 0.4712792539490725, "grad_norm": 0.71875, "learning_rate": 0.00016370262725014999, "loss": 0.8389, "step": 18354 }, { "epoch": 0.47130493114499433, "grad_norm": 0.6875, "learning_rate": 0.00016369918599211408, "loss": 0.8642, "step": 18355 }, { "epoch": 0.4713306083409161, "grad_norm": 0.66796875, "learning_rate": 0.00016369574460713143, "loss": 0.9084, "step": 18356 }, { "epoch": 0.47135628553683795, "grad_norm": 0.72265625, "learning_rate": 0.00016369230309520893, "loss": 0.8801, "step": 18357 }, { "epoch": 0.4713819627327598, "grad_norm": 0.7421875, "learning_rate": 0.00016368886145635343, "loss": 0.9022, "step": 18358 }, { "epoch": 0.47140763992868157, "grad_norm": 0.76171875, "learning_rate": 0.0001636854196905718, "loss": 0.954, "step": 18359 }, { "epoch": 0.4714333171246034, "grad_norm": 0.80859375, "learning_rate": 0.00016368197779787082, "loss": 0.9002, "step": 18360 }, { "epoch": 0.47145899432052524, "grad_norm": 0.765625, "learning_rate": 0.00016367853577825748, "loss": 0.9673, "step": 18361 }, { "epoch": 0.4714846715164471, "grad_norm": 0.765625, "learning_rate": 0.00016367509363173852, "loss": 0.9562, "step": 18362 }, { "epoch": 0.47151034871236885, "grad_norm": 0.7578125, "learning_rate": 0.00016367165135832088, "loss": 0.9532, "step": 18363 }, { "epoch": 0.4715360259082907, "grad_norm": 0.8359375, "learning_rate": 0.00016366820895801138, "loss": 0.864, "step": 18364 }, { "epoch": 0.4715617031042125, "grad_norm": 0.74609375, "learning_rate": 0.0001636647664308169, "loss": 0.9061, "step": 18365 }, { "epoch": 0.4715873803001343, "grad_norm": 0.7734375, "learning_rate": 0.00016366132377674427, "loss": 0.9073, "step": 18366 }, { "epoch": 0.47161305749605614, "grad_norm": 0.8203125, "learning_rate": 0.00016365788099580037, "loss": 0.9307, "step": 18367 }, { "epoch": 0.471638734691978, "grad_norm": 0.9375, "learning_rate": 0.00016365443808799207, "loss": 0.9507, "step": 18368 }, { "epoch": 0.47166441188789976, "grad_norm": 0.765625, "learning_rate": 0.00016365099505332627, "loss": 0.9623, "step": 18369 }, { "epoch": 0.4716900890838216, "grad_norm": 0.6953125, "learning_rate": 0.00016364755189180973, "loss": 0.9136, "step": 18370 }, { "epoch": 0.47171576627974343, "grad_norm": 0.7421875, "learning_rate": 0.00016364410860344937, "loss": 0.9554, "step": 18371 }, { "epoch": 0.47174144347566527, "grad_norm": 0.671875, "learning_rate": 0.0001636406651882521, "loss": 0.7272, "step": 18372 }, { "epoch": 0.47176712067158705, "grad_norm": 0.80078125, "learning_rate": 0.00016363722164622468, "loss": 0.8375, "step": 18373 }, { "epoch": 0.4717927978675089, "grad_norm": 0.83203125, "learning_rate": 0.00016363377797737406, "loss": 0.8513, "step": 18374 }, { "epoch": 0.4718184750634307, "grad_norm": 0.74609375, "learning_rate": 0.00016363033418170702, "loss": 0.8903, "step": 18375 }, { "epoch": 0.4718441522593525, "grad_norm": 0.86328125, "learning_rate": 0.00016362689025923053, "loss": 0.8034, "step": 18376 }, { "epoch": 0.47186982945527434, "grad_norm": 0.74609375, "learning_rate": 0.00016362344620995137, "loss": 0.8687, "step": 18377 }, { "epoch": 0.4718955066511962, "grad_norm": 0.78125, "learning_rate": 0.00016362000203387644, "loss": 0.7982, "step": 18378 }, { "epoch": 0.47192118384711795, "grad_norm": 0.7578125, "learning_rate": 0.00016361655773101258, "loss": 0.8163, "step": 18379 }, { "epoch": 0.4719468610430398, "grad_norm": 0.828125, "learning_rate": 0.00016361311330136667, "loss": 0.9506, "step": 18380 }, { "epoch": 0.4719725382389616, "grad_norm": 0.8515625, "learning_rate": 0.00016360966874494557, "loss": 0.9539, "step": 18381 }, { "epoch": 0.4719982154348834, "grad_norm": 0.73046875, "learning_rate": 0.00016360622406175613, "loss": 0.856, "step": 18382 }, { "epoch": 0.47202389263080524, "grad_norm": 0.734375, "learning_rate": 0.00016360277925180525, "loss": 0.9714, "step": 18383 }, { "epoch": 0.4720495698267271, "grad_norm": 0.71484375, "learning_rate": 0.0001635993343150998, "loss": 0.8417, "step": 18384 }, { "epoch": 0.4720752470226489, "grad_norm": 0.8125, "learning_rate": 0.00016359588925164657, "loss": 0.8751, "step": 18385 }, { "epoch": 0.4721009242185707, "grad_norm": 0.7578125, "learning_rate": 0.00016359244406145251, "loss": 0.8515, "step": 18386 }, { "epoch": 0.47212660141449253, "grad_norm": 0.78515625, "learning_rate": 0.00016358899874452444, "loss": 0.7974, "step": 18387 }, { "epoch": 0.47215227861041437, "grad_norm": 0.81640625, "learning_rate": 0.00016358555330086926, "loss": 1.008, "step": 18388 }, { "epoch": 0.47217795580633615, "grad_norm": 1.0390625, "learning_rate": 0.0001635821077304938, "loss": 0.8719, "step": 18389 }, { "epoch": 0.472203633002258, "grad_norm": 0.80859375, "learning_rate": 0.00016357866203340496, "loss": 0.9336, "step": 18390 }, { "epoch": 0.4722293101981798, "grad_norm": 0.8125, "learning_rate": 0.00016357521620960957, "loss": 0.957, "step": 18391 }, { "epoch": 0.4722549873941016, "grad_norm": 0.76953125, "learning_rate": 0.0001635717702591145, "loss": 0.9285, "step": 18392 }, { "epoch": 0.47228066459002344, "grad_norm": 0.7578125, "learning_rate": 0.00016356832418192667, "loss": 1.0198, "step": 18393 }, { "epoch": 0.47230634178594527, "grad_norm": 0.703125, "learning_rate": 0.00016356487797805288, "loss": 0.7815, "step": 18394 }, { "epoch": 0.4723320189818671, "grad_norm": 0.79296875, "learning_rate": 0.00016356143164750005, "loss": 0.9205, "step": 18395 }, { "epoch": 0.4723576961777889, "grad_norm": 0.91796875, "learning_rate": 0.000163557985190275, "loss": 0.8676, "step": 18396 }, { "epoch": 0.4723833733737107, "grad_norm": 0.73828125, "learning_rate": 0.00016355453860638467, "loss": 0.8533, "step": 18397 }, { "epoch": 0.47240905056963256, "grad_norm": 0.80859375, "learning_rate": 0.0001635510918958359, "loss": 0.8732, "step": 18398 }, { "epoch": 0.47243472776555434, "grad_norm": 0.8203125, "learning_rate": 0.0001635476450586355, "loss": 0.8159, "step": 18399 }, { "epoch": 0.4724604049614762, "grad_norm": 0.796875, "learning_rate": 0.00016354419809479039, "loss": 1.0039, "step": 18400 }, { "epoch": 0.472486082157398, "grad_norm": 0.8125, "learning_rate": 0.00016354075100430743, "loss": 0.8834, "step": 18401 }, { "epoch": 0.4725117593533198, "grad_norm": 0.79296875, "learning_rate": 0.00016353730378719348, "loss": 0.9209, "step": 18402 }, { "epoch": 0.47253743654924163, "grad_norm": 0.734375, "learning_rate": 0.00016353385644345546, "loss": 0.9488, "step": 18403 }, { "epoch": 0.47256311374516347, "grad_norm": 0.73828125, "learning_rate": 0.00016353040897310018, "loss": 0.6785, "step": 18404 }, { "epoch": 0.4725887909410853, "grad_norm": 0.8671875, "learning_rate": 0.00016352696137613454, "loss": 1.0356, "step": 18405 }, { "epoch": 0.4726144681370071, "grad_norm": 0.71484375, "learning_rate": 0.00016352351365256541, "loss": 0.7941, "step": 18406 }, { "epoch": 0.4726401453329289, "grad_norm": 0.79296875, "learning_rate": 0.00016352006580239964, "loss": 0.9747, "step": 18407 }, { "epoch": 0.47266582252885075, "grad_norm": 0.7265625, "learning_rate": 0.00016351661782564416, "loss": 0.9663, "step": 18408 }, { "epoch": 0.47269149972477253, "grad_norm": 0.75390625, "learning_rate": 0.00016351316972230574, "loss": 0.9698, "step": 18409 }, { "epoch": 0.47271717692069437, "grad_norm": 0.71875, "learning_rate": 0.00016350972149239135, "loss": 0.8627, "step": 18410 }, { "epoch": 0.4727428541166162, "grad_norm": 0.828125, "learning_rate": 0.0001635062731359078, "loss": 0.936, "step": 18411 }, { "epoch": 0.472768531312538, "grad_norm": 0.765625, "learning_rate": 0.000163502824652862, "loss": 0.9552, "step": 18412 }, { "epoch": 0.4727942085084598, "grad_norm": 0.75, "learning_rate": 0.0001634993760432608, "loss": 0.8697, "step": 18413 }, { "epoch": 0.47281988570438166, "grad_norm": 0.76953125, "learning_rate": 0.00016349592730711108, "loss": 0.8773, "step": 18414 }, { "epoch": 0.4728455629003035, "grad_norm": 0.75, "learning_rate": 0.00016349247844441972, "loss": 0.8712, "step": 18415 }, { "epoch": 0.4728712400962253, "grad_norm": 0.80859375, "learning_rate": 0.00016348902945519355, "loss": 1.0684, "step": 18416 }, { "epoch": 0.4728969172921471, "grad_norm": 0.78125, "learning_rate": 0.0001634855803394395, "loss": 0.9195, "step": 18417 }, { "epoch": 0.47292259448806895, "grad_norm": 0.70703125, "learning_rate": 0.00016348213109716443, "loss": 0.8802, "step": 18418 }, { "epoch": 0.47294827168399073, "grad_norm": 0.84765625, "learning_rate": 0.00016347868172837522, "loss": 0.9551, "step": 18419 }, { "epoch": 0.47297394887991256, "grad_norm": 0.7890625, "learning_rate": 0.0001634752322330787, "loss": 1.1448, "step": 18420 }, { "epoch": 0.4729996260758344, "grad_norm": 0.859375, "learning_rate": 0.0001634717826112818, "loss": 1.0474, "step": 18421 }, { "epoch": 0.4730253032717562, "grad_norm": 0.7734375, "learning_rate": 0.00016346833286299135, "loss": 0.9556, "step": 18422 }, { "epoch": 0.473050980467678, "grad_norm": 0.8046875, "learning_rate": 0.00016346488298821428, "loss": 0.8494, "step": 18423 }, { "epoch": 0.47307665766359985, "grad_norm": 0.65625, "learning_rate": 0.0001634614329869574, "loss": 0.9006, "step": 18424 }, { "epoch": 0.4731023348595217, "grad_norm": 0.8046875, "learning_rate": 0.00016345798285922762, "loss": 0.9257, "step": 18425 }, { "epoch": 0.47312801205544347, "grad_norm": 0.765625, "learning_rate": 0.00016345453260503183, "loss": 0.9443, "step": 18426 }, { "epoch": 0.4731536892513653, "grad_norm": 0.875, "learning_rate": 0.00016345108222437686, "loss": 0.9738, "step": 18427 }, { "epoch": 0.47317936644728714, "grad_norm": 0.734375, "learning_rate": 0.00016344763171726964, "loss": 0.8081, "step": 18428 }, { "epoch": 0.4732050436432089, "grad_norm": 0.74609375, "learning_rate": 0.000163444181083717, "loss": 0.9137, "step": 18429 }, { "epoch": 0.47323072083913076, "grad_norm": 0.75390625, "learning_rate": 0.00016344073032372586, "loss": 0.7465, "step": 18430 }, { "epoch": 0.4732563980350526, "grad_norm": 0.80078125, "learning_rate": 0.00016343727943730308, "loss": 0.9373, "step": 18431 }, { "epoch": 0.4732820752309744, "grad_norm": 0.76171875, "learning_rate": 0.00016343382842445552, "loss": 0.7855, "step": 18432 }, { "epoch": 0.4733077524268962, "grad_norm": 0.98828125, "learning_rate": 0.00016343037728519004, "loss": 0.8375, "step": 18433 }, { "epoch": 0.47333342962281805, "grad_norm": 0.72265625, "learning_rate": 0.00016342692601951358, "loss": 0.9196, "step": 18434 }, { "epoch": 0.4733591068187399, "grad_norm": 0.8203125, "learning_rate": 0.00016342347462743297, "loss": 0.7691, "step": 18435 }, { "epoch": 0.47338478401466166, "grad_norm": 0.8515625, "learning_rate": 0.00016342002310895516, "loss": 0.8513, "step": 18436 }, { "epoch": 0.4734104612105835, "grad_norm": 0.8671875, "learning_rate": 0.0001634165714640869, "loss": 1.0337, "step": 18437 }, { "epoch": 0.47343613840650534, "grad_norm": 0.78125, "learning_rate": 0.00016341311969283518, "loss": 0.8802, "step": 18438 }, { "epoch": 0.4734618156024271, "grad_norm": 0.73828125, "learning_rate": 0.00016340966779520682, "loss": 0.9143, "step": 18439 }, { "epoch": 0.47348749279834895, "grad_norm": 0.76171875, "learning_rate": 0.0001634062157712087, "loss": 0.937, "step": 18440 }, { "epoch": 0.4735131699942708, "grad_norm": 0.77734375, "learning_rate": 0.00016340276362084775, "loss": 1.019, "step": 18441 }, { "epoch": 0.47353884719019257, "grad_norm": 0.734375, "learning_rate": 0.00016339931134413081, "loss": 0.8292, "step": 18442 }, { "epoch": 0.4735645243861144, "grad_norm": 0.77734375, "learning_rate": 0.00016339585894106483, "loss": 0.8067, "step": 18443 }, { "epoch": 0.47359020158203624, "grad_norm": 0.7421875, "learning_rate": 0.00016339240641165655, "loss": 0.9974, "step": 18444 }, { "epoch": 0.4736158787779581, "grad_norm": 0.86328125, "learning_rate": 0.00016338895375591297, "loss": 0.9337, "step": 18445 }, { "epoch": 0.47364155597387986, "grad_norm": 0.7734375, "learning_rate": 0.0001633855009738409, "loss": 0.8919, "step": 18446 }, { "epoch": 0.4736672331698017, "grad_norm": 0.6484375, "learning_rate": 0.00016338204806544727, "loss": 0.7293, "step": 18447 }, { "epoch": 0.47369291036572353, "grad_norm": 0.796875, "learning_rate": 0.00016337859503073892, "loss": 0.987, "step": 18448 }, { "epoch": 0.4737185875616453, "grad_norm": 0.78515625, "learning_rate": 0.00016337514186972284, "loss": 0.9529, "step": 18449 }, { "epoch": 0.47374426475756715, "grad_norm": 0.86328125, "learning_rate": 0.00016337168858240574, "loss": 1.0208, "step": 18450 }, { "epoch": 0.473769941953489, "grad_norm": 0.734375, "learning_rate": 0.00016336823516879462, "loss": 0.8235, "step": 18451 }, { "epoch": 0.47379561914941076, "grad_norm": 0.69921875, "learning_rate": 0.00016336478162889634, "loss": 0.8663, "step": 18452 }, { "epoch": 0.4738212963453326, "grad_norm": 0.796875, "learning_rate": 0.00016336132796271774, "loss": 0.9704, "step": 18453 }, { "epoch": 0.47384697354125443, "grad_norm": 0.83984375, "learning_rate": 0.00016335787417026576, "loss": 0.9438, "step": 18454 }, { "epoch": 0.47387265073717627, "grad_norm": 0.73046875, "learning_rate": 0.00016335442025154726, "loss": 0.9069, "step": 18455 }, { "epoch": 0.47389832793309805, "grad_norm": 0.7734375, "learning_rate": 0.00016335096620656914, "loss": 0.9565, "step": 18456 }, { "epoch": 0.4739240051290199, "grad_norm": 0.8515625, "learning_rate": 0.00016334751203533822, "loss": 0.8939, "step": 18457 }, { "epoch": 0.4739496823249417, "grad_norm": 0.79296875, "learning_rate": 0.00016334405773786145, "loss": 0.9171, "step": 18458 }, { "epoch": 0.4739753595208635, "grad_norm": 0.84375, "learning_rate": 0.0001633406033141457, "loss": 0.7849, "step": 18459 }, { "epoch": 0.47400103671678534, "grad_norm": 0.72265625, "learning_rate": 0.00016333714876419784, "loss": 0.801, "step": 18460 }, { "epoch": 0.4740267139127072, "grad_norm": 0.77734375, "learning_rate": 0.00016333369408802478, "loss": 0.9686, "step": 18461 }, { "epoch": 0.47405239110862896, "grad_norm": 0.80078125, "learning_rate": 0.0001633302392856334, "loss": 0.978, "step": 18462 }, { "epoch": 0.4740780683045508, "grad_norm": 0.8203125, "learning_rate": 0.00016332678435703057, "loss": 0.939, "step": 18463 }, { "epoch": 0.47410374550047263, "grad_norm": 0.76953125, "learning_rate": 0.00016332332930222315, "loss": 0.9021, "step": 18464 }, { "epoch": 0.47412942269639446, "grad_norm": 0.7734375, "learning_rate": 0.00016331987412121808, "loss": 0.8232, "step": 18465 }, { "epoch": 0.47415509989231625, "grad_norm": 0.84765625, "learning_rate": 0.0001633164188140222, "loss": 0.8736, "step": 18466 }, { "epoch": 0.4741807770882381, "grad_norm": 0.78125, "learning_rate": 0.00016331296338064244, "loss": 0.9978, "step": 18467 }, { "epoch": 0.4742064542841599, "grad_norm": 0.7890625, "learning_rate": 0.00016330950782108566, "loss": 0.9565, "step": 18468 }, { "epoch": 0.4742321314800817, "grad_norm": 0.80859375, "learning_rate": 0.00016330605213535875, "loss": 0.8894, "step": 18469 }, { "epoch": 0.47425780867600353, "grad_norm": 0.71484375, "learning_rate": 0.0001633025963234686, "loss": 0.8826, "step": 18470 }, { "epoch": 0.47428348587192537, "grad_norm": 0.73046875, "learning_rate": 0.00016329914038542203, "loss": 0.8466, "step": 18471 }, { "epoch": 0.47430916306784715, "grad_norm": 0.71875, "learning_rate": 0.00016329568432122606, "loss": 0.8033, "step": 18472 }, { "epoch": 0.474334840263769, "grad_norm": 0.734375, "learning_rate": 0.0001632922281308875, "loss": 0.9705, "step": 18473 }, { "epoch": 0.4743605174596908, "grad_norm": 0.77734375, "learning_rate": 0.00016328877181441326, "loss": 0.7367, "step": 18474 }, { "epoch": 0.47438619465561266, "grad_norm": 0.73046875, "learning_rate": 0.00016328531537181015, "loss": 0.8756, "step": 18475 }, { "epoch": 0.47441187185153444, "grad_norm": 0.74609375, "learning_rate": 0.0001632818588030852, "loss": 0.8192, "step": 18476 }, { "epoch": 0.4744375490474563, "grad_norm": 0.80078125, "learning_rate": 0.00016327840210824516, "loss": 0.8382, "step": 18477 }, { "epoch": 0.4744632262433781, "grad_norm": 0.7421875, "learning_rate": 0.00016327494528729704, "loss": 0.9383, "step": 18478 }, { "epoch": 0.4744889034392999, "grad_norm": 0.7109375, "learning_rate": 0.00016327148834024762, "loss": 0.8311, "step": 18479 }, { "epoch": 0.4745145806352217, "grad_norm": 0.7265625, "learning_rate": 0.00016326803126710384, "loss": 0.772, "step": 18480 }, { "epoch": 0.47454025783114356, "grad_norm": 0.7734375, "learning_rate": 0.0001632645740678726, "loss": 0.9048, "step": 18481 }, { "epoch": 0.47456593502706534, "grad_norm": 0.78125, "learning_rate": 0.0001632611167425608, "loss": 0.9444, "step": 18482 }, { "epoch": 0.4745916122229872, "grad_norm": 0.7734375, "learning_rate": 0.0001632576592911753, "loss": 0.8069, "step": 18483 }, { "epoch": 0.474617289418909, "grad_norm": 0.73046875, "learning_rate": 0.00016325420171372297, "loss": 0.8334, "step": 18484 }, { "epoch": 0.47464296661483085, "grad_norm": 0.765625, "learning_rate": 0.00016325074401021076, "loss": 0.9481, "step": 18485 }, { "epoch": 0.47466864381075263, "grad_norm": 0.7578125, "learning_rate": 0.0001632472861806455, "loss": 0.8431, "step": 18486 }, { "epoch": 0.47469432100667447, "grad_norm": 0.7421875, "learning_rate": 0.00016324382822503414, "loss": 0.8478, "step": 18487 }, { "epoch": 0.4747199982025963, "grad_norm": 0.78515625, "learning_rate": 0.00016324037014338354, "loss": 0.9435, "step": 18488 }, { "epoch": 0.4747456753985181, "grad_norm": 0.7578125, "learning_rate": 0.0001632369119357006, "loss": 0.8575, "step": 18489 }, { "epoch": 0.4747713525944399, "grad_norm": 0.8046875, "learning_rate": 0.0001632334536019922, "loss": 0.87, "step": 18490 }, { "epoch": 0.47479702979036176, "grad_norm": 0.75390625, "learning_rate": 0.00016322999514226526, "loss": 0.9295, "step": 18491 }, { "epoch": 0.47482270698628354, "grad_norm": 0.74609375, "learning_rate": 0.0001632265365565266, "loss": 0.8113, "step": 18492 }, { "epoch": 0.4748483841822054, "grad_norm": 0.78125, "learning_rate": 0.00016322307784478318, "loss": 0.8289, "step": 18493 }, { "epoch": 0.4748740613781272, "grad_norm": 0.8203125, "learning_rate": 0.0001632196190070419, "loss": 0.9958, "step": 18494 }, { "epoch": 0.47489973857404905, "grad_norm": 0.84375, "learning_rate": 0.00016321616004330962, "loss": 0.8258, "step": 18495 }, { "epoch": 0.4749254157699708, "grad_norm": 0.78515625, "learning_rate": 0.00016321270095359322, "loss": 1.0154, "step": 18496 }, { "epoch": 0.47495109296589266, "grad_norm": 0.73828125, "learning_rate": 0.00016320924173789965, "loss": 0.8387, "step": 18497 }, { "epoch": 0.4749767701618145, "grad_norm": 0.78515625, "learning_rate": 0.00016320578239623575, "loss": 0.8647, "step": 18498 }, { "epoch": 0.4750024473577363, "grad_norm": 0.76953125, "learning_rate": 0.00016320232292860846, "loss": 0.9304, "step": 18499 }, { "epoch": 0.4750281245536581, "grad_norm": 0.78125, "learning_rate": 0.00016319886333502462, "loss": 0.8561, "step": 18500 }, { "epoch": 0.47505380174957995, "grad_norm": 0.8359375, "learning_rate": 0.00016319540361549117, "loss": 0.978, "step": 18501 }, { "epoch": 0.47507947894550173, "grad_norm": 0.875, "learning_rate": 0.000163191943770015, "loss": 0.9374, "step": 18502 }, { "epoch": 0.47510515614142357, "grad_norm": 0.7734375, "learning_rate": 0.000163188483798603, "loss": 0.7918, "step": 18503 }, { "epoch": 0.4751308333373454, "grad_norm": 0.73828125, "learning_rate": 0.00016318502370126203, "loss": 0.8135, "step": 18504 }, { "epoch": 0.47515651053326724, "grad_norm": 0.97265625, "learning_rate": 0.000163181563477999, "loss": 0.8399, "step": 18505 }, { "epoch": 0.475182187729189, "grad_norm": 0.859375, "learning_rate": 0.00016317810312882087, "loss": 0.9164, "step": 18506 }, { "epoch": 0.47520786492511086, "grad_norm": 0.80859375, "learning_rate": 0.00016317464265373443, "loss": 0.8633, "step": 18507 }, { "epoch": 0.4752335421210327, "grad_norm": 0.75, "learning_rate": 0.00016317118205274667, "loss": 0.8482, "step": 18508 }, { "epoch": 0.4752592193169545, "grad_norm": 0.7734375, "learning_rate": 0.00016316772132586447, "loss": 0.9038, "step": 18509 }, { "epoch": 0.4752848965128763, "grad_norm": 0.8203125, "learning_rate": 0.0001631642604730947, "loss": 0.9236, "step": 18510 }, { "epoch": 0.47531057370879815, "grad_norm": 0.7734375, "learning_rate": 0.00016316079949444423, "loss": 0.8858, "step": 18511 }, { "epoch": 0.4753362509047199, "grad_norm": 0.73046875, "learning_rate": 0.00016315733838992, "loss": 0.8733, "step": 18512 }, { "epoch": 0.47536192810064176, "grad_norm": 0.74609375, "learning_rate": 0.00016315387715952893, "loss": 0.8115, "step": 18513 }, { "epoch": 0.4753876052965636, "grad_norm": 0.78515625, "learning_rate": 0.00016315041580327786, "loss": 0.8254, "step": 18514 }, { "epoch": 0.47541328249248543, "grad_norm": 0.7890625, "learning_rate": 0.00016314695432117376, "loss": 0.8857, "step": 18515 }, { "epoch": 0.4754389596884072, "grad_norm": 0.7421875, "learning_rate": 0.00016314349271322342, "loss": 0.8418, "step": 18516 }, { "epoch": 0.47546463688432905, "grad_norm": 0.76953125, "learning_rate": 0.00016314003097943385, "loss": 0.8228, "step": 18517 }, { "epoch": 0.4754903140802509, "grad_norm": 0.7890625, "learning_rate": 0.00016313656911981186, "loss": 0.8928, "step": 18518 }, { "epoch": 0.47551599127617267, "grad_norm": 0.76171875, "learning_rate": 0.00016313310713436443, "loss": 0.8913, "step": 18519 }, { "epoch": 0.4755416684720945, "grad_norm": 0.75390625, "learning_rate": 0.00016312964502309839, "loss": 1.052, "step": 18520 }, { "epoch": 0.47556734566801634, "grad_norm": 0.76171875, "learning_rate": 0.0001631261827860207, "loss": 1.0812, "step": 18521 }, { "epoch": 0.4755930228639381, "grad_norm": 0.7578125, "learning_rate": 0.00016312272042313822, "loss": 1.0253, "step": 18522 }, { "epoch": 0.47561870005985996, "grad_norm": 0.7890625, "learning_rate": 0.00016311925793445784, "loss": 0.9223, "step": 18523 }, { "epoch": 0.4756443772557818, "grad_norm": 0.8046875, "learning_rate": 0.0001631157953199865, "loss": 0.8677, "step": 18524 }, { "epoch": 0.47567005445170363, "grad_norm": 0.8515625, "learning_rate": 0.00016311233257973107, "loss": 0.9738, "step": 18525 }, { "epoch": 0.4756957316476254, "grad_norm": 0.75390625, "learning_rate": 0.00016310886971369846, "loss": 0.9902, "step": 18526 }, { "epoch": 0.47572140884354724, "grad_norm": 0.8671875, "learning_rate": 0.0001631054067218956, "loss": 0.9586, "step": 18527 }, { "epoch": 0.4757470860394691, "grad_norm": 0.7890625, "learning_rate": 0.00016310194360432933, "loss": 0.8337, "step": 18528 }, { "epoch": 0.47577276323539086, "grad_norm": 0.73046875, "learning_rate": 0.0001630984803610066, "loss": 0.7956, "step": 18529 }, { "epoch": 0.4757984404313127, "grad_norm": 0.703125, "learning_rate": 0.0001630950169919343, "loss": 0.943, "step": 18530 }, { "epoch": 0.47582411762723453, "grad_norm": 0.83203125, "learning_rate": 0.00016309155349711934, "loss": 0.9881, "step": 18531 }, { "epoch": 0.4758497948231563, "grad_norm": 0.796875, "learning_rate": 0.00016308808987656857, "loss": 0.8742, "step": 18532 }, { "epoch": 0.47587547201907815, "grad_norm": 0.7421875, "learning_rate": 0.000163084626130289, "loss": 0.8494, "step": 18533 }, { "epoch": 0.475901149215, "grad_norm": 0.7734375, "learning_rate": 0.00016308116225828743, "loss": 0.8144, "step": 18534 }, { "epoch": 0.4759268264109218, "grad_norm": 0.73828125, "learning_rate": 0.00016307769826057083, "loss": 0.8615, "step": 18535 }, { "epoch": 0.4759525036068436, "grad_norm": 0.796875, "learning_rate": 0.00016307423413714604, "loss": 0.9475, "step": 18536 }, { "epoch": 0.47597818080276544, "grad_norm": 0.84375, "learning_rate": 0.00016307076988802, "loss": 0.8033, "step": 18537 }, { "epoch": 0.4760038579986873, "grad_norm": 0.7734375, "learning_rate": 0.00016306730551319965, "loss": 0.9391, "step": 18538 }, { "epoch": 0.47602953519460905, "grad_norm": 0.8125, "learning_rate": 0.0001630638410126918, "loss": 0.8281, "step": 18539 }, { "epoch": 0.4760552123905309, "grad_norm": 0.91015625, "learning_rate": 0.00016306037638650345, "loss": 0.8626, "step": 18540 }, { "epoch": 0.4760808895864527, "grad_norm": 0.7265625, "learning_rate": 0.00016305691163464147, "loss": 0.7292, "step": 18541 }, { "epoch": 0.4761065667823745, "grad_norm": 0.71875, "learning_rate": 0.00016305344675711275, "loss": 0.9381, "step": 18542 }, { "epoch": 0.47613224397829634, "grad_norm": 0.78515625, "learning_rate": 0.0001630499817539242, "loss": 0.8029, "step": 18543 }, { "epoch": 0.4761579211742182, "grad_norm": 0.77734375, "learning_rate": 0.00016304651662508274, "loss": 0.9177, "step": 18544 }, { "epoch": 0.47618359837014, "grad_norm": 0.7421875, "learning_rate": 0.00016304305137059528, "loss": 0.8167, "step": 18545 }, { "epoch": 0.4762092755660618, "grad_norm": 0.72265625, "learning_rate": 0.0001630395859904687, "loss": 0.7515, "step": 18546 }, { "epoch": 0.47623495276198363, "grad_norm": 0.75390625, "learning_rate": 0.0001630361204847099, "loss": 0.8949, "step": 18547 }, { "epoch": 0.47626062995790547, "grad_norm": 0.76171875, "learning_rate": 0.00016303265485332586, "loss": 0.8268, "step": 18548 }, { "epoch": 0.47628630715382725, "grad_norm": 0.71484375, "learning_rate": 0.0001630291890963234, "loss": 0.8881, "step": 18549 }, { "epoch": 0.4763119843497491, "grad_norm": 0.80078125, "learning_rate": 0.00016302572321370945, "loss": 0.9025, "step": 18550 }, { "epoch": 0.4763376615456709, "grad_norm": 0.7421875, "learning_rate": 0.00016302225720549093, "loss": 1.1236, "step": 18551 }, { "epoch": 0.4763633387415927, "grad_norm": 0.68359375, "learning_rate": 0.00016301879107167474, "loss": 0.8596, "step": 18552 }, { "epoch": 0.47638901593751454, "grad_norm": 0.75, "learning_rate": 0.0001630153248122678, "loss": 0.9245, "step": 18553 }, { "epoch": 0.4764146931334364, "grad_norm": 0.79296875, "learning_rate": 0.00016301185842727703, "loss": 0.974, "step": 18554 }, { "epoch": 0.4764403703293582, "grad_norm": 0.84765625, "learning_rate": 0.0001630083919167093, "loss": 0.8444, "step": 18555 }, { "epoch": 0.47646604752528, "grad_norm": 0.7578125, "learning_rate": 0.00016300492528057152, "loss": 0.8823, "step": 18556 }, { "epoch": 0.4764917247212018, "grad_norm": 0.80859375, "learning_rate": 0.00016300145851887062, "loss": 0.9793, "step": 18557 }, { "epoch": 0.47651740191712366, "grad_norm": 0.78125, "learning_rate": 0.00016299799163161353, "loss": 1.0341, "step": 18558 }, { "epoch": 0.47654307911304544, "grad_norm": 0.71875, "learning_rate": 0.0001629945246188071, "loss": 0.961, "step": 18559 }, { "epoch": 0.4765687563089673, "grad_norm": 0.73828125, "learning_rate": 0.0001629910574804583, "loss": 0.9394, "step": 18560 }, { "epoch": 0.4765944335048891, "grad_norm": 0.76171875, "learning_rate": 0.000162987590216574, "loss": 0.8476, "step": 18561 }, { "epoch": 0.4766201107008109, "grad_norm": 0.734375, "learning_rate": 0.00016298412282716114, "loss": 0.9123, "step": 18562 }, { "epoch": 0.47664578789673273, "grad_norm": 0.78515625, "learning_rate": 0.0001629806553122266, "loss": 1.0247, "step": 18563 }, { "epoch": 0.47667146509265457, "grad_norm": 0.7265625, "learning_rate": 0.0001629771876717773, "loss": 0.8413, "step": 18564 }, { "epoch": 0.4766971422885764, "grad_norm": 0.74609375, "learning_rate": 0.00016297371990582016, "loss": 0.7955, "step": 18565 }, { "epoch": 0.4767228194844982, "grad_norm": 0.7734375, "learning_rate": 0.00016297025201436206, "loss": 0.9279, "step": 18566 }, { "epoch": 0.47674849668042, "grad_norm": 0.734375, "learning_rate": 0.00016296678399740997, "loss": 0.8279, "step": 18567 }, { "epoch": 0.47677417387634186, "grad_norm": 0.82421875, "learning_rate": 0.00016296331585497072, "loss": 0.9838, "step": 18568 }, { "epoch": 0.47679985107226364, "grad_norm": 0.875, "learning_rate": 0.0001629598475870513, "loss": 0.7248, "step": 18569 }, { "epoch": 0.4768255282681855, "grad_norm": 0.90234375, "learning_rate": 0.0001629563791936586, "loss": 0.9718, "step": 18570 }, { "epoch": 0.4768512054641073, "grad_norm": 0.8203125, "learning_rate": 0.00016295291067479952, "loss": 0.9125, "step": 18571 }, { "epoch": 0.4768768826600291, "grad_norm": 0.7265625, "learning_rate": 0.00016294944203048097, "loss": 0.6809, "step": 18572 }, { "epoch": 0.4769025598559509, "grad_norm": 0.8203125, "learning_rate": 0.00016294597326070988, "loss": 0.9085, "step": 18573 }, { "epoch": 0.47692823705187276, "grad_norm": 0.8359375, "learning_rate": 0.00016294250436549312, "loss": 0.8523, "step": 18574 }, { "epoch": 0.4769539142477946, "grad_norm": 0.78515625, "learning_rate": 0.00016293903534483767, "loss": 0.9608, "step": 18575 }, { "epoch": 0.4769795914437164, "grad_norm": 0.7421875, "learning_rate": 0.00016293556619875039, "loss": 0.7969, "step": 18576 }, { "epoch": 0.4770052686396382, "grad_norm": 0.76171875, "learning_rate": 0.0001629320969272382, "loss": 0.9347, "step": 18577 }, { "epoch": 0.47703094583556005, "grad_norm": 0.78515625, "learning_rate": 0.00016292862753030805, "loss": 0.9749, "step": 18578 }, { "epoch": 0.47705662303148183, "grad_norm": 0.70703125, "learning_rate": 0.0001629251580079668, "loss": 0.8972, "step": 18579 }, { "epoch": 0.47708230022740367, "grad_norm": 0.83203125, "learning_rate": 0.00016292168836022142, "loss": 0.9207, "step": 18580 }, { "epoch": 0.4771079774233255, "grad_norm": 0.7890625, "learning_rate": 0.00016291821858707882, "loss": 0.9235, "step": 18581 }, { "epoch": 0.4771336546192473, "grad_norm": 0.74609375, "learning_rate": 0.00016291474868854584, "loss": 0.8529, "step": 18582 }, { "epoch": 0.4771593318151691, "grad_norm": 0.73828125, "learning_rate": 0.0001629112786646295, "loss": 0.9599, "step": 18583 }, { "epoch": 0.47718500901109095, "grad_norm": 0.70703125, "learning_rate": 0.00016290780851533663, "loss": 0.9041, "step": 18584 }, { "epoch": 0.4772106862070128, "grad_norm": 0.75390625, "learning_rate": 0.00016290433824067417, "loss": 1.0061, "step": 18585 }, { "epoch": 0.47723636340293457, "grad_norm": 0.765625, "learning_rate": 0.0001629008678406491, "loss": 0.9777, "step": 18586 }, { "epoch": 0.4772620405988564, "grad_norm": 0.76953125, "learning_rate": 0.00016289739731526823, "loss": 0.8523, "step": 18587 }, { "epoch": 0.47728771779477824, "grad_norm": 0.6953125, "learning_rate": 0.00016289392666453853, "loss": 0.7866, "step": 18588 }, { "epoch": 0.4773133949907, "grad_norm": 0.84375, "learning_rate": 0.00016289045588846693, "loss": 0.9737, "step": 18589 }, { "epoch": 0.47733907218662186, "grad_norm": 0.71875, "learning_rate": 0.00016288698498706034, "loss": 0.9267, "step": 18590 }, { "epoch": 0.4773647493825437, "grad_norm": 0.7265625, "learning_rate": 0.00016288351396032567, "loss": 0.9823, "step": 18591 }, { "epoch": 0.4773904265784655, "grad_norm": 0.74609375, "learning_rate": 0.00016288004280826983, "loss": 0.8509, "step": 18592 }, { "epoch": 0.4774161037743873, "grad_norm": 0.83203125, "learning_rate": 0.00016287657153089973, "loss": 0.8091, "step": 18593 }, { "epoch": 0.47744178097030915, "grad_norm": 0.80078125, "learning_rate": 0.00016287310012822232, "loss": 0.9195, "step": 18594 }, { "epoch": 0.47746745816623093, "grad_norm": 0.71484375, "learning_rate": 0.0001628696286002445, "loss": 0.8625, "step": 18595 }, { "epoch": 0.47749313536215277, "grad_norm": 0.78515625, "learning_rate": 0.0001628661569469732, "loss": 0.949, "step": 18596 }, { "epoch": 0.4775188125580746, "grad_norm": 0.8046875, "learning_rate": 0.0001628626851684153, "loss": 0.9448, "step": 18597 }, { "epoch": 0.47754448975399644, "grad_norm": 0.84375, "learning_rate": 0.00016285921326457777, "loss": 0.7735, "step": 18598 }, { "epoch": 0.4775701669499182, "grad_norm": 0.76171875, "learning_rate": 0.00016285574123546748, "loss": 0.9136, "step": 18599 }, { "epoch": 0.47759584414584005, "grad_norm": 0.95703125, "learning_rate": 0.0001628522690810914, "loss": 0.8558, "step": 18600 }, { "epoch": 0.4776215213417619, "grad_norm": 0.74609375, "learning_rate": 0.0001628487968014564, "loss": 0.8564, "step": 18601 }, { "epoch": 0.47764719853768367, "grad_norm": 0.7734375, "learning_rate": 0.00016284532439656945, "loss": 0.8754, "step": 18602 }, { "epoch": 0.4776728757336055, "grad_norm": 0.73046875, "learning_rate": 0.00016284185186643747, "loss": 0.809, "step": 18603 }, { "epoch": 0.47769855292952734, "grad_norm": 0.83203125, "learning_rate": 0.0001628383792110673, "loss": 0.8656, "step": 18604 }, { "epoch": 0.4777242301254491, "grad_norm": 0.75390625, "learning_rate": 0.00016283490643046594, "loss": 0.7907, "step": 18605 }, { "epoch": 0.47774990732137096, "grad_norm": 0.8203125, "learning_rate": 0.00016283143352464029, "loss": 0.9182, "step": 18606 }, { "epoch": 0.4777755845172928, "grad_norm": 0.828125, "learning_rate": 0.00016282796049359727, "loss": 0.9097, "step": 18607 }, { "epoch": 0.47780126171321463, "grad_norm": 0.88671875, "learning_rate": 0.00016282448733734382, "loss": 0.8107, "step": 18608 }, { "epoch": 0.4778269389091364, "grad_norm": 0.88671875, "learning_rate": 0.00016282101405588678, "loss": 1.0807, "step": 18609 }, { "epoch": 0.47785261610505825, "grad_norm": 0.83203125, "learning_rate": 0.00016281754064923317, "loss": 1.0051, "step": 18610 }, { "epoch": 0.4778782933009801, "grad_norm": 0.8046875, "learning_rate": 0.00016281406711738988, "loss": 0.9046, "step": 18611 }, { "epoch": 0.47790397049690186, "grad_norm": 0.76171875, "learning_rate": 0.00016281059346036383, "loss": 0.8714, "step": 18612 }, { "epoch": 0.4779296476928237, "grad_norm": 0.79296875, "learning_rate": 0.00016280711967816194, "loss": 1.1344, "step": 18613 }, { "epoch": 0.47795532488874554, "grad_norm": 0.73046875, "learning_rate": 0.00016280364577079114, "loss": 0.7667, "step": 18614 }, { "epoch": 0.4779810020846673, "grad_norm": 0.84375, "learning_rate": 0.00016280017173825835, "loss": 1.0256, "step": 18615 }, { "epoch": 0.47800667928058915, "grad_norm": 0.81640625, "learning_rate": 0.00016279669758057045, "loss": 1.0381, "step": 18616 }, { "epoch": 0.478032356476511, "grad_norm": 0.78125, "learning_rate": 0.00016279322329773444, "loss": 0.9157, "step": 18617 }, { "epoch": 0.4780580336724328, "grad_norm": 0.84765625, "learning_rate": 0.0001627897488897572, "loss": 0.8712, "step": 18618 }, { "epoch": 0.4780837108683546, "grad_norm": 0.765625, "learning_rate": 0.00016278627435664563, "loss": 0.9123, "step": 18619 }, { "epoch": 0.47810938806427644, "grad_norm": 0.703125, "learning_rate": 0.00016278279969840674, "loss": 0.7441, "step": 18620 }, { "epoch": 0.4781350652601983, "grad_norm": 0.76953125, "learning_rate": 0.00016277932491504736, "loss": 0.8332, "step": 18621 }, { "epoch": 0.47816074245612006, "grad_norm": 0.7109375, "learning_rate": 0.0001627758500065745, "loss": 0.8291, "step": 18622 }, { "epoch": 0.4781864196520419, "grad_norm": 0.7890625, "learning_rate": 0.00016277237497299503, "loss": 0.9334, "step": 18623 }, { "epoch": 0.47821209684796373, "grad_norm": 0.8359375, "learning_rate": 0.00016276889981431584, "loss": 0.9174, "step": 18624 }, { "epoch": 0.4782377740438855, "grad_norm": 0.9609375, "learning_rate": 0.00016276542453054396, "loss": 1.0175, "step": 18625 }, { "epoch": 0.47826345123980735, "grad_norm": 1.171875, "learning_rate": 0.00016276194912168623, "loss": 0.8378, "step": 18626 }, { "epoch": 0.4782891284357292, "grad_norm": 0.7578125, "learning_rate": 0.0001627584735877496, "loss": 0.9584, "step": 18627 }, { "epoch": 0.478314805631651, "grad_norm": 0.70703125, "learning_rate": 0.00016275499792874103, "loss": 0.7631, "step": 18628 }, { "epoch": 0.4783404828275728, "grad_norm": 0.71875, "learning_rate": 0.0001627515221446674, "loss": 0.9004, "step": 18629 }, { "epoch": 0.47836616002349464, "grad_norm": 0.76171875, "learning_rate": 0.00016274804623553566, "loss": 0.7479, "step": 18630 }, { "epoch": 0.47839183721941647, "grad_norm": 0.90234375, "learning_rate": 0.00016274457020135273, "loss": 0.8366, "step": 18631 }, { "epoch": 0.47841751441533825, "grad_norm": 0.7734375, "learning_rate": 0.0001627410940421255, "loss": 0.9054, "step": 18632 }, { "epoch": 0.4784431916112601, "grad_norm": 0.71484375, "learning_rate": 0.00016273761775786096, "loss": 0.7492, "step": 18633 }, { "epoch": 0.4784688688071819, "grad_norm": 0.796875, "learning_rate": 0.00016273414134856606, "loss": 0.9531, "step": 18634 }, { "epoch": 0.4784945460031037, "grad_norm": 0.7421875, "learning_rate": 0.00016273066481424766, "loss": 0.741, "step": 18635 }, { "epoch": 0.47852022319902554, "grad_norm": 0.7421875, "learning_rate": 0.00016272718815491267, "loss": 0.7921, "step": 18636 }, { "epoch": 0.4785459003949474, "grad_norm": 0.76953125, "learning_rate": 0.0001627237113705681, "loss": 0.8753, "step": 18637 }, { "epoch": 0.4785715775908692, "grad_norm": 0.7421875, "learning_rate": 0.00016272023446122082, "loss": 0.9776, "step": 18638 }, { "epoch": 0.478597254786791, "grad_norm": 0.78125, "learning_rate": 0.0001627167574268778, "loss": 1.0592, "step": 18639 }, { "epoch": 0.47862293198271283, "grad_norm": 0.6953125, "learning_rate": 0.00016271328026754595, "loss": 0.9485, "step": 18640 }, { "epoch": 0.47864860917863467, "grad_norm": 0.77734375, "learning_rate": 0.00016270980298323218, "loss": 0.8199, "step": 18641 }, { "epoch": 0.47867428637455645, "grad_norm": 0.71484375, "learning_rate": 0.00016270632557394344, "loss": 0.8658, "step": 18642 }, { "epoch": 0.4786999635704783, "grad_norm": 0.7578125, "learning_rate": 0.00016270284803968665, "loss": 0.8958, "step": 18643 }, { "epoch": 0.4787256407664001, "grad_norm": 0.8203125, "learning_rate": 0.00016269937038046876, "loss": 0.963, "step": 18644 }, { "epoch": 0.4787513179623219, "grad_norm": 0.76171875, "learning_rate": 0.00016269589259629668, "loss": 0.8885, "step": 18645 }, { "epoch": 0.47877699515824373, "grad_norm": 0.75, "learning_rate": 0.00016269241468717734, "loss": 0.8707, "step": 18646 }, { "epoch": 0.47880267235416557, "grad_norm": 0.765625, "learning_rate": 0.0001626889366531177, "loss": 0.7695, "step": 18647 }, { "epoch": 0.4788283495500874, "grad_norm": 0.765625, "learning_rate": 0.0001626854584941247, "loss": 0.8594, "step": 18648 }, { "epoch": 0.4788540267460092, "grad_norm": 0.76953125, "learning_rate": 0.00016268198021020518, "loss": 0.9161, "step": 18649 }, { "epoch": 0.478879703941931, "grad_norm": 0.7734375, "learning_rate": 0.00016267850180136617, "loss": 0.8445, "step": 18650 }, { "epoch": 0.47890538113785286, "grad_norm": 0.71875, "learning_rate": 0.00016267502326761454, "loss": 0.8508, "step": 18651 }, { "epoch": 0.47893105833377464, "grad_norm": 0.79296875, "learning_rate": 0.00016267154460895727, "loss": 0.8313, "step": 18652 }, { "epoch": 0.4789567355296965, "grad_norm": 0.79296875, "learning_rate": 0.00016266806582540132, "loss": 0.8525, "step": 18653 }, { "epoch": 0.4789824127256183, "grad_norm": 0.8203125, "learning_rate": 0.0001626645869169535, "loss": 1.0562, "step": 18654 }, { "epoch": 0.4790080899215401, "grad_norm": 0.73828125, "learning_rate": 0.00016266110788362088, "loss": 0.8399, "step": 18655 }, { "epoch": 0.47903376711746193, "grad_norm": 0.6953125, "learning_rate": 0.00016265762872541027, "loss": 0.8525, "step": 18656 }, { "epoch": 0.47905944431338376, "grad_norm": 0.7421875, "learning_rate": 0.00016265414944232872, "loss": 0.9678, "step": 18657 }, { "epoch": 0.4790851215093056, "grad_norm": 0.72265625, "learning_rate": 0.0001626506700343831, "loss": 0.897, "step": 18658 }, { "epoch": 0.4791107987052274, "grad_norm": 0.8515625, "learning_rate": 0.00016264719050158033, "loss": 1.1745, "step": 18659 }, { "epoch": 0.4791364759011492, "grad_norm": 0.79296875, "learning_rate": 0.0001626437108439274, "loss": 1.0028, "step": 18660 }, { "epoch": 0.47916215309707105, "grad_norm": 0.76171875, "learning_rate": 0.00016264023106143117, "loss": 0.8535, "step": 18661 }, { "epoch": 0.47918783029299283, "grad_norm": 0.73828125, "learning_rate": 0.00016263675115409863, "loss": 1.0171, "step": 18662 }, { "epoch": 0.47921350748891467, "grad_norm": 0.7734375, "learning_rate": 0.00016263327112193672, "loss": 0.8919, "step": 18663 }, { "epoch": 0.4792391846848365, "grad_norm": 0.765625, "learning_rate": 0.00016262979096495235, "loss": 0.9373, "step": 18664 }, { "epoch": 0.4792648618807583, "grad_norm": 0.765625, "learning_rate": 0.00016262631068315243, "loss": 0.9927, "step": 18665 }, { "epoch": 0.4792905390766801, "grad_norm": 0.79296875, "learning_rate": 0.00016262283027654399, "loss": 0.8713, "step": 18666 }, { "epoch": 0.47931621627260196, "grad_norm": 0.76953125, "learning_rate": 0.0001626193497451339, "loss": 0.8866, "step": 18667 }, { "epoch": 0.4793418934685238, "grad_norm": 0.71484375, "learning_rate": 0.00016261586908892907, "loss": 0.8768, "step": 18668 }, { "epoch": 0.4793675706644456, "grad_norm": 0.796875, "learning_rate": 0.00016261238830793647, "loss": 0.9823, "step": 18669 }, { "epoch": 0.4793932478603674, "grad_norm": 0.72265625, "learning_rate": 0.00016260890740216305, "loss": 0.9031, "step": 18670 }, { "epoch": 0.47941892505628925, "grad_norm": 0.77734375, "learning_rate": 0.00016260542637161574, "loss": 0.7811, "step": 18671 }, { "epoch": 0.479444602252211, "grad_norm": 0.734375, "learning_rate": 0.00016260194521630145, "loss": 0.7752, "step": 18672 }, { "epoch": 0.47947027944813286, "grad_norm": 0.67578125, "learning_rate": 0.00016259846393622715, "loss": 0.8182, "step": 18673 }, { "epoch": 0.4794959566440547, "grad_norm": 0.69921875, "learning_rate": 0.00016259498253139978, "loss": 0.8714, "step": 18674 }, { "epoch": 0.4795216338399765, "grad_norm": 0.81640625, "learning_rate": 0.00016259150100182625, "loss": 0.9669, "step": 18675 }, { "epoch": 0.4795473110358983, "grad_norm": 0.85546875, "learning_rate": 0.0001625880193475135, "loss": 1.0252, "step": 18676 }, { "epoch": 0.47957298823182015, "grad_norm": 0.7578125, "learning_rate": 0.00016258453756846849, "loss": 0.8746, "step": 18677 }, { "epoch": 0.479598665427742, "grad_norm": 0.72265625, "learning_rate": 0.00016258105566469813, "loss": 0.7908, "step": 18678 }, { "epoch": 0.47962434262366377, "grad_norm": 0.78515625, "learning_rate": 0.00016257757363620943, "loss": 0.8861, "step": 18679 }, { "epoch": 0.4796500198195856, "grad_norm": 0.73828125, "learning_rate": 0.00016257409148300926, "loss": 0.972, "step": 18680 }, { "epoch": 0.47967569701550744, "grad_norm": 0.83984375, "learning_rate": 0.00016257060920510456, "loss": 0.841, "step": 18681 }, { "epoch": 0.4797013742114292, "grad_norm": 0.78515625, "learning_rate": 0.00016256712680250227, "loss": 0.9486, "step": 18682 }, { "epoch": 0.47972705140735106, "grad_norm": 0.72265625, "learning_rate": 0.0001625636442752094, "loss": 0.7737, "step": 18683 }, { "epoch": 0.4797527286032729, "grad_norm": 0.7734375, "learning_rate": 0.0001625601616232328, "loss": 1.0573, "step": 18684 }, { "epoch": 0.4797784057991947, "grad_norm": 0.7734375, "learning_rate": 0.00016255667884657946, "loss": 0.8465, "step": 18685 }, { "epoch": 0.4798040829951165, "grad_norm": 0.73828125, "learning_rate": 0.00016255319594525632, "loss": 0.7899, "step": 18686 }, { "epoch": 0.47982976019103835, "grad_norm": 0.89453125, "learning_rate": 0.0001625497129192703, "loss": 0.9564, "step": 18687 }, { "epoch": 0.4798554373869602, "grad_norm": 0.78515625, "learning_rate": 0.00016254622976862834, "loss": 0.9478, "step": 18688 }, { "epoch": 0.47988111458288196, "grad_norm": 0.85546875, "learning_rate": 0.00016254274649333742, "loss": 0.8546, "step": 18689 }, { "epoch": 0.4799067917788038, "grad_norm": 0.7578125, "learning_rate": 0.00016253926309340444, "loss": 0.8438, "step": 18690 }, { "epoch": 0.47993246897472563, "grad_norm": 0.9921875, "learning_rate": 0.00016253577956883638, "loss": 1.0047, "step": 18691 }, { "epoch": 0.4799581461706474, "grad_norm": 0.8125, "learning_rate": 0.00016253229591964012, "loss": 0.8965, "step": 18692 }, { "epoch": 0.47998382336656925, "grad_norm": 0.73828125, "learning_rate": 0.0001625288121458227, "loss": 0.8131, "step": 18693 }, { "epoch": 0.4800095005624911, "grad_norm": 0.82421875, "learning_rate": 0.00016252532824739094, "loss": 0.8814, "step": 18694 }, { "epoch": 0.48003517775841287, "grad_norm": 0.765625, "learning_rate": 0.00016252184422435188, "loss": 0.9479, "step": 18695 }, { "epoch": 0.4800608549543347, "grad_norm": 0.85546875, "learning_rate": 0.00016251836007671243, "loss": 0.7719, "step": 18696 }, { "epoch": 0.48008653215025654, "grad_norm": 0.87109375, "learning_rate": 0.00016251487580447953, "loss": 0.9042, "step": 18697 }, { "epoch": 0.4801122093461784, "grad_norm": 0.796875, "learning_rate": 0.00016251139140766014, "loss": 0.9289, "step": 18698 }, { "epoch": 0.48013788654210016, "grad_norm": 0.81640625, "learning_rate": 0.00016250790688626118, "loss": 0.8177, "step": 18699 }, { "epoch": 0.480163563738022, "grad_norm": 0.88671875, "learning_rate": 0.00016250442224028964, "loss": 0.9324, "step": 18700 }, { "epoch": 0.48018924093394383, "grad_norm": 0.77734375, "learning_rate": 0.0001625009374697524, "loss": 0.8637, "step": 18701 }, { "epoch": 0.4802149181298656, "grad_norm": 0.82421875, "learning_rate": 0.00016249745257465646, "loss": 0.926, "step": 18702 }, { "epoch": 0.48024059532578744, "grad_norm": 0.796875, "learning_rate": 0.00016249396755500873, "loss": 0.9143, "step": 18703 }, { "epoch": 0.4802662725217093, "grad_norm": 0.73828125, "learning_rate": 0.00016249048241081615, "loss": 0.8913, "step": 18704 }, { "epoch": 0.48029194971763106, "grad_norm": 0.890625, "learning_rate": 0.00016248699714208571, "loss": 0.9123, "step": 18705 }, { "epoch": 0.4803176269135529, "grad_norm": 0.73828125, "learning_rate": 0.00016248351174882433, "loss": 0.8238, "step": 18706 }, { "epoch": 0.48034330410947473, "grad_norm": 0.85546875, "learning_rate": 0.0001624800262310389, "loss": 0.8919, "step": 18707 }, { "epoch": 0.48036898130539657, "grad_norm": 0.80078125, "learning_rate": 0.0001624765405887365, "loss": 0.9298, "step": 18708 }, { "epoch": 0.48039465850131835, "grad_norm": 0.82421875, "learning_rate": 0.00016247305482192394, "loss": 1.0049, "step": 18709 }, { "epoch": 0.4804203356972402, "grad_norm": 0.71875, "learning_rate": 0.00016246956893060826, "loss": 0.9069, "step": 18710 }, { "epoch": 0.480446012893162, "grad_norm": 0.7578125, "learning_rate": 0.00016246608291479637, "loss": 1.0159, "step": 18711 }, { "epoch": 0.4804716900890838, "grad_norm": 0.796875, "learning_rate": 0.00016246259677449518, "loss": 0.8015, "step": 18712 }, { "epoch": 0.48049736728500564, "grad_norm": 0.81640625, "learning_rate": 0.00016245911050971173, "loss": 0.9458, "step": 18713 }, { "epoch": 0.4805230444809275, "grad_norm": 0.859375, "learning_rate": 0.0001624556241204529, "loss": 0.9551, "step": 18714 }, { "epoch": 0.48054872167684926, "grad_norm": 0.7890625, "learning_rate": 0.00016245213760672562, "loss": 0.7728, "step": 18715 }, { "epoch": 0.4805743988727711, "grad_norm": 1.4765625, "learning_rate": 0.00016244865096853693, "loss": 1.0843, "step": 18716 }, { "epoch": 0.4806000760686929, "grad_norm": 0.765625, "learning_rate": 0.00016244516420589367, "loss": 0.8929, "step": 18717 }, { "epoch": 0.48062575326461476, "grad_norm": 0.8046875, "learning_rate": 0.00016244167731880284, "loss": 0.8721, "step": 18718 }, { "epoch": 0.48065143046053654, "grad_norm": 0.71875, "learning_rate": 0.0001624381903072714, "loss": 0.8701, "step": 18719 }, { "epoch": 0.4806771076564584, "grad_norm": 0.76171875, "learning_rate": 0.00016243470317130632, "loss": 0.813, "step": 18720 }, { "epoch": 0.4807027848523802, "grad_norm": 0.78125, "learning_rate": 0.00016243121591091446, "loss": 0.8316, "step": 18721 }, { "epoch": 0.480728462048302, "grad_norm": 0.84765625, "learning_rate": 0.00016242772852610284, "loss": 0.876, "step": 18722 }, { "epoch": 0.48075413924422383, "grad_norm": 0.7265625, "learning_rate": 0.0001624242410168784, "loss": 0.9892, "step": 18723 }, { "epoch": 0.48077981644014567, "grad_norm": 0.7890625, "learning_rate": 0.00016242075338324813, "loss": 0.8975, "step": 18724 }, { "epoch": 0.48080549363606745, "grad_norm": 0.796875, "learning_rate": 0.00016241726562521888, "loss": 1.0929, "step": 18725 }, { "epoch": 0.4808311708319893, "grad_norm": 0.74609375, "learning_rate": 0.0001624137777427977, "loss": 0.8084, "step": 18726 }, { "epoch": 0.4808568480279111, "grad_norm": 0.7421875, "learning_rate": 0.00016241028973599145, "loss": 0.9528, "step": 18727 }, { "epoch": 0.48088252522383296, "grad_norm": 0.79296875, "learning_rate": 0.00016240680160480717, "loss": 0.8272, "step": 18728 }, { "epoch": 0.48090820241975474, "grad_norm": 0.74609375, "learning_rate": 0.00016240331334925175, "loss": 0.7391, "step": 18729 }, { "epoch": 0.4809338796156766, "grad_norm": 0.71484375, "learning_rate": 0.00016239982496933216, "loss": 0.8841, "step": 18730 }, { "epoch": 0.4809595568115984, "grad_norm": 0.765625, "learning_rate": 0.00016239633646505537, "loss": 0.889, "step": 18731 }, { "epoch": 0.4809852340075202, "grad_norm": 0.7734375, "learning_rate": 0.00016239284783642831, "loss": 0.9438, "step": 18732 }, { "epoch": 0.481010911203442, "grad_norm": 0.7734375, "learning_rate": 0.00016238935908345796, "loss": 0.9815, "step": 18733 }, { "epoch": 0.48103658839936386, "grad_norm": 0.75390625, "learning_rate": 0.00016238587020615125, "loss": 0.9039, "step": 18734 }, { "epoch": 0.48106226559528564, "grad_norm": 0.875, "learning_rate": 0.0001623823812045151, "loss": 0.8793, "step": 18735 }, { "epoch": 0.4810879427912075, "grad_norm": 0.73828125, "learning_rate": 0.00016237889207855653, "loss": 0.9141, "step": 18736 }, { "epoch": 0.4811136199871293, "grad_norm": 0.890625, "learning_rate": 0.00016237540282828245, "loss": 0.8933, "step": 18737 }, { "epoch": 0.48113929718305115, "grad_norm": 0.83203125, "learning_rate": 0.00016237191345369983, "loss": 1.0345, "step": 18738 }, { "epoch": 0.48116497437897293, "grad_norm": 0.828125, "learning_rate": 0.00016236842395481562, "loss": 0.899, "step": 18739 }, { "epoch": 0.48119065157489477, "grad_norm": 0.83984375, "learning_rate": 0.00016236493433163676, "loss": 1.0833, "step": 18740 }, { "epoch": 0.4812163287708166, "grad_norm": 0.8671875, "learning_rate": 0.00016236144458417026, "loss": 1.0066, "step": 18741 }, { "epoch": 0.4812420059667384, "grad_norm": 0.75, "learning_rate": 0.000162357954712423, "loss": 0.8869, "step": 18742 }, { "epoch": 0.4812676831626602, "grad_norm": 0.76171875, "learning_rate": 0.00016235446471640197, "loss": 0.9139, "step": 18743 }, { "epoch": 0.48129336035858206, "grad_norm": 0.87890625, "learning_rate": 0.00016235097459611411, "loss": 0.9836, "step": 18744 }, { "epoch": 0.48131903755450384, "grad_norm": 0.75, "learning_rate": 0.00016234748435156645, "loss": 0.7787, "step": 18745 }, { "epoch": 0.4813447147504257, "grad_norm": 0.8203125, "learning_rate": 0.00016234399398276585, "loss": 0.924, "step": 18746 }, { "epoch": 0.4813703919463475, "grad_norm": 0.734375, "learning_rate": 0.00016234050348971928, "loss": 0.8618, "step": 18747 }, { "epoch": 0.48139606914226934, "grad_norm": 0.73828125, "learning_rate": 0.00016233701287243375, "loss": 1.037, "step": 18748 }, { "epoch": 0.4814217463381911, "grad_norm": 0.7265625, "learning_rate": 0.00016233352213091616, "loss": 0.8167, "step": 18749 }, { "epoch": 0.48144742353411296, "grad_norm": 0.7578125, "learning_rate": 0.00016233003126517353, "loss": 0.8676, "step": 18750 }, { "epoch": 0.4814731007300348, "grad_norm": 0.78515625, "learning_rate": 0.00016232654027521272, "loss": 0.8998, "step": 18751 }, { "epoch": 0.4814987779259566, "grad_norm": 0.77734375, "learning_rate": 0.0001623230491610408, "loss": 0.8739, "step": 18752 }, { "epoch": 0.4815244551218784, "grad_norm": 0.74609375, "learning_rate": 0.00016231955792266465, "loss": 0.9403, "step": 18753 }, { "epoch": 0.48155013231780025, "grad_norm": 0.79296875, "learning_rate": 0.00016231606656009123, "loss": 0.8745, "step": 18754 }, { "epoch": 0.48157580951372203, "grad_norm": 0.7890625, "learning_rate": 0.00016231257507332757, "loss": 0.8282, "step": 18755 }, { "epoch": 0.48160148670964387, "grad_norm": 0.7890625, "learning_rate": 0.00016230908346238053, "loss": 0.9525, "step": 18756 }, { "epoch": 0.4816271639055657, "grad_norm": 0.81640625, "learning_rate": 0.00016230559172725713, "loss": 0.9383, "step": 18757 }, { "epoch": 0.48165284110148754, "grad_norm": 0.78515625, "learning_rate": 0.00016230209986796429, "loss": 0.9426, "step": 18758 }, { "epoch": 0.4816785182974093, "grad_norm": 0.78125, "learning_rate": 0.00016229860788450904, "loss": 0.9126, "step": 18759 }, { "epoch": 0.48170419549333116, "grad_norm": 0.734375, "learning_rate": 0.00016229511577689826, "loss": 0.8382, "step": 18760 }, { "epoch": 0.481729872689253, "grad_norm": 0.78515625, "learning_rate": 0.00016229162354513895, "loss": 0.8352, "step": 18761 }, { "epoch": 0.48175554988517477, "grad_norm": 0.71875, "learning_rate": 0.00016228813118923804, "loss": 0.861, "step": 18762 }, { "epoch": 0.4817812270810966, "grad_norm": 0.75, "learning_rate": 0.00016228463870920256, "loss": 0.8398, "step": 18763 }, { "epoch": 0.48180690427701844, "grad_norm": 0.76171875, "learning_rate": 0.00016228114610503938, "loss": 0.8099, "step": 18764 }, { "epoch": 0.4818325814729402, "grad_norm": 0.8203125, "learning_rate": 0.0001622776533767555, "loss": 1.0559, "step": 18765 }, { "epoch": 0.48185825866886206, "grad_norm": 0.80078125, "learning_rate": 0.0001622741605243579, "loss": 0.8951, "step": 18766 }, { "epoch": 0.4818839358647839, "grad_norm": 0.8203125, "learning_rate": 0.00016227066754785353, "loss": 0.8948, "step": 18767 }, { "epoch": 0.48190961306070573, "grad_norm": 0.765625, "learning_rate": 0.00016226717444724935, "loss": 0.9425, "step": 18768 }, { "epoch": 0.4819352902566275, "grad_norm": 0.75390625, "learning_rate": 0.0001622636812225523, "loss": 0.9405, "step": 18769 }, { "epoch": 0.48196096745254935, "grad_norm": 0.796875, "learning_rate": 0.00016226018787376934, "loss": 0.8811, "step": 18770 }, { "epoch": 0.4819866446484712, "grad_norm": 0.7890625, "learning_rate": 0.00016225669440090746, "loss": 0.9523, "step": 18771 }, { "epoch": 0.48201232184439297, "grad_norm": 0.76171875, "learning_rate": 0.0001622532008039736, "loss": 0.8881, "step": 18772 }, { "epoch": 0.4820379990403148, "grad_norm": 0.8046875, "learning_rate": 0.00016224970708297477, "loss": 0.8237, "step": 18773 }, { "epoch": 0.48206367623623664, "grad_norm": 0.75, "learning_rate": 0.0001622462132379179, "loss": 0.7616, "step": 18774 }, { "epoch": 0.4820893534321584, "grad_norm": 0.73046875, "learning_rate": 0.0001622427192688099, "loss": 0.8974, "step": 18775 }, { "epoch": 0.48211503062808025, "grad_norm": 0.7421875, "learning_rate": 0.00016223922517565783, "loss": 0.8209, "step": 18776 }, { "epoch": 0.4821407078240021, "grad_norm": 0.78125, "learning_rate": 0.00016223573095846858, "loss": 1.0561, "step": 18777 }, { "epoch": 0.4821663850199239, "grad_norm": 0.8125, "learning_rate": 0.00016223223661724915, "loss": 0.872, "step": 18778 }, { "epoch": 0.4821920622158457, "grad_norm": 0.81640625, "learning_rate": 0.0001622287421520065, "loss": 1.0209, "step": 18779 }, { "epoch": 0.48221773941176754, "grad_norm": 0.734375, "learning_rate": 0.00016222524756274756, "loss": 0.8857, "step": 18780 }, { "epoch": 0.4822434166076894, "grad_norm": 0.83203125, "learning_rate": 0.00016222175284947936, "loss": 0.8159, "step": 18781 }, { "epoch": 0.48226909380361116, "grad_norm": 0.76953125, "learning_rate": 0.0001622182580122088, "loss": 0.8489, "step": 18782 }, { "epoch": 0.482294770999533, "grad_norm": 0.6640625, "learning_rate": 0.00016221476305094287, "loss": 0.8031, "step": 18783 }, { "epoch": 0.48232044819545483, "grad_norm": 0.72265625, "learning_rate": 0.00016221126796568856, "loss": 0.83, "step": 18784 }, { "epoch": 0.4823461253913766, "grad_norm": 0.734375, "learning_rate": 0.00016220777275645284, "loss": 0.9219, "step": 18785 }, { "epoch": 0.48237180258729845, "grad_norm": 0.8515625, "learning_rate": 0.00016220427742324258, "loss": 0.9328, "step": 18786 }, { "epoch": 0.4823974797832203, "grad_norm": 0.83984375, "learning_rate": 0.00016220078196606483, "loss": 1.0077, "step": 18787 }, { "epoch": 0.4824231569791421, "grad_norm": 0.77734375, "learning_rate": 0.00016219728638492656, "loss": 0.8392, "step": 18788 }, { "epoch": 0.4824488341750639, "grad_norm": 0.79296875, "learning_rate": 0.0001621937906798347, "loss": 0.8055, "step": 18789 }, { "epoch": 0.48247451137098574, "grad_norm": 0.765625, "learning_rate": 0.00016219029485079625, "loss": 0.9155, "step": 18790 }, { "epoch": 0.4825001885669076, "grad_norm": 0.76953125, "learning_rate": 0.00016218679889781816, "loss": 1.0621, "step": 18791 }, { "epoch": 0.48252586576282935, "grad_norm": 0.8359375, "learning_rate": 0.0001621833028209074, "loss": 1.0242, "step": 18792 }, { "epoch": 0.4825515429587512, "grad_norm": 0.79296875, "learning_rate": 0.0001621798066200709, "loss": 0.9272, "step": 18793 }, { "epoch": 0.482577220154673, "grad_norm": 0.6953125, "learning_rate": 0.0001621763102953157, "loss": 0.8683, "step": 18794 }, { "epoch": 0.4826028973505948, "grad_norm": 0.8125, "learning_rate": 0.00016217281384664872, "loss": 0.8884, "step": 18795 }, { "epoch": 0.48262857454651664, "grad_norm": 0.7890625, "learning_rate": 0.00016216931727407695, "loss": 0.8543, "step": 18796 }, { "epoch": 0.4826542517424385, "grad_norm": 0.7421875, "learning_rate": 0.0001621658205776073, "loss": 1.0132, "step": 18797 }, { "epoch": 0.48267992893836026, "grad_norm": 0.8125, "learning_rate": 0.00016216232375724683, "loss": 0.9487, "step": 18798 }, { "epoch": 0.4827056061342821, "grad_norm": 0.81640625, "learning_rate": 0.00016215882681300245, "loss": 0.9403, "step": 18799 }, { "epoch": 0.48273128333020393, "grad_norm": 0.7265625, "learning_rate": 0.00016215532974488115, "loss": 0.89, "step": 18800 }, { "epoch": 0.48275696052612577, "grad_norm": 0.6796875, "learning_rate": 0.00016215183255288986, "loss": 0.8493, "step": 18801 }, { "epoch": 0.48278263772204755, "grad_norm": 0.76171875, "learning_rate": 0.00016214833523703562, "loss": 1.0209, "step": 18802 }, { "epoch": 0.4828083149179694, "grad_norm": 0.78515625, "learning_rate": 0.00016214483779732533, "loss": 0.8271, "step": 18803 }, { "epoch": 0.4828339921138912, "grad_norm": 0.76953125, "learning_rate": 0.00016214134023376602, "loss": 0.8131, "step": 18804 }, { "epoch": 0.482859669309813, "grad_norm": 0.734375, "learning_rate": 0.00016213784254636462, "loss": 0.8276, "step": 18805 }, { "epoch": 0.48288534650573484, "grad_norm": 0.8046875, "learning_rate": 0.00016213434473512812, "loss": 0.8834, "step": 18806 }, { "epoch": 0.48291102370165667, "grad_norm": 0.7578125, "learning_rate": 0.00016213084680006347, "loss": 0.9076, "step": 18807 }, { "epoch": 0.48293670089757845, "grad_norm": 0.7265625, "learning_rate": 0.00016212734874117767, "loss": 0.9025, "step": 18808 }, { "epoch": 0.4829623780935003, "grad_norm": 0.8359375, "learning_rate": 0.00016212385055847767, "loss": 0.8187, "step": 18809 }, { "epoch": 0.4829880552894221, "grad_norm": 0.73828125, "learning_rate": 0.00016212035225197043, "loss": 0.8498, "step": 18810 }, { "epoch": 0.48301373248534396, "grad_norm": 0.88671875, "learning_rate": 0.00016211685382166297, "loss": 0.8596, "step": 18811 }, { "epoch": 0.48303940968126574, "grad_norm": 0.76171875, "learning_rate": 0.00016211335526756222, "loss": 0.8626, "step": 18812 }, { "epoch": 0.4830650868771876, "grad_norm": 0.79296875, "learning_rate": 0.00016210985658967516, "loss": 0.9513, "step": 18813 }, { "epoch": 0.4830907640731094, "grad_norm": 0.7421875, "learning_rate": 0.00016210635778800877, "loss": 0.7822, "step": 18814 }, { "epoch": 0.4831164412690312, "grad_norm": 0.76953125, "learning_rate": 0.00016210285886257003, "loss": 0.9141, "step": 18815 }, { "epoch": 0.48314211846495303, "grad_norm": 0.83984375, "learning_rate": 0.00016209935981336586, "loss": 0.972, "step": 18816 }, { "epoch": 0.48316779566087487, "grad_norm": 0.75390625, "learning_rate": 0.00016209586064040333, "loss": 0.9362, "step": 18817 }, { "epoch": 0.48319347285679665, "grad_norm": 0.75390625, "learning_rate": 0.00016209236134368934, "loss": 0.9102, "step": 18818 }, { "epoch": 0.4832191500527185, "grad_norm": 0.83203125, "learning_rate": 0.00016208886192323085, "loss": 1.0498, "step": 18819 }, { "epoch": 0.4832448272486403, "grad_norm": 0.82421875, "learning_rate": 0.0001620853623790349, "loss": 0.9006, "step": 18820 }, { "epoch": 0.48327050444456215, "grad_norm": 0.78125, "learning_rate": 0.00016208186271110843, "loss": 1.0008, "step": 18821 }, { "epoch": 0.48329618164048394, "grad_norm": 0.8125, "learning_rate": 0.0001620783629194584, "loss": 0.9501, "step": 18822 }, { "epoch": 0.48332185883640577, "grad_norm": 0.71484375, "learning_rate": 0.00016207486300409178, "loss": 0.8328, "step": 18823 }, { "epoch": 0.4833475360323276, "grad_norm": 0.69140625, "learning_rate": 0.0001620713629650156, "loss": 0.9464, "step": 18824 }, { "epoch": 0.4833732132282494, "grad_norm": 0.828125, "learning_rate": 0.0001620678628022368, "loss": 0.9951, "step": 18825 }, { "epoch": 0.4833988904241712, "grad_norm": 0.74609375, "learning_rate": 0.00016206436251576235, "loss": 0.9887, "step": 18826 }, { "epoch": 0.48342456762009306, "grad_norm": 0.84375, "learning_rate": 0.00016206086210559923, "loss": 1.0115, "step": 18827 }, { "epoch": 0.48345024481601484, "grad_norm": 0.71875, "learning_rate": 0.00016205736157175443, "loss": 0.9834, "step": 18828 }, { "epoch": 0.4834759220119367, "grad_norm": 0.78515625, "learning_rate": 0.0001620538609142349, "loss": 0.8472, "step": 18829 }, { "epoch": 0.4835015992078585, "grad_norm": 0.75, "learning_rate": 0.0001620503601330476, "loss": 0.8186, "step": 18830 }, { "epoch": 0.48352727640378035, "grad_norm": 0.78515625, "learning_rate": 0.0001620468592281996, "loss": 0.9501, "step": 18831 }, { "epoch": 0.48355295359970213, "grad_norm": 0.76171875, "learning_rate": 0.00016204335819969777, "loss": 1.0183, "step": 18832 }, { "epoch": 0.48357863079562396, "grad_norm": 0.76953125, "learning_rate": 0.00016203985704754917, "loss": 0.8354, "step": 18833 }, { "epoch": 0.4836043079915458, "grad_norm": 0.71875, "learning_rate": 0.00016203635577176072, "loss": 0.8833, "step": 18834 }, { "epoch": 0.4836299851874676, "grad_norm": 0.84375, "learning_rate": 0.0001620328543723394, "loss": 0.9012, "step": 18835 }, { "epoch": 0.4836556623833894, "grad_norm": 0.7265625, "learning_rate": 0.00016202935284929221, "loss": 0.8775, "step": 18836 }, { "epoch": 0.48368133957931125, "grad_norm": 0.7890625, "learning_rate": 0.00016202585120262612, "loss": 0.9771, "step": 18837 }, { "epoch": 0.48370701677523303, "grad_norm": 0.765625, "learning_rate": 0.00016202234943234816, "loss": 0.9898, "step": 18838 }, { "epoch": 0.48373269397115487, "grad_norm": 0.7578125, "learning_rate": 0.00016201884753846518, "loss": 0.9059, "step": 18839 }, { "epoch": 0.4837583711670767, "grad_norm": 0.8125, "learning_rate": 0.0001620153455209843, "loss": 1.0341, "step": 18840 }, { "epoch": 0.48378404836299854, "grad_norm": 2.875, "learning_rate": 0.0001620118433799124, "loss": 0.8199, "step": 18841 }, { "epoch": 0.4838097255589203, "grad_norm": 0.8359375, "learning_rate": 0.0001620083411152565, "loss": 0.9497, "step": 18842 }, { "epoch": 0.48383540275484216, "grad_norm": 0.76171875, "learning_rate": 0.0001620048387270236, "loss": 0.9509, "step": 18843 }, { "epoch": 0.483861079950764, "grad_norm": 0.82421875, "learning_rate": 0.00016200133621522067, "loss": 0.9581, "step": 18844 }, { "epoch": 0.4838867571466858, "grad_norm": 0.7890625, "learning_rate": 0.00016199783357985465, "loss": 1.0322, "step": 18845 }, { "epoch": 0.4839124343426076, "grad_norm": 0.78515625, "learning_rate": 0.00016199433082093257, "loss": 0.8902, "step": 18846 }, { "epoch": 0.48393811153852945, "grad_norm": 0.75, "learning_rate": 0.00016199082793846137, "loss": 0.7742, "step": 18847 }, { "epoch": 0.48396378873445123, "grad_norm": 0.703125, "learning_rate": 0.00016198732493244805, "loss": 0.7989, "step": 18848 }, { "epoch": 0.48398946593037306, "grad_norm": 0.7265625, "learning_rate": 0.00016198382180289959, "loss": 0.8577, "step": 18849 }, { "epoch": 0.4840151431262949, "grad_norm": 0.77734375, "learning_rate": 0.00016198031854982298, "loss": 0.8654, "step": 18850 }, { "epoch": 0.48404082032221674, "grad_norm": 0.7890625, "learning_rate": 0.00016197681517322518, "loss": 0.8606, "step": 18851 }, { "epoch": 0.4840664975181385, "grad_norm": 0.734375, "learning_rate": 0.0001619733116731132, "loss": 0.9202, "step": 18852 }, { "epoch": 0.48409217471406035, "grad_norm": 0.7578125, "learning_rate": 0.00016196980804949402, "loss": 0.8777, "step": 18853 }, { "epoch": 0.4841178519099822, "grad_norm": 0.81640625, "learning_rate": 0.00016196630430237457, "loss": 0.9724, "step": 18854 }, { "epoch": 0.48414352910590397, "grad_norm": 0.71484375, "learning_rate": 0.00016196280043176192, "loss": 0.9412, "step": 18855 }, { "epoch": 0.4841692063018258, "grad_norm": 0.7421875, "learning_rate": 0.00016195929643766298, "loss": 0.7658, "step": 18856 }, { "epoch": 0.48419488349774764, "grad_norm": 0.859375, "learning_rate": 0.0001619557923200848, "loss": 0.9083, "step": 18857 }, { "epoch": 0.4842205606936694, "grad_norm": 0.7578125, "learning_rate": 0.00016195228807903424, "loss": 0.7915, "step": 18858 }, { "epoch": 0.48424623788959126, "grad_norm": 0.7890625, "learning_rate": 0.00016194878371451841, "loss": 1.0727, "step": 18859 }, { "epoch": 0.4842719150855131, "grad_norm": 1.1171875, "learning_rate": 0.00016194527922654427, "loss": 0.9109, "step": 18860 }, { "epoch": 0.48429759228143493, "grad_norm": 0.859375, "learning_rate": 0.00016194177461511874, "loss": 0.7787, "step": 18861 }, { "epoch": 0.4843232694773567, "grad_norm": 0.75390625, "learning_rate": 0.00016193826988024888, "loss": 0.8665, "step": 18862 }, { "epoch": 0.48434894667327855, "grad_norm": 0.76953125, "learning_rate": 0.00016193476502194163, "loss": 0.9508, "step": 18863 }, { "epoch": 0.4843746238692004, "grad_norm": 0.83984375, "learning_rate": 0.000161931260040204, "loss": 0.9347, "step": 18864 }, { "epoch": 0.48440030106512216, "grad_norm": 0.7578125, "learning_rate": 0.00016192775493504294, "loss": 0.9368, "step": 18865 }, { "epoch": 0.484425978261044, "grad_norm": 0.7109375, "learning_rate": 0.0001619242497064655, "loss": 0.8922, "step": 18866 }, { "epoch": 0.48445165545696584, "grad_norm": 0.75, "learning_rate": 0.00016192074435447858, "loss": 0.8237, "step": 18867 }, { "epoch": 0.4844773326528876, "grad_norm": 0.828125, "learning_rate": 0.00016191723887908923, "loss": 0.9911, "step": 18868 }, { "epoch": 0.48450300984880945, "grad_norm": 0.76171875, "learning_rate": 0.00016191373328030442, "loss": 0.9361, "step": 18869 }, { "epoch": 0.4845286870447313, "grad_norm": 0.7734375, "learning_rate": 0.00016191022755813114, "loss": 0.9967, "step": 18870 }, { "epoch": 0.4845543642406531, "grad_norm": 0.796875, "learning_rate": 0.00016190672171257636, "loss": 0.8448, "step": 18871 }, { "epoch": 0.4845800414365749, "grad_norm": 0.8046875, "learning_rate": 0.00016190321574364704, "loss": 0.8515, "step": 18872 }, { "epoch": 0.48460571863249674, "grad_norm": 0.75, "learning_rate": 0.00016189970965135023, "loss": 0.9004, "step": 18873 }, { "epoch": 0.4846313958284186, "grad_norm": 0.7734375, "learning_rate": 0.00016189620343569293, "loss": 0.9228, "step": 18874 }, { "epoch": 0.48465707302434036, "grad_norm": 0.7265625, "learning_rate": 0.00016189269709668202, "loss": 0.8517, "step": 18875 }, { "epoch": 0.4846827502202622, "grad_norm": 0.76171875, "learning_rate": 0.0001618891906343246, "loss": 0.8272, "step": 18876 }, { "epoch": 0.48470842741618403, "grad_norm": 0.796875, "learning_rate": 0.0001618856840486276, "loss": 0.975, "step": 18877 }, { "epoch": 0.4847341046121058, "grad_norm": 0.8203125, "learning_rate": 0.00016188217733959802, "loss": 0.8599, "step": 18878 }, { "epoch": 0.48475978180802765, "grad_norm": 0.6953125, "learning_rate": 0.00016187867050724287, "loss": 0.7769, "step": 18879 }, { "epoch": 0.4847854590039495, "grad_norm": 0.8046875, "learning_rate": 0.0001618751635515691, "loss": 0.9432, "step": 18880 }, { "epoch": 0.4848111361998713, "grad_norm": 0.76953125, "learning_rate": 0.00016187165647258369, "loss": 0.9424, "step": 18881 }, { "epoch": 0.4848368133957931, "grad_norm": 0.80078125, "learning_rate": 0.00016186814927029368, "loss": 0.7972, "step": 18882 }, { "epoch": 0.48486249059171493, "grad_norm": 0.78515625, "learning_rate": 0.00016186464194470606, "loss": 0.9941, "step": 18883 }, { "epoch": 0.48488816778763677, "grad_norm": 0.8203125, "learning_rate": 0.0001618611344958278, "loss": 0.8362, "step": 18884 }, { "epoch": 0.48491384498355855, "grad_norm": 0.86328125, "learning_rate": 0.00016185762692366585, "loss": 0.9435, "step": 18885 }, { "epoch": 0.4849395221794804, "grad_norm": 0.82421875, "learning_rate": 0.00016185411922822724, "loss": 0.9439, "step": 18886 }, { "epoch": 0.4849651993754022, "grad_norm": 0.7890625, "learning_rate": 0.000161850611409519, "loss": 0.9115, "step": 18887 }, { "epoch": 0.484990876571324, "grad_norm": 0.796875, "learning_rate": 0.00016184710346754804, "loss": 0.8569, "step": 18888 }, { "epoch": 0.48501655376724584, "grad_norm": 0.6953125, "learning_rate": 0.0001618435954023214, "loss": 0.8476, "step": 18889 }, { "epoch": 0.4850422309631677, "grad_norm": 0.80078125, "learning_rate": 0.00016184008721384607, "loss": 0.9976, "step": 18890 }, { "epoch": 0.4850679081590895, "grad_norm": 0.74609375, "learning_rate": 0.000161836578902129, "loss": 0.855, "step": 18891 }, { "epoch": 0.4850935853550113, "grad_norm": 0.84375, "learning_rate": 0.00016183307046717724, "loss": 0.8363, "step": 18892 }, { "epoch": 0.48511926255093313, "grad_norm": 0.75390625, "learning_rate": 0.00016182956190899776, "loss": 1.0017, "step": 18893 }, { "epoch": 0.48514493974685496, "grad_norm": 0.70703125, "learning_rate": 0.00016182605322759755, "loss": 0.7704, "step": 18894 }, { "epoch": 0.48517061694277674, "grad_norm": 0.78125, "learning_rate": 0.00016182254442298358, "loss": 0.8956, "step": 18895 }, { "epoch": 0.4851962941386986, "grad_norm": 0.8359375, "learning_rate": 0.00016181903549516288, "loss": 0.9022, "step": 18896 }, { "epoch": 0.4852219713346204, "grad_norm": 0.734375, "learning_rate": 0.00016181552644414242, "loss": 0.9071, "step": 18897 }, { "epoch": 0.4852476485305422, "grad_norm": 0.79296875, "learning_rate": 0.0001618120172699292, "loss": 0.9304, "step": 18898 }, { "epoch": 0.48527332572646403, "grad_norm": 0.76171875, "learning_rate": 0.00016180850797253022, "loss": 0.9089, "step": 18899 }, { "epoch": 0.48529900292238587, "grad_norm": 0.765625, "learning_rate": 0.00016180499855195245, "loss": 0.9891, "step": 18900 }, { "epoch": 0.4853246801183077, "grad_norm": 0.76171875, "learning_rate": 0.0001618014890082029, "loss": 1.0368, "step": 18901 }, { "epoch": 0.4853503573142295, "grad_norm": 0.71875, "learning_rate": 0.00016179797934128858, "loss": 0.931, "step": 18902 }, { "epoch": 0.4853760345101513, "grad_norm": 0.7734375, "learning_rate": 0.00016179446955121647, "loss": 0.8494, "step": 18903 }, { "epoch": 0.48540171170607316, "grad_norm": 0.80859375, "learning_rate": 0.0001617909596379936, "loss": 1.0426, "step": 18904 }, { "epoch": 0.48542738890199494, "grad_norm": 0.8671875, "learning_rate": 0.00016178744960162686, "loss": 0.9184, "step": 18905 }, { "epoch": 0.4854530660979168, "grad_norm": 0.76171875, "learning_rate": 0.00016178393944212336, "loss": 0.8114, "step": 18906 }, { "epoch": 0.4854787432938386, "grad_norm": 0.71875, "learning_rate": 0.00016178042915949002, "loss": 0.8274, "step": 18907 }, { "epoch": 0.4855044204897604, "grad_norm": 0.796875, "learning_rate": 0.00016177691875373388, "loss": 0.8855, "step": 18908 }, { "epoch": 0.4855300976856822, "grad_norm": 0.88671875, "learning_rate": 0.0001617734082248619, "loss": 0.8909, "step": 18909 }, { "epoch": 0.48555577488160406, "grad_norm": 0.78125, "learning_rate": 0.00016176989757288115, "loss": 0.9525, "step": 18910 }, { "epoch": 0.4855814520775259, "grad_norm": 0.76171875, "learning_rate": 0.00016176638679779853, "loss": 0.6793, "step": 18911 }, { "epoch": 0.4856071292734477, "grad_norm": 0.7890625, "learning_rate": 0.00016176287589962106, "loss": 0.8944, "step": 18912 }, { "epoch": 0.4856328064693695, "grad_norm": 0.890625, "learning_rate": 0.0001617593648783558, "loss": 0.8193, "step": 18913 }, { "epoch": 0.48565848366529135, "grad_norm": 0.828125, "learning_rate": 0.0001617558537340097, "loss": 0.8645, "step": 18914 }, { "epoch": 0.48568416086121313, "grad_norm": 0.8046875, "learning_rate": 0.00016175234246658973, "loss": 0.9014, "step": 18915 }, { "epoch": 0.48570983805713497, "grad_norm": 0.796875, "learning_rate": 0.00016174883107610298, "loss": 0.9175, "step": 18916 }, { "epoch": 0.4857355152530568, "grad_norm": 0.80078125, "learning_rate": 0.00016174531956255635, "loss": 0.9745, "step": 18917 }, { "epoch": 0.4857611924489786, "grad_norm": 0.8125, "learning_rate": 0.00016174180792595687, "loss": 0.8731, "step": 18918 }, { "epoch": 0.4857868696449004, "grad_norm": 0.8984375, "learning_rate": 0.00016173829616631156, "loss": 0.9309, "step": 18919 }, { "epoch": 0.48581254684082226, "grad_norm": 0.8046875, "learning_rate": 0.00016173478428362737, "loss": 0.9915, "step": 18920 }, { "epoch": 0.4858382240367441, "grad_norm": 0.78515625, "learning_rate": 0.00016173127227791137, "loss": 0.84, "step": 18921 }, { "epoch": 0.4858639012326659, "grad_norm": 0.73828125, "learning_rate": 0.00016172776014917052, "loss": 0.8434, "step": 18922 }, { "epoch": 0.4858895784285877, "grad_norm": 0.78125, "learning_rate": 0.0001617242478974118, "loss": 0.8372, "step": 18923 }, { "epoch": 0.48591525562450955, "grad_norm": 0.73828125, "learning_rate": 0.0001617207355226422, "loss": 0.9462, "step": 18924 }, { "epoch": 0.4859409328204313, "grad_norm": 0.83203125, "learning_rate": 0.00016171722302486883, "loss": 1.0483, "step": 18925 }, { "epoch": 0.48596661001635316, "grad_norm": 0.72265625, "learning_rate": 0.00016171371040409852, "loss": 0.7946, "step": 18926 }, { "epoch": 0.485992287212275, "grad_norm": 0.8125, "learning_rate": 0.00016171019766033842, "loss": 0.734, "step": 18927 }, { "epoch": 0.4860179644081968, "grad_norm": 0.7890625, "learning_rate": 0.00016170668479359546, "loss": 0.9586, "step": 18928 }, { "epoch": 0.4860436416041186, "grad_norm": 0.8984375, "learning_rate": 0.00016170317180387665, "loss": 1.0004, "step": 18929 }, { "epoch": 0.48606931880004045, "grad_norm": 0.80859375, "learning_rate": 0.00016169965869118898, "loss": 1.0196, "step": 18930 }, { "epoch": 0.4860949959959623, "grad_norm": 0.78125, "learning_rate": 0.00016169614545553945, "loss": 0.8728, "step": 18931 }, { "epoch": 0.48612067319188407, "grad_norm": 0.87109375, "learning_rate": 0.0001616926320969351, "loss": 0.8595, "step": 18932 }, { "epoch": 0.4861463503878059, "grad_norm": 0.8125, "learning_rate": 0.0001616891186153829, "loss": 0.9376, "step": 18933 }, { "epoch": 0.48617202758372774, "grad_norm": 0.79296875, "learning_rate": 0.00016168560501088987, "loss": 0.9488, "step": 18934 }, { "epoch": 0.4861977047796495, "grad_norm": 0.84765625, "learning_rate": 0.00016168209128346298, "loss": 0.9679, "step": 18935 }, { "epoch": 0.48622338197557136, "grad_norm": 0.703125, "learning_rate": 0.00016167857743310928, "loss": 0.8232, "step": 18936 }, { "epoch": 0.4862490591714932, "grad_norm": 0.82421875, "learning_rate": 0.00016167506345983573, "loss": 0.8618, "step": 18937 }, { "epoch": 0.486274736367415, "grad_norm": 0.89453125, "learning_rate": 0.00016167154936364936, "loss": 0.8839, "step": 18938 }, { "epoch": 0.4863004135633368, "grad_norm": 0.72265625, "learning_rate": 0.00016166803514455714, "loss": 0.8185, "step": 18939 }, { "epoch": 0.48632609075925864, "grad_norm": 0.72265625, "learning_rate": 0.00016166452080256614, "loss": 0.8693, "step": 18940 }, { "epoch": 0.4863517679551805, "grad_norm": 0.765625, "learning_rate": 0.0001616610063376833, "loss": 0.8739, "step": 18941 }, { "epoch": 0.48637744515110226, "grad_norm": 0.8203125, "learning_rate": 0.00016165749174991564, "loss": 1.0246, "step": 18942 }, { "epoch": 0.4864031223470241, "grad_norm": 0.71484375, "learning_rate": 0.00016165397703927017, "loss": 0.8221, "step": 18943 }, { "epoch": 0.48642879954294593, "grad_norm": 0.796875, "learning_rate": 0.00016165046220575387, "loss": 0.8178, "step": 18944 }, { "epoch": 0.4864544767388677, "grad_norm": 0.8671875, "learning_rate": 0.0001616469472493738, "loss": 1.0122, "step": 18945 }, { "epoch": 0.48648015393478955, "grad_norm": 0.71875, "learning_rate": 0.00016164343217013691, "loss": 0.8282, "step": 18946 }, { "epoch": 0.4865058311307114, "grad_norm": 0.75390625, "learning_rate": 0.0001616399169680502, "loss": 0.8478, "step": 18947 }, { "epoch": 0.48653150832663317, "grad_norm": 0.7109375, "learning_rate": 0.00016163640164312075, "loss": 0.8884, "step": 18948 }, { "epoch": 0.486557185522555, "grad_norm": 0.734375, "learning_rate": 0.00016163288619535553, "loss": 0.9424, "step": 18949 }, { "epoch": 0.48658286271847684, "grad_norm": 0.84375, "learning_rate": 0.0001616293706247615, "loss": 0.9335, "step": 18950 }, { "epoch": 0.4866085399143987, "grad_norm": 0.73828125, "learning_rate": 0.00016162585493134573, "loss": 0.8965, "step": 18951 }, { "epoch": 0.48663421711032046, "grad_norm": 0.77734375, "learning_rate": 0.00016162233911511519, "loss": 0.8261, "step": 18952 }, { "epoch": 0.4866598943062423, "grad_norm": 0.796875, "learning_rate": 0.00016161882317607687, "loss": 0.9863, "step": 18953 }, { "epoch": 0.4866855715021641, "grad_norm": 0.78515625, "learning_rate": 0.0001616153071142378, "loss": 0.9123, "step": 18954 }, { "epoch": 0.4867112486980859, "grad_norm": 0.71484375, "learning_rate": 0.00016161179092960502, "loss": 0.7399, "step": 18955 }, { "epoch": 0.48673692589400774, "grad_norm": 0.796875, "learning_rate": 0.0001616082746221855, "loss": 0.8195, "step": 18956 }, { "epoch": 0.4867626030899296, "grad_norm": 0.8125, "learning_rate": 0.00016160475819198625, "loss": 0.8499, "step": 18957 }, { "epoch": 0.48678828028585136, "grad_norm": 0.875, "learning_rate": 0.00016160124163901429, "loss": 0.9595, "step": 18958 }, { "epoch": 0.4868139574817732, "grad_norm": 0.78125, "learning_rate": 0.0001615977249632766, "loss": 0.8185, "step": 18959 }, { "epoch": 0.48683963467769503, "grad_norm": 0.75, "learning_rate": 0.00016159420816478018, "loss": 0.95, "step": 18960 }, { "epoch": 0.48686531187361687, "grad_norm": 0.84375, "learning_rate": 0.00016159069124353212, "loss": 0.8789, "step": 18961 }, { "epoch": 0.48689098906953865, "grad_norm": 0.76953125, "learning_rate": 0.00016158717419953935, "loss": 1.0124, "step": 18962 }, { "epoch": 0.4869166662654605, "grad_norm": 0.6953125, "learning_rate": 0.00016158365703280894, "loss": 0.7205, "step": 18963 }, { "epoch": 0.4869423434613823, "grad_norm": 0.71484375, "learning_rate": 0.00016158013974334784, "loss": 0.8781, "step": 18964 }, { "epoch": 0.4869680206573041, "grad_norm": 0.765625, "learning_rate": 0.00016157662233116306, "loss": 0.9608, "step": 18965 }, { "epoch": 0.48699369785322594, "grad_norm": 0.85546875, "learning_rate": 0.00016157310479626165, "loss": 0.9751, "step": 18966 }, { "epoch": 0.4870193750491478, "grad_norm": 0.7734375, "learning_rate": 0.00016156958713865063, "loss": 0.8846, "step": 18967 }, { "epoch": 0.48704505224506955, "grad_norm": 0.90625, "learning_rate": 0.00016156606935833695, "loss": 0.9204, "step": 18968 }, { "epoch": 0.4870707294409914, "grad_norm": 0.75390625, "learning_rate": 0.0001615625514553277, "loss": 0.9022, "step": 18969 }, { "epoch": 0.4870964066369132, "grad_norm": 0.92578125, "learning_rate": 0.00016155903342962978, "loss": 0.9487, "step": 18970 }, { "epoch": 0.48712208383283506, "grad_norm": 0.71484375, "learning_rate": 0.00016155551528125033, "loss": 0.8033, "step": 18971 }, { "epoch": 0.48714776102875684, "grad_norm": 0.734375, "learning_rate": 0.00016155199701019628, "loss": 0.8575, "step": 18972 }, { "epoch": 0.4871734382246787, "grad_norm": 0.8125, "learning_rate": 0.00016154847861647465, "loss": 0.9225, "step": 18973 }, { "epoch": 0.4871991154206005, "grad_norm": 1.9375, "learning_rate": 0.00016154496010009246, "loss": 0.8502, "step": 18974 }, { "epoch": 0.4872247926165223, "grad_norm": 0.7265625, "learning_rate": 0.00016154144146105672, "loss": 0.9138, "step": 18975 }, { "epoch": 0.48725046981244413, "grad_norm": 0.984375, "learning_rate": 0.00016153792269937446, "loss": 0.8553, "step": 18976 }, { "epoch": 0.48727614700836597, "grad_norm": 0.79296875, "learning_rate": 0.00016153440381505267, "loss": 0.9385, "step": 18977 }, { "epoch": 0.48730182420428775, "grad_norm": 0.8203125, "learning_rate": 0.00016153088480809835, "loss": 0.8839, "step": 18978 }, { "epoch": 0.4873275014002096, "grad_norm": 0.796875, "learning_rate": 0.00016152736567851857, "loss": 1.0908, "step": 18979 }, { "epoch": 0.4873531785961314, "grad_norm": 0.8125, "learning_rate": 0.0001615238464263203, "loss": 0.89, "step": 18980 }, { "epoch": 0.48737885579205326, "grad_norm": 0.72265625, "learning_rate": 0.00016152032705151053, "loss": 0.9489, "step": 18981 }, { "epoch": 0.48740453298797504, "grad_norm": 0.83984375, "learning_rate": 0.00016151680755409635, "loss": 0.9952, "step": 18982 }, { "epoch": 0.4874302101838969, "grad_norm": 0.8359375, "learning_rate": 0.00016151328793408473, "loss": 0.9048, "step": 18983 }, { "epoch": 0.4874558873798187, "grad_norm": 0.78515625, "learning_rate": 0.00016150976819148265, "loss": 0.9806, "step": 18984 }, { "epoch": 0.4874815645757405, "grad_norm": 0.78515625, "learning_rate": 0.00016150624832629717, "loss": 0.8669, "step": 18985 }, { "epoch": 0.4875072417716623, "grad_norm": 0.734375, "learning_rate": 0.00016150272833853532, "loss": 0.861, "step": 18986 }, { "epoch": 0.48753291896758416, "grad_norm": 0.734375, "learning_rate": 0.00016149920822820403, "loss": 0.8931, "step": 18987 }, { "epoch": 0.48755859616350594, "grad_norm": 0.7734375, "learning_rate": 0.0001614956879953104, "loss": 0.9611, "step": 18988 }, { "epoch": 0.4875842733594278, "grad_norm": 0.6875, "learning_rate": 0.00016149216763986147, "loss": 0.7637, "step": 18989 }, { "epoch": 0.4876099505553496, "grad_norm": 0.8046875, "learning_rate": 0.00016148864716186413, "loss": 0.8779, "step": 18990 }, { "epoch": 0.48763562775127145, "grad_norm": 0.75390625, "learning_rate": 0.00016148512656132552, "loss": 0.8364, "step": 18991 }, { "epoch": 0.48766130494719323, "grad_norm": 0.71875, "learning_rate": 0.00016148160583825258, "loss": 0.935, "step": 18992 }, { "epoch": 0.48768698214311507, "grad_norm": 0.73828125, "learning_rate": 0.00016147808499265235, "loss": 0.855, "step": 18993 }, { "epoch": 0.4877126593390369, "grad_norm": 0.7578125, "learning_rate": 0.00016147456402453188, "loss": 0.8538, "step": 18994 }, { "epoch": 0.4877383365349587, "grad_norm": 0.7890625, "learning_rate": 0.00016147104293389814, "loss": 0.8945, "step": 18995 }, { "epoch": 0.4877640137308805, "grad_norm": 0.765625, "learning_rate": 0.00016146752172075814, "loss": 1.0018, "step": 18996 }, { "epoch": 0.48778969092680236, "grad_norm": 0.8125, "learning_rate": 0.00016146400038511896, "loss": 0.9479, "step": 18997 }, { "epoch": 0.48781536812272414, "grad_norm": 0.78515625, "learning_rate": 0.00016146047892698758, "loss": 0.9195, "step": 18998 }, { "epoch": 0.48784104531864597, "grad_norm": 0.765625, "learning_rate": 0.000161456957346371, "loss": 0.9381, "step": 18999 }, { "epoch": 0.4878667225145678, "grad_norm": 0.8046875, "learning_rate": 0.00016145343564327624, "loss": 0.8493, "step": 19000 }, { "epoch": 0.4878667225145678, "eval_loss": 0.8943595886230469, "eval_model_preparation_time": 0.0065, "eval_runtime": 406.8839, "eval_samples_per_second": 24.577, "eval_steps_per_second": 0.769, "step": 19000 }, { "epoch": 0.48789239971048964, "grad_norm": 0.6953125, "learning_rate": 0.00016144991381771036, "loss": 0.9363, "step": 19001 }, { "epoch": 0.4879180769064114, "grad_norm": 0.734375, "learning_rate": 0.00016144639186968033, "loss": 0.9164, "step": 19002 }, { "epoch": 0.48794375410233326, "grad_norm": 0.75, "learning_rate": 0.0001614428697991932, "loss": 1.0026, "step": 19003 }, { "epoch": 0.4879694312982551, "grad_norm": 0.80859375, "learning_rate": 0.000161439347606256, "loss": 0.8822, "step": 19004 }, { "epoch": 0.4879951084941769, "grad_norm": 0.73046875, "learning_rate": 0.00016143582529087568, "loss": 0.8658, "step": 19005 }, { "epoch": 0.4880207856900987, "grad_norm": 0.80859375, "learning_rate": 0.00016143230285305937, "loss": 0.931, "step": 19006 }, { "epoch": 0.48804646288602055, "grad_norm": 0.80859375, "learning_rate": 0.00016142878029281398, "loss": 0.8566, "step": 19007 }, { "epoch": 0.48807214008194233, "grad_norm": 0.80859375, "learning_rate": 0.00016142525761014663, "loss": 0.8301, "step": 19008 }, { "epoch": 0.48809781727786417, "grad_norm": 0.81640625, "learning_rate": 0.00016142173480506426, "loss": 0.8058, "step": 19009 }, { "epoch": 0.488123494473786, "grad_norm": 0.7578125, "learning_rate": 0.00016141821187757392, "loss": 0.9344, "step": 19010 }, { "epoch": 0.4881491716697078, "grad_norm": 0.83984375, "learning_rate": 0.00016141468882768263, "loss": 0.9833, "step": 19011 }, { "epoch": 0.4881748488656296, "grad_norm": 0.78125, "learning_rate": 0.00016141116565539742, "loss": 0.9849, "step": 19012 }, { "epoch": 0.48820052606155145, "grad_norm": 0.875, "learning_rate": 0.0001614076423607253, "loss": 0.8869, "step": 19013 }, { "epoch": 0.4882262032574733, "grad_norm": 0.74609375, "learning_rate": 0.0001614041189436733, "loss": 0.8002, "step": 19014 }, { "epoch": 0.48825188045339507, "grad_norm": 0.78515625, "learning_rate": 0.00016140059540424844, "loss": 0.924, "step": 19015 }, { "epoch": 0.4882775576493169, "grad_norm": 0.8828125, "learning_rate": 0.00016139707174245773, "loss": 0.9283, "step": 19016 }, { "epoch": 0.48830323484523874, "grad_norm": 0.89453125, "learning_rate": 0.0001613935479583082, "loss": 0.9046, "step": 19017 }, { "epoch": 0.4883289120411605, "grad_norm": 0.74609375, "learning_rate": 0.00016139002405180688, "loss": 0.9676, "step": 19018 }, { "epoch": 0.48835458923708236, "grad_norm": 0.703125, "learning_rate": 0.0001613865000229608, "loss": 0.9597, "step": 19019 }, { "epoch": 0.4883802664330042, "grad_norm": 0.86328125, "learning_rate": 0.00016138297587177695, "loss": 0.9514, "step": 19020 }, { "epoch": 0.488405943628926, "grad_norm": 0.7109375, "learning_rate": 0.00016137945159826238, "loss": 0.7943, "step": 19021 }, { "epoch": 0.4884316208248478, "grad_norm": 0.8515625, "learning_rate": 0.0001613759272024241, "loss": 0.9851, "step": 19022 }, { "epoch": 0.48845729802076965, "grad_norm": 0.78125, "learning_rate": 0.00016137240268426916, "loss": 0.7961, "step": 19023 }, { "epoch": 0.4884829752166915, "grad_norm": 0.83984375, "learning_rate": 0.00016136887804380455, "loss": 0.8939, "step": 19024 }, { "epoch": 0.48850865241261326, "grad_norm": 0.84375, "learning_rate": 0.00016136535328103733, "loss": 0.9056, "step": 19025 }, { "epoch": 0.4885343296085351, "grad_norm": 0.72265625, "learning_rate": 0.00016136182839597449, "loss": 0.8975, "step": 19026 }, { "epoch": 0.48856000680445694, "grad_norm": 0.75390625, "learning_rate": 0.00016135830338862304, "loss": 0.9481, "step": 19027 }, { "epoch": 0.4885856840003787, "grad_norm": 0.765625, "learning_rate": 0.0001613547782589901, "loss": 0.804, "step": 19028 }, { "epoch": 0.48861136119630055, "grad_norm": 0.7578125, "learning_rate": 0.00016135125300708258, "loss": 0.8632, "step": 19029 }, { "epoch": 0.4886370383922224, "grad_norm": 0.76171875, "learning_rate": 0.00016134772763290757, "loss": 1.0169, "step": 19030 }, { "epoch": 0.48866271558814417, "grad_norm": 0.78515625, "learning_rate": 0.0001613442021364721, "loss": 0.8166, "step": 19031 }, { "epoch": 0.488688392784066, "grad_norm": 0.8046875, "learning_rate": 0.00016134067651778314, "loss": 0.9337, "step": 19032 }, { "epoch": 0.48871406997998784, "grad_norm": 0.828125, "learning_rate": 0.00016133715077684774, "loss": 0.9033, "step": 19033 }, { "epoch": 0.4887397471759097, "grad_norm": 0.79296875, "learning_rate": 0.000161333624913673, "loss": 0.8995, "step": 19034 }, { "epoch": 0.48876542437183146, "grad_norm": 0.76171875, "learning_rate": 0.00016133009892826584, "loss": 0.8608, "step": 19035 }, { "epoch": 0.4887911015677533, "grad_norm": 0.7109375, "learning_rate": 0.00016132657282063331, "loss": 0.9215, "step": 19036 }, { "epoch": 0.48881677876367513, "grad_norm": 0.7265625, "learning_rate": 0.0001613230465907825, "loss": 0.9282, "step": 19037 }, { "epoch": 0.4888424559595969, "grad_norm": 0.77734375, "learning_rate": 0.00016131952023872042, "loss": 0.8451, "step": 19038 }, { "epoch": 0.48886813315551875, "grad_norm": 0.8046875, "learning_rate": 0.00016131599376445404, "loss": 0.9142, "step": 19039 }, { "epoch": 0.4888938103514406, "grad_norm": 0.8828125, "learning_rate": 0.00016131246716799043, "loss": 1.0614, "step": 19040 }, { "epoch": 0.48891948754736236, "grad_norm": 0.71484375, "learning_rate": 0.0001613089404493366, "loss": 0.8266, "step": 19041 }, { "epoch": 0.4889451647432842, "grad_norm": 0.81640625, "learning_rate": 0.0001613054136084996, "loss": 0.9253, "step": 19042 }, { "epoch": 0.48897084193920604, "grad_norm": 0.73828125, "learning_rate": 0.00016130188664548648, "loss": 0.9402, "step": 19043 }, { "epoch": 0.48899651913512787, "grad_norm": 0.83203125, "learning_rate": 0.00016129835956030418, "loss": 0.8737, "step": 19044 }, { "epoch": 0.48902219633104965, "grad_norm": 0.80078125, "learning_rate": 0.0001612948323529598, "loss": 0.9015, "step": 19045 }, { "epoch": 0.4890478735269715, "grad_norm": 0.6796875, "learning_rate": 0.00016129130502346039, "loss": 0.8699, "step": 19046 }, { "epoch": 0.4890735507228933, "grad_norm": 0.76171875, "learning_rate": 0.0001612877775718129, "loss": 0.8377, "step": 19047 }, { "epoch": 0.4890992279188151, "grad_norm": 0.72265625, "learning_rate": 0.00016128424999802446, "loss": 0.8247, "step": 19048 }, { "epoch": 0.48912490511473694, "grad_norm": 0.765625, "learning_rate": 0.000161280722302102, "loss": 0.8437, "step": 19049 }, { "epoch": 0.4891505823106588, "grad_norm": 0.75390625, "learning_rate": 0.0001612771944840526, "loss": 0.8957, "step": 19050 }, { "epoch": 0.48917625950658056, "grad_norm": 0.734375, "learning_rate": 0.0001612736665438833, "loss": 0.7136, "step": 19051 }, { "epoch": 0.4892019367025024, "grad_norm": 0.71875, "learning_rate": 0.0001612701384816011, "loss": 0.9939, "step": 19052 }, { "epoch": 0.48922761389842423, "grad_norm": 0.8046875, "learning_rate": 0.00016126661029721307, "loss": 1.0073, "step": 19053 }, { "epoch": 0.48925329109434607, "grad_norm": 0.8671875, "learning_rate": 0.00016126308199072622, "loss": 0.9689, "step": 19054 }, { "epoch": 0.48927896829026785, "grad_norm": 0.70703125, "learning_rate": 0.00016125955356214759, "loss": 0.7575, "step": 19055 }, { "epoch": 0.4893046454861897, "grad_norm": 0.74609375, "learning_rate": 0.00016125602501148418, "loss": 0.8696, "step": 19056 }, { "epoch": 0.4893303226821115, "grad_norm": 0.75, "learning_rate": 0.00016125249633874305, "loss": 0.8346, "step": 19057 }, { "epoch": 0.4893559998780333, "grad_norm": 0.7734375, "learning_rate": 0.00016124896754393122, "loss": 0.9052, "step": 19058 }, { "epoch": 0.48938167707395513, "grad_norm": 0.765625, "learning_rate": 0.00016124543862705576, "loss": 0.8285, "step": 19059 }, { "epoch": 0.48940735426987697, "grad_norm": 0.80078125, "learning_rate": 0.00016124190958812364, "loss": 0.8786, "step": 19060 }, { "epoch": 0.48943303146579875, "grad_norm": 0.81640625, "learning_rate": 0.00016123838042714195, "loss": 0.9528, "step": 19061 }, { "epoch": 0.4894587086617206, "grad_norm": 0.77734375, "learning_rate": 0.0001612348511441177, "loss": 0.9475, "step": 19062 }, { "epoch": 0.4894843858576424, "grad_norm": 0.8984375, "learning_rate": 0.00016123132173905788, "loss": 1.1434, "step": 19063 }, { "epoch": 0.48951006305356426, "grad_norm": 0.7734375, "learning_rate": 0.0001612277922119696, "loss": 0.9164, "step": 19064 }, { "epoch": 0.48953574024948604, "grad_norm": 0.75390625, "learning_rate": 0.00016122426256285987, "loss": 0.8073, "step": 19065 }, { "epoch": 0.4895614174454079, "grad_norm": 0.78515625, "learning_rate": 0.00016122073279173572, "loss": 0.82, "step": 19066 }, { "epoch": 0.4895870946413297, "grad_norm": 0.75390625, "learning_rate": 0.00016121720289860413, "loss": 0.7861, "step": 19067 }, { "epoch": 0.4896127718372515, "grad_norm": 0.7421875, "learning_rate": 0.00016121367288347225, "loss": 0.883, "step": 19068 }, { "epoch": 0.48963844903317333, "grad_norm": 0.72265625, "learning_rate": 0.000161210142746347, "loss": 0.8144, "step": 19069 }, { "epoch": 0.48966412622909516, "grad_norm": 0.84375, "learning_rate": 0.0001612066124872355, "loss": 0.8836, "step": 19070 }, { "epoch": 0.48968980342501695, "grad_norm": 0.92578125, "learning_rate": 0.00016120308210614473, "loss": 1.0358, "step": 19071 }, { "epoch": 0.4897154806209388, "grad_norm": 0.8046875, "learning_rate": 0.00016119955160308176, "loss": 0.8675, "step": 19072 }, { "epoch": 0.4897411578168606, "grad_norm": 0.7578125, "learning_rate": 0.00016119602097805358, "loss": 0.8926, "step": 19073 }, { "epoch": 0.48976683501278245, "grad_norm": 0.734375, "learning_rate": 0.0001611924902310673, "loss": 1.0126, "step": 19074 }, { "epoch": 0.48979251220870423, "grad_norm": 0.74609375, "learning_rate": 0.0001611889593621299, "loss": 0.8397, "step": 19075 }, { "epoch": 0.48981818940462607, "grad_norm": 0.71484375, "learning_rate": 0.00016118542837124838, "loss": 0.8149, "step": 19076 }, { "epoch": 0.4898438666005479, "grad_norm": 0.73046875, "learning_rate": 0.0001611818972584299, "loss": 0.7798, "step": 19077 }, { "epoch": 0.4898695437964697, "grad_norm": 0.8125, "learning_rate": 0.00016117836602368138, "loss": 0.8722, "step": 19078 }, { "epoch": 0.4898952209923915, "grad_norm": 0.75, "learning_rate": 0.00016117483466700995, "loss": 0.905, "step": 19079 }, { "epoch": 0.48992089818831336, "grad_norm": 0.76171875, "learning_rate": 0.00016117130318842255, "loss": 0.785, "step": 19080 }, { "epoch": 0.48994657538423514, "grad_norm": 0.74609375, "learning_rate": 0.0001611677715879263, "loss": 0.861, "step": 19081 }, { "epoch": 0.489972252580157, "grad_norm": 0.671875, "learning_rate": 0.0001611642398655282, "loss": 0.821, "step": 19082 }, { "epoch": 0.4899979297760788, "grad_norm": 0.7734375, "learning_rate": 0.0001611607080212353, "loss": 1.0065, "step": 19083 }, { "epoch": 0.49002360697200065, "grad_norm": 0.77734375, "learning_rate": 0.0001611571760550546, "loss": 0.7901, "step": 19084 }, { "epoch": 0.4900492841679224, "grad_norm": 0.74609375, "learning_rate": 0.0001611536439669932, "loss": 0.9098, "step": 19085 }, { "epoch": 0.49007496136384426, "grad_norm": 0.78125, "learning_rate": 0.0001611501117570581, "loss": 0.9229, "step": 19086 }, { "epoch": 0.4901006385597661, "grad_norm": 0.78125, "learning_rate": 0.00016114657942525637, "loss": 0.9275, "step": 19087 }, { "epoch": 0.4901263157556879, "grad_norm": 0.7578125, "learning_rate": 0.000161143046971595, "loss": 0.8608, "step": 19088 }, { "epoch": 0.4901519929516097, "grad_norm": 0.7421875, "learning_rate": 0.0001611395143960811, "loss": 0.8876, "step": 19089 }, { "epoch": 0.49017767014753155, "grad_norm": 0.8515625, "learning_rate": 0.00016113598169872163, "loss": 0.8984, "step": 19090 }, { "epoch": 0.49020334734345333, "grad_norm": 0.7578125, "learning_rate": 0.0001611324488795237, "loss": 0.8122, "step": 19091 }, { "epoch": 0.49022902453937517, "grad_norm": 0.8203125, "learning_rate": 0.00016112891593849434, "loss": 0.9075, "step": 19092 }, { "epoch": 0.490254701735297, "grad_norm": 0.828125, "learning_rate": 0.0001611253828756405, "loss": 0.9233, "step": 19093 }, { "epoch": 0.49028037893121884, "grad_norm": 0.74609375, "learning_rate": 0.00016112184969096938, "loss": 0.849, "step": 19094 }, { "epoch": 0.4903060561271406, "grad_norm": 0.8125, "learning_rate": 0.0001611183163844879, "loss": 0.9578, "step": 19095 }, { "epoch": 0.49033173332306246, "grad_norm": 0.7421875, "learning_rate": 0.00016111478295620312, "loss": 0.8548, "step": 19096 }, { "epoch": 0.4903574105189843, "grad_norm": 0.79296875, "learning_rate": 0.0001611112494061221, "loss": 0.9358, "step": 19097 }, { "epoch": 0.4903830877149061, "grad_norm": 0.73828125, "learning_rate": 0.00016110771573425188, "loss": 0.7448, "step": 19098 }, { "epoch": 0.4904087649108279, "grad_norm": 0.7734375, "learning_rate": 0.00016110418194059952, "loss": 0.905, "step": 19099 }, { "epoch": 0.49043444210674975, "grad_norm": 0.79296875, "learning_rate": 0.00016110064802517203, "loss": 0.8925, "step": 19100 }, { "epoch": 0.4904601193026715, "grad_norm": 0.78125, "learning_rate": 0.0001610971139879765, "loss": 0.9705, "step": 19101 }, { "epoch": 0.49048579649859336, "grad_norm": 0.7890625, "learning_rate": 0.00016109357982901988, "loss": 1.0066, "step": 19102 }, { "epoch": 0.4905114736945152, "grad_norm": 0.828125, "learning_rate": 0.0001610900455483093, "loss": 0.9011, "step": 19103 }, { "epoch": 0.49053715089043703, "grad_norm": 0.875, "learning_rate": 0.0001610865111458518, "loss": 1.0112, "step": 19104 }, { "epoch": 0.4905628280863588, "grad_norm": 0.7578125, "learning_rate": 0.0001610829766216544, "loss": 0.88, "step": 19105 }, { "epoch": 0.49058850528228065, "grad_norm": 0.8359375, "learning_rate": 0.00016107944197572412, "loss": 0.8308, "step": 19106 }, { "epoch": 0.4906141824782025, "grad_norm": 0.72265625, "learning_rate": 0.00016107590720806805, "loss": 0.9452, "step": 19107 }, { "epoch": 0.49063985967412427, "grad_norm": 0.75, "learning_rate": 0.0001610723723186932, "loss": 0.8497, "step": 19108 }, { "epoch": 0.4906655368700461, "grad_norm": 0.7109375, "learning_rate": 0.00016106883730760663, "loss": 0.846, "step": 19109 }, { "epoch": 0.49069121406596794, "grad_norm": 0.7890625, "learning_rate": 0.00016106530217481537, "loss": 0.9919, "step": 19110 }, { "epoch": 0.4907168912618897, "grad_norm": 0.734375, "learning_rate": 0.0001610617669203265, "loss": 0.9411, "step": 19111 }, { "epoch": 0.49074256845781156, "grad_norm": 0.73828125, "learning_rate": 0.00016105823154414703, "loss": 0.8106, "step": 19112 }, { "epoch": 0.4907682456537334, "grad_norm": 0.7421875, "learning_rate": 0.00016105469604628403, "loss": 0.8333, "step": 19113 }, { "epoch": 0.49079392284965523, "grad_norm": 0.7578125, "learning_rate": 0.00016105116042674456, "loss": 0.8255, "step": 19114 }, { "epoch": 0.490819600045577, "grad_norm": 0.81640625, "learning_rate": 0.0001610476246855356, "loss": 0.9038, "step": 19115 }, { "epoch": 0.49084527724149885, "grad_norm": 0.73828125, "learning_rate": 0.00016104408882266426, "loss": 0.8793, "step": 19116 }, { "epoch": 0.4908709544374207, "grad_norm": 0.765625, "learning_rate": 0.00016104055283813756, "loss": 0.9826, "step": 19117 }, { "epoch": 0.49089663163334246, "grad_norm": 0.75, "learning_rate": 0.00016103701673196255, "loss": 0.9763, "step": 19118 }, { "epoch": 0.4909223088292643, "grad_norm": 0.7734375, "learning_rate": 0.00016103348050414628, "loss": 0.9707, "step": 19119 }, { "epoch": 0.49094798602518613, "grad_norm": 0.77734375, "learning_rate": 0.0001610299441546958, "loss": 0.9127, "step": 19120 }, { "epoch": 0.4909736632211079, "grad_norm": 0.80078125, "learning_rate": 0.00016102640768361816, "loss": 0.801, "step": 19121 }, { "epoch": 0.49099934041702975, "grad_norm": 0.76171875, "learning_rate": 0.00016102287109092037, "loss": 0.942, "step": 19122 }, { "epoch": 0.4910250176129516, "grad_norm": 0.796875, "learning_rate": 0.00016101933437660956, "loss": 0.8386, "step": 19123 }, { "epoch": 0.4910506948088734, "grad_norm": 0.72265625, "learning_rate": 0.0001610157975406927, "loss": 0.8198, "step": 19124 }, { "epoch": 0.4910763720047952, "grad_norm": 0.765625, "learning_rate": 0.00016101226058317687, "loss": 0.8754, "step": 19125 }, { "epoch": 0.49110204920071704, "grad_norm": 0.78515625, "learning_rate": 0.0001610087235040691, "loss": 0.8261, "step": 19126 }, { "epoch": 0.4911277263966389, "grad_norm": 0.75, "learning_rate": 0.0001610051863033765, "loss": 0.9022, "step": 19127 }, { "epoch": 0.49115340359256066, "grad_norm": 0.80859375, "learning_rate": 0.00016100164898110602, "loss": 0.828, "step": 19128 }, { "epoch": 0.4911790807884825, "grad_norm": 0.7734375, "learning_rate": 0.00016099811153726478, "loss": 0.8084, "step": 19129 }, { "epoch": 0.4912047579844043, "grad_norm": 0.8203125, "learning_rate": 0.00016099457397185984, "loss": 0.8798, "step": 19130 }, { "epoch": 0.4912304351803261, "grad_norm": 0.85546875, "learning_rate": 0.0001609910362848982, "loss": 0.9399, "step": 19131 }, { "epoch": 0.49125611237624794, "grad_norm": 0.79296875, "learning_rate": 0.00016098749847638695, "loss": 0.8532, "step": 19132 }, { "epoch": 0.4912817895721698, "grad_norm": 0.85546875, "learning_rate": 0.0001609839605463331, "loss": 0.9515, "step": 19133 }, { "epoch": 0.4913074667680916, "grad_norm": 0.7421875, "learning_rate": 0.00016098042249474377, "loss": 0.7709, "step": 19134 }, { "epoch": 0.4913331439640134, "grad_norm": 0.75390625, "learning_rate": 0.00016097688432162595, "loss": 0.9226, "step": 19135 }, { "epoch": 0.49135882115993523, "grad_norm": 0.87109375, "learning_rate": 0.00016097334602698665, "loss": 0.8569, "step": 19136 }, { "epoch": 0.49138449835585707, "grad_norm": 0.81640625, "learning_rate": 0.00016096980761083304, "loss": 0.997, "step": 19137 }, { "epoch": 0.49141017555177885, "grad_norm": 0.77734375, "learning_rate": 0.0001609662690731721, "loss": 0.8688, "step": 19138 }, { "epoch": 0.4914358527477007, "grad_norm": 0.81640625, "learning_rate": 0.00016096273041401089, "loss": 0.7622, "step": 19139 }, { "epoch": 0.4914615299436225, "grad_norm": 0.81640625, "learning_rate": 0.00016095919163335647, "loss": 0.9683, "step": 19140 }, { "epoch": 0.4914872071395443, "grad_norm": 0.85546875, "learning_rate": 0.00016095565273121587, "loss": 0.84, "step": 19141 }, { "epoch": 0.49151288433546614, "grad_norm": 0.7421875, "learning_rate": 0.00016095211370759614, "loss": 0.8619, "step": 19142 }, { "epoch": 0.491538561531388, "grad_norm": 0.7734375, "learning_rate": 0.0001609485745625044, "loss": 0.8242, "step": 19143 }, { "epoch": 0.4915642387273098, "grad_norm": 0.8046875, "learning_rate": 0.00016094503529594763, "loss": 0.9884, "step": 19144 }, { "epoch": 0.4915899159232316, "grad_norm": 0.73828125, "learning_rate": 0.0001609414959079329, "loss": 0.8562, "step": 19145 }, { "epoch": 0.4916155931191534, "grad_norm": 0.8203125, "learning_rate": 0.0001609379563984673, "loss": 0.8315, "step": 19146 }, { "epoch": 0.49164127031507526, "grad_norm": 0.71484375, "learning_rate": 0.00016093441676755788, "loss": 0.9272, "step": 19147 }, { "epoch": 0.49166694751099704, "grad_norm": 0.73046875, "learning_rate": 0.00016093087701521164, "loss": 0.905, "step": 19148 }, { "epoch": 0.4916926247069189, "grad_norm": 0.79296875, "learning_rate": 0.00016092733714143567, "loss": 0.964, "step": 19149 }, { "epoch": 0.4917183019028407, "grad_norm": 1.1171875, "learning_rate": 0.000160923797146237, "loss": 0.7351, "step": 19150 }, { "epoch": 0.4917439790987625, "grad_norm": 0.8125, "learning_rate": 0.00016092025702962273, "loss": 0.8629, "step": 19151 }, { "epoch": 0.49176965629468433, "grad_norm": 0.63671875, "learning_rate": 0.0001609167167915999, "loss": 0.7137, "step": 19152 }, { "epoch": 0.49179533349060617, "grad_norm": 0.82421875, "learning_rate": 0.00016091317643217553, "loss": 1.1063, "step": 19153 }, { "epoch": 0.491821010686528, "grad_norm": 0.7578125, "learning_rate": 0.00016090963595135672, "loss": 0.8836, "step": 19154 }, { "epoch": 0.4918466878824498, "grad_norm": 0.78125, "learning_rate": 0.0001609060953491505, "loss": 0.8521, "step": 19155 }, { "epoch": 0.4918723650783716, "grad_norm": 0.76953125, "learning_rate": 0.0001609025546255639, "loss": 0.8074, "step": 19156 }, { "epoch": 0.49189804227429346, "grad_norm": 0.7890625, "learning_rate": 0.00016089901378060406, "loss": 0.9321, "step": 19157 }, { "epoch": 0.49192371947021524, "grad_norm": 0.734375, "learning_rate": 0.00016089547281427797, "loss": 0.8996, "step": 19158 }, { "epoch": 0.4919493966661371, "grad_norm": 0.92578125, "learning_rate": 0.0001608919317265927, "loss": 0.8614, "step": 19159 }, { "epoch": 0.4919750738620589, "grad_norm": 0.76953125, "learning_rate": 0.00016088839051755534, "loss": 1.0034, "step": 19160 }, { "epoch": 0.4920007510579807, "grad_norm": 0.70703125, "learning_rate": 0.00016088484918717286, "loss": 0.8586, "step": 19161 }, { "epoch": 0.4920264282539025, "grad_norm": 0.8046875, "learning_rate": 0.0001608813077354524, "loss": 0.9322, "step": 19162 }, { "epoch": 0.49205210544982436, "grad_norm": 0.7734375, "learning_rate": 0.00016087776616240102, "loss": 0.8825, "step": 19163 }, { "epoch": 0.4920777826457462, "grad_norm": 0.80078125, "learning_rate": 0.00016087422446802571, "loss": 0.8344, "step": 19164 }, { "epoch": 0.492103459841668, "grad_norm": 0.80859375, "learning_rate": 0.00016087068265233358, "loss": 0.9292, "step": 19165 }, { "epoch": 0.4921291370375898, "grad_norm": 0.75, "learning_rate": 0.0001608671407153317, "loss": 0.9999, "step": 19166 }, { "epoch": 0.49215481423351165, "grad_norm": 0.81640625, "learning_rate": 0.0001608635986570271, "loss": 0.8774, "step": 19167 }, { "epoch": 0.49218049142943343, "grad_norm": 0.796875, "learning_rate": 0.00016086005647742682, "loss": 0.9044, "step": 19168 }, { "epoch": 0.49220616862535527, "grad_norm": 0.76171875, "learning_rate": 0.00016085651417653798, "loss": 0.8831, "step": 19169 }, { "epoch": 0.4922318458212771, "grad_norm": 0.81640625, "learning_rate": 0.0001608529717543676, "loss": 0.9588, "step": 19170 }, { "epoch": 0.4922575230171989, "grad_norm": 0.80078125, "learning_rate": 0.0001608494292109227, "loss": 0.9217, "step": 19171 }, { "epoch": 0.4922832002131207, "grad_norm": 0.828125, "learning_rate": 0.00016084588654621045, "loss": 0.8489, "step": 19172 }, { "epoch": 0.49230887740904256, "grad_norm": 0.703125, "learning_rate": 0.00016084234376023782, "loss": 0.8199, "step": 19173 }, { "epoch": 0.4923345546049644, "grad_norm": 0.73046875, "learning_rate": 0.00016083880085301187, "loss": 0.8642, "step": 19174 }, { "epoch": 0.49236023180088617, "grad_norm": 0.80078125, "learning_rate": 0.0001608352578245397, "loss": 0.9824, "step": 19175 }, { "epoch": 0.492385908996808, "grad_norm": 0.80078125, "learning_rate": 0.00016083171467482836, "loss": 0.9208, "step": 19176 }, { "epoch": 0.49241158619272984, "grad_norm": 0.7578125, "learning_rate": 0.00016082817140388489, "loss": 0.838, "step": 19177 }, { "epoch": 0.4924372633886516, "grad_norm": 0.80078125, "learning_rate": 0.0001608246280117164, "loss": 0.8504, "step": 19178 }, { "epoch": 0.49246294058457346, "grad_norm": 0.76953125, "learning_rate": 0.00016082108449832992, "loss": 0.8886, "step": 19179 }, { "epoch": 0.4924886177804953, "grad_norm": 0.79296875, "learning_rate": 0.0001608175408637325, "loss": 0.975, "step": 19180 }, { "epoch": 0.4925142949764171, "grad_norm": 0.77734375, "learning_rate": 0.00016081399710793118, "loss": 1.0011, "step": 19181 }, { "epoch": 0.4925399721723389, "grad_norm": 0.75390625, "learning_rate": 0.0001608104532309331, "loss": 0.975, "step": 19182 }, { "epoch": 0.49256564936826075, "grad_norm": 0.8046875, "learning_rate": 0.0001608069092327453, "loss": 0.9459, "step": 19183 }, { "epoch": 0.4925913265641826, "grad_norm": 0.79296875, "learning_rate": 0.00016080336511337478, "loss": 0.8202, "step": 19184 }, { "epoch": 0.49261700376010437, "grad_norm": 0.7734375, "learning_rate": 0.00016079982087282863, "loss": 0.7942, "step": 19185 }, { "epoch": 0.4926426809560262, "grad_norm": 0.796875, "learning_rate": 0.00016079627651111394, "loss": 1.0512, "step": 19186 }, { "epoch": 0.49266835815194804, "grad_norm": 0.77734375, "learning_rate": 0.00016079273202823782, "loss": 0.8241, "step": 19187 }, { "epoch": 0.4926940353478698, "grad_norm": 0.7421875, "learning_rate": 0.0001607891874242072, "loss": 0.9126, "step": 19188 }, { "epoch": 0.49271971254379165, "grad_norm": 0.796875, "learning_rate": 0.00016078564269902926, "loss": 0.9143, "step": 19189 }, { "epoch": 0.4927453897397135, "grad_norm": 0.73828125, "learning_rate": 0.00016078209785271103, "loss": 0.871, "step": 19190 }, { "epoch": 0.49277106693563527, "grad_norm": 0.84765625, "learning_rate": 0.00016077855288525956, "loss": 1.0322, "step": 19191 }, { "epoch": 0.4927967441315571, "grad_norm": 0.80859375, "learning_rate": 0.0001607750077966819, "loss": 0.9064, "step": 19192 }, { "epoch": 0.49282242132747894, "grad_norm": 0.78515625, "learning_rate": 0.00016077146258698522, "loss": 0.8823, "step": 19193 }, { "epoch": 0.4928480985234008, "grad_norm": 0.70703125, "learning_rate": 0.00016076791725617642, "loss": 0.8994, "step": 19194 }, { "epoch": 0.49287377571932256, "grad_norm": 0.80859375, "learning_rate": 0.00016076437180426268, "loss": 0.9049, "step": 19195 }, { "epoch": 0.4928994529152444, "grad_norm": 0.81640625, "learning_rate": 0.00016076082623125103, "loss": 1.0274, "step": 19196 }, { "epoch": 0.49292513011116623, "grad_norm": 0.76171875, "learning_rate": 0.00016075728053714853, "loss": 0.8927, "step": 19197 }, { "epoch": 0.492950807307088, "grad_norm": 0.8046875, "learning_rate": 0.00016075373472196228, "loss": 1.0416, "step": 19198 }, { "epoch": 0.49297648450300985, "grad_norm": 0.90625, "learning_rate": 0.00016075018878569934, "loss": 0.8566, "step": 19199 }, { "epoch": 0.4930021616989317, "grad_norm": 0.8046875, "learning_rate": 0.00016074664272836673, "loss": 0.8779, "step": 19200 }, { "epoch": 0.49302783889485347, "grad_norm": 0.7890625, "learning_rate": 0.00016074309654997158, "loss": 0.895, "step": 19201 }, { "epoch": 0.4930535160907753, "grad_norm": 0.765625, "learning_rate": 0.00016073955025052087, "loss": 0.9032, "step": 19202 }, { "epoch": 0.49307919328669714, "grad_norm": 0.71484375, "learning_rate": 0.00016073600383002178, "loss": 0.8899, "step": 19203 }, { "epoch": 0.493104870482619, "grad_norm": 0.77734375, "learning_rate": 0.00016073245728848127, "loss": 0.7969, "step": 19204 }, { "epoch": 0.49313054767854075, "grad_norm": 0.73046875, "learning_rate": 0.0001607289106259065, "loss": 1.0596, "step": 19205 }, { "epoch": 0.4931562248744626, "grad_norm": 0.796875, "learning_rate": 0.00016072536384230447, "loss": 0.9947, "step": 19206 }, { "epoch": 0.4931819020703844, "grad_norm": 0.7421875, "learning_rate": 0.00016072181693768228, "loss": 0.8086, "step": 19207 }, { "epoch": 0.4932075792663062, "grad_norm": 0.7578125, "learning_rate": 0.00016071826991204705, "loss": 0.7742, "step": 19208 }, { "epoch": 0.49323325646222804, "grad_norm": 0.8046875, "learning_rate": 0.00016071472276540572, "loss": 0.8326, "step": 19209 }, { "epoch": 0.4932589336581499, "grad_norm": 0.80859375, "learning_rate": 0.00016071117549776545, "loss": 0.8844, "step": 19210 }, { "epoch": 0.49328461085407166, "grad_norm": 0.76953125, "learning_rate": 0.00016070762810913329, "loss": 0.8263, "step": 19211 }, { "epoch": 0.4933102880499935, "grad_norm": 0.83203125, "learning_rate": 0.00016070408059951633, "loss": 0.8346, "step": 19212 }, { "epoch": 0.49333596524591533, "grad_norm": 0.74609375, "learning_rate": 0.0001607005329689216, "loss": 0.8459, "step": 19213 }, { "epoch": 0.4933616424418371, "grad_norm": 0.75390625, "learning_rate": 0.00016069698521735618, "loss": 0.9051, "step": 19214 }, { "epoch": 0.49338731963775895, "grad_norm": 0.74609375, "learning_rate": 0.00016069343734482718, "loss": 0.9269, "step": 19215 }, { "epoch": 0.4934129968336808, "grad_norm": 0.76171875, "learning_rate": 0.00016068988935134164, "loss": 0.9501, "step": 19216 }, { "epoch": 0.4934386740296026, "grad_norm": 0.75390625, "learning_rate": 0.00016068634123690662, "loss": 0.8032, "step": 19217 }, { "epoch": 0.4934643512255244, "grad_norm": 0.74609375, "learning_rate": 0.0001606827930015292, "loss": 0.9353, "step": 19218 }, { "epoch": 0.49349002842144624, "grad_norm": 0.76953125, "learning_rate": 0.0001606792446452165, "loss": 1.0264, "step": 19219 }, { "epoch": 0.4935157056173681, "grad_norm": 0.77734375, "learning_rate": 0.0001606756961679755, "loss": 0.9292, "step": 19220 }, { "epoch": 0.49354138281328985, "grad_norm": 0.71875, "learning_rate": 0.00016067214756981333, "loss": 1.0448, "step": 19221 }, { "epoch": 0.4935670600092117, "grad_norm": 0.87109375, "learning_rate": 0.00016066859885073705, "loss": 0.9063, "step": 19222 }, { "epoch": 0.4935927372051335, "grad_norm": 0.7421875, "learning_rate": 0.00016066505001075372, "loss": 0.8816, "step": 19223 }, { "epoch": 0.4936184144010553, "grad_norm": 0.796875, "learning_rate": 0.00016066150104987044, "loss": 0.9522, "step": 19224 }, { "epoch": 0.49364409159697714, "grad_norm": 0.77734375, "learning_rate": 0.00016065795196809426, "loss": 0.8807, "step": 19225 }, { "epoch": 0.493669768792899, "grad_norm": 0.8046875, "learning_rate": 0.0001606544027654323, "loss": 0.914, "step": 19226 }, { "epoch": 0.4936954459888208, "grad_norm": 0.7109375, "learning_rate": 0.0001606508534418915, "loss": 0.9278, "step": 19227 }, { "epoch": 0.4937211231847426, "grad_norm": 0.75, "learning_rate": 0.00016064730399747914, "loss": 0.8284, "step": 19228 }, { "epoch": 0.49374680038066443, "grad_norm": 0.80078125, "learning_rate": 0.0001606437544322021, "loss": 0.934, "step": 19229 }, { "epoch": 0.49377247757658627, "grad_norm": 0.72265625, "learning_rate": 0.00016064020474606755, "loss": 0.8704, "step": 19230 }, { "epoch": 0.49379815477250805, "grad_norm": 0.72265625, "learning_rate": 0.00016063665493908257, "loss": 0.8921, "step": 19231 }, { "epoch": 0.4938238319684299, "grad_norm": 0.78125, "learning_rate": 0.00016063310501125423, "loss": 0.7808, "step": 19232 }, { "epoch": 0.4938495091643517, "grad_norm": 0.7421875, "learning_rate": 0.00016062955496258958, "loss": 0.8527, "step": 19233 }, { "epoch": 0.4938751863602735, "grad_norm": 0.75390625, "learning_rate": 0.0001606260047930957, "loss": 0.7969, "step": 19234 }, { "epoch": 0.49390086355619534, "grad_norm": 0.75390625, "learning_rate": 0.00016062245450277964, "loss": 1.0068, "step": 19235 }, { "epoch": 0.49392654075211717, "grad_norm": 0.7265625, "learning_rate": 0.0001606189040916485, "loss": 0.8592, "step": 19236 }, { "epoch": 0.493952217948039, "grad_norm": 0.78515625, "learning_rate": 0.0001606153535597094, "loss": 0.9091, "step": 19237 }, { "epoch": 0.4939778951439608, "grad_norm": 0.71875, "learning_rate": 0.00016061180290696933, "loss": 0.8641, "step": 19238 }, { "epoch": 0.4940035723398826, "grad_norm": 0.74609375, "learning_rate": 0.00016060825213343546, "loss": 0.8798, "step": 19239 }, { "epoch": 0.49402924953580446, "grad_norm": 0.78125, "learning_rate": 0.00016060470123911483, "loss": 0.9374, "step": 19240 }, { "epoch": 0.49405492673172624, "grad_norm": 0.734375, "learning_rate": 0.00016060115022401445, "loss": 0.8083, "step": 19241 }, { "epoch": 0.4940806039276481, "grad_norm": 0.78125, "learning_rate": 0.00016059759908814146, "loss": 0.8981, "step": 19242 }, { "epoch": 0.4941062811235699, "grad_norm": 0.8046875, "learning_rate": 0.00016059404783150296, "loss": 0.8361, "step": 19243 }, { "epoch": 0.4941319583194917, "grad_norm": 0.92578125, "learning_rate": 0.00016059049645410597, "loss": 1.0318, "step": 19244 }, { "epoch": 0.49415763551541353, "grad_norm": 0.74609375, "learning_rate": 0.0001605869449559576, "loss": 0.8518, "step": 19245 }, { "epoch": 0.49418331271133537, "grad_norm": 0.85546875, "learning_rate": 0.00016058339333706494, "loss": 1.0988, "step": 19246 }, { "epoch": 0.4942089899072572, "grad_norm": 0.75, "learning_rate": 0.00016057984159743502, "loss": 0.9117, "step": 19247 }, { "epoch": 0.494234667103179, "grad_norm": 0.890625, "learning_rate": 0.00016057628973707496, "loss": 0.8916, "step": 19248 }, { "epoch": 0.4942603442991008, "grad_norm": 0.85546875, "learning_rate": 0.00016057273775599184, "loss": 0.9275, "step": 19249 }, { "epoch": 0.49428602149502265, "grad_norm": 0.86328125, "learning_rate": 0.0001605691856541927, "loss": 0.9643, "step": 19250 }, { "epoch": 0.49431169869094443, "grad_norm": 0.82421875, "learning_rate": 0.0001605656334316847, "loss": 0.8456, "step": 19251 }, { "epoch": 0.49433737588686627, "grad_norm": 0.82421875, "learning_rate": 0.0001605620810884748, "loss": 0.8586, "step": 19252 }, { "epoch": 0.4943630530827881, "grad_norm": 0.7890625, "learning_rate": 0.00016055852862457016, "loss": 1.0622, "step": 19253 }, { "epoch": 0.4943887302787099, "grad_norm": 0.86328125, "learning_rate": 0.00016055497603997788, "loss": 0.986, "step": 19254 }, { "epoch": 0.4944144074746317, "grad_norm": 0.69140625, "learning_rate": 0.00016055142333470495, "loss": 0.7737, "step": 19255 }, { "epoch": 0.49444008467055356, "grad_norm": 0.78125, "learning_rate": 0.0001605478705087585, "loss": 0.7916, "step": 19256 }, { "epoch": 0.4944657618664754, "grad_norm": 0.77734375, "learning_rate": 0.00016054431756214565, "loss": 0.9557, "step": 19257 }, { "epoch": 0.4944914390623972, "grad_norm": 0.86328125, "learning_rate": 0.00016054076449487344, "loss": 0.8078, "step": 19258 }, { "epoch": 0.494517116258319, "grad_norm": 0.77734375, "learning_rate": 0.00016053721130694895, "loss": 0.8613, "step": 19259 }, { "epoch": 0.49454279345424085, "grad_norm": 0.796875, "learning_rate": 0.00016053365799837926, "loss": 0.9121, "step": 19260 }, { "epoch": 0.49456847065016263, "grad_norm": 0.8125, "learning_rate": 0.00016053010456917145, "loss": 0.9121, "step": 19261 }, { "epoch": 0.49459414784608446, "grad_norm": 0.75, "learning_rate": 0.0001605265510193326, "loss": 1.0557, "step": 19262 }, { "epoch": 0.4946198250420063, "grad_norm": 0.79296875, "learning_rate": 0.00016052299734886982, "loss": 0.6789, "step": 19263 }, { "epoch": 0.4946455022379281, "grad_norm": 0.83984375, "learning_rate": 0.0001605194435577902, "loss": 0.9249, "step": 19264 }, { "epoch": 0.4946711794338499, "grad_norm": 0.83203125, "learning_rate": 0.00016051588964610076, "loss": 0.9806, "step": 19265 }, { "epoch": 0.49469685662977175, "grad_norm": 0.7265625, "learning_rate": 0.00016051233561380863, "loss": 0.8169, "step": 19266 }, { "epoch": 0.4947225338256936, "grad_norm": 0.75, "learning_rate": 0.00016050878146092086, "loss": 0.912, "step": 19267 }, { "epoch": 0.49474821102161537, "grad_norm": 0.75, "learning_rate": 0.00016050522718744458, "loss": 1.0044, "step": 19268 }, { "epoch": 0.4947738882175372, "grad_norm": 0.80859375, "learning_rate": 0.00016050167279338683, "loss": 0.8968, "step": 19269 }, { "epoch": 0.49479956541345904, "grad_norm": 0.78515625, "learning_rate": 0.00016049811827875473, "loss": 0.9878, "step": 19270 }, { "epoch": 0.4948252426093808, "grad_norm": 0.76171875, "learning_rate": 0.00016049456364355534, "loss": 0.8709, "step": 19271 }, { "epoch": 0.49485091980530266, "grad_norm": 0.71875, "learning_rate": 0.00016049100888779574, "loss": 0.8782, "step": 19272 }, { "epoch": 0.4948765970012245, "grad_norm": 0.7578125, "learning_rate": 0.00016048745401148304, "loss": 1.1569, "step": 19273 }, { "epoch": 0.4949022741971463, "grad_norm": 0.84375, "learning_rate": 0.00016048389901462428, "loss": 0.8734, "step": 19274 }, { "epoch": 0.4949279513930681, "grad_norm": 0.765625, "learning_rate": 0.00016048034389722662, "loss": 1.0323, "step": 19275 }, { "epoch": 0.49495362858898995, "grad_norm": 0.7578125, "learning_rate": 0.00016047678865929705, "loss": 1.024, "step": 19276 }, { "epoch": 0.4949793057849118, "grad_norm": 0.7421875, "learning_rate": 0.0001604732333008427, "loss": 0.8084, "step": 19277 }, { "epoch": 0.49500498298083356, "grad_norm": 0.7265625, "learning_rate": 0.0001604696778218707, "loss": 0.9287, "step": 19278 }, { "epoch": 0.4950306601767554, "grad_norm": 0.89453125, "learning_rate": 0.00016046612222238809, "loss": 0.8271, "step": 19279 }, { "epoch": 0.49505633737267724, "grad_norm": 0.70703125, "learning_rate": 0.00016046256650240195, "loss": 0.7907, "step": 19280 }, { "epoch": 0.495082014568599, "grad_norm": 0.7890625, "learning_rate": 0.00016045901066191935, "loss": 0.8461, "step": 19281 }, { "epoch": 0.49510769176452085, "grad_norm": 0.71875, "learning_rate": 0.0001604554547009474, "loss": 0.8153, "step": 19282 }, { "epoch": 0.4951333689604427, "grad_norm": 0.7578125, "learning_rate": 0.0001604518986194932, "loss": 0.8726, "step": 19283 }, { "epoch": 0.49515904615636447, "grad_norm": 0.76171875, "learning_rate": 0.0001604483424175639, "loss": 0.8469, "step": 19284 }, { "epoch": 0.4951847233522863, "grad_norm": 0.7734375, "learning_rate": 0.00016044478609516643, "loss": 0.8621, "step": 19285 }, { "epoch": 0.49521040054820814, "grad_norm": 0.8125, "learning_rate": 0.00016044122965230798, "loss": 0.8025, "step": 19286 }, { "epoch": 0.49523607774413, "grad_norm": 0.7578125, "learning_rate": 0.00016043767308899563, "loss": 0.8802, "step": 19287 }, { "epoch": 0.49526175494005176, "grad_norm": 0.77734375, "learning_rate": 0.00016043411640523646, "loss": 0.9515, "step": 19288 }, { "epoch": 0.4952874321359736, "grad_norm": 0.7578125, "learning_rate": 0.00016043055960103753, "loss": 0.7889, "step": 19289 }, { "epoch": 0.49531310933189543, "grad_norm": 0.76171875, "learning_rate": 0.00016042700267640596, "loss": 0.9596, "step": 19290 }, { "epoch": 0.4953387865278172, "grad_norm": 0.83203125, "learning_rate": 0.00016042344563134886, "loss": 0.8715, "step": 19291 }, { "epoch": 0.49536446372373905, "grad_norm": 0.83203125, "learning_rate": 0.00016041988846587327, "loss": 0.7875, "step": 19292 }, { "epoch": 0.4953901409196609, "grad_norm": 0.73828125, "learning_rate": 0.00016041633117998632, "loss": 0.8509, "step": 19293 }, { "epoch": 0.49541581811558266, "grad_norm": 0.796875, "learning_rate": 0.00016041277377369504, "loss": 0.9065, "step": 19294 }, { "epoch": 0.4954414953115045, "grad_norm": 0.75390625, "learning_rate": 0.00016040921624700655, "loss": 0.8098, "step": 19295 }, { "epoch": 0.49546717250742633, "grad_norm": 0.8046875, "learning_rate": 0.000160405658599928, "loss": 0.9903, "step": 19296 }, { "epoch": 0.49549284970334817, "grad_norm": 0.765625, "learning_rate": 0.0001604021008324664, "loss": 0.8774, "step": 19297 }, { "epoch": 0.49551852689926995, "grad_norm": 0.83984375, "learning_rate": 0.00016039854294462892, "loss": 0.8713, "step": 19298 }, { "epoch": 0.4955442040951918, "grad_norm": 0.71484375, "learning_rate": 0.00016039498493642252, "loss": 0.7492, "step": 19299 }, { "epoch": 0.4955698812911136, "grad_norm": 0.7890625, "learning_rate": 0.00016039142680785443, "loss": 0.8659, "step": 19300 }, { "epoch": 0.4955955584870354, "grad_norm": 0.78125, "learning_rate": 0.00016038786855893165, "loss": 0.942, "step": 19301 }, { "epoch": 0.49562123568295724, "grad_norm": 0.72265625, "learning_rate": 0.0001603843101896613, "loss": 0.7701, "step": 19302 }, { "epoch": 0.4956469128788791, "grad_norm": 0.765625, "learning_rate": 0.00016038075170005046, "loss": 0.8561, "step": 19303 }, { "epoch": 0.49567259007480086, "grad_norm": 0.78515625, "learning_rate": 0.00016037719309010628, "loss": 0.9052, "step": 19304 }, { "epoch": 0.4956982672707227, "grad_norm": 0.76171875, "learning_rate": 0.0001603736343598358, "loss": 0.9032, "step": 19305 }, { "epoch": 0.49572394446664453, "grad_norm": 0.73046875, "learning_rate": 0.0001603700755092461, "loss": 1.0151, "step": 19306 }, { "epoch": 0.49574962166256636, "grad_norm": 0.78515625, "learning_rate": 0.0001603665165383443, "loss": 0.9269, "step": 19307 }, { "epoch": 0.49577529885848814, "grad_norm": 0.87890625, "learning_rate": 0.0001603629574471375, "loss": 0.9031, "step": 19308 }, { "epoch": 0.49580097605441, "grad_norm": 0.79296875, "learning_rate": 0.00016035939823563276, "loss": 0.9239, "step": 19309 }, { "epoch": 0.4958266532503318, "grad_norm": 0.78515625, "learning_rate": 0.00016035583890383718, "loss": 0.9681, "step": 19310 }, { "epoch": 0.4958523304462536, "grad_norm": 0.7734375, "learning_rate": 0.0001603522794517579, "loss": 0.7657, "step": 19311 }, { "epoch": 0.49587800764217543, "grad_norm": 0.85546875, "learning_rate": 0.00016034871987940196, "loss": 0.9157, "step": 19312 }, { "epoch": 0.49590368483809727, "grad_norm": 0.73046875, "learning_rate": 0.00016034516018677647, "loss": 0.8818, "step": 19313 }, { "epoch": 0.49592936203401905, "grad_norm": 0.7578125, "learning_rate": 0.00016034160037388853, "loss": 0.8225, "step": 19314 }, { "epoch": 0.4959550392299409, "grad_norm": 0.75, "learning_rate": 0.00016033804044074523, "loss": 0.849, "step": 19315 }, { "epoch": 0.4959807164258627, "grad_norm": 0.78515625, "learning_rate": 0.00016033448038735366, "loss": 0.9037, "step": 19316 }, { "epoch": 0.49600639362178456, "grad_norm": 0.7578125, "learning_rate": 0.00016033092021372093, "loss": 0.8007, "step": 19317 }, { "epoch": 0.49603207081770634, "grad_norm": 0.734375, "learning_rate": 0.00016032735991985413, "loss": 0.7612, "step": 19318 }, { "epoch": 0.4960577480136282, "grad_norm": 0.82421875, "learning_rate": 0.00016032379950576035, "loss": 0.9752, "step": 19319 }, { "epoch": 0.49608342520955, "grad_norm": 0.76953125, "learning_rate": 0.00016032023897144667, "loss": 0.9643, "step": 19320 }, { "epoch": 0.4961091024054718, "grad_norm": 0.73828125, "learning_rate": 0.00016031667831692024, "loss": 0.787, "step": 19321 }, { "epoch": 0.4961347796013936, "grad_norm": 0.69921875, "learning_rate": 0.00016031311754218807, "loss": 0.742, "step": 19322 }, { "epoch": 0.49616045679731546, "grad_norm": 0.76953125, "learning_rate": 0.0001603095566472573, "loss": 0.8527, "step": 19323 }, { "epoch": 0.49618613399323724, "grad_norm": 0.75, "learning_rate": 0.0001603059956321351, "loss": 0.8082, "step": 19324 }, { "epoch": 0.4962118111891591, "grad_norm": 0.75390625, "learning_rate": 0.00016030243449682846, "loss": 0.8936, "step": 19325 }, { "epoch": 0.4962374883850809, "grad_norm": 0.73828125, "learning_rate": 0.00016029887324134448, "loss": 0.8494, "step": 19326 }, { "epoch": 0.49626316558100275, "grad_norm": 0.77734375, "learning_rate": 0.00016029531186569033, "loss": 0.8724, "step": 19327 }, { "epoch": 0.49628884277692453, "grad_norm": 0.765625, "learning_rate": 0.0001602917503698731, "loss": 0.9662, "step": 19328 }, { "epoch": 0.49631451997284637, "grad_norm": 0.8125, "learning_rate": 0.0001602881887538998, "loss": 0.9319, "step": 19329 }, { "epoch": 0.4963401971687682, "grad_norm": 0.6953125, "learning_rate": 0.0001602846270177776, "loss": 0.7889, "step": 19330 }, { "epoch": 0.49636587436469, "grad_norm": 0.7734375, "learning_rate": 0.0001602810651615136, "loss": 0.8505, "step": 19331 }, { "epoch": 0.4963915515606118, "grad_norm": 0.734375, "learning_rate": 0.00016027750318511487, "loss": 0.8615, "step": 19332 }, { "epoch": 0.49641722875653366, "grad_norm": 0.7578125, "learning_rate": 0.00016027394108858854, "loss": 0.8445, "step": 19333 }, { "epoch": 0.49644290595245544, "grad_norm": 0.82421875, "learning_rate": 0.00016027037887194164, "loss": 0.8867, "step": 19334 }, { "epoch": 0.4964685831483773, "grad_norm": 0.78125, "learning_rate": 0.00016026681653518137, "loss": 0.7465, "step": 19335 }, { "epoch": 0.4964942603442991, "grad_norm": 0.8671875, "learning_rate": 0.00016026325407831476, "loss": 0.941, "step": 19336 }, { "epoch": 0.49651993754022095, "grad_norm": 0.82421875, "learning_rate": 0.00016025969150134894, "loss": 0.844, "step": 19337 }, { "epoch": 0.4965456147361427, "grad_norm": 0.7109375, "learning_rate": 0.00016025612880429097, "loss": 0.7837, "step": 19338 }, { "epoch": 0.49657129193206456, "grad_norm": 0.81640625, "learning_rate": 0.00016025256598714801, "loss": 0.9836, "step": 19339 }, { "epoch": 0.4965969691279864, "grad_norm": 0.7890625, "learning_rate": 0.00016024900304992712, "loss": 0.7911, "step": 19340 }, { "epoch": 0.4966226463239082, "grad_norm": 0.7578125, "learning_rate": 0.0001602454399926354, "loss": 0.9034, "step": 19341 }, { "epoch": 0.49664832351983, "grad_norm": 0.84765625, "learning_rate": 0.00016024187681527995, "loss": 1.0112, "step": 19342 }, { "epoch": 0.49667400071575185, "grad_norm": 0.8046875, "learning_rate": 0.0001602383135178679, "loss": 0.8532, "step": 19343 }, { "epoch": 0.49669967791167363, "grad_norm": 0.84375, "learning_rate": 0.00016023475010040633, "loss": 0.8855, "step": 19344 }, { "epoch": 0.49672535510759547, "grad_norm": 0.76171875, "learning_rate": 0.00016023118656290235, "loss": 0.9065, "step": 19345 }, { "epoch": 0.4967510323035173, "grad_norm": 0.8203125, "learning_rate": 0.00016022762290536304, "loss": 1.0071, "step": 19346 }, { "epoch": 0.49677670949943914, "grad_norm": 0.76171875, "learning_rate": 0.00016022405912779553, "loss": 0.919, "step": 19347 }, { "epoch": 0.4968023866953609, "grad_norm": 0.765625, "learning_rate": 0.0001602204952302069, "loss": 0.9208, "step": 19348 }, { "epoch": 0.49682806389128276, "grad_norm": 0.78515625, "learning_rate": 0.00016021693121260429, "loss": 0.9327, "step": 19349 }, { "epoch": 0.4968537410872046, "grad_norm": 0.7734375, "learning_rate": 0.00016021336707499473, "loss": 0.9031, "step": 19350 }, { "epoch": 0.4968794182831264, "grad_norm": 0.77734375, "learning_rate": 0.0001602098028173854, "loss": 0.9853, "step": 19351 }, { "epoch": 0.4969050954790482, "grad_norm": 0.7890625, "learning_rate": 0.00016020623843978338, "loss": 0.9811, "step": 19352 }, { "epoch": 0.49693077267497004, "grad_norm": 0.8046875, "learning_rate": 0.00016020267394219575, "loss": 1.0786, "step": 19353 }, { "epoch": 0.4969564498708918, "grad_norm": 0.76953125, "learning_rate": 0.0001601991093246296, "loss": 0.8732, "step": 19354 }, { "epoch": 0.49698212706681366, "grad_norm": 0.84375, "learning_rate": 0.0001601955445870921, "loss": 0.9101, "step": 19355 }, { "epoch": 0.4970078042627355, "grad_norm": 0.76953125, "learning_rate": 0.0001601919797295903, "loss": 0.9337, "step": 19356 }, { "epoch": 0.49703348145865733, "grad_norm": 0.7734375, "learning_rate": 0.00016018841475213137, "loss": 0.9579, "step": 19357 }, { "epoch": 0.4970591586545791, "grad_norm": 0.703125, "learning_rate": 0.0001601848496547223, "loss": 0.8625, "step": 19358 }, { "epoch": 0.49708483585050095, "grad_norm": 0.76953125, "learning_rate": 0.0001601812844373703, "loss": 0.7726, "step": 19359 }, { "epoch": 0.4971105130464228, "grad_norm": 0.77734375, "learning_rate": 0.00016017771910008239, "loss": 0.8656, "step": 19360 }, { "epoch": 0.49713619024234457, "grad_norm": 0.77734375, "learning_rate": 0.00016017415364286575, "loss": 0.8108, "step": 19361 }, { "epoch": 0.4971618674382664, "grad_norm": 0.8203125, "learning_rate": 0.00016017058806572747, "loss": 0.8559, "step": 19362 }, { "epoch": 0.49718754463418824, "grad_norm": 0.78125, "learning_rate": 0.00016016702236867463, "loss": 0.8655, "step": 19363 }, { "epoch": 0.49721322183011, "grad_norm": 0.76953125, "learning_rate": 0.00016016345655171435, "loss": 0.83, "step": 19364 }, { "epoch": 0.49723889902603186, "grad_norm": 0.86328125, "learning_rate": 0.00016015989061485378, "loss": 0.8988, "step": 19365 }, { "epoch": 0.4972645762219537, "grad_norm": 0.74609375, "learning_rate": 0.00016015632455809994, "loss": 0.9532, "step": 19366 }, { "epoch": 0.4972902534178755, "grad_norm": 0.83984375, "learning_rate": 0.00016015275838145995, "loss": 0.8672, "step": 19367 }, { "epoch": 0.4973159306137973, "grad_norm": 0.84765625, "learning_rate": 0.000160149192084941, "loss": 0.8204, "step": 19368 }, { "epoch": 0.49734160780971914, "grad_norm": 0.73046875, "learning_rate": 0.0001601456256685501, "loss": 0.8181, "step": 19369 }, { "epoch": 0.497367285005641, "grad_norm": 0.73046875, "learning_rate": 0.00016014205913229446, "loss": 0.8032, "step": 19370 }, { "epoch": 0.49739296220156276, "grad_norm": 0.78515625, "learning_rate": 0.00016013849247618106, "loss": 0.7918, "step": 19371 }, { "epoch": 0.4974186393974846, "grad_norm": 0.796875, "learning_rate": 0.00016013492570021712, "loss": 0.8645, "step": 19372 }, { "epoch": 0.49744431659340643, "grad_norm": 0.80078125, "learning_rate": 0.00016013135880440967, "loss": 0.8977, "step": 19373 }, { "epoch": 0.4974699937893282, "grad_norm": 0.69140625, "learning_rate": 0.00016012779178876588, "loss": 0.7334, "step": 19374 }, { "epoch": 0.49749567098525005, "grad_norm": 0.83984375, "learning_rate": 0.00016012422465329284, "loss": 0.9179, "step": 19375 }, { "epoch": 0.4975213481811719, "grad_norm": 0.76953125, "learning_rate": 0.00016012065739799764, "loss": 1.0869, "step": 19376 }, { "epoch": 0.4975470253770937, "grad_norm": 0.75390625, "learning_rate": 0.0001601170900228874, "loss": 1.0858, "step": 19377 }, { "epoch": 0.4975727025730155, "grad_norm": 0.7578125, "learning_rate": 0.00016011352252796922, "loss": 0.9303, "step": 19378 }, { "epoch": 0.49759837976893734, "grad_norm": 0.71484375, "learning_rate": 0.00016010995491325022, "loss": 0.8859, "step": 19379 }, { "epoch": 0.4976240569648592, "grad_norm": 0.7421875, "learning_rate": 0.00016010638717873753, "loss": 0.8667, "step": 19380 }, { "epoch": 0.49764973416078095, "grad_norm": 0.6953125, "learning_rate": 0.00016010281932443823, "loss": 0.895, "step": 19381 }, { "epoch": 0.4976754113567028, "grad_norm": 0.7265625, "learning_rate": 0.00016009925135035944, "loss": 0.7871, "step": 19382 }, { "epoch": 0.4977010885526246, "grad_norm": 0.73828125, "learning_rate": 0.00016009568325650827, "loss": 0.9218, "step": 19383 }, { "epoch": 0.4977267657485464, "grad_norm": 0.859375, "learning_rate": 0.00016009211504289182, "loss": 0.9464, "step": 19384 }, { "epoch": 0.49775244294446824, "grad_norm": 0.76953125, "learning_rate": 0.00016008854670951725, "loss": 0.879, "step": 19385 }, { "epoch": 0.4977781201403901, "grad_norm": 0.765625, "learning_rate": 0.00016008497825639162, "loss": 0.9381, "step": 19386 }, { "epoch": 0.4978037973363119, "grad_norm": 0.7578125, "learning_rate": 0.000160081409683522, "loss": 0.9331, "step": 19387 }, { "epoch": 0.4978294745322337, "grad_norm": 0.72265625, "learning_rate": 0.0001600778409909156, "loss": 0.8196, "step": 19388 }, { "epoch": 0.49785515172815553, "grad_norm": 0.88671875, "learning_rate": 0.00016007427217857952, "loss": 0.6991, "step": 19389 }, { "epoch": 0.49788082892407737, "grad_norm": 0.73046875, "learning_rate": 0.00016007070324652084, "loss": 0.8784, "step": 19390 }, { "epoch": 0.49790650611999915, "grad_norm": 0.80078125, "learning_rate": 0.0001600671341947466, "loss": 0.8146, "step": 19391 }, { "epoch": 0.497932183315921, "grad_norm": 0.73828125, "learning_rate": 0.00016006356502326405, "loss": 0.8817, "step": 19392 }, { "epoch": 0.4979578605118428, "grad_norm": 0.6875, "learning_rate": 0.00016005999573208022, "loss": 0.7921, "step": 19393 }, { "epoch": 0.4979835377077646, "grad_norm": 0.8046875, "learning_rate": 0.00016005642632120224, "loss": 1.0463, "step": 19394 }, { "epoch": 0.49800921490368644, "grad_norm": 0.80859375, "learning_rate": 0.00016005285679063724, "loss": 1.0286, "step": 19395 }, { "epoch": 0.4980348920996083, "grad_norm": 0.8359375, "learning_rate": 0.00016004928714039233, "loss": 0.9096, "step": 19396 }, { "epoch": 0.4980605692955301, "grad_norm": 0.796875, "learning_rate": 0.00016004571737047462, "loss": 0.9538, "step": 19397 }, { "epoch": 0.4980862464914519, "grad_norm": 0.8203125, "learning_rate": 0.0001600421474808912, "loss": 0.7543, "step": 19398 }, { "epoch": 0.4981119236873737, "grad_norm": 0.76953125, "learning_rate": 0.0001600385774716492, "loss": 0.7958, "step": 19399 }, { "epoch": 0.49813760088329556, "grad_norm": 0.703125, "learning_rate": 0.00016003500734275574, "loss": 0.8412, "step": 19400 }, { "epoch": 0.49816327807921734, "grad_norm": 0.78125, "learning_rate": 0.00016003143709421793, "loss": 1.0333, "step": 19401 }, { "epoch": 0.4981889552751392, "grad_norm": 0.71484375, "learning_rate": 0.0001600278667260429, "loss": 0.8105, "step": 19402 }, { "epoch": 0.498214632471061, "grad_norm": 0.79296875, "learning_rate": 0.00016002429623823775, "loss": 0.932, "step": 19403 }, { "epoch": 0.4982403096669828, "grad_norm": 0.74609375, "learning_rate": 0.00016002072563080962, "loss": 1.0813, "step": 19404 }, { "epoch": 0.49826598686290463, "grad_norm": 0.7578125, "learning_rate": 0.00016001715490376558, "loss": 0.8606, "step": 19405 }, { "epoch": 0.49829166405882647, "grad_norm": 0.80078125, "learning_rate": 0.0001600135840571128, "loss": 0.9139, "step": 19406 }, { "epoch": 0.4983173412547483, "grad_norm": 0.77734375, "learning_rate": 0.0001600100130908583, "loss": 0.6974, "step": 19407 }, { "epoch": 0.4983430184506701, "grad_norm": 0.92578125, "learning_rate": 0.0001600064420050093, "loss": 0.9093, "step": 19408 }, { "epoch": 0.4983686956465919, "grad_norm": 0.7265625, "learning_rate": 0.0001600028707995729, "loss": 0.9119, "step": 19409 }, { "epoch": 0.49839437284251376, "grad_norm": 0.76171875, "learning_rate": 0.0001599992994745562, "loss": 0.9255, "step": 19410 }, { "epoch": 0.49842005003843554, "grad_norm": 0.75390625, "learning_rate": 0.0001599957280299663, "loss": 0.966, "step": 19411 }, { "epoch": 0.49844572723435737, "grad_norm": 0.73828125, "learning_rate": 0.00015999215646581033, "loss": 0.9035, "step": 19412 }, { "epoch": 0.4984714044302792, "grad_norm": 0.82421875, "learning_rate": 0.00015998858478209543, "loss": 0.8939, "step": 19413 }, { "epoch": 0.498497081626201, "grad_norm": 0.8203125, "learning_rate": 0.0001599850129788287, "loss": 0.8076, "step": 19414 }, { "epoch": 0.4985227588221228, "grad_norm": 0.83203125, "learning_rate": 0.00015998144105601724, "loss": 0.9197, "step": 19415 }, { "epoch": 0.49854843601804466, "grad_norm": 0.73828125, "learning_rate": 0.0001599778690136682, "loss": 0.7707, "step": 19416 }, { "epoch": 0.49857411321396644, "grad_norm": 0.78515625, "learning_rate": 0.00015997429685178868, "loss": 0.931, "step": 19417 }, { "epoch": 0.4985997904098883, "grad_norm": 0.73828125, "learning_rate": 0.0001599707245703858, "loss": 0.9067, "step": 19418 }, { "epoch": 0.4986254676058101, "grad_norm": 0.70703125, "learning_rate": 0.00015996715216946668, "loss": 0.909, "step": 19419 }, { "epoch": 0.49865114480173195, "grad_norm": 0.78515625, "learning_rate": 0.00015996357964903843, "loss": 0.8827, "step": 19420 }, { "epoch": 0.49867682199765373, "grad_norm": 0.8515625, "learning_rate": 0.0001599600070091082, "loss": 0.8674, "step": 19421 }, { "epoch": 0.49870249919357557, "grad_norm": 0.734375, "learning_rate": 0.00015995643424968309, "loss": 0.9161, "step": 19422 }, { "epoch": 0.4987281763894974, "grad_norm": 0.76953125, "learning_rate": 0.00015995286137077025, "loss": 1.0166, "step": 19423 }, { "epoch": 0.4987538535854192, "grad_norm": 0.74609375, "learning_rate": 0.00015994928837237673, "loss": 0.9422, "step": 19424 }, { "epoch": 0.498779530781341, "grad_norm": 0.80859375, "learning_rate": 0.00015994571525450971, "loss": 0.8038, "step": 19425 }, { "epoch": 0.49880520797726285, "grad_norm": 0.703125, "learning_rate": 0.0001599421420171763, "loss": 0.7588, "step": 19426 }, { "epoch": 0.49883088517318463, "grad_norm": 0.796875, "learning_rate": 0.00015993856866038358, "loss": 0.8572, "step": 19427 }, { "epoch": 0.49885656236910647, "grad_norm": 0.78125, "learning_rate": 0.00015993499518413877, "loss": 0.9722, "step": 19428 }, { "epoch": 0.4988822395650283, "grad_norm": 0.87890625, "learning_rate": 0.0001599314215884489, "loss": 0.8662, "step": 19429 }, { "epoch": 0.49890791676095014, "grad_norm": 0.81640625, "learning_rate": 0.00015992784787332112, "loss": 0.8356, "step": 19430 }, { "epoch": 0.4989335939568719, "grad_norm": 0.8125, "learning_rate": 0.00015992427403876253, "loss": 0.9864, "step": 19431 }, { "epoch": 0.49895927115279376, "grad_norm": 0.7421875, "learning_rate": 0.0001599207000847803, "loss": 0.843, "step": 19432 }, { "epoch": 0.4989849483487156, "grad_norm": 0.91796875, "learning_rate": 0.0001599171260113815, "loss": 0.8135, "step": 19433 }, { "epoch": 0.4990106255446374, "grad_norm": 0.7890625, "learning_rate": 0.0001599135518185733, "loss": 0.9597, "step": 19434 }, { "epoch": 0.4990363027405592, "grad_norm": 0.7421875, "learning_rate": 0.00015990997750636282, "loss": 0.9187, "step": 19435 }, { "epoch": 0.49906197993648105, "grad_norm": 0.74609375, "learning_rate": 0.00015990640307475716, "loss": 0.9543, "step": 19436 }, { "epoch": 0.49908765713240283, "grad_norm": 0.8125, "learning_rate": 0.00015990282852376342, "loss": 1.0355, "step": 19437 }, { "epoch": 0.49911333432832466, "grad_norm": 0.76953125, "learning_rate": 0.00015989925385338877, "loss": 0.9457, "step": 19438 }, { "epoch": 0.4991390115242465, "grad_norm": 0.8046875, "learning_rate": 0.00015989567906364035, "loss": 1.0172, "step": 19439 }, { "epoch": 0.49916468872016834, "grad_norm": 0.79296875, "learning_rate": 0.0001598921041545252, "loss": 0.915, "step": 19440 }, { "epoch": 0.4991903659160901, "grad_norm": 0.78125, "learning_rate": 0.00015988852912605052, "loss": 0.8978, "step": 19441 }, { "epoch": 0.49921604311201195, "grad_norm": 0.7421875, "learning_rate": 0.0001598849539782234, "loss": 0.9019, "step": 19442 }, { "epoch": 0.4992417203079338, "grad_norm": 0.94140625, "learning_rate": 0.000159881378711051, "loss": 0.8214, "step": 19443 }, { "epoch": 0.49926739750385557, "grad_norm": 0.796875, "learning_rate": 0.0001598778033245404, "loss": 0.8443, "step": 19444 }, { "epoch": 0.4992930746997774, "grad_norm": 0.7734375, "learning_rate": 0.00015987422781869876, "loss": 0.933, "step": 19445 }, { "epoch": 0.49931875189569924, "grad_norm": 0.8046875, "learning_rate": 0.0001598706521935332, "loss": 1.0368, "step": 19446 }, { "epoch": 0.499344429091621, "grad_norm": 0.73828125, "learning_rate": 0.00015986707644905082, "loss": 0.8872, "step": 19447 }, { "epoch": 0.49937010628754286, "grad_norm": 0.74609375, "learning_rate": 0.00015986350058525876, "loss": 0.886, "step": 19448 }, { "epoch": 0.4993957834834647, "grad_norm": 0.75390625, "learning_rate": 0.0001598599246021642, "loss": 0.8952, "step": 19449 }, { "epoch": 0.49942146067938653, "grad_norm": 0.7265625, "learning_rate": 0.00015985634849977415, "loss": 0.9343, "step": 19450 }, { "epoch": 0.4994471378753083, "grad_norm": 0.76953125, "learning_rate": 0.00015985277227809584, "loss": 0.8478, "step": 19451 }, { "epoch": 0.49947281507123015, "grad_norm": 0.83203125, "learning_rate": 0.00015984919593713636, "loss": 1.0489, "step": 19452 }, { "epoch": 0.499498492267152, "grad_norm": 0.7734375, "learning_rate": 0.0001598456194769028, "loss": 0.789, "step": 19453 }, { "epoch": 0.49952416946307376, "grad_norm": 0.7421875, "learning_rate": 0.0001598420428974024, "loss": 0.9232, "step": 19454 }, { "epoch": 0.4995498466589956, "grad_norm": 0.77734375, "learning_rate": 0.00015983846619864215, "loss": 0.9259, "step": 19455 }, { "epoch": 0.49957552385491744, "grad_norm": 0.6953125, "learning_rate": 0.00015983488938062928, "loss": 0.7369, "step": 19456 }, { "epoch": 0.4996012010508392, "grad_norm": 0.75, "learning_rate": 0.00015983131244337085, "loss": 0.7409, "step": 19457 }, { "epoch": 0.49962687824676105, "grad_norm": 0.84375, "learning_rate": 0.00015982773538687404, "loss": 1.0464, "step": 19458 }, { "epoch": 0.4996525554426829, "grad_norm": 0.828125, "learning_rate": 0.00015982415821114593, "loss": 0.9413, "step": 19459 }, { "epoch": 0.4996782326386047, "grad_norm": 0.7421875, "learning_rate": 0.00015982058091619373, "loss": 0.8818, "step": 19460 }, { "epoch": 0.4997039098345265, "grad_norm": 0.8046875, "learning_rate": 0.00015981700350202447, "loss": 0.8659, "step": 19461 }, { "epoch": 0.49972958703044834, "grad_norm": 0.75390625, "learning_rate": 0.00015981342596864534, "loss": 0.8172, "step": 19462 }, { "epoch": 0.4997552642263702, "grad_norm": 0.7421875, "learning_rate": 0.00015980984831606346, "loss": 1.0051, "step": 19463 }, { "epoch": 0.49978094142229196, "grad_norm": 0.7890625, "learning_rate": 0.00015980627054428594, "loss": 0.9336, "step": 19464 }, { "epoch": 0.4998066186182138, "grad_norm": 0.76171875, "learning_rate": 0.00015980269265331994, "loss": 0.8437, "step": 19465 }, { "epoch": 0.49983229581413563, "grad_norm": 0.80078125, "learning_rate": 0.00015979911464317255, "loss": 0.9827, "step": 19466 }, { "epoch": 0.4998579730100574, "grad_norm": 0.76953125, "learning_rate": 0.00015979553651385096, "loss": 0.8547, "step": 19467 }, { "epoch": 0.49988365020597925, "grad_norm": 0.796875, "learning_rate": 0.00015979195826536225, "loss": 0.9387, "step": 19468 }, { "epoch": 0.4999093274019011, "grad_norm": 0.83984375, "learning_rate": 0.00015978837989771358, "loss": 0.9488, "step": 19469 }, { "epoch": 0.4999350045978229, "grad_norm": 0.81640625, "learning_rate": 0.00015978480141091205, "loss": 1.0032, "step": 19470 }, { "epoch": 0.4999606817937447, "grad_norm": 0.71875, "learning_rate": 0.00015978122280496483, "loss": 0.9597, "step": 19471 }, { "epoch": 0.49998635898966654, "grad_norm": 0.75, "learning_rate": 0.000159777644079879, "loss": 0.8135, "step": 19472 }, { "epoch": 0.5000120361855883, "grad_norm": 0.81640625, "learning_rate": 0.00015977406523566177, "loss": 0.9455, "step": 19473 }, { "epoch": 0.5000377133815102, "grad_norm": 0.734375, "learning_rate": 0.0001597704862723202, "loss": 0.9278, "step": 19474 }, { "epoch": 0.500063390577432, "grad_norm": 0.80078125, "learning_rate": 0.00015976690718986144, "loss": 0.9569, "step": 19475 }, { "epoch": 0.5000890677733538, "grad_norm": 0.77734375, "learning_rate": 0.0001597633279882927, "loss": 1.0054, "step": 19476 }, { "epoch": 0.5001147449692757, "grad_norm": 0.70703125, "learning_rate": 0.00015975974866762097, "loss": 0.9169, "step": 19477 }, { "epoch": 0.5001404221651975, "grad_norm": 0.765625, "learning_rate": 0.00015975616922785348, "loss": 0.9156, "step": 19478 }, { "epoch": 0.5001660993611192, "grad_norm": 0.734375, "learning_rate": 0.00015975258966899734, "loss": 0.9407, "step": 19479 }, { "epoch": 0.5001917765570411, "grad_norm": 0.77734375, "learning_rate": 0.00015974900999105967, "loss": 0.8125, "step": 19480 }, { "epoch": 0.5002174537529629, "grad_norm": 0.73046875, "learning_rate": 0.00015974543019404766, "loss": 0.9276, "step": 19481 }, { "epoch": 0.5002431309488847, "grad_norm": 0.72265625, "learning_rate": 0.0001597418502779684, "loss": 0.8284, "step": 19482 }, { "epoch": 0.5002688081448066, "grad_norm": 0.703125, "learning_rate": 0.00015973827024282904, "loss": 0.8018, "step": 19483 }, { "epoch": 0.5002944853407284, "grad_norm": 0.80078125, "learning_rate": 0.00015973469008863665, "loss": 0.9562, "step": 19484 }, { "epoch": 0.5003201625366502, "grad_norm": 0.8359375, "learning_rate": 0.00015973110981539845, "loss": 0.9177, "step": 19485 }, { "epoch": 0.500345839732572, "grad_norm": 0.75390625, "learning_rate": 0.00015972752942312155, "loss": 0.6844, "step": 19486 }, { "epoch": 0.5003715169284938, "grad_norm": 0.83984375, "learning_rate": 0.00015972394891181305, "loss": 0.9663, "step": 19487 }, { "epoch": 0.5003971941244156, "grad_norm": 0.72265625, "learning_rate": 0.00015972036828148015, "loss": 0.7562, "step": 19488 }, { "epoch": 0.5004228713203375, "grad_norm": 0.7109375, "learning_rate": 0.00015971678753212994, "loss": 0.8397, "step": 19489 }, { "epoch": 0.5004485485162593, "grad_norm": 0.765625, "learning_rate": 0.00015971320666376956, "loss": 1.0024, "step": 19490 }, { "epoch": 0.5004742257121811, "grad_norm": 0.77734375, "learning_rate": 0.00015970962567640618, "loss": 0.8435, "step": 19491 }, { "epoch": 0.5004999029081029, "grad_norm": 0.84375, "learning_rate": 0.00015970604457004686, "loss": 0.8043, "step": 19492 }, { "epoch": 0.5005255801040247, "grad_norm": 0.74609375, "learning_rate": 0.0001597024633446988, "loss": 0.8433, "step": 19493 }, { "epoch": 0.5005512572999465, "grad_norm": 0.74609375, "learning_rate": 0.00015969888200036914, "loss": 0.9282, "step": 19494 }, { "epoch": 0.5005769344958684, "grad_norm": 0.79296875, "learning_rate": 0.00015969530053706502, "loss": 0.7962, "step": 19495 }, { "epoch": 0.5006026116917902, "grad_norm": 0.73828125, "learning_rate": 0.00015969171895479354, "loss": 0.9211, "step": 19496 }, { "epoch": 0.500628288887712, "grad_norm": 0.81640625, "learning_rate": 0.00015968813725356182, "loss": 1.0181, "step": 19497 }, { "epoch": 0.5006539660836339, "grad_norm": 0.82421875, "learning_rate": 0.00015968455543337708, "loss": 1.1063, "step": 19498 }, { "epoch": 0.5006796432795556, "grad_norm": 0.83203125, "learning_rate": 0.0001596809734942464, "loss": 0.7969, "step": 19499 }, { "epoch": 0.5007053204754774, "grad_norm": 1.1875, "learning_rate": 0.00015967739143617692, "loss": 0.9195, "step": 19500 }, { "epoch": 0.5007309976713993, "grad_norm": 0.6796875, "learning_rate": 0.0001596738092591758, "loss": 0.7744, "step": 19501 }, { "epoch": 0.5007566748673211, "grad_norm": 0.74609375, "learning_rate": 0.0001596702269632502, "loss": 0.874, "step": 19502 }, { "epoch": 0.500782352063243, "grad_norm": 0.7578125, "learning_rate": 0.00015966664454840717, "loss": 0.9949, "step": 19503 }, { "epoch": 0.5008080292591648, "grad_norm": 0.7421875, "learning_rate": 0.00015966306201465393, "loss": 1.0794, "step": 19504 }, { "epoch": 0.5008337064550866, "grad_norm": 0.765625, "learning_rate": 0.0001596594793619976, "loss": 0.8305, "step": 19505 }, { "epoch": 0.5008593836510083, "grad_norm": 0.83203125, "learning_rate": 0.00015965589659044528, "loss": 0.9081, "step": 19506 }, { "epoch": 0.5008850608469302, "grad_norm": 0.72265625, "learning_rate": 0.0001596523137000042, "loss": 0.8743, "step": 19507 }, { "epoch": 0.500910738042852, "grad_norm": 0.79296875, "learning_rate": 0.00015964873069068144, "loss": 1.0105, "step": 19508 }, { "epoch": 0.5009364152387739, "grad_norm": 0.83984375, "learning_rate": 0.00015964514756248413, "loss": 0.9503, "step": 19509 }, { "epoch": 0.5009620924346957, "grad_norm": 0.73046875, "learning_rate": 0.00015964156431541942, "loss": 0.9449, "step": 19510 }, { "epoch": 0.5009877696306175, "grad_norm": 0.828125, "learning_rate": 0.00015963798094949447, "loss": 0.9434, "step": 19511 }, { "epoch": 0.5010134468265393, "grad_norm": 0.734375, "learning_rate": 0.0001596343974647164, "loss": 0.8333, "step": 19512 }, { "epoch": 0.5010391240224611, "grad_norm": 0.8125, "learning_rate": 0.0001596308138610924, "loss": 1.0177, "step": 19513 }, { "epoch": 0.5010648012183829, "grad_norm": 0.77734375, "learning_rate": 0.00015962723013862956, "loss": 0.9974, "step": 19514 }, { "epoch": 0.5010904784143048, "grad_norm": 0.72265625, "learning_rate": 0.00015962364629733503, "loss": 0.9734, "step": 19515 }, { "epoch": 0.5011161556102266, "grad_norm": 0.8125, "learning_rate": 0.00015962006233721595, "loss": 1.0949, "step": 19516 }, { "epoch": 0.5011418328061484, "grad_norm": 0.73046875, "learning_rate": 0.00015961647825827948, "loss": 0.7584, "step": 19517 }, { "epoch": 0.5011675100020703, "grad_norm": 0.74609375, "learning_rate": 0.00015961289406053278, "loss": 0.8218, "step": 19518 }, { "epoch": 0.501193187197992, "grad_norm": 0.7734375, "learning_rate": 0.00015960930974398289, "loss": 0.8305, "step": 19519 }, { "epoch": 0.5012188643939138, "grad_norm": 0.73046875, "learning_rate": 0.0001596057253086371, "loss": 0.7455, "step": 19520 }, { "epoch": 0.5012445415898357, "grad_norm": 0.79296875, "learning_rate": 0.00015960214075450246, "loss": 0.8736, "step": 19521 }, { "epoch": 0.5012702187857575, "grad_norm": 0.80859375, "learning_rate": 0.00015959855608158616, "loss": 1.0155, "step": 19522 }, { "epoch": 0.5012958959816793, "grad_norm": 0.8828125, "learning_rate": 0.0001595949712898953, "loss": 0.929, "step": 19523 }, { "epoch": 0.5013215731776012, "grad_norm": 0.8046875, "learning_rate": 0.00015959138637943701, "loss": 0.9293, "step": 19524 }, { "epoch": 0.501347250373523, "grad_norm": 0.8671875, "learning_rate": 0.0001595878013502185, "loss": 1.0091, "step": 19525 }, { "epoch": 0.5013729275694447, "grad_norm": 0.83984375, "learning_rate": 0.0001595842162022469, "loss": 0.9447, "step": 19526 }, { "epoch": 0.5013986047653666, "grad_norm": 0.8046875, "learning_rate": 0.00015958063093552934, "loss": 1.031, "step": 19527 }, { "epoch": 0.5014242819612884, "grad_norm": 0.74609375, "learning_rate": 0.00015957704555007296, "loss": 0.9306, "step": 19528 }, { "epoch": 0.5014499591572102, "grad_norm": 0.9609375, "learning_rate": 0.00015957346004588492, "loss": 0.8514, "step": 19529 }, { "epoch": 0.5014756363531321, "grad_norm": 0.80859375, "learning_rate": 0.00015956987442297232, "loss": 1.0571, "step": 19530 }, { "epoch": 0.5015013135490539, "grad_norm": 0.7421875, "learning_rate": 0.00015956628868134235, "loss": 0.9796, "step": 19531 }, { "epoch": 0.5015269907449756, "grad_norm": 0.82421875, "learning_rate": 0.0001595627028210022, "loss": 0.8596, "step": 19532 }, { "epoch": 0.5015526679408975, "grad_norm": 0.78515625, "learning_rate": 0.0001595591168419589, "loss": 0.8472, "step": 19533 }, { "epoch": 0.5015783451368193, "grad_norm": 0.87109375, "learning_rate": 0.00015955553074421968, "loss": 0.962, "step": 19534 }, { "epoch": 0.5016040223327412, "grad_norm": 0.84765625, "learning_rate": 0.00015955194452779166, "loss": 0.873, "step": 19535 }, { "epoch": 0.501629699528663, "grad_norm": 0.84765625, "learning_rate": 0.00015954835819268203, "loss": 0.9538, "step": 19536 }, { "epoch": 0.5016553767245848, "grad_norm": 0.8359375, "learning_rate": 0.00015954477173889785, "loss": 0.9601, "step": 19537 }, { "epoch": 0.5016810539205067, "grad_norm": 0.84765625, "learning_rate": 0.00015954118516644633, "loss": 0.9668, "step": 19538 }, { "epoch": 0.5017067311164284, "grad_norm": 0.8359375, "learning_rate": 0.00015953759847533463, "loss": 0.9147, "step": 19539 }, { "epoch": 0.5017324083123502, "grad_norm": 0.71875, "learning_rate": 0.00015953401166556987, "loss": 0.9435, "step": 19540 }, { "epoch": 0.5017580855082721, "grad_norm": 0.7578125, "learning_rate": 0.0001595304247371592, "loss": 0.7622, "step": 19541 }, { "epoch": 0.5017837627041939, "grad_norm": 0.72265625, "learning_rate": 0.00015952683769010977, "loss": 0.8187, "step": 19542 }, { "epoch": 0.5018094399001157, "grad_norm": 0.75, "learning_rate": 0.00015952325052442871, "loss": 0.8993, "step": 19543 }, { "epoch": 0.5018351170960376, "grad_norm": 0.71484375, "learning_rate": 0.00015951966324012322, "loss": 1.003, "step": 19544 }, { "epoch": 0.5018607942919594, "grad_norm": 0.76953125, "learning_rate": 0.00015951607583720038, "loss": 0.8611, "step": 19545 }, { "epoch": 0.5018864714878811, "grad_norm": 0.72265625, "learning_rate": 0.00015951248831566744, "loss": 0.9499, "step": 19546 }, { "epoch": 0.501912148683803, "grad_norm": 0.80078125, "learning_rate": 0.0001595089006755314, "loss": 0.9091, "step": 19547 }, { "epoch": 0.5019378258797248, "grad_norm": 0.74609375, "learning_rate": 0.00015950531291679957, "loss": 0.8886, "step": 19548 }, { "epoch": 0.5019635030756466, "grad_norm": 0.8515625, "learning_rate": 0.00015950172503947897, "loss": 0.964, "step": 19549 }, { "epoch": 0.5019891802715685, "grad_norm": 0.765625, "learning_rate": 0.00015949813704357687, "loss": 0.9255, "step": 19550 }, { "epoch": 0.5020148574674903, "grad_norm": 0.79296875, "learning_rate": 0.0001594945489291003, "loss": 0.9391, "step": 19551 }, { "epoch": 0.502040534663412, "grad_norm": 0.77734375, "learning_rate": 0.00015949096069605649, "loss": 0.7167, "step": 19552 }, { "epoch": 0.5020662118593339, "grad_norm": 0.7734375, "learning_rate": 0.00015948737234445257, "loss": 1.1207, "step": 19553 }, { "epoch": 0.5020918890552557, "grad_norm": 0.82421875, "learning_rate": 0.0001594837838742957, "loss": 0.8505, "step": 19554 }, { "epoch": 0.5021175662511775, "grad_norm": 0.796875, "learning_rate": 0.00015948019528559302, "loss": 0.8237, "step": 19555 }, { "epoch": 0.5021432434470994, "grad_norm": 0.79296875, "learning_rate": 0.00015947660657835168, "loss": 0.9162, "step": 19556 }, { "epoch": 0.5021689206430212, "grad_norm": 0.75390625, "learning_rate": 0.00015947301775257882, "loss": 0.8829, "step": 19557 }, { "epoch": 0.502194597838943, "grad_norm": 0.76953125, "learning_rate": 0.0001594694288082816, "loss": 0.9616, "step": 19558 }, { "epoch": 0.5022202750348648, "grad_norm": 0.82421875, "learning_rate": 0.00015946583974546722, "loss": 0.7633, "step": 19559 }, { "epoch": 0.5022459522307866, "grad_norm": 0.75390625, "learning_rate": 0.00015946225056414276, "loss": 0.9309, "step": 19560 }, { "epoch": 0.5022716294267084, "grad_norm": 0.765625, "learning_rate": 0.0001594586612643154, "loss": 0.808, "step": 19561 }, { "epoch": 0.5022973066226303, "grad_norm": 0.81640625, "learning_rate": 0.00015945507184599234, "loss": 0.8985, "step": 19562 }, { "epoch": 0.5023229838185521, "grad_norm": 0.73046875, "learning_rate": 0.00015945148230918067, "loss": 1.0321, "step": 19563 }, { "epoch": 0.502348661014474, "grad_norm": 0.76171875, "learning_rate": 0.00015944789265388755, "loss": 0.8338, "step": 19564 }, { "epoch": 0.5023743382103958, "grad_norm": 0.70703125, "learning_rate": 0.00015944430288012017, "loss": 0.8442, "step": 19565 }, { "epoch": 0.5024000154063175, "grad_norm": 0.8125, "learning_rate": 0.00015944071298788564, "loss": 1.0108, "step": 19566 }, { "epoch": 0.5024256926022393, "grad_norm": 0.73046875, "learning_rate": 0.00015943712297719119, "loss": 0.8015, "step": 19567 }, { "epoch": 0.5024513697981612, "grad_norm": 0.73828125, "learning_rate": 0.0001594335328480439, "loss": 0.8995, "step": 19568 }, { "epoch": 0.502477046994083, "grad_norm": 0.80078125, "learning_rate": 0.00015942994260045092, "loss": 0.9499, "step": 19569 }, { "epoch": 0.5025027241900049, "grad_norm": 0.7421875, "learning_rate": 0.00015942635223441945, "loss": 1.0206, "step": 19570 }, { "epoch": 0.5025284013859267, "grad_norm": 0.75, "learning_rate": 0.00015942276174995662, "loss": 0.9331, "step": 19571 }, { "epoch": 0.5025540785818484, "grad_norm": 0.78125, "learning_rate": 0.0001594191711470696, "loss": 0.8511, "step": 19572 }, { "epoch": 0.5025797557777703, "grad_norm": 0.79296875, "learning_rate": 0.00015941558042576555, "loss": 0.8885, "step": 19573 }, { "epoch": 0.5026054329736921, "grad_norm": 0.79296875, "learning_rate": 0.00015941198958605162, "loss": 1.017, "step": 19574 }, { "epoch": 0.5026311101696139, "grad_norm": 0.7578125, "learning_rate": 0.00015940839862793493, "loss": 0.8626, "step": 19575 }, { "epoch": 0.5026567873655358, "grad_norm": 0.81640625, "learning_rate": 0.00015940480755142269, "loss": 1.0084, "step": 19576 }, { "epoch": 0.5026824645614576, "grad_norm": 0.71484375, "learning_rate": 0.00015940121635652204, "loss": 0.9427, "step": 19577 }, { "epoch": 0.5027081417573794, "grad_norm": 0.8203125, "learning_rate": 0.00015939762504324013, "loss": 0.8395, "step": 19578 }, { "epoch": 0.5027338189533012, "grad_norm": 0.77734375, "learning_rate": 0.0001593940336115841, "loss": 0.8066, "step": 19579 }, { "epoch": 0.502759496149223, "grad_norm": 0.8125, "learning_rate": 0.0001593904420615611, "loss": 0.8563, "step": 19580 }, { "epoch": 0.5027851733451448, "grad_norm": 0.828125, "learning_rate": 0.0001593868503931784, "loss": 0.8891, "step": 19581 }, { "epoch": 0.5028108505410667, "grad_norm": 0.796875, "learning_rate": 0.00015938325860644298, "loss": 1.0165, "step": 19582 }, { "epoch": 0.5028365277369885, "grad_norm": 0.78125, "learning_rate": 0.00015937966670136216, "loss": 0.9513, "step": 19583 }, { "epoch": 0.5028622049329103, "grad_norm": 0.78125, "learning_rate": 0.000159376074677943, "loss": 0.9154, "step": 19584 }, { "epoch": 0.5028878821288322, "grad_norm": 0.76953125, "learning_rate": 0.0001593724825361927, "loss": 0.8764, "step": 19585 }, { "epoch": 0.5029135593247539, "grad_norm": 0.83984375, "learning_rate": 0.00015936889027611835, "loss": 0.8146, "step": 19586 }, { "epoch": 0.5029392365206757, "grad_norm": 0.73828125, "learning_rate": 0.00015936529789772723, "loss": 0.8492, "step": 19587 }, { "epoch": 0.5029649137165976, "grad_norm": 0.8515625, "learning_rate": 0.0001593617054010264, "loss": 0.8696, "step": 19588 }, { "epoch": 0.5029905909125194, "grad_norm": 0.77734375, "learning_rate": 0.00015935811278602305, "loss": 0.9395, "step": 19589 }, { "epoch": 0.5030162681084412, "grad_norm": 0.80859375, "learning_rate": 0.00015935452005272436, "loss": 0.878, "step": 19590 }, { "epoch": 0.5030419453043631, "grad_norm": 0.71484375, "learning_rate": 0.00015935092720113748, "loss": 0.7564, "step": 19591 }, { "epoch": 0.5030676225002848, "grad_norm": 0.73046875, "learning_rate": 0.00015934733423126956, "loss": 0.8285, "step": 19592 }, { "epoch": 0.5030932996962066, "grad_norm": 0.73046875, "learning_rate": 0.00015934374114312774, "loss": 0.7748, "step": 19593 }, { "epoch": 0.5031189768921285, "grad_norm": 0.796875, "learning_rate": 0.00015934014793671922, "loss": 0.9577, "step": 19594 }, { "epoch": 0.5031446540880503, "grad_norm": 0.73046875, "learning_rate": 0.00015933655461205116, "loss": 0.9364, "step": 19595 }, { "epoch": 0.5031703312839721, "grad_norm": 0.83203125, "learning_rate": 0.00015933296116913068, "loss": 0.93, "step": 19596 }, { "epoch": 0.503196008479894, "grad_norm": 0.78125, "learning_rate": 0.00015932936760796497, "loss": 0.8527, "step": 19597 }, { "epoch": 0.5032216856758158, "grad_norm": 0.76953125, "learning_rate": 0.0001593257739285612, "loss": 0.8355, "step": 19598 }, { "epoch": 0.5032473628717375, "grad_norm": 0.77734375, "learning_rate": 0.00015932218013092652, "loss": 0.8497, "step": 19599 }, { "epoch": 0.5032730400676594, "grad_norm": 0.74609375, "learning_rate": 0.00015931858621506811, "loss": 0.9504, "step": 19600 }, { "epoch": 0.5032987172635812, "grad_norm": 0.77734375, "learning_rate": 0.0001593149921809931, "loss": 0.9594, "step": 19601 }, { "epoch": 0.503324394459503, "grad_norm": 0.8359375, "learning_rate": 0.00015931139802870866, "loss": 0.8715, "step": 19602 }, { "epoch": 0.5033500716554249, "grad_norm": 0.78515625, "learning_rate": 0.00015930780375822198, "loss": 1.1357, "step": 19603 }, { "epoch": 0.5033757488513467, "grad_norm": 0.7109375, "learning_rate": 0.00015930420936954018, "loss": 0.9472, "step": 19604 }, { "epoch": 0.5034014260472686, "grad_norm": 0.8125, "learning_rate": 0.00015930061486267046, "loss": 1.072, "step": 19605 }, { "epoch": 0.5034271032431903, "grad_norm": 0.69921875, "learning_rate": 0.00015929702023761996, "loss": 0.7264, "step": 19606 }, { "epoch": 0.5034527804391121, "grad_norm": 0.7421875, "learning_rate": 0.0001592934254943959, "loss": 0.9164, "step": 19607 }, { "epoch": 0.503478457635034, "grad_norm": 0.8046875, "learning_rate": 0.00015928983063300537, "loss": 0.9258, "step": 19608 }, { "epoch": 0.5035041348309558, "grad_norm": 0.79296875, "learning_rate": 0.00015928623565345556, "loss": 0.8658, "step": 19609 }, { "epoch": 0.5035298120268776, "grad_norm": 0.859375, "learning_rate": 0.0001592826405557536, "loss": 0.8806, "step": 19610 }, { "epoch": 0.5035554892227995, "grad_norm": 0.80859375, "learning_rate": 0.00015927904533990676, "loss": 0.9327, "step": 19611 }, { "epoch": 0.5035811664187212, "grad_norm": 0.80859375, "learning_rate": 0.0001592754500059221, "loss": 0.9364, "step": 19612 }, { "epoch": 0.503606843614643, "grad_norm": 0.7421875, "learning_rate": 0.00015927185455380681, "loss": 0.8504, "step": 19613 }, { "epoch": 0.5036325208105649, "grad_norm": 0.75, "learning_rate": 0.00015926825898356813, "loss": 0.7766, "step": 19614 }, { "epoch": 0.5036581980064867, "grad_norm": 0.77734375, "learning_rate": 0.0001592646632952131, "loss": 1.1258, "step": 19615 }, { "epoch": 0.5036838752024085, "grad_norm": 0.69140625, "learning_rate": 0.00015926106748874897, "loss": 0.9469, "step": 19616 }, { "epoch": 0.5037095523983304, "grad_norm": 0.70703125, "learning_rate": 0.00015925747156418288, "loss": 0.9018, "step": 19617 }, { "epoch": 0.5037352295942522, "grad_norm": 0.7578125, "learning_rate": 0.000159253875521522, "loss": 0.8394, "step": 19618 }, { "epoch": 0.5037609067901739, "grad_norm": 0.75390625, "learning_rate": 0.00015925027936077352, "loss": 1.0163, "step": 19619 }, { "epoch": 0.5037865839860958, "grad_norm": 0.703125, "learning_rate": 0.00015924668308194456, "loss": 0.7848, "step": 19620 }, { "epoch": 0.5038122611820176, "grad_norm": 0.78125, "learning_rate": 0.00015924308668504233, "loss": 0.8342, "step": 19621 }, { "epoch": 0.5038379383779394, "grad_norm": 0.7265625, "learning_rate": 0.00015923949017007398, "loss": 0.8494, "step": 19622 }, { "epoch": 0.5038636155738613, "grad_norm": 0.80078125, "learning_rate": 0.00015923589353704668, "loss": 0.8953, "step": 19623 }, { "epoch": 0.5038892927697831, "grad_norm": 0.73046875, "learning_rate": 0.00015923229678596758, "loss": 0.8324, "step": 19624 }, { "epoch": 0.5039149699657048, "grad_norm": 0.7890625, "learning_rate": 0.00015922869991684386, "loss": 0.8378, "step": 19625 }, { "epoch": 0.5039406471616267, "grad_norm": 0.7578125, "learning_rate": 0.0001592251029296827, "loss": 0.8893, "step": 19626 }, { "epoch": 0.5039663243575485, "grad_norm": 0.80859375, "learning_rate": 0.00015922150582449126, "loss": 0.9535, "step": 19627 }, { "epoch": 0.5039920015534703, "grad_norm": 0.86328125, "learning_rate": 0.00015921790860127673, "loss": 0.9338, "step": 19628 }, { "epoch": 0.5040176787493922, "grad_norm": 0.7890625, "learning_rate": 0.00015921431126004622, "loss": 0.8443, "step": 19629 }, { "epoch": 0.504043355945314, "grad_norm": 0.80078125, "learning_rate": 0.00015921071380080694, "loss": 1.0514, "step": 19630 }, { "epoch": 0.5040690331412359, "grad_norm": 0.828125, "learning_rate": 0.00015920711622356609, "loss": 0.8387, "step": 19631 }, { "epoch": 0.5040947103371576, "grad_norm": 0.765625, "learning_rate": 0.00015920351852833075, "loss": 0.8265, "step": 19632 }, { "epoch": 0.5041203875330794, "grad_norm": 0.79296875, "learning_rate": 0.00015919992071510822, "loss": 0.8511, "step": 19633 }, { "epoch": 0.5041460647290013, "grad_norm": 0.87109375, "learning_rate": 0.00015919632278390555, "loss": 0.8794, "step": 19634 }, { "epoch": 0.5041717419249231, "grad_norm": 0.7578125, "learning_rate": 0.00015919272473472994, "loss": 0.7874, "step": 19635 }, { "epoch": 0.5041974191208449, "grad_norm": 0.72265625, "learning_rate": 0.0001591891265675886, "loss": 1.0227, "step": 19636 }, { "epoch": 0.5042230963167668, "grad_norm": 0.75390625, "learning_rate": 0.00015918552828248867, "loss": 0.8258, "step": 19637 }, { "epoch": 0.5042487735126886, "grad_norm": 0.7734375, "learning_rate": 0.00015918192987943733, "loss": 0.925, "step": 19638 }, { "epoch": 0.5042744507086103, "grad_norm": 0.83203125, "learning_rate": 0.00015917833135844177, "loss": 0.8018, "step": 19639 }, { "epoch": 0.5043001279045322, "grad_norm": 0.8125, "learning_rate": 0.0001591747327195091, "loss": 0.883, "step": 19640 }, { "epoch": 0.504325805100454, "grad_norm": 0.76171875, "learning_rate": 0.00015917113396264658, "loss": 0.924, "step": 19641 }, { "epoch": 0.5043514822963758, "grad_norm": 0.8125, "learning_rate": 0.0001591675350878613, "loss": 0.7788, "step": 19642 }, { "epoch": 0.5043771594922977, "grad_norm": 0.78125, "learning_rate": 0.00015916393609516046, "loss": 0.8082, "step": 19643 }, { "epoch": 0.5044028366882195, "grad_norm": 0.80078125, "learning_rate": 0.00015916033698455125, "loss": 0.8971, "step": 19644 }, { "epoch": 0.5044285138841412, "grad_norm": 0.75, "learning_rate": 0.00015915673775604086, "loss": 0.9235, "step": 19645 }, { "epoch": 0.5044541910800631, "grad_norm": 0.76953125, "learning_rate": 0.0001591531384096364, "loss": 0.8672, "step": 19646 }, { "epoch": 0.5044798682759849, "grad_norm": 0.72265625, "learning_rate": 0.0001591495389453451, "loss": 0.883, "step": 19647 }, { "epoch": 0.5045055454719067, "grad_norm": 0.76171875, "learning_rate": 0.0001591459393631741, "loss": 0.9217, "step": 19648 }, { "epoch": 0.5045312226678286, "grad_norm": 0.8125, "learning_rate": 0.00015914233966313058, "loss": 1.0045, "step": 19649 }, { "epoch": 0.5045568998637504, "grad_norm": 0.7890625, "learning_rate": 0.00015913873984522176, "loss": 1.0272, "step": 19650 }, { "epoch": 0.5045825770596722, "grad_norm": 0.79296875, "learning_rate": 0.00015913513990945474, "loss": 0.8388, "step": 19651 }, { "epoch": 0.504608254255594, "grad_norm": 0.78515625, "learning_rate": 0.0001591315398558367, "loss": 0.8416, "step": 19652 }, { "epoch": 0.5046339314515158, "grad_norm": 0.734375, "learning_rate": 0.00015912793968437492, "loss": 0.9213, "step": 19653 }, { "epoch": 0.5046596086474376, "grad_norm": 0.98046875, "learning_rate": 0.00015912433939507646, "loss": 1.0575, "step": 19654 }, { "epoch": 0.5046852858433595, "grad_norm": 0.765625, "learning_rate": 0.0001591207389879485, "loss": 0.8532, "step": 19655 }, { "epoch": 0.5047109630392813, "grad_norm": 0.8203125, "learning_rate": 0.00015911713846299827, "loss": 1.0256, "step": 19656 }, { "epoch": 0.5047366402352031, "grad_norm": 0.7421875, "learning_rate": 0.0001591135378202329, "loss": 1.0777, "step": 19657 }, { "epoch": 0.504762317431125, "grad_norm": 0.796875, "learning_rate": 0.00015910993705965963, "loss": 0.8674, "step": 19658 }, { "epoch": 0.5047879946270467, "grad_norm": 0.71484375, "learning_rate": 0.00015910633618128559, "loss": 0.758, "step": 19659 }, { "epoch": 0.5048136718229685, "grad_norm": 0.828125, "learning_rate": 0.00015910273518511794, "loss": 0.829, "step": 19660 }, { "epoch": 0.5048393490188904, "grad_norm": 0.71484375, "learning_rate": 0.00015909913407116387, "loss": 0.7912, "step": 19661 }, { "epoch": 0.5048650262148122, "grad_norm": 0.76171875, "learning_rate": 0.00015909553283943059, "loss": 0.7748, "step": 19662 }, { "epoch": 0.504890703410734, "grad_norm": 0.734375, "learning_rate": 0.00015909193148992524, "loss": 0.8781, "step": 19663 }, { "epoch": 0.5049163806066559, "grad_norm": 0.76953125, "learning_rate": 0.00015908833002265503, "loss": 0.9778, "step": 19664 }, { "epoch": 0.5049420578025776, "grad_norm": 0.72265625, "learning_rate": 0.00015908472843762707, "loss": 0.9144, "step": 19665 }, { "epoch": 0.5049677349984995, "grad_norm": 0.7890625, "learning_rate": 0.00015908112673484863, "loss": 0.9038, "step": 19666 }, { "epoch": 0.5049934121944213, "grad_norm": 0.765625, "learning_rate": 0.0001590775249143268, "loss": 0.9736, "step": 19667 }, { "epoch": 0.5050190893903431, "grad_norm": 0.796875, "learning_rate": 0.00015907392297606884, "loss": 0.7953, "step": 19668 }, { "epoch": 0.505044766586265, "grad_norm": 0.77734375, "learning_rate": 0.00015907032092008186, "loss": 1.0116, "step": 19669 }, { "epoch": 0.5050704437821868, "grad_norm": 0.8671875, "learning_rate": 0.00015906671874637307, "loss": 0.9601, "step": 19670 }, { "epoch": 0.5050961209781086, "grad_norm": 0.8515625, "learning_rate": 0.00015906311645494964, "loss": 0.9269, "step": 19671 }, { "epoch": 0.5051217981740304, "grad_norm": 0.82421875, "learning_rate": 0.00015905951404581877, "loss": 0.8846, "step": 19672 }, { "epoch": 0.5051474753699522, "grad_norm": 0.953125, "learning_rate": 0.0001590559115189876, "loss": 0.8974, "step": 19673 }, { "epoch": 0.505173152565874, "grad_norm": 0.71875, "learning_rate": 0.00015905230887446335, "loss": 0.8501, "step": 19674 }, { "epoch": 0.5051988297617959, "grad_norm": 0.73828125, "learning_rate": 0.0001590487061122532, "loss": 0.765, "step": 19675 }, { "epoch": 0.5052245069577177, "grad_norm": 0.79296875, "learning_rate": 0.00015904510323236427, "loss": 0.8128, "step": 19676 }, { "epoch": 0.5052501841536395, "grad_norm": 0.765625, "learning_rate": 0.0001590415002348038, "loss": 0.8343, "step": 19677 }, { "epoch": 0.5052758613495614, "grad_norm": 0.7265625, "learning_rate": 0.00015903789711957898, "loss": 0.9539, "step": 19678 }, { "epoch": 0.5053015385454831, "grad_norm": 0.80078125, "learning_rate": 0.00015903429388669694, "loss": 0.9282, "step": 19679 }, { "epoch": 0.5053272157414049, "grad_norm": 0.77734375, "learning_rate": 0.00015903069053616487, "loss": 0.7323, "step": 19680 }, { "epoch": 0.5053528929373268, "grad_norm": 0.86328125, "learning_rate": 0.00015902708706799, "loss": 1.0715, "step": 19681 }, { "epoch": 0.5053785701332486, "grad_norm": 0.76171875, "learning_rate": 0.00015902348348217945, "loss": 0.8084, "step": 19682 }, { "epoch": 0.5054042473291704, "grad_norm": 0.70703125, "learning_rate": 0.00015901987977874046, "loss": 0.9386, "step": 19683 }, { "epoch": 0.5054299245250923, "grad_norm": 0.78125, "learning_rate": 0.00015901627595768015, "loss": 0.9136, "step": 19684 }, { "epoch": 0.505455601721014, "grad_norm": 0.796875, "learning_rate": 0.00015901267201900575, "loss": 0.9184, "step": 19685 }, { "epoch": 0.5054812789169358, "grad_norm": 0.75390625, "learning_rate": 0.00015900906796272445, "loss": 0.7956, "step": 19686 }, { "epoch": 0.5055069561128577, "grad_norm": 0.79296875, "learning_rate": 0.00015900546378884336, "loss": 0.8756, "step": 19687 }, { "epoch": 0.5055326333087795, "grad_norm": 0.77734375, "learning_rate": 0.0001590018594973697, "loss": 0.8969, "step": 19688 }, { "epoch": 0.5055583105047013, "grad_norm": 0.71875, "learning_rate": 0.00015899825508831073, "loss": 0.7725, "step": 19689 }, { "epoch": 0.5055839877006232, "grad_norm": 0.8359375, "learning_rate": 0.00015899465056167352, "loss": 1.0612, "step": 19690 }, { "epoch": 0.505609664896545, "grad_norm": 0.74609375, "learning_rate": 0.0001589910459174653, "loss": 0.8229, "step": 19691 }, { "epoch": 0.5056353420924667, "grad_norm": 0.74609375, "learning_rate": 0.00015898744115569325, "loss": 0.8448, "step": 19692 }, { "epoch": 0.5056610192883886, "grad_norm": 0.828125, "learning_rate": 0.00015898383627636458, "loss": 0.9897, "step": 19693 }, { "epoch": 0.5056866964843104, "grad_norm": 0.73046875, "learning_rate": 0.00015898023127948645, "loss": 0.8275, "step": 19694 }, { "epoch": 0.5057123736802323, "grad_norm": 0.8984375, "learning_rate": 0.00015897662616506602, "loss": 0.9379, "step": 19695 }, { "epoch": 0.5057380508761541, "grad_norm": 0.75390625, "learning_rate": 0.00015897302093311052, "loss": 0.7783, "step": 19696 }, { "epoch": 0.5057637280720759, "grad_norm": 0.7265625, "learning_rate": 0.00015896941558362712, "loss": 0.7836, "step": 19697 }, { "epoch": 0.5057894052679978, "grad_norm": 0.7421875, "learning_rate": 0.00015896581011662302, "loss": 0.8902, "step": 19698 }, { "epoch": 0.5058150824639195, "grad_norm": 0.74609375, "learning_rate": 0.00015896220453210535, "loss": 0.7569, "step": 19699 }, { "epoch": 0.5058407596598413, "grad_norm": 0.765625, "learning_rate": 0.00015895859883008137, "loss": 0.9877, "step": 19700 }, { "epoch": 0.5058664368557632, "grad_norm": 0.80078125, "learning_rate": 0.0001589549930105582, "loss": 0.9875, "step": 19701 }, { "epoch": 0.505892114051685, "grad_norm": 0.734375, "learning_rate": 0.00015895138707354306, "loss": 1.006, "step": 19702 }, { "epoch": 0.5059177912476068, "grad_norm": 0.75, "learning_rate": 0.00015894778101904313, "loss": 0.7765, "step": 19703 }, { "epoch": 0.5059434684435287, "grad_norm": 0.79296875, "learning_rate": 0.0001589441748470656, "loss": 0.8796, "step": 19704 }, { "epoch": 0.5059691456394504, "grad_norm": 0.828125, "learning_rate": 0.00015894056855761765, "loss": 0.8918, "step": 19705 }, { "epoch": 0.5059948228353722, "grad_norm": 0.7734375, "learning_rate": 0.00015893696215070645, "loss": 0.8387, "step": 19706 }, { "epoch": 0.5060205000312941, "grad_norm": 0.83203125, "learning_rate": 0.00015893335562633927, "loss": 1.0203, "step": 19707 }, { "epoch": 0.5060461772272159, "grad_norm": 0.80078125, "learning_rate": 0.00015892974898452317, "loss": 0.9474, "step": 19708 }, { "epoch": 0.5060718544231377, "grad_norm": 0.8046875, "learning_rate": 0.00015892614222526544, "loss": 0.8321, "step": 19709 }, { "epoch": 0.5060975316190596, "grad_norm": 0.76953125, "learning_rate": 0.00015892253534857323, "loss": 0.9837, "step": 19710 }, { "epoch": 0.5061232088149814, "grad_norm": 0.8125, "learning_rate": 0.0001589189283544537, "loss": 1.0555, "step": 19711 }, { "epoch": 0.5061488860109031, "grad_norm": 0.73046875, "learning_rate": 0.0001589153212429141, "loss": 0.8536, "step": 19712 }, { "epoch": 0.506174563206825, "grad_norm": 0.79296875, "learning_rate": 0.0001589117140139616, "loss": 0.9621, "step": 19713 }, { "epoch": 0.5062002404027468, "grad_norm": 0.73046875, "learning_rate": 0.00015890810666760333, "loss": 0.8254, "step": 19714 }, { "epoch": 0.5062259175986686, "grad_norm": 0.76171875, "learning_rate": 0.00015890449920384655, "loss": 0.9255, "step": 19715 }, { "epoch": 0.5062515947945905, "grad_norm": 0.87890625, "learning_rate": 0.0001589008916226984, "loss": 0.8901, "step": 19716 }, { "epoch": 0.5062772719905123, "grad_norm": 0.73046875, "learning_rate": 0.00015889728392416613, "loss": 0.8581, "step": 19717 }, { "epoch": 0.5063029491864341, "grad_norm": 0.84375, "learning_rate": 0.00015889367610825685, "loss": 0.9976, "step": 19718 }, { "epoch": 0.5063286263823559, "grad_norm": 0.78515625, "learning_rate": 0.00015889006817497783, "loss": 0.8941, "step": 19719 }, { "epoch": 0.5063543035782777, "grad_norm": 0.8125, "learning_rate": 0.00015888646012433624, "loss": 0.9126, "step": 19720 }, { "epoch": 0.5063799807741995, "grad_norm": 0.78515625, "learning_rate": 0.00015888285195633922, "loss": 0.8972, "step": 19721 }, { "epoch": 0.5064056579701214, "grad_norm": 0.7578125, "learning_rate": 0.000158879243670994, "loss": 0.8239, "step": 19722 }, { "epoch": 0.5064313351660432, "grad_norm": 0.87890625, "learning_rate": 0.00015887563526830777, "loss": 1.0193, "step": 19723 }, { "epoch": 0.506457012361965, "grad_norm": 0.82421875, "learning_rate": 0.00015887202674828773, "loss": 0.9249, "step": 19724 }, { "epoch": 0.5064826895578868, "grad_norm": 0.828125, "learning_rate": 0.00015886841811094105, "loss": 0.851, "step": 19725 }, { "epoch": 0.5065083667538086, "grad_norm": 0.78125, "learning_rate": 0.00015886480935627491, "loss": 0.9042, "step": 19726 }, { "epoch": 0.5065340439497305, "grad_norm": 0.83203125, "learning_rate": 0.00015886120048429653, "loss": 0.9192, "step": 19727 }, { "epoch": 0.5065597211456523, "grad_norm": 0.8046875, "learning_rate": 0.00015885759149501309, "loss": 0.8427, "step": 19728 }, { "epoch": 0.5065853983415741, "grad_norm": 0.83203125, "learning_rate": 0.0001588539823884318, "loss": 0.848, "step": 19729 }, { "epoch": 0.506611075537496, "grad_norm": 0.78125, "learning_rate": 0.00015885037316455984, "loss": 0.8733, "step": 19730 }, { "epoch": 0.5066367527334178, "grad_norm": 0.7421875, "learning_rate": 0.0001588467638234044, "loss": 0.8913, "step": 19731 }, { "epoch": 0.5066624299293395, "grad_norm": 0.72265625, "learning_rate": 0.0001588431543649727, "loss": 0.889, "step": 19732 }, { "epoch": 0.5066881071252614, "grad_norm": 0.8828125, "learning_rate": 0.00015883954478927188, "loss": 0.9985, "step": 19733 }, { "epoch": 0.5067137843211832, "grad_norm": 0.8203125, "learning_rate": 0.00015883593509630916, "loss": 0.947, "step": 19734 }, { "epoch": 0.506739461517105, "grad_norm": 0.78125, "learning_rate": 0.00015883232528609174, "loss": 0.8555, "step": 19735 }, { "epoch": 0.5067651387130269, "grad_norm": 0.76953125, "learning_rate": 0.0001588287153586268, "loss": 0.7639, "step": 19736 }, { "epoch": 0.5067908159089487, "grad_norm": 0.75, "learning_rate": 0.00015882510531392154, "loss": 0.8848, "step": 19737 }, { "epoch": 0.5068164931048705, "grad_norm": 0.75, "learning_rate": 0.00015882149515198318, "loss": 0.814, "step": 19738 }, { "epoch": 0.5068421703007923, "grad_norm": 0.75, "learning_rate": 0.0001588178848728189, "loss": 0.7984, "step": 19739 }, { "epoch": 0.5068678474967141, "grad_norm": 0.79296875, "learning_rate": 0.0001588142744764359, "loss": 0.8724, "step": 19740 }, { "epoch": 0.5068935246926359, "grad_norm": 0.79296875, "learning_rate": 0.0001588106639628413, "loss": 0.8547, "step": 19741 }, { "epoch": 0.5069192018885578, "grad_norm": 0.80078125, "learning_rate": 0.0001588070533320424, "loss": 0.9933, "step": 19742 }, { "epoch": 0.5069448790844796, "grad_norm": 0.8125, "learning_rate": 0.00015880344258404637, "loss": 1.0168, "step": 19743 }, { "epoch": 0.5069705562804014, "grad_norm": 0.76953125, "learning_rate": 0.00015879983171886036, "loss": 0.9201, "step": 19744 }, { "epoch": 0.5069962334763232, "grad_norm": 0.78515625, "learning_rate": 0.00015879622073649164, "loss": 0.9386, "step": 19745 }, { "epoch": 0.507021910672245, "grad_norm": 0.80078125, "learning_rate": 0.00015879260963694734, "loss": 0.8658, "step": 19746 }, { "epoch": 0.5070475878681668, "grad_norm": 0.765625, "learning_rate": 0.00015878899842023466, "loss": 0.8742, "step": 19747 }, { "epoch": 0.5070732650640887, "grad_norm": 0.76171875, "learning_rate": 0.00015878538708636084, "loss": 0.7594, "step": 19748 }, { "epoch": 0.5070989422600105, "grad_norm": 0.73828125, "learning_rate": 0.00015878177563533302, "loss": 0.8118, "step": 19749 }, { "epoch": 0.5071246194559323, "grad_norm": 0.83203125, "learning_rate": 0.00015877816406715848, "loss": 0.8535, "step": 19750 }, { "epoch": 0.5071502966518542, "grad_norm": 0.7578125, "learning_rate": 0.00015877455238184432, "loss": 0.885, "step": 19751 }, { "epoch": 0.5071759738477759, "grad_norm": 0.70703125, "learning_rate": 0.00015877094057939784, "loss": 0.7393, "step": 19752 }, { "epoch": 0.5072016510436977, "grad_norm": 0.77734375, "learning_rate": 0.00015876732865982614, "loss": 0.7635, "step": 19753 }, { "epoch": 0.5072273282396196, "grad_norm": 0.76953125, "learning_rate": 0.00015876371662313648, "loss": 0.8568, "step": 19754 }, { "epoch": 0.5072530054355414, "grad_norm": 0.71484375, "learning_rate": 0.00015876010446933602, "loss": 0.9677, "step": 19755 }, { "epoch": 0.5072786826314633, "grad_norm": 0.7578125, "learning_rate": 0.00015875649219843202, "loss": 0.8384, "step": 19756 }, { "epoch": 0.5073043598273851, "grad_norm": 0.83984375, "learning_rate": 0.00015875287981043158, "loss": 0.9676, "step": 19757 }, { "epoch": 0.5073300370233069, "grad_norm": 0.77734375, "learning_rate": 0.00015874926730534203, "loss": 0.8633, "step": 19758 }, { "epoch": 0.5073557142192286, "grad_norm": 0.83984375, "learning_rate": 0.00015874565468317045, "loss": 0.8135, "step": 19759 }, { "epoch": 0.5073813914151505, "grad_norm": 0.75, "learning_rate": 0.0001587420419439241, "loss": 0.8638, "step": 19760 }, { "epoch": 0.5074070686110723, "grad_norm": 0.765625, "learning_rate": 0.00015873842908761015, "loss": 0.9123, "step": 19761 }, { "epoch": 0.5074327458069942, "grad_norm": 0.73828125, "learning_rate": 0.00015873481611423582, "loss": 0.8154, "step": 19762 }, { "epoch": 0.507458423002916, "grad_norm": 0.734375, "learning_rate": 0.00015873120302380833, "loss": 0.8569, "step": 19763 }, { "epoch": 0.5074841001988378, "grad_norm": 0.75, "learning_rate": 0.00015872758981633485, "loss": 1.0976, "step": 19764 }, { "epoch": 0.5075097773947596, "grad_norm": 0.7265625, "learning_rate": 0.00015872397649182261, "loss": 0.8183, "step": 19765 }, { "epoch": 0.5075354545906814, "grad_norm": 0.75, "learning_rate": 0.00015872036305027872, "loss": 0.8621, "step": 19766 }, { "epoch": 0.5075611317866032, "grad_norm": 0.82421875, "learning_rate": 0.00015871674949171052, "loss": 0.9297, "step": 19767 }, { "epoch": 0.5075868089825251, "grad_norm": 0.7421875, "learning_rate": 0.0001587131358161251, "loss": 0.8492, "step": 19768 }, { "epoch": 0.5076124861784469, "grad_norm": 0.71875, "learning_rate": 0.00015870952202352972, "loss": 0.9092, "step": 19769 }, { "epoch": 0.5076381633743687, "grad_norm": 0.76953125, "learning_rate": 0.00015870590811393155, "loss": 0.803, "step": 19770 }, { "epoch": 0.5076638405702906, "grad_norm": 0.890625, "learning_rate": 0.00015870229408733786, "loss": 0.8297, "step": 19771 }, { "epoch": 0.5076895177662123, "grad_norm": 0.8203125, "learning_rate": 0.00015869867994375574, "loss": 0.8531, "step": 19772 }, { "epoch": 0.5077151949621341, "grad_norm": 0.69921875, "learning_rate": 0.00015869506568319248, "loss": 0.7561, "step": 19773 }, { "epoch": 0.507740872158056, "grad_norm": 0.7890625, "learning_rate": 0.00015869145130565526, "loss": 1.0773, "step": 19774 }, { "epoch": 0.5077665493539778, "grad_norm": 0.74609375, "learning_rate": 0.00015868783681115124, "loss": 0.7801, "step": 19775 }, { "epoch": 0.5077922265498996, "grad_norm": 0.828125, "learning_rate": 0.00015868422219968771, "loss": 0.9292, "step": 19776 }, { "epoch": 0.5078179037458215, "grad_norm": 0.81640625, "learning_rate": 0.0001586806074712718, "loss": 0.8734, "step": 19777 }, { "epoch": 0.5078435809417433, "grad_norm": 0.7734375, "learning_rate": 0.00015867699262591076, "loss": 0.9888, "step": 19778 }, { "epoch": 0.507869258137665, "grad_norm": 0.734375, "learning_rate": 0.00015867337766361177, "loss": 0.8066, "step": 19779 }, { "epoch": 0.5078949353335869, "grad_norm": 0.77734375, "learning_rate": 0.00015866976258438204, "loss": 0.8994, "step": 19780 }, { "epoch": 0.5079206125295087, "grad_norm": 0.75390625, "learning_rate": 0.00015866614738822875, "loss": 0.8238, "step": 19781 }, { "epoch": 0.5079462897254305, "grad_norm": 0.8046875, "learning_rate": 0.00015866253207515914, "loss": 0.8708, "step": 19782 }, { "epoch": 0.5079719669213524, "grad_norm": 0.7734375, "learning_rate": 0.0001586589166451804, "loss": 0.8216, "step": 19783 }, { "epoch": 0.5079976441172742, "grad_norm": 0.78125, "learning_rate": 0.00015865530109829975, "loss": 0.9339, "step": 19784 }, { "epoch": 0.5080233213131959, "grad_norm": 0.75, "learning_rate": 0.0001586516854345244, "loss": 0.969, "step": 19785 }, { "epoch": 0.5080489985091178, "grad_norm": 0.765625, "learning_rate": 0.00015864806965386148, "loss": 0.9038, "step": 19786 }, { "epoch": 0.5080746757050396, "grad_norm": 0.75, "learning_rate": 0.00015864445375631832, "loss": 0.8471, "step": 19787 }, { "epoch": 0.5081003529009614, "grad_norm": 0.76171875, "learning_rate": 0.000158640837741902, "loss": 0.8639, "step": 19788 }, { "epoch": 0.5081260300968833, "grad_norm": 0.7109375, "learning_rate": 0.00015863722161061982, "loss": 0.8106, "step": 19789 }, { "epoch": 0.5081517072928051, "grad_norm": 0.76171875, "learning_rate": 0.00015863360536247896, "loss": 0.925, "step": 19790 }, { "epoch": 0.508177384488727, "grad_norm": 0.80078125, "learning_rate": 0.0001586299889974866, "loss": 0.945, "step": 19791 }, { "epoch": 0.5082030616846487, "grad_norm": 0.79296875, "learning_rate": 0.00015862637251565, "loss": 0.9878, "step": 19792 }, { "epoch": 0.5082287388805705, "grad_norm": 0.765625, "learning_rate": 0.0001586227559169763, "loss": 0.8763, "step": 19793 }, { "epoch": 0.5082544160764924, "grad_norm": 0.828125, "learning_rate": 0.00015861913920147278, "loss": 0.8696, "step": 19794 }, { "epoch": 0.5082800932724142, "grad_norm": 0.73828125, "learning_rate": 0.00015861552236914655, "loss": 0.8456, "step": 19795 }, { "epoch": 0.508305770468336, "grad_norm": 0.85546875, "learning_rate": 0.00015861190542000494, "loss": 0.908, "step": 19796 }, { "epoch": 0.5083314476642579, "grad_norm": 0.77734375, "learning_rate": 0.00015860828835405507, "loss": 1.0337, "step": 19797 }, { "epoch": 0.5083571248601797, "grad_norm": 0.79296875, "learning_rate": 0.0001586046711713042, "loss": 0.8549, "step": 19798 }, { "epoch": 0.5083828020561014, "grad_norm": 0.78515625, "learning_rate": 0.0001586010538717595, "loss": 0.9849, "step": 19799 }, { "epoch": 0.5084084792520233, "grad_norm": 0.75390625, "learning_rate": 0.00015859743645542818, "loss": 0.7869, "step": 19800 }, { "epoch": 0.5084341564479451, "grad_norm": 0.6796875, "learning_rate": 0.00015859381892231747, "loss": 0.9345, "step": 19801 }, { "epoch": 0.5084598336438669, "grad_norm": 0.8046875, "learning_rate": 0.00015859020127243455, "loss": 0.9468, "step": 19802 }, { "epoch": 0.5084855108397888, "grad_norm": 0.8671875, "learning_rate": 0.0001585865835057867, "loss": 0.8714, "step": 19803 }, { "epoch": 0.5085111880357106, "grad_norm": 0.75, "learning_rate": 0.00015858296562238104, "loss": 0.9652, "step": 19804 }, { "epoch": 0.5085368652316323, "grad_norm": 0.859375, "learning_rate": 0.00015857934762222487, "loss": 0.9456, "step": 19805 }, { "epoch": 0.5085625424275542, "grad_norm": 0.6796875, "learning_rate": 0.00015857572950532528, "loss": 0.8556, "step": 19806 }, { "epoch": 0.508588219623476, "grad_norm": 0.78125, "learning_rate": 0.0001585721112716896, "loss": 0.8001, "step": 19807 }, { "epoch": 0.5086138968193978, "grad_norm": 0.76171875, "learning_rate": 0.00015856849292132495, "loss": 0.9251, "step": 19808 }, { "epoch": 0.5086395740153197, "grad_norm": 0.8515625, "learning_rate": 0.00015856487445423866, "loss": 1.005, "step": 19809 }, { "epoch": 0.5086652512112415, "grad_norm": 0.78125, "learning_rate": 0.0001585612558704378, "loss": 0.8441, "step": 19810 }, { "epoch": 0.5086909284071633, "grad_norm": 0.796875, "learning_rate": 0.00015855763716992967, "loss": 0.8062, "step": 19811 }, { "epoch": 0.5087166056030851, "grad_norm": 0.7734375, "learning_rate": 0.00015855401835272146, "loss": 0.8489, "step": 19812 }, { "epoch": 0.5087422827990069, "grad_norm": 0.8046875, "learning_rate": 0.0001585503994188204, "loss": 0.7784, "step": 19813 }, { "epoch": 0.5087679599949287, "grad_norm": 0.8515625, "learning_rate": 0.00015854678036823362, "loss": 0.9679, "step": 19814 }, { "epoch": 0.5087936371908506, "grad_norm": 0.7265625, "learning_rate": 0.00015854316120096846, "loss": 0.8414, "step": 19815 }, { "epoch": 0.5088193143867724, "grad_norm": 0.71484375, "learning_rate": 0.00015853954191703205, "loss": 0.9303, "step": 19816 }, { "epoch": 0.5088449915826943, "grad_norm": 0.703125, "learning_rate": 0.0001585359225164316, "loss": 0.7868, "step": 19817 }, { "epoch": 0.5088706687786161, "grad_norm": 0.76171875, "learning_rate": 0.00015853230299917435, "loss": 0.8637, "step": 19818 }, { "epoch": 0.5088963459745378, "grad_norm": 0.7734375, "learning_rate": 0.00015852868336526754, "loss": 0.9117, "step": 19819 }, { "epoch": 0.5089220231704596, "grad_norm": 0.78125, "learning_rate": 0.0001585250636147183, "loss": 0.7909, "step": 19820 }, { "epoch": 0.5089477003663815, "grad_norm": 0.82421875, "learning_rate": 0.00015852144374753393, "loss": 0.8896, "step": 19821 }, { "epoch": 0.5089733775623033, "grad_norm": 0.75, "learning_rate": 0.0001585178237637216, "loss": 0.8227, "step": 19822 }, { "epoch": 0.5089990547582252, "grad_norm": 0.765625, "learning_rate": 0.00015851420366328853, "loss": 0.8623, "step": 19823 }, { "epoch": 0.509024731954147, "grad_norm": 0.796875, "learning_rate": 0.00015851058344624195, "loss": 0.7859, "step": 19824 }, { "epoch": 0.5090504091500687, "grad_norm": 0.765625, "learning_rate": 0.00015850696311258905, "loss": 0.9706, "step": 19825 }, { "epoch": 0.5090760863459906, "grad_norm": 0.78515625, "learning_rate": 0.00015850334266233706, "loss": 0.939, "step": 19826 }, { "epoch": 0.5091017635419124, "grad_norm": 0.8125, "learning_rate": 0.00015849972209549317, "loss": 0.8475, "step": 19827 }, { "epoch": 0.5091274407378342, "grad_norm": 0.79296875, "learning_rate": 0.00015849610141206467, "loss": 0.9012, "step": 19828 }, { "epoch": 0.5091531179337561, "grad_norm": 0.76171875, "learning_rate": 0.00015849248061205868, "loss": 0.8871, "step": 19829 }, { "epoch": 0.5091787951296779, "grad_norm": 0.796875, "learning_rate": 0.00015848885969548245, "loss": 0.8405, "step": 19830 }, { "epoch": 0.5092044723255997, "grad_norm": 0.76953125, "learning_rate": 0.00015848523866234326, "loss": 0.8531, "step": 19831 }, { "epoch": 0.5092301495215215, "grad_norm": 0.84375, "learning_rate": 0.00015848161751264825, "loss": 1.0523, "step": 19832 }, { "epoch": 0.5092558267174433, "grad_norm": 0.734375, "learning_rate": 0.00015847799624640465, "loss": 0.8398, "step": 19833 }, { "epoch": 0.5092815039133651, "grad_norm": 0.76171875, "learning_rate": 0.0001584743748636197, "loss": 0.8274, "step": 19834 }, { "epoch": 0.509307181109287, "grad_norm": 0.75, "learning_rate": 0.00015847075336430057, "loss": 0.8475, "step": 19835 }, { "epoch": 0.5093328583052088, "grad_norm": 0.78125, "learning_rate": 0.00015846713174845454, "loss": 0.8213, "step": 19836 }, { "epoch": 0.5093585355011306, "grad_norm": 0.7890625, "learning_rate": 0.0001584635100160888, "loss": 0.7849, "step": 19837 }, { "epoch": 0.5093842126970524, "grad_norm": 0.71875, "learning_rate": 0.00015845988816721052, "loss": 0.8268, "step": 19838 }, { "epoch": 0.5094098898929742, "grad_norm": 0.78125, "learning_rate": 0.000158456266201827, "loss": 0.873, "step": 19839 }, { "epoch": 0.509435567088896, "grad_norm": 0.8046875, "learning_rate": 0.00015845264411994543, "loss": 0.9794, "step": 19840 }, { "epoch": 0.5094612442848179, "grad_norm": 0.7734375, "learning_rate": 0.00015844902192157302, "loss": 0.8032, "step": 19841 }, { "epoch": 0.5094869214807397, "grad_norm": 0.7734375, "learning_rate": 0.00015844539960671697, "loss": 0.823, "step": 19842 }, { "epoch": 0.5095125986766615, "grad_norm": 0.8125, "learning_rate": 0.00015844177717538454, "loss": 0.8069, "step": 19843 }, { "epoch": 0.5095382758725834, "grad_norm": 0.8203125, "learning_rate": 0.0001584381546275829, "loss": 1.0138, "step": 19844 }, { "epoch": 0.5095639530685051, "grad_norm": 0.80078125, "learning_rate": 0.0001584345319633193, "loss": 0.8944, "step": 19845 }, { "epoch": 0.5095896302644269, "grad_norm": 0.8203125, "learning_rate": 0.00015843090918260096, "loss": 0.7931, "step": 19846 }, { "epoch": 0.5096153074603488, "grad_norm": 0.75390625, "learning_rate": 0.00015842728628543513, "loss": 1.0101, "step": 19847 }, { "epoch": 0.5096409846562706, "grad_norm": 0.69140625, "learning_rate": 0.00015842366327182896, "loss": 0.8785, "step": 19848 }, { "epoch": 0.5096666618521924, "grad_norm": 0.796875, "learning_rate": 0.00015842004014178973, "loss": 1.0017, "step": 19849 }, { "epoch": 0.5096923390481143, "grad_norm": 0.76171875, "learning_rate": 0.0001584164168953246, "loss": 0.8373, "step": 19850 }, { "epoch": 0.5097180162440361, "grad_norm": 0.79296875, "learning_rate": 0.00015841279353244088, "loss": 0.9645, "step": 19851 }, { "epoch": 0.5097436934399578, "grad_norm": 0.68359375, "learning_rate": 0.00015840917005314567, "loss": 0.8105, "step": 19852 }, { "epoch": 0.5097693706358797, "grad_norm": 0.82421875, "learning_rate": 0.00015840554645744632, "loss": 1.0051, "step": 19853 }, { "epoch": 0.5097950478318015, "grad_norm": 0.69140625, "learning_rate": 0.00015840192274534998, "loss": 0.9125, "step": 19854 }, { "epoch": 0.5098207250277234, "grad_norm": 0.75, "learning_rate": 0.00015839829891686386, "loss": 0.8721, "step": 19855 }, { "epoch": 0.5098464022236452, "grad_norm": 0.703125, "learning_rate": 0.00015839467497199522, "loss": 0.7768, "step": 19856 }, { "epoch": 0.509872079419567, "grad_norm": 0.81640625, "learning_rate": 0.00015839105091075128, "loss": 0.9598, "step": 19857 }, { "epoch": 0.5098977566154888, "grad_norm": 0.7265625, "learning_rate": 0.00015838742673313924, "loss": 0.8345, "step": 19858 }, { "epoch": 0.5099234338114106, "grad_norm": 0.7890625, "learning_rate": 0.00015838380243916633, "loss": 0.8979, "step": 19859 }, { "epoch": 0.5099491110073324, "grad_norm": 0.83203125, "learning_rate": 0.00015838017802883978, "loss": 0.8897, "step": 19860 }, { "epoch": 0.5099747882032543, "grad_norm": 0.859375, "learning_rate": 0.0001583765535021668, "loss": 0.8766, "step": 19861 }, { "epoch": 0.5100004653991761, "grad_norm": 0.7421875, "learning_rate": 0.00015837292885915463, "loss": 0.9461, "step": 19862 }, { "epoch": 0.5100261425950979, "grad_norm": 0.7265625, "learning_rate": 0.00015836930409981047, "loss": 0.9116, "step": 19863 }, { "epoch": 0.5100518197910198, "grad_norm": 0.734375, "learning_rate": 0.00015836567922414155, "loss": 0.7574, "step": 19864 }, { "epoch": 0.5100774969869415, "grad_norm": 0.83984375, "learning_rate": 0.00015836205423215514, "loss": 0.8935, "step": 19865 }, { "epoch": 0.5101031741828633, "grad_norm": 0.7265625, "learning_rate": 0.0001583584291238584, "loss": 0.8598, "step": 19866 }, { "epoch": 0.5101288513787852, "grad_norm": 0.70703125, "learning_rate": 0.0001583548038992586, "loss": 0.7441, "step": 19867 }, { "epoch": 0.510154528574707, "grad_norm": 0.75, "learning_rate": 0.00015835117855836294, "loss": 0.8852, "step": 19868 }, { "epoch": 0.5101802057706288, "grad_norm": 0.734375, "learning_rate": 0.00015834755310117866, "loss": 0.9333, "step": 19869 }, { "epoch": 0.5102058829665507, "grad_norm": 0.74609375, "learning_rate": 0.000158343927527713, "loss": 0.8093, "step": 19870 }, { "epoch": 0.5102315601624725, "grad_norm": 0.77734375, "learning_rate": 0.00015834030183797313, "loss": 0.8811, "step": 19871 }, { "epoch": 0.5102572373583942, "grad_norm": 0.828125, "learning_rate": 0.0001583366760319663, "loss": 0.8204, "step": 19872 }, { "epoch": 0.5102829145543161, "grad_norm": 0.78515625, "learning_rate": 0.00015833305010969973, "loss": 0.8768, "step": 19873 }, { "epoch": 0.5103085917502379, "grad_norm": 0.76171875, "learning_rate": 0.0001583294240711807, "loss": 0.9198, "step": 19874 }, { "epoch": 0.5103342689461597, "grad_norm": 0.8515625, "learning_rate": 0.00015832579791641638, "loss": 0.8093, "step": 19875 }, { "epoch": 0.5103599461420816, "grad_norm": 0.76953125, "learning_rate": 0.00015832217164541402, "loss": 0.8639, "step": 19876 }, { "epoch": 0.5103856233380034, "grad_norm": 0.7890625, "learning_rate": 0.00015831854525818085, "loss": 0.9516, "step": 19877 }, { "epoch": 0.5104113005339251, "grad_norm": 0.77734375, "learning_rate": 0.00015831491875472405, "loss": 0.8547, "step": 19878 }, { "epoch": 0.510436977729847, "grad_norm": 0.6953125, "learning_rate": 0.00015831129213505091, "loss": 0.7465, "step": 19879 }, { "epoch": 0.5104626549257688, "grad_norm": 0.72265625, "learning_rate": 0.00015830766539916863, "loss": 0.917, "step": 19880 }, { "epoch": 0.5104883321216906, "grad_norm": 0.8203125, "learning_rate": 0.00015830403854708445, "loss": 0.9747, "step": 19881 }, { "epoch": 0.5105140093176125, "grad_norm": 0.75, "learning_rate": 0.00015830041157880557, "loss": 0.8781, "step": 19882 }, { "epoch": 0.5105396865135343, "grad_norm": 0.828125, "learning_rate": 0.00015829678449433927, "loss": 0.8331, "step": 19883 }, { "epoch": 0.5105653637094562, "grad_norm": 0.8046875, "learning_rate": 0.0001582931572936927, "loss": 0.9451, "step": 19884 }, { "epoch": 0.5105910409053779, "grad_norm": 0.734375, "learning_rate": 0.00015828952997687314, "loss": 0.8449, "step": 19885 }, { "epoch": 0.5106167181012997, "grad_norm": 0.76171875, "learning_rate": 0.00015828590254388783, "loss": 1.0719, "step": 19886 }, { "epoch": 0.5106423952972216, "grad_norm": 0.7109375, "learning_rate": 0.00015828227499474398, "loss": 0.7656, "step": 19887 }, { "epoch": 0.5106680724931434, "grad_norm": 0.7734375, "learning_rate": 0.0001582786473294488, "loss": 0.8944, "step": 19888 }, { "epoch": 0.5106937496890652, "grad_norm": 0.87890625, "learning_rate": 0.00015827501954800957, "loss": 0.9195, "step": 19889 }, { "epoch": 0.5107194268849871, "grad_norm": 0.76953125, "learning_rate": 0.00015827139165043347, "loss": 0.9563, "step": 19890 }, { "epoch": 0.5107451040809089, "grad_norm": 0.7421875, "learning_rate": 0.00015826776363672774, "loss": 0.8327, "step": 19891 }, { "epoch": 0.5107707812768306, "grad_norm": 0.83984375, "learning_rate": 0.00015826413550689965, "loss": 0.9606, "step": 19892 }, { "epoch": 0.5107964584727525, "grad_norm": 0.7421875, "learning_rate": 0.00015826050726095638, "loss": 0.9429, "step": 19893 }, { "epoch": 0.5108221356686743, "grad_norm": 0.76171875, "learning_rate": 0.0001582568788989052, "loss": 0.8923, "step": 19894 }, { "epoch": 0.5108478128645961, "grad_norm": 0.73828125, "learning_rate": 0.0001582532504207533, "loss": 0.8938, "step": 19895 }, { "epoch": 0.510873490060518, "grad_norm": 0.71875, "learning_rate": 0.00015824962182650796, "loss": 0.9022, "step": 19896 }, { "epoch": 0.5108991672564398, "grad_norm": 0.78125, "learning_rate": 0.00015824599311617637, "loss": 0.7739, "step": 19897 }, { "epoch": 0.5109248444523615, "grad_norm": 0.71484375, "learning_rate": 0.00015824236428976579, "loss": 0.9604, "step": 19898 }, { "epoch": 0.5109505216482834, "grad_norm": 0.75, "learning_rate": 0.0001582387353472834, "loss": 0.9787, "step": 19899 }, { "epoch": 0.5109761988442052, "grad_norm": 0.74609375, "learning_rate": 0.00015823510628873647, "loss": 0.8112, "step": 19900 }, { "epoch": 0.511001876040127, "grad_norm": 0.8046875, "learning_rate": 0.0001582314771141323, "loss": 1.1439, "step": 19901 }, { "epoch": 0.5110275532360489, "grad_norm": 0.73046875, "learning_rate": 0.000158227847823478, "loss": 0.8674, "step": 19902 }, { "epoch": 0.5110532304319707, "grad_norm": 0.8359375, "learning_rate": 0.0001582242184167809, "loss": 0.9426, "step": 19903 }, { "epoch": 0.5110789076278925, "grad_norm": 0.85546875, "learning_rate": 0.00015822058889404815, "loss": 0.9484, "step": 19904 }, { "epoch": 0.5111045848238143, "grad_norm": 0.74609375, "learning_rate": 0.00015821695925528702, "loss": 1.0335, "step": 19905 }, { "epoch": 0.5111302620197361, "grad_norm": 0.73828125, "learning_rate": 0.00015821332950050477, "loss": 0.9196, "step": 19906 }, { "epoch": 0.5111559392156579, "grad_norm": 0.82421875, "learning_rate": 0.0001582096996297086, "loss": 0.8468, "step": 19907 }, { "epoch": 0.5111816164115798, "grad_norm": 0.84765625, "learning_rate": 0.0001582060696429058, "loss": 1.007, "step": 19908 }, { "epoch": 0.5112072936075016, "grad_norm": 0.78125, "learning_rate": 0.0001582024395401035, "loss": 0.8459, "step": 19909 }, { "epoch": 0.5112329708034234, "grad_norm": 0.765625, "learning_rate": 0.00015819880932130907, "loss": 0.8975, "step": 19910 }, { "epoch": 0.5112586479993453, "grad_norm": 0.85546875, "learning_rate": 0.00015819517898652962, "loss": 0.9994, "step": 19911 }, { "epoch": 0.511284325195267, "grad_norm": 0.82421875, "learning_rate": 0.00015819154853577246, "loss": 0.8776, "step": 19912 }, { "epoch": 0.5113100023911888, "grad_norm": 0.9140625, "learning_rate": 0.00015818791796904478, "loss": 0.8613, "step": 19913 }, { "epoch": 0.5113356795871107, "grad_norm": 0.79296875, "learning_rate": 0.00015818428728635385, "loss": 0.8543, "step": 19914 }, { "epoch": 0.5113613567830325, "grad_norm": 0.828125, "learning_rate": 0.00015818065648770687, "loss": 0.7963, "step": 19915 }, { "epoch": 0.5113870339789544, "grad_norm": 0.859375, "learning_rate": 0.0001581770255731111, "loss": 0.9842, "step": 19916 }, { "epoch": 0.5114127111748762, "grad_norm": 0.78515625, "learning_rate": 0.00015817339454257377, "loss": 0.8423, "step": 19917 }, { "epoch": 0.5114383883707979, "grad_norm": 0.83203125, "learning_rate": 0.00015816976339610213, "loss": 0.9754, "step": 19918 }, { "epoch": 0.5114640655667197, "grad_norm": 0.77734375, "learning_rate": 0.00015816613213370342, "loss": 0.9109, "step": 19919 }, { "epoch": 0.5114897427626416, "grad_norm": 0.8984375, "learning_rate": 0.00015816250075538482, "loss": 0.9397, "step": 19920 }, { "epoch": 0.5115154199585634, "grad_norm": 0.71484375, "learning_rate": 0.00015815886926115366, "loss": 0.6661, "step": 19921 }, { "epoch": 0.5115410971544853, "grad_norm": 0.75390625, "learning_rate": 0.0001581552376510171, "loss": 0.7396, "step": 19922 }, { "epoch": 0.5115667743504071, "grad_norm": 0.7890625, "learning_rate": 0.00015815160592498243, "loss": 0.9081, "step": 19923 }, { "epoch": 0.5115924515463289, "grad_norm": 0.8046875, "learning_rate": 0.00015814797408305684, "loss": 0.8332, "step": 19924 }, { "epoch": 0.5116181287422507, "grad_norm": 0.76171875, "learning_rate": 0.00015814434212524757, "loss": 0.8195, "step": 19925 }, { "epoch": 0.5116438059381725, "grad_norm": 0.74609375, "learning_rate": 0.0001581407100515619, "loss": 1.0353, "step": 19926 }, { "epoch": 0.5116694831340943, "grad_norm": 0.79296875, "learning_rate": 0.00015813707786200705, "loss": 0.8618, "step": 19927 }, { "epoch": 0.5116951603300162, "grad_norm": 0.734375, "learning_rate": 0.00015813344555659024, "loss": 0.9655, "step": 19928 }, { "epoch": 0.511720837525938, "grad_norm": 0.828125, "learning_rate": 0.00015812981313531875, "loss": 0.8537, "step": 19929 }, { "epoch": 0.5117465147218598, "grad_norm": 0.828125, "learning_rate": 0.0001581261805981998, "loss": 0.9471, "step": 19930 }, { "epoch": 0.5117721919177817, "grad_norm": 0.80078125, "learning_rate": 0.00015812254794524057, "loss": 1.0888, "step": 19931 }, { "epoch": 0.5117978691137034, "grad_norm": 0.76953125, "learning_rate": 0.00015811891517644838, "loss": 0.8918, "step": 19932 }, { "epoch": 0.5118235463096252, "grad_norm": 0.7890625, "learning_rate": 0.00015811528229183047, "loss": 0.9405, "step": 19933 }, { "epoch": 0.5118492235055471, "grad_norm": 0.7578125, "learning_rate": 0.000158111649291394, "loss": 0.8202, "step": 19934 }, { "epoch": 0.5118749007014689, "grad_norm": 0.734375, "learning_rate": 0.0001581080161751463, "loss": 0.8828, "step": 19935 }, { "epoch": 0.5119005778973907, "grad_norm": 0.86328125, "learning_rate": 0.00015810438294309458, "loss": 0.8617, "step": 19936 }, { "epoch": 0.5119262550933126, "grad_norm": 0.73828125, "learning_rate": 0.00015810074959524606, "loss": 0.8569, "step": 19937 }, { "epoch": 0.5119519322892343, "grad_norm": 0.75, "learning_rate": 0.000158097116131608, "loss": 0.8477, "step": 19938 }, { "epoch": 0.5119776094851561, "grad_norm": 0.83203125, "learning_rate": 0.0001580934825521876, "loss": 0.9335, "step": 19939 }, { "epoch": 0.512003286681078, "grad_norm": 0.78515625, "learning_rate": 0.00015808984885699217, "loss": 0.8304, "step": 19940 }, { "epoch": 0.5120289638769998, "grad_norm": 0.80859375, "learning_rate": 0.0001580862150460289, "loss": 0.8289, "step": 19941 }, { "epoch": 0.5120546410729216, "grad_norm": 0.72265625, "learning_rate": 0.00015808258111930508, "loss": 0.8398, "step": 19942 }, { "epoch": 0.5120803182688435, "grad_norm": 0.80078125, "learning_rate": 0.00015807894707682788, "loss": 0.8574, "step": 19943 }, { "epoch": 0.5121059954647653, "grad_norm": 0.85546875, "learning_rate": 0.00015807531291860464, "loss": 0.9692, "step": 19944 }, { "epoch": 0.512131672660687, "grad_norm": 0.76953125, "learning_rate": 0.0001580716786446425, "loss": 1.0133, "step": 19945 }, { "epoch": 0.5121573498566089, "grad_norm": 0.74609375, "learning_rate": 0.00015806804425494878, "loss": 0.9668, "step": 19946 }, { "epoch": 0.5121830270525307, "grad_norm": 0.7890625, "learning_rate": 0.00015806440974953066, "loss": 0.9618, "step": 19947 }, { "epoch": 0.5122087042484526, "grad_norm": 0.8046875, "learning_rate": 0.00015806077512839543, "loss": 1.0259, "step": 19948 }, { "epoch": 0.5122343814443744, "grad_norm": 0.72265625, "learning_rate": 0.00015805714039155032, "loss": 0.8611, "step": 19949 }, { "epoch": 0.5122600586402962, "grad_norm": 0.7890625, "learning_rate": 0.0001580535055390026, "loss": 0.8877, "step": 19950 }, { "epoch": 0.5122857358362181, "grad_norm": 0.7890625, "learning_rate": 0.00015804987057075947, "loss": 0.827, "step": 19951 }, { "epoch": 0.5123114130321398, "grad_norm": 0.796875, "learning_rate": 0.00015804623548682818, "loss": 0.8342, "step": 19952 }, { "epoch": 0.5123370902280616, "grad_norm": 0.7421875, "learning_rate": 0.00015804260028721601, "loss": 0.9172, "step": 19953 }, { "epoch": 0.5123627674239835, "grad_norm": 0.77734375, "learning_rate": 0.00015803896497193013, "loss": 0.863, "step": 19954 }, { "epoch": 0.5123884446199053, "grad_norm": 0.69140625, "learning_rate": 0.0001580353295409779, "loss": 0.7525, "step": 19955 }, { "epoch": 0.5124141218158271, "grad_norm": 2.078125, "learning_rate": 0.00015803169399436647, "loss": 0.834, "step": 19956 }, { "epoch": 0.512439799011749, "grad_norm": 0.7421875, "learning_rate": 0.0001580280583321031, "loss": 0.934, "step": 19957 }, { "epoch": 0.5124654762076707, "grad_norm": 0.7265625, "learning_rate": 0.00015802442255419507, "loss": 0.8787, "step": 19958 }, { "epoch": 0.5124911534035925, "grad_norm": 0.78515625, "learning_rate": 0.0001580207866606496, "loss": 0.9979, "step": 19959 }, { "epoch": 0.5125168305995144, "grad_norm": 0.875, "learning_rate": 0.00015801715065147395, "loss": 0.857, "step": 19960 }, { "epoch": 0.5125425077954362, "grad_norm": 0.80078125, "learning_rate": 0.00015801351452667537, "loss": 0.8042, "step": 19961 }, { "epoch": 0.512568184991358, "grad_norm": 0.79296875, "learning_rate": 0.00015800987828626107, "loss": 0.8993, "step": 19962 }, { "epoch": 0.5125938621872799, "grad_norm": 0.8046875, "learning_rate": 0.00015800624193023835, "loss": 0.9521, "step": 19963 }, { "epoch": 0.5126195393832017, "grad_norm": 0.734375, "learning_rate": 0.0001580026054586144, "loss": 0.7996, "step": 19964 }, { "epoch": 0.5126452165791234, "grad_norm": 0.78125, "learning_rate": 0.0001579989688713965, "loss": 0.8692, "step": 19965 }, { "epoch": 0.5126708937750453, "grad_norm": 0.78515625, "learning_rate": 0.00015799533216859192, "loss": 1.0217, "step": 19966 }, { "epoch": 0.5126965709709671, "grad_norm": 0.80859375, "learning_rate": 0.00015799169535020785, "loss": 0.9846, "step": 19967 }, { "epoch": 0.5127222481668889, "grad_norm": 0.7578125, "learning_rate": 0.0001579880584162516, "loss": 0.822, "step": 19968 }, { "epoch": 0.5127479253628108, "grad_norm": 0.73046875, "learning_rate": 0.0001579844213667304, "loss": 0.6594, "step": 19969 }, { "epoch": 0.5127736025587326, "grad_norm": 0.69921875, "learning_rate": 0.00015798078420165145, "loss": 0.9764, "step": 19970 }, { "epoch": 0.5127992797546544, "grad_norm": 0.80078125, "learning_rate": 0.00015797714692102207, "loss": 0.9391, "step": 19971 }, { "epoch": 0.5128249569505762, "grad_norm": 0.765625, "learning_rate": 0.00015797350952484942, "loss": 0.831, "step": 19972 }, { "epoch": 0.512850634146498, "grad_norm": 0.7265625, "learning_rate": 0.00015796987201314083, "loss": 0.8145, "step": 19973 }, { "epoch": 0.5128763113424198, "grad_norm": 0.7421875, "learning_rate": 0.00015796623438590353, "loss": 0.9087, "step": 19974 }, { "epoch": 0.5129019885383417, "grad_norm": 0.7578125, "learning_rate": 0.00015796259664314474, "loss": 0.9171, "step": 19975 }, { "epoch": 0.5129276657342635, "grad_norm": 0.76171875, "learning_rate": 0.00015795895878487174, "loss": 0.9701, "step": 19976 }, { "epoch": 0.5129533429301854, "grad_norm": 0.74609375, "learning_rate": 0.00015795532081109176, "loss": 0.7798, "step": 19977 }, { "epoch": 0.5129790201261071, "grad_norm": 0.7421875, "learning_rate": 0.00015795168272181211, "loss": 0.9757, "step": 19978 }, { "epoch": 0.5130046973220289, "grad_norm": 0.72265625, "learning_rate": 0.00015794804451703994, "loss": 0.8604, "step": 19979 }, { "epoch": 0.5130303745179507, "grad_norm": 0.7734375, "learning_rate": 0.00015794440619678257, "loss": 0.7572, "step": 19980 }, { "epoch": 0.5130560517138726, "grad_norm": 0.7734375, "learning_rate": 0.00015794076776104722, "loss": 0.9183, "step": 19981 }, { "epoch": 0.5130817289097944, "grad_norm": 0.8359375, "learning_rate": 0.00015793712920984118, "loss": 0.8901, "step": 19982 }, { "epoch": 0.5131074061057163, "grad_norm": 0.75390625, "learning_rate": 0.00015793349054317167, "loss": 0.8035, "step": 19983 }, { "epoch": 0.5131330833016381, "grad_norm": 0.76953125, "learning_rate": 0.0001579298517610459, "loss": 0.9123, "step": 19984 }, { "epoch": 0.5131587604975598, "grad_norm": 0.83203125, "learning_rate": 0.00015792621286347123, "loss": 0.8863, "step": 19985 }, { "epoch": 0.5131844376934817, "grad_norm": 0.75, "learning_rate": 0.00015792257385045482, "loss": 0.8997, "step": 19986 }, { "epoch": 0.5132101148894035, "grad_norm": 0.734375, "learning_rate": 0.00015791893472200393, "loss": 0.8639, "step": 19987 }, { "epoch": 0.5132357920853253, "grad_norm": 0.734375, "learning_rate": 0.0001579152954781259, "loss": 0.8479, "step": 19988 }, { "epoch": 0.5132614692812472, "grad_norm": 0.75, "learning_rate": 0.00015791165611882786, "loss": 0.8658, "step": 19989 }, { "epoch": 0.513287146477169, "grad_norm": 0.79296875, "learning_rate": 0.00015790801664411714, "loss": 0.8573, "step": 19990 }, { "epoch": 0.5133128236730908, "grad_norm": 0.796875, "learning_rate": 0.00015790437705400098, "loss": 0.9719, "step": 19991 }, { "epoch": 0.5133385008690126, "grad_norm": 0.765625, "learning_rate": 0.0001579007373484866, "loss": 0.8127, "step": 19992 }, { "epoch": 0.5133641780649344, "grad_norm": 0.71875, "learning_rate": 0.0001578970975275813, "loss": 0.9123, "step": 19993 }, { "epoch": 0.5133898552608562, "grad_norm": 0.78515625, "learning_rate": 0.00015789345759129232, "loss": 0.8794, "step": 19994 }, { "epoch": 0.5134155324567781, "grad_norm": 0.80078125, "learning_rate": 0.00015788981753962693, "loss": 0.8956, "step": 19995 }, { "epoch": 0.5134412096526999, "grad_norm": 0.8046875, "learning_rate": 0.00015788617737259232, "loss": 0.9075, "step": 19996 }, { "epoch": 0.5134668868486217, "grad_norm": 0.86328125, "learning_rate": 0.0001578825370901958, "loss": 1.0393, "step": 19997 }, { "epoch": 0.5134925640445435, "grad_norm": 0.76953125, "learning_rate": 0.00015787889669244463, "loss": 0.8227, "step": 19998 }, { "epoch": 0.5135182412404653, "grad_norm": 0.78515625, "learning_rate": 0.00015787525617934602, "loss": 0.7602, "step": 19999 }, { "epoch": 0.5135439184363871, "grad_norm": 0.76953125, "learning_rate": 0.00015787161555090725, "loss": 1.0335, "step": 20000 }, { "epoch": 0.5135439184363871, "eval_loss": 0.8822827935218811, "eval_model_preparation_time": 0.0065, "eval_runtime": 405.1172, "eval_samples_per_second": 24.684, "eval_steps_per_second": 0.773, "step": 20000 }, { "epoch": 0.513569595632309, "grad_norm": 0.8671875, "learning_rate": 0.00015786797480713563, "loss": 0.9543, "step": 20001 }, { "epoch": 0.5135952728282308, "grad_norm": 0.82421875, "learning_rate": 0.0001578643339480383, "loss": 0.9423, "step": 20002 }, { "epoch": 0.5136209500241526, "grad_norm": 0.87109375, "learning_rate": 0.0001578606929736226, "loss": 0.9768, "step": 20003 }, { "epoch": 0.5136466272200745, "grad_norm": 0.76171875, "learning_rate": 0.00015785705188389577, "loss": 0.8896, "step": 20004 }, { "epoch": 0.5136723044159962, "grad_norm": 0.78515625, "learning_rate": 0.00015785341067886508, "loss": 0.9152, "step": 20005 }, { "epoch": 0.513697981611918, "grad_norm": 0.83984375, "learning_rate": 0.00015784976935853776, "loss": 1.0084, "step": 20006 }, { "epoch": 0.5137236588078399, "grad_norm": 0.74609375, "learning_rate": 0.00015784612792292104, "loss": 0.8207, "step": 20007 }, { "epoch": 0.5137493360037617, "grad_norm": 0.78125, "learning_rate": 0.00015784248637202227, "loss": 0.9628, "step": 20008 }, { "epoch": 0.5137750131996836, "grad_norm": 0.796875, "learning_rate": 0.0001578388447058486, "loss": 0.9614, "step": 20009 }, { "epoch": 0.5138006903956054, "grad_norm": 0.77734375, "learning_rate": 0.00015783520292440736, "loss": 0.9297, "step": 20010 }, { "epoch": 0.5138263675915272, "grad_norm": 0.78125, "learning_rate": 0.00015783156102770578, "loss": 0.9286, "step": 20011 }, { "epoch": 0.513852044787449, "grad_norm": 0.78125, "learning_rate": 0.00015782791901575112, "loss": 0.958, "step": 20012 }, { "epoch": 0.5138777219833708, "grad_norm": 0.8671875, "learning_rate": 0.00015782427688855065, "loss": 0.868, "step": 20013 }, { "epoch": 0.5139033991792926, "grad_norm": 0.76171875, "learning_rate": 0.0001578206346461116, "loss": 0.8798, "step": 20014 }, { "epoch": 0.5139290763752145, "grad_norm": 0.73828125, "learning_rate": 0.00015781699228844127, "loss": 0.9009, "step": 20015 }, { "epoch": 0.5139547535711363, "grad_norm": 0.7734375, "learning_rate": 0.00015781334981554687, "loss": 0.9893, "step": 20016 }, { "epoch": 0.5139804307670581, "grad_norm": 0.8203125, "learning_rate": 0.0001578097072274357, "loss": 0.8327, "step": 20017 }, { "epoch": 0.5140061079629799, "grad_norm": 0.8125, "learning_rate": 0.00015780606452411503, "loss": 0.8356, "step": 20018 }, { "epoch": 0.5140317851589017, "grad_norm": 0.8046875, "learning_rate": 0.00015780242170559204, "loss": 0.8981, "step": 20019 }, { "epoch": 0.5140574623548235, "grad_norm": 0.83984375, "learning_rate": 0.0001577987787718741, "loss": 0.8442, "step": 20020 }, { "epoch": 0.5140831395507454, "grad_norm": 0.7890625, "learning_rate": 0.00015779513572296842, "loss": 0.8967, "step": 20021 }, { "epoch": 0.5141088167466672, "grad_norm": 0.765625, "learning_rate": 0.00015779149255888223, "loss": 0.9106, "step": 20022 }, { "epoch": 0.514134493942589, "grad_norm": 0.76171875, "learning_rate": 0.0001577878492796228, "loss": 0.8704, "step": 20023 }, { "epoch": 0.5141601711385109, "grad_norm": 0.76953125, "learning_rate": 0.00015778420588519743, "loss": 0.9321, "step": 20024 }, { "epoch": 0.5141858483344326, "grad_norm": 0.890625, "learning_rate": 0.0001577805623756133, "loss": 0.8864, "step": 20025 }, { "epoch": 0.5142115255303544, "grad_norm": 0.74609375, "learning_rate": 0.0001577769187508778, "loss": 0.8453, "step": 20026 }, { "epoch": 0.5142372027262763, "grad_norm": 0.73046875, "learning_rate": 0.0001577732750109981, "loss": 0.814, "step": 20027 }, { "epoch": 0.5142628799221981, "grad_norm": 0.79296875, "learning_rate": 0.0001577696311559815, "loss": 0.8222, "step": 20028 }, { "epoch": 0.5142885571181199, "grad_norm": 0.81640625, "learning_rate": 0.00015776598718583523, "loss": 0.8584, "step": 20029 }, { "epoch": 0.5143142343140418, "grad_norm": 0.76953125, "learning_rate": 0.00015776234310056655, "loss": 0.808, "step": 20030 }, { "epoch": 0.5143399115099636, "grad_norm": 0.82421875, "learning_rate": 0.00015775869890018274, "loss": 0.9571, "step": 20031 }, { "epoch": 0.5143655887058853, "grad_norm": 0.75390625, "learning_rate": 0.00015775505458469105, "loss": 0.8889, "step": 20032 }, { "epoch": 0.5143912659018072, "grad_norm": 0.81640625, "learning_rate": 0.00015775141015409876, "loss": 0.8422, "step": 20033 }, { "epoch": 0.514416943097729, "grad_norm": 0.7421875, "learning_rate": 0.00015774776560841317, "loss": 0.8021, "step": 20034 }, { "epoch": 0.5144426202936508, "grad_norm": 0.6953125, "learning_rate": 0.00015774412094764147, "loss": 0.9959, "step": 20035 }, { "epoch": 0.5144682974895727, "grad_norm": 0.7109375, "learning_rate": 0.00015774047617179093, "loss": 0.8483, "step": 20036 }, { "epoch": 0.5144939746854945, "grad_norm": 0.79296875, "learning_rate": 0.00015773683128086886, "loss": 0.8043, "step": 20037 }, { "epoch": 0.5145196518814162, "grad_norm": 0.75, "learning_rate": 0.0001577331862748825, "loss": 0.7963, "step": 20038 }, { "epoch": 0.5145453290773381, "grad_norm": 0.78125, "learning_rate": 0.0001577295411538391, "loss": 0.8256, "step": 20039 }, { "epoch": 0.5145710062732599, "grad_norm": 0.76953125, "learning_rate": 0.00015772589591774594, "loss": 1.0084, "step": 20040 }, { "epoch": 0.5145966834691817, "grad_norm": 0.79296875, "learning_rate": 0.0001577222505666103, "loss": 0.8635, "step": 20041 }, { "epoch": 0.5146223606651036, "grad_norm": 0.78125, "learning_rate": 0.00015771860510043942, "loss": 0.886, "step": 20042 }, { "epoch": 0.5146480378610254, "grad_norm": 0.80078125, "learning_rate": 0.00015771495951924058, "loss": 0.9604, "step": 20043 }, { "epoch": 0.5146737150569473, "grad_norm": 0.734375, "learning_rate": 0.00015771131382302103, "loss": 0.8191, "step": 20044 }, { "epoch": 0.514699392252869, "grad_norm": 0.8125, "learning_rate": 0.00015770766801178802, "loss": 0.9271, "step": 20045 }, { "epoch": 0.5147250694487908, "grad_norm": 0.73828125, "learning_rate": 0.00015770402208554888, "loss": 0.8477, "step": 20046 }, { "epoch": 0.5147507466447127, "grad_norm": 0.8828125, "learning_rate": 0.00015770037604431083, "loss": 0.9604, "step": 20047 }, { "epoch": 0.5147764238406345, "grad_norm": 0.7734375, "learning_rate": 0.00015769672988808116, "loss": 0.7656, "step": 20048 }, { "epoch": 0.5148021010365563, "grad_norm": 0.83203125, "learning_rate": 0.00015769308361686707, "loss": 0.9592, "step": 20049 }, { "epoch": 0.5148277782324782, "grad_norm": 0.72265625, "learning_rate": 0.0001576894372306759, "loss": 0.7385, "step": 20050 }, { "epoch": 0.5148534554283999, "grad_norm": 0.75, "learning_rate": 0.00015768579072951493, "loss": 0.8331, "step": 20051 }, { "epoch": 0.5148791326243217, "grad_norm": 0.78515625, "learning_rate": 0.00015768214411339133, "loss": 0.8108, "step": 20052 }, { "epoch": 0.5149048098202436, "grad_norm": 0.78515625, "learning_rate": 0.00015767849738231244, "loss": 0.9936, "step": 20053 }, { "epoch": 0.5149304870161654, "grad_norm": 0.8125, "learning_rate": 0.00015767485053628553, "loss": 0.8628, "step": 20054 }, { "epoch": 0.5149561642120872, "grad_norm": 0.77734375, "learning_rate": 0.0001576712035753179, "loss": 0.8721, "step": 20055 }, { "epoch": 0.5149818414080091, "grad_norm": 0.76953125, "learning_rate": 0.00015766755649941668, "loss": 0.9179, "step": 20056 }, { "epoch": 0.5150075186039309, "grad_norm": 0.859375, "learning_rate": 0.00015766390930858928, "loss": 1.0006, "step": 20057 }, { "epoch": 0.5150331957998526, "grad_norm": 0.8125, "learning_rate": 0.0001576602620028429, "loss": 0.8832, "step": 20058 }, { "epoch": 0.5150588729957745, "grad_norm": 0.71875, "learning_rate": 0.00015765661458218484, "loss": 0.7765, "step": 20059 }, { "epoch": 0.5150845501916963, "grad_norm": 0.703125, "learning_rate": 0.00015765296704662235, "loss": 0.7124, "step": 20060 }, { "epoch": 0.5151102273876181, "grad_norm": 0.70703125, "learning_rate": 0.0001576493193961627, "loss": 0.8827, "step": 20061 }, { "epoch": 0.51513590458354, "grad_norm": 0.875, "learning_rate": 0.00015764567163081315, "loss": 0.9194, "step": 20062 }, { "epoch": 0.5151615817794618, "grad_norm": 0.76953125, "learning_rate": 0.000157642023750581, "loss": 0.9477, "step": 20063 }, { "epoch": 0.5151872589753836, "grad_norm": 0.71484375, "learning_rate": 0.00015763837575547354, "loss": 0.8719, "step": 20064 }, { "epoch": 0.5152129361713054, "grad_norm": 0.76171875, "learning_rate": 0.00015763472764549795, "loss": 0.7549, "step": 20065 }, { "epoch": 0.5152386133672272, "grad_norm": 0.83203125, "learning_rate": 0.00015763107942066154, "loss": 0.9832, "step": 20066 }, { "epoch": 0.515264290563149, "grad_norm": 0.75390625, "learning_rate": 0.00015762743108097167, "loss": 0.9233, "step": 20067 }, { "epoch": 0.5152899677590709, "grad_norm": 0.7734375, "learning_rate": 0.00015762378262643547, "loss": 0.7906, "step": 20068 }, { "epoch": 0.5153156449549927, "grad_norm": 0.78515625, "learning_rate": 0.00015762013405706032, "loss": 0.8641, "step": 20069 }, { "epoch": 0.5153413221509145, "grad_norm": 0.734375, "learning_rate": 0.00015761648537285342, "loss": 0.7765, "step": 20070 }, { "epoch": 0.5153669993468363, "grad_norm": 0.71875, "learning_rate": 0.00015761283657382205, "loss": 0.7752, "step": 20071 }, { "epoch": 0.5153926765427581, "grad_norm": 0.7578125, "learning_rate": 0.00015760918765997354, "loss": 0.8287, "step": 20072 }, { "epoch": 0.51541835373868, "grad_norm": 0.734375, "learning_rate": 0.00015760553863131512, "loss": 0.9172, "step": 20073 }, { "epoch": 0.5154440309346018, "grad_norm": 0.8046875, "learning_rate": 0.00015760188948785403, "loss": 0.8513, "step": 20074 }, { "epoch": 0.5154697081305236, "grad_norm": 0.7734375, "learning_rate": 0.0001575982402295976, "loss": 0.8858, "step": 20075 }, { "epoch": 0.5154953853264455, "grad_norm": 0.78125, "learning_rate": 0.00015759459085655308, "loss": 0.8604, "step": 20076 }, { "epoch": 0.5155210625223673, "grad_norm": 0.78125, "learning_rate": 0.00015759094136872778, "loss": 0.9251, "step": 20077 }, { "epoch": 0.515546739718289, "grad_norm": 0.75390625, "learning_rate": 0.00015758729176612888, "loss": 0.914, "step": 20078 }, { "epoch": 0.5155724169142109, "grad_norm": 0.87109375, "learning_rate": 0.0001575836420487637, "loss": 0.9156, "step": 20079 }, { "epoch": 0.5155980941101327, "grad_norm": 0.76953125, "learning_rate": 0.00015757999221663958, "loss": 0.8702, "step": 20080 }, { "epoch": 0.5156237713060545, "grad_norm": 0.69921875, "learning_rate": 0.0001575763422697637, "loss": 0.8288, "step": 20081 }, { "epoch": 0.5156494485019764, "grad_norm": 0.80859375, "learning_rate": 0.0001575726922081434, "loss": 0.8292, "step": 20082 }, { "epoch": 0.5156751256978982, "grad_norm": 0.8125, "learning_rate": 0.00015756904203178588, "loss": 0.9568, "step": 20083 }, { "epoch": 0.51570080289382, "grad_norm": 0.77734375, "learning_rate": 0.00015756539174069847, "loss": 0.9065, "step": 20084 }, { "epoch": 0.5157264800897418, "grad_norm": 0.765625, "learning_rate": 0.00015756174133488846, "loss": 0.8109, "step": 20085 }, { "epoch": 0.5157521572856636, "grad_norm": 0.7578125, "learning_rate": 0.0001575580908143631, "loss": 0.9421, "step": 20086 }, { "epoch": 0.5157778344815854, "grad_norm": 0.69921875, "learning_rate": 0.00015755444017912963, "loss": 0.8241, "step": 20087 }, { "epoch": 0.5158035116775073, "grad_norm": 0.8046875, "learning_rate": 0.0001575507894291954, "loss": 0.8555, "step": 20088 }, { "epoch": 0.5158291888734291, "grad_norm": 0.78125, "learning_rate": 0.00015754713856456763, "loss": 0.8861, "step": 20089 }, { "epoch": 0.5158548660693509, "grad_norm": 0.8046875, "learning_rate": 0.0001575434875852536, "loss": 1.0477, "step": 20090 }, { "epoch": 0.5158805432652727, "grad_norm": 0.734375, "learning_rate": 0.0001575398364912606, "loss": 0.847, "step": 20091 }, { "epoch": 0.5159062204611945, "grad_norm": 0.87109375, "learning_rate": 0.00015753618528259588, "loss": 0.938, "step": 20092 }, { "epoch": 0.5159318976571163, "grad_norm": 0.76171875, "learning_rate": 0.00015753253395926677, "loss": 0.8579, "step": 20093 }, { "epoch": 0.5159575748530382, "grad_norm": 0.828125, "learning_rate": 0.00015752888252128054, "loss": 0.8522, "step": 20094 }, { "epoch": 0.51598325204896, "grad_norm": 0.75, "learning_rate": 0.00015752523096864443, "loss": 0.8758, "step": 20095 }, { "epoch": 0.5160089292448818, "grad_norm": 0.80078125, "learning_rate": 0.0001575215793013657, "loss": 0.7647, "step": 20096 }, { "epoch": 0.5160346064408037, "grad_norm": 0.734375, "learning_rate": 0.00015751792751945168, "loss": 0.6714, "step": 20097 }, { "epoch": 0.5160602836367254, "grad_norm": 0.75390625, "learning_rate": 0.00015751427562290962, "loss": 0.7805, "step": 20098 }, { "epoch": 0.5160859608326472, "grad_norm": 0.93359375, "learning_rate": 0.0001575106236117468, "loss": 0.9154, "step": 20099 }, { "epoch": 0.5161116380285691, "grad_norm": 0.765625, "learning_rate": 0.00015750697148597055, "loss": 0.9649, "step": 20100 }, { "epoch": 0.5161373152244909, "grad_norm": 0.875, "learning_rate": 0.000157503319245588, "loss": 0.8935, "step": 20101 }, { "epoch": 0.5161629924204127, "grad_norm": 0.73828125, "learning_rate": 0.0001574996668906066, "loss": 0.8271, "step": 20102 }, { "epoch": 0.5161886696163346, "grad_norm": 0.859375, "learning_rate": 0.00015749601442103357, "loss": 0.8768, "step": 20103 }, { "epoch": 0.5162143468122564, "grad_norm": 0.79296875, "learning_rate": 0.00015749236183687612, "loss": 0.851, "step": 20104 }, { "epoch": 0.5162400240081781, "grad_norm": 0.78515625, "learning_rate": 0.00015748870913814163, "loss": 0.866, "step": 20105 }, { "epoch": 0.5162657012041, "grad_norm": 0.8203125, "learning_rate": 0.00015748505632483733, "loss": 0.7938, "step": 20106 }, { "epoch": 0.5162913784000218, "grad_norm": 0.78515625, "learning_rate": 0.0001574814033969705, "loss": 0.8876, "step": 20107 }, { "epoch": 0.5163170555959437, "grad_norm": 0.73828125, "learning_rate": 0.00015747775035454843, "loss": 0.8228, "step": 20108 }, { "epoch": 0.5163427327918655, "grad_norm": 0.76953125, "learning_rate": 0.00015747409719757839, "loss": 0.7751, "step": 20109 }, { "epoch": 0.5163684099877873, "grad_norm": 0.8046875, "learning_rate": 0.00015747044392606764, "loss": 0.8654, "step": 20110 }, { "epoch": 0.516394087183709, "grad_norm": 0.84765625, "learning_rate": 0.00015746679054002354, "loss": 0.9466, "step": 20111 }, { "epoch": 0.5164197643796309, "grad_norm": 1.984375, "learning_rate": 0.0001574631370394533, "loss": 1.0305, "step": 20112 }, { "epoch": 0.5164454415755527, "grad_norm": 0.83984375, "learning_rate": 0.00015745948342436422, "loss": 1.02, "step": 20113 }, { "epoch": 0.5164711187714746, "grad_norm": 0.8125, "learning_rate": 0.00015745582969476355, "loss": 0.917, "step": 20114 }, { "epoch": 0.5164967959673964, "grad_norm": 0.78125, "learning_rate": 0.00015745217585065864, "loss": 0.7906, "step": 20115 }, { "epoch": 0.5165224731633182, "grad_norm": 0.75, "learning_rate": 0.00015744852189205667, "loss": 0.9939, "step": 20116 }, { "epoch": 0.5165481503592401, "grad_norm": 0.8515625, "learning_rate": 0.00015744486781896506, "loss": 0.9495, "step": 20117 }, { "epoch": 0.5165738275551618, "grad_norm": 0.91796875, "learning_rate": 0.00015744121363139098, "loss": 0.9702, "step": 20118 }, { "epoch": 0.5165995047510836, "grad_norm": 0.859375, "learning_rate": 0.00015743755932934173, "loss": 0.9105, "step": 20119 }, { "epoch": 0.5166251819470055, "grad_norm": 0.7421875, "learning_rate": 0.00015743390491282467, "loss": 0.8946, "step": 20120 }, { "epoch": 0.5166508591429273, "grad_norm": 0.80078125, "learning_rate": 0.000157430250381847, "loss": 0.9154, "step": 20121 }, { "epoch": 0.5166765363388491, "grad_norm": 0.7578125, "learning_rate": 0.00015742659573641602, "loss": 0.8649, "step": 20122 }, { "epoch": 0.516702213534771, "grad_norm": 0.8203125, "learning_rate": 0.000157422940976539, "loss": 0.8054, "step": 20123 }, { "epoch": 0.5167278907306928, "grad_norm": 0.80078125, "learning_rate": 0.00015741928610222327, "loss": 0.9156, "step": 20124 }, { "epoch": 0.5167535679266145, "grad_norm": 0.8125, "learning_rate": 0.0001574156311134761, "loss": 1.021, "step": 20125 }, { "epoch": 0.5167792451225364, "grad_norm": 0.73046875, "learning_rate": 0.00015741197601030476, "loss": 0.844, "step": 20126 }, { "epoch": 0.5168049223184582, "grad_norm": 0.796875, "learning_rate": 0.00015740832079271652, "loss": 0.9242, "step": 20127 }, { "epoch": 0.51683059951438, "grad_norm": 0.75, "learning_rate": 0.00015740466546071871, "loss": 0.7885, "step": 20128 }, { "epoch": 0.5168562767103019, "grad_norm": 0.78515625, "learning_rate": 0.00015740101001431855, "loss": 0.8565, "step": 20129 }, { "epoch": 0.5168819539062237, "grad_norm": 0.74609375, "learning_rate": 0.0001573973544535234, "loss": 0.8753, "step": 20130 }, { "epoch": 0.5169076311021454, "grad_norm": 0.83203125, "learning_rate": 0.00015739369877834048, "loss": 0.8234, "step": 20131 }, { "epoch": 0.5169333082980673, "grad_norm": 0.765625, "learning_rate": 0.0001573900429887771, "loss": 0.912, "step": 20132 }, { "epoch": 0.5169589854939891, "grad_norm": 0.76953125, "learning_rate": 0.00015738638708484058, "loss": 0.8715, "step": 20133 }, { "epoch": 0.516984662689911, "grad_norm": 0.8203125, "learning_rate": 0.00015738273106653816, "loss": 0.8564, "step": 20134 }, { "epoch": 0.5170103398858328, "grad_norm": 0.80078125, "learning_rate": 0.00015737907493387714, "loss": 0.9063, "step": 20135 }, { "epoch": 0.5170360170817546, "grad_norm": 0.72265625, "learning_rate": 0.0001573754186868648, "loss": 0.8159, "step": 20136 }, { "epoch": 0.5170616942776765, "grad_norm": 0.8203125, "learning_rate": 0.00015737176232550842, "loss": 0.9104, "step": 20137 }, { "epoch": 0.5170873714735982, "grad_norm": 0.74609375, "learning_rate": 0.0001573681058498153, "loss": 0.9499, "step": 20138 }, { "epoch": 0.51711304866952, "grad_norm": 0.7578125, "learning_rate": 0.00015736444925979276, "loss": 0.8648, "step": 20139 }, { "epoch": 0.5171387258654419, "grad_norm": 0.796875, "learning_rate": 0.00015736079255544805, "loss": 0.8455, "step": 20140 }, { "epoch": 0.5171644030613637, "grad_norm": 0.84375, "learning_rate": 0.00015735713573678843, "loss": 1.0449, "step": 20141 }, { "epoch": 0.5171900802572855, "grad_norm": 0.73046875, "learning_rate": 0.00015735347880382124, "loss": 0.7938, "step": 20142 }, { "epoch": 0.5172157574532074, "grad_norm": 0.77734375, "learning_rate": 0.00015734982175655374, "loss": 0.8745, "step": 20143 }, { "epoch": 0.5172414346491292, "grad_norm": 0.69140625, "learning_rate": 0.00015734616459499323, "loss": 0.9084, "step": 20144 }, { "epoch": 0.5172671118450509, "grad_norm": 0.75390625, "learning_rate": 0.000157342507319147, "loss": 0.8125, "step": 20145 }, { "epoch": 0.5172927890409728, "grad_norm": 0.79296875, "learning_rate": 0.00015733884992902235, "loss": 0.8498, "step": 20146 }, { "epoch": 0.5173184662368946, "grad_norm": 0.80078125, "learning_rate": 0.0001573351924246265, "loss": 0.9545, "step": 20147 }, { "epoch": 0.5173441434328164, "grad_norm": 0.95703125, "learning_rate": 0.00015733153480596682, "loss": 0.8793, "step": 20148 }, { "epoch": 0.5173698206287383, "grad_norm": 0.87109375, "learning_rate": 0.0001573278770730506, "loss": 0.9331, "step": 20149 }, { "epoch": 0.5173954978246601, "grad_norm": 0.84765625, "learning_rate": 0.00015732421922588503, "loss": 0.8854, "step": 20150 }, { "epoch": 0.5174211750205818, "grad_norm": 0.69921875, "learning_rate": 0.0001573205612644775, "loss": 0.7812, "step": 20151 }, { "epoch": 0.5174468522165037, "grad_norm": 0.74609375, "learning_rate": 0.0001573169031888353, "loss": 0.7962, "step": 20152 }, { "epoch": 0.5174725294124255, "grad_norm": 0.7265625, "learning_rate": 0.00015731324499896568, "loss": 0.8402, "step": 20153 }, { "epoch": 0.5174982066083473, "grad_norm": 0.7265625, "learning_rate": 0.00015730958669487593, "loss": 0.7419, "step": 20154 }, { "epoch": 0.5175238838042692, "grad_norm": 0.82421875, "learning_rate": 0.00015730592827657334, "loss": 0.933, "step": 20155 }, { "epoch": 0.517549561000191, "grad_norm": 0.7734375, "learning_rate": 0.0001573022697440652, "loss": 0.8864, "step": 20156 }, { "epoch": 0.5175752381961128, "grad_norm": 0.75390625, "learning_rate": 0.00015729861109735883, "loss": 0.8164, "step": 20157 }, { "epoch": 0.5176009153920346, "grad_norm": 0.7734375, "learning_rate": 0.00015729495233646152, "loss": 0.791, "step": 20158 }, { "epoch": 0.5176265925879564, "grad_norm": 0.73828125, "learning_rate": 0.00015729129346138052, "loss": 0.8996, "step": 20159 }, { "epoch": 0.5176522697838782, "grad_norm": 0.81640625, "learning_rate": 0.00015728763447212316, "loss": 0.893, "step": 20160 }, { "epoch": 0.5176779469798001, "grad_norm": 0.85546875, "learning_rate": 0.00015728397536869673, "loss": 0.928, "step": 20161 }, { "epoch": 0.5177036241757219, "grad_norm": 0.75, "learning_rate": 0.0001572803161511085, "loss": 0.969, "step": 20162 }, { "epoch": 0.5177293013716437, "grad_norm": 0.78515625, "learning_rate": 0.00015727665681936577, "loss": 0.9015, "step": 20163 }, { "epoch": 0.5177549785675656, "grad_norm": 0.82421875, "learning_rate": 0.00015727299737347582, "loss": 0.8727, "step": 20164 }, { "epoch": 0.5177806557634873, "grad_norm": 0.6875, "learning_rate": 0.000157269337813446, "loss": 0.8281, "step": 20165 }, { "epoch": 0.5178063329594091, "grad_norm": 0.73828125, "learning_rate": 0.00015726567813928352, "loss": 0.8357, "step": 20166 }, { "epoch": 0.517832010155331, "grad_norm": 0.72265625, "learning_rate": 0.00015726201835099574, "loss": 0.8497, "step": 20167 }, { "epoch": 0.5178576873512528, "grad_norm": 0.76953125, "learning_rate": 0.00015725835844858995, "loss": 0.7898, "step": 20168 }, { "epoch": 0.5178833645471747, "grad_norm": 0.84375, "learning_rate": 0.0001572546984320734, "loss": 0.9281, "step": 20169 }, { "epoch": 0.5179090417430965, "grad_norm": 0.83984375, "learning_rate": 0.0001572510383014534, "loss": 0.9468, "step": 20170 }, { "epoch": 0.5179347189390182, "grad_norm": 0.734375, "learning_rate": 0.00015724737805673726, "loss": 0.9246, "step": 20171 }, { "epoch": 0.51796039613494, "grad_norm": 0.8203125, "learning_rate": 0.00015724371769793228, "loss": 0.8438, "step": 20172 }, { "epoch": 0.5179860733308619, "grad_norm": 0.76953125, "learning_rate": 0.00015724005722504573, "loss": 0.826, "step": 20173 }, { "epoch": 0.5180117505267837, "grad_norm": 0.79296875, "learning_rate": 0.0001572363966380849, "loss": 0.9039, "step": 20174 }, { "epoch": 0.5180374277227056, "grad_norm": 0.8515625, "learning_rate": 0.0001572327359370571, "loss": 0.9236, "step": 20175 }, { "epoch": 0.5180631049186274, "grad_norm": 0.76171875, "learning_rate": 0.00015722907512196965, "loss": 0.8816, "step": 20176 }, { "epoch": 0.5180887821145492, "grad_norm": 0.70703125, "learning_rate": 0.00015722541419282981, "loss": 0.7409, "step": 20177 }, { "epoch": 0.518114459310471, "grad_norm": 0.78125, "learning_rate": 0.0001572217531496449, "loss": 0.9864, "step": 20178 }, { "epoch": 0.5181401365063928, "grad_norm": 0.82421875, "learning_rate": 0.0001572180919924222, "loss": 1.0238, "step": 20179 }, { "epoch": 0.5181658137023146, "grad_norm": 0.78125, "learning_rate": 0.00015721443072116905, "loss": 1.0321, "step": 20180 }, { "epoch": 0.5181914908982365, "grad_norm": 0.8046875, "learning_rate": 0.00015721076933589265, "loss": 0.8973, "step": 20181 }, { "epoch": 0.5182171680941583, "grad_norm": 0.75390625, "learning_rate": 0.00015720710783660038, "loss": 1.0063, "step": 20182 }, { "epoch": 0.5182428452900801, "grad_norm": 0.80859375, "learning_rate": 0.0001572034462232995, "loss": 0.8885, "step": 20183 }, { "epoch": 0.518268522486002, "grad_norm": 0.78125, "learning_rate": 0.00015719978449599734, "loss": 0.8799, "step": 20184 }, { "epoch": 0.5182941996819237, "grad_norm": 0.79296875, "learning_rate": 0.00015719612265470114, "loss": 0.8314, "step": 20185 }, { "epoch": 0.5183198768778455, "grad_norm": 0.79296875, "learning_rate": 0.00015719246069941827, "loss": 0.7407, "step": 20186 }, { "epoch": 0.5183455540737674, "grad_norm": 0.78125, "learning_rate": 0.00015718879863015596, "loss": 0.9374, "step": 20187 }, { "epoch": 0.5183712312696892, "grad_norm": 0.75, "learning_rate": 0.00015718513644692156, "loss": 0.9025, "step": 20188 }, { "epoch": 0.518396908465611, "grad_norm": 0.7734375, "learning_rate": 0.00015718147414972236, "loss": 1.0008, "step": 20189 }, { "epoch": 0.5184225856615329, "grad_norm": 0.80859375, "learning_rate": 0.0001571778117385656, "loss": 0.8256, "step": 20190 }, { "epoch": 0.5184482628574546, "grad_norm": 0.7265625, "learning_rate": 0.00015717414921345867, "loss": 0.8075, "step": 20191 }, { "epoch": 0.5184739400533764, "grad_norm": 0.78515625, "learning_rate": 0.00015717048657440884, "loss": 0.8551, "step": 20192 }, { "epoch": 0.5184996172492983, "grad_norm": 0.828125, "learning_rate": 0.00015716682382142336, "loss": 0.9445, "step": 20193 }, { "epoch": 0.5185252944452201, "grad_norm": 0.875, "learning_rate": 0.00015716316095450958, "loss": 0.8542, "step": 20194 }, { "epoch": 0.518550971641142, "grad_norm": 0.83984375, "learning_rate": 0.00015715949797367475, "loss": 0.8982, "step": 20195 }, { "epoch": 0.5185766488370638, "grad_norm": 0.8203125, "learning_rate": 0.00015715583487892624, "loss": 0.968, "step": 20196 }, { "epoch": 0.5186023260329856, "grad_norm": 0.73046875, "learning_rate": 0.0001571521716702713, "loss": 0.8468, "step": 20197 }, { "epoch": 0.5186280032289073, "grad_norm": 0.80078125, "learning_rate": 0.00015714850834771724, "loss": 0.8826, "step": 20198 }, { "epoch": 0.5186536804248292, "grad_norm": 0.8359375, "learning_rate": 0.00015714484491127138, "loss": 0.815, "step": 20199 }, { "epoch": 0.518679357620751, "grad_norm": 0.8671875, "learning_rate": 0.00015714118136094101, "loss": 0.9325, "step": 20200 }, { "epoch": 0.5187050348166728, "grad_norm": 0.80078125, "learning_rate": 0.00015713751769673342, "loss": 0.8783, "step": 20201 }, { "epoch": 0.5187307120125947, "grad_norm": 0.75, "learning_rate": 0.0001571338539186559, "loss": 0.8668, "step": 20202 }, { "epoch": 0.5187563892085165, "grad_norm": 0.77734375, "learning_rate": 0.00015713019002671578, "loss": 0.8132, "step": 20203 }, { "epoch": 0.5187820664044384, "grad_norm": 0.73828125, "learning_rate": 0.00015712652602092034, "loss": 0.8158, "step": 20204 }, { "epoch": 0.5188077436003601, "grad_norm": 0.7421875, "learning_rate": 0.00015712286190127696, "loss": 0.919, "step": 20205 }, { "epoch": 0.5188334207962819, "grad_norm": 0.7421875, "learning_rate": 0.0001571191976677928, "loss": 0.8091, "step": 20206 }, { "epoch": 0.5188590979922038, "grad_norm": 0.78515625, "learning_rate": 0.00015711553332047528, "loss": 0.8675, "step": 20207 }, { "epoch": 0.5188847751881256, "grad_norm": 0.74609375, "learning_rate": 0.00015711186885933166, "loss": 0.8312, "step": 20208 }, { "epoch": 0.5189104523840474, "grad_norm": 0.765625, "learning_rate": 0.00015710820428436922, "loss": 0.9805, "step": 20209 }, { "epoch": 0.5189361295799693, "grad_norm": 0.75390625, "learning_rate": 0.0001571045395955953, "loss": 0.8851, "step": 20210 }, { "epoch": 0.518961806775891, "grad_norm": 0.8046875, "learning_rate": 0.00015710087479301716, "loss": 0.8812, "step": 20211 }, { "epoch": 0.5189874839718128, "grad_norm": 0.83984375, "learning_rate": 0.0001570972098766422, "loss": 0.8757, "step": 20212 }, { "epoch": 0.5190131611677347, "grad_norm": 0.77734375, "learning_rate": 0.0001570935448464776, "loss": 0.9717, "step": 20213 }, { "epoch": 0.5190388383636565, "grad_norm": 0.7578125, "learning_rate": 0.00015708987970253075, "loss": 0.8163, "step": 20214 }, { "epoch": 0.5190645155595783, "grad_norm": 0.82421875, "learning_rate": 0.0001570862144448089, "loss": 0.7989, "step": 20215 }, { "epoch": 0.5190901927555002, "grad_norm": 0.7734375, "learning_rate": 0.00015708254907331943, "loss": 0.8237, "step": 20216 }, { "epoch": 0.519115869951422, "grad_norm": 0.84765625, "learning_rate": 0.00015707888358806955, "loss": 0.9468, "step": 20217 }, { "epoch": 0.5191415471473437, "grad_norm": 0.83984375, "learning_rate": 0.0001570752179890666, "loss": 1.039, "step": 20218 }, { "epoch": 0.5191672243432656, "grad_norm": 0.72265625, "learning_rate": 0.00015707155227631797, "loss": 0.7708, "step": 20219 }, { "epoch": 0.5191929015391874, "grad_norm": 0.7421875, "learning_rate": 0.00015706788644983085, "loss": 0.8897, "step": 20220 }, { "epoch": 0.5192185787351092, "grad_norm": 0.78125, "learning_rate": 0.00015706422050961258, "loss": 0.9475, "step": 20221 }, { "epoch": 0.5192442559310311, "grad_norm": 0.796875, "learning_rate": 0.00015706055445567047, "loss": 0.873, "step": 20222 }, { "epoch": 0.5192699331269529, "grad_norm": 0.77734375, "learning_rate": 0.0001570568882880118, "loss": 0.8928, "step": 20223 }, { "epoch": 0.5192956103228747, "grad_norm": 0.7890625, "learning_rate": 0.000157053222006644, "loss": 0.8014, "step": 20224 }, { "epoch": 0.5193212875187965, "grad_norm": 0.796875, "learning_rate": 0.00015704955561157423, "loss": 0.9949, "step": 20225 }, { "epoch": 0.5193469647147183, "grad_norm": 0.76171875, "learning_rate": 0.00015704588910280982, "loss": 0.8386, "step": 20226 }, { "epoch": 0.5193726419106401, "grad_norm": 0.7265625, "learning_rate": 0.00015704222248035814, "loss": 0.9489, "step": 20227 }, { "epoch": 0.519398319106562, "grad_norm": 0.8125, "learning_rate": 0.00015703855574422643, "loss": 0.8478, "step": 20228 }, { "epoch": 0.5194239963024838, "grad_norm": 0.80078125, "learning_rate": 0.00015703488889442206, "loss": 0.823, "step": 20229 }, { "epoch": 0.5194496734984057, "grad_norm": 0.734375, "learning_rate": 0.0001570312219309523, "loss": 0.9402, "step": 20230 }, { "epoch": 0.5194753506943274, "grad_norm": 0.8125, "learning_rate": 0.00015702755485382447, "loss": 0.973, "step": 20231 }, { "epoch": 0.5195010278902492, "grad_norm": 0.79296875, "learning_rate": 0.00015702388766304588, "loss": 0.9533, "step": 20232 }, { "epoch": 0.519526705086171, "grad_norm": 0.7265625, "learning_rate": 0.00015702022035862383, "loss": 0.8669, "step": 20233 }, { "epoch": 0.5195523822820929, "grad_norm": 0.8125, "learning_rate": 0.0001570165529405656, "loss": 0.8627, "step": 20234 }, { "epoch": 0.5195780594780147, "grad_norm": 1.640625, "learning_rate": 0.00015701288540887856, "loss": 0.9004, "step": 20235 }, { "epoch": 0.5196037366739366, "grad_norm": 0.72265625, "learning_rate": 0.00015700921776356998, "loss": 0.8678, "step": 20236 }, { "epoch": 0.5196294138698584, "grad_norm": 0.74609375, "learning_rate": 0.00015700555000464718, "loss": 0.8456, "step": 20237 }, { "epoch": 0.5196550910657801, "grad_norm": 0.74609375, "learning_rate": 0.00015700188213211747, "loss": 0.8987, "step": 20238 }, { "epoch": 0.519680768261702, "grad_norm": 0.76953125, "learning_rate": 0.00015699821414598818, "loss": 0.7991, "step": 20239 }, { "epoch": 0.5197064454576238, "grad_norm": 0.7734375, "learning_rate": 0.0001569945460462666, "loss": 0.8355, "step": 20240 }, { "epoch": 0.5197321226535456, "grad_norm": 0.7890625, "learning_rate": 0.00015699087783296, "loss": 0.9506, "step": 20241 }, { "epoch": 0.5197577998494675, "grad_norm": 0.78125, "learning_rate": 0.00015698720950607572, "loss": 1.0322, "step": 20242 }, { "epoch": 0.5197834770453893, "grad_norm": 0.7734375, "learning_rate": 0.00015698354106562112, "loss": 0.891, "step": 20243 }, { "epoch": 0.519809154241311, "grad_norm": 0.703125, "learning_rate": 0.00015697987251160346, "loss": 0.9594, "step": 20244 }, { "epoch": 0.5198348314372329, "grad_norm": 0.7734375, "learning_rate": 0.00015697620384403006, "loss": 0.9634, "step": 20245 }, { "epoch": 0.5198605086331547, "grad_norm": 0.7890625, "learning_rate": 0.00015697253506290822, "loss": 0.9036, "step": 20246 }, { "epoch": 0.5198861858290765, "grad_norm": 0.73046875, "learning_rate": 0.00015696886616824526, "loss": 0.8582, "step": 20247 }, { "epoch": 0.5199118630249984, "grad_norm": 0.73046875, "learning_rate": 0.00015696519716004853, "loss": 0.7527, "step": 20248 }, { "epoch": 0.5199375402209202, "grad_norm": 0.8125, "learning_rate": 0.00015696152803832525, "loss": 0.9035, "step": 20249 }, { "epoch": 0.519963217416842, "grad_norm": 0.78125, "learning_rate": 0.00015695785880308284, "loss": 0.9034, "step": 20250 }, { "epoch": 0.5199888946127638, "grad_norm": 0.8203125, "learning_rate": 0.00015695418945432853, "loss": 0.9321, "step": 20251 }, { "epoch": 0.5200145718086856, "grad_norm": 0.79296875, "learning_rate": 0.0001569505199920697, "loss": 0.8686, "step": 20252 }, { "epoch": 0.5200402490046074, "grad_norm": 0.75, "learning_rate": 0.0001569468504163136, "loss": 0.9879, "step": 20253 }, { "epoch": 0.5200659262005293, "grad_norm": 0.7265625, "learning_rate": 0.0001569431807270676, "loss": 0.8697, "step": 20254 }, { "epoch": 0.5200916033964511, "grad_norm": 0.75, "learning_rate": 0.00015693951092433895, "loss": 0.8345, "step": 20255 }, { "epoch": 0.5201172805923729, "grad_norm": 0.765625, "learning_rate": 0.00015693584100813501, "loss": 0.8405, "step": 20256 }, { "epoch": 0.5201429577882948, "grad_norm": 0.7734375, "learning_rate": 0.00015693217097846309, "loss": 0.8357, "step": 20257 }, { "epoch": 0.5201686349842165, "grad_norm": 0.73828125, "learning_rate": 0.0001569285008353305, "loss": 0.824, "step": 20258 }, { "epoch": 0.5201943121801383, "grad_norm": 0.75390625, "learning_rate": 0.00015692483057874453, "loss": 0.8921, "step": 20259 }, { "epoch": 0.5202199893760602, "grad_norm": 0.79296875, "learning_rate": 0.00015692116020871253, "loss": 0.7858, "step": 20260 }, { "epoch": 0.520245666571982, "grad_norm": 0.80078125, "learning_rate": 0.0001569174897252418, "loss": 0.8337, "step": 20261 }, { "epoch": 0.5202713437679038, "grad_norm": 0.8046875, "learning_rate": 0.00015691381912833965, "loss": 0.9346, "step": 20262 }, { "epoch": 0.5202970209638257, "grad_norm": 0.78515625, "learning_rate": 0.00015691014841801338, "loss": 0.9794, "step": 20263 }, { "epoch": 0.5203226981597474, "grad_norm": 0.71875, "learning_rate": 0.00015690647759427034, "loss": 0.8148, "step": 20264 }, { "epoch": 0.5203483753556692, "grad_norm": 0.82421875, "learning_rate": 0.00015690280665711786, "loss": 0.903, "step": 20265 }, { "epoch": 0.5203740525515911, "grad_norm": 0.8046875, "learning_rate": 0.0001568991356065632, "loss": 0.9742, "step": 20266 }, { "epoch": 0.5203997297475129, "grad_norm": 0.74609375, "learning_rate": 0.0001568954644426137, "loss": 0.8209, "step": 20267 }, { "epoch": 0.5204254069434348, "grad_norm": 0.75, "learning_rate": 0.0001568917931652767, "loss": 0.8255, "step": 20268 }, { "epoch": 0.5204510841393566, "grad_norm": 0.76953125, "learning_rate": 0.00015688812177455945, "loss": 0.8353, "step": 20269 }, { "epoch": 0.5204767613352784, "grad_norm": 0.796875, "learning_rate": 0.00015688445027046933, "loss": 0.8008, "step": 20270 }, { "epoch": 0.5205024385312002, "grad_norm": 0.80859375, "learning_rate": 0.00015688077865301368, "loss": 0.9582, "step": 20271 }, { "epoch": 0.520528115727122, "grad_norm": 0.72265625, "learning_rate": 0.00015687710692219973, "loss": 0.922, "step": 20272 }, { "epoch": 0.5205537929230438, "grad_norm": 0.86328125, "learning_rate": 0.00015687343507803485, "loss": 0.9121, "step": 20273 }, { "epoch": 0.5205794701189657, "grad_norm": 0.82421875, "learning_rate": 0.00015686976312052634, "loss": 0.9418, "step": 20274 }, { "epoch": 0.5206051473148875, "grad_norm": 0.80859375, "learning_rate": 0.00015686609104968157, "loss": 0.9184, "step": 20275 }, { "epoch": 0.5206308245108093, "grad_norm": 0.7578125, "learning_rate": 0.0001568624188655078, "loss": 0.75, "step": 20276 }, { "epoch": 0.5206565017067312, "grad_norm": 0.80078125, "learning_rate": 0.00015685874656801233, "loss": 0.9041, "step": 20277 }, { "epoch": 0.5206821789026529, "grad_norm": 0.828125, "learning_rate": 0.00015685507415720258, "loss": 1.0034, "step": 20278 }, { "epoch": 0.5207078560985747, "grad_norm": 0.671875, "learning_rate": 0.00015685140163308576, "loss": 0.667, "step": 20279 }, { "epoch": 0.5207335332944966, "grad_norm": 0.7578125, "learning_rate": 0.00015684772899566925, "loss": 0.8409, "step": 20280 }, { "epoch": 0.5207592104904184, "grad_norm": 0.76953125, "learning_rate": 0.00015684405624496033, "loss": 0.7806, "step": 20281 }, { "epoch": 0.5207848876863402, "grad_norm": 0.72265625, "learning_rate": 0.00015684038338096637, "loss": 0.7641, "step": 20282 }, { "epoch": 0.5208105648822621, "grad_norm": 0.734375, "learning_rate": 0.0001568367104036946, "loss": 0.9036, "step": 20283 }, { "epoch": 0.5208362420781838, "grad_norm": 0.73828125, "learning_rate": 0.00015683303731315248, "loss": 0.8416, "step": 20284 }, { "epoch": 0.5208619192741056, "grad_norm": 0.76953125, "learning_rate": 0.0001568293641093472, "loss": 0.9281, "step": 20285 }, { "epoch": 0.5208875964700275, "grad_norm": 0.7421875, "learning_rate": 0.00015682569079228614, "loss": 0.9329, "step": 20286 }, { "epoch": 0.5209132736659493, "grad_norm": 0.7109375, "learning_rate": 0.0001568220173619766, "loss": 0.9392, "step": 20287 }, { "epoch": 0.5209389508618711, "grad_norm": 0.72265625, "learning_rate": 0.00015681834381842596, "loss": 0.8596, "step": 20288 }, { "epoch": 0.520964628057793, "grad_norm": 0.7265625, "learning_rate": 0.00015681467016164145, "loss": 0.9865, "step": 20289 }, { "epoch": 0.5209903052537148, "grad_norm": 0.78515625, "learning_rate": 0.00015681099639163043, "loss": 0.8352, "step": 20290 }, { "epoch": 0.5210159824496365, "grad_norm": 0.69921875, "learning_rate": 0.00015680732250840027, "loss": 0.9093, "step": 20291 }, { "epoch": 0.5210416596455584, "grad_norm": 0.796875, "learning_rate": 0.0001568036485119582, "loss": 0.8599, "step": 20292 }, { "epoch": 0.5210673368414802, "grad_norm": 0.828125, "learning_rate": 0.0001567999744023116, "loss": 0.8282, "step": 20293 }, { "epoch": 0.521093014037402, "grad_norm": 0.7890625, "learning_rate": 0.0001567963001794678, "loss": 0.8728, "step": 20294 }, { "epoch": 0.5211186912333239, "grad_norm": 0.81640625, "learning_rate": 0.00015679262584343407, "loss": 0.9322, "step": 20295 }, { "epoch": 0.5211443684292457, "grad_norm": 0.64453125, "learning_rate": 0.00015678895139421782, "loss": 0.7625, "step": 20296 }, { "epoch": 0.5211700456251676, "grad_norm": 0.78125, "learning_rate": 0.00015678527683182626, "loss": 0.8079, "step": 20297 }, { "epoch": 0.5211957228210893, "grad_norm": 0.78515625, "learning_rate": 0.0001567816021562668, "loss": 0.9018, "step": 20298 }, { "epoch": 0.5212214000170111, "grad_norm": 0.8984375, "learning_rate": 0.00015677792736754675, "loss": 0.8657, "step": 20299 }, { "epoch": 0.521247077212933, "grad_norm": 0.74609375, "learning_rate": 0.0001567742524656734, "loss": 0.928, "step": 20300 }, { "epoch": 0.5212727544088548, "grad_norm": 0.74609375, "learning_rate": 0.0001567705774506541, "loss": 0.8514, "step": 20301 }, { "epoch": 0.5212984316047766, "grad_norm": 0.83984375, "learning_rate": 0.00015676690232249618, "loss": 0.8907, "step": 20302 }, { "epoch": 0.5213241088006985, "grad_norm": 0.76171875, "learning_rate": 0.00015676322708120695, "loss": 0.9035, "step": 20303 }, { "epoch": 0.5213497859966202, "grad_norm": 0.76953125, "learning_rate": 0.00015675955172679372, "loss": 0.7971, "step": 20304 }, { "epoch": 0.521375463192542, "grad_norm": 0.78515625, "learning_rate": 0.00015675587625926384, "loss": 0.8111, "step": 20305 }, { "epoch": 0.5214011403884639, "grad_norm": 0.76171875, "learning_rate": 0.00015675220067862462, "loss": 0.7912, "step": 20306 }, { "epoch": 0.5214268175843857, "grad_norm": 0.75, "learning_rate": 0.00015674852498488338, "loss": 0.906, "step": 20307 }, { "epoch": 0.5214524947803075, "grad_norm": 0.828125, "learning_rate": 0.0001567448491780475, "loss": 0.8286, "step": 20308 }, { "epoch": 0.5214781719762294, "grad_norm": 0.79296875, "learning_rate": 0.0001567411732581242, "loss": 0.7992, "step": 20309 }, { "epoch": 0.5215038491721512, "grad_norm": 0.74609375, "learning_rate": 0.00015673749722512089, "loss": 0.8427, "step": 20310 }, { "epoch": 0.5215295263680729, "grad_norm": 0.734375, "learning_rate": 0.00015673382107904492, "loss": 0.886, "step": 20311 }, { "epoch": 0.5215552035639948, "grad_norm": 0.85546875, "learning_rate": 0.0001567301448199035, "loss": 0.9326, "step": 20312 }, { "epoch": 0.5215808807599166, "grad_norm": 0.76171875, "learning_rate": 0.00015672646844770407, "loss": 0.7416, "step": 20313 }, { "epoch": 0.5216065579558384, "grad_norm": 0.8203125, "learning_rate": 0.00015672279196245388, "loss": 0.8739, "step": 20314 }, { "epoch": 0.5216322351517603, "grad_norm": 0.86328125, "learning_rate": 0.0001567191153641603, "loss": 0.9616, "step": 20315 }, { "epoch": 0.5216579123476821, "grad_norm": 0.74609375, "learning_rate": 0.00015671543865283065, "loss": 0.8971, "step": 20316 }, { "epoch": 0.5216835895436039, "grad_norm": 0.77734375, "learning_rate": 0.00015671176182847227, "loss": 0.9812, "step": 20317 }, { "epoch": 0.5217092667395257, "grad_norm": 0.84375, "learning_rate": 0.00015670808489109245, "loss": 0.8926, "step": 20318 }, { "epoch": 0.5217349439354475, "grad_norm": 0.73828125, "learning_rate": 0.00015670440784069855, "loss": 0.9117, "step": 20319 }, { "epoch": 0.5217606211313693, "grad_norm": 0.8046875, "learning_rate": 0.00015670073067729787, "loss": 0.9548, "step": 20320 }, { "epoch": 0.5217862983272912, "grad_norm": 0.734375, "learning_rate": 0.00015669705340089777, "loss": 0.8618, "step": 20321 }, { "epoch": 0.521811975523213, "grad_norm": 0.78515625, "learning_rate": 0.00015669337601150555, "loss": 0.9302, "step": 20322 }, { "epoch": 0.5218376527191348, "grad_norm": 0.83984375, "learning_rate": 0.00015668969850912858, "loss": 0.892, "step": 20323 }, { "epoch": 0.5218633299150566, "grad_norm": 0.98828125, "learning_rate": 0.00015668602089377416, "loss": 0.8847, "step": 20324 }, { "epoch": 0.5218890071109784, "grad_norm": 0.81640625, "learning_rate": 0.00015668234316544962, "loss": 0.867, "step": 20325 }, { "epoch": 0.5219146843069002, "grad_norm": 0.81640625, "learning_rate": 0.00015667866532416227, "loss": 0.8708, "step": 20326 }, { "epoch": 0.5219403615028221, "grad_norm": 0.8359375, "learning_rate": 0.00015667498736991945, "loss": 0.8787, "step": 20327 }, { "epoch": 0.5219660386987439, "grad_norm": 0.82421875, "learning_rate": 0.00015667130930272852, "loss": 0.9834, "step": 20328 }, { "epoch": 0.5219917158946658, "grad_norm": 0.84375, "learning_rate": 0.0001566676311225968, "loss": 0.9068, "step": 20329 }, { "epoch": 0.5220173930905876, "grad_norm": 0.71875, "learning_rate": 0.00015666395282953158, "loss": 0.9399, "step": 20330 }, { "epoch": 0.5220430702865093, "grad_norm": 0.78125, "learning_rate": 0.00015666027442354024, "loss": 0.794, "step": 20331 }, { "epoch": 0.5220687474824312, "grad_norm": 0.78125, "learning_rate": 0.00015665659590463008, "loss": 1.0526, "step": 20332 }, { "epoch": 0.522094424678353, "grad_norm": 0.76171875, "learning_rate": 0.00015665291727280848, "loss": 0.8838, "step": 20333 }, { "epoch": 0.5221201018742748, "grad_norm": 0.8203125, "learning_rate": 0.0001566492385280827, "loss": 0.8547, "step": 20334 }, { "epoch": 0.5221457790701967, "grad_norm": 0.7890625, "learning_rate": 0.0001566455596704601, "loss": 0.7578, "step": 20335 }, { "epoch": 0.5221714562661185, "grad_norm": 0.77734375, "learning_rate": 0.00015664188069994803, "loss": 0.756, "step": 20336 }, { "epoch": 0.5221971334620403, "grad_norm": 0.84765625, "learning_rate": 0.00015663820161655382, "loss": 0.9303, "step": 20337 }, { "epoch": 0.522222810657962, "grad_norm": 0.80078125, "learning_rate": 0.00015663452242028474, "loss": 0.9976, "step": 20338 }, { "epoch": 0.5222484878538839, "grad_norm": 0.890625, "learning_rate": 0.00015663084311114825, "loss": 0.9562, "step": 20339 }, { "epoch": 0.5222741650498057, "grad_norm": 0.78515625, "learning_rate": 0.00015662716368915154, "loss": 0.9427, "step": 20340 }, { "epoch": 0.5222998422457276, "grad_norm": 0.75390625, "learning_rate": 0.00015662348415430204, "loss": 0.9547, "step": 20341 }, { "epoch": 0.5223255194416494, "grad_norm": 0.78125, "learning_rate": 0.00015661980450660702, "loss": 1.0467, "step": 20342 }, { "epoch": 0.5223511966375712, "grad_norm": 0.703125, "learning_rate": 0.00015661612474607387, "loss": 0.8308, "step": 20343 }, { "epoch": 0.522376873833493, "grad_norm": 0.75390625, "learning_rate": 0.0001566124448727099, "loss": 0.8749, "step": 20344 }, { "epoch": 0.5224025510294148, "grad_norm": 0.7890625, "learning_rate": 0.00015660876488652242, "loss": 0.8116, "step": 20345 }, { "epoch": 0.5224282282253366, "grad_norm": 0.79296875, "learning_rate": 0.0001566050847875188, "loss": 0.9194, "step": 20346 }, { "epoch": 0.5224539054212585, "grad_norm": 0.71875, "learning_rate": 0.00015660140457570635, "loss": 0.925, "step": 20347 }, { "epoch": 0.5224795826171803, "grad_norm": 0.7421875, "learning_rate": 0.00015659772425109244, "loss": 0.9872, "step": 20348 }, { "epoch": 0.5225052598131021, "grad_norm": 0.72265625, "learning_rate": 0.00015659404381368437, "loss": 0.8331, "step": 20349 }, { "epoch": 0.522530937009024, "grad_norm": 0.75, "learning_rate": 0.00015659036326348948, "loss": 0.8779, "step": 20350 }, { "epoch": 0.5225566142049457, "grad_norm": 0.77734375, "learning_rate": 0.00015658668260051507, "loss": 0.8653, "step": 20351 }, { "epoch": 0.5225822914008675, "grad_norm": 0.8203125, "learning_rate": 0.00015658300182476854, "loss": 0.7875, "step": 20352 }, { "epoch": 0.5226079685967894, "grad_norm": 0.73828125, "learning_rate": 0.00015657932093625723, "loss": 0.9034, "step": 20353 }, { "epoch": 0.5226336457927112, "grad_norm": 0.8046875, "learning_rate": 0.0001565756399349884, "loss": 0.8385, "step": 20354 }, { "epoch": 0.522659322988633, "grad_norm": 0.80078125, "learning_rate": 0.00015657195882096944, "loss": 0.8549, "step": 20355 }, { "epoch": 0.5226850001845549, "grad_norm": 0.97265625, "learning_rate": 0.00015656827759420768, "loss": 0.7833, "step": 20356 }, { "epoch": 0.5227106773804767, "grad_norm": 0.78515625, "learning_rate": 0.00015656459625471047, "loss": 0.8219, "step": 20357 }, { "epoch": 0.5227363545763984, "grad_norm": 0.765625, "learning_rate": 0.00015656091480248513, "loss": 0.8169, "step": 20358 }, { "epoch": 0.5227620317723203, "grad_norm": 0.79296875, "learning_rate": 0.00015655723323753894, "loss": 0.8923, "step": 20359 }, { "epoch": 0.5227877089682421, "grad_norm": 0.78125, "learning_rate": 0.00015655355155987934, "loss": 0.8421, "step": 20360 }, { "epoch": 0.522813386164164, "grad_norm": 0.7578125, "learning_rate": 0.00015654986976951362, "loss": 0.9042, "step": 20361 }, { "epoch": 0.5228390633600858, "grad_norm": 0.73046875, "learning_rate": 0.0001565461878664491, "loss": 0.8494, "step": 20362 }, { "epoch": 0.5228647405560076, "grad_norm": 0.82421875, "learning_rate": 0.00015654250585069317, "loss": 0.8955, "step": 20363 }, { "epoch": 0.5228904177519293, "grad_norm": 0.7578125, "learning_rate": 0.0001565388237222531, "loss": 0.7902, "step": 20364 }, { "epoch": 0.5229160949478512, "grad_norm": 0.83984375, "learning_rate": 0.0001565351414811363, "loss": 0.885, "step": 20365 }, { "epoch": 0.522941772143773, "grad_norm": 0.81640625, "learning_rate": 0.00015653145912735, "loss": 0.91, "step": 20366 }, { "epoch": 0.5229674493396949, "grad_norm": 0.7734375, "learning_rate": 0.00015652777666090163, "loss": 0.92, "step": 20367 }, { "epoch": 0.5229931265356167, "grad_norm": 0.82421875, "learning_rate": 0.00015652409408179857, "loss": 0.7775, "step": 20368 }, { "epoch": 0.5230188037315385, "grad_norm": 0.78125, "learning_rate": 0.00015652041139004803, "loss": 0.8368, "step": 20369 }, { "epoch": 0.5230444809274604, "grad_norm": 0.8359375, "learning_rate": 0.00015651672858565745, "loss": 0.9827, "step": 20370 }, { "epoch": 0.5230701581233821, "grad_norm": 0.7421875, "learning_rate": 0.00015651304566863413, "loss": 0.9524, "step": 20371 }, { "epoch": 0.5230958353193039, "grad_norm": 0.8515625, "learning_rate": 0.0001565093626389854, "loss": 0.8939, "step": 20372 }, { "epoch": 0.5231215125152258, "grad_norm": 0.84375, "learning_rate": 0.0001565056794967186, "loss": 0.9485, "step": 20373 }, { "epoch": 0.5231471897111476, "grad_norm": 0.82421875, "learning_rate": 0.0001565019962418411, "loss": 0.8663, "step": 20374 }, { "epoch": 0.5231728669070694, "grad_norm": 0.78515625, "learning_rate": 0.00015649831287436024, "loss": 0.8919, "step": 20375 }, { "epoch": 0.5231985441029913, "grad_norm": 0.6640625, "learning_rate": 0.00015649462939428338, "loss": 0.721, "step": 20376 }, { "epoch": 0.5232242212989131, "grad_norm": 0.71875, "learning_rate": 0.00015649094580161776, "loss": 0.849, "step": 20377 }, { "epoch": 0.5232498984948348, "grad_norm": 0.765625, "learning_rate": 0.0001564872620963708, "loss": 0.7365, "step": 20378 }, { "epoch": 0.5232755756907567, "grad_norm": 0.80078125, "learning_rate": 0.00015648357827854984, "loss": 0.991, "step": 20379 }, { "epoch": 0.5233012528866785, "grad_norm": 0.68359375, "learning_rate": 0.0001564798943481622, "loss": 0.7399, "step": 20380 }, { "epoch": 0.5233269300826003, "grad_norm": 0.76953125, "learning_rate": 0.00015647621030521526, "loss": 0.8614, "step": 20381 }, { "epoch": 0.5233526072785222, "grad_norm": 0.81640625, "learning_rate": 0.00015647252614971633, "loss": 0.7559, "step": 20382 }, { "epoch": 0.523378284474444, "grad_norm": 0.76953125, "learning_rate": 0.00015646884188167274, "loss": 0.7664, "step": 20383 }, { "epoch": 0.5234039616703657, "grad_norm": 0.828125, "learning_rate": 0.00015646515750109184, "loss": 0.9096, "step": 20384 }, { "epoch": 0.5234296388662876, "grad_norm": 0.79296875, "learning_rate": 0.00015646147300798103, "loss": 0.9321, "step": 20385 }, { "epoch": 0.5234553160622094, "grad_norm": 0.76953125, "learning_rate": 0.00015645778840234757, "loss": 0.8435, "step": 20386 }, { "epoch": 0.5234809932581312, "grad_norm": 0.76953125, "learning_rate": 0.0001564541036841988, "loss": 0.8107, "step": 20387 }, { "epoch": 0.5235066704540531, "grad_norm": 0.74609375, "learning_rate": 0.00015645041885354214, "loss": 0.8778, "step": 20388 }, { "epoch": 0.5235323476499749, "grad_norm": 0.703125, "learning_rate": 0.0001564467339103849, "loss": 0.8663, "step": 20389 }, { "epoch": 0.5235580248458968, "grad_norm": 0.7734375, "learning_rate": 0.00015644304885473445, "loss": 0.8578, "step": 20390 }, { "epoch": 0.5235837020418185, "grad_norm": 0.796875, "learning_rate": 0.00015643936368659803, "loss": 0.7862, "step": 20391 }, { "epoch": 0.5236093792377403, "grad_norm": 0.74609375, "learning_rate": 0.0001564356784059831, "loss": 0.8723, "step": 20392 }, { "epoch": 0.5236350564336621, "grad_norm": 0.83984375, "learning_rate": 0.00015643199301289696, "loss": 0.9383, "step": 20393 }, { "epoch": 0.523660733629584, "grad_norm": 0.8203125, "learning_rate": 0.00015642830750734694, "loss": 0.9543, "step": 20394 }, { "epoch": 0.5236864108255058, "grad_norm": 0.7265625, "learning_rate": 0.0001564246218893404, "loss": 0.8507, "step": 20395 }, { "epoch": 0.5237120880214277, "grad_norm": 0.73828125, "learning_rate": 0.0001564209361588847, "loss": 0.8017, "step": 20396 }, { "epoch": 0.5237377652173495, "grad_norm": 1.6796875, "learning_rate": 0.00015641725031598718, "loss": 0.9575, "step": 20397 }, { "epoch": 0.5237634424132712, "grad_norm": 0.75, "learning_rate": 0.00015641356436065515, "loss": 0.9777, "step": 20398 }, { "epoch": 0.523789119609193, "grad_norm": 0.7578125, "learning_rate": 0.000156409878292896, "loss": 0.9351, "step": 20399 }, { "epoch": 0.5238147968051149, "grad_norm": 0.74609375, "learning_rate": 0.00015640619211271703, "loss": 0.9154, "step": 20400 }, { "epoch": 0.5238404740010367, "grad_norm": 0.78515625, "learning_rate": 0.00015640250582012562, "loss": 0.9799, "step": 20401 }, { "epoch": 0.5238661511969586, "grad_norm": 0.71875, "learning_rate": 0.00015639881941512912, "loss": 0.7992, "step": 20402 }, { "epoch": 0.5238918283928804, "grad_norm": 0.75, "learning_rate": 0.00015639513289773487, "loss": 0.8379, "step": 20403 }, { "epoch": 0.5239175055888021, "grad_norm": 0.80078125, "learning_rate": 0.00015639144626795025, "loss": 0.8761, "step": 20404 }, { "epoch": 0.523943182784724, "grad_norm": 0.75, "learning_rate": 0.0001563877595257825, "loss": 1.0213, "step": 20405 }, { "epoch": 0.5239688599806458, "grad_norm": 0.75390625, "learning_rate": 0.00015638407267123912, "loss": 0.9169, "step": 20406 }, { "epoch": 0.5239945371765676, "grad_norm": 0.7578125, "learning_rate": 0.0001563803857043273, "loss": 0.7947, "step": 20407 }, { "epoch": 0.5240202143724895, "grad_norm": 0.71875, "learning_rate": 0.00015637669862505452, "loss": 0.769, "step": 20408 }, { "epoch": 0.5240458915684113, "grad_norm": 1.046875, "learning_rate": 0.00015637301143342803, "loss": 0.8448, "step": 20409 }, { "epoch": 0.5240715687643331, "grad_norm": 0.73828125, "learning_rate": 0.00015636932412945527, "loss": 0.8536, "step": 20410 }, { "epoch": 0.5240972459602549, "grad_norm": 0.7421875, "learning_rate": 0.00015636563671314347, "loss": 0.9717, "step": 20411 }, { "epoch": 0.5241229231561767, "grad_norm": 0.765625, "learning_rate": 0.00015636194918450008, "loss": 0.7997, "step": 20412 }, { "epoch": 0.5241486003520985, "grad_norm": 0.89453125, "learning_rate": 0.00015635826154353243, "loss": 0.8922, "step": 20413 }, { "epoch": 0.5241742775480204, "grad_norm": 0.76171875, "learning_rate": 0.00015635457379024786, "loss": 0.8976, "step": 20414 }, { "epoch": 0.5241999547439422, "grad_norm": 0.75390625, "learning_rate": 0.00015635088592465368, "loss": 0.9055, "step": 20415 }, { "epoch": 0.524225631939864, "grad_norm": 0.7421875, "learning_rate": 0.0001563471979467573, "loss": 0.8003, "step": 20416 }, { "epoch": 0.5242513091357859, "grad_norm": 0.69140625, "learning_rate": 0.00015634350985656604, "loss": 0.9256, "step": 20417 }, { "epoch": 0.5242769863317076, "grad_norm": 0.859375, "learning_rate": 0.00015633982165408727, "loss": 0.9096, "step": 20418 }, { "epoch": 0.5243026635276294, "grad_norm": 0.78515625, "learning_rate": 0.0001563361333393283, "loss": 0.9504, "step": 20419 }, { "epoch": 0.5243283407235513, "grad_norm": 0.75, "learning_rate": 0.00015633244491229653, "loss": 0.7941, "step": 20420 }, { "epoch": 0.5243540179194731, "grad_norm": 0.87890625, "learning_rate": 0.00015632875637299927, "loss": 0.852, "step": 20421 }, { "epoch": 0.524379695115395, "grad_norm": 0.77734375, "learning_rate": 0.0001563250677214439, "loss": 0.8729, "step": 20422 }, { "epoch": 0.5244053723113168, "grad_norm": 0.79296875, "learning_rate": 0.00015632137895763774, "loss": 0.9478, "step": 20423 }, { "epoch": 0.5244310495072385, "grad_norm": 0.70703125, "learning_rate": 0.00015631769008158815, "loss": 0.8484, "step": 20424 }, { "epoch": 0.5244567267031603, "grad_norm": 0.8203125, "learning_rate": 0.00015631400109330249, "loss": 0.7969, "step": 20425 }, { "epoch": 0.5244824038990822, "grad_norm": 0.75390625, "learning_rate": 0.00015631031199278813, "loss": 1.0175, "step": 20426 }, { "epoch": 0.524508081095004, "grad_norm": 0.7265625, "learning_rate": 0.00015630662278005243, "loss": 0.8882, "step": 20427 }, { "epoch": 0.5245337582909259, "grad_norm": 0.82421875, "learning_rate": 0.00015630293345510267, "loss": 0.9324, "step": 20428 }, { "epoch": 0.5245594354868477, "grad_norm": 0.80078125, "learning_rate": 0.00015629924401794627, "loss": 0.9376, "step": 20429 }, { "epoch": 0.5245851126827695, "grad_norm": 0.6953125, "learning_rate": 0.0001562955544685906, "loss": 0.8679, "step": 20430 }, { "epoch": 0.5246107898786913, "grad_norm": 0.83984375, "learning_rate": 0.00015629186480704292, "loss": 1.0354, "step": 20431 }, { "epoch": 0.5246364670746131, "grad_norm": 0.76953125, "learning_rate": 0.00015628817503331063, "loss": 0.8612, "step": 20432 }, { "epoch": 0.5246621442705349, "grad_norm": 0.8125, "learning_rate": 0.00015628448514740112, "loss": 0.9461, "step": 20433 }, { "epoch": 0.5246878214664568, "grad_norm": 0.70703125, "learning_rate": 0.0001562807951493217, "loss": 0.8731, "step": 20434 }, { "epoch": 0.5247134986623786, "grad_norm": 0.81640625, "learning_rate": 0.00015627710503907975, "loss": 0.9043, "step": 20435 }, { "epoch": 0.5247391758583004, "grad_norm": 0.75, "learning_rate": 0.00015627341481668262, "loss": 0.9026, "step": 20436 }, { "epoch": 0.5247648530542223, "grad_norm": 0.7265625, "learning_rate": 0.00015626972448213767, "loss": 0.9326, "step": 20437 }, { "epoch": 0.524790530250144, "grad_norm": 0.76953125, "learning_rate": 0.0001562660340354522, "loss": 0.8555, "step": 20438 }, { "epoch": 0.5248162074460658, "grad_norm": 0.796875, "learning_rate": 0.00015626234347663364, "loss": 0.8956, "step": 20439 }, { "epoch": 0.5248418846419877, "grad_norm": 0.78125, "learning_rate": 0.0001562586528056893, "loss": 0.9303, "step": 20440 }, { "epoch": 0.5248675618379095, "grad_norm": 0.81640625, "learning_rate": 0.00015625496202262656, "loss": 0.8052, "step": 20441 }, { "epoch": 0.5248932390338313, "grad_norm": 0.73828125, "learning_rate": 0.00015625127112745276, "loss": 0.8358, "step": 20442 }, { "epoch": 0.5249189162297532, "grad_norm": 0.79296875, "learning_rate": 0.00015624758012017522, "loss": 1.0631, "step": 20443 }, { "epoch": 0.5249445934256749, "grad_norm": 0.71875, "learning_rate": 0.0001562438890008014, "loss": 0.8641, "step": 20444 }, { "epoch": 0.5249702706215967, "grad_norm": 0.86328125, "learning_rate": 0.0001562401977693385, "loss": 0.8682, "step": 20445 }, { "epoch": 0.5249959478175186, "grad_norm": 0.7421875, "learning_rate": 0.00015623650642579404, "loss": 0.8067, "step": 20446 }, { "epoch": 0.5250216250134404, "grad_norm": 0.76953125, "learning_rate": 0.00015623281497017527, "loss": 0.8244, "step": 20447 }, { "epoch": 0.5250473022093622, "grad_norm": 0.765625, "learning_rate": 0.0001562291234024896, "loss": 0.9431, "step": 20448 }, { "epoch": 0.5250729794052841, "grad_norm": 0.94921875, "learning_rate": 0.00015622543172274435, "loss": 0.9424, "step": 20449 }, { "epoch": 0.5250986566012059, "grad_norm": 0.98046875, "learning_rate": 0.0001562217399309469, "loss": 0.8814, "step": 20450 }, { "epoch": 0.5251243337971276, "grad_norm": 0.8125, "learning_rate": 0.0001562180480271046, "loss": 0.9735, "step": 20451 }, { "epoch": 0.5251500109930495, "grad_norm": 0.7265625, "learning_rate": 0.0001562143560112248, "loss": 0.8643, "step": 20452 }, { "epoch": 0.5251756881889713, "grad_norm": 0.765625, "learning_rate": 0.00015621066388331488, "loss": 0.9995, "step": 20453 }, { "epoch": 0.5252013653848931, "grad_norm": 0.76953125, "learning_rate": 0.0001562069716433822, "loss": 0.7787, "step": 20454 }, { "epoch": 0.525227042580815, "grad_norm": 0.74609375, "learning_rate": 0.00015620327929143408, "loss": 0.8764, "step": 20455 }, { "epoch": 0.5252527197767368, "grad_norm": 0.7890625, "learning_rate": 0.00015619958682747787, "loss": 0.8747, "step": 20456 }, { "epoch": 0.5252783969726585, "grad_norm": 0.82421875, "learning_rate": 0.000156195894251521, "loss": 0.9965, "step": 20457 }, { "epoch": 0.5253040741685804, "grad_norm": 0.7578125, "learning_rate": 0.0001561922015635708, "loss": 0.8327, "step": 20458 }, { "epoch": 0.5253297513645022, "grad_norm": 0.89453125, "learning_rate": 0.0001561885087636346, "loss": 0.7641, "step": 20459 }, { "epoch": 0.525355428560424, "grad_norm": 0.75390625, "learning_rate": 0.00015618481585171975, "loss": 0.8023, "step": 20460 }, { "epoch": 0.5253811057563459, "grad_norm": 0.90625, "learning_rate": 0.00015618112282783368, "loss": 0.9725, "step": 20461 }, { "epoch": 0.5254067829522677, "grad_norm": 0.75, "learning_rate": 0.0001561774296919837, "loss": 0.8489, "step": 20462 }, { "epoch": 0.5254324601481896, "grad_norm": 0.77734375, "learning_rate": 0.00015617373644417716, "loss": 0.8554, "step": 20463 }, { "epoch": 0.5254581373441113, "grad_norm": 0.76171875, "learning_rate": 0.00015617004308442144, "loss": 0.8971, "step": 20464 }, { "epoch": 0.5254838145400331, "grad_norm": 0.80859375, "learning_rate": 0.00015616634961272393, "loss": 0.964, "step": 20465 }, { "epoch": 0.525509491735955, "grad_norm": 0.86328125, "learning_rate": 0.0001561626560290919, "loss": 0.991, "step": 20466 }, { "epoch": 0.5255351689318768, "grad_norm": 0.765625, "learning_rate": 0.0001561589623335328, "loss": 0.839, "step": 20467 }, { "epoch": 0.5255608461277986, "grad_norm": 0.8203125, "learning_rate": 0.00015615526852605398, "loss": 0.859, "step": 20468 }, { "epoch": 0.5255865233237205, "grad_norm": 0.8046875, "learning_rate": 0.00015615157460666278, "loss": 0.9266, "step": 20469 }, { "epoch": 0.5256122005196423, "grad_norm": 0.88671875, "learning_rate": 0.00015614788057536654, "loss": 0.8231, "step": 20470 }, { "epoch": 0.525637877715564, "grad_norm": 0.77734375, "learning_rate": 0.00015614418643217267, "loss": 0.8062, "step": 20471 }, { "epoch": 0.5256635549114859, "grad_norm": 0.83203125, "learning_rate": 0.00015614049217708845, "loss": 0.8808, "step": 20472 }, { "epoch": 0.5256892321074077, "grad_norm": 1.3046875, "learning_rate": 0.00015613679781012135, "loss": 1.0441, "step": 20473 }, { "epoch": 0.5257149093033295, "grad_norm": 0.79296875, "learning_rate": 0.00015613310333127868, "loss": 0.8209, "step": 20474 }, { "epoch": 0.5257405864992514, "grad_norm": 0.73046875, "learning_rate": 0.0001561294087405678, "loss": 0.8712, "step": 20475 }, { "epoch": 0.5257662636951732, "grad_norm": 0.875, "learning_rate": 0.0001561257140379961, "loss": 0.9435, "step": 20476 }, { "epoch": 0.5257919408910949, "grad_norm": 0.70703125, "learning_rate": 0.0001561220192235709, "loss": 0.9028, "step": 20477 }, { "epoch": 0.5258176180870168, "grad_norm": 0.73046875, "learning_rate": 0.00015611832429729958, "loss": 0.9274, "step": 20478 }, { "epoch": 0.5258432952829386, "grad_norm": 0.78515625, "learning_rate": 0.00015611462925918952, "loss": 0.8043, "step": 20479 }, { "epoch": 0.5258689724788604, "grad_norm": 0.80078125, "learning_rate": 0.00015611093410924806, "loss": 0.9054, "step": 20480 }, { "epoch": 0.5258946496747823, "grad_norm": 0.79296875, "learning_rate": 0.0001561072388474826, "loss": 0.7697, "step": 20481 }, { "epoch": 0.5259203268707041, "grad_norm": 0.8828125, "learning_rate": 0.00015610354347390045, "loss": 1.0743, "step": 20482 }, { "epoch": 0.525946004066626, "grad_norm": 0.7109375, "learning_rate": 0.00015609984798850904, "loss": 0.791, "step": 20483 }, { "epoch": 0.5259716812625477, "grad_norm": 0.8359375, "learning_rate": 0.00015609615239131568, "loss": 0.8724, "step": 20484 }, { "epoch": 0.5259973584584695, "grad_norm": 0.76953125, "learning_rate": 0.00015609245668232773, "loss": 0.8603, "step": 20485 }, { "epoch": 0.5260230356543913, "grad_norm": 0.734375, "learning_rate": 0.0001560887608615526, "loss": 0.9658, "step": 20486 }, { "epoch": 0.5260487128503132, "grad_norm": 0.76953125, "learning_rate": 0.00015608506492899765, "loss": 0.8566, "step": 20487 }, { "epoch": 0.526074390046235, "grad_norm": 0.79296875, "learning_rate": 0.00015608136888467023, "loss": 0.8632, "step": 20488 }, { "epoch": 0.5261000672421569, "grad_norm": 0.9375, "learning_rate": 0.00015607767272857772, "loss": 1.0287, "step": 20489 }, { "epoch": 0.5261257444380787, "grad_norm": 0.76171875, "learning_rate": 0.00015607397646072747, "loss": 0.932, "step": 20490 }, { "epoch": 0.5261514216340004, "grad_norm": 0.75, "learning_rate": 0.00015607028008112682, "loss": 0.8844, "step": 20491 }, { "epoch": 0.5261770988299223, "grad_norm": 0.83203125, "learning_rate": 0.0001560665835897832, "loss": 0.902, "step": 20492 }, { "epoch": 0.5262027760258441, "grad_norm": 1.0859375, "learning_rate": 0.00015606288698670392, "loss": 1.0234, "step": 20493 }, { "epoch": 0.5262284532217659, "grad_norm": 0.80859375, "learning_rate": 0.00015605919027189636, "loss": 1.0195, "step": 20494 }, { "epoch": 0.5262541304176878, "grad_norm": 0.79296875, "learning_rate": 0.00015605549344536795, "loss": 0.983, "step": 20495 }, { "epoch": 0.5262798076136096, "grad_norm": 0.87890625, "learning_rate": 0.00015605179650712595, "loss": 0.9249, "step": 20496 }, { "epoch": 0.5263054848095313, "grad_norm": 0.7421875, "learning_rate": 0.0001560480994571778, "loss": 0.7981, "step": 20497 }, { "epoch": 0.5263311620054532, "grad_norm": 0.80859375, "learning_rate": 0.00015604440229553088, "loss": 0.8383, "step": 20498 }, { "epoch": 0.526356839201375, "grad_norm": 0.78515625, "learning_rate": 0.00015604070502219249, "loss": 1.0308, "step": 20499 }, { "epoch": 0.5263825163972968, "grad_norm": 0.7734375, "learning_rate": 0.00015603700763717004, "loss": 0.9106, "step": 20500 }, { "epoch": 0.5264081935932187, "grad_norm": 0.734375, "learning_rate": 0.00015603331014047092, "loss": 0.7536, "step": 20501 }, { "epoch": 0.5264338707891405, "grad_norm": 0.7890625, "learning_rate": 0.00015602961253210245, "loss": 0.8461, "step": 20502 }, { "epoch": 0.5264595479850623, "grad_norm": 0.74609375, "learning_rate": 0.00015602591481207205, "loss": 0.824, "step": 20503 }, { "epoch": 0.5264852251809841, "grad_norm": 1.03125, "learning_rate": 0.00015602221698038702, "loss": 0.878, "step": 20504 }, { "epoch": 0.5265109023769059, "grad_norm": 0.73828125, "learning_rate": 0.0001560185190370548, "loss": 0.9203, "step": 20505 }, { "epoch": 0.5265365795728277, "grad_norm": 0.81640625, "learning_rate": 0.00015601482098208272, "loss": 0.9205, "step": 20506 }, { "epoch": 0.5265622567687496, "grad_norm": 1.359375, "learning_rate": 0.00015601112281547817, "loss": 0.9148, "step": 20507 }, { "epoch": 0.5265879339646714, "grad_norm": 0.703125, "learning_rate": 0.00015600742453724853, "loss": 0.8358, "step": 20508 }, { "epoch": 0.5266136111605932, "grad_norm": 0.78515625, "learning_rate": 0.00015600372614740112, "loss": 0.9692, "step": 20509 }, { "epoch": 0.5266392883565151, "grad_norm": 0.76953125, "learning_rate": 0.00015600002764594337, "loss": 0.9502, "step": 20510 }, { "epoch": 0.5266649655524368, "grad_norm": 0.75, "learning_rate": 0.0001559963290328826, "loss": 0.8452, "step": 20511 }, { "epoch": 0.5266906427483586, "grad_norm": 0.73828125, "learning_rate": 0.00015599263030822622, "loss": 0.7794, "step": 20512 }, { "epoch": 0.5267163199442805, "grad_norm": 0.84375, "learning_rate": 0.00015598893147198158, "loss": 0.8958, "step": 20513 }, { "epoch": 0.5267419971402023, "grad_norm": 0.78515625, "learning_rate": 0.00015598523252415608, "loss": 0.9318, "step": 20514 }, { "epoch": 0.5267676743361241, "grad_norm": 0.76171875, "learning_rate": 0.00015598153346475704, "loss": 0.8819, "step": 20515 }, { "epoch": 0.526793351532046, "grad_norm": 0.80859375, "learning_rate": 0.00015597783429379186, "loss": 0.9529, "step": 20516 }, { "epoch": 0.5268190287279677, "grad_norm": 0.76953125, "learning_rate": 0.00015597413501126792, "loss": 0.8509, "step": 20517 }, { "epoch": 0.5268447059238895, "grad_norm": 0.69140625, "learning_rate": 0.00015597043561719259, "loss": 0.8625, "step": 20518 }, { "epoch": 0.5268703831198114, "grad_norm": 0.80859375, "learning_rate": 0.00015596673611157323, "loss": 0.8237, "step": 20519 }, { "epoch": 0.5268960603157332, "grad_norm": 0.7734375, "learning_rate": 0.00015596303649441723, "loss": 0.9946, "step": 20520 }, { "epoch": 0.526921737511655, "grad_norm": 0.78125, "learning_rate": 0.00015595933676573192, "loss": 0.85, "step": 20521 }, { "epoch": 0.5269474147075769, "grad_norm": 0.8046875, "learning_rate": 0.00015595563692552474, "loss": 0.8176, "step": 20522 }, { "epoch": 0.5269730919034987, "grad_norm": 0.75390625, "learning_rate": 0.00015595193697380303, "loss": 0.8483, "step": 20523 }, { "epoch": 0.5269987690994204, "grad_norm": 0.76953125, "learning_rate": 0.00015594823691057416, "loss": 0.9159, "step": 20524 }, { "epoch": 0.5270244462953423, "grad_norm": 0.78125, "learning_rate": 0.0001559445367358455, "loss": 0.8093, "step": 20525 }, { "epoch": 0.5270501234912641, "grad_norm": 0.80078125, "learning_rate": 0.00015594083644962444, "loss": 0.967, "step": 20526 }, { "epoch": 0.527075800687186, "grad_norm": 0.8046875, "learning_rate": 0.0001559371360519183, "loss": 0.8929, "step": 20527 }, { "epoch": 0.5271014778831078, "grad_norm": 0.7578125, "learning_rate": 0.00015593343554273458, "loss": 0.867, "step": 20528 }, { "epoch": 0.5271271550790296, "grad_norm": 0.82421875, "learning_rate": 0.0001559297349220805, "loss": 0.8452, "step": 20529 }, { "epoch": 0.5271528322749515, "grad_norm": 0.8359375, "learning_rate": 0.00015592603418996357, "loss": 0.9803, "step": 20530 }, { "epoch": 0.5271785094708732, "grad_norm": 0.73828125, "learning_rate": 0.00015592233334639111, "loss": 0.9305, "step": 20531 }, { "epoch": 0.527204186666795, "grad_norm": 0.796875, "learning_rate": 0.00015591863239137042, "loss": 0.7013, "step": 20532 }, { "epoch": 0.5272298638627169, "grad_norm": 0.7578125, "learning_rate": 0.000155914931324909, "loss": 0.8411, "step": 20533 }, { "epoch": 0.5272555410586387, "grad_norm": 0.74609375, "learning_rate": 0.0001559112301470142, "loss": 0.8413, "step": 20534 }, { "epoch": 0.5272812182545605, "grad_norm": 0.8515625, "learning_rate": 0.00015590752885769334, "loss": 0.8946, "step": 20535 }, { "epoch": 0.5273068954504824, "grad_norm": 0.8203125, "learning_rate": 0.0001559038274569538, "loss": 0.8465, "step": 20536 }, { "epoch": 0.5273325726464041, "grad_norm": 0.78125, "learning_rate": 0.00015590012594480298, "loss": 0.8693, "step": 20537 }, { "epoch": 0.5273582498423259, "grad_norm": 0.8125, "learning_rate": 0.0001558964243212483, "loss": 0.9776, "step": 20538 }, { "epoch": 0.5273839270382478, "grad_norm": 0.70703125, "learning_rate": 0.0001558927225862971, "loss": 0.9142, "step": 20539 }, { "epoch": 0.5274096042341696, "grad_norm": 0.890625, "learning_rate": 0.0001558890207399567, "loss": 0.8197, "step": 20540 }, { "epoch": 0.5274352814300914, "grad_norm": 0.796875, "learning_rate": 0.00015588531878223455, "loss": 0.8598, "step": 20541 }, { "epoch": 0.5274609586260133, "grad_norm": 0.8203125, "learning_rate": 0.00015588161671313802, "loss": 0.9015, "step": 20542 }, { "epoch": 0.5274866358219351, "grad_norm": 0.79296875, "learning_rate": 0.00015587791453267448, "loss": 0.8578, "step": 20543 }, { "epoch": 0.5275123130178568, "grad_norm": 0.92578125, "learning_rate": 0.00015587421224085127, "loss": 0.8773, "step": 20544 }, { "epoch": 0.5275379902137787, "grad_norm": 0.8828125, "learning_rate": 0.0001558705098376758, "loss": 1.0151, "step": 20545 }, { "epoch": 0.5275636674097005, "grad_norm": 0.7734375, "learning_rate": 0.0001558668073231555, "loss": 0.8954, "step": 20546 }, { "epoch": 0.5275893446056223, "grad_norm": 0.7578125, "learning_rate": 0.00015586310469729771, "loss": 0.8787, "step": 20547 }, { "epoch": 0.5276150218015442, "grad_norm": 0.765625, "learning_rate": 0.00015585940196010976, "loss": 1.0289, "step": 20548 }, { "epoch": 0.527640698997466, "grad_norm": 0.82421875, "learning_rate": 0.00015585569911159906, "loss": 0.9668, "step": 20549 }, { "epoch": 0.5276663761933879, "grad_norm": 0.74609375, "learning_rate": 0.000155851996151773, "loss": 0.8554, "step": 20550 }, { "epoch": 0.5276920533893096, "grad_norm": 0.859375, "learning_rate": 0.00015584829308063896, "loss": 0.878, "step": 20551 }, { "epoch": 0.5277177305852314, "grad_norm": 0.7578125, "learning_rate": 0.00015584458989820435, "loss": 0.8913, "step": 20552 }, { "epoch": 0.5277434077811533, "grad_norm": 0.765625, "learning_rate": 0.00015584088660447644, "loss": 0.856, "step": 20553 }, { "epoch": 0.5277690849770751, "grad_norm": 0.734375, "learning_rate": 0.00015583718319946277, "loss": 0.8525, "step": 20554 }, { "epoch": 0.5277947621729969, "grad_norm": 0.8203125, "learning_rate": 0.0001558334796831706, "loss": 1.0715, "step": 20555 }, { "epoch": 0.5278204393689188, "grad_norm": 0.80078125, "learning_rate": 0.00015582977605560735, "loss": 1.0087, "step": 20556 }, { "epoch": 0.5278461165648405, "grad_norm": 0.73046875, "learning_rate": 0.0001558260723167804, "loss": 0.8775, "step": 20557 }, { "epoch": 0.5278717937607623, "grad_norm": 0.7734375, "learning_rate": 0.00015582236846669714, "loss": 0.7913, "step": 20558 }, { "epoch": 0.5278974709566842, "grad_norm": 0.84375, "learning_rate": 0.00015581866450536494, "loss": 0.9198, "step": 20559 }, { "epoch": 0.527923148152606, "grad_norm": 0.79296875, "learning_rate": 0.00015581496043279115, "loss": 0.8837, "step": 20560 }, { "epoch": 0.5279488253485278, "grad_norm": 0.80078125, "learning_rate": 0.00015581125624898324, "loss": 0.8224, "step": 20561 }, { "epoch": 0.5279745025444497, "grad_norm": 0.8046875, "learning_rate": 0.0001558075519539485, "loss": 0.9084, "step": 20562 }, { "epoch": 0.5280001797403715, "grad_norm": 0.7890625, "learning_rate": 0.00015580384754769437, "loss": 0.9804, "step": 20563 }, { "epoch": 0.5280258569362932, "grad_norm": 0.75, "learning_rate": 0.0001558001430302282, "loss": 0.8984, "step": 20564 }, { "epoch": 0.5280515341322151, "grad_norm": 0.74609375, "learning_rate": 0.00015579643840155736, "loss": 0.788, "step": 20565 }, { "epoch": 0.5280772113281369, "grad_norm": 0.80859375, "learning_rate": 0.00015579273366168932, "loss": 0.8749, "step": 20566 }, { "epoch": 0.5281028885240587, "grad_norm": 0.7890625, "learning_rate": 0.00015578902881063135, "loss": 0.8258, "step": 20567 }, { "epoch": 0.5281285657199806, "grad_norm": 0.828125, "learning_rate": 0.00015578532384839092, "loss": 0.9781, "step": 20568 }, { "epoch": 0.5281542429159024, "grad_norm": 0.8359375, "learning_rate": 0.00015578161877497534, "loss": 1.0521, "step": 20569 }, { "epoch": 0.5281799201118242, "grad_norm": 0.78515625, "learning_rate": 0.00015577791359039207, "loss": 0.8853, "step": 20570 }, { "epoch": 0.528205597307746, "grad_norm": 0.8203125, "learning_rate": 0.00015577420829464847, "loss": 0.8622, "step": 20571 }, { "epoch": 0.5282312745036678, "grad_norm": 0.78125, "learning_rate": 0.00015577050288775185, "loss": 0.84, "step": 20572 }, { "epoch": 0.5282569516995896, "grad_norm": 0.75, "learning_rate": 0.0001557667973697097, "loss": 0.8967, "step": 20573 }, { "epoch": 0.5282826288955115, "grad_norm": 0.7890625, "learning_rate": 0.00015576309174052938, "loss": 0.9105, "step": 20574 }, { "epoch": 0.5283083060914333, "grad_norm": 0.74609375, "learning_rate": 0.0001557593860002182, "loss": 0.9727, "step": 20575 }, { "epoch": 0.5283339832873551, "grad_norm": 0.7265625, "learning_rate": 0.0001557556801487836, "loss": 0.8813, "step": 20576 }, { "epoch": 0.5283596604832769, "grad_norm": 0.70703125, "learning_rate": 0.000155751974186233, "loss": 0.8965, "step": 20577 }, { "epoch": 0.5283853376791987, "grad_norm": 0.76953125, "learning_rate": 0.00015574826811257374, "loss": 1.2244, "step": 20578 }, { "epoch": 0.5284110148751205, "grad_norm": 0.703125, "learning_rate": 0.00015574456192781323, "loss": 0.812, "step": 20579 }, { "epoch": 0.5284366920710424, "grad_norm": 0.7578125, "learning_rate": 0.00015574085563195884, "loss": 0.7198, "step": 20580 }, { "epoch": 0.5284623692669642, "grad_norm": 0.8046875, "learning_rate": 0.00015573714922501795, "loss": 0.9913, "step": 20581 }, { "epoch": 0.528488046462886, "grad_norm": 0.765625, "learning_rate": 0.00015573344270699795, "loss": 0.873, "step": 20582 }, { "epoch": 0.5285137236588079, "grad_norm": 0.83984375, "learning_rate": 0.00015572973607790624, "loss": 0.9247, "step": 20583 }, { "epoch": 0.5285394008547296, "grad_norm": 0.796875, "learning_rate": 0.0001557260293377502, "loss": 0.9728, "step": 20584 }, { "epoch": 0.5285650780506514, "grad_norm": 0.71484375, "learning_rate": 0.00015572232248653721, "loss": 0.8328, "step": 20585 }, { "epoch": 0.5285907552465733, "grad_norm": 0.828125, "learning_rate": 0.0001557186155242747, "loss": 0.8585, "step": 20586 }, { "epoch": 0.5286164324424951, "grad_norm": 0.7265625, "learning_rate": 0.00015571490845097, "loss": 0.8761, "step": 20587 }, { "epoch": 0.528642109638417, "grad_norm": 0.89453125, "learning_rate": 0.0001557112012666305, "loss": 0.9071, "step": 20588 }, { "epoch": 0.5286677868343388, "grad_norm": 0.75390625, "learning_rate": 0.00015570749397126362, "loss": 0.7933, "step": 20589 }, { "epoch": 0.5286934640302606, "grad_norm": 0.765625, "learning_rate": 0.00015570378656487675, "loss": 0.9054, "step": 20590 }, { "epoch": 0.5287191412261824, "grad_norm": 0.80859375, "learning_rate": 0.00015570007904747724, "loss": 0.9242, "step": 20591 }, { "epoch": 0.5287448184221042, "grad_norm": 0.76953125, "learning_rate": 0.0001556963714190725, "loss": 0.8388, "step": 20592 }, { "epoch": 0.528770495618026, "grad_norm": 0.7890625, "learning_rate": 0.00015569266367966998, "loss": 1.0722, "step": 20593 }, { "epoch": 0.5287961728139479, "grad_norm": 0.80078125, "learning_rate": 0.000155688955829277, "loss": 1.0151, "step": 20594 }, { "epoch": 0.5288218500098697, "grad_norm": 0.74609375, "learning_rate": 0.0001556852478679009, "loss": 0.8269, "step": 20595 }, { "epoch": 0.5288475272057915, "grad_norm": 0.86328125, "learning_rate": 0.00015568153979554915, "loss": 0.9687, "step": 20596 }, { "epoch": 0.5288732044017133, "grad_norm": 0.78515625, "learning_rate": 0.00015567783161222915, "loss": 0.8521, "step": 20597 }, { "epoch": 0.5288988815976351, "grad_norm": 1.28125, "learning_rate": 0.00015567412331794826, "loss": 0.9329, "step": 20598 }, { "epoch": 0.5289245587935569, "grad_norm": 0.734375, "learning_rate": 0.00015567041491271387, "loss": 0.789, "step": 20599 }, { "epoch": 0.5289502359894788, "grad_norm": 0.859375, "learning_rate": 0.00015566670639653336, "loss": 1.0333, "step": 20600 }, { "epoch": 0.5289759131854006, "grad_norm": 0.77734375, "learning_rate": 0.00015566299776941414, "loss": 0.7858, "step": 20601 }, { "epoch": 0.5290015903813224, "grad_norm": 0.86328125, "learning_rate": 0.00015565928903136356, "loss": 0.8898, "step": 20602 }, { "epoch": 0.5290272675772443, "grad_norm": 0.84765625, "learning_rate": 0.00015565558018238907, "loss": 0.8005, "step": 20603 }, { "epoch": 0.529052944773166, "grad_norm": 0.8046875, "learning_rate": 0.00015565187122249804, "loss": 0.8657, "step": 20604 }, { "epoch": 0.5290786219690878, "grad_norm": 0.7265625, "learning_rate": 0.00015564816215169787, "loss": 0.8347, "step": 20605 }, { "epoch": 0.5291042991650097, "grad_norm": 0.81640625, "learning_rate": 0.0001556444529699959, "loss": 0.8452, "step": 20606 }, { "epoch": 0.5291299763609315, "grad_norm": 0.85546875, "learning_rate": 0.0001556407436773996, "loss": 0.8493, "step": 20607 }, { "epoch": 0.5291556535568533, "grad_norm": 1.65625, "learning_rate": 0.0001556370342739163, "loss": 0.8906, "step": 20608 }, { "epoch": 0.5291813307527752, "grad_norm": 0.80859375, "learning_rate": 0.00015563332475955344, "loss": 0.8292, "step": 20609 }, { "epoch": 0.529207007948697, "grad_norm": 0.80859375, "learning_rate": 0.00015562961513431838, "loss": 0.8687, "step": 20610 }, { "epoch": 0.5292326851446187, "grad_norm": 0.828125, "learning_rate": 0.00015562590539821853, "loss": 0.7403, "step": 20611 }, { "epoch": 0.5292583623405406, "grad_norm": 0.71875, "learning_rate": 0.00015562219555126125, "loss": 0.7196, "step": 20612 }, { "epoch": 0.5292840395364624, "grad_norm": 0.8203125, "learning_rate": 0.00015561848559345398, "loss": 0.9404, "step": 20613 }, { "epoch": 0.5293097167323843, "grad_norm": 0.7109375, "learning_rate": 0.0001556147755248041, "loss": 0.863, "step": 20614 }, { "epoch": 0.5293353939283061, "grad_norm": 0.71875, "learning_rate": 0.00015561106534531897, "loss": 0.7353, "step": 20615 }, { "epoch": 0.5293610711242279, "grad_norm": 0.8125, "learning_rate": 0.000155607355055006, "loss": 0.8851, "step": 20616 }, { "epoch": 0.5293867483201496, "grad_norm": 0.71484375, "learning_rate": 0.00015560364465387264, "loss": 0.8053, "step": 20617 }, { "epoch": 0.5294124255160715, "grad_norm": 0.76171875, "learning_rate": 0.0001555999341419262, "loss": 1.0035, "step": 20618 }, { "epoch": 0.5294381027119933, "grad_norm": 0.78125, "learning_rate": 0.00015559622351917412, "loss": 0.8971, "step": 20619 }, { "epoch": 0.5294637799079152, "grad_norm": 0.90625, "learning_rate": 0.0001555925127856238, "loss": 0.8823, "step": 20620 }, { "epoch": 0.529489457103837, "grad_norm": 0.8515625, "learning_rate": 0.00015558880194128263, "loss": 0.8682, "step": 20621 }, { "epoch": 0.5295151342997588, "grad_norm": 0.83203125, "learning_rate": 0.000155585090986158, "loss": 0.8657, "step": 20622 }, { "epoch": 0.5295408114956807, "grad_norm": 0.8046875, "learning_rate": 0.00015558137992025727, "loss": 0.9041, "step": 20623 }, { "epoch": 0.5295664886916024, "grad_norm": 0.79296875, "learning_rate": 0.0001555776687435879, "loss": 0.87, "step": 20624 }, { "epoch": 0.5295921658875242, "grad_norm": 0.734375, "learning_rate": 0.00015557395745615724, "loss": 0.9288, "step": 20625 }, { "epoch": 0.5296178430834461, "grad_norm": 0.8125, "learning_rate": 0.00015557024605797272, "loss": 0.9289, "step": 20626 }, { "epoch": 0.5296435202793679, "grad_norm": 0.71484375, "learning_rate": 0.00015556653454904172, "loss": 0.7873, "step": 20627 }, { "epoch": 0.5296691974752897, "grad_norm": 0.78515625, "learning_rate": 0.00015556282292937163, "loss": 0.8699, "step": 20628 }, { "epoch": 0.5296948746712116, "grad_norm": 0.79296875, "learning_rate": 0.00015555911119896984, "loss": 1.0027, "step": 20629 }, { "epoch": 0.5297205518671334, "grad_norm": 0.77734375, "learning_rate": 0.00015555539935784376, "loss": 0.9284, "step": 20630 }, { "epoch": 0.5297462290630551, "grad_norm": 0.7421875, "learning_rate": 0.0001555516874060008, "loss": 0.8668, "step": 20631 }, { "epoch": 0.529771906258977, "grad_norm": 0.75390625, "learning_rate": 0.00015554797534344833, "loss": 0.9629, "step": 20632 }, { "epoch": 0.5297975834548988, "grad_norm": 1.2265625, "learning_rate": 0.0001555442631701938, "loss": 0.8304, "step": 20633 }, { "epoch": 0.5298232606508206, "grad_norm": 0.87109375, "learning_rate": 0.00015554055088624454, "loss": 0.9362, "step": 20634 }, { "epoch": 0.5298489378467425, "grad_norm": 0.890625, "learning_rate": 0.000155536838491608, "loss": 0.9161, "step": 20635 }, { "epoch": 0.5298746150426643, "grad_norm": 0.8046875, "learning_rate": 0.00015553312598629154, "loss": 0.967, "step": 20636 }, { "epoch": 0.529900292238586, "grad_norm": 0.79296875, "learning_rate": 0.00015552941337030257, "loss": 0.9438, "step": 20637 }, { "epoch": 0.5299259694345079, "grad_norm": 0.71875, "learning_rate": 0.0001555257006436485, "loss": 0.8045, "step": 20638 }, { "epoch": 0.5299516466304297, "grad_norm": 0.79296875, "learning_rate": 0.00015552198780633674, "loss": 0.9449, "step": 20639 }, { "epoch": 0.5299773238263515, "grad_norm": 0.80078125, "learning_rate": 0.00015551827485837466, "loss": 0.8094, "step": 20640 }, { "epoch": 0.5300030010222734, "grad_norm": 0.7734375, "learning_rate": 0.0001555145617997697, "loss": 0.844, "step": 20641 }, { "epoch": 0.5300286782181952, "grad_norm": 0.81640625, "learning_rate": 0.0001555108486305292, "loss": 0.8829, "step": 20642 }, { "epoch": 0.530054355414117, "grad_norm": 0.79296875, "learning_rate": 0.0001555071353506606, "loss": 0.8948, "step": 20643 }, { "epoch": 0.5300800326100388, "grad_norm": 0.89453125, "learning_rate": 0.0001555034219601713, "loss": 1.1108, "step": 20644 }, { "epoch": 0.5301057098059606, "grad_norm": 0.88671875, "learning_rate": 0.00015549970845906867, "loss": 0.9515, "step": 20645 }, { "epoch": 0.5301313870018824, "grad_norm": 0.796875, "learning_rate": 0.00015549599484736016, "loss": 0.8629, "step": 20646 }, { "epoch": 0.5301570641978043, "grad_norm": 0.77734375, "learning_rate": 0.00015549228112505314, "loss": 0.8198, "step": 20647 }, { "epoch": 0.5301827413937261, "grad_norm": 0.890625, "learning_rate": 0.00015548856729215502, "loss": 0.8438, "step": 20648 }, { "epoch": 0.530208418589648, "grad_norm": 0.75, "learning_rate": 0.00015548485334867317, "loss": 0.8806, "step": 20649 }, { "epoch": 0.5302340957855698, "grad_norm": 0.765625, "learning_rate": 0.00015548113929461505, "loss": 0.9158, "step": 20650 }, { "epoch": 0.5302597729814915, "grad_norm": 0.75390625, "learning_rate": 0.00015547742512998802, "loss": 0.8464, "step": 20651 }, { "epoch": 0.5302854501774134, "grad_norm": 0.7421875, "learning_rate": 0.0001554737108547995, "loss": 0.7621, "step": 20652 }, { "epoch": 0.5303111273733352, "grad_norm": 0.74609375, "learning_rate": 0.00015546999646905688, "loss": 0.9069, "step": 20653 }, { "epoch": 0.530336804569257, "grad_norm": 0.68359375, "learning_rate": 0.00015546628197276756, "loss": 0.7756, "step": 20654 }, { "epoch": 0.5303624817651789, "grad_norm": 0.7890625, "learning_rate": 0.00015546256736593896, "loss": 1.0134, "step": 20655 }, { "epoch": 0.5303881589611007, "grad_norm": 0.7734375, "learning_rate": 0.00015545885264857848, "loss": 0.9332, "step": 20656 }, { "epoch": 0.5304138361570224, "grad_norm": 0.7734375, "learning_rate": 0.0001554551378206935, "loss": 0.8057, "step": 20657 }, { "epoch": 0.5304395133529443, "grad_norm": 0.80859375, "learning_rate": 0.00015545142288229144, "loss": 1.0179, "step": 20658 }, { "epoch": 0.5304651905488661, "grad_norm": 0.828125, "learning_rate": 0.0001554477078333797, "loss": 0.849, "step": 20659 }, { "epoch": 0.5304908677447879, "grad_norm": 0.71875, "learning_rate": 0.00015544399267396574, "loss": 0.8559, "step": 20660 }, { "epoch": 0.5305165449407098, "grad_norm": 0.79296875, "learning_rate": 0.00015544027740405685, "loss": 0.8983, "step": 20661 }, { "epoch": 0.5305422221366316, "grad_norm": 0.82421875, "learning_rate": 0.0001554365620236605, "loss": 1.0381, "step": 20662 }, { "epoch": 0.5305678993325534, "grad_norm": 0.80078125, "learning_rate": 0.0001554328465327841, "loss": 0.8997, "step": 20663 }, { "epoch": 0.5305935765284752, "grad_norm": 0.73046875, "learning_rate": 0.00015542913093143505, "loss": 0.9309, "step": 20664 }, { "epoch": 0.530619253724397, "grad_norm": 0.79296875, "learning_rate": 0.00015542541521962075, "loss": 0.7741, "step": 20665 }, { "epoch": 0.5306449309203188, "grad_norm": 0.765625, "learning_rate": 0.00015542169939734862, "loss": 0.9277, "step": 20666 }, { "epoch": 0.5306706081162407, "grad_norm": 0.70703125, "learning_rate": 0.000155417983464626, "loss": 0.7865, "step": 20667 }, { "epoch": 0.5306962853121625, "grad_norm": 0.7265625, "learning_rate": 0.0001554142674214604, "loss": 0.8929, "step": 20668 }, { "epoch": 0.5307219625080843, "grad_norm": 0.81640625, "learning_rate": 0.00015541055126785914, "loss": 0.9193, "step": 20669 }, { "epoch": 0.5307476397040061, "grad_norm": 0.7890625, "learning_rate": 0.00015540683500382964, "loss": 0.9074, "step": 20670 }, { "epoch": 0.5307733168999279, "grad_norm": 0.7578125, "learning_rate": 0.00015540311862937935, "loss": 0.7447, "step": 20671 }, { "epoch": 0.5307989940958497, "grad_norm": 0.7421875, "learning_rate": 0.00015539940214451568, "loss": 0.9456, "step": 20672 }, { "epoch": 0.5308246712917716, "grad_norm": 0.83203125, "learning_rate": 0.00015539568554924596, "loss": 0.8948, "step": 20673 }, { "epoch": 0.5308503484876934, "grad_norm": 0.73046875, "learning_rate": 0.00015539196884357763, "loss": 0.7615, "step": 20674 }, { "epoch": 0.5308760256836152, "grad_norm": 0.7265625, "learning_rate": 0.00015538825202751811, "loss": 0.8723, "step": 20675 }, { "epoch": 0.5309017028795371, "grad_norm": 0.73828125, "learning_rate": 0.00015538453510107483, "loss": 0.9188, "step": 20676 }, { "epoch": 0.5309273800754588, "grad_norm": 0.72265625, "learning_rate": 0.00015538081806425516, "loss": 0.9522, "step": 20677 }, { "epoch": 0.5309530572713806, "grad_norm": 0.73828125, "learning_rate": 0.00015537710091706653, "loss": 0.8867, "step": 20678 }, { "epoch": 0.5309787344673025, "grad_norm": 0.75, "learning_rate": 0.00015537338365951637, "loss": 0.9785, "step": 20679 }, { "epoch": 0.5310044116632243, "grad_norm": 0.7109375, "learning_rate": 0.00015536966629161204, "loss": 0.8161, "step": 20680 }, { "epoch": 0.5310300888591462, "grad_norm": 0.78515625, "learning_rate": 0.00015536594881336097, "loss": 0.9173, "step": 20681 }, { "epoch": 0.531055766055068, "grad_norm": 0.78125, "learning_rate": 0.00015536223122477054, "loss": 0.9218, "step": 20682 }, { "epoch": 0.5310814432509898, "grad_norm": 0.875, "learning_rate": 0.0001553585135258482, "loss": 0.9533, "step": 20683 }, { "epoch": 0.5311071204469116, "grad_norm": 0.87109375, "learning_rate": 0.00015535479571660132, "loss": 0.8797, "step": 20684 }, { "epoch": 0.5311327976428334, "grad_norm": 0.765625, "learning_rate": 0.00015535107779703736, "loss": 0.8884, "step": 20685 }, { "epoch": 0.5311584748387552, "grad_norm": 0.7265625, "learning_rate": 0.0001553473597671637, "loss": 0.8979, "step": 20686 }, { "epoch": 0.5311841520346771, "grad_norm": 0.8203125, "learning_rate": 0.00015534364162698777, "loss": 1.0199, "step": 20687 }, { "epoch": 0.5312098292305989, "grad_norm": 0.80859375, "learning_rate": 0.00015533992337651693, "loss": 0.9357, "step": 20688 }, { "epoch": 0.5312355064265207, "grad_norm": 0.7265625, "learning_rate": 0.0001553362050157586, "loss": 0.7848, "step": 20689 }, { "epoch": 0.5312611836224425, "grad_norm": 0.7890625, "learning_rate": 0.00015533248654472025, "loss": 0.8283, "step": 20690 }, { "epoch": 0.5312868608183643, "grad_norm": 0.734375, "learning_rate": 0.00015532876796340927, "loss": 0.7752, "step": 20691 }, { "epoch": 0.5313125380142861, "grad_norm": 0.88671875, "learning_rate": 0.00015532504927183303, "loss": 0.8483, "step": 20692 }, { "epoch": 0.531338215210208, "grad_norm": 0.78125, "learning_rate": 0.00015532133046999895, "loss": 0.9764, "step": 20693 }, { "epoch": 0.5313638924061298, "grad_norm": 0.8125, "learning_rate": 0.00015531761155791447, "loss": 0.9815, "step": 20694 }, { "epoch": 0.5313895696020516, "grad_norm": 0.76171875, "learning_rate": 0.000155313892535587, "loss": 0.9108, "step": 20695 }, { "epoch": 0.5314152467979735, "grad_norm": 0.74609375, "learning_rate": 0.00015531017340302393, "loss": 1.059, "step": 20696 }, { "epoch": 0.5314409239938952, "grad_norm": 0.74609375, "learning_rate": 0.00015530645416023264, "loss": 0.7235, "step": 20697 }, { "epoch": 0.531466601189817, "grad_norm": 0.80859375, "learning_rate": 0.0001553027348072206, "loss": 0.9177, "step": 20698 }, { "epoch": 0.5314922783857389, "grad_norm": 0.71875, "learning_rate": 0.00015529901534399524, "loss": 0.9313, "step": 20699 }, { "epoch": 0.5315179555816607, "grad_norm": 0.82421875, "learning_rate": 0.00015529529577056392, "loss": 0.9022, "step": 20700 }, { "epoch": 0.5315436327775825, "grad_norm": 0.78125, "learning_rate": 0.00015529157608693407, "loss": 0.8746, "step": 20701 }, { "epoch": 0.5315693099735044, "grad_norm": 0.796875, "learning_rate": 0.0001552878562931131, "loss": 0.9614, "step": 20702 }, { "epoch": 0.5315949871694262, "grad_norm": 0.765625, "learning_rate": 0.00015528413638910845, "loss": 0.98, "step": 20703 }, { "epoch": 0.5316206643653479, "grad_norm": 0.7890625, "learning_rate": 0.00015528041637492747, "loss": 0.897, "step": 20704 }, { "epoch": 0.5316463415612698, "grad_norm": 0.71875, "learning_rate": 0.00015527669625057763, "loss": 0.8808, "step": 20705 }, { "epoch": 0.5316720187571916, "grad_norm": 0.76171875, "learning_rate": 0.00015527297601606633, "loss": 0.8879, "step": 20706 }, { "epoch": 0.5316976959531134, "grad_norm": 0.8203125, "learning_rate": 0.00015526925567140097, "loss": 0.9366, "step": 20707 }, { "epoch": 0.5317233731490353, "grad_norm": 0.6796875, "learning_rate": 0.000155265535216589, "loss": 0.9003, "step": 20708 }, { "epoch": 0.5317490503449571, "grad_norm": 0.7265625, "learning_rate": 0.0001552618146516378, "loss": 0.8147, "step": 20709 }, { "epoch": 0.5317747275408788, "grad_norm": 0.7890625, "learning_rate": 0.0001552580939765548, "loss": 1.0481, "step": 20710 }, { "epoch": 0.5318004047368007, "grad_norm": 0.7890625, "learning_rate": 0.0001552543731913474, "loss": 0.9121, "step": 20711 }, { "epoch": 0.5318260819327225, "grad_norm": 0.80078125, "learning_rate": 0.00015525065229602303, "loss": 0.9399, "step": 20712 }, { "epoch": 0.5318517591286444, "grad_norm": 0.78515625, "learning_rate": 0.0001552469312905891, "loss": 0.9203, "step": 20713 }, { "epoch": 0.5318774363245662, "grad_norm": 0.75390625, "learning_rate": 0.00015524321017505304, "loss": 0.9404, "step": 20714 }, { "epoch": 0.531903113520488, "grad_norm": 0.75, "learning_rate": 0.0001552394889494222, "loss": 0.8072, "step": 20715 }, { "epoch": 0.5319287907164099, "grad_norm": 0.79296875, "learning_rate": 0.00015523576761370409, "loss": 0.7702, "step": 20716 }, { "epoch": 0.5319544679123316, "grad_norm": 0.78515625, "learning_rate": 0.00015523204616790608, "loss": 0.838, "step": 20717 }, { "epoch": 0.5319801451082534, "grad_norm": 0.8828125, "learning_rate": 0.00015522832461203561, "loss": 0.7991, "step": 20718 }, { "epoch": 0.5320058223041753, "grad_norm": 0.76953125, "learning_rate": 0.00015522460294610006, "loss": 0.9223, "step": 20719 }, { "epoch": 0.5320314995000971, "grad_norm": 0.76953125, "learning_rate": 0.00015522088117010687, "loss": 0.862, "step": 20720 }, { "epoch": 0.5320571766960189, "grad_norm": 0.796875, "learning_rate": 0.00015521715928406345, "loss": 1.0335, "step": 20721 }, { "epoch": 0.5320828538919408, "grad_norm": 0.78515625, "learning_rate": 0.0001552134372879772, "loss": 0.9937, "step": 20722 }, { "epoch": 0.5321085310878626, "grad_norm": 0.78125, "learning_rate": 0.00015520971518185556, "loss": 1.1018, "step": 20723 }, { "epoch": 0.5321342082837843, "grad_norm": 0.78125, "learning_rate": 0.00015520599296570597, "loss": 0.8164, "step": 20724 }, { "epoch": 0.5321598854797062, "grad_norm": 0.7890625, "learning_rate": 0.00015520227063953583, "loss": 0.8987, "step": 20725 }, { "epoch": 0.532185562675628, "grad_norm": 0.81640625, "learning_rate": 0.0001551985482033525, "loss": 0.8857, "step": 20726 }, { "epoch": 0.5322112398715498, "grad_norm": 0.7578125, "learning_rate": 0.0001551948256571635, "loss": 0.9866, "step": 20727 }, { "epoch": 0.5322369170674717, "grad_norm": 0.81640625, "learning_rate": 0.00015519110300097616, "loss": 0.8878, "step": 20728 }, { "epoch": 0.5322625942633935, "grad_norm": 0.7578125, "learning_rate": 0.00015518738023479798, "loss": 0.8518, "step": 20729 }, { "epoch": 0.5322882714593152, "grad_norm": 0.8046875, "learning_rate": 0.00015518365735863628, "loss": 0.8386, "step": 20730 }, { "epoch": 0.5323139486552371, "grad_norm": 0.72265625, "learning_rate": 0.0001551799343724986, "loss": 0.9289, "step": 20731 }, { "epoch": 0.5323396258511589, "grad_norm": 0.7578125, "learning_rate": 0.00015517621127639225, "loss": 0.9091, "step": 20732 }, { "epoch": 0.5323653030470807, "grad_norm": 0.80078125, "learning_rate": 0.00015517248807032469, "loss": 0.9061, "step": 20733 }, { "epoch": 0.5323909802430026, "grad_norm": 1.0234375, "learning_rate": 0.00015516876475430338, "loss": 0.8099, "step": 20734 }, { "epoch": 0.5324166574389244, "grad_norm": 0.76171875, "learning_rate": 0.0001551650413283357, "loss": 0.9395, "step": 20735 }, { "epoch": 0.5324423346348462, "grad_norm": 0.90234375, "learning_rate": 0.00015516131779242905, "loss": 0.9809, "step": 20736 }, { "epoch": 0.532468011830768, "grad_norm": 0.73828125, "learning_rate": 0.00015515759414659087, "loss": 0.7848, "step": 20737 }, { "epoch": 0.5324936890266898, "grad_norm": 0.81640625, "learning_rate": 0.00015515387039082864, "loss": 0.8848, "step": 20738 }, { "epoch": 0.5325193662226116, "grad_norm": 0.8125, "learning_rate": 0.00015515014652514968, "loss": 0.9869, "step": 20739 }, { "epoch": 0.5325450434185335, "grad_norm": 0.8203125, "learning_rate": 0.0001551464225495615, "loss": 1.0218, "step": 20740 }, { "epoch": 0.5325707206144553, "grad_norm": 0.75390625, "learning_rate": 0.00015514269846407142, "loss": 0.9175, "step": 20741 }, { "epoch": 0.5325963978103772, "grad_norm": 0.8359375, "learning_rate": 0.00015513897426868697, "loss": 0.9473, "step": 20742 }, { "epoch": 0.532622075006299, "grad_norm": 0.78515625, "learning_rate": 0.0001551352499634155, "loss": 0.8356, "step": 20743 }, { "epoch": 0.5326477522022207, "grad_norm": 0.828125, "learning_rate": 0.00015513152554826447, "loss": 0.9028, "step": 20744 }, { "epoch": 0.5326734293981426, "grad_norm": 0.8046875, "learning_rate": 0.0001551278010232413, "loss": 0.8701, "step": 20745 }, { "epoch": 0.5326991065940644, "grad_norm": 0.828125, "learning_rate": 0.0001551240763883534, "loss": 0.8191, "step": 20746 }, { "epoch": 0.5327247837899862, "grad_norm": 0.76953125, "learning_rate": 0.00015512035164360819, "loss": 0.9483, "step": 20747 }, { "epoch": 0.5327504609859081, "grad_norm": 0.80859375, "learning_rate": 0.0001551166267890131, "loss": 0.7808, "step": 20748 }, { "epoch": 0.5327761381818299, "grad_norm": 0.765625, "learning_rate": 0.00015511290182457555, "loss": 0.8337, "step": 20749 }, { "epoch": 0.5328018153777516, "grad_norm": 0.85546875, "learning_rate": 0.00015510917675030293, "loss": 0.8544, "step": 20750 }, { "epoch": 0.5328274925736735, "grad_norm": 0.796875, "learning_rate": 0.00015510545156620275, "loss": 0.8817, "step": 20751 }, { "epoch": 0.5328531697695953, "grad_norm": 0.7265625, "learning_rate": 0.00015510172627228238, "loss": 0.8939, "step": 20752 }, { "epoch": 0.5328788469655171, "grad_norm": 0.71484375, "learning_rate": 0.00015509800086854924, "loss": 0.9553, "step": 20753 }, { "epoch": 0.532904524161439, "grad_norm": 0.765625, "learning_rate": 0.00015509427535501073, "loss": 0.9967, "step": 20754 }, { "epoch": 0.5329302013573608, "grad_norm": 0.7265625, "learning_rate": 0.0001550905497316743, "loss": 0.7886, "step": 20755 }, { "epoch": 0.5329558785532826, "grad_norm": 0.71875, "learning_rate": 0.00015508682399854743, "loss": 0.9084, "step": 20756 }, { "epoch": 0.5329815557492044, "grad_norm": 0.87890625, "learning_rate": 0.0001550830981556375, "loss": 0.9748, "step": 20757 }, { "epoch": 0.5330072329451262, "grad_norm": 0.78125, "learning_rate": 0.0001550793722029519, "loss": 0.8642, "step": 20758 }, { "epoch": 0.533032910141048, "grad_norm": 0.8203125, "learning_rate": 0.00015507564614049808, "loss": 0.9371, "step": 20759 }, { "epoch": 0.5330585873369699, "grad_norm": 0.91015625, "learning_rate": 0.00015507191996828347, "loss": 0.7834, "step": 20760 }, { "epoch": 0.5330842645328917, "grad_norm": 0.82421875, "learning_rate": 0.00015506819368631553, "loss": 0.8519, "step": 20761 }, { "epoch": 0.5331099417288135, "grad_norm": 0.89453125, "learning_rate": 0.00015506446729460165, "loss": 0.9152, "step": 20762 }, { "epoch": 0.5331356189247354, "grad_norm": 0.8984375, "learning_rate": 0.00015506074079314923, "loss": 0.9892, "step": 20763 }, { "epoch": 0.5331612961206571, "grad_norm": 0.83203125, "learning_rate": 0.00015505701418196574, "loss": 0.872, "step": 20764 }, { "epoch": 0.5331869733165789, "grad_norm": 0.73046875, "learning_rate": 0.00015505328746105864, "loss": 0.7738, "step": 20765 }, { "epoch": 0.5332126505125008, "grad_norm": 0.80859375, "learning_rate": 0.00015504956063043524, "loss": 0.8837, "step": 20766 }, { "epoch": 0.5332383277084226, "grad_norm": 0.80859375, "learning_rate": 0.00015504583369010307, "loss": 0.9992, "step": 20767 }, { "epoch": 0.5332640049043444, "grad_norm": 0.79296875, "learning_rate": 0.00015504210664006953, "loss": 0.9999, "step": 20768 }, { "epoch": 0.5332896821002663, "grad_norm": 0.8359375, "learning_rate": 0.000155038379480342, "loss": 0.8236, "step": 20769 }, { "epoch": 0.533315359296188, "grad_norm": 0.85546875, "learning_rate": 0.000155034652210928, "loss": 0.8127, "step": 20770 }, { "epoch": 0.5333410364921098, "grad_norm": 0.828125, "learning_rate": 0.00015503092483183493, "loss": 0.9315, "step": 20771 }, { "epoch": 0.5333667136880317, "grad_norm": 0.75, "learning_rate": 0.00015502719734307015, "loss": 0.8538, "step": 20772 }, { "epoch": 0.5333923908839535, "grad_norm": 0.828125, "learning_rate": 0.00015502346974464114, "loss": 0.8196, "step": 20773 }, { "epoch": 0.5334180680798754, "grad_norm": 0.8203125, "learning_rate": 0.00015501974203655533, "loss": 0.8558, "step": 20774 }, { "epoch": 0.5334437452757972, "grad_norm": 0.734375, "learning_rate": 0.00015501601421882017, "loss": 0.9005, "step": 20775 }, { "epoch": 0.533469422471719, "grad_norm": 1.2734375, "learning_rate": 0.00015501228629144305, "loss": 0.8739, "step": 20776 }, { "epoch": 0.5334950996676407, "grad_norm": 0.83203125, "learning_rate": 0.0001550085582544314, "loss": 0.8494, "step": 20777 }, { "epoch": 0.5335207768635626, "grad_norm": 0.796875, "learning_rate": 0.00015500483010779268, "loss": 0.8298, "step": 20778 }, { "epoch": 0.5335464540594844, "grad_norm": 0.80859375, "learning_rate": 0.00015500110185153428, "loss": 0.8536, "step": 20779 }, { "epoch": 0.5335721312554063, "grad_norm": 0.78515625, "learning_rate": 0.00015499737348566365, "loss": 0.8839, "step": 20780 }, { "epoch": 0.5335978084513281, "grad_norm": 0.859375, "learning_rate": 0.00015499364501018824, "loss": 0.9582, "step": 20781 }, { "epoch": 0.5336234856472499, "grad_norm": 0.7421875, "learning_rate": 0.00015498991642511547, "loss": 0.8186, "step": 20782 }, { "epoch": 0.5336491628431718, "grad_norm": 0.7890625, "learning_rate": 0.00015498618773045273, "loss": 1.0012, "step": 20783 }, { "epoch": 0.5336748400390935, "grad_norm": 0.77734375, "learning_rate": 0.00015498245892620754, "loss": 0.8355, "step": 20784 }, { "epoch": 0.5337005172350153, "grad_norm": 0.734375, "learning_rate": 0.00015497873001238724, "loss": 0.8883, "step": 20785 }, { "epoch": 0.5337261944309372, "grad_norm": 0.890625, "learning_rate": 0.0001549750009889993, "loss": 0.9519, "step": 20786 }, { "epoch": 0.533751871626859, "grad_norm": 0.7734375, "learning_rate": 0.00015497127185605113, "loss": 1.0116, "step": 20787 }, { "epoch": 0.5337775488227808, "grad_norm": 0.8125, "learning_rate": 0.00015496754261355022, "loss": 0.8576, "step": 20788 }, { "epoch": 0.5338032260187027, "grad_norm": 0.7265625, "learning_rate": 0.00015496381326150394, "loss": 0.7942, "step": 20789 }, { "epoch": 0.5338289032146244, "grad_norm": 0.71484375, "learning_rate": 0.00015496008379991974, "loss": 0.6974, "step": 20790 }, { "epoch": 0.5338545804105462, "grad_norm": 0.8203125, "learning_rate": 0.00015495635422880507, "loss": 0.8574, "step": 20791 }, { "epoch": 0.5338802576064681, "grad_norm": 0.8515625, "learning_rate": 0.00015495262454816734, "loss": 0.8841, "step": 20792 }, { "epoch": 0.5339059348023899, "grad_norm": 0.7890625, "learning_rate": 0.00015494889475801399, "loss": 0.7962, "step": 20793 }, { "epoch": 0.5339316119983117, "grad_norm": 0.7578125, "learning_rate": 0.00015494516485835248, "loss": 0.8294, "step": 20794 }, { "epoch": 0.5339572891942336, "grad_norm": 0.80078125, "learning_rate": 0.0001549414348491902, "loss": 0.8748, "step": 20795 }, { "epoch": 0.5339829663901554, "grad_norm": 0.828125, "learning_rate": 0.00015493770473053463, "loss": 0.9014, "step": 20796 }, { "epoch": 0.5340086435860771, "grad_norm": 0.890625, "learning_rate": 0.00015493397450239314, "loss": 0.8615, "step": 20797 }, { "epoch": 0.534034320781999, "grad_norm": 0.7890625, "learning_rate": 0.0001549302441647732, "loss": 0.8761, "step": 20798 }, { "epoch": 0.5340599979779208, "grad_norm": 0.81640625, "learning_rate": 0.00015492651371768227, "loss": 0.9473, "step": 20799 }, { "epoch": 0.5340856751738426, "grad_norm": 0.80859375, "learning_rate": 0.00015492278316112774, "loss": 0.7995, "step": 20800 }, { "epoch": 0.5341113523697645, "grad_norm": 0.83203125, "learning_rate": 0.00015491905249511708, "loss": 0.7894, "step": 20801 }, { "epoch": 0.5341370295656863, "grad_norm": 0.7734375, "learning_rate": 0.0001549153217196577, "loss": 0.797, "step": 20802 }, { "epoch": 0.5341627067616082, "grad_norm": 0.73046875, "learning_rate": 0.00015491159083475705, "loss": 0.953, "step": 20803 }, { "epoch": 0.5341883839575299, "grad_norm": 0.76953125, "learning_rate": 0.00015490785984042258, "loss": 0.8799, "step": 20804 }, { "epoch": 0.5342140611534517, "grad_norm": 0.75, "learning_rate": 0.0001549041287366617, "loss": 0.8625, "step": 20805 }, { "epoch": 0.5342397383493735, "grad_norm": 0.76953125, "learning_rate": 0.00015490039752348182, "loss": 0.8067, "step": 20806 }, { "epoch": 0.5342654155452954, "grad_norm": 0.79296875, "learning_rate": 0.00015489666620089042, "loss": 0.8176, "step": 20807 }, { "epoch": 0.5342910927412172, "grad_norm": 0.66796875, "learning_rate": 0.0001548929347688949, "loss": 0.8567, "step": 20808 }, { "epoch": 0.5343167699371391, "grad_norm": 0.7421875, "learning_rate": 0.00015488920322750277, "loss": 0.9285, "step": 20809 }, { "epoch": 0.5343424471330608, "grad_norm": 0.80859375, "learning_rate": 0.0001548854715767214, "loss": 0.8604, "step": 20810 }, { "epoch": 0.5343681243289826, "grad_norm": 0.859375, "learning_rate": 0.00015488173981655822, "loss": 0.8329, "step": 20811 }, { "epoch": 0.5343938015249045, "grad_norm": 0.703125, "learning_rate": 0.0001548780079470207, "loss": 0.8646, "step": 20812 }, { "epoch": 0.5344194787208263, "grad_norm": 0.765625, "learning_rate": 0.00015487427596811627, "loss": 0.8358, "step": 20813 }, { "epoch": 0.5344451559167481, "grad_norm": 0.84765625, "learning_rate": 0.00015487054387985236, "loss": 1.0763, "step": 20814 }, { "epoch": 0.53447083311267, "grad_norm": 0.81640625, "learning_rate": 0.0001548668116822364, "loss": 0.9413, "step": 20815 }, { "epoch": 0.5344965103085918, "grad_norm": 1.1953125, "learning_rate": 0.0001548630793752759, "loss": 0.9197, "step": 20816 }, { "epoch": 0.5345221875045135, "grad_norm": 0.91796875, "learning_rate": 0.00015485934695897818, "loss": 0.942, "step": 20817 }, { "epoch": 0.5345478647004354, "grad_norm": 0.80078125, "learning_rate": 0.00015485561443335074, "loss": 0.9375, "step": 20818 }, { "epoch": 0.5345735418963572, "grad_norm": 0.8515625, "learning_rate": 0.00015485188179840105, "loss": 0.91, "step": 20819 }, { "epoch": 0.534599219092279, "grad_norm": 0.91796875, "learning_rate": 0.00015484814905413648, "loss": 0.8035, "step": 20820 }, { "epoch": 0.5346248962882009, "grad_norm": 0.71875, "learning_rate": 0.0001548444162005645, "loss": 0.8625, "step": 20821 }, { "epoch": 0.5346505734841227, "grad_norm": 0.7421875, "learning_rate": 0.00015484068323769255, "loss": 0.928, "step": 20822 }, { "epoch": 0.5346762506800445, "grad_norm": 0.77734375, "learning_rate": 0.00015483695016552812, "loss": 0.9438, "step": 20823 }, { "epoch": 0.5347019278759663, "grad_norm": 0.78515625, "learning_rate": 0.00015483321698407857, "loss": 0.8013, "step": 20824 }, { "epoch": 0.5347276050718881, "grad_norm": 0.82421875, "learning_rate": 0.0001548294836933514, "loss": 0.7706, "step": 20825 }, { "epoch": 0.5347532822678099, "grad_norm": 0.734375, "learning_rate": 0.00015482575029335399, "loss": 0.7959, "step": 20826 }, { "epoch": 0.5347789594637318, "grad_norm": 0.7734375, "learning_rate": 0.0001548220167840938, "loss": 0.9671, "step": 20827 }, { "epoch": 0.5348046366596536, "grad_norm": 0.77734375, "learning_rate": 0.00015481828316557831, "loss": 0.9241, "step": 20828 }, { "epoch": 0.5348303138555754, "grad_norm": 0.75390625, "learning_rate": 0.0001548145494378149, "loss": 0.8794, "step": 20829 }, { "epoch": 0.5348559910514972, "grad_norm": 0.83203125, "learning_rate": 0.0001548108156008111, "loss": 0.9337, "step": 20830 }, { "epoch": 0.534881668247419, "grad_norm": 0.80078125, "learning_rate": 0.00015480708165457427, "loss": 0.8885, "step": 20831 }, { "epoch": 0.5349073454433408, "grad_norm": 0.78515625, "learning_rate": 0.00015480334759911186, "loss": 0.8132, "step": 20832 }, { "epoch": 0.5349330226392627, "grad_norm": 0.7734375, "learning_rate": 0.00015479961343443135, "loss": 0.9884, "step": 20833 }, { "epoch": 0.5349586998351845, "grad_norm": 0.82421875, "learning_rate": 0.00015479587916054013, "loss": 0.9417, "step": 20834 }, { "epoch": 0.5349843770311064, "grad_norm": 0.79296875, "learning_rate": 0.00015479214477744568, "loss": 0.8627, "step": 20835 }, { "epoch": 0.5350100542270282, "grad_norm": 0.87109375, "learning_rate": 0.00015478841028515548, "loss": 0.9295, "step": 20836 }, { "epoch": 0.5350357314229499, "grad_norm": 0.82421875, "learning_rate": 0.00015478467568367688, "loss": 0.8874, "step": 20837 }, { "epoch": 0.5350614086188717, "grad_norm": 0.82421875, "learning_rate": 0.0001547809409730174, "loss": 0.9747, "step": 20838 }, { "epoch": 0.5350870858147936, "grad_norm": 0.7265625, "learning_rate": 0.00015477720615318443, "loss": 0.8557, "step": 20839 }, { "epoch": 0.5351127630107154, "grad_norm": 0.74609375, "learning_rate": 0.0001547734712241854, "loss": 0.8096, "step": 20840 }, { "epoch": 0.5351384402066373, "grad_norm": 0.765625, "learning_rate": 0.00015476973618602785, "loss": 0.9611, "step": 20841 }, { "epoch": 0.5351641174025591, "grad_norm": 0.85546875, "learning_rate": 0.00015476600103871916, "loss": 1.0574, "step": 20842 }, { "epoch": 0.5351897945984809, "grad_norm": 0.7890625, "learning_rate": 0.00015476226578226677, "loss": 0.9441, "step": 20843 }, { "epoch": 0.5352154717944027, "grad_norm": 0.71484375, "learning_rate": 0.00015475853041667815, "loss": 0.8065, "step": 20844 }, { "epoch": 0.5352411489903245, "grad_norm": 0.65625, "learning_rate": 0.00015475479494196068, "loss": 0.7885, "step": 20845 }, { "epoch": 0.5352668261862463, "grad_norm": 0.84375, "learning_rate": 0.00015475105935812186, "loss": 0.8353, "step": 20846 }, { "epoch": 0.5352925033821682, "grad_norm": 0.7265625, "learning_rate": 0.00015474732366516913, "loss": 0.8313, "step": 20847 }, { "epoch": 0.53531818057809, "grad_norm": 0.8046875, "learning_rate": 0.0001547435878631099, "loss": 0.8021, "step": 20848 }, { "epoch": 0.5353438577740118, "grad_norm": 0.74609375, "learning_rate": 0.0001547398519519517, "loss": 0.8646, "step": 20849 }, { "epoch": 0.5353695349699336, "grad_norm": 0.765625, "learning_rate": 0.00015473611593170192, "loss": 0.7875, "step": 20850 }, { "epoch": 0.5353952121658554, "grad_norm": 0.8359375, "learning_rate": 0.00015473237980236798, "loss": 0.8829, "step": 20851 }, { "epoch": 0.5354208893617772, "grad_norm": 0.796875, "learning_rate": 0.00015472864356395736, "loss": 0.9712, "step": 20852 }, { "epoch": 0.5354465665576991, "grad_norm": 0.765625, "learning_rate": 0.00015472490721647747, "loss": 0.766, "step": 20853 }, { "epoch": 0.5354722437536209, "grad_norm": 0.72265625, "learning_rate": 0.0001547211707599358, "loss": 0.7529, "step": 20854 }, { "epoch": 0.5354979209495427, "grad_norm": 0.83203125, "learning_rate": 0.00015471743419433976, "loss": 0.9071, "step": 20855 }, { "epoch": 0.5355235981454646, "grad_norm": 0.78125, "learning_rate": 0.00015471369751969688, "loss": 0.7046, "step": 20856 }, { "epoch": 0.5355492753413863, "grad_norm": 0.78515625, "learning_rate": 0.0001547099607360145, "loss": 0.7882, "step": 20857 }, { "epoch": 0.5355749525373081, "grad_norm": 0.765625, "learning_rate": 0.0001547062238433001, "loss": 0.9125, "step": 20858 }, { "epoch": 0.53560062973323, "grad_norm": 0.859375, "learning_rate": 0.00015470248684156115, "loss": 0.8014, "step": 20859 }, { "epoch": 0.5356263069291518, "grad_norm": 0.765625, "learning_rate": 0.0001546987497308051, "loss": 0.9139, "step": 20860 }, { "epoch": 0.5356519841250736, "grad_norm": 0.76953125, "learning_rate": 0.00015469501251103935, "loss": 0.8096, "step": 20861 }, { "epoch": 0.5356776613209955, "grad_norm": 0.76953125, "learning_rate": 0.0001546912751822714, "loss": 0.8568, "step": 20862 }, { "epoch": 0.5357033385169172, "grad_norm": 0.765625, "learning_rate": 0.00015468753774450868, "loss": 0.9645, "step": 20863 }, { "epoch": 0.535729015712839, "grad_norm": 0.7734375, "learning_rate": 0.00015468380019775864, "loss": 0.9864, "step": 20864 }, { "epoch": 0.5357546929087609, "grad_norm": 0.8203125, "learning_rate": 0.00015468006254202871, "loss": 0.9454, "step": 20865 }, { "epoch": 0.5357803701046827, "grad_norm": 0.78515625, "learning_rate": 0.00015467632477732636, "loss": 0.9537, "step": 20866 }, { "epoch": 0.5358060473006045, "grad_norm": 0.67578125, "learning_rate": 0.00015467258690365904, "loss": 0.642, "step": 20867 }, { "epoch": 0.5358317244965264, "grad_norm": 0.7421875, "learning_rate": 0.0001546688489210342, "loss": 0.8644, "step": 20868 }, { "epoch": 0.5358574016924482, "grad_norm": 0.8515625, "learning_rate": 0.00015466511082945928, "loss": 0.985, "step": 20869 }, { "epoch": 0.53588307888837, "grad_norm": 0.75390625, "learning_rate": 0.00015466137262894173, "loss": 0.7915, "step": 20870 }, { "epoch": 0.5359087560842918, "grad_norm": 0.94140625, "learning_rate": 0.000154657634319489, "loss": 0.9836, "step": 20871 }, { "epoch": 0.5359344332802136, "grad_norm": 0.8203125, "learning_rate": 0.00015465389590110852, "loss": 0.8215, "step": 20872 }, { "epoch": 0.5359601104761355, "grad_norm": 0.76171875, "learning_rate": 0.0001546501573738078, "loss": 0.9579, "step": 20873 }, { "epoch": 0.5359857876720573, "grad_norm": 0.78125, "learning_rate": 0.00015464641873759422, "loss": 0.7696, "step": 20874 }, { "epoch": 0.5360114648679791, "grad_norm": 0.80859375, "learning_rate": 0.00015464267999247525, "loss": 0.9687, "step": 20875 }, { "epoch": 0.536037142063901, "grad_norm": 0.75, "learning_rate": 0.00015463894113845842, "loss": 0.977, "step": 20876 }, { "epoch": 0.5360628192598227, "grad_norm": 0.734375, "learning_rate": 0.00015463520217555107, "loss": 0.8374, "step": 20877 }, { "epoch": 0.5360884964557445, "grad_norm": 0.72265625, "learning_rate": 0.0001546314631037607, "loss": 0.8589, "step": 20878 }, { "epoch": 0.5361141736516664, "grad_norm": 0.71484375, "learning_rate": 0.00015462772392309476, "loss": 0.8301, "step": 20879 }, { "epoch": 0.5361398508475882, "grad_norm": 0.8125, "learning_rate": 0.00015462398463356072, "loss": 0.899, "step": 20880 }, { "epoch": 0.53616552804351, "grad_norm": 0.84765625, "learning_rate": 0.00015462024523516598, "loss": 0.7932, "step": 20881 }, { "epoch": 0.5361912052394319, "grad_norm": 0.75, "learning_rate": 0.00015461650572791804, "loss": 0.7927, "step": 20882 }, { "epoch": 0.5362168824353536, "grad_norm": 0.796875, "learning_rate": 0.00015461276611182436, "loss": 0.9274, "step": 20883 }, { "epoch": 0.5362425596312754, "grad_norm": 0.76953125, "learning_rate": 0.00015460902638689237, "loss": 0.9082, "step": 20884 }, { "epoch": 0.5362682368271973, "grad_norm": 0.7421875, "learning_rate": 0.00015460528655312947, "loss": 0.7624, "step": 20885 }, { "epoch": 0.5362939140231191, "grad_norm": 0.7421875, "learning_rate": 0.0001546015466105432, "loss": 0.7452, "step": 20886 }, { "epoch": 0.5363195912190409, "grad_norm": 0.77734375, "learning_rate": 0.000154597806559141, "loss": 0.8084, "step": 20887 }, { "epoch": 0.5363452684149628, "grad_norm": 0.734375, "learning_rate": 0.00015459406639893025, "loss": 0.871, "step": 20888 }, { "epoch": 0.5363709456108846, "grad_norm": 0.69921875, "learning_rate": 0.00015459032612991853, "loss": 0.9271, "step": 20889 }, { "epoch": 0.5363966228068063, "grad_norm": 0.80078125, "learning_rate": 0.00015458658575211317, "loss": 0.9314, "step": 20890 }, { "epoch": 0.5364223000027282, "grad_norm": 0.85546875, "learning_rate": 0.0001545828452655217, "loss": 0.8698, "step": 20891 }, { "epoch": 0.53644797719865, "grad_norm": 0.765625, "learning_rate": 0.00015457910467015152, "loss": 0.8385, "step": 20892 }, { "epoch": 0.5364736543945718, "grad_norm": 0.86328125, "learning_rate": 0.00015457536396601014, "loss": 0.8892, "step": 20893 }, { "epoch": 0.5364993315904937, "grad_norm": 0.875, "learning_rate": 0.00015457162315310499, "loss": 1.0273, "step": 20894 }, { "epoch": 0.5365250087864155, "grad_norm": 0.85546875, "learning_rate": 0.00015456788223144348, "loss": 0.8105, "step": 20895 }, { "epoch": 0.5365506859823373, "grad_norm": 0.890625, "learning_rate": 0.00015456414120103317, "loss": 0.8784, "step": 20896 }, { "epoch": 0.5365763631782591, "grad_norm": 0.73046875, "learning_rate": 0.00015456040006188145, "loss": 0.8026, "step": 20897 }, { "epoch": 0.5366020403741809, "grad_norm": 0.83203125, "learning_rate": 0.00015455665881399574, "loss": 0.9281, "step": 20898 }, { "epoch": 0.5366277175701027, "grad_norm": 0.765625, "learning_rate": 0.00015455291745738355, "loss": 0.8806, "step": 20899 }, { "epoch": 0.5366533947660246, "grad_norm": 0.828125, "learning_rate": 0.00015454917599205234, "loss": 0.9292, "step": 20900 }, { "epoch": 0.5366790719619464, "grad_norm": 0.7890625, "learning_rate": 0.00015454543441800956, "loss": 0.9494, "step": 20901 }, { "epoch": 0.5367047491578683, "grad_norm": 0.80078125, "learning_rate": 0.0001545416927352626, "loss": 0.9284, "step": 20902 }, { "epoch": 0.53673042635379, "grad_norm": 0.7734375, "learning_rate": 0.00015453795094381905, "loss": 0.8787, "step": 20903 }, { "epoch": 0.5367561035497118, "grad_norm": 0.7265625, "learning_rate": 0.00015453420904368623, "loss": 0.9056, "step": 20904 }, { "epoch": 0.5367817807456337, "grad_norm": 0.70703125, "learning_rate": 0.00015453046703487169, "loss": 0.7635, "step": 20905 }, { "epoch": 0.5368074579415555, "grad_norm": 0.7421875, "learning_rate": 0.0001545267249173828, "loss": 0.8092, "step": 20906 }, { "epoch": 0.5368331351374773, "grad_norm": 0.80078125, "learning_rate": 0.00015452298269122715, "loss": 0.8991, "step": 20907 }, { "epoch": 0.5368588123333992, "grad_norm": 0.76171875, "learning_rate": 0.0001545192403564121, "loss": 0.7684, "step": 20908 }, { "epoch": 0.536884489529321, "grad_norm": 0.796875, "learning_rate": 0.0001545154979129451, "loss": 0.8338, "step": 20909 }, { "epoch": 0.5369101667252427, "grad_norm": 0.8125, "learning_rate": 0.00015451175536083367, "loss": 0.9107, "step": 20910 }, { "epoch": 0.5369358439211646, "grad_norm": 0.7578125, "learning_rate": 0.0001545080127000852, "loss": 0.8354, "step": 20911 }, { "epoch": 0.5369615211170864, "grad_norm": 0.80078125, "learning_rate": 0.0001545042699307072, "loss": 0.8845, "step": 20912 }, { "epoch": 0.5369871983130082, "grad_norm": 0.71875, "learning_rate": 0.00015450052705270713, "loss": 0.9342, "step": 20913 }, { "epoch": 0.5370128755089301, "grad_norm": 0.7890625, "learning_rate": 0.0001544967840660924, "loss": 1.0777, "step": 20914 }, { "epoch": 0.5370385527048519, "grad_norm": 0.84765625, "learning_rate": 0.0001544930409708705, "loss": 0.9867, "step": 20915 }, { "epoch": 0.5370642299007737, "grad_norm": 0.87109375, "learning_rate": 0.00015448929776704895, "loss": 0.8698, "step": 20916 }, { "epoch": 0.5370899070966955, "grad_norm": 0.77734375, "learning_rate": 0.00015448555445463514, "loss": 0.8666, "step": 20917 }, { "epoch": 0.5371155842926173, "grad_norm": 0.8671875, "learning_rate": 0.00015448181103363648, "loss": 0.9343, "step": 20918 }, { "epoch": 0.5371412614885391, "grad_norm": 0.78515625, "learning_rate": 0.00015447806750406052, "loss": 0.9945, "step": 20919 }, { "epoch": 0.537166938684461, "grad_norm": 0.80078125, "learning_rate": 0.0001544743238659147, "loss": 0.7972, "step": 20920 }, { "epoch": 0.5371926158803828, "grad_norm": 0.74609375, "learning_rate": 0.00015447058011920647, "loss": 0.7823, "step": 20921 }, { "epoch": 0.5372182930763046, "grad_norm": 0.78125, "learning_rate": 0.00015446683626394331, "loss": 0.7898, "step": 20922 }, { "epoch": 0.5372439702722264, "grad_norm": 0.71484375, "learning_rate": 0.00015446309230013268, "loss": 0.8918, "step": 20923 }, { "epoch": 0.5372696474681482, "grad_norm": 0.80078125, "learning_rate": 0.000154459348227782, "loss": 0.8591, "step": 20924 }, { "epoch": 0.53729532466407, "grad_norm": 0.796875, "learning_rate": 0.00015445560404689875, "loss": 0.8955, "step": 20925 }, { "epoch": 0.5373210018599919, "grad_norm": 0.7578125, "learning_rate": 0.00015445185975749038, "loss": 0.8706, "step": 20926 }, { "epoch": 0.5373466790559137, "grad_norm": 0.90625, "learning_rate": 0.0001544481153595644, "loss": 1.0065, "step": 20927 }, { "epoch": 0.5373723562518355, "grad_norm": 0.79296875, "learning_rate": 0.00015444437085312824, "loss": 0.7767, "step": 20928 }, { "epoch": 0.5373980334477574, "grad_norm": 0.671875, "learning_rate": 0.0001544406262381894, "loss": 0.7959, "step": 20929 }, { "epoch": 0.5374237106436791, "grad_norm": 0.7421875, "learning_rate": 0.00015443688151475527, "loss": 0.9112, "step": 20930 }, { "epoch": 0.537449387839601, "grad_norm": 0.7421875, "learning_rate": 0.00015443313668283336, "loss": 0.9134, "step": 20931 }, { "epoch": 0.5374750650355228, "grad_norm": 0.78515625, "learning_rate": 0.00015442939174243115, "loss": 0.815, "step": 20932 }, { "epoch": 0.5375007422314446, "grad_norm": 0.71484375, "learning_rate": 0.00015442564669355605, "loss": 0.8717, "step": 20933 }, { "epoch": 0.5375264194273665, "grad_norm": 0.7734375, "learning_rate": 0.00015442190153621554, "loss": 0.7637, "step": 20934 }, { "epoch": 0.5375520966232883, "grad_norm": 0.8359375, "learning_rate": 0.00015441815627041712, "loss": 0.8924, "step": 20935 }, { "epoch": 0.5375777738192101, "grad_norm": 0.734375, "learning_rate": 0.0001544144108961682, "loss": 0.7971, "step": 20936 }, { "epoch": 0.5376034510151319, "grad_norm": 1.0546875, "learning_rate": 0.00015441066541347633, "loss": 0.8612, "step": 20937 }, { "epoch": 0.5376291282110537, "grad_norm": 0.90234375, "learning_rate": 0.00015440691982234887, "loss": 0.959, "step": 20938 }, { "epoch": 0.5376548054069755, "grad_norm": 0.76953125, "learning_rate": 0.00015440317412279336, "loss": 0.9087, "step": 20939 }, { "epoch": 0.5376804826028974, "grad_norm": 0.7734375, "learning_rate": 0.00015439942831481723, "loss": 0.8434, "step": 20940 }, { "epoch": 0.5377061597988192, "grad_norm": 0.81640625, "learning_rate": 0.00015439568239842795, "loss": 0.9749, "step": 20941 }, { "epoch": 0.537731836994741, "grad_norm": 0.81640625, "learning_rate": 0.00015439193637363302, "loss": 0.964, "step": 20942 }, { "epoch": 0.5377575141906628, "grad_norm": 0.828125, "learning_rate": 0.00015438819024043982, "loss": 0.9473, "step": 20943 }, { "epoch": 0.5377831913865846, "grad_norm": 0.859375, "learning_rate": 0.0001543844439988559, "loss": 1.0628, "step": 20944 }, { "epoch": 0.5378088685825064, "grad_norm": 0.78125, "learning_rate": 0.0001543806976488887, "loss": 0.9148, "step": 20945 }, { "epoch": 0.5378345457784283, "grad_norm": 0.96484375, "learning_rate": 0.00015437695119054567, "loss": 0.8767, "step": 20946 }, { "epoch": 0.5378602229743501, "grad_norm": 0.765625, "learning_rate": 0.00015437320462383428, "loss": 0.8706, "step": 20947 }, { "epoch": 0.5378859001702719, "grad_norm": 0.75, "learning_rate": 0.000154369457948762, "loss": 0.762, "step": 20948 }, { "epoch": 0.5379115773661938, "grad_norm": 0.734375, "learning_rate": 0.00015436571116533637, "loss": 0.9806, "step": 20949 }, { "epoch": 0.5379372545621155, "grad_norm": 0.75, "learning_rate": 0.00015436196427356472, "loss": 0.8117, "step": 20950 }, { "epoch": 0.5379629317580373, "grad_norm": 0.7890625, "learning_rate": 0.0001543582172734546, "loss": 0.8062, "step": 20951 }, { "epoch": 0.5379886089539592, "grad_norm": 0.78125, "learning_rate": 0.00015435447016501344, "loss": 0.8699, "step": 20952 }, { "epoch": 0.538014286149881, "grad_norm": 0.82421875, "learning_rate": 0.00015435072294824878, "loss": 0.8825, "step": 20953 }, { "epoch": 0.5380399633458028, "grad_norm": 0.7109375, "learning_rate": 0.000154346975623168, "loss": 0.8058, "step": 20954 }, { "epoch": 0.5380656405417247, "grad_norm": 0.74609375, "learning_rate": 0.00015434322818977863, "loss": 0.8851, "step": 20955 }, { "epoch": 0.5380913177376465, "grad_norm": 0.78515625, "learning_rate": 0.0001543394806480881, "loss": 0.889, "step": 20956 }, { "epoch": 0.5381169949335682, "grad_norm": 0.80078125, "learning_rate": 0.00015433573299810388, "loss": 1.0633, "step": 20957 }, { "epoch": 0.5381426721294901, "grad_norm": 0.77734375, "learning_rate": 0.0001543319852398335, "loss": 0.9145, "step": 20958 }, { "epoch": 0.5381683493254119, "grad_norm": 0.7109375, "learning_rate": 0.00015432823737328435, "loss": 0.8231, "step": 20959 }, { "epoch": 0.5381940265213337, "grad_norm": 0.7890625, "learning_rate": 0.00015432448939846393, "loss": 0.9007, "step": 20960 }, { "epoch": 0.5382197037172556, "grad_norm": 0.7890625, "learning_rate": 0.00015432074131537971, "loss": 0.8029, "step": 20961 }, { "epoch": 0.5382453809131774, "grad_norm": 0.81640625, "learning_rate": 0.0001543169931240392, "loss": 0.9901, "step": 20962 }, { "epoch": 0.5382710581090991, "grad_norm": 0.7421875, "learning_rate": 0.00015431324482444975, "loss": 0.9724, "step": 20963 }, { "epoch": 0.538296735305021, "grad_norm": 0.87109375, "learning_rate": 0.00015430949641661896, "loss": 0.8547, "step": 20964 }, { "epoch": 0.5383224125009428, "grad_norm": 0.75, "learning_rate": 0.00015430574790055424, "loss": 0.9452, "step": 20965 }, { "epoch": 0.5383480896968647, "grad_norm": 0.75, "learning_rate": 0.00015430199927626305, "loss": 0.901, "step": 20966 }, { "epoch": 0.5383737668927865, "grad_norm": 0.8046875, "learning_rate": 0.0001542982505437529, "loss": 0.8404, "step": 20967 }, { "epoch": 0.5383994440887083, "grad_norm": 0.69921875, "learning_rate": 0.00015429450170303128, "loss": 0.9647, "step": 20968 }, { "epoch": 0.5384251212846302, "grad_norm": 0.765625, "learning_rate": 0.00015429075275410556, "loss": 0.8328, "step": 20969 }, { "epoch": 0.5384507984805519, "grad_norm": 0.76953125, "learning_rate": 0.00015428700369698333, "loss": 0.8174, "step": 20970 }, { "epoch": 0.5384764756764737, "grad_norm": 0.74609375, "learning_rate": 0.00015428325453167196, "loss": 0.8834, "step": 20971 }, { "epoch": 0.5385021528723956, "grad_norm": 0.73046875, "learning_rate": 0.00015427950525817897, "loss": 0.8318, "step": 20972 }, { "epoch": 0.5385278300683174, "grad_norm": 0.8203125, "learning_rate": 0.00015427575587651182, "loss": 0.9911, "step": 20973 }, { "epoch": 0.5385535072642392, "grad_norm": 0.8359375, "learning_rate": 0.00015427200638667803, "loss": 0.9027, "step": 20974 }, { "epoch": 0.5385791844601611, "grad_norm": 0.7890625, "learning_rate": 0.000154268256788685, "loss": 0.7324, "step": 20975 }, { "epoch": 0.5386048616560829, "grad_norm": 0.76953125, "learning_rate": 0.00015426450708254028, "loss": 0.9135, "step": 20976 }, { "epoch": 0.5386305388520046, "grad_norm": 0.81640625, "learning_rate": 0.00015426075726825126, "loss": 0.9324, "step": 20977 }, { "epoch": 0.5386562160479265, "grad_norm": 0.84765625, "learning_rate": 0.00015425700734582546, "loss": 0.9445, "step": 20978 }, { "epoch": 0.5386818932438483, "grad_norm": 0.765625, "learning_rate": 0.00015425325731527033, "loss": 0.8899, "step": 20979 }, { "epoch": 0.5387075704397701, "grad_norm": 0.8046875, "learning_rate": 0.00015424950717659339, "loss": 0.952, "step": 20980 }, { "epoch": 0.538733247635692, "grad_norm": 0.765625, "learning_rate": 0.00015424575692980208, "loss": 0.8458, "step": 20981 }, { "epoch": 0.5387589248316138, "grad_norm": 0.796875, "learning_rate": 0.00015424200657490386, "loss": 0.7843, "step": 20982 }, { "epoch": 0.5387846020275355, "grad_norm": 0.76171875, "learning_rate": 0.00015423825611190624, "loss": 1.0267, "step": 20983 }, { "epoch": 0.5388102792234574, "grad_norm": 0.796875, "learning_rate": 0.00015423450554081662, "loss": 0.8919, "step": 20984 }, { "epoch": 0.5388359564193792, "grad_norm": 0.76953125, "learning_rate": 0.0001542307548616426, "loss": 0.8848, "step": 20985 }, { "epoch": 0.538861633615301, "grad_norm": 0.8046875, "learning_rate": 0.00015422700407439154, "loss": 0.8318, "step": 20986 }, { "epoch": 0.5388873108112229, "grad_norm": 0.8046875, "learning_rate": 0.00015422325317907096, "loss": 0.8626, "step": 20987 }, { "epoch": 0.5389129880071447, "grad_norm": 0.82421875, "learning_rate": 0.00015421950217568836, "loss": 1.0942, "step": 20988 }, { "epoch": 0.5389386652030665, "grad_norm": 0.77734375, "learning_rate": 0.00015421575106425118, "loss": 0.9535, "step": 20989 }, { "epoch": 0.5389643423989883, "grad_norm": 0.7421875, "learning_rate": 0.00015421199984476692, "loss": 0.8091, "step": 20990 }, { "epoch": 0.5389900195949101, "grad_norm": 0.703125, "learning_rate": 0.000154208248517243, "loss": 0.8262, "step": 20991 }, { "epoch": 0.5390156967908319, "grad_norm": 0.73828125, "learning_rate": 0.00015420449708168697, "loss": 0.755, "step": 20992 }, { "epoch": 0.5390413739867538, "grad_norm": 0.7734375, "learning_rate": 0.00015420074553810627, "loss": 0.828, "step": 20993 }, { "epoch": 0.5390670511826756, "grad_norm": 0.75390625, "learning_rate": 0.00015419699388650836, "loss": 0.8397, "step": 20994 }, { "epoch": 0.5390927283785975, "grad_norm": 0.765625, "learning_rate": 0.00015419324212690078, "loss": 0.8304, "step": 20995 }, { "epoch": 0.5391184055745193, "grad_norm": 0.79296875, "learning_rate": 0.00015418949025929092, "loss": 0.9304, "step": 20996 }, { "epoch": 0.539144082770441, "grad_norm": 0.8515625, "learning_rate": 0.0001541857382836863, "loss": 0.7664, "step": 20997 }, { "epoch": 0.5391697599663628, "grad_norm": 0.82421875, "learning_rate": 0.00015418198620009444, "loss": 0.8932, "step": 20998 }, { "epoch": 0.5391954371622847, "grad_norm": 0.80078125, "learning_rate": 0.00015417823400852272, "loss": 0.8884, "step": 20999 }, { "epoch": 0.5392211143582065, "grad_norm": 0.75390625, "learning_rate": 0.0001541744817089787, "loss": 0.817, "step": 21000 }, { "epoch": 0.5392211143582065, "eval_loss": 0.8762781023979187, "eval_model_preparation_time": 0.0065, "eval_runtime": 409.2644, "eval_samples_per_second": 24.434, "eval_steps_per_second": 0.765, "step": 21000 }, { "epoch": 0.5392467915541284, "grad_norm": 0.86328125, "learning_rate": 0.00015417072930146985, "loss": 0.8817, "step": 21001 }, { "epoch": 0.5392724687500502, "grad_norm": 0.7578125, "learning_rate": 0.00015416697678600363, "loss": 0.9497, "step": 21002 }, { "epoch": 0.5392981459459719, "grad_norm": 0.796875, "learning_rate": 0.0001541632241625875, "loss": 1.0918, "step": 21003 }, { "epoch": 0.5393238231418938, "grad_norm": 0.734375, "learning_rate": 0.00015415947143122895, "loss": 0.9293, "step": 21004 }, { "epoch": 0.5393495003378156, "grad_norm": 0.80078125, "learning_rate": 0.00015415571859193547, "loss": 0.8585, "step": 21005 }, { "epoch": 0.5393751775337374, "grad_norm": 0.77734375, "learning_rate": 0.00015415196564471458, "loss": 0.9171, "step": 21006 }, { "epoch": 0.5394008547296593, "grad_norm": 0.8359375, "learning_rate": 0.00015414821258957363, "loss": 0.8619, "step": 21007 }, { "epoch": 0.5394265319255811, "grad_norm": 0.98828125, "learning_rate": 0.00015414445942652026, "loss": 0.8109, "step": 21008 }, { "epoch": 0.5394522091215029, "grad_norm": 0.7578125, "learning_rate": 0.00015414070615556185, "loss": 1.016, "step": 21009 }, { "epoch": 0.5394778863174247, "grad_norm": 0.88671875, "learning_rate": 0.00015413695277670587, "loss": 0.9347, "step": 21010 }, { "epoch": 0.5395035635133465, "grad_norm": 0.80859375, "learning_rate": 0.00015413319928995987, "loss": 0.8719, "step": 21011 }, { "epoch": 0.5395292407092683, "grad_norm": 0.76171875, "learning_rate": 0.00015412944569533128, "loss": 0.8366, "step": 21012 }, { "epoch": 0.5395549179051902, "grad_norm": 0.9609375, "learning_rate": 0.0001541256919928276, "loss": 0.8151, "step": 21013 }, { "epoch": 0.539580595101112, "grad_norm": 0.765625, "learning_rate": 0.0001541219381824563, "loss": 0.8264, "step": 21014 }, { "epoch": 0.5396062722970338, "grad_norm": 0.79296875, "learning_rate": 0.00015411818426422485, "loss": 1.0383, "step": 21015 }, { "epoch": 0.5396319494929557, "grad_norm": 0.85546875, "learning_rate": 0.00015411443023814075, "loss": 0.8893, "step": 21016 }, { "epoch": 0.5396576266888774, "grad_norm": 0.7890625, "learning_rate": 0.00015411067610421152, "loss": 0.8305, "step": 21017 }, { "epoch": 0.5396833038847992, "grad_norm": 1.0703125, "learning_rate": 0.00015410692186244455, "loss": 1.0092, "step": 21018 }, { "epoch": 0.5397089810807211, "grad_norm": 0.765625, "learning_rate": 0.0001541031675128474, "loss": 0.8072, "step": 21019 }, { "epoch": 0.5397346582766429, "grad_norm": 0.85546875, "learning_rate": 0.00015409941305542752, "loss": 0.8469, "step": 21020 }, { "epoch": 0.5397603354725647, "grad_norm": 0.78515625, "learning_rate": 0.0001540956584901924, "loss": 0.8393, "step": 21021 }, { "epoch": 0.5397860126684866, "grad_norm": 0.7265625, "learning_rate": 0.0001540919038171495, "loss": 0.8519, "step": 21022 }, { "epoch": 0.5398116898644083, "grad_norm": 0.75, "learning_rate": 0.00015408814903630636, "loss": 0.8535, "step": 21023 }, { "epoch": 0.5398373670603301, "grad_norm": 0.75390625, "learning_rate": 0.0001540843941476704, "loss": 0.9027, "step": 21024 }, { "epoch": 0.539863044256252, "grad_norm": 0.765625, "learning_rate": 0.00015408063915124914, "loss": 0.8473, "step": 21025 }, { "epoch": 0.5398887214521738, "grad_norm": 0.82421875, "learning_rate": 0.00015407688404705004, "loss": 0.9213, "step": 21026 }, { "epoch": 0.5399143986480957, "grad_norm": 0.78515625, "learning_rate": 0.00015407312883508056, "loss": 1.0015, "step": 21027 }, { "epoch": 0.5399400758440175, "grad_norm": 0.84765625, "learning_rate": 0.00015406937351534827, "loss": 0.8699, "step": 21028 }, { "epoch": 0.5399657530399393, "grad_norm": 0.76171875, "learning_rate": 0.0001540656180878606, "loss": 0.9774, "step": 21029 }, { "epoch": 0.539991430235861, "grad_norm": 0.76171875, "learning_rate": 0.00015406186255262502, "loss": 0.8036, "step": 21030 }, { "epoch": 0.5400171074317829, "grad_norm": 0.734375, "learning_rate": 0.00015405810690964904, "loss": 1.0175, "step": 21031 }, { "epoch": 0.5400427846277047, "grad_norm": 0.7421875, "learning_rate": 0.0001540543511589401, "loss": 0.8725, "step": 21032 }, { "epoch": 0.5400684618236266, "grad_norm": 0.71875, "learning_rate": 0.00015405059530050575, "loss": 0.813, "step": 21033 }, { "epoch": 0.5400941390195484, "grad_norm": 0.8671875, "learning_rate": 0.00015404683933435348, "loss": 0.8903, "step": 21034 }, { "epoch": 0.5401198162154702, "grad_norm": 0.796875, "learning_rate": 0.0001540430832604907, "loss": 0.8283, "step": 21035 }, { "epoch": 0.5401454934113921, "grad_norm": 0.8203125, "learning_rate": 0.00015403932707892495, "loss": 0.9269, "step": 21036 }, { "epoch": 0.5401711706073138, "grad_norm": 0.7421875, "learning_rate": 0.00015403557078966367, "loss": 0.8277, "step": 21037 }, { "epoch": 0.5401968478032356, "grad_norm": 0.80078125, "learning_rate": 0.00015403181439271443, "loss": 0.8093, "step": 21038 }, { "epoch": 0.5402225249991575, "grad_norm": 0.84375, "learning_rate": 0.00015402805788808464, "loss": 0.935, "step": 21039 }, { "epoch": 0.5402482021950793, "grad_norm": 0.73046875, "learning_rate": 0.0001540243012757818, "loss": 0.8491, "step": 21040 }, { "epoch": 0.5402738793910011, "grad_norm": 0.71484375, "learning_rate": 0.00015402054455581344, "loss": 0.8391, "step": 21041 }, { "epoch": 0.540299556586923, "grad_norm": 0.859375, "learning_rate": 0.00015401678772818697, "loss": 1.0084, "step": 21042 }, { "epoch": 0.5403252337828447, "grad_norm": 0.84375, "learning_rate": 0.00015401303079290996, "loss": 0.7861, "step": 21043 }, { "epoch": 0.5403509109787665, "grad_norm": 0.78515625, "learning_rate": 0.00015400927374998983, "loss": 0.8915, "step": 21044 }, { "epoch": 0.5403765881746884, "grad_norm": 0.8125, "learning_rate": 0.00015400551659943413, "loss": 1.0364, "step": 21045 }, { "epoch": 0.5404022653706102, "grad_norm": 0.77734375, "learning_rate": 0.0001540017593412503, "loss": 0.867, "step": 21046 }, { "epoch": 0.540427942566532, "grad_norm": 0.76953125, "learning_rate": 0.00015399800197544582, "loss": 0.8654, "step": 21047 }, { "epoch": 0.5404536197624539, "grad_norm": 0.76953125, "learning_rate": 0.00015399424450202823, "loss": 0.9413, "step": 21048 }, { "epoch": 0.5404792969583757, "grad_norm": 0.7265625, "learning_rate": 0.00015399048692100496, "loss": 0.7439, "step": 21049 }, { "epoch": 0.5405049741542974, "grad_norm": 0.71875, "learning_rate": 0.00015398672923238354, "loss": 0.7379, "step": 21050 }, { "epoch": 0.5405306513502193, "grad_norm": 0.79296875, "learning_rate": 0.00015398297143617143, "loss": 0.8959, "step": 21051 }, { "epoch": 0.5405563285461411, "grad_norm": 0.75390625, "learning_rate": 0.00015397921353237616, "loss": 0.8883, "step": 21052 }, { "epoch": 0.5405820057420629, "grad_norm": 0.74609375, "learning_rate": 0.00015397545552100515, "loss": 0.806, "step": 21053 }, { "epoch": 0.5406076829379848, "grad_norm": 0.734375, "learning_rate": 0.000153971697402066, "loss": 0.862, "step": 21054 }, { "epoch": 0.5406333601339066, "grad_norm": 0.7578125, "learning_rate": 0.0001539679391755661, "loss": 0.8724, "step": 21055 }, { "epoch": 0.5406590373298285, "grad_norm": 0.75, "learning_rate": 0.00015396418084151297, "loss": 0.7814, "step": 21056 }, { "epoch": 0.5406847145257502, "grad_norm": 0.76171875, "learning_rate": 0.00015396042239991411, "loss": 0.809, "step": 21057 }, { "epoch": 0.540710391721672, "grad_norm": 0.7109375, "learning_rate": 0.00015395666385077696, "loss": 0.7481, "step": 21058 }, { "epoch": 0.5407360689175938, "grad_norm": 0.7421875, "learning_rate": 0.0001539529051941091, "loss": 0.8411, "step": 21059 }, { "epoch": 0.5407617461135157, "grad_norm": 0.77734375, "learning_rate": 0.00015394914642991795, "loss": 0.9348, "step": 21060 }, { "epoch": 0.5407874233094375, "grad_norm": 0.7890625, "learning_rate": 0.00015394538755821106, "loss": 0.8221, "step": 21061 }, { "epoch": 0.5408131005053594, "grad_norm": 0.7734375, "learning_rate": 0.00015394162857899584, "loss": 0.8843, "step": 21062 }, { "epoch": 0.5408387777012811, "grad_norm": 0.7109375, "learning_rate": 0.00015393786949227987, "loss": 0.9236, "step": 21063 }, { "epoch": 0.5408644548972029, "grad_norm": 0.72265625, "learning_rate": 0.00015393411029807056, "loss": 0.8836, "step": 21064 }, { "epoch": 0.5408901320931248, "grad_norm": 0.77734375, "learning_rate": 0.00015393035099637548, "loss": 0.9711, "step": 21065 }, { "epoch": 0.5409158092890466, "grad_norm": 0.76953125, "learning_rate": 0.00015392659158720205, "loss": 0.9605, "step": 21066 }, { "epoch": 0.5409414864849684, "grad_norm": 0.7578125, "learning_rate": 0.0001539228320705578, "loss": 0.7983, "step": 21067 }, { "epoch": 0.5409671636808903, "grad_norm": 0.8046875, "learning_rate": 0.0001539190724464502, "loss": 0.8341, "step": 21068 }, { "epoch": 0.5409928408768121, "grad_norm": 0.828125, "learning_rate": 0.00015391531271488677, "loss": 0.8533, "step": 21069 }, { "epoch": 0.5410185180727338, "grad_norm": 0.7578125, "learning_rate": 0.00015391155287587503, "loss": 0.903, "step": 21070 }, { "epoch": 0.5410441952686557, "grad_norm": 0.796875, "learning_rate": 0.00015390779292942237, "loss": 0.9244, "step": 21071 }, { "epoch": 0.5410698724645775, "grad_norm": 0.78515625, "learning_rate": 0.0001539040328755364, "loss": 0.8215, "step": 21072 }, { "epoch": 0.5410955496604993, "grad_norm": 0.7890625, "learning_rate": 0.00015390027271422455, "loss": 1.0312, "step": 21073 }, { "epoch": 0.5411212268564212, "grad_norm": 0.828125, "learning_rate": 0.0001538965124454943, "loss": 0.8508, "step": 21074 }, { "epoch": 0.541146904052343, "grad_norm": 0.796875, "learning_rate": 0.0001538927520693532, "loss": 0.9042, "step": 21075 }, { "epoch": 0.5411725812482647, "grad_norm": 0.80859375, "learning_rate": 0.0001538889915858087, "loss": 0.7456, "step": 21076 }, { "epoch": 0.5411982584441866, "grad_norm": 0.828125, "learning_rate": 0.00015388523099486832, "loss": 0.8971, "step": 21077 }, { "epoch": 0.5412239356401084, "grad_norm": 0.6796875, "learning_rate": 0.00015388147029653953, "loss": 0.7593, "step": 21078 }, { "epoch": 0.5412496128360302, "grad_norm": 0.7890625, "learning_rate": 0.00015387770949082982, "loss": 0.9246, "step": 21079 }, { "epoch": 0.5412752900319521, "grad_norm": 0.796875, "learning_rate": 0.00015387394857774674, "loss": 0.9292, "step": 21080 }, { "epoch": 0.5413009672278739, "grad_norm": 0.9375, "learning_rate": 0.0001538701875572977, "loss": 1.0856, "step": 21081 }, { "epoch": 0.5413266444237957, "grad_norm": 0.75390625, "learning_rate": 0.00015386642642949028, "loss": 0.8868, "step": 21082 }, { "epoch": 0.5413523216197175, "grad_norm": 0.796875, "learning_rate": 0.00015386266519433193, "loss": 0.892, "step": 21083 }, { "epoch": 0.5413779988156393, "grad_norm": 0.80859375, "learning_rate": 0.00015385890385183015, "loss": 1.0303, "step": 21084 }, { "epoch": 0.5414036760115611, "grad_norm": 0.734375, "learning_rate": 0.00015385514240199243, "loss": 0.868, "step": 21085 }, { "epoch": 0.541429353207483, "grad_norm": 0.8125, "learning_rate": 0.00015385138084482628, "loss": 0.7713, "step": 21086 }, { "epoch": 0.5414550304034048, "grad_norm": 0.6875, "learning_rate": 0.0001538476191803392, "loss": 0.7703, "step": 21087 }, { "epoch": 0.5414807075993266, "grad_norm": 0.84765625, "learning_rate": 0.0001538438574085387, "loss": 0.8177, "step": 21088 }, { "epoch": 0.5415063847952485, "grad_norm": 0.76953125, "learning_rate": 0.00015384009552943222, "loss": 0.8699, "step": 21089 }, { "epoch": 0.5415320619911702, "grad_norm": 0.70703125, "learning_rate": 0.00015383633354302729, "loss": 0.9129, "step": 21090 }, { "epoch": 0.541557739187092, "grad_norm": 0.83203125, "learning_rate": 0.00015383257144933142, "loss": 0.8506, "step": 21091 }, { "epoch": 0.5415834163830139, "grad_norm": 0.78125, "learning_rate": 0.0001538288092483521, "loss": 0.8762, "step": 21092 }, { "epoch": 0.5416090935789357, "grad_norm": 0.76171875, "learning_rate": 0.00015382504694009684, "loss": 0.908, "step": 21093 }, { "epoch": 0.5416347707748576, "grad_norm": 0.6875, "learning_rate": 0.00015382128452457315, "loss": 0.758, "step": 21094 }, { "epoch": 0.5416604479707794, "grad_norm": 0.8125, "learning_rate": 0.00015381752200178841, "loss": 0.8798, "step": 21095 }, { "epoch": 0.5416861251667011, "grad_norm": 0.73828125, "learning_rate": 0.0001538137593717503, "loss": 0.9207, "step": 21096 }, { "epoch": 0.541711802362623, "grad_norm": 0.78125, "learning_rate": 0.0001538099966344662, "loss": 0.932, "step": 21097 }, { "epoch": 0.5417374795585448, "grad_norm": 0.80078125, "learning_rate": 0.0001538062337899436, "loss": 0.7607, "step": 21098 }, { "epoch": 0.5417631567544666, "grad_norm": 0.76953125, "learning_rate": 0.00015380247083819008, "loss": 0.7937, "step": 21099 }, { "epoch": 0.5417888339503885, "grad_norm": 0.875, "learning_rate": 0.00015379870777921308, "loss": 0.9464, "step": 21100 }, { "epoch": 0.5418145111463103, "grad_norm": 0.8984375, "learning_rate": 0.00015379494461302014, "loss": 0.824, "step": 21101 }, { "epoch": 0.5418401883422321, "grad_norm": 0.734375, "learning_rate": 0.00015379118133961871, "loss": 0.7017, "step": 21102 }, { "epoch": 0.5418658655381539, "grad_norm": 0.7421875, "learning_rate": 0.0001537874179590163, "loss": 1.0158, "step": 21103 }, { "epoch": 0.5418915427340757, "grad_norm": 0.78515625, "learning_rate": 0.00015378365447122044, "loss": 0.8196, "step": 21104 }, { "epoch": 0.5419172199299975, "grad_norm": 0.77734375, "learning_rate": 0.00015377989087623863, "loss": 0.7587, "step": 21105 }, { "epoch": 0.5419428971259194, "grad_norm": 0.73828125, "learning_rate": 0.00015377612717407834, "loss": 1.0308, "step": 21106 }, { "epoch": 0.5419685743218412, "grad_norm": 0.76171875, "learning_rate": 0.0001537723633647471, "loss": 0.9031, "step": 21107 }, { "epoch": 0.541994251517763, "grad_norm": 0.75390625, "learning_rate": 0.00015376859944825236, "loss": 0.8522, "step": 21108 }, { "epoch": 0.5420199287136849, "grad_norm": 0.7421875, "learning_rate": 0.0001537648354246017, "loss": 0.8429, "step": 21109 }, { "epoch": 0.5420456059096066, "grad_norm": 0.7265625, "learning_rate": 0.00015376107129380255, "loss": 0.9276, "step": 21110 }, { "epoch": 0.5420712831055284, "grad_norm": 0.828125, "learning_rate": 0.00015375730705586245, "loss": 0.9574, "step": 21111 }, { "epoch": 0.5420969603014503, "grad_norm": 0.73046875, "learning_rate": 0.00015375354271078887, "loss": 0.8616, "step": 21112 }, { "epoch": 0.5421226374973721, "grad_norm": 0.81640625, "learning_rate": 0.0001537497782585894, "loss": 0.8025, "step": 21113 }, { "epoch": 0.5421483146932939, "grad_norm": 0.78125, "learning_rate": 0.00015374601369927144, "loss": 0.8506, "step": 21114 }, { "epoch": 0.5421739918892158, "grad_norm": 0.7734375, "learning_rate": 0.00015374224903284253, "loss": 0.9175, "step": 21115 }, { "epoch": 0.5421996690851375, "grad_norm": 0.76953125, "learning_rate": 0.00015373848425931014, "loss": 0.8345, "step": 21116 }, { "epoch": 0.5422253462810593, "grad_norm": 0.8046875, "learning_rate": 0.00015373471937868186, "loss": 0.8724, "step": 21117 }, { "epoch": 0.5422510234769812, "grad_norm": 0.7734375, "learning_rate": 0.0001537309543909651, "loss": 0.9583, "step": 21118 }, { "epoch": 0.542276700672903, "grad_norm": 0.78515625, "learning_rate": 0.0001537271892961674, "loss": 0.8698, "step": 21119 }, { "epoch": 0.5423023778688248, "grad_norm": 0.78515625, "learning_rate": 0.0001537234240942963, "loss": 0.8172, "step": 21120 }, { "epoch": 0.5423280550647467, "grad_norm": 0.703125, "learning_rate": 0.00015371965878535924, "loss": 0.9236, "step": 21121 }, { "epoch": 0.5423537322606685, "grad_norm": 0.7578125, "learning_rate": 0.00015371589336936377, "loss": 0.9399, "step": 21122 }, { "epoch": 0.5423794094565902, "grad_norm": 0.734375, "learning_rate": 0.00015371212784631738, "loss": 0.7591, "step": 21123 }, { "epoch": 0.5424050866525121, "grad_norm": 0.76953125, "learning_rate": 0.00015370836221622754, "loss": 0.9182, "step": 21124 }, { "epoch": 0.5424307638484339, "grad_norm": 0.7109375, "learning_rate": 0.0001537045964791018, "loss": 0.8301, "step": 21125 }, { "epoch": 0.5424564410443558, "grad_norm": 0.73828125, "learning_rate": 0.00015370083063494767, "loss": 0.8306, "step": 21126 }, { "epoch": 0.5424821182402776, "grad_norm": 0.765625, "learning_rate": 0.0001536970646837726, "loss": 0.9201, "step": 21127 }, { "epoch": 0.5425077954361994, "grad_norm": 0.765625, "learning_rate": 0.00015369329862558416, "loss": 0.8043, "step": 21128 }, { "epoch": 0.5425334726321213, "grad_norm": 0.7265625, "learning_rate": 0.00015368953246038984, "loss": 0.9455, "step": 21129 }, { "epoch": 0.542559149828043, "grad_norm": 0.81640625, "learning_rate": 0.00015368576618819707, "loss": 0.8073, "step": 21130 }, { "epoch": 0.5425848270239648, "grad_norm": 0.7109375, "learning_rate": 0.00015368199980901346, "loss": 0.731, "step": 21131 }, { "epoch": 0.5426105042198867, "grad_norm": 0.796875, "learning_rate": 0.00015367823332284645, "loss": 0.8416, "step": 21132 }, { "epoch": 0.5426361814158085, "grad_norm": 0.80078125, "learning_rate": 0.00015367446672970362, "loss": 0.8644, "step": 21133 }, { "epoch": 0.5426618586117303, "grad_norm": 0.7421875, "learning_rate": 0.0001536707000295924, "loss": 0.7515, "step": 21134 }, { "epoch": 0.5426875358076522, "grad_norm": 0.91796875, "learning_rate": 0.0001536669332225203, "loss": 0.7997, "step": 21135 }, { "epoch": 0.5427132130035739, "grad_norm": 0.81640625, "learning_rate": 0.00015366316630849487, "loss": 0.9216, "step": 21136 }, { "epoch": 0.5427388901994957, "grad_norm": 0.83203125, "learning_rate": 0.0001536593992875236, "loss": 0.8552, "step": 21137 }, { "epoch": 0.5427645673954176, "grad_norm": 0.8046875, "learning_rate": 0.00015365563215961395, "loss": 0.879, "step": 21138 }, { "epoch": 0.5427902445913394, "grad_norm": 0.80859375, "learning_rate": 0.00015365186492477358, "loss": 0.9197, "step": 21139 }, { "epoch": 0.5428159217872612, "grad_norm": 0.7421875, "learning_rate": 0.00015364809758300978, "loss": 0.904, "step": 21140 }, { "epoch": 0.5428415989831831, "grad_norm": 0.77734375, "learning_rate": 0.00015364433013433023, "loss": 0.8826, "step": 21141 }, { "epoch": 0.5428672761791049, "grad_norm": 0.7734375, "learning_rate": 0.00015364056257874235, "loss": 0.9381, "step": 21142 }, { "epoch": 0.5428929533750266, "grad_norm": 0.74609375, "learning_rate": 0.00015363679491625363, "loss": 0.813, "step": 21143 }, { "epoch": 0.5429186305709485, "grad_norm": 0.79296875, "learning_rate": 0.0001536330271468717, "loss": 1.0006, "step": 21144 }, { "epoch": 0.5429443077668703, "grad_norm": 0.81640625, "learning_rate": 0.00015362925927060397, "loss": 0.9544, "step": 21145 }, { "epoch": 0.5429699849627921, "grad_norm": 0.7890625, "learning_rate": 0.00015362549128745796, "loss": 0.9696, "step": 21146 }, { "epoch": 0.542995662158714, "grad_norm": 0.8203125, "learning_rate": 0.0001536217231974412, "loss": 0.865, "step": 21147 }, { "epoch": 0.5430213393546358, "grad_norm": 0.72265625, "learning_rate": 0.00015361795500056115, "loss": 1.0156, "step": 21148 }, { "epoch": 0.5430470165505576, "grad_norm": 0.75, "learning_rate": 0.0001536141866968254, "loss": 0.7693, "step": 21149 }, { "epoch": 0.5430726937464794, "grad_norm": 0.734375, "learning_rate": 0.0001536104182862414, "loss": 0.7909, "step": 21150 }, { "epoch": 0.5430983709424012, "grad_norm": 0.75390625, "learning_rate": 0.0001536066497688167, "loss": 0.8804, "step": 21151 }, { "epoch": 0.543124048138323, "grad_norm": 0.734375, "learning_rate": 0.0001536028811445588, "loss": 0.9493, "step": 21152 }, { "epoch": 0.5431497253342449, "grad_norm": 0.83203125, "learning_rate": 0.00015359911241347515, "loss": 0.8945, "step": 21153 }, { "epoch": 0.5431754025301667, "grad_norm": 0.78515625, "learning_rate": 0.00015359534357557334, "loss": 1.0363, "step": 21154 }, { "epoch": 0.5432010797260886, "grad_norm": 0.78125, "learning_rate": 0.00015359157463086084, "loss": 0.8619, "step": 21155 }, { "epoch": 0.5432267569220103, "grad_norm": 0.734375, "learning_rate": 0.00015358780557934518, "loss": 0.8697, "step": 21156 }, { "epoch": 0.5432524341179321, "grad_norm": 0.75390625, "learning_rate": 0.00015358403642103387, "loss": 0.8067, "step": 21157 }, { "epoch": 0.543278111313854, "grad_norm": 0.7578125, "learning_rate": 0.00015358026715593442, "loss": 1.0106, "step": 21158 }, { "epoch": 0.5433037885097758, "grad_norm": 0.765625, "learning_rate": 0.00015357649778405431, "loss": 0.8554, "step": 21159 }, { "epoch": 0.5433294657056976, "grad_norm": 0.8203125, "learning_rate": 0.00015357272830540114, "loss": 0.9717, "step": 21160 }, { "epoch": 0.5433551429016195, "grad_norm": 0.76171875, "learning_rate": 0.00015356895871998226, "loss": 0.8338, "step": 21161 }, { "epoch": 0.5433808200975413, "grad_norm": 0.7421875, "learning_rate": 0.00015356518902780536, "loss": 0.8221, "step": 21162 }, { "epoch": 0.543406497293463, "grad_norm": 0.75390625, "learning_rate": 0.00015356141922887788, "loss": 0.8718, "step": 21163 }, { "epoch": 0.5434321744893849, "grad_norm": 0.80078125, "learning_rate": 0.00015355764932320728, "loss": 0.9586, "step": 21164 }, { "epoch": 0.5434578516853067, "grad_norm": 0.78125, "learning_rate": 0.00015355387931080116, "loss": 0.8523, "step": 21165 }, { "epoch": 0.5434835288812285, "grad_norm": 0.82421875, "learning_rate": 0.00015355010919166704, "loss": 0.9361, "step": 21166 }, { "epoch": 0.5435092060771504, "grad_norm": 0.796875, "learning_rate": 0.00015354633896581232, "loss": 0.8198, "step": 21167 }, { "epoch": 0.5435348832730722, "grad_norm": 0.75390625, "learning_rate": 0.0001535425686332446, "loss": 0.8534, "step": 21168 }, { "epoch": 0.543560560468994, "grad_norm": 0.73828125, "learning_rate": 0.00015353879819397136, "loss": 0.8078, "step": 21169 }, { "epoch": 0.5435862376649158, "grad_norm": 0.73046875, "learning_rate": 0.00015353502764800013, "loss": 0.7966, "step": 21170 }, { "epoch": 0.5436119148608376, "grad_norm": 0.74609375, "learning_rate": 0.00015353125699533847, "loss": 0.9669, "step": 21171 }, { "epoch": 0.5436375920567594, "grad_norm": 0.7734375, "learning_rate": 0.00015352748623599383, "loss": 0.9485, "step": 21172 }, { "epoch": 0.5436632692526813, "grad_norm": 0.73828125, "learning_rate": 0.00015352371536997374, "loss": 0.798, "step": 21173 }, { "epoch": 0.5436889464486031, "grad_norm": 0.7578125, "learning_rate": 0.0001535199443972857, "loss": 0.8441, "step": 21174 }, { "epoch": 0.5437146236445249, "grad_norm": 0.8359375, "learning_rate": 0.00015351617331793728, "loss": 0.9215, "step": 21175 }, { "epoch": 0.5437403008404467, "grad_norm": 0.80078125, "learning_rate": 0.00015351240213193595, "loss": 0.8249, "step": 21176 }, { "epoch": 0.5437659780363685, "grad_norm": 0.7421875, "learning_rate": 0.00015350863083928923, "loss": 0.8569, "step": 21177 }, { "epoch": 0.5437916552322903, "grad_norm": 0.765625, "learning_rate": 0.00015350485944000462, "loss": 0.9404, "step": 21178 }, { "epoch": 0.5438173324282122, "grad_norm": 0.7109375, "learning_rate": 0.00015350108793408973, "loss": 0.8517, "step": 21179 }, { "epoch": 0.543843009624134, "grad_norm": 0.7421875, "learning_rate": 0.00015349731632155193, "loss": 0.7725, "step": 21180 }, { "epoch": 0.5438686868200558, "grad_norm": 0.7578125, "learning_rate": 0.00015349354460239887, "loss": 0.8772, "step": 21181 }, { "epoch": 0.5438943640159777, "grad_norm": 0.7578125, "learning_rate": 0.00015348977277663796, "loss": 0.8382, "step": 21182 }, { "epoch": 0.5439200412118994, "grad_norm": 0.83203125, "learning_rate": 0.00015348600084427678, "loss": 0.8444, "step": 21183 }, { "epoch": 0.5439457184078212, "grad_norm": 0.74609375, "learning_rate": 0.00015348222880532284, "loss": 0.7827, "step": 21184 }, { "epoch": 0.5439713956037431, "grad_norm": 0.7890625, "learning_rate": 0.00015347845665978363, "loss": 0.8845, "step": 21185 }, { "epoch": 0.5439970727996649, "grad_norm": 1.6015625, "learning_rate": 0.0001534746844076667, "loss": 1.0492, "step": 21186 }, { "epoch": 0.5440227499955868, "grad_norm": 0.796875, "learning_rate": 0.00015347091204897959, "loss": 0.8267, "step": 21187 }, { "epoch": 0.5440484271915086, "grad_norm": 0.76953125, "learning_rate": 0.0001534671395837297, "loss": 0.769, "step": 21188 }, { "epoch": 0.5440741043874304, "grad_norm": 0.7734375, "learning_rate": 0.0001534633670119247, "loss": 0.8716, "step": 21189 }, { "epoch": 0.5440997815833521, "grad_norm": 0.79296875, "learning_rate": 0.00015345959433357202, "loss": 0.7782, "step": 21190 }, { "epoch": 0.544125458779274, "grad_norm": 0.82421875, "learning_rate": 0.00015345582154867917, "loss": 0.9246, "step": 21191 }, { "epoch": 0.5441511359751958, "grad_norm": 0.765625, "learning_rate": 0.00015345204865725376, "loss": 0.8382, "step": 21192 }, { "epoch": 0.5441768131711177, "grad_norm": 0.765625, "learning_rate": 0.00015344827565930318, "loss": 0.7798, "step": 21193 }, { "epoch": 0.5442024903670395, "grad_norm": 0.69140625, "learning_rate": 0.00015344450255483504, "loss": 0.8172, "step": 21194 }, { "epoch": 0.5442281675629613, "grad_norm": 0.78125, "learning_rate": 0.00015344072934385684, "loss": 1.0353, "step": 21195 }, { "epoch": 0.544253844758883, "grad_norm": 0.7890625, "learning_rate": 0.00015343695602637608, "loss": 0.828, "step": 21196 }, { "epoch": 0.5442795219548049, "grad_norm": 0.77734375, "learning_rate": 0.0001534331826024003, "loss": 0.9274, "step": 21197 }, { "epoch": 0.5443051991507267, "grad_norm": 0.8828125, "learning_rate": 0.00015342940907193698, "loss": 0.9462, "step": 21198 }, { "epoch": 0.5443308763466486, "grad_norm": 0.76953125, "learning_rate": 0.00015342563543499374, "loss": 0.8513, "step": 21199 }, { "epoch": 0.5443565535425704, "grad_norm": 0.78125, "learning_rate": 0.000153421861691578, "loss": 0.8806, "step": 21200 }, { "epoch": 0.5443822307384922, "grad_norm": 0.75390625, "learning_rate": 0.00015341808784169732, "loss": 0.8384, "step": 21201 }, { "epoch": 0.5444079079344141, "grad_norm": 0.79296875, "learning_rate": 0.0001534143138853592, "loss": 0.9479, "step": 21202 }, { "epoch": 0.5444335851303358, "grad_norm": 0.77734375, "learning_rate": 0.00015341053982257118, "loss": 0.85, "step": 21203 }, { "epoch": 0.5444592623262576, "grad_norm": 0.80859375, "learning_rate": 0.0001534067656533408, "loss": 1.0058, "step": 21204 }, { "epoch": 0.5444849395221795, "grad_norm": 0.76171875, "learning_rate": 0.00015340299137767556, "loss": 0.8563, "step": 21205 }, { "epoch": 0.5445106167181013, "grad_norm": 0.69921875, "learning_rate": 0.00015339921699558296, "loss": 0.8346, "step": 21206 }, { "epoch": 0.5445362939140231, "grad_norm": 0.75, "learning_rate": 0.00015339544250707055, "loss": 0.841, "step": 21207 }, { "epoch": 0.544561971109945, "grad_norm": 0.71484375, "learning_rate": 0.00015339166791214587, "loss": 0.8266, "step": 21208 }, { "epoch": 0.5445876483058668, "grad_norm": 0.828125, "learning_rate": 0.00015338789321081637, "loss": 0.7978, "step": 21209 }, { "epoch": 0.5446133255017885, "grad_norm": 0.78125, "learning_rate": 0.00015338411840308967, "loss": 0.9767, "step": 21210 }, { "epoch": 0.5446390026977104, "grad_norm": 0.828125, "learning_rate": 0.00015338034348897324, "loss": 0.9999, "step": 21211 }, { "epoch": 0.5446646798936322, "grad_norm": 0.7109375, "learning_rate": 0.0001533765684684746, "loss": 0.804, "step": 21212 }, { "epoch": 0.544690357089554, "grad_norm": 0.7734375, "learning_rate": 0.0001533727933416013, "loss": 0.7918, "step": 21213 }, { "epoch": 0.5447160342854759, "grad_norm": 0.82421875, "learning_rate": 0.0001533690181083608, "loss": 0.9567, "step": 21214 }, { "epoch": 0.5447417114813977, "grad_norm": 0.77734375, "learning_rate": 0.00015336524276876067, "loss": 0.9151, "step": 21215 }, { "epoch": 0.5447673886773194, "grad_norm": 0.8203125, "learning_rate": 0.00015336146732280846, "loss": 0.8139, "step": 21216 }, { "epoch": 0.5447930658732413, "grad_norm": 0.84375, "learning_rate": 0.00015335769177051163, "loss": 0.8267, "step": 21217 }, { "epoch": 0.5448187430691631, "grad_norm": 0.80859375, "learning_rate": 0.00015335391611187777, "loss": 0.7919, "step": 21218 }, { "epoch": 0.544844420265085, "grad_norm": 0.77734375, "learning_rate": 0.00015335014034691436, "loss": 0.9667, "step": 21219 }, { "epoch": 0.5448700974610068, "grad_norm": 0.73046875, "learning_rate": 0.00015334636447562896, "loss": 0.7623, "step": 21220 }, { "epoch": 0.5448957746569286, "grad_norm": 0.7578125, "learning_rate": 0.00015334258849802906, "loss": 0.8252, "step": 21221 }, { "epoch": 0.5449214518528505, "grad_norm": 0.82421875, "learning_rate": 0.0001533388124141222, "loss": 0.8784, "step": 21222 }, { "epoch": 0.5449471290487722, "grad_norm": 0.8125, "learning_rate": 0.0001533350362239159, "loss": 0.9209, "step": 21223 }, { "epoch": 0.544972806244694, "grad_norm": 0.78125, "learning_rate": 0.0001533312599274177, "loss": 0.7799, "step": 21224 }, { "epoch": 0.5449984834406159, "grad_norm": 0.92578125, "learning_rate": 0.00015332748352463513, "loss": 0.8932, "step": 21225 }, { "epoch": 0.5450241606365377, "grad_norm": 0.8671875, "learning_rate": 0.00015332370701557566, "loss": 1.0064, "step": 21226 }, { "epoch": 0.5450498378324595, "grad_norm": 0.76171875, "learning_rate": 0.0001533199304002469, "loss": 0.8534, "step": 21227 }, { "epoch": 0.5450755150283814, "grad_norm": 0.74609375, "learning_rate": 0.0001533161536786563, "loss": 0.7949, "step": 21228 }, { "epoch": 0.5451011922243032, "grad_norm": 0.8125, "learning_rate": 0.00015331237685081145, "loss": 0.8205, "step": 21229 }, { "epoch": 0.5451268694202249, "grad_norm": 0.75, "learning_rate": 0.00015330859991671985, "loss": 0.9747, "step": 21230 }, { "epoch": 0.5451525466161468, "grad_norm": 0.828125, "learning_rate": 0.000153304822876389, "loss": 0.9272, "step": 21231 }, { "epoch": 0.5451782238120686, "grad_norm": 0.6484375, "learning_rate": 0.00015330104572982648, "loss": 0.7186, "step": 21232 }, { "epoch": 0.5452039010079904, "grad_norm": 0.7265625, "learning_rate": 0.00015329726847703977, "loss": 0.8637, "step": 21233 }, { "epoch": 0.5452295782039123, "grad_norm": 0.796875, "learning_rate": 0.00015329349111803644, "loss": 0.9042, "step": 21234 }, { "epoch": 0.5452552553998341, "grad_norm": 0.7890625, "learning_rate": 0.00015328971365282396, "loss": 0.8786, "step": 21235 }, { "epoch": 0.5452809325957558, "grad_norm": 0.7578125, "learning_rate": 0.00015328593608140994, "loss": 0.9388, "step": 21236 }, { "epoch": 0.5453066097916777, "grad_norm": 0.69921875, "learning_rate": 0.00015328215840380182, "loss": 0.8447, "step": 21237 }, { "epoch": 0.5453322869875995, "grad_norm": 0.75, "learning_rate": 0.00015327838062000718, "loss": 0.8731, "step": 21238 }, { "epoch": 0.5453579641835213, "grad_norm": 0.81640625, "learning_rate": 0.0001532746027300336, "loss": 1.011, "step": 21239 }, { "epoch": 0.5453836413794432, "grad_norm": 0.7578125, "learning_rate": 0.0001532708247338885, "loss": 0.9045, "step": 21240 }, { "epoch": 0.545409318575365, "grad_norm": 0.71484375, "learning_rate": 0.00015326704663157944, "loss": 0.7695, "step": 21241 }, { "epoch": 0.5454349957712868, "grad_norm": 0.85546875, "learning_rate": 0.00015326326842311398, "loss": 0.8117, "step": 21242 }, { "epoch": 0.5454606729672086, "grad_norm": 0.69921875, "learning_rate": 0.00015325949010849965, "loss": 0.8643, "step": 21243 }, { "epoch": 0.5454863501631304, "grad_norm": 0.7265625, "learning_rate": 0.00015325571168774396, "loss": 0.8452, "step": 21244 }, { "epoch": 0.5455120273590522, "grad_norm": 0.80859375, "learning_rate": 0.00015325193316085444, "loss": 0.8821, "step": 21245 }, { "epoch": 0.5455377045549741, "grad_norm": 0.81640625, "learning_rate": 0.00015324815452783864, "loss": 0.9175, "step": 21246 }, { "epoch": 0.5455633817508959, "grad_norm": 0.85546875, "learning_rate": 0.00015324437578870405, "loss": 0.8991, "step": 21247 }, { "epoch": 0.5455890589468178, "grad_norm": 0.80859375, "learning_rate": 0.00015324059694345828, "loss": 0.8684, "step": 21248 }, { "epoch": 0.5456147361427396, "grad_norm": 0.72265625, "learning_rate": 0.00015323681799210876, "loss": 0.8424, "step": 21249 }, { "epoch": 0.5456404133386613, "grad_norm": 0.734375, "learning_rate": 0.0001532330389346631, "loss": 0.8079, "step": 21250 }, { "epoch": 0.5456660905345831, "grad_norm": 0.75390625, "learning_rate": 0.0001532292597711288, "loss": 0.7747, "step": 21251 }, { "epoch": 0.545691767730505, "grad_norm": 0.81640625, "learning_rate": 0.00015322548050151339, "loss": 1.0366, "step": 21252 }, { "epoch": 0.5457174449264268, "grad_norm": 0.85546875, "learning_rate": 0.00015322170112582438, "loss": 0.8577, "step": 21253 }, { "epoch": 0.5457431221223487, "grad_norm": 0.734375, "learning_rate": 0.00015321792164406937, "loss": 0.8184, "step": 21254 }, { "epoch": 0.5457687993182705, "grad_norm": 0.79296875, "learning_rate": 0.00015321414205625585, "loss": 0.94, "step": 21255 }, { "epoch": 0.5457944765141922, "grad_norm": 0.765625, "learning_rate": 0.0001532103623623913, "loss": 1.0088, "step": 21256 }, { "epoch": 0.545820153710114, "grad_norm": 0.7890625, "learning_rate": 0.00015320658256248333, "loss": 0.8687, "step": 21257 }, { "epoch": 0.5458458309060359, "grad_norm": 0.7734375, "learning_rate": 0.00015320280265653947, "loss": 0.8782, "step": 21258 }, { "epoch": 0.5458715081019577, "grad_norm": 0.7578125, "learning_rate": 0.0001531990226445672, "loss": 0.8235, "step": 21259 }, { "epoch": 0.5458971852978796, "grad_norm": 0.796875, "learning_rate": 0.00015319524252657409, "loss": 0.8667, "step": 21260 }, { "epoch": 0.5459228624938014, "grad_norm": 0.8359375, "learning_rate": 0.00015319146230256768, "loss": 0.8565, "step": 21261 }, { "epoch": 0.5459485396897232, "grad_norm": 0.734375, "learning_rate": 0.00015318768197255552, "loss": 0.7487, "step": 21262 }, { "epoch": 0.545974216885645, "grad_norm": 0.71875, "learning_rate": 0.00015318390153654506, "loss": 0.8205, "step": 21263 }, { "epoch": 0.5459998940815668, "grad_norm": 0.8203125, "learning_rate": 0.0001531801209945439, "loss": 0.9111, "step": 21264 }, { "epoch": 0.5460255712774886, "grad_norm": 0.796875, "learning_rate": 0.00015317634034655954, "loss": 0.9887, "step": 21265 }, { "epoch": 0.5460512484734105, "grad_norm": 0.78515625, "learning_rate": 0.00015317255959259958, "loss": 0.8932, "step": 21266 }, { "epoch": 0.5460769256693323, "grad_norm": 0.77734375, "learning_rate": 0.0001531687787326715, "loss": 0.8941, "step": 21267 }, { "epoch": 0.5461026028652541, "grad_norm": 0.78125, "learning_rate": 0.00015316499776678284, "loss": 0.8236, "step": 21268 }, { "epoch": 0.546128280061176, "grad_norm": 0.81640625, "learning_rate": 0.00015316121669494114, "loss": 0.8105, "step": 21269 }, { "epoch": 0.5461539572570977, "grad_norm": 0.8203125, "learning_rate": 0.00015315743551715393, "loss": 0.881, "step": 21270 }, { "epoch": 0.5461796344530195, "grad_norm": 1.1328125, "learning_rate": 0.00015315365423342877, "loss": 0.8325, "step": 21271 }, { "epoch": 0.5462053116489414, "grad_norm": 0.9140625, "learning_rate": 0.0001531498728437732, "loss": 0.9828, "step": 21272 }, { "epoch": 0.5462309888448632, "grad_norm": 0.796875, "learning_rate": 0.0001531460913481947, "loss": 0.8134, "step": 21273 }, { "epoch": 0.546256666040785, "grad_norm": 0.76171875, "learning_rate": 0.00015314230974670085, "loss": 0.8473, "step": 21274 }, { "epoch": 0.5462823432367069, "grad_norm": 0.75, "learning_rate": 0.00015313852803929917, "loss": 0.8003, "step": 21275 }, { "epoch": 0.5463080204326286, "grad_norm": 0.75390625, "learning_rate": 0.0001531347462259972, "loss": 0.8815, "step": 21276 }, { "epoch": 0.5463336976285504, "grad_norm": 0.7421875, "learning_rate": 0.0001531309643068025, "loss": 0.8718, "step": 21277 }, { "epoch": 0.5463593748244723, "grad_norm": 0.8671875, "learning_rate": 0.0001531271822817226, "loss": 0.829, "step": 21278 }, { "epoch": 0.5463850520203941, "grad_norm": 0.796875, "learning_rate": 0.00015312340015076497, "loss": 0.8093, "step": 21279 }, { "epoch": 0.546410729216316, "grad_norm": 0.8359375, "learning_rate": 0.00015311961791393726, "loss": 0.9098, "step": 21280 }, { "epoch": 0.5464364064122378, "grad_norm": 0.8359375, "learning_rate": 0.00015311583557124688, "loss": 0.9769, "step": 21281 }, { "epoch": 0.5464620836081596, "grad_norm": 0.87890625, "learning_rate": 0.0001531120531227015, "loss": 0.95, "step": 21282 }, { "epoch": 0.5464877608040813, "grad_norm": 0.77734375, "learning_rate": 0.0001531082705683086, "loss": 0.7959, "step": 21283 }, { "epoch": 0.5465134380000032, "grad_norm": 0.80078125, "learning_rate": 0.00015310448790807567, "loss": 0.9282, "step": 21284 }, { "epoch": 0.546539115195925, "grad_norm": 0.8046875, "learning_rate": 0.0001531007051420103, "loss": 0.8742, "step": 21285 }, { "epoch": 0.5465647923918469, "grad_norm": 0.79296875, "learning_rate": 0.00015309692227012006, "loss": 0.9052, "step": 21286 }, { "epoch": 0.5465904695877687, "grad_norm": 0.7734375, "learning_rate": 0.00015309313929241244, "loss": 0.8653, "step": 21287 }, { "epoch": 0.5466161467836905, "grad_norm": 0.73046875, "learning_rate": 0.00015308935620889495, "loss": 0.8081, "step": 21288 }, { "epoch": 0.5466418239796123, "grad_norm": 0.8046875, "learning_rate": 0.0001530855730195752, "loss": 0.9895, "step": 21289 }, { "epoch": 0.5466675011755341, "grad_norm": 0.76953125, "learning_rate": 0.00015308178972446066, "loss": 0.8892, "step": 21290 }, { "epoch": 0.5466931783714559, "grad_norm": 0.68359375, "learning_rate": 0.00015307800632355899, "loss": 0.7593, "step": 21291 }, { "epoch": 0.5467188555673778, "grad_norm": 0.796875, "learning_rate": 0.00015307422281687756, "loss": 0.9279, "step": 21292 }, { "epoch": 0.5467445327632996, "grad_norm": 0.77734375, "learning_rate": 0.00015307043920442402, "loss": 0.9057, "step": 21293 }, { "epoch": 0.5467702099592214, "grad_norm": 0.890625, "learning_rate": 0.0001530666554862059, "loss": 0.9001, "step": 21294 }, { "epoch": 0.5467958871551433, "grad_norm": 0.77734375, "learning_rate": 0.00015306287166223074, "loss": 1.0153, "step": 21295 }, { "epoch": 0.546821564351065, "grad_norm": 0.7578125, "learning_rate": 0.0001530590877325061, "loss": 1.0147, "step": 21296 }, { "epoch": 0.5468472415469868, "grad_norm": 0.7890625, "learning_rate": 0.00015305530369703942, "loss": 0.8254, "step": 21297 }, { "epoch": 0.5468729187429087, "grad_norm": 0.82421875, "learning_rate": 0.00015305151955583837, "loss": 0.9578, "step": 21298 }, { "epoch": 0.5468985959388305, "grad_norm": 0.82421875, "learning_rate": 0.0001530477353089104, "loss": 0.8249, "step": 21299 }, { "epoch": 0.5469242731347523, "grad_norm": 0.78125, "learning_rate": 0.0001530439509562631, "loss": 0.871, "step": 21300 }, { "epoch": 0.5469499503306742, "grad_norm": 0.87109375, "learning_rate": 0.000153040166497904, "loss": 0.9946, "step": 21301 }, { "epoch": 0.546975627526596, "grad_norm": 0.84375, "learning_rate": 0.00015303638193384062, "loss": 0.8924, "step": 21302 }, { "epoch": 0.5470013047225177, "grad_norm": 0.703125, "learning_rate": 0.00015303259726408053, "loss": 0.8059, "step": 21303 }, { "epoch": 0.5470269819184396, "grad_norm": 0.828125, "learning_rate": 0.0001530288124886313, "loss": 0.8775, "step": 21304 }, { "epoch": 0.5470526591143614, "grad_norm": 0.67578125, "learning_rate": 0.00015302502760750037, "loss": 0.7498, "step": 21305 }, { "epoch": 0.5470783363102832, "grad_norm": 0.71875, "learning_rate": 0.00015302124262069538, "loss": 0.8711, "step": 21306 }, { "epoch": 0.5471040135062051, "grad_norm": 0.85546875, "learning_rate": 0.00015301745752822386, "loss": 0.9878, "step": 21307 }, { "epoch": 0.5471296907021269, "grad_norm": 0.73828125, "learning_rate": 0.0001530136723300933, "loss": 0.807, "step": 21308 }, { "epoch": 0.5471553678980486, "grad_norm": 0.73828125, "learning_rate": 0.0001530098870263113, "loss": 0.8766, "step": 21309 }, { "epoch": 0.5471810450939705, "grad_norm": 0.84375, "learning_rate": 0.0001530061016168854, "loss": 1.2128, "step": 21310 }, { "epoch": 0.5472067222898923, "grad_norm": 0.73046875, "learning_rate": 0.0001530023161018231, "loss": 0.8605, "step": 21311 }, { "epoch": 0.5472323994858141, "grad_norm": 0.77734375, "learning_rate": 0.00015299853048113203, "loss": 0.9022, "step": 21312 }, { "epoch": 0.547258076681736, "grad_norm": 0.84765625, "learning_rate": 0.00015299474475481957, "loss": 0.8503, "step": 21313 }, { "epoch": 0.5472837538776578, "grad_norm": 0.765625, "learning_rate": 0.00015299095892289347, "loss": 0.8573, "step": 21314 }, { "epoch": 0.5473094310735797, "grad_norm": 0.85546875, "learning_rate": 0.00015298717298536113, "loss": 0.9675, "step": 21315 }, { "epoch": 0.5473351082695014, "grad_norm": 0.7734375, "learning_rate": 0.00015298338694223014, "loss": 0.8932, "step": 21316 }, { "epoch": 0.5473607854654232, "grad_norm": 0.8046875, "learning_rate": 0.00015297960079350807, "loss": 0.9134, "step": 21317 }, { "epoch": 0.547386462661345, "grad_norm": 0.875, "learning_rate": 0.0001529758145392024, "loss": 1.0424, "step": 21318 }, { "epoch": 0.5474121398572669, "grad_norm": 0.77734375, "learning_rate": 0.00015297202817932075, "loss": 0.8113, "step": 21319 }, { "epoch": 0.5474378170531887, "grad_norm": 0.7421875, "learning_rate": 0.00015296824171387064, "loss": 0.8161, "step": 21320 }, { "epoch": 0.5474634942491106, "grad_norm": 0.76953125, "learning_rate": 0.00015296445514285955, "loss": 0.8372, "step": 21321 }, { "epoch": 0.5474891714450324, "grad_norm": 0.7734375, "learning_rate": 0.00015296066846629515, "loss": 0.978, "step": 21322 }, { "epoch": 0.5475148486409541, "grad_norm": 0.828125, "learning_rate": 0.0001529568816841849, "loss": 0.9257, "step": 21323 }, { "epoch": 0.547540525836876, "grad_norm": 0.80859375, "learning_rate": 0.0001529530947965364, "loss": 0.85, "step": 21324 }, { "epoch": 0.5475662030327978, "grad_norm": 0.77734375, "learning_rate": 0.00015294930780335712, "loss": 0.8776, "step": 21325 }, { "epoch": 0.5475918802287196, "grad_norm": 0.76953125, "learning_rate": 0.00015294552070465464, "loss": 1.004, "step": 21326 }, { "epoch": 0.5476175574246415, "grad_norm": 0.83203125, "learning_rate": 0.00015294173350043658, "loss": 0.8342, "step": 21327 }, { "epoch": 0.5476432346205633, "grad_norm": 0.76171875, "learning_rate": 0.00015293794619071038, "loss": 0.7821, "step": 21328 }, { "epoch": 0.547668911816485, "grad_norm": 0.8359375, "learning_rate": 0.00015293415877548362, "loss": 0.8886, "step": 21329 }, { "epoch": 0.5476945890124069, "grad_norm": 0.71484375, "learning_rate": 0.00015293037125476393, "loss": 0.8391, "step": 21330 }, { "epoch": 0.5477202662083287, "grad_norm": 0.71875, "learning_rate": 0.0001529265836285587, "loss": 0.873, "step": 21331 }, { "epoch": 0.5477459434042505, "grad_norm": 0.7734375, "learning_rate": 0.00015292279589687566, "loss": 0.8188, "step": 21332 }, { "epoch": 0.5477716206001724, "grad_norm": 0.77734375, "learning_rate": 0.00015291900805972222, "loss": 0.8232, "step": 21333 }, { "epoch": 0.5477972977960942, "grad_norm": 0.83203125, "learning_rate": 0.00015291522011710596, "loss": 0.9349, "step": 21334 }, { "epoch": 0.547822974992016, "grad_norm": 0.73046875, "learning_rate": 0.00015291143206903448, "loss": 0.8591, "step": 21335 }, { "epoch": 0.5478486521879378, "grad_norm": 0.73828125, "learning_rate": 0.00015290764391551531, "loss": 0.8059, "step": 21336 }, { "epoch": 0.5478743293838596, "grad_norm": 0.7734375, "learning_rate": 0.000152903855656556, "loss": 0.8559, "step": 21337 }, { "epoch": 0.5479000065797814, "grad_norm": 0.8046875, "learning_rate": 0.000152900067292164, "loss": 0.8417, "step": 21338 }, { "epoch": 0.5479256837757033, "grad_norm": 0.97265625, "learning_rate": 0.00015289627882234701, "loss": 0.9926, "step": 21339 }, { "epoch": 0.5479513609716251, "grad_norm": 0.85546875, "learning_rate": 0.0001528924902471125, "loss": 0.9524, "step": 21340 }, { "epoch": 0.547977038167547, "grad_norm": 0.71875, "learning_rate": 0.00015288870156646804, "loss": 0.7746, "step": 21341 }, { "epoch": 0.5480027153634688, "grad_norm": 0.71484375, "learning_rate": 0.00015288491278042115, "loss": 0.8813, "step": 21342 }, { "epoch": 0.5480283925593905, "grad_norm": 0.74609375, "learning_rate": 0.00015288112388897943, "loss": 0.911, "step": 21343 }, { "epoch": 0.5480540697553123, "grad_norm": 0.890625, "learning_rate": 0.00015287733489215038, "loss": 0.9305, "step": 21344 }, { "epoch": 0.5480797469512342, "grad_norm": 0.74609375, "learning_rate": 0.0001528735457899416, "loss": 0.9039, "step": 21345 }, { "epoch": 0.548105424147156, "grad_norm": 0.7421875, "learning_rate": 0.00015286975658236065, "loss": 0.8211, "step": 21346 }, { "epoch": 0.5481311013430779, "grad_norm": 0.7265625, "learning_rate": 0.000152865967269415, "loss": 0.9608, "step": 21347 }, { "epoch": 0.5481567785389997, "grad_norm": 0.83203125, "learning_rate": 0.00015286217785111226, "loss": 0.9044, "step": 21348 }, { "epoch": 0.5481824557349214, "grad_norm": 0.78515625, "learning_rate": 0.00015285838832746, "loss": 0.9294, "step": 21349 }, { "epoch": 0.5482081329308433, "grad_norm": 0.70703125, "learning_rate": 0.00015285459869846573, "loss": 0.9576, "step": 21350 }, { "epoch": 0.5482338101267651, "grad_norm": 0.74609375, "learning_rate": 0.00015285080896413703, "loss": 0.8435, "step": 21351 }, { "epoch": 0.5482594873226869, "grad_norm": 0.75390625, "learning_rate": 0.00015284701912448144, "loss": 0.7987, "step": 21352 }, { "epoch": 0.5482851645186088, "grad_norm": 0.80078125, "learning_rate": 0.00015284322917950645, "loss": 0.8246, "step": 21353 }, { "epoch": 0.5483108417145306, "grad_norm": 0.7421875, "learning_rate": 0.00015283943912921977, "loss": 0.8726, "step": 21354 }, { "epoch": 0.5483365189104524, "grad_norm": 0.79296875, "learning_rate": 0.0001528356489736288, "loss": 0.9409, "step": 21355 }, { "epoch": 0.5483621961063742, "grad_norm": 0.7265625, "learning_rate": 0.00015283185871274121, "loss": 0.8169, "step": 21356 }, { "epoch": 0.548387873302296, "grad_norm": 0.7890625, "learning_rate": 0.00015282806834656447, "loss": 0.7398, "step": 21357 }, { "epoch": 0.5484135504982178, "grad_norm": 0.80078125, "learning_rate": 0.00015282427787510617, "loss": 1.0191, "step": 21358 }, { "epoch": 0.5484392276941397, "grad_norm": 0.74609375, "learning_rate": 0.00015282048729837384, "loss": 0.7969, "step": 21359 }, { "epoch": 0.5484649048900615, "grad_norm": 0.828125, "learning_rate": 0.00015281669661637505, "loss": 0.9516, "step": 21360 }, { "epoch": 0.5484905820859833, "grad_norm": 0.7578125, "learning_rate": 0.00015281290582911734, "loss": 0.8262, "step": 21361 }, { "epoch": 0.5485162592819052, "grad_norm": 0.80078125, "learning_rate": 0.0001528091149366083, "loss": 0.9221, "step": 21362 }, { "epoch": 0.5485419364778269, "grad_norm": 0.87890625, "learning_rate": 0.00015280532393885547, "loss": 0.9517, "step": 21363 }, { "epoch": 0.5485676136737487, "grad_norm": 0.76171875, "learning_rate": 0.00015280153283586637, "loss": 0.8856, "step": 21364 }, { "epoch": 0.5485932908696706, "grad_norm": 0.80859375, "learning_rate": 0.00015279774162764865, "loss": 1.0219, "step": 21365 }, { "epoch": 0.5486189680655924, "grad_norm": 0.80078125, "learning_rate": 0.00015279395031420973, "loss": 0.9191, "step": 21366 }, { "epoch": 0.5486446452615142, "grad_norm": 0.8046875, "learning_rate": 0.00015279015889555728, "loss": 0.9426, "step": 21367 }, { "epoch": 0.5486703224574361, "grad_norm": 0.75, "learning_rate": 0.0001527863673716988, "loss": 0.9566, "step": 21368 }, { "epoch": 0.5486959996533578, "grad_norm": 0.78515625, "learning_rate": 0.00015278257574264186, "loss": 0.8499, "step": 21369 }, { "epoch": 0.5487216768492796, "grad_norm": 0.78125, "learning_rate": 0.000152778784008394, "loss": 0.8826, "step": 21370 }, { "epoch": 0.5487473540452015, "grad_norm": 0.80859375, "learning_rate": 0.0001527749921689628, "loss": 0.9217, "step": 21371 }, { "epoch": 0.5487730312411233, "grad_norm": 0.75390625, "learning_rate": 0.0001527712002243558, "loss": 0.9436, "step": 21372 }, { "epoch": 0.5487987084370451, "grad_norm": 0.828125, "learning_rate": 0.00015276740817458056, "loss": 0.8021, "step": 21373 }, { "epoch": 0.548824385632967, "grad_norm": 0.78515625, "learning_rate": 0.00015276361601964465, "loss": 1.0251, "step": 21374 }, { "epoch": 0.5488500628288888, "grad_norm": 0.828125, "learning_rate": 0.0001527598237595556, "loss": 0.8214, "step": 21375 }, { "epoch": 0.5488757400248105, "grad_norm": 0.8046875, "learning_rate": 0.00015275603139432105, "loss": 0.9444, "step": 21376 }, { "epoch": 0.5489014172207324, "grad_norm": 0.734375, "learning_rate": 0.00015275223892394842, "loss": 0.9931, "step": 21377 }, { "epoch": 0.5489270944166542, "grad_norm": 0.7890625, "learning_rate": 0.0001527484463484454, "loss": 0.8042, "step": 21378 }, { "epoch": 0.548952771612576, "grad_norm": 0.79296875, "learning_rate": 0.00015274465366781946, "loss": 0.8281, "step": 21379 }, { "epoch": 0.5489784488084979, "grad_norm": 0.73828125, "learning_rate": 0.00015274086088207817, "loss": 0.8102, "step": 21380 }, { "epoch": 0.5490041260044197, "grad_norm": 0.78125, "learning_rate": 0.00015273706799122917, "loss": 0.8004, "step": 21381 }, { "epoch": 0.5490298032003416, "grad_norm": 0.828125, "learning_rate": 0.0001527332749952799, "loss": 0.9062, "step": 21382 }, { "epoch": 0.5490554803962633, "grad_norm": 0.80859375, "learning_rate": 0.00015272948189423802, "loss": 0.932, "step": 21383 }, { "epoch": 0.5490811575921851, "grad_norm": 0.73046875, "learning_rate": 0.00015272568868811103, "loss": 0.892, "step": 21384 }, { "epoch": 0.549106834788107, "grad_norm": 0.7890625, "learning_rate": 0.0001527218953769065, "loss": 0.8745, "step": 21385 }, { "epoch": 0.5491325119840288, "grad_norm": 0.73828125, "learning_rate": 0.00015271810196063198, "loss": 0.85, "step": 21386 }, { "epoch": 0.5491581891799506, "grad_norm": 0.71875, "learning_rate": 0.00015271430843929505, "loss": 0.7984, "step": 21387 }, { "epoch": 0.5491838663758725, "grad_norm": 0.7578125, "learning_rate": 0.0001527105148129033, "loss": 0.8307, "step": 21388 }, { "epoch": 0.5492095435717942, "grad_norm": 0.8828125, "learning_rate": 0.00015270672108146424, "loss": 1.0065, "step": 21389 }, { "epoch": 0.549235220767716, "grad_norm": 0.796875, "learning_rate": 0.0001527029272449854, "loss": 0.9314, "step": 21390 }, { "epoch": 0.5492608979636379, "grad_norm": 0.73828125, "learning_rate": 0.00015269913330347444, "loss": 0.7534, "step": 21391 }, { "epoch": 0.5492865751595597, "grad_norm": 0.7734375, "learning_rate": 0.00015269533925693884, "loss": 0.9356, "step": 21392 }, { "epoch": 0.5493122523554815, "grad_norm": 0.828125, "learning_rate": 0.0001526915451053862, "loss": 1.0246, "step": 21393 }, { "epoch": 0.5493379295514034, "grad_norm": 0.78125, "learning_rate": 0.00015268775084882407, "loss": 0.9079, "step": 21394 }, { "epoch": 0.5493636067473252, "grad_norm": 0.859375, "learning_rate": 0.00015268395648726, "loss": 0.9551, "step": 21395 }, { "epoch": 0.5493892839432469, "grad_norm": 0.83984375, "learning_rate": 0.00015268016202070158, "loss": 0.8084, "step": 21396 }, { "epoch": 0.5494149611391688, "grad_norm": 0.77734375, "learning_rate": 0.00015267636744915636, "loss": 0.9062, "step": 21397 }, { "epoch": 0.5494406383350906, "grad_norm": 0.80859375, "learning_rate": 0.00015267257277263186, "loss": 0.8806, "step": 21398 }, { "epoch": 0.5494663155310124, "grad_norm": 0.7421875, "learning_rate": 0.00015266877799113572, "loss": 0.7679, "step": 21399 }, { "epoch": 0.5494919927269343, "grad_norm": 0.80078125, "learning_rate": 0.00015266498310467544, "loss": 0.8165, "step": 21400 }, { "epoch": 0.5495176699228561, "grad_norm": 0.82421875, "learning_rate": 0.00015266118811325865, "loss": 1.0866, "step": 21401 }, { "epoch": 0.549543347118778, "grad_norm": 0.78515625, "learning_rate": 0.00015265739301689285, "loss": 0.9436, "step": 21402 }, { "epoch": 0.5495690243146997, "grad_norm": 0.75390625, "learning_rate": 0.0001526535978155856, "loss": 0.9523, "step": 21403 }, { "epoch": 0.5495947015106215, "grad_norm": 0.71875, "learning_rate": 0.0001526498025093445, "loss": 0.7862, "step": 21404 }, { "epoch": 0.5496203787065433, "grad_norm": 0.76171875, "learning_rate": 0.0001526460070981771, "loss": 0.926, "step": 21405 }, { "epoch": 0.5496460559024652, "grad_norm": 0.84765625, "learning_rate": 0.0001526422115820909, "loss": 0.8573, "step": 21406 }, { "epoch": 0.549671733098387, "grad_norm": 0.80078125, "learning_rate": 0.00015263841596109364, "loss": 0.9768, "step": 21407 }, { "epoch": 0.5496974102943089, "grad_norm": 0.73046875, "learning_rate": 0.0001526346202351927, "loss": 0.8877, "step": 21408 }, { "epoch": 0.5497230874902306, "grad_norm": 0.859375, "learning_rate": 0.00015263082440439572, "loss": 0.9678, "step": 21409 }, { "epoch": 0.5497487646861524, "grad_norm": 0.765625, "learning_rate": 0.0001526270284687103, "loss": 0.8522, "step": 21410 }, { "epoch": 0.5497744418820742, "grad_norm": 0.6953125, "learning_rate": 0.0001526232324281439, "loss": 0.7648, "step": 21411 }, { "epoch": 0.5498001190779961, "grad_norm": 0.7890625, "learning_rate": 0.00015261943628270422, "loss": 0.9352, "step": 21412 }, { "epoch": 0.5498257962739179, "grad_norm": 0.86328125, "learning_rate": 0.00015261564003239873, "loss": 1.0128, "step": 21413 }, { "epoch": 0.5498514734698398, "grad_norm": 0.7578125, "learning_rate": 0.000152611843677235, "loss": 0.923, "step": 21414 }, { "epoch": 0.5498771506657616, "grad_norm": 0.7890625, "learning_rate": 0.00015260804721722065, "loss": 0.9276, "step": 21415 }, { "epoch": 0.5499028278616833, "grad_norm": 0.91796875, "learning_rate": 0.0001526042506523632, "loss": 0.9488, "step": 21416 }, { "epoch": 0.5499285050576052, "grad_norm": 0.78515625, "learning_rate": 0.00015260045398267025, "loss": 0.8831, "step": 21417 }, { "epoch": 0.549954182253527, "grad_norm": 0.70703125, "learning_rate": 0.0001525966572081493, "loss": 0.8187, "step": 21418 }, { "epoch": 0.5499798594494488, "grad_norm": 0.8515625, "learning_rate": 0.00015259286032880798, "loss": 0.7908, "step": 21419 }, { "epoch": 0.5500055366453707, "grad_norm": 0.84375, "learning_rate": 0.00015258906334465386, "loss": 0.9356, "step": 21420 }, { "epoch": 0.5500312138412925, "grad_norm": 0.78125, "learning_rate": 0.0001525852662556945, "loss": 0.8975, "step": 21421 }, { "epoch": 0.5500568910372143, "grad_norm": 0.77734375, "learning_rate": 0.00015258146906193743, "loss": 0.9572, "step": 21422 }, { "epoch": 0.5500825682331361, "grad_norm": 0.6796875, "learning_rate": 0.00015257767176339023, "loss": 0.7678, "step": 21423 }, { "epoch": 0.5501082454290579, "grad_norm": 0.81640625, "learning_rate": 0.00015257387436006051, "loss": 1.0644, "step": 21424 }, { "epoch": 0.5501339226249797, "grad_norm": 0.79296875, "learning_rate": 0.0001525700768519558, "loss": 0.813, "step": 21425 }, { "epoch": 0.5501595998209016, "grad_norm": 0.8046875, "learning_rate": 0.00015256627923908365, "loss": 0.8082, "step": 21426 }, { "epoch": 0.5501852770168234, "grad_norm": 0.77734375, "learning_rate": 0.00015256248152145167, "loss": 0.8286, "step": 21427 }, { "epoch": 0.5502109542127452, "grad_norm": 0.77734375, "learning_rate": 0.0001525586836990674, "loss": 0.8422, "step": 21428 }, { "epoch": 0.550236631408667, "grad_norm": 0.80078125, "learning_rate": 0.00015255488577193847, "loss": 0.8264, "step": 21429 }, { "epoch": 0.5502623086045888, "grad_norm": 0.7734375, "learning_rate": 0.00015255108774007235, "loss": 0.8, "step": 21430 }, { "epoch": 0.5502879858005106, "grad_norm": 0.78125, "learning_rate": 0.00015254728960347667, "loss": 0.8677, "step": 21431 }, { "epoch": 0.5503136629964325, "grad_norm": 0.76953125, "learning_rate": 0.000152543491362159, "loss": 0.9533, "step": 21432 }, { "epoch": 0.5503393401923543, "grad_norm": 0.8046875, "learning_rate": 0.00015253969301612686, "loss": 0.9116, "step": 21433 }, { "epoch": 0.5503650173882761, "grad_norm": 0.7265625, "learning_rate": 0.00015253589456538791, "loss": 0.8257, "step": 21434 }, { "epoch": 0.550390694584198, "grad_norm": 0.7578125, "learning_rate": 0.00015253209600994965, "loss": 0.8997, "step": 21435 }, { "epoch": 0.5504163717801197, "grad_norm": 0.796875, "learning_rate": 0.00015252829734981965, "loss": 0.9383, "step": 21436 }, { "epoch": 0.5504420489760415, "grad_norm": 0.82421875, "learning_rate": 0.00015252449858500552, "loss": 0.9062, "step": 21437 }, { "epoch": 0.5504677261719634, "grad_norm": 0.83984375, "learning_rate": 0.0001525206997155148, "loss": 0.8802, "step": 21438 }, { "epoch": 0.5504934033678852, "grad_norm": 0.83203125, "learning_rate": 0.00015251690074135507, "loss": 0.986, "step": 21439 }, { "epoch": 0.550519080563807, "grad_norm": 0.765625, "learning_rate": 0.0001525131016625339, "loss": 0.7525, "step": 21440 }, { "epoch": 0.5505447577597289, "grad_norm": 0.8125, "learning_rate": 0.00015250930247905884, "loss": 0.9418, "step": 21441 }, { "epoch": 0.5505704349556507, "grad_norm": 0.75390625, "learning_rate": 0.00015250550319093753, "loss": 0.8519, "step": 21442 }, { "epoch": 0.5505961121515724, "grad_norm": 0.7265625, "learning_rate": 0.00015250170379817746, "loss": 0.7391, "step": 21443 }, { "epoch": 0.5506217893474943, "grad_norm": 0.7578125, "learning_rate": 0.00015249790430078624, "loss": 0.8725, "step": 21444 }, { "epoch": 0.5506474665434161, "grad_norm": 0.72265625, "learning_rate": 0.00015249410469877144, "loss": 0.838, "step": 21445 }, { "epoch": 0.550673143739338, "grad_norm": 0.765625, "learning_rate": 0.00015249030499214062, "loss": 0.8236, "step": 21446 }, { "epoch": 0.5506988209352598, "grad_norm": 0.75, "learning_rate": 0.00015248650518090141, "loss": 1.0406, "step": 21447 }, { "epoch": 0.5507244981311816, "grad_norm": 0.73828125, "learning_rate": 0.0001524827052650613, "loss": 0.9198, "step": 21448 }, { "epoch": 0.5507501753271034, "grad_norm": 0.7890625, "learning_rate": 0.0001524789052446279, "loss": 0.8681, "step": 21449 }, { "epoch": 0.5507758525230252, "grad_norm": 0.83203125, "learning_rate": 0.0001524751051196088, "loss": 1.0472, "step": 21450 }, { "epoch": 0.550801529718947, "grad_norm": 0.79296875, "learning_rate": 0.00015247130489001154, "loss": 0.8675, "step": 21451 }, { "epoch": 0.5508272069148689, "grad_norm": 0.8359375, "learning_rate": 0.00015246750455584374, "loss": 0.9721, "step": 21452 }, { "epoch": 0.5508528841107907, "grad_norm": 0.79296875, "learning_rate": 0.0001524637041171129, "loss": 0.8557, "step": 21453 }, { "epoch": 0.5508785613067125, "grad_norm": 0.7421875, "learning_rate": 0.00015245990357382665, "loss": 0.8701, "step": 21454 }, { "epoch": 0.5509042385026344, "grad_norm": 0.7890625, "learning_rate": 0.00015245610292599255, "loss": 0.9247, "step": 21455 }, { "epoch": 0.5509299156985561, "grad_norm": 0.7890625, "learning_rate": 0.00015245230217361816, "loss": 0.9999, "step": 21456 }, { "epoch": 0.5509555928944779, "grad_norm": 0.828125, "learning_rate": 0.0001524485013167111, "loss": 0.8932, "step": 21457 }, { "epoch": 0.5509812700903998, "grad_norm": 0.7890625, "learning_rate": 0.0001524447003552789, "loss": 0.9039, "step": 21458 }, { "epoch": 0.5510069472863216, "grad_norm": 0.79296875, "learning_rate": 0.00015244089928932914, "loss": 0.9333, "step": 21459 }, { "epoch": 0.5510326244822434, "grad_norm": 0.828125, "learning_rate": 0.00015243709811886942, "loss": 1.0235, "step": 21460 }, { "epoch": 0.5510583016781653, "grad_norm": 0.7890625, "learning_rate": 0.00015243329684390733, "loss": 0.8155, "step": 21461 }, { "epoch": 0.5510839788740871, "grad_norm": 0.71875, "learning_rate": 0.00015242949546445038, "loss": 0.9282, "step": 21462 }, { "epoch": 0.5511096560700088, "grad_norm": 0.68359375, "learning_rate": 0.00015242569398050618, "loss": 0.7498, "step": 21463 }, { "epoch": 0.5511353332659307, "grad_norm": 0.765625, "learning_rate": 0.00015242189239208235, "loss": 0.8109, "step": 21464 }, { "epoch": 0.5511610104618525, "grad_norm": 0.78125, "learning_rate": 0.00015241809069918634, "loss": 0.8797, "step": 21465 }, { "epoch": 0.5511866876577743, "grad_norm": 0.76171875, "learning_rate": 0.00015241428890182585, "loss": 0.8556, "step": 21466 }, { "epoch": 0.5512123648536962, "grad_norm": 0.7421875, "learning_rate": 0.00015241048700000842, "loss": 0.8861, "step": 21467 }, { "epoch": 0.551238042049618, "grad_norm": 0.828125, "learning_rate": 0.00015240668499374164, "loss": 0.8371, "step": 21468 }, { "epoch": 0.5512637192455397, "grad_norm": 0.8046875, "learning_rate": 0.00015240288288303306, "loss": 0.8539, "step": 21469 }, { "epoch": 0.5512893964414616, "grad_norm": 1.0625, "learning_rate": 0.00015239908066789026, "loss": 0.9889, "step": 21470 }, { "epoch": 0.5513150736373834, "grad_norm": 0.7578125, "learning_rate": 0.00015239527834832084, "loss": 0.8675, "step": 21471 }, { "epoch": 0.5513407508333052, "grad_norm": 0.80859375, "learning_rate": 0.00015239147592433235, "loss": 0.8499, "step": 21472 }, { "epoch": 0.5513664280292271, "grad_norm": 0.8046875, "learning_rate": 0.00015238767339593236, "loss": 0.8411, "step": 21473 }, { "epoch": 0.5513921052251489, "grad_norm": 0.76953125, "learning_rate": 0.0001523838707631285, "loss": 0.8881, "step": 21474 }, { "epoch": 0.5514177824210708, "grad_norm": 0.765625, "learning_rate": 0.00015238006802592832, "loss": 0.8089, "step": 21475 }, { "epoch": 0.5514434596169925, "grad_norm": 0.76171875, "learning_rate": 0.0001523762651843394, "loss": 0.8275, "step": 21476 }, { "epoch": 0.5514691368129143, "grad_norm": 0.73828125, "learning_rate": 0.00015237246223836928, "loss": 0.9335, "step": 21477 }, { "epoch": 0.5514948140088362, "grad_norm": 0.83203125, "learning_rate": 0.00015236865918802562, "loss": 0.9231, "step": 21478 }, { "epoch": 0.551520491204758, "grad_norm": 0.7890625, "learning_rate": 0.00015236485603331592, "loss": 0.7561, "step": 21479 }, { "epoch": 0.5515461684006798, "grad_norm": 0.8125, "learning_rate": 0.00015236105277424778, "loss": 0.9833, "step": 21480 }, { "epoch": 0.5515718455966017, "grad_norm": 0.765625, "learning_rate": 0.00015235724941082884, "loss": 0.7986, "step": 21481 }, { "epoch": 0.5515975227925234, "grad_norm": 0.765625, "learning_rate": 0.0001523534459430666, "loss": 0.9226, "step": 21482 }, { "epoch": 0.5516231999884452, "grad_norm": 0.7578125, "learning_rate": 0.00015234964237096865, "loss": 1.0725, "step": 21483 }, { "epoch": 0.5516488771843671, "grad_norm": 0.7421875, "learning_rate": 0.00015234583869454263, "loss": 0.8001, "step": 21484 }, { "epoch": 0.5516745543802889, "grad_norm": 0.81640625, "learning_rate": 0.00015234203491379606, "loss": 0.9251, "step": 21485 }, { "epoch": 0.5517002315762107, "grad_norm": 0.859375, "learning_rate": 0.00015233823102873655, "loss": 0.9787, "step": 21486 }, { "epoch": 0.5517259087721326, "grad_norm": 0.82421875, "learning_rate": 0.00015233442703937167, "loss": 0.8568, "step": 21487 }, { "epoch": 0.5517515859680544, "grad_norm": 0.75390625, "learning_rate": 0.00015233062294570901, "loss": 0.8902, "step": 21488 }, { "epoch": 0.5517772631639761, "grad_norm": 0.8203125, "learning_rate": 0.00015232681874775613, "loss": 0.8986, "step": 21489 }, { "epoch": 0.551802940359898, "grad_norm": 0.8359375, "learning_rate": 0.00015232301444552065, "loss": 0.793, "step": 21490 }, { "epoch": 0.5518286175558198, "grad_norm": 0.83984375, "learning_rate": 0.0001523192100390101, "loss": 0.9467, "step": 21491 }, { "epoch": 0.5518542947517416, "grad_norm": 0.75, "learning_rate": 0.00015231540552823212, "loss": 0.8866, "step": 21492 }, { "epoch": 0.5518799719476635, "grad_norm": 0.78515625, "learning_rate": 0.00015231160091319426, "loss": 0.8459, "step": 21493 }, { "epoch": 0.5519056491435853, "grad_norm": 0.765625, "learning_rate": 0.0001523077961939041, "loss": 0.8668, "step": 21494 }, { "epoch": 0.5519313263395071, "grad_norm": 0.8828125, "learning_rate": 0.00015230399137036921, "loss": 0.9685, "step": 21495 }, { "epoch": 0.5519570035354289, "grad_norm": 0.828125, "learning_rate": 0.00015230018644259722, "loss": 0.8527, "step": 21496 }, { "epoch": 0.5519826807313507, "grad_norm": 0.7421875, "learning_rate": 0.00015229638141059563, "loss": 0.8799, "step": 21497 }, { "epoch": 0.5520083579272725, "grad_norm": 0.83984375, "learning_rate": 0.00015229257627437213, "loss": 0.8109, "step": 21498 }, { "epoch": 0.5520340351231944, "grad_norm": 0.859375, "learning_rate": 0.00015228877103393423, "loss": 0.8373, "step": 21499 }, { "epoch": 0.5520597123191162, "grad_norm": 0.72265625, "learning_rate": 0.00015228496568928953, "loss": 0.8122, "step": 21500 }, { "epoch": 0.552085389515038, "grad_norm": 0.76953125, "learning_rate": 0.00015228116024044563, "loss": 0.939, "step": 21501 }, { "epoch": 0.5521110667109598, "grad_norm": 0.8125, "learning_rate": 0.00015227735468741006, "loss": 0.8848, "step": 21502 }, { "epoch": 0.5521367439068816, "grad_norm": 0.78125, "learning_rate": 0.00015227354903019048, "loss": 0.9517, "step": 21503 }, { "epoch": 0.5521624211028034, "grad_norm": 0.73828125, "learning_rate": 0.00015226974326879443, "loss": 0.6768, "step": 21504 }, { "epoch": 0.5521880982987253, "grad_norm": 0.78125, "learning_rate": 0.0001522659374032295, "loss": 0.8191, "step": 21505 }, { "epoch": 0.5522137754946471, "grad_norm": 0.73046875, "learning_rate": 0.0001522621314335033, "loss": 0.9008, "step": 21506 }, { "epoch": 0.552239452690569, "grad_norm": 0.6796875, "learning_rate": 0.00015225832535962334, "loss": 0.7745, "step": 21507 }, { "epoch": 0.5522651298864908, "grad_norm": 0.7734375, "learning_rate": 0.00015225451918159734, "loss": 0.8999, "step": 21508 }, { "epoch": 0.5522908070824125, "grad_norm": 0.73046875, "learning_rate": 0.00015225071289943275, "loss": 0.8372, "step": 21509 }, { "epoch": 0.5523164842783344, "grad_norm": 0.76171875, "learning_rate": 0.0001522469065131372, "loss": 0.8839, "step": 21510 }, { "epoch": 0.5523421614742562, "grad_norm": 0.80078125, "learning_rate": 0.0001522431000227183, "loss": 0.8636, "step": 21511 }, { "epoch": 0.552367838670178, "grad_norm": 0.76953125, "learning_rate": 0.00015223929342818364, "loss": 0.8902, "step": 21512 }, { "epoch": 0.5523935158660999, "grad_norm": 0.71875, "learning_rate": 0.00015223548672954075, "loss": 0.7035, "step": 21513 }, { "epoch": 0.5524191930620217, "grad_norm": 0.74609375, "learning_rate": 0.0001522316799267973, "loss": 0.9007, "step": 21514 }, { "epoch": 0.5524448702579435, "grad_norm": 0.78125, "learning_rate": 0.0001522278730199608, "loss": 0.8482, "step": 21515 }, { "epoch": 0.5524705474538653, "grad_norm": 0.796875, "learning_rate": 0.00015222406600903888, "loss": 0.9166, "step": 21516 }, { "epoch": 0.5524962246497871, "grad_norm": 0.76953125, "learning_rate": 0.00015222025889403908, "loss": 0.8869, "step": 21517 }, { "epoch": 0.5525219018457089, "grad_norm": 0.76953125, "learning_rate": 0.00015221645167496904, "loss": 0.883, "step": 21518 }, { "epoch": 0.5525475790416308, "grad_norm": 0.7421875, "learning_rate": 0.00015221264435183633, "loss": 1.0411, "step": 21519 }, { "epoch": 0.5525732562375526, "grad_norm": 0.71875, "learning_rate": 0.00015220883692464854, "loss": 0.9861, "step": 21520 }, { "epoch": 0.5525989334334744, "grad_norm": 0.74609375, "learning_rate": 0.00015220502939341325, "loss": 0.8554, "step": 21521 }, { "epoch": 0.5526246106293962, "grad_norm": 0.8046875, "learning_rate": 0.00015220122175813806, "loss": 0.8728, "step": 21522 }, { "epoch": 0.552650287825318, "grad_norm": 0.8125, "learning_rate": 0.00015219741401883052, "loss": 0.8801, "step": 21523 }, { "epoch": 0.5526759650212398, "grad_norm": 0.8046875, "learning_rate": 0.0001521936061754983, "loss": 0.9331, "step": 21524 }, { "epoch": 0.5527016422171617, "grad_norm": 0.9609375, "learning_rate": 0.00015218979822814892, "loss": 0.8692, "step": 21525 }, { "epoch": 0.5527273194130835, "grad_norm": 0.875, "learning_rate": 0.00015218599017678995, "loss": 0.9058, "step": 21526 }, { "epoch": 0.5527529966090053, "grad_norm": 0.7578125, "learning_rate": 0.00015218218202142907, "loss": 0.8059, "step": 21527 }, { "epoch": 0.5527786738049272, "grad_norm": 0.8203125, "learning_rate": 0.00015217837376207375, "loss": 0.9862, "step": 21528 }, { "epoch": 0.5528043510008489, "grad_norm": 0.828125, "learning_rate": 0.0001521745653987317, "loss": 0.8952, "step": 21529 }, { "epoch": 0.5528300281967707, "grad_norm": 0.7734375, "learning_rate": 0.00015217075693141046, "loss": 0.9131, "step": 21530 }, { "epoch": 0.5528557053926926, "grad_norm": 0.91015625, "learning_rate": 0.00015216694836011756, "loss": 0.9277, "step": 21531 }, { "epoch": 0.5528813825886144, "grad_norm": 0.74609375, "learning_rate": 0.00015216313968486066, "loss": 0.92, "step": 21532 }, { "epoch": 0.5529070597845362, "grad_norm": 0.73828125, "learning_rate": 0.00015215933090564736, "loss": 0.8916, "step": 21533 }, { "epoch": 0.5529327369804581, "grad_norm": 0.765625, "learning_rate": 0.00015215552202248525, "loss": 0.8576, "step": 21534 }, { "epoch": 0.5529584141763799, "grad_norm": 0.859375, "learning_rate": 0.00015215171303538182, "loss": 0.9678, "step": 21535 }, { "epoch": 0.5529840913723016, "grad_norm": 0.75, "learning_rate": 0.0001521479039443448, "loss": 0.8853, "step": 21536 }, { "epoch": 0.5530097685682235, "grad_norm": 0.8046875, "learning_rate": 0.0001521440947493817, "loss": 0.905, "step": 21537 }, { "epoch": 0.5530354457641453, "grad_norm": 0.72265625, "learning_rate": 0.00015214028545050011, "loss": 0.857, "step": 21538 }, { "epoch": 0.5530611229600672, "grad_norm": 0.73828125, "learning_rate": 0.00015213647604770764, "loss": 0.8802, "step": 21539 }, { "epoch": 0.553086800155989, "grad_norm": 0.7734375, "learning_rate": 0.00015213266654101192, "loss": 0.8724, "step": 21540 }, { "epoch": 0.5531124773519108, "grad_norm": 0.79296875, "learning_rate": 0.0001521288569304205, "loss": 0.8258, "step": 21541 }, { "epoch": 0.5531381545478326, "grad_norm": 0.71484375, "learning_rate": 0.00015212504721594093, "loss": 0.7669, "step": 21542 }, { "epoch": 0.5531638317437544, "grad_norm": 0.88671875, "learning_rate": 0.00015212123739758088, "loss": 0.8712, "step": 21543 }, { "epoch": 0.5531895089396762, "grad_norm": 0.76171875, "learning_rate": 0.0001521174274753479, "loss": 0.8663, "step": 21544 }, { "epoch": 0.5532151861355981, "grad_norm": 0.78515625, "learning_rate": 0.0001521136174492496, "loss": 0.8702, "step": 21545 }, { "epoch": 0.5532408633315199, "grad_norm": 0.796875, "learning_rate": 0.00015210980731929357, "loss": 0.9372, "step": 21546 }, { "epoch": 0.5532665405274417, "grad_norm": 0.828125, "learning_rate": 0.0001521059970854874, "loss": 0.8281, "step": 21547 }, { "epoch": 0.5532922177233636, "grad_norm": 0.8125, "learning_rate": 0.00015210218674783873, "loss": 0.7993, "step": 21548 }, { "epoch": 0.5533178949192853, "grad_norm": 0.8125, "learning_rate": 0.00015209837630635503, "loss": 0.8192, "step": 21549 }, { "epoch": 0.5533435721152071, "grad_norm": 0.7734375, "learning_rate": 0.00015209456576104402, "loss": 0.8345, "step": 21550 }, { "epoch": 0.553369249311129, "grad_norm": 0.80078125, "learning_rate": 0.00015209075511191325, "loss": 0.8663, "step": 21551 }, { "epoch": 0.5533949265070508, "grad_norm": 1.0859375, "learning_rate": 0.0001520869443589703, "loss": 0.8228, "step": 21552 }, { "epoch": 0.5534206037029726, "grad_norm": 0.78125, "learning_rate": 0.00015208313350222275, "loss": 0.881, "step": 21553 }, { "epoch": 0.5534462808988945, "grad_norm": 0.83984375, "learning_rate": 0.00015207932254167824, "loss": 0.9609, "step": 21554 }, { "epoch": 0.5534719580948163, "grad_norm": 0.875, "learning_rate": 0.00015207551147734436, "loss": 0.9009, "step": 21555 }, { "epoch": 0.553497635290738, "grad_norm": 0.8046875, "learning_rate": 0.00015207170030922867, "loss": 0.9749, "step": 21556 }, { "epoch": 0.5535233124866599, "grad_norm": 0.75, "learning_rate": 0.00015206788903733876, "loss": 0.8961, "step": 21557 }, { "epoch": 0.5535489896825817, "grad_norm": 0.8203125, "learning_rate": 0.0001520640776616823, "loss": 0.9246, "step": 21558 }, { "epoch": 0.5535746668785035, "grad_norm": 0.8125, "learning_rate": 0.0001520602661822668, "loss": 0.9438, "step": 21559 }, { "epoch": 0.5536003440744254, "grad_norm": 0.72265625, "learning_rate": 0.00015205645459909992, "loss": 0.8403, "step": 21560 }, { "epoch": 0.5536260212703472, "grad_norm": 0.76171875, "learning_rate": 0.0001520526429121892, "loss": 0.8991, "step": 21561 }, { "epoch": 0.5536516984662689, "grad_norm": 0.7734375, "learning_rate": 0.0001520488311215423, "loss": 0.9643, "step": 21562 }, { "epoch": 0.5536773756621908, "grad_norm": 0.81640625, "learning_rate": 0.00015204501922716675, "loss": 0.9106, "step": 21563 }, { "epoch": 0.5537030528581126, "grad_norm": 0.7421875, "learning_rate": 0.00015204120722907017, "loss": 0.9326, "step": 21564 }, { "epoch": 0.5537287300540344, "grad_norm": 0.82421875, "learning_rate": 0.0001520373951272602, "loss": 0.8437, "step": 21565 }, { "epoch": 0.5537544072499563, "grad_norm": 0.76953125, "learning_rate": 0.0001520335829217444, "loss": 0.8023, "step": 21566 }, { "epoch": 0.5537800844458781, "grad_norm": 0.78125, "learning_rate": 0.00015202977061253036, "loss": 0.8686, "step": 21567 }, { "epoch": 0.5538057616418, "grad_norm": 0.80859375, "learning_rate": 0.00015202595819962565, "loss": 0.9253, "step": 21568 }, { "epoch": 0.5538314388377217, "grad_norm": 0.76953125, "learning_rate": 0.00015202214568303796, "loss": 0.774, "step": 21569 }, { "epoch": 0.5538571160336435, "grad_norm": 0.80078125, "learning_rate": 0.0001520183330627748, "loss": 0.8016, "step": 21570 }, { "epoch": 0.5538827932295654, "grad_norm": 0.78125, "learning_rate": 0.0001520145203388438, "loss": 0.8217, "step": 21571 }, { "epoch": 0.5539084704254872, "grad_norm": 0.8203125, "learning_rate": 0.0001520107075112526, "loss": 0.7748, "step": 21572 }, { "epoch": 0.553934147621409, "grad_norm": 0.7734375, "learning_rate": 0.00015200689458000872, "loss": 0.9216, "step": 21573 }, { "epoch": 0.5539598248173309, "grad_norm": 0.95703125, "learning_rate": 0.0001520030815451198, "loss": 0.8015, "step": 21574 }, { "epoch": 0.5539855020132527, "grad_norm": 0.890625, "learning_rate": 0.00015199926840659344, "loss": 0.8925, "step": 21575 }, { "epoch": 0.5540111792091744, "grad_norm": 0.765625, "learning_rate": 0.00015199545516443724, "loss": 0.9824, "step": 21576 }, { "epoch": 0.5540368564050963, "grad_norm": 0.71484375, "learning_rate": 0.0001519916418186588, "loss": 0.9425, "step": 21577 }, { "epoch": 0.5540625336010181, "grad_norm": 0.7578125, "learning_rate": 0.00015198782836926567, "loss": 0.9527, "step": 21578 }, { "epoch": 0.5540882107969399, "grad_norm": 0.82421875, "learning_rate": 0.00015198401481626554, "loss": 0.8795, "step": 21579 }, { "epoch": 0.5541138879928618, "grad_norm": 0.78515625, "learning_rate": 0.00015198020115966598, "loss": 0.7559, "step": 21580 }, { "epoch": 0.5541395651887836, "grad_norm": 0.8203125, "learning_rate": 0.00015197638739947452, "loss": 0.9592, "step": 21581 }, { "epoch": 0.5541652423847053, "grad_norm": 0.76953125, "learning_rate": 0.00015197257353569886, "loss": 0.7497, "step": 21582 }, { "epoch": 0.5541909195806272, "grad_norm": 0.8359375, "learning_rate": 0.00015196875956834655, "loss": 0.9721, "step": 21583 }, { "epoch": 0.554216596776549, "grad_norm": 0.73828125, "learning_rate": 0.00015196494549742516, "loss": 0.8821, "step": 21584 }, { "epoch": 0.5542422739724708, "grad_norm": 0.80078125, "learning_rate": 0.00015196113132294237, "loss": 0.8736, "step": 21585 }, { "epoch": 0.5542679511683927, "grad_norm": 0.80078125, "learning_rate": 0.0001519573170449057, "loss": 0.8581, "step": 21586 }, { "epoch": 0.5542936283643145, "grad_norm": 0.76953125, "learning_rate": 0.00015195350266332283, "loss": 0.9626, "step": 21587 }, { "epoch": 0.5543193055602363, "grad_norm": 0.796875, "learning_rate": 0.0001519496881782013, "loss": 0.9283, "step": 21588 }, { "epoch": 0.5543449827561581, "grad_norm": 1.65625, "learning_rate": 0.00015194587358954875, "loss": 0.8632, "step": 21589 }, { "epoch": 0.5543706599520799, "grad_norm": 0.75, "learning_rate": 0.00015194205889737275, "loss": 0.868, "step": 21590 }, { "epoch": 0.5543963371480017, "grad_norm": 0.74609375, "learning_rate": 0.0001519382441016809, "loss": 0.7988, "step": 21591 }, { "epoch": 0.5544220143439236, "grad_norm": 0.73046875, "learning_rate": 0.00015193442920248085, "loss": 0.8434, "step": 21592 }, { "epoch": 0.5544476915398454, "grad_norm": 0.7890625, "learning_rate": 0.0001519306141997802, "loss": 0.8094, "step": 21593 }, { "epoch": 0.5544733687357672, "grad_norm": 0.7421875, "learning_rate": 0.00015192679909358648, "loss": 0.7353, "step": 21594 }, { "epoch": 0.5544990459316891, "grad_norm": 0.796875, "learning_rate": 0.00015192298388390734, "loss": 0.863, "step": 21595 }, { "epoch": 0.5545247231276108, "grad_norm": 0.78125, "learning_rate": 0.00015191916857075038, "loss": 0.802, "step": 21596 }, { "epoch": 0.5545504003235326, "grad_norm": 0.7734375, "learning_rate": 0.00015191535315412327, "loss": 0.9707, "step": 21597 }, { "epoch": 0.5545760775194545, "grad_norm": 0.74609375, "learning_rate": 0.00015191153763403347, "loss": 0.9006, "step": 21598 }, { "epoch": 0.5546017547153763, "grad_norm": 0.75, "learning_rate": 0.0001519077220104887, "loss": 0.6936, "step": 21599 }, { "epoch": 0.5546274319112982, "grad_norm": 0.7578125, "learning_rate": 0.00015190390628349654, "loss": 0.9956, "step": 21600 }, { "epoch": 0.55465310910722, "grad_norm": 0.6953125, "learning_rate": 0.00015190009045306456, "loss": 0.9802, "step": 21601 }, { "epoch": 0.5546787863031417, "grad_norm": 0.81640625, "learning_rate": 0.00015189627451920042, "loss": 0.8254, "step": 21602 }, { "epoch": 0.5547044634990635, "grad_norm": 0.75390625, "learning_rate": 0.00015189245848191166, "loss": 0.7907, "step": 21603 }, { "epoch": 0.5547301406949854, "grad_norm": 0.78125, "learning_rate": 0.00015188864234120592, "loss": 0.848, "step": 21604 }, { "epoch": 0.5547558178909072, "grad_norm": 0.7421875, "learning_rate": 0.00015188482609709082, "loss": 0.9417, "step": 21605 }, { "epoch": 0.5547814950868291, "grad_norm": 0.7578125, "learning_rate": 0.00015188100974957393, "loss": 0.8847, "step": 21606 }, { "epoch": 0.5548071722827509, "grad_norm": 0.77734375, "learning_rate": 0.0001518771932986629, "loss": 0.8683, "step": 21607 }, { "epoch": 0.5548328494786727, "grad_norm": 0.82421875, "learning_rate": 0.0001518733767443653, "loss": 0.8472, "step": 21608 }, { "epoch": 0.5548585266745945, "grad_norm": 0.7890625, "learning_rate": 0.00015186956008668873, "loss": 0.8813, "step": 21609 }, { "epoch": 0.5548842038705163, "grad_norm": 0.79296875, "learning_rate": 0.00015186574332564084, "loss": 0.9647, "step": 21610 }, { "epoch": 0.5549098810664381, "grad_norm": 0.78515625, "learning_rate": 0.00015186192646122917, "loss": 0.7904, "step": 21611 }, { "epoch": 0.55493555826236, "grad_norm": 0.78125, "learning_rate": 0.00015185810949346138, "loss": 0.6925, "step": 21612 }, { "epoch": 0.5549612354582818, "grad_norm": 0.828125, "learning_rate": 0.0001518542924223451, "loss": 1.0157, "step": 21613 }, { "epoch": 0.5549869126542036, "grad_norm": 0.8203125, "learning_rate": 0.00015185047524788784, "loss": 0.9518, "step": 21614 }, { "epoch": 0.5550125898501255, "grad_norm": 0.8125, "learning_rate": 0.00015184665797009732, "loss": 0.8432, "step": 21615 }, { "epoch": 0.5550382670460472, "grad_norm": 0.76171875, "learning_rate": 0.00015184284058898103, "loss": 0.7999, "step": 21616 }, { "epoch": 0.555063944241969, "grad_norm": 0.76953125, "learning_rate": 0.0001518390231045467, "loss": 0.7813, "step": 21617 }, { "epoch": 0.5550896214378909, "grad_norm": 0.703125, "learning_rate": 0.00015183520551680186, "loss": 0.782, "step": 21618 }, { "epoch": 0.5551152986338127, "grad_norm": 0.72265625, "learning_rate": 0.00015183138782575415, "loss": 0.7184, "step": 21619 }, { "epoch": 0.5551409758297345, "grad_norm": 0.8359375, "learning_rate": 0.00015182757003141115, "loss": 0.9459, "step": 21620 }, { "epoch": 0.5551666530256564, "grad_norm": 0.828125, "learning_rate": 0.00015182375213378045, "loss": 0.9671, "step": 21621 }, { "epoch": 0.5551923302215781, "grad_norm": 0.796875, "learning_rate": 0.00015181993413286975, "loss": 0.8764, "step": 21622 }, { "epoch": 0.5552180074174999, "grad_norm": 0.7890625, "learning_rate": 0.0001518161160286866, "loss": 0.8126, "step": 21623 }, { "epoch": 0.5552436846134218, "grad_norm": 0.7421875, "learning_rate": 0.0001518122978212386, "loss": 0.7565, "step": 21624 }, { "epoch": 0.5552693618093436, "grad_norm": 0.765625, "learning_rate": 0.00015180847951053335, "loss": 0.9081, "step": 21625 }, { "epoch": 0.5552950390052654, "grad_norm": 0.76171875, "learning_rate": 0.0001518046610965785, "loss": 0.8065, "step": 21626 }, { "epoch": 0.5553207162011873, "grad_norm": 0.796875, "learning_rate": 0.00015180084257938162, "loss": 0.8276, "step": 21627 }, { "epoch": 0.5553463933971091, "grad_norm": 0.7109375, "learning_rate": 0.00015179702395895037, "loss": 0.8144, "step": 21628 }, { "epoch": 0.5553720705930308, "grad_norm": 0.828125, "learning_rate": 0.0001517932052352923, "loss": 0.8888, "step": 21629 }, { "epoch": 0.5553977477889527, "grad_norm": 0.74609375, "learning_rate": 0.00015178938640841508, "loss": 0.846, "step": 21630 }, { "epoch": 0.5554234249848745, "grad_norm": 0.8046875, "learning_rate": 0.00015178556747832625, "loss": 0.9029, "step": 21631 }, { "epoch": 0.5554491021807964, "grad_norm": 0.75, "learning_rate": 0.0001517817484450335, "loss": 0.871, "step": 21632 }, { "epoch": 0.5554747793767182, "grad_norm": 0.79296875, "learning_rate": 0.0001517779293085444, "loss": 0.8974, "step": 21633 }, { "epoch": 0.55550045657264, "grad_norm": 0.81640625, "learning_rate": 0.00015177411006886657, "loss": 0.8299, "step": 21634 }, { "epoch": 0.5555261337685619, "grad_norm": 0.8515625, "learning_rate": 0.0001517702907260076, "loss": 0.9006, "step": 21635 }, { "epoch": 0.5555518109644836, "grad_norm": 1.0234375, "learning_rate": 0.0001517664712799751, "loss": 0.9948, "step": 21636 }, { "epoch": 0.5555774881604054, "grad_norm": 0.79296875, "learning_rate": 0.00015176265173077672, "loss": 0.8831, "step": 21637 }, { "epoch": 0.5556031653563273, "grad_norm": 0.765625, "learning_rate": 0.00015175883207842002, "loss": 0.8605, "step": 21638 }, { "epoch": 0.5556288425522491, "grad_norm": 0.828125, "learning_rate": 0.00015175501232291268, "loss": 1.1522, "step": 21639 }, { "epoch": 0.5556545197481709, "grad_norm": 0.73828125, "learning_rate": 0.0001517511924642623, "loss": 0.8043, "step": 21640 }, { "epoch": 0.5556801969440928, "grad_norm": 0.81640625, "learning_rate": 0.0001517473725024764, "loss": 1.0183, "step": 21641 }, { "epoch": 0.5557058741400145, "grad_norm": 0.84765625, "learning_rate": 0.0001517435524375627, "loss": 0.9088, "step": 21642 }, { "epoch": 0.5557315513359363, "grad_norm": 0.76953125, "learning_rate": 0.00015173973226952878, "loss": 0.8781, "step": 21643 }, { "epoch": 0.5557572285318582, "grad_norm": 0.8515625, "learning_rate": 0.00015173591199838223, "loss": 0.8538, "step": 21644 }, { "epoch": 0.55578290572778, "grad_norm": 0.7265625, "learning_rate": 0.0001517320916241307, "loss": 0.8839, "step": 21645 }, { "epoch": 0.5558085829237018, "grad_norm": 0.765625, "learning_rate": 0.00015172827114678177, "loss": 0.8037, "step": 21646 }, { "epoch": 0.5558342601196237, "grad_norm": 0.75390625, "learning_rate": 0.00015172445056634305, "loss": 0.8634, "step": 21647 }, { "epoch": 0.5558599373155455, "grad_norm": 0.71875, "learning_rate": 0.0001517206298828222, "loss": 0.7706, "step": 21648 }, { "epoch": 0.5558856145114672, "grad_norm": 0.8125, "learning_rate": 0.0001517168090962268, "loss": 0.9203, "step": 21649 }, { "epoch": 0.5559112917073891, "grad_norm": 0.80078125, "learning_rate": 0.00015171298820656448, "loss": 0.8726, "step": 21650 }, { "epoch": 0.5559369689033109, "grad_norm": 0.80078125, "learning_rate": 0.0001517091672138428, "loss": 0.8598, "step": 21651 }, { "epoch": 0.5559626460992327, "grad_norm": 0.74609375, "learning_rate": 0.00015170534611806948, "loss": 0.8486, "step": 21652 }, { "epoch": 0.5559883232951546, "grad_norm": 0.8828125, "learning_rate": 0.00015170152491925202, "loss": 0.8748, "step": 21653 }, { "epoch": 0.5560140004910764, "grad_norm": 0.75390625, "learning_rate": 0.00015169770361739816, "loss": 0.8613, "step": 21654 }, { "epoch": 0.5560396776869982, "grad_norm": 0.828125, "learning_rate": 0.0001516938822125154, "loss": 0.9499, "step": 21655 }, { "epoch": 0.55606535488292, "grad_norm": 0.77734375, "learning_rate": 0.0001516900607046114, "loss": 0.8181, "step": 21656 }, { "epoch": 0.5560910320788418, "grad_norm": 0.875, "learning_rate": 0.00015168623909369376, "loss": 0.9045, "step": 21657 }, { "epoch": 0.5561167092747636, "grad_norm": 0.85546875, "learning_rate": 0.0001516824173797702, "loss": 1.0133, "step": 21658 }, { "epoch": 0.5561423864706855, "grad_norm": 0.74609375, "learning_rate": 0.00015167859556284818, "loss": 0.7366, "step": 21659 }, { "epoch": 0.5561680636666073, "grad_norm": 0.75390625, "learning_rate": 0.0001516747736429354, "loss": 0.8139, "step": 21660 }, { "epoch": 0.5561937408625292, "grad_norm": 0.6796875, "learning_rate": 0.00015167095162003946, "loss": 0.6514, "step": 21661 }, { "epoch": 0.5562194180584509, "grad_norm": 0.73828125, "learning_rate": 0.00015166712949416796, "loss": 0.776, "step": 21662 }, { "epoch": 0.5562450952543727, "grad_norm": 0.72265625, "learning_rate": 0.0001516633072653286, "loss": 0.8507, "step": 21663 }, { "epoch": 0.5562707724502945, "grad_norm": 0.72265625, "learning_rate": 0.00015165948493352886, "loss": 0.7641, "step": 21664 }, { "epoch": 0.5562964496462164, "grad_norm": 0.83203125, "learning_rate": 0.00015165566249877648, "loss": 0.8091, "step": 21665 }, { "epoch": 0.5563221268421382, "grad_norm": 0.82421875, "learning_rate": 0.00015165183996107903, "loss": 0.954, "step": 21666 }, { "epoch": 0.5563478040380601, "grad_norm": 0.81640625, "learning_rate": 0.00015164801732044412, "loss": 0.9911, "step": 21667 }, { "epoch": 0.5563734812339819, "grad_norm": 0.7734375, "learning_rate": 0.0001516441945768794, "loss": 0.8652, "step": 21668 }, { "epoch": 0.5563991584299036, "grad_norm": 0.75, "learning_rate": 0.0001516403717303924, "loss": 0.8863, "step": 21669 }, { "epoch": 0.5564248356258255, "grad_norm": 0.78125, "learning_rate": 0.00015163654878099086, "loss": 0.9205, "step": 21670 }, { "epoch": 0.5564505128217473, "grad_norm": 0.73046875, "learning_rate": 0.00015163272572868235, "loss": 0.8841, "step": 21671 }, { "epoch": 0.5564761900176691, "grad_norm": 0.765625, "learning_rate": 0.00015162890257347446, "loss": 0.8337, "step": 21672 }, { "epoch": 0.556501867213591, "grad_norm": 0.765625, "learning_rate": 0.00015162507931537484, "loss": 0.8178, "step": 21673 }, { "epoch": 0.5565275444095128, "grad_norm": 0.8203125, "learning_rate": 0.00015162125595439107, "loss": 0.9675, "step": 21674 }, { "epoch": 0.5565532216054346, "grad_norm": 0.78515625, "learning_rate": 0.00015161743249053082, "loss": 0.9047, "step": 21675 }, { "epoch": 0.5565788988013564, "grad_norm": 0.71484375, "learning_rate": 0.0001516136089238017, "loss": 0.8213, "step": 21676 }, { "epoch": 0.5566045759972782, "grad_norm": 0.6953125, "learning_rate": 0.00015160978525421132, "loss": 0.6929, "step": 21677 }, { "epoch": 0.5566302531932, "grad_norm": 0.7734375, "learning_rate": 0.00015160596148176727, "loss": 0.8681, "step": 21678 }, { "epoch": 0.5566559303891219, "grad_norm": 0.73046875, "learning_rate": 0.00015160213760647724, "loss": 0.8222, "step": 21679 }, { "epoch": 0.5566816075850437, "grad_norm": 0.83203125, "learning_rate": 0.00015159831362834876, "loss": 0.8973, "step": 21680 }, { "epoch": 0.5567072847809655, "grad_norm": 0.73046875, "learning_rate": 0.00015159448954738956, "loss": 0.8673, "step": 21681 }, { "epoch": 0.5567329619768873, "grad_norm": 0.82421875, "learning_rate": 0.00015159066536360718, "loss": 0.8928, "step": 21682 }, { "epoch": 0.5567586391728091, "grad_norm": 0.80078125, "learning_rate": 0.00015158684107700925, "loss": 0.8839, "step": 21683 }, { "epoch": 0.5567843163687309, "grad_norm": 0.7578125, "learning_rate": 0.00015158301668760342, "loss": 0.8538, "step": 21684 }, { "epoch": 0.5568099935646528, "grad_norm": 0.7421875, "learning_rate": 0.00015157919219539729, "loss": 0.8767, "step": 21685 }, { "epoch": 0.5568356707605746, "grad_norm": 0.8046875, "learning_rate": 0.0001515753676003985, "loss": 0.9575, "step": 21686 }, { "epoch": 0.5568613479564964, "grad_norm": 0.7578125, "learning_rate": 0.00015157154290261463, "loss": 0.8112, "step": 21687 }, { "epoch": 0.5568870251524183, "grad_norm": 0.80859375, "learning_rate": 0.00015156771810205337, "loss": 1.006, "step": 21688 }, { "epoch": 0.55691270234834, "grad_norm": 0.80859375, "learning_rate": 0.0001515638931987223, "loss": 0.9079, "step": 21689 }, { "epoch": 0.5569383795442618, "grad_norm": 0.73046875, "learning_rate": 0.00015156006819262903, "loss": 0.7452, "step": 21690 }, { "epoch": 0.5569640567401837, "grad_norm": 0.73046875, "learning_rate": 0.00015155624308378122, "loss": 0.7185, "step": 21691 }, { "epoch": 0.5569897339361055, "grad_norm": 0.7734375, "learning_rate": 0.00015155241787218648, "loss": 0.9733, "step": 21692 }, { "epoch": 0.5570154111320273, "grad_norm": 0.71484375, "learning_rate": 0.00015154859255785242, "loss": 0.8218, "step": 21693 }, { "epoch": 0.5570410883279492, "grad_norm": 0.8125, "learning_rate": 0.00015154476714078664, "loss": 0.8401, "step": 21694 }, { "epoch": 0.5570667655238709, "grad_norm": 0.81640625, "learning_rate": 0.00015154094162099684, "loss": 0.823, "step": 21695 }, { "epoch": 0.5570924427197927, "grad_norm": 0.80078125, "learning_rate": 0.00015153711599849055, "loss": 0.9757, "step": 21696 }, { "epoch": 0.5571181199157146, "grad_norm": 0.7734375, "learning_rate": 0.0001515332902732755, "loss": 0.7936, "step": 21697 }, { "epoch": 0.5571437971116364, "grad_norm": 0.79296875, "learning_rate": 0.00015152946444535923, "loss": 0.885, "step": 21698 }, { "epoch": 0.5571694743075583, "grad_norm": 0.81640625, "learning_rate": 0.00015152563851474938, "loss": 0.937, "step": 21699 }, { "epoch": 0.5571951515034801, "grad_norm": 0.77734375, "learning_rate": 0.00015152181248145362, "loss": 0.9286, "step": 21700 }, { "epoch": 0.5572208286994019, "grad_norm": 0.78125, "learning_rate": 0.00015151798634547951, "loss": 0.8851, "step": 21701 }, { "epoch": 0.5572465058953237, "grad_norm": 0.75390625, "learning_rate": 0.00015151416010683472, "loss": 0.7855, "step": 21702 }, { "epoch": 0.5572721830912455, "grad_norm": 0.76953125, "learning_rate": 0.00015151033376552684, "loss": 0.7819, "step": 21703 }, { "epoch": 0.5572978602871673, "grad_norm": 0.7265625, "learning_rate": 0.00015150650732156354, "loss": 0.8603, "step": 21704 }, { "epoch": 0.5573235374830892, "grad_norm": 0.75390625, "learning_rate": 0.00015150268077495244, "loss": 0.9805, "step": 21705 }, { "epoch": 0.557349214679011, "grad_norm": 0.76171875, "learning_rate": 0.00015149885412570112, "loss": 0.8831, "step": 21706 }, { "epoch": 0.5573748918749328, "grad_norm": 0.84765625, "learning_rate": 0.00015149502737381727, "loss": 0.9925, "step": 21707 }, { "epoch": 0.5574005690708547, "grad_norm": 0.73828125, "learning_rate": 0.00015149120051930845, "loss": 0.8254, "step": 21708 }, { "epoch": 0.5574262462667764, "grad_norm": 0.65234375, "learning_rate": 0.00015148737356218234, "loss": 0.8604, "step": 21709 }, { "epoch": 0.5574519234626982, "grad_norm": 0.76171875, "learning_rate": 0.00015148354650244651, "loss": 0.9533, "step": 21710 }, { "epoch": 0.5574776006586201, "grad_norm": 0.78515625, "learning_rate": 0.00015147971934010868, "loss": 0.8316, "step": 21711 }, { "epoch": 0.5575032778545419, "grad_norm": 0.83203125, "learning_rate": 0.00015147589207517638, "loss": 0.9957, "step": 21712 }, { "epoch": 0.5575289550504637, "grad_norm": 0.8359375, "learning_rate": 0.0001514720647076573, "loss": 0.8792, "step": 21713 }, { "epoch": 0.5575546322463856, "grad_norm": 0.7578125, "learning_rate": 0.000151468237237559, "loss": 0.8968, "step": 21714 }, { "epoch": 0.5575803094423073, "grad_norm": 0.765625, "learning_rate": 0.00015146440966488917, "loss": 0.9332, "step": 21715 }, { "epoch": 0.5576059866382291, "grad_norm": 0.734375, "learning_rate": 0.00015146058198965543, "loss": 0.8257, "step": 21716 }, { "epoch": 0.557631663834151, "grad_norm": 0.81640625, "learning_rate": 0.0001514567542118654, "loss": 0.9077, "step": 21717 }, { "epoch": 0.5576573410300728, "grad_norm": 0.7265625, "learning_rate": 0.0001514529263315267, "loss": 0.8311, "step": 21718 }, { "epoch": 0.5576830182259946, "grad_norm": 0.765625, "learning_rate": 0.000151449098348647, "loss": 0.9544, "step": 21719 }, { "epoch": 0.5577086954219165, "grad_norm": 0.8203125, "learning_rate": 0.00015144527026323385, "loss": 0.9388, "step": 21720 }, { "epoch": 0.5577343726178383, "grad_norm": 0.78515625, "learning_rate": 0.00015144144207529493, "loss": 1.0556, "step": 21721 }, { "epoch": 0.55776004981376, "grad_norm": 0.890625, "learning_rate": 0.00015143761378483788, "loss": 0.8252, "step": 21722 }, { "epoch": 0.5577857270096819, "grad_norm": 0.73046875, "learning_rate": 0.00015143378539187027, "loss": 0.8428, "step": 21723 }, { "epoch": 0.5578114042056037, "grad_norm": 0.796875, "learning_rate": 0.0001514299568963998, "loss": 0.8292, "step": 21724 }, { "epoch": 0.5578370814015255, "grad_norm": 0.765625, "learning_rate": 0.0001514261282984341, "loss": 0.833, "step": 21725 }, { "epoch": 0.5578627585974474, "grad_norm": 0.75390625, "learning_rate": 0.00015142229959798074, "loss": 0.9064, "step": 21726 }, { "epoch": 0.5578884357933692, "grad_norm": 0.81640625, "learning_rate": 0.0001514184707950474, "loss": 1.0103, "step": 21727 }, { "epoch": 0.5579141129892911, "grad_norm": 0.76171875, "learning_rate": 0.00015141464188964167, "loss": 0.8224, "step": 21728 }, { "epoch": 0.5579397901852128, "grad_norm": 0.77734375, "learning_rate": 0.00015141081288177122, "loss": 0.8749, "step": 21729 }, { "epoch": 0.5579654673811346, "grad_norm": 0.796875, "learning_rate": 0.00015140698377144366, "loss": 0.8147, "step": 21730 }, { "epoch": 0.5579911445770565, "grad_norm": 0.85546875, "learning_rate": 0.00015140315455866663, "loss": 0.9387, "step": 21731 }, { "epoch": 0.5580168217729783, "grad_norm": 0.78125, "learning_rate": 0.00015139932524344777, "loss": 0.9824, "step": 21732 }, { "epoch": 0.5580424989689001, "grad_norm": 0.8671875, "learning_rate": 0.00015139549582579464, "loss": 0.9482, "step": 21733 }, { "epoch": 0.558068176164822, "grad_norm": 0.78515625, "learning_rate": 0.00015139166630571496, "loss": 0.865, "step": 21734 }, { "epoch": 0.5580938533607437, "grad_norm": 0.7890625, "learning_rate": 0.00015138783668321634, "loss": 0.9659, "step": 21735 }, { "epoch": 0.5581195305566655, "grad_norm": 0.80859375, "learning_rate": 0.0001513840069583064, "loss": 0.9867, "step": 21736 }, { "epoch": 0.5581452077525874, "grad_norm": 0.7421875, "learning_rate": 0.0001513801771309928, "loss": 0.9112, "step": 21737 }, { "epoch": 0.5581708849485092, "grad_norm": 0.82421875, "learning_rate": 0.0001513763472012831, "loss": 0.9434, "step": 21738 }, { "epoch": 0.558196562144431, "grad_norm": 0.78125, "learning_rate": 0.000151372517169185, "loss": 0.7955, "step": 21739 }, { "epoch": 0.5582222393403529, "grad_norm": 0.75, "learning_rate": 0.00015136868703470615, "loss": 1.024, "step": 21740 }, { "epoch": 0.5582479165362747, "grad_norm": 0.84375, "learning_rate": 0.0001513648567978541, "loss": 0.8521, "step": 21741 }, { "epoch": 0.5582735937321964, "grad_norm": 0.8046875, "learning_rate": 0.00015136102645863655, "loss": 0.9581, "step": 21742 }, { "epoch": 0.5582992709281183, "grad_norm": 0.76953125, "learning_rate": 0.0001513571960170611, "loss": 0.7561, "step": 21743 }, { "epoch": 0.5583249481240401, "grad_norm": 0.8203125, "learning_rate": 0.00015135336547313544, "loss": 0.8387, "step": 21744 }, { "epoch": 0.5583506253199619, "grad_norm": 0.79296875, "learning_rate": 0.00015134953482686712, "loss": 0.9411, "step": 21745 }, { "epoch": 0.5583763025158838, "grad_norm": 0.76953125, "learning_rate": 0.0001513457040782638, "loss": 0.8493, "step": 21746 }, { "epoch": 0.5584019797118056, "grad_norm": 0.7265625, "learning_rate": 0.00015134187322733315, "loss": 0.8592, "step": 21747 }, { "epoch": 0.5584276569077274, "grad_norm": 0.91015625, "learning_rate": 0.0001513380422740828, "loss": 0.8144, "step": 21748 }, { "epoch": 0.5584533341036492, "grad_norm": 0.7734375, "learning_rate": 0.00015133421121852036, "loss": 1.0346, "step": 21749 }, { "epoch": 0.558479011299571, "grad_norm": 0.796875, "learning_rate": 0.00015133038006065344, "loss": 0.9976, "step": 21750 }, { "epoch": 0.5585046884954928, "grad_norm": 0.7109375, "learning_rate": 0.0001513265488004898, "loss": 0.7381, "step": 21751 }, { "epoch": 0.5585303656914147, "grad_norm": 0.76171875, "learning_rate": 0.0001513227174380369, "loss": 0.8497, "step": 21752 }, { "epoch": 0.5585560428873365, "grad_norm": 0.8203125, "learning_rate": 0.00015131888597330248, "loss": 0.9217, "step": 21753 }, { "epoch": 0.5585817200832583, "grad_norm": 0.77734375, "learning_rate": 0.00015131505440629415, "loss": 0.8005, "step": 21754 }, { "epoch": 0.5586073972791801, "grad_norm": 0.79296875, "learning_rate": 0.00015131122273701956, "loss": 0.9284, "step": 21755 }, { "epoch": 0.5586330744751019, "grad_norm": 0.77734375, "learning_rate": 0.00015130739096548635, "loss": 0.8363, "step": 21756 }, { "epoch": 0.5586587516710237, "grad_norm": 0.7890625, "learning_rate": 0.00015130355909170215, "loss": 0.9707, "step": 21757 }, { "epoch": 0.5586844288669456, "grad_norm": 0.73828125, "learning_rate": 0.00015129972711567458, "loss": 0.9657, "step": 21758 }, { "epoch": 0.5587101060628674, "grad_norm": 0.75, "learning_rate": 0.00015129589503741129, "loss": 0.7865, "step": 21759 }, { "epoch": 0.5587357832587893, "grad_norm": 0.890625, "learning_rate": 0.0001512920628569199, "loss": 0.881, "step": 21760 }, { "epoch": 0.5587614604547111, "grad_norm": 0.7265625, "learning_rate": 0.00015128823057420807, "loss": 0.8165, "step": 21761 }, { "epoch": 0.5587871376506328, "grad_norm": 0.76171875, "learning_rate": 0.00015128439818928345, "loss": 0.803, "step": 21762 }, { "epoch": 0.5588128148465547, "grad_norm": 0.73046875, "learning_rate": 0.0001512805657021536, "loss": 0.8125, "step": 21763 }, { "epoch": 0.5588384920424765, "grad_norm": 0.84765625, "learning_rate": 0.0001512767331128263, "loss": 1.0417, "step": 21764 }, { "epoch": 0.5588641692383983, "grad_norm": 0.78125, "learning_rate": 0.000151272900421309, "loss": 0.8159, "step": 21765 }, { "epoch": 0.5588898464343202, "grad_norm": 0.703125, "learning_rate": 0.00015126906762760953, "loss": 0.745, "step": 21766 }, { "epoch": 0.558915523630242, "grad_norm": 0.78125, "learning_rate": 0.0001512652347317354, "loss": 0.9336, "step": 21767 }, { "epoch": 0.5589412008261638, "grad_norm": 0.78515625, "learning_rate": 0.00015126140173369427, "loss": 0.812, "step": 21768 }, { "epoch": 0.5589668780220856, "grad_norm": 0.8515625, "learning_rate": 0.00015125756863349383, "loss": 0.7618, "step": 21769 }, { "epoch": 0.5589925552180074, "grad_norm": 0.7890625, "learning_rate": 0.00015125373543114167, "loss": 0.8524, "step": 21770 }, { "epoch": 0.5590182324139292, "grad_norm": 0.96484375, "learning_rate": 0.00015124990212664544, "loss": 0.9137, "step": 21771 }, { "epoch": 0.5590439096098511, "grad_norm": 0.82421875, "learning_rate": 0.00015124606872001284, "loss": 0.8807, "step": 21772 }, { "epoch": 0.5590695868057729, "grad_norm": 0.7578125, "learning_rate": 0.00015124223521125138, "loss": 0.8125, "step": 21773 }, { "epoch": 0.5590952640016947, "grad_norm": 0.7734375, "learning_rate": 0.0001512384016003688, "loss": 0.8237, "step": 21774 }, { "epoch": 0.5591209411976165, "grad_norm": 0.8125, "learning_rate": 0.00015123456788737274, "loss": 0.7388, "step": 21775 }, { "epoch": 0.5591466183935383, "grad_norm": 0.7265625, "learning_rate": 0.00015123073407227077, "loss": 0.7827, "step": 21776 }, { "epoch": 0.5591722955894601, "grad_norm": 0.8515625, "learning_rate": 0.00015122690015507062, "loss": 0.9202, "step": 21777 }, { "epoch": 0.559197972785382, "grad_norm": 0.81640625, "learning_rate": 0.00015122306613577982, "loss": 0.796, "step": 21778 }, { "epoch": 0.5592236499813038, "grad_norm": 0.83984375, "learning_rate": 0.00015121923201440613, "loss": 0.9659, "step": 21779 }, { "epoch": 0.5592493271772256, "grad_norm": 0.81640625, "learning_rate": 0.00015121539779095714, "loss": 0.7714, "step": 21780 }, { "epoch": 0.5592750043731475, "grad_norm": 0.78515625, "learning_rate": 0.00015121156346544046, "loss": 1.026, "step": 21781 }, { "epoch": 0.5593006815690692, "grad_norm": 0.7890625, "learning_rate": 0.0001512077290378638, "loss": 0.9426, "step": 21782 }, { "epoch": 0.559326358764991, "grad_norm": 0.81640625, "learning_rate": 0.00015120389450823472, "loss": 1.02, "step": 21783 }, { "epoch": 0.5593520359609129, "grad_norm": 0.8359375, "learning_rate": 0.00015120005987656092, "loss": 0.9898, "step": 21784 }, { "epoch": 0.5593777131568347, "grad_norm": 0.8203125, "learning_rate": 0.00015119622514285, "loss": 0.8609, "step": 21785 }, { "epoch": 0.5594033903527565, "grad_norm": 0.77734375, "learning_rate": 0.00015119239030710966, "loss": 0.9178, "step": 21786 }, { "epoch": 0.5594290675486784, "grad_norm": 0.78515625, "learning_rate": 0.0001511885553693475, "loss": 0.9933, "step": 21787 }, { "epoch": 0.5594547447446002, "grad_norm": 0.76171875, "learning_rate": 0.00015118472032957117, "loss": 0.7203, "step": 21788 }, { "epoch": 0.5594804219405219, "grad_norm": 0.91015625, "learning_rate": 0.00015118088518778833, "loss": 0.8766, "step": 21789 }, { "epoch": 0.5595060991364438, "grad_norm": 0.7578125, "learning_rate": 0.00015117704994400662, "loss": 0.9069, "step": 21790 }, { "epoch": 0.5595317763323656, "grad_norm": 0.71875, "learning_rate": 0.00015117321459823363, "loss": 0.8289, "step": 21791 }, { "epoch": 0.5595574535282875, "grad_norm": 0.734375, "learning_rate": 0.00015116937915047706, "loss": 0.9021, "step": 21792 }, { "epoch": 0.5595831307242093, "grad_norm": 0.77734375, "learning_rate": 0.00015116554360074456, "loss": 0.8874, "step": 21793 }, { "epoch": 0.5596088079201311, "grad_norm": 0.66796875, "learning_rate": 0.0001511617079490437, "loss": 0.9289, "step": 21794 }, { "epoch": 0.5596344851160528, "grad_norm": 0.75390625, "learning_rate": 0.00015115787219538223, "loss": 0.9204, "step": 21795 }, { "epoch": 0.5596601623119747, "grad_norm": 0.79296875, "learning_rate": 0.00015115403633976775, "loss": 0.9721, "step": 21796 }, { "epoch": 0.5596858395078965, "grad_norm": 0.734375, "learning_rate": 0.00015115020038220788, "loss": 0.782, "step": 21797 }, { "epoch": 0.5597115167038184, "grad_norm": 0.73046875, "learning_rate": 0.00015114636432271027, "loss": 0.8953, "step": 21798 }, { "epoch": 0.5597371938997402, "grad_norm": 0.7890625, "learning_rate": 0.0001511425281612826, "loss": 0.9382, "step": 21799 }, { "epoch": 0.559762871095662, "grad_norm": 0.80859375, "learning_rate": 0.00015113869189793246, "loss": 1.0234, "step": 21800 }, { "epoch": 0.5597885482915839, "grad_norm": 0.8125, "learning_rate": 0.00015113485553266753, "loss": 0.913, "step": 21801 }, { "epoch": 0.5598142254875056, "grad_norm": 0.875, "learning_rate": 0.00015113101906549548, "loss": 0.849, "step": 21802 }, { "epoch": 0.5598399026834274, "grad_norm": 0.7734375, "learning_rate": 0.00015112718249642388, "loss": 0.9372, "step": 21803 }, { "epoch": 0.5598655798793493, "grad_norm": 0.78515625, "learning_rate": 0.0001511233458254605, "loss": 0.9669, "step": 21804 }, { "epoch": 0.5598912570752711, "grad_norm": 0.734375, "learning_rate": 0.00015111950905261282, "loss": 0.8815, "step": 21805 }, { "epoch": 0.5599169342711929, "grad_norm": 0.76171875, "learning_rate": 0.00015111567217788863, "loss": 0.672, "step": 21806 }, { "epoch": 0.5599426114671148, "grad_norm": 0.8203125, "learning_rate": 0.0001511118352012955, "loss": 0.7838, "step": 21807 }, { "epoch": 0.5599682886630366, "grad_norm": 0.84375, "learning_rate": 0.00015110799812284112, "loss": 0.9445, "step": 21808 }, { "epoch": 0.5599939658589583, "grad_norm": 0.734375, "learning_rate": 0.0001511041609425331, "loss": 0.7898, "step": 21809 }, { "epoch": 0.5600196430548802, "grad_norm": 0.80078125, "learning_rate": 0.0001511003236603791, "loss": 0.9416, "step": 21810 }, { "epoch": 0.560045320250802, "grad_norm": 0.8203125, "learning_rate": 0.00015109648627638678, "loss": 0.8612, "step": 21811 }, { "epoch": 0.5600709974467238, "grad_norm": 0.734375, "learning_rate": 0.00015109264879056383, "loss": 0.9202, "step": 21812 }, { "epoch": 0.5600966746426457, "grad_norm": 0.67578125, "learning_rate": 0.00015108881120291776, "loss": 0.8691, "step": 21813 }, { "epoch": 0.5601223518385675, "grad_norm": 0.75390625, "learning_rate": 0.00015108497351345635, "loss": 1.0395, "step": 21814 }, { "epoch": 0.5601480290344892, "grad_norm": 0.80859375, "learning_rate": 0.00015108113572218722, "loss": 0.8517, "step": 21815 }, { "epoch": 0.5601737062304111, "grad_norm": 0.75, "learning_rate": 0.00015107729782911793, "loss": 0.9008, "step": 21816 }, { "epoch": 0.5601993834263329, "grad_norm": 0.796875, "learning_rate": 0.00015107345983425628, "loss": 0.9533, "step": 21817 }, { "epoch": 0.5602250606222547, "grad_norm": 0.76953125, "learning_rate": 0.00015106962173760976, "loss": 0.8692, "step": 21818 }, { "epoch": 0.5602507378181766, "grad_norm": 0.8203125, "learning_rate": 0.00015106578353918617, "loss": 0.76, "step": 21819 }, { "epoch": 0.5602764150140984, "grad_norm": 0.8125, "learning_rate": 0.00015106194523899303, "loss": 0.8161, "step": 21820 }, { "epoch": 0.5603020922100203, "grad_norm": 0.77734375, "learning_rate": 0.00015105810683703808, "loss": 0.7919, "step": 21821 }, { "epoch": 0.560327769405942, "grad_norm": 0.7734375, "learning_rate": 0.0001510542683333289, "loss": 0.7887, "step": 21822 }, { "epoch": 0.5603534466018638, "grad_norm": 0.7734375, "learning_rate": 0.00015105042972787323, "loss": 0.9665, "step": 21823 }, { "epoch": 0.5603791237977856, "grad_norm": 0.7109375, "learning_rate": 0.00015104659102067864, "loss": 0.8842, "step": 21824 }, { "epoch": 0.5604048009937075, "grad_norm": 0.734375, "learning_rate": 0.0001510427522117528, "loss": 0.8375, "step": 21825 }, { "epoch": 0.5604304781896293, "grad_norm": 0.75, "learning_rate": 0.00015103891330110332, "loss": 0.9522, "step": 21826 }, { "epoch": 0.5604561553855512, "grad_norm": 0.73046875, "learning_rate": 0.00015103507428873794, "loss": 0.8241, "step": 21827 }, { "epoch": 0.560481832581473, "grad_norm": 0.8203125, "learning_rate": 0.00015103123517466429, "loss": 0.8458, "step": 21828 }, { "epoch": 0.5605075097773947, "grad_norm": 0.8125, "learning_rate": 0.00015102739595888997, "loss": 0.9421, "step": 21829 }, { "epoch": 0.5605331869733166, "grad_norm": 0.79296875, "learning_rate": 0.0001510235566414227, "loss": 0.8773, "step": 21830 }, { "epoch": 0.5605588641692384, "grad_norm": 0.6953125, "learning_rate": 0.00015101971722227003, "loss": 0.7947, "step": 21831 }, { "epoch": 0.5605845413651602, "grad_norm": 0.8046875, "learning_rate": 0.0001510158777014397, "loss": 0.7624, "step": 21832 }, { "epoch": 0.5606102185610821, "grad_norm": 0.74609375, "learning_rate": 0.00015101203807893934, "loss": 0.7541, "step": 21833 }, { "epoch": 0.5606358957570039, "grad_norm": 0.76171875, "learning_rate": 0.00015100819835477656, "loss": 0.9402, "step": 21834 }, { "epoch": 0.5606615729529256, "grad_norm": 0.73046875, "learning_rate": 0.00015100435852895906, "loss": 1.0435, "step": 21835 }, { "epoch": 0.5606872501488475, "grad_norm": 0.66796875, "learning_rate": 0.00015100051860149452, "loss": 0.7581, "step": 21836 }, { "epoch": 0.5607129273447693, "grad_norm": 0.75390625, "learning_rate": 0.00015099667857239055, "loss": 0.8065, "step": 21837 }, { "epoch": 0.5607386045406911, "grad_norm": 0.83203125, "learning_rate": 0.00015099283844165478, "loss": 0.9145, "step": 21838 }, { "epoch": 0.560764281736613, "grad_norm": 0.859375, "learning_rate": 0.0001509889982092949, "loss": 0.9244, "step": 21839 }, { "epoch": 0.5607899589325348, "grad_norm": 0.82421875, "learning_rate": 0.00015098515787531852, "loss": 0.9774, "step": 21840 }, { "epoch": 0.5608156361284566, "grad_norm": 0.7265625, "learning_rate": 0.00015098131743973336, "loss": 0.8852, "step": 21841 }, { "epoch": 0.5608413133243784, "grad_norm": 0.75390625, "learning_rate": 0.00015097747690254702, "loss": 0.9698, "step": 21842 }, { "epoch": 0.5608669905203002, "grad_norm": 0.83203125, "learning_rate": 0.00015097363626376722, "loss": 0.8916, "step": 21843 }, { "epoch": 0.560892667716222, "grad_norm": 0.78125, "learning_rate": 0.0001509697955234015, "loss": 0.9046, "step": 21844 }, { "epoch": 0.5609183449121439, "grad_norm": 0.87109375, "learning_rate": 0.00015096595468145764, "loss": 0.9468, "step": 21845 }, { "epoch": 0.5609440221080657, "grad_norm": 0.74609375, "learning_rate": 0.0001509621137379432, "loss": 0.7948, "step": 21846 }, { "epoch": 0.5609696993039875, "grad_norm": 0.80078125, "learning_rate": 0.00015095827269286587, "loss": 0.863, "step": 21847 }, { "epoch": 0.5609953764999094, "grad_norm": 0.80859375, "learning_rate": 0.0001509544315462333, "loss": 0.8345, "step": 21848 }, { "epoch": 0.5610210536958311, "grad_norm": 1.1328125, "learning_rate": 0.0001509505902980532, "loss": 0.9747, "step": 21849 }, { "epoch": 0.5610467308917529, "grad_norm": 0.71484375, "learning_rate": 0.00015094674894833315, "loss": 0.6613, "step": 21850 }, { "epoch": 0.5610724080876748, "grad_norm": 0.76171875, "learning_rate": 0.0001509429074970808, "loss": 0.7705, "step": 21851 }, { "epoch": 0.5610980852835966, "grad_norm": 0.82421875, "learning_rate": 0.00015093906594430386, "loss": 0.8962, "step": 21852 }, { "epoch": 0.5611237624795185, "grad_norm": 0.78125, "learning_rate": 0.00015093522429001, "loss": 0.8796, "step": 21853 }, { "epoch": 0.5611494396754403, "grad_norm": 0.79296875, "learning_rate": 0.00015093138253420677, "loss": 0.9142, "step": 21854 }, { "epoch": 0.561175116871362, "grad_norm": 0.78515625, "learning_rate": 0.00015092754067690196, "loss": 0.8302, "step": 21855 }, { "epoch": 0.5612007940672838, "grad_norm": 0.73046875, "learning_rate": 0.00015092369871810312, "loss": 0.8392, "step": 21856 }, { "epoch": 0.5612264712632057, "grad_norm": 0.74609375, "learning_rate": 0.00015091985665781796, "loss": 0.8122, "step": 21857 }, { "epoch": 0.5612521484591275, "grad_norm": 0.76953125, "learning_rate": 0.0001509160144960541, "loss": 0.7722, "step": 21858 }, { "epoch": 0.5612778256550494, "grad_norm": 0.82421875, "learning_rate": 0.00015091217223281927, "loss": 0.9109, "step": 21859 }, { "epoch": 0.5613035028509712, "grad_norm": 0.77734375, "learning_rate": 0.00015090832986812104, "loss": 0.8924, "step": 21860 }, { "epoch": 0.561329180046893, "grad_norm": 0.7265625, "learning_rate": 0.0001509044874019671, "loss": 0.75, "step": 21861 }, { "epoch": 0.5613548572428148, "grad_norm": 0.7265625, "learning_rate": 0.00015090064483436517, "loss": 0.6874, "step": 21862 }, { "epoch": 0.5613805344387366, "grad_norm": 0.75, "learning_rate": 0.00015089680216532283, "loss": 0.9774, "step": 21863 }, { "epoch": 0.5614062116346584, "grad_norm": 0.79296875, "learning_rate": 0.00015089295939484774, "loss": 0.7641, "step": 21864 }, { "epoch": 0.5614318888305803, "grad_norm": 0.734375, "learning_rate": 0.00015088911652294762, "loss": 0.855, "step": 21865 }, { "epoch": 0.5614575660265021, "grad_norm": 0.7421875, "learning_rate": 0.00015088527354963, "loss": 0.8733, "step": 21866 }, { "epoch": 0.5614832432224239, "grad_norm": 0.7578125, "learning_rate": 0.00015088143047490272, "loss": 0.823, "step": 21867 }, { "epoch": 0.5615089204183458, "grad_norm": 0.8125, "learning_rate": 0.0001508775872987733, "loss": 0.7405, "step": 21868 }, { "epoch": 0.5615345976142675, "grad_norm": 0.8359375, "learning_rate": 0.00015087374402124944, "loss": 0.8022, "step": 21869 }, { "epoch": 0.5615602748101893, "grad_norm": 0.8125, "learning_rate": 0.00015086990064233885, "loss": 0.9102, "step": 21870 }, { "epoch": 0.5615859520061112, "grad_norm": 0.83984375, "learning_rate": 0.0001508660571620491, "loss": 0.9125, "step": 21871 }, { "epoch": 0.561611629202033, "grad_norm": 0.72265625, "learning_rate": 0.0001508622135803879, "loss": 0.9336, "step": 21872 }, { "epoch": 0.5616373063979548, "grad_norm": 0.734375, "learning_rate": 0.0001508583698973629, "loss": 0.678, "step": 21873 }, { "epoch": 0.5616629835938767, "grad_norm": 0.80078125, "learning_rate": 0.00015085452611298176, "loss": 0.9509, "step": 21874 }, { "epoch": 0.5616886607897984, "grad_norm": 0.75, "learning_rate": 0.00015085068222725216, "loss": 0.7706, "step": 21875 }, { "epoch": 0.5617143379857202, "grad_norm": 0.8828125, "learning_rate": 0.00015084683824018175, "loss": 0.8882, "step": 21876 }, { "epoch": 0.5617400151816421, "grad_norm": 0.8046875, "learning_rate": 0.00015084299415177817, "loss": 1.0129, "step": 21877 }, { "epoch": 0.5617656923775639, "grad_norm": 0.73046875, "learning_rate": 0.0001508391499620491, "loss": 0.8501, "step": 21878 }, { "epoch": 0.5617913695734857, "grad_norm": 0.796875, "learning_rate": 0.00015083530567100217, "loss": 0.8439, "step": 21879 }, { "epoch": 0.5618170467694076, "grad_norm": 0.6875, "learning_rate": 0.0001508314612786451, "loss": 0.8231, "step": 21880 }, { "epoch": 0.5618427239653294, "grad_norm": 0.7890625, "learning_rate": 0.0001508276167849855, "loss": 0.9395, "step": 21881 }, { "epoch": 0.5618684011612511, "grad_norm": 0.77734375, "learning_rate": 0.00015082377219003106, "loss": 0.912, "step": 21882 }, { "epoch": 0.561894078357173, "grad_norm": 0.765625, "learning_rate": 0.00015081992749378946, "loss": 0.8373, "step": 21883 }, { "epoch": 0.5619197555530948, "grad_norm": 0.8125, "learning_rate": 0.0001508160826962683, "loss": 0.9926, "step": 21884 }, { "epoch": 0.5619454327490166, "grad_norm": 0.7578125, "learning_rate": 0.00015081223779747527, "loss": 0.7756, "step": 21885 }, { "epoch": 0.5619711099449385, "grad_norm": 0.78515625, "learning_rate": 0.00015080839279741807, "loss": 0.9127, "step": 21886 }, { "epoch": 0.5619967871408603, "grad_norm": 0.7578125, "learning_rate": 0.00015080454769610433, "loss": 0.8336, "step": 21887 }, { "epoch": 0.5620224643367822, "grad_norm": 0.76953125, "learning_rate": 0.00015080070249354167, "loss": 0.774, "step": 21888 }, { "epoch": 0.5620481415327039, "grad_norm": 0.72265625, "learning_rate": 0.00015079685718973784, "loss": 0.8367, "step": 21889 }, { "epoch": 0.5620738187286257, "grad_norm": 0.78515625, "learning_rate": 0.00015079301178470047, "loss": 0.8201, "step": 21890 }, { "epoch": 0.5620994959245476, "grad_norm": 0.79296875, "learning_rate": 0.00015078916627843717, "loss": 0.8874, "step": 21891 }, { "epoch": 0.5621251731204694, "grad_norm": 0.83203125, "learning_rate": 0.0001507853206709557, "loss": 0.8512, "step": 21892 }, { "epoch": 0.5621508503163912, "grad_norm": 0.765625, "learning_rate": 0.00015078147496226362, "loss": 0.7297, "step": 21893 }, { "epoch": 0.5621765275123131, "grad_norm": 0.6953125, "learning_rate": 0.0001507776291523687, "loss": 0.8308, "step": 21894 }, { "epoch": 0.5622022047082348, "grad_norm": 0.796875, "learning_rate": 0.00015077378324127854, "loss": 0.9421, "step": 21895 }, { "epoch": 0.5622278819041566, "grad_norm": 0.79296875, "learning_rate": 0.00015076993722900082, "loss": 0.7649, "step": 21896 }, { "epoch": 0.5622535591000785, "grad_norm": 0.71484375, "learning_rate": 0.00015076609111554318, "loss": 0.7589, "step": 21897 }, { "epoch": 0.5622792362960003, "grad_norm": 0.8203125, "learning_rate": 0.0001507622449009133, "loss": 0.7897, "step": 21898 }, { "epoch": 0.5623049134919221, "grad_norm": 0.76171875, "learning_rate": 0.00015075839858511887, "loss": 0.7958, "step": 21899 }, { "epoch": 0.562330590687844, "grad_norm": 0.7734375, "learning_rate": 0.00015075455216816754, "loss": 0.8992, "step": 21900 }, { "epoch": 0.5623562678837658, "grad_norm": 0.81640625, "learning_rate": 0.00015075070565006695, "loss": 0.8749, "step": 21901 }, { "epoch": 0.5623819450796875, "grad_norm": 0.7734375, "learning_rate": 0.00015074685903082485, "loss": 0.8612, "step": 21902 }, { "epoch": 0.5624076222756094, "grad_norm": 0.79296875, "learning_rate": 0.00015074301231044875, "loss": 0.9284, "step": 21903 }, { "epoch": 0.5624332994715312, "grad_norm": 0.8359375, "learning_rate": 0.00015073916548894646, "loss": 0.8141, "step": 21904 }, { "epoch": 0.562458976667453, "grad_norm": 0.87109375, "learning_rate": 0.00015073531856632562, "loss": 0.9552, "step": 21905 }, { "epoch": 0.5624846538633749, "grad_norm": 0.76171875, "learning_rate": 0.00015073147154259382, "loss": 0.9064, "step": 21906 }, { "epoch": 0.5625103310592967, "grad_norm": 0.73828125, "learning_rate": 0.00015072762441775881, "loss": 0.9303, "step": 21907 }, { "epoch": 0.5625360082552184, "grad_norm": 0.8046875, "learning_rate": 0.00015072377719182821, "loss": 0.8294, "step": 21908 }, { "epoch": 0.5625616854511403, "grad_norm": 0.8046875, "learning_rate": 0.00015071992986480975, "loss": 0.8547, "step": 21909 }, { "epoch": 0.5625873626470621, "grad_norm": 0.74609375, "learning_rate": 0.000150716082436711, "loss": 0.8815, "step": 21910 }, { "epoch": 0.5626130398429839, "grad_norm": 0.76171875, "learning_rate": 0.00015071223490753969, "loss": 0.9115, "step": 21911 }, { "epoch": 0.5626387170389058, "grad_norm": 0.76171875, "learning_rate": 0.0001507083872773035, "loss": 0.7672, "step": 21912 }, { "epoch": 0.5626643942348276, "grad_norm": 0.8203125, "learning_rate": 0.00015070453954601004, "loss": 0.8834, "step": 21913 }, { "epoch": 0.5626900714307494, "grad_norm": 0.8046875, "learning_rate": 0.00015070069171366703, "loss": 0.7757, "step": 21914 }, { "epoch": 0.5627157486266712, "grad_norm": 0.81640625, "learning_rate": 0.00015069684378028216, "loss": 1.0008, "step": 21915 }, { "epoch": 0.562741425822593, "grad_norm": 0.87109375, "learning_rate": 0.000150692995745863, "loss": 0.8913, "step": 21916 }, { "epoch": 0.5627671030185148, "grad_norm": 0.7890625, "learning_rate": 0.0001506891476104173, "loss": 0.9804, "step": 21917 }, { "epoch": 0.5627927802144367, "grad_norm": 0.83203125, "learning_rate": 0.0001506852993739527, "loss": 0.9764, "step": 21918 }, { "epoch": 0.5628184574103585, "grad_norm": 1.0390625, "learning_rate": 0.0001506814510364769, "loss": 0.9725, "step": 21919 }, { "epoch": 0.5628441346062804, "grad_norm": 0.6875, "learning_rate": 0.0001506776025979975, "loss": 1.0011, "step": 21920 }, { "epoch": 0.5628698118022022, "grad_norm": 0.83203125, "learning_rate": 0.00015067375405852226, "loss": 0.8178, "step": 21921 }, { "epoch": 0.5628954889981239, "grad_norm": 0.70703125, "learning_rate": 0.00015066990541805878, "loss": 0.8531, "step": 21922 }, { "epoch": 0.5629211661940458, "grad_norm": 0.74609375, "learning_rate": 0.0001506660566766148, "loss": 0.8622, "step": 21923 }, { "epoch": 0.5629468433899676, "grad_norm": 0.765625, "learning_rate": 0.0001506622078341979, "loss": 0.792, "step": 21924 }, { "epoch": 0.5629725205858894, "grad_norm": 0.72265625, "learning_rate": 0.00015065835889081582, "loss": 0.925, "step": 21925 }, { "epoch": 0.5629981977818113, "grad_norm": 0.7890625, "learning_rate": 0.00015065450984647617, "loss": 0.9638, "step": 21926 }, { "epoch": 0.5630238749777331, "grad_norm": 0.8828125, "learning_rate": 0.0001506506607011867, "loss": 1.0525, "step": 21927 }, { "epoch": 0.5630495521736548, "grad_norm": 0.91015625, "learning_rate": 0.00015064681145495504, "loss": 0.8379, "step": 21928 }, { "epoch": 0.5630752293695767, "grad_norm": 0.80078125, "learning_rate": 0.00015064296210778882, "loss": 0.8639, "step": 21929 }, { "epoch": 0.5631009065654985, "grad_norm": 0.7265625, "learning_rate": 0.00015063911265969577, "loss": 0.8793, "step": 21930 }, { "epoch": 0.5631265837614203, "grad_norm": 0.78125, "learning_rate": 0.00015063526311068353, "loss": 0.9131, "step": 21931 }, { "epoch": 0.5631522609573422, "grad_norm": 0.78125, "learning_rate": 0.00015063141346075983, "loss": 0.8176, "step": 21932 }, { "epoch": 0.563177938153264, "grad_norm": 0.77734375, "learning_rate": 0.00015062756370993224, "loss": 0.7453, "step": 21933 }, { "epoch": 0.5632036153491858, "grad_norm": 0.8359375, "learning_rate": 0.00015062371385820853, "loss": 0.8556, "step": 21934 }, { "epoch": 0.5632292925451076, "grad_norm": 0.75390625, "learning_rate": 0.00015061986390559631, "loss": 0.9177, "step": 21935 }, { "epoch": 0.5632549697410294, "grad_norm": 0.77734375, "learning_rate": 0.00015061601385210328, "loss": 0.8284, "step": 21936 }, { "epoch": 0.5632806469369512, "grad_norm": 0.796875, "learning_rate": 0.00015061216369773712, "loss": 0.8142, "step": 21937 }, { "epoch": 0.5633063241328731, "grad_norm": 0.8203125, "learning_rate": 0.00015060831344250545, "loss": 0.9712, "step": 21938 }, { "epoch": 0.5633320013287949, "grad_norm": 0.74609375, "learning_rate": 0.000150604463086416, "loss": 0.9656, "step": 21939 }, { "epoch": 0.5633576785247167, "grad_norm": 0.7578125, "learning_rate": 0.00015060061262947647, "loss": 1.0044, "step": 21940 }, { "epoch": 0.5633833557206386, "grad_norm": 0.765625, "learning_rate": 0.00015059676207169445, "loss": 0.7935, "step": 21941 }, { "epoch": 0.5634090329165603, "grad_norm": 0.7734375, "learning_rate": 0.00015059291141307767, "loss": 0.8878, "step": 21942 }, { "epoch": 0.5634347101124821, "grad_norm": 0.890625, "learning_rate": 0.00015058906065363377, "loss": 0.9612, "step": 21943 }, { "epoch": 0.563460387308404, "grad_norm": 0.7890625, "learning_rate": 0.00015058520979337043, "loss": 0.7308, "step": 21944 }, { "epoch": 0.5634860645043258, "grad_norm": 0.734375, "learning_rate": 0.0001505813588322954, "loss": 0.9517, "step": 21945 }, { "epoch": 0.5635117417002476, "grad_norm": 0.8125, "learning_rate": 0.00015057750777041623, "loss": 0.9072, "step": 21946 }, { "epoch": 0.5635374188961695, "grad_norm": 0.76171875, "learning_rate": 0.00015057365660774066, "loss": 0.8819, "step": 21947 }, { "epoch": 0.5635630960920912, "grad_norm": 0.7578125, "learning_rate": 0.00015056980534427635, "loss": 0.7982, "step": 21948 }, { "epoch": 0.563588773288013, "grad_norm": 0.890625, "learning_rate": 0.00015056595398003103, "loss": 0.8537, "step": 21949 }, { "epoch": 0.5636144504839349, "grad_norm": 0.83203125, "learning_rate": 0.0001505621025150123, "loss": 0.8339, "step": 21950 }, { "epoch": 0.5636401276798567, "grad_norm": 0.7890625, "learning_rate": 0.00015055825094922786, "loss": 0.8901, "step": 21951 }, { "epoch": 0.5636658048757786, "grad_norm": 0.78125, "learning_rate": 0.00015055439928268543, "loss": 0.8378, "step": 21952 }, { "epoch": 0.5636914820717004, "grad_norm": 0.82421875, "learning_rate": 0.00015055054751539262, "loss": 0.9075, "step": 21953 }, { "epoch": 0.5637171592676222, "grad_norm": 0.83984375, "learning_rate": 0.00015054669564735712, "loss": 0.9139, "step": 21954 }, { "epoch": 0.563742836463544, "grad_norm": 0.8046875, "learning_rate": 0.0001505428436785867, "loss": 0.9615, "step": 21955 }, { "epoch": 0.5637685136594658, "grad_norm": 0.75390625, "learning_rate": 0.00015053899160908887, "loss": 0.8652, "step": 21956 }, { "epoch": 0.5637941908553876, "grad_norm": 0.76953125, "learning_rate": 0.00015053513943887143, "loss": 0.8813, "step": 21957 }, { "epoch": 0.5638198680513095, "grad_norm": 0.74609375, "learning_rate": 0.000150531287167942, "loss": 0.9031, "step": 21958 }, { "epoch": 0.5638455452472313, "grad_norm": 0.796875, "learning_rate": 0.0001505274347963083, "loss": 0.852, "step": 21959 }, { "epoch": 0.5638712224431531, "grad_norm": 0.72265625, "learning_rate": 0.00015052358232397797, "loss": 0.7614, "step": 21960 }, { "epoch": 0.563896899639075, "grad_norm": 0.8046875, "learning_rate": 0.00015051972975095877, "loss": 0.8489, "step": 21961 }, { "epoch": 0.5639225768349967, "grad_norm": 0.75390625, "learning_rate": 0.00015051587707725822, "loss": 0.8979, "step": 21962 }, { "epoch": 0.5639482540309185, "grad_norm": 0.71875, "learning_rate": 0.00015051202430288414, "loss": 0.8602, "step": 21963 }, { "epoch": 0.5639739312268404, "grad_norm": 0.7265625, "learning_rate": 0.00015050817142784416, "loss": 0.8608, "step": 21964 }, { "epoch": 0.5639996084227622, "grad_norm": 0.78515625, "learning_rate": 0.00015050431845214593, "loss": 0.9742, "step": 21965 }, { "epoch": 0.564025285618684, "grad_norm": 0.7265625, "learning_rate": 0.0001505004653757972, "loss": 0.8688, "step": 21966 }, { "epoch": 0.5640509628146059, "grad_norm": 0.7109375, "learning_rate": 0.00015049661219880553, "loss": 0.7505, "step": 21967 }, { "epoch": 0.5640766400105276, "grad_norm": 0.71875, "learning_rate": 0.00015049275892117873, "loss": 0.9256, "step": 21968 }, { "epoch": 0.5641023172064494, "grad_norm": 0.84765625, "learning_rate": 0.00015048890554292442, "loss": 0.8121, "step": 21969 }, { "epoch": 0.5641279944023713, "grad_norm": 0.76171875, "learning_rate": 0.00015048505206405026, "loss": 0.9207, "step": 21970 }, { "epoch": 0.5641536715982931, "grad_norm": 0.69140625, "learning_rate": 0.000150481198484564, "loss": 0.9012, "step": 21971 }, { "epoch": 0.5641793487942149, "grad_norm": 0.74609375, "learning_rate": 0.0001504773448044732, "loss": 0.913, "step": 21972 }, { "epoch": 0.5642050259901368, "grad_norm": 0.81640625, "learning_rate": 0.00015047349102378563, "loss": 0.9972, "step": 21973 }, { "epoch": 0.5642307031860586, "grad_norm": 0.76171875, "learning_rate": 0.000150469637142509, "loss": 0.8948, "step": 21974 }, { "epoch": 0.5642563803819803, "grad_norm": 0.8203125, "learning_rate": 0.0001504657831606509, "loss": 0.8872, "step": 21975 }, { "epoch": 0.5642820575779022, "grad_norm": 0.78515625, "learning_rate": 0.00015046192907821907, "loss": 0.9942, "step": 21976 }, { "epoch": 0.564307734773824, "grad_norm": 0.84375, "learning_rate": 0.00015045807489522117, "loss": 0.9742, "step": 21977 }, { "epoch": 0.5643334119697458, "grad_norm": 0.9453125, "learning_rate": 0.00015045422061166485, "loss": 0.9303, "step": 21978 }, { "epoch": 0.5643590891656677, "grad_norm": 0.6953125, "learning_rate": 0.00015045036622755788, "loss": 0.9219, "step": 21979 }, { "epoch": 0.5643847663615895, "grad_norm": 0.8984375, "learning_rate": 0.00015044651174290786, "loss": 1.0453, "step": 21980 }, { "epoch": 0.5644104435575114, "grad_norm": 0.69921875, "learning_rate": 0.00015044265715772253, "loss": 0.8746, "step": 21981 }, { "epoch": 0.5644361207534331, "grad_norm": 0.81640625, "learning_rate": 0.0001504388024720095, "loss": 0.9433, "step": 21982 }, { "epoch": 0.5644617979493549, "grad_norm": 0.7890625, "learning_rate": 0.0001504349476857765, "loss": 0.8972, "step": 21983 }, { "epoch": 0.5644874751452768, "grad_norm": 0.76171875, "learning_rate": 0.0001504310927990312, "loss": 0.7377, "step": 21984 }, { "epoch": 0.5645131523411986, "grad_norm": 0.81640625, "learning_rate": 0.00015042723781178132, "loss": 0.8781, "step": 21985 }, { "epoch": 0.5645388295371204, "grad_norm": 0.8203125, "learning_rate": 0.00015042338272403446, "loss": 0.7386, "step": 21986 }, { "epoch": 0.5645645067330423, "grad_norm": 0.78515625, "learning_rate": 0.0001504195275357984, "loss": 0.8551, "step": 21987 }, { "epoch": 0.564590183928964, "grad_norm": 0.76953125, "learning_rate": 0.00015041567224708074, "loss": 0.8509, "step": 21988 }, { "epoch": 0.5646158611248858, "grad_norm": 0.80078125, "learning_rate": 0.00015041181685788923, "loss": 0.859, "step": 21989 }, { "epoch": 0.5646415383208077, "grad_norm": 0.75390625, "learning_rate": 0.00015040796136823152, "loss": 0.942, "step": 21990 }, { "epoch": 0.5646672155167295, "grad_norm": 0.765625, "learning_rate": 0.00015040410577811528, "loss": 0.7477, "step": 21991 }, { "epoch": 0.5646928927126513, "grad_norm": 0.8984375, "learning_rate": 0.00015040025008754821, "loss": 0.9837, "step": 21992 }, { "epoch": 0.5647185699085732, "grad_norm": 0.78125, "learning_rate": 0.00015039639429653803, "loss": 0.8733, "step": 21993 }, { "epoch": 0.564744247104495, "grad_norm": 0.7421875, "learning_rate": 0.00015039253840509233, "loss": 0.7548, "step": 21994 }, { "epoch": 0.5647699243004167, "grad_norm": 0.8046875, "learning_rate": 0.0001503886824132189, "loss": 0.8308, "step": 21995 }, { "epoch": 0.5647956014963386, "grad_norm": 0.7265625, "learning_rate": 0.00015038482632092536, "loss": 0.7786, "step": 21996 }, { "epoch": 0.5648212786922604, "grad_norm": 0.78125, "learning_rate": 0.0001503809701282194, "loss": 0.7957, "step": 21997 }, { "epoch": 0.5648469558881822, "grad_norm": 0.8203125, "learning_rate": 0.00015037711383510875, "loss": 0.8898, "step": 21998 }, { "epoch": 0.5648726330841041, "grad_norm": 0.78125, "learning_rate": 0.00015037325744160103, "loss": 0.9073, "step": 21999 }, { "epoch": 0.5648983102800259, "grad_norm": 0.796875, "learning_rate": 0.00015036940094770396, "loss": 0.8524, "step": 22000 }, { "epoch": 0.5648983102800259, "eval_loss": 0.8665466904640198, "eval_model_preparation_time": 0.0065, "eval_runtime": 417.0215, "eval_samples_per_second": 23.98, "eval_steps_per_second": 0.751, "step": 22000 }, { "epoch": 0.5649239874759477, "grad_norm": 0.83203125, "learning_rate": 0.00015036554435342528, "loss": 0.9165, "step": 22001 }, { "epoch": 0.5649496646718695, "grad_norm": 0.8359375, "learning_rate": 0.00015036168765877251, "loss": 0.9714, "step": 22002 }, { "epoch": 0.5649753418677913, "grad_norm": 0.7890625, "learning_rate": 0.00015035783086375353, "loss": 0.84, "step": 22003 }, { "epoch": 0.5650010190637131, "grad_norm": 0.76171875, "learning_rate": 0.0001503539739683759, "loss": 0.9205, "step": 22004 }, { "epoch": 0.565026696259635, "grad_norm": 0.71875, "learning_rate": 0.00015035011697264738, "loss": 0.8385, "step": 22005 }, { "epoch": 0.5650523734555568, "grad_norm": 0.81640625, "learning_rate": 0.00015034625987657565, "loss": 0.7699, "step": 22006 }, { "epoch": 0.5650780506514786, "grad_norm": 0.74609375, "learning_rate": 0.0001503424026801683, "loss": 0.9317, "step": 22007 }, { "epoch": 0.5651037278474004, "grad_norm": 0.7890625, "learning_rate": 0.00015033854538343315, "loss": 0.8595, "step": 22008 }, { "epoch": 0.5651294050433222, "grad_norm": 0.87109375, "learning_rate": 0.0001503346879863778, "loss": 0.9934, "step": 22009 }, { "epoch": 0.565155082239244, "grad_norm": 0.828125, "learning_rate": 0.00015033083048900995, "loss": 1.0713, "step": 22010 }, { "epoch": 0.5651807594351659, "grad_norm": 0.81640625, "learning_rate": 0.0001503269728913373, "loss": 0.9404, "step": 22011 }, { "epoch": 0.5652064366310877, "grad_norm": 0.8046875, "learning_rate": 0.0001503231151933676, "loss": 0.884, "step": 22012 }, { "epoch": 0.5652321138270096, "grad_norm": 0.76171875, "learning_rate": 0.0001503192573951084, "loss": 0.9168, "step": 22013 }, { "epoch": 0.5652577910229314, "grad_norm": 0.69921875, "learning_rate": 0.00015031539949656751, "loss": 0.7071, "step": 22014 }, { "epoch": 0.5652834682188531, "grad_norm": 0.7578125, "learning_rate": 0.00015031154149775254, "loss": 0.8884, "step": 22015 }, { "epoch": 0.565309145414775, "grad_norm": 0.7578125, "learning_rate": 0.00015030768339867128, "loss": 0.8119, "step": 22016 }, { "epoch": 0.5653348226106968, "grad_norm": 0.74609375, "learning_rate": 0.00015030382519933127, "loss": 0.8648, "step": 22017 }, { "epoch": 0.5653604998066186, "grad_norm": 0.75, "learning_rate": 0.00015029996689974032, "loss": 0.8349, "step": 22018 }, { "epoch": 0.5653861770025405, "grad_norm": 0.77734375, "learning_rate": 0.00015029610849990607, "loss": 1.0208, "step": 22019 }, { "epoch": 0.5654118541984623, "grad_norm": 0.79296875, "learning_rate": 0.0001502922499998362, "loss": 0.8954, "step": 22020 }, { "epoch": 0.5654375313943841, "grad_norm": 0.78125, "learning_rate": 0.0001502883913995385, "loss": 0.8785, "step": 22021 }, { "epoch": 0.5654632085903059, "grad_norm": 0.703125, "learning_rate": 0.0001502845326990205, "loss": 0.902, "step": 22022 }, { "epoch": 0.5654888857862277, "grad_norm": 0.7109375, "learning_rate": 0.00015028067389828999, "loss": 0.7536, "step": 22023 }, { "epoch": 0.5655145629821495, "grad_norm": 0.83203125, "learning_rate": 0.00015027681499735462, "loss": 1.0278, "step": 22024 }, { "epoch": 0.5655402401780714, "grad_norm": 0.765625, "learning_rate": 0.00015027295599622214, "loss": 0.8666, "step": 22025 }, { "epoch": 0.5655659173739932, "grad_norm": 0.77734375, "learning_rate": 0.00015026909689490016, "loss": 0.8115, "step": 22026 }, { "epoch": 0.565591594569915, "grad_norm": 0.828125, "learning_rate": 0.00015026523769339644, "loss": 0.9071, "step": 22027 }, { "epoch": 0.5656172717658368, "grad_norm": 0.68359375, "learning_rate": 0.0001502613783917186, "loss": 0.757, "step": 22028 }, { "epoch": 0.5656429489617586, "grad_norm": 0.7265625, "learning_rate": 0.00015025751898987443, "loss": 0.819, "step": 22029 }, { "epoch": 0.5656686261576804, "grad_norm": 0.80078125, "learning_rate": 0.00015025365948787153, "loss": 0.8577, "step": 22030 }, { "epoch": 0.5656943033536023, "grad_norm": 0.70703125, "learning_rate": 0.00015024979988571764, "loss": 0.8231, "step": 22031 }, { "epoch": 0.5657199805495241, "grad_norm": 0.78515625, "learning_rate": 0.00015024594018342042, "loss": 0.927, "step": 22032 }, { "epoch": 0.5657456577454459, "grad_norm": 3.734375, "learning_rate": 0.0001502420803809876, "loss": 0.8263, "step": 22033 }, { "epoch": 0.5657713349413678, "grad_norm": 0.83984375, "learning_rate": 0.00015023822047842685, "loss": 0.8565, "step": 22034 }, { "epoch": 0.5657970121372895, "grad_norm": 0.79296875, "learning_rate": 0.00015023436047574586, "loss": 0.9525, "step": 22035 }, { "epoch": 0.5658226893332113, "grad_norm": 0.66796875, "learning_rate": 0.00015023050037295233, "loss": 0.7642, "step": 22036 }, { "epoch": 0.5658483665291332, "grad_norm": 0.83203125, "learning_rate": 0.00015022664017005392, "loss": 0.9011, "step": 22037 }, { "epoch": 0.565874043725055, "grad_norm": 0.79296875, "learning_rate": 0.0001502227798670584, "loss": 1.005, "step": 22038 }, { "epoch": 0.5658997209209768, "grad_norm": 0.73046875, "learning_rate": 0.00015021891946397336, "loss": 0.8225, "step": 22039 }, { "epoch": 0.5659253981168987, "grad_norm": 0.76953125, "learning_rate": 0.00015021505896080663, "loss": 0.8085, "step": 22040 }, { "epoch": 0.5659510753128205, "grad_norm": 0.8125, "learning_rate": 0.00015021119835756575, "loss": 0.8999, "step": 22041 }, { "epoch": 0.5659767525087422, "grad_norm": 0.7578125, "learning_rate": 0.00015020733765425852, "loss": 0.8771, "step": 22042 }, { "epoch": 0.5660024297046641, "grad_norm": 0.734375, "learning_rate": 0.0001502034768508926, "loss": 0.9041, "step": 22043 }, { "epoch": 0.5660281069005859, "grad_norm": 0.73046875, "learning_rate": 0.00015019961594747567, "loss": 0.9391, "step": 22044 }, { "epoch": 0.5660537840965078, "grad_norm": 0.7890625, "learning_rate": 0.00015019575494401545, "loss": 0.8596, "step": 22045 }, { "epoch": 0.5660794612924296, "grad_norm": 0.77734375, "learning_rate": 0.00015019189384051962, "loss": 0.8942, "step": 22046 }, { "epoch": 0.5661051384883514, "grad_norm": 0.890625, "learning_rate": 0.0001501880326369959, "loss": 0.8702, "step": 22047 }, { "epoch": 0.5661308156842731, "grad_norm": 0.7734375, "learning_rate": 0.00015018417133345197, "loss": 0.8837, "step": 22048 }, { "epoch": 0.566156492880195, "grad_norm": 0.7265625, "learning_rate": 0.0001501803099298955, "loss": 0.7737, "step": 22049 }, { "epoch": 0.5661821700761168, "grad_norm": 0.6953125, "learning_rate": 0.00015017644842633417, "loss": 0.7765, "step": 22050 }, { "epoch": 0.5662078472720387, "grad_norm": 0.703125, "learning_rate": 0.00015017258682277577, "loss": 0.9131, "step": 22051 }, { "epoch": 0.5662335244679605, "grad_norm": 0.81640625, "learning_rate": 0.0001501687251192279, "loss": 0.8863, "step": 22052 }, { "epoch": 0.5662592016638823, "grad_norm": 0.78125, "learning_rate": 0.00015016486331569833, "loss": 0.9173, "step": 22053 }, { "epoch": 0.5662848788598042, "grad_norm": 0.80078125, "learning_rate": 0.00015016100141219466, "loss": 0.9425, "step": 22054 }, { "epoch": 0.5663105560557259, "grad_norm": 0.81640625, "learning_rate": 0.00015015713940872468, "loss": 1.032, "step": 22055 }, { "epoch": 0.5663362332516477, "grad_norm": 0.79296875, "learning_rate": 0.00015015327730529606, "loss": 0.7678, "step": 22056 }, { "epoch": 0.5663619104475696, "grad_norm": 0.75390625, "learning_rate": 0.0001501494151019165, "loss": 0.8134, "step": 22057 }, { "epoch": 0.5663875876434914, "grad_norm": 0.8046875, "learning_rate": 0.00015014555279859362, "loss": 0.7782, "step": 22058 }, { "epoch": 0.5664132648394132, "grad_norm": 0.76171875, "learning_rate": 0.00015014169039533526, "loss": 0.825, "step": 22059 }, { "epoch": 0.5664389420353351, "grad_norm": 0.80859375, "learning_rate": 0.000150137827892149, "loss": 0.8332, "step": 22060 }, { "epoch": 0.5664646192312569, "grad_norm": 0.796875, "learning_rate": 0.0001501339652890426, "loss": 1.007, "step": 22061 }, { "epoch": 0.5664902964271786, "grad_norm": 0.7734375, "learning_rate": 0.00015013010258602372, "loss": 0.9497, "step": 22062 }, { "epoch": 0.5665159736231005, "grad_norm": 0.78515625, "learning_rate": 0.00015012623978310005, "loss": 0.9547, "step": 22063 }, { "epoch": 0.5665416508190223, "grad_norm": 0.74609375, "learning_rate": 0.00015012237688027936, "loss": 0.9096, "step": 22064 }, { "epoch": 0.5665673280149441, "grad_norm": 0.7734375, "learning_rate": 0.00015011851387756927, "loss": 0.8293, "step": 22065 }, { "epoch": 0.566593005210866, "grad_norm": 0.76953125, "learning_rate": 0.0001501146507749775, "loss": 0.9436, "step": 22066 }, { "epoch": 0.5666186824067878, "grad_norm": 0.84375, "learning_rate": 0.0001501107875725118, "loss": 0.978, "step": 22067 }, { "epoch": 0.5666443596027095, "grad_norm": 0.72265625, "learning_rate": 0.0001501069242701798, "loss": 0.8314, "step": 22068 }, { "epoch": 0.5666700367986314, "grad_norm": 0.8984375, "learning_rate": 0.00015010306086798923, "loss": 0.8188, "step": 22069 }, { "epoch": 0.5666957139945532, "grad_norm": 0.7578125, "learning_rate": 0.00015009919736594778, "loss": 0.6498, "step": 22070 }, { "epoch": 0.566721391190475, "grad_norm": 0.734375, "learning_rate": 0.00015009533376406315, "loss": 0.793, "step": 22071 }, { "epoch": 0.5667470683863969, "grad_norm": 0.8203125, "learning_rate": 0.00015009147006234304, "loss": 0.8404, "step": 22072 }, { "epoch": 0.5667727455823187, "grad_norm": 0.77734375, "learning_rate": 0.00015008760626079518, "loss": 0.9673, "step": 22073 }, { "epoch": 0.5667984227782406, "grad_norm": 0.84375, "learning_rate": 0.00015008374235942723, "loss": 0.8388, "step": 22074 }, { "epoch": 0.5668240999741623, "grad_norm": 0.7578125, "learning_rate": 0.00015007987835824693, "loss": 0.9383, "step": 22075 }, { "epoch": 0.5668497771700841, "grad_norm": 0.7421875, "learning_rate": 0.0001500760142572619, "loss": 0.8666, "step": 22076 }, { "epoch": 0.566875454366006, "grad_norm": 0.765625, "learning_rate": 0.00015007215005647995, "loss": 0.7468, "step": 22077 }, { "epoch": 0.5669011315619278, "grad_norm": 0.90625, "learning_rate": 0.0001500682857559087, "loss": 0.7581, "step": 22078 }, { "epoch": 0.5669268087578496, "grad_norm": 0.765625, "learning_rate": 0.00015006442135555588, "loss": 0.9583, "step": 22079 }, { "epoch": 0.5669524859537715, "grad_norm": 0.75, "learning_rate": 0.00015006055685542924, "loss": 1.0273, "step": 22080 }, { "epoch": 0.5669781631496933, "grad_norm": 0.76953125, "learning_rate": 0.00015005669225553637, "loss": 0.8651, "step": 22081 }, { "epoch": 0.567003840345615, "grad_norm": 0.88671875, "learning_rate": 0.00015005282755588506, "loss": 0.9082, "step": 22082 }, { "epoch": 0.5670295175415369, "grad_norm": 0.76171875, "learning_rate": 0.00015004896275648297, "loss": 0.9091, "step": 22083 }, { "epoch": 0.5670551947374587, "grad_norm": 0.80078125, "learning_rate": 0.0001500450978573378, "loss": 0.9166, "step": 22084 }, { "epoch": 0.5670808719333805, "grad_norm": 0.765625, "learning_rate": 0.00015004123285845732, "loss": 0.8942, "step": 22085 }, { "epoch": 0.5671065491293024, "grad_norm": 0.7734375, "learning_rate": 0.00015003736775984919, "loss": 0.8594, "step": 22086 }, { "epoch": 0.5671322263252242, "grad_norm": 0.75, "learning_rate": 0.00015003350256152106, "loss": 0.8903, "step": 22087 }, { "epoch": 0.5671579035211459, "grad_norm": 0.7890625, "learning_rate": 0.0001500296372634807, "loss": 0.85, "step": 22088 }, { "epoch": 0.5671835807170678, "grad_norm": 0.83203125, "learning_rate": 0.0001500257718657358, "loss": 0.888, "step": 22089 }, { "epoch": 0.5672092579129896, "grad_norm": 0.71484375, "learning_rate": 0.00015002190636829402, "loss": 0.8094, "step": 22090 }, { "epoch": 0.5672349351089114, "grad_norm": 0.734375, "learning_rate": 0.00015001804077116314, "loss": 0.9494, "step": 22091 }, { "epoch": 0.5672606123048333, "grad_norm": 0.828125, "learning_rate": 0.00015001417507435079, "loss": 0.9728, "step": 22092 }, { "epoch": 0.5672862895007551, "grad_norm": 0.75, "learning_rate": 0.00015001030927786475, "loss": 0.8438, "step": 22093 }, { "epoch": 0.5673119666966769, "grad_norm": 0.8671875, "learning_rate": 0.00015000644338171267, "loss": 0.8409, "step": 22094 }, { "epoch": 0.5673376438925987, "grad_norm": 0.8125, "learning_rate": 0.0001500025773859022, "loss": 0.9566, "step": 22095 }, { "epoch": 0.5673633210885205, "grad_norm": 0.765625, "learning_rate": 0.0001499987112904412, "loss": 0.8833, "step": 22096 }, { "epoch": 0.5673889982844423, "grad_norm": 0.7734375, "learning_rate": 0.00014999484509533724, "loss": 0.8973, "step": 22097 }, { "epoch": 0.5674146754803642, "grad_norm": 0.7890625, "learning_rate": 0.00014999097880059805, "loss": 0.8996, "step": 22098 }, { "epoch": 0.567440352676286, "grad_norm": 0.703125, "learning_rate": 0.0001499871124062314, "loss": 0.8783, "step": 22099 }, { "epoch": 0.5674660298722078, "grad_norm": 0.75390625, "learning_rate": 0.00014998324591224493, "loss": 0.8327, "step": 22100 }, { "epoch": 0.5674917070681296, "grad_norm": 0.79296875, "learning_rate": 0.00014997937931864635, "loss": 0.7001, "step": 22101 }, { "epoch": 0.5675173842640514, "grad_norm": 0.76953125, "learning_rate": 0.0001499755126254434, "loss": 0.787, "step": 22102 }, { "epoch": 0.5675430614599732, "grad_norm": 0.83984375, "learning_rate": 0.00014997164583264376, "loss": 0.8946, "step": 22103 }, { "epoch": 0.5675687386558951, "grad_norm": 0.82421875, "learning_rate": 0.00014996777894025516, "loss": 0.7582, "step": 22104 }, { "epoch": 0.5675944158518169, "grad_norm": 0.78515625, "learning_rate": 0.00014996391194828528, "loss": 0.9672, "step": 22105 }, { "epoch": 0.5676200930477387, "grad_norm": 0.7890625, "learning_rate": 0.00014996004485674184, "loss": 0.9372, "step": 22106 }, { "epoch": 0.5676457702436606, "grad_norm": 0.7890625, "learning_rate": 0.00014995617766563256, "loss": 0.8428, "step": 22107 }, { "epoch": 0.5676714474395823, "grad_norm": 0.79296875, "learning_rate": 0.00014995231037496506, "loss": 0.8097, "step": 22108 }, { "epoch": 0.5676971246355041, "grad_norm": 0.77734375, "learning_rate": 0.0001499484429847472, "loss": 0.8067, "step": 22109 }, { "epoch": 0.567722801831426, "grad_norm": 0.79296875, "learning_rate": 0.00014994457549498657, "loss": 0.8377, "step": 22110 }, { "epoch": 0.5677484790273478, "grad_norm": 0.84765625, "learning_rate": 0.0001499407079056909, "loss": 0.7864, "step": 22111 }, { "epoch": 0.5677741562232697, "grad_norm": 0.82421875, "learning_rate": 0.00014993684021686793, "loss": 0.9328, "step": 22112 }, { "epoch": 0.5677998334191915, "grad_norm": 0.80859375, "learning_rate": 0.00014993297242852534, "loss": 0.8524, "step": 22113 }, { "epoch": 0.5678255106151133, "grad_norm": 0.78515625, "learning_rate": 0.00014992910454067086, "loss": 0.8842, "step": 22114 }, { "epoch": 0.567851187811035, "grad_norm": 0.76953125, "learning_rate": 0.00014992523655331217, "loss": 0.85, "step": 22115 }, { "epoch": 0.5678768650069569, "grad_norm": 1.234375, "learning_rate": 0.000149921368466457, "loss": 0.7942, "step": 22116 }, { "epoch": 0.5679025422028787, "grad_norm": 0.7578125, "learning_rate": 0.00014991750028011305, "loss": 0.8715, "step": 22117 }, { "epoch": 0.5679282193988006, "grad_norm": 0.828125, "learning_rate": 0.00014991363199428804, "loss": 0.8782, "step": 22118 }, { "epoch": 0.5679538965947224, "grad_norm": 0.73828125, "learning_rate": 0.00014990976360898964, "loss": 0.9195, "step": 22119 }, { "epoch": 0.5679795737906442, "grad_norm": 0.7421875, "learning_rate": 0.00014990589512422565, "loss": 0.9184, "step": 22120 }, { "epoch": 0.568005250986566, "grad_norm": 0.76953125, "learning_rate": 0.00014990202654000364, "loss": 0.7883, "step": 22121 }, { "epoch": 0.5680309281824878, "grad_norm": 0.78515625, "learning_rate": 0.00014989815785633142, "loss": 0.8086, "step": 22122 }, { "epoch": 0.5680566053784096, "grad_norm": 0.7734375, "learning_rate": 0.00014989428907321672, "loss": 0.95, "step": 22123 }, { "epoch": 0.5680822825743315, "grad_norm": 0.78125, "learning_rate": 0.00014989042019066716, "loss": 0.8511, "step": 22124 }, { "epoch": 0.5681079597702533, "grad_norm": 0.80859375, "learning_rate": 0.00014988655120869054, "loss": 0.9876, "step": 22125 }, { "epoch": 0.5681336369661751, "grad_norm": 0.78125, "learning_rate": 0.00014988268212729456, "loss": 0.7855, "step": 22126 }, { "epoch": 0.568159314162097, "grad_norm": 0.76171875, "learning_rate": 0.0001498788129464868, "loss": 0.8816, "step": 22127 }, { "epoch": 0.5681849913580187, "grad_norm": 0.7734375, "learning_rate": 0.00014987494366627515, "loss": 0.9041, "step": 22128 }, { "epoch": 0.5682106685539405, "grad_norm": 0.78515625, "learning_rate": 0.0001498710742866672, "loss": 0.9043, "step": 22129 }, { "epoch": 0.5682363457498624, "grad_norm": 0.71875, "learning_rate": 0.00014986720480767075, "loss": 0.9168, "step": 22130 }, { "epoch": 0.5682620229457842, "grad_norm": 0.81640625, "learning_rate": 0.00014986333522929343, "loss": 1.0233, "step": 22131 }, { "epoch": 0.568287700141706, "grad_norm": 0.75, "learning_rate": 0.000149859465551543, "loss": 0.8929, "step": 22132 }, { "epoch": 0.5683133773376279, "grad_norm": 0.76171875, "learning_rate": 0.00014985559577442717, "loss": 0.8273, "step": 22133 }, { "epoch": 0.5683390545335497, "grad_norm": 0.7734375, "learning_rate": 0.00014985172589795362, "loss": 0.8146, "step": 22134 }, { "epoch": 0.5683647317294714, "grad_norm": 0.796875, "learning_rate": 0.00014984785592213006, "loss": 0.8267, "step": 22135 }, { "epoch": 0.5683904089253933, "grad_norm": 0.8671875, "learning_rate": 0.00014984398584696427, "loss": 0.7801, "step": 22136 }, { "epoch": 0.5684160861213151, "grad_norm": 0.71875, "learning_rate": 0.0001498401156724639, "loss": 0.7353, "step": 22137 }, { "epoch": 0.568441763317237, "grad_norm": 0.71484375, "learning_rate": 0.00014983624539863667, "loss": 0.8987, "step": 22138 }, { "epoch": 0.5684674405131588, "grad_norm": 0.81640625, "learning_rate": 0.00014983237502549034, "loss": 0.9256, "step": 22139 }, { "epoch": 0.5684931177090806, "grad_norm": 0.71875, "learning_rate": 0.00014982850455303254, "loss": 0.7974, "step": 22140 }, { "epoch": 0.5685187949050023, "grad_norm": 0.71875, "learning_rate": 0.00014982463398127108, "loss": 0.7511, "step": 22141 }, { "epoch": 0.5685444721009242, "grad_norm": 0.73828125, "learning_rate": 0.0001498207633102136, "loss": 0.8521, "step": 22142 }, { "epoch": 0.568570149296846, "grad_norm": 0.7734375, "learning_rate": 0.00014981689253986785, "loss": 0.7574, "step": 22143 }, { "epoch": 0.5685958264927679, "grad_norm": 0.8359375, "learning_rate": 0.0001498130216702415, "loss": 0.8812, "step": 22144 }, { "epoch": 0.5686215036886897, "grad_norm": 0.96875, "learning_rate": 0.00014980915070134234, "loss": 0.8732, "step": 22145 }, { "epoch": 0.5686471808846115, "grad_norm": 0.74609375, "learning_rate": 0.000149805279633178, "loss": 0.8606, "step": 22146 }, { "epoch": 0.5686728580805334, "grad_norm": 0.7421875, "learning_rate": 0.00014980140846575628, "loss": 0.7601, "step": 22147 }, { "epoch": 0.5686985352764551, "grad_norm": 0.80078125, "learning_rate": 0.00014979753719908482, "loss": 0.9706, "step": 22148 }, { "epoch": 0.5687242124723769, "grad_norm": 0.81640625, "learning_rate": 0.00014979366583317137, "loss": 1.0266, "step": 22149 }, { "epoch": 0.5687498896682988, "grad_norm": 0.76953125, "learning_rate": 0.00014978979436802367, "loss": 0.9482, "step": 22150 }, { "epoch": 0.5687755668642206, "grad_norm": 0.76953125, "learning_rate": 0.00014978592280364938, "loss": 0.8083, "step": 22151 }, { "epoch": 0.5688012440601424, "grad_norm": 0.73828125, "learning_rate": 0.00014978205114005627, "loss": 0.8716, "step": 22152 }, { "epoch": 0.5688269212560643, "grad_norm": 0.73046875, "learning_rate": 0.000149778179377252, "loss": 0.8899, "step": 22153 }, { "epoch": 0.5688525984519861, "grad_norm": 0.7734375, "learning_rate": 0.00014977430751524434, "loss": 0.984, "step": 22154 }, { "epoch": 0.5688782756479078, "grad_norm": 0.734375, "learning_rate": 0.00014977043555404097, "loss": 0.8558, "step": 22155 }, { "epoch": 0.5689039528438297, "grad_norm": 0.80078125, "learning_rate": 0.0001497665634936496, "loss": 0.8266, "step": 22156 }, { "epoch": 0.5689296300397515, "grad_norm": 0.78515625, "learning_rate": 0.00014976269133407796, "loss": 0.8611, "step": 22157 }, { "epoch": 0.5689553072356733, "grad_norm": 0.875, "learning_rate": 0.0001497588190753338, "loss": 0.9996, "step": 22158 }, { "epoch": 0.5689809844315952, "grad_norm": 0.88671875, "learning_rate": 0.0001497549467174248, "loss": 0.8606, "step": 22159 }, { "epoch": 0.569006661627517, "grad_norm": 0.8046875, "learning_rate": 0.00014975107426035868, "loss": 0.8138, "step": 22160 }, { "epoch": 0.5690323388234387, "grad_norm": 0.8125, "learning_rate": 0.00014974720170414315, "loss": 0.8799, "step": 22161 }, { "epoch": 0.5690580160193606, "grad_norm": 0.7578125, "learning_rate": 0.00014974332904878597, "loss": 0.8341, "step": 22162 }, { "epoch": 0.5690836932152824, "grad_norm": 0.78515625, "learning_rate": 0.00014973945629429483, "loss": 0.8084, "step": 22163 }, { "epoch": 0.5691093704112042, "grad_norm": 0.78125, "learning_rate": 0.0001497355834406774, "loss": 0.8569, "step": 22164 }, { "epoch": 0.5691350476071261, "grad_norm": 0.80859375, "learning_rate": 0.00014973171048794152, "loss": 0.8965, "step": 22165 }, { "epoch": 0.5691607248030479, "grad_norm": 0.69921875, "learning_rate": 0.00014972783743609478, "loss": 0.7561, "step": 22166 }, { "epoch": 0.5691864019989697, "grad_norm": 0.7734375, "learning_rate": 0.00014972396428514494, "loss": 0.859, "step": 22167 }, { "epoch": 0.5692120791948915, "grad_norm": 0.78125, "learning_rate": 0.00014972009103509976, "loss": 0.7791, "step": 22168 }, { "epoch": 0.5692377563908133, "grad_norm": 0.7578125, "learning_rate": 0.0001497162176859669, "loss": 0.7999, "step": 22169 }, { "epoch": 0.5692634335867351, "grad_norm": 0.71484375, "learning_rate": 0.00014971234423775417, "loss": 0.741, "step": 22170 }, { "epoch": 0.569289110782657, "grad_norm": 0.75390625, "learning_rate": 0.00014970847069046918, "loss": 0.9791, "step": 22171 }, { "epoch": 0.5693147879785788, "grad_norm": 0.796875, "learning_rate": 0.0001497045970441197, "loss": 0.8779, "step": 22172 }, { "epoch": 0.5693404651745007, "grad_norm": 0.78125, "learning_rate": 0.00014970072329871346, "loss": 0.8605, "step": 22173 }, { "epoch": 0.5693661423704225, "grad_norm": 0.73046875, "learning_rate": 0.00014969684945425818, "loss": 0.786, "step": 22174 }, { "epoch": 0.5693918195663442, "grad_norm": 0.859375, "learning_rate": 0.00014969297551076154, "loss": 0.8682, "step": 22175 }, { "epoch": 0.569417496762266, "grad_norm": 0.77734375, "learning_rate": 0.0001496891014682313, "loss": 0.8206, "step": 22176 }, { "epoch": 0.5694431739581879, "grad_norm": 0.77734375, "learning_rate": 0.00014968522732667516, "loss": 0.7886, "step": 22177 }, { "epoch": 0.5694688511541097, "grad_norm": 0.85546875, "learning_rate": 0.00014968135308610086, "loss": 0.9522, "step": 22178 }, { "epoch": 0.5694945283500316, "grad_norm": 0.74609375, "learning_rate": 0.00014967747874651614, "loss": 0.7874, "step": 22179 }, { "epoch": 0.5695202055459534, "grad_norm": 0.73046875, "learning_rate": 0.00014967360430792864, "loss": 0.801, "step": 22180 }, { "epoch": 0.5695458827418751, "grad_norm": 0.703125, "learning_rate": 0.00014966972977034616, "loss": 0.7467, "step": 22181 }, { "epoch": 0.569571559937797, "grad_norm": 0.73828125, "learning_rate": 0.0001496658551337764, "loss": 0.9848, "step": 22182 }, { "epoch": 0.5695972371337188, "grad_norm": 0.8125, "learning_rate": 0.00014966198039822705, "loss": 1.033, "step": 22183 }, { "epoch": 0.5696229143296406, "grad_norm": 0.77734375, "learning_rate": 0.00014965810556370588, "loss": 1.0281, "step": 22184 }, { "epoch": 0.5696485915255625, "grad_norm": 0.79296875, "learning_rate": 0.00014965423063022058, "loss": 1.0541, "step": 22185 }, { "epoch": 0.5696742687214843, "grad_norm": 0.7578125, "learning_rate": 0.0001496503555977789, "loss": 0.7452, "step": 22186 }, { "epoch": 0.5696999459174061, "grad_norm": 0.72265625, "learning_rate": 0.00014964648046638854, "loss": 0.8305, "step": 22187 }, { "epoch": 0.5697256231133279, "grad_norm": 0.765625, "learning_rate": 0.00014964260523605722, "loss": 0.8694, "step": 22188 }, { "epoch": 0.5697513003092497, "grad_norm": 0.8515625, "learning_rate": 0.00014963872990679267, "loss": 0.9643, "step": 22189 }, { "epoch": 0.5697769775051715, "grad_norm": 0.81640625, "learning_rate": 0.00014963485447860265, "loss": 0.9712, "step": 22190 }, { "epoch": 0.5698026547010934, "grad_norm": 0.78515625, "learning_rate": 0.0001496309789514948, "loss": 0.9219, "step": 22191 }, { "epoch": 0.5698283318970152, "grad_norm": 0.8515625, "learning_rate": 0.00014962710332547694, "loss": 1.037, "step": 22192 }, { "epoch": 0.569854009092937, "grad_norm": 0.8515625, "learning_rate": 0.00014962322760055672, "loss": 0.7752, "step": 22193 }, { "epoch": 0.5698796862888589, "grad_norm": 0.80078125, "learning_rate": 0.00014961935177674188, "loss": 0.9649, "step": 22194 }, { "epoch": 0.5699053634847806, "grad_norm": 0.78125, "learning_rate": 0.00014961547585404017, "loss": 0.9201, "step": 22195 }, { "epoch": 0.5699310406807024, "grad_norm": 0.73046875, "learning_rate": 0.0001496115998324593, "loss": 0.8877, "step": 22196 }, { "epoch": 0.5699567178766243, "grad_norm": 0.78125, "learning_rate": 0.00014960772371200697, "loss": 0.9119, "step": 22197 }, { "epoch": 0.5699823950725461, "grad_norm": 0.75390625, "learning_rate": 0.00014960384749269093, "loss": 1.0123, "step": 22198 }, { "epoch": 0.570008072268468, "grad_norm": 0.79296875, "learning_rate": 0.00014959997117451894, "loss": 0.7714, "step": 22199 }, { "epoch": 0.5700337494643898, "grad_norm": 0.78125, "learning_rate": 0.00014959609475749866, "loss": 0.8405, "step": 22200 }, { "epoch": 0.5700594266603115, "grad_norm": 0.828125, "learning_rate": 0.00014959221824163786, "loss": 0.9292, "step": 22201 }, { "epoch": 0.5700851038562333, "grad_norm": 0.78125, "learning_rate": 0.00014958834162694421, "loss": 0.8605, "step": 22202 }, { "epoch": 0.5701107810521552, "grad_norm": 0.77734375, "learning_rate": 0.00014958446491342552, "loss": 0.8228, "step": 22203 }, { "epoch": 0.570136458248077, "grad_norm": 0.7578125, "learning_rate": 0.00014958058810108943, "loss": 0.8832, "step": 22204 }, { "epoch": 0.5701621354439989, "grad_norm": 0.8046875, "learning_rate": 0.00014957671118994377, "loss": 0.8617, "step": 22205 }, { "epoch": 0.5701878126399207, "grad_norm": 0.75390625, "learning_rate": 0.00014957283417999613, "loss": 0.9558, "step": 22206 }, { "epoch": 0.5702134898358425, "grad_norm": 0.73828125, "learning_rate": 0.00014956895707125435, "loss": 0.7473, "step": 22207 }, { "epoch": 0.5702391670317642, "grad_norm": 0.796875, "learning_rate": 0.0001495650798637261, "loss": 0.8397, "step": 22208 }, { "epoch": 0.5702648442276861, "grad_norm": 0.8203125, "learning_rate": 0.00014956120255741912, "loss": 0.8502, "step": 22209 }, { "epoch": 0.5702905214236079, "grad_norm": 0.76953125, "learning_rate": 0.00014955732515234115, "loss": 0.7855, "step": 22210 }, { "epoch": 0.5703161986195298, "grad_norm": 0.79296875, "learning_rate": 0.00014955344764849992, "loss": 0.9003, "step": 22211 }, { "epoch": 0.5703418758154516, "grad_norm": 0.73046875, "learning_rate": 0.00014954957004590313, "loss": 0.8699, "step": 22212 }, { "epoch": 0.5703675530113734, "grad_norm": 0.7578125, "learning_rate": 0.00014954569234455853, "loss": 0.7759, "step": 22213 }, { "epoch": 0.5703932302072953, "grad_norm": 0.734375, "learning_rate": 0.00014954181454447386, "loss": 0.9888, "step": 22214 }, { "epoch": 0.570418907403217, "grad_norm": 0.82421875, "learning_rate": 0.00014953793664565677, "loss": 1.0352, "step": 22215 }, { "epoch": 0.5704445845991388, "grad_norm": 0.75390625, "learning_rate": 0.00014953405864811506, "loss": 0.8025, "step": 22216 }, { "epoch": 0.5704702617950607, "grad_norm": 0.765625, "learning_rate": 0.0001495301805518565, "loss": 0.8007, "step": 22217 }, { "epoch": 0.5704959389909825, "grad_norm": 0.74609375, "learning_rate": 0.00014952630235688874, "loss": 0.9639, "step": 22218 }, { "epoch": 0.5705216161869043, "grad_norm": 0.74609375, "learning_rate": 0.0001495224240632195, "loss": 0.8627, "step": 22219 }, { "epoch": 0.5705472933828262, "grad_norm": 0.80078125, "learning_rate": 0.00014951854567085658, "loss": 0.9722, "step": 22220 }, { "epoch": 0.5705729705787479, "grad_norm": 0.7421875, "learning_rate": 0.00014951466717980764, "loss": 0.8397, "step": 22221 }, { "epoch": 0.5705986477746697, "grad_norm": 0.6875, "learning_rate": 0.00014951078859008047, "loss": 0.818, "step": 22222 }, { "epoch": 0.5706243249705916, "grad_norm": 0.875, "learning_rate": 0.00014950690990168274, "loss": 0.8512, "step": 22223 }, { "epoch": 0.5706500021665134, "grad_norm": 0.78515625, "learning_rate": 0.00014950303111462224, "loss": 0.9555, "step": 22224 }, { "epoch": 0.5706756793624352, "grad_norm": 0.74609375, "learning_rate": 0.00014949915222890666, "loss": 0.8453, "step": 22225 }, { "epoch": 0.5707013565583571, "grad_norm": 0.78515625, "learning_rate": 0.00014949527324454372, "loss": 0.9682, "step": 22226 }, { "epoch": 0.5707270337542789, "grad_norm": 0.7890625, "learning_rate": 0.0001494913941615412, "loss": 0.8378, "step": 22227 }, { "epoch": 0.5707527109502006, "grad_norm": 0.73046875, "learning_rate": 0.0001494875149799068, "loss": 0.8829, "step": 22228 }, { "epoch": 0.5707783881461225, "grad_norm": 0.87890625, "learning_rate": 0.00014948363569964825, "loss": 0.9343, "step": 22229 }, { "epoch": 0.5708040653420443, "grad_norm": 0.77734375, "learning_rate": 0.00014947975632077326, "loss": 0.821, "step": 22230 }, { "epoch": 0.5708297425379661, "grad_norm": 0.82421875, "learning_rate": 0.00014947587684328964, "loss": 0.8717, "step": 22231 }, { "epoch": 0.570855419733888, "grad_norm": 0.74609375, "learning_rate": 0.00014947199726720502, "loss": 1.02, "step": 22232 }, { "epoch": 0.5708810969298098, "grad_norm": 0.74609375, "learning_rate": 0.0001494681175925272, "loss": 0.8733, "step": 22233 }, { "epoch": 0.5709067741257317, "grad_norm": 1.1484375, "learning_rate": 0.00014946423781926385, "loss": 0.9519, "step": 22234 }, { "epoch": 0.5709324513216534, "grad_norm": 0.8515625, "learning_rate": 0.00014946035794742278, "loss": 0.7661, "step": 22235 }, { "epoch": 0.5709581285175752, "grad_norm": 0.7578125, "learning_rate": 0.0001494564779770117, "loss": 0.8856, "step": 22236 }, { "epoch": 0.570983805713497, "grad_norm": 0.7265625, "learning_rate": 0.0001494525979080383, "loss": 0.8591, "step": 22237 }, { "epoch": 0.5710094829094189, "grad_norm": 0.78125, "learning_rate": 0.00014944871774051033, "loss": 0.7818, "step": 22238 }, { "epoch": 0.5710351601053407, "grad_norm": 0.7578125, "learning_rate": 0.00014944483747443554, "loss": 0.8139, "step": 22239 }, { "epoch": 0.5710608373012626, "grad_norm": 0.734375, "learning_rate": 0.0001494409571098217, "loss": 0.747, "step": 22240 }, { "epoch": 0.5710865144971843, "grad_norm": 0.72265625, "learning_rate": 0.00014943707664667643, "loss": 0.7527, "step": 22241 }, { "epoch": 0.5711121916931061, "grad_norm": 0.74609375, "learning_rate": 0.00014943319608500756, "loss": 1.0666, "step": 22242 }, { "epoch": 0.571137868889028, "grad_norm": 0.7578125, "learning_rate": 0.00014942931542482279, "loss": 0.8751, "step": 22243 }, { "epoch": 0.5711635460849498, "grad_norm": 0.765625, "learning_rate": 0.00014942543466612987, "loss": 0.8748, "step": 22244 }, { "epoch": 0.5711892232808716, "grad_norm": 0.7578125, "learning_rate": 0.0001494215538089365, "loss": 0.8881, "step": 22245 }, { "epoch": 0.5712149004767935, "grad_norm": 0.7421875, "learning_rate": 0.0001494176728532505, "loss": 0.9972, "step": 22246 }, { "epoch": 0.5712405776727153, "grad_norm": 0.8125, "learning_rate": 0.0001494137917990795, "loss": 1.0158, "step": 22247 }, { "epoch": 0.571266254868637, "grad_norm": 0.77734375, "learning_rate": 0.00014940991064643126, "loss": 0.8171, "step": 22248 }, { "epoch": 0.5712919320645589, "grad_norm": 0.8046875, "learning_rate": 0.0001494060293953135, "loss": 0.8505, "step": 22249 }, { "epoch": 0.5713176092604807, "grad_norm": 0.8203125, "learning_rate": 0.00014940214804573407, "loss": 0.984, "step": 22250 }, { "epoch": 0.5713432864564025, "grad_norm": 0.71875, "learning_rate": 0.0001493982665977006, "loss": 0.8429, "step": 22251 }, { "epoch": 0.5713689636523244, "grad_norm": 0.7734375, "learning_rate": 0.00014939438505122082, "loss": 0.8643, "step": 22252 }, { "epoch": 0.5713946408482462, "grad_norm": 0.7734375, "learning_rate": 0.0001493905034063025, "loss": 0.7507, "step": 22253 }, { "epoch": 0.571420318044168, "grad_norm": 0.69140625, "learning_rate": 0.00014938662166295337, "loss": 0.831, "step": 22254 }, { "epoch": 0.5714459952400898, "grad_norm": 0.75, "learning_rate": 0.00014938273982118117, "loss": 0.9503, "step": 22255 }, { "epoch": 0.5714716724360116, "grad_norm": 0.75, "learning_rate": 0.00014937885788099362, "loss": 0.9526, "step": 22256 }, { "epoch": 0.5714973496319334, "grad_norm": 0.796875, "learning_rate": 0.00014937497584239847, "loss": 0.8496, "step": 22257 }, { "epoch": 0.5715230268278553, "grad_norm": 0.828125, "learning_rate": 0.00014937109370540347, "loss": 0.8912, "step": 22258 }, { "epoch": 0.5715487040237771, "grad_norm": 0.7265625, "learning_rate": 0.00014936721147001632, "loss": 0.8962, "step": 22259 }, { "epoch": 0.571574381219699, "grad_norm": 0.8359375, "learning_rate": 0.00014936332913624475, "loss": 0.8186, "step": 22260 }, { "epoch": 0.5716000584156207, "grad_norm": 0.80859375, "learning_rate": 0.0001493594467040966, "loss": 0.8142, "step": 22261 }, { "epoch": 0.5716257356115425, "grad_norm": 0.70703125, "learning_rate": 0.00014935556417357949, "loss": 0.745, "step": 22262 }, { "epoch": 0.5716514128074643, "grad_norm": 0.73828125, "learning_rate": 0.00014935168154470117, "loss": 0.8652, "step": 22263 }, { "epoch": 0.5716770900033862, "grad_norm": 0.78515625, "learning_rate": 0.00014934779881746945, "loss": 0.9052, "step": 22264 }, { "epoch": 0.571702767199308, "grad_norm": 0.79296875, "learning_rate": 0.00014934391599189198, "loss": 0.8192, "step": 22265 }, { "epoch": 0.5717284443952299, "grad_norm": 0.75390625, "learning_rate": 0.00014934003306797658, "loss": 0.8776, "step": 22266 }, { "epoch": 0.5717541215911517, "grad_norm": 0.76171875, "learning_rate": 0.00014933615004573096, "loss": 0.7933, "step": 22267 }, { "epoch": 0.5717797987870734, "grad_norm": 0.80859375, "learning_rate": 0.0001493322669251628, "loss": 0.9578, "step": 22268 }, { "epoch": 0.5718054759829952, "grad_norm": 0.7578125, "learning_rate": 0.00014932838370627992, "loss": 0.979, "step": 22269 }, { "epoch": 0.5718311531789171, "grad_norm": 0.76171875, "learning_rate": 0.00014932450038909005, "loss": 0.8496, "step": 22270 }, { "epoch": 0.5718568303748389, "grad_norm": 0.81640625, "learning_rate": 0.00014932061697360086, "loss": 0.8922, "step": 22271 }, { "epoch": 0.5718825075707608, "grad_norm": 0.80859375, "learning_rate": 0.00014931673345982017, "loss": 0.9054, "step": 22272 }, { "epoch": 0.5719081847666826, "grad_norm": 0.76953125, "learning_rate": 0.00014931284984775567, "loss": 0.9366, "step": 22273 }, { "epoch": 0.5719338619626044, "grad_norm": 0.83203125, "learning_rate": 0.00014930896613741512, "loss": 0.769, "step": 22274 }, { "epoch": 0.5719595391585262, "grad_norm": 0.765625, "learning_rate": 0.00014930508232880626, "loss": 0.8166, "step": 22275 }, { "epoch": 0.571985216354448, "grad_norm": 0.7421875, "learning_rate": 0.0001493011984219368, "loss": 0.8838, "step": 22276 }, { "epoch": 0.5720108935503698, "grad_norm": 0.7265625, "learning_rate": 0.00014929731441681453, "loss": 0.9403, "step": 22277 }, { "epoch": 0.5720365707462917, "grad_norm": 0.83984375, "learning_rate": 0.00014929343031344714, "loss": 0.8625, "step": 22278 }, { "epoch": 0.5720622479422135, "grad_norm": 0.74609375, "learning_rate": 0.00014928954611184242, "loss": 1.0019, "step": 22279 }, { "epoch": 0.5720879251381353, "grad_norm": 0.82421875, "learning_rate": 0.0001492856618120081, "loss": 0.8384, "step": 22280 }, { "epoch": 0.5721136023340571, "grad_norm": 0.7734375, "learning_rate": 0.00014928177741395187, "loss": 0.9296, "step": 22281 }, { "epoch": 0.5721392795299789, "grad_norm": 0.75, "learning_rate": 0.00014927789291768154, "loss": 0.8142, "step": 22282 }, { "epoch": 0.5721649567259007, "grad_norm": 0.7734375, "learning_rate": 0.0001492740083232048, "loss": 0.8362, "step": 22283 }, { "epoch": 0.5721906339218226, "grad_norm": 0.6953125, "learning_rate": 0.0001492701236305294, "loss": 0.7511, "step": 22284 }, { "epoch": 0.5722163111177444, "grad_norm": 0.76171875, "learning_rate": 0.00014926623883966314, "loss": 0.9829, "step": 22285 }, { "epoch": 0.5722419883136662, "grad_norm": 0.76171875, "learning_rate": 0.0001492623539506137, "loss": 0.9975, "step": 22286 }, { "epoch": 0.5722676655095881, "grad_norm": 0.703125, "learning_rate": 0.0001492584689633888, "loss": 0.7935, "step": 22287 }, { "epoch": 0.5722933427055098, "grad_norm": 0.7578125, "learning_rate": 0.00014925458387799628, "loss": 0.9807, "step": 22288 }, { "epoch": 0.5723190199014316, "grad_norm": 0.84765625, "learning_rate": 0.00014925069869444374, "loss": 0.9389, "step": 22289 }, { "epoch": 0.5723446970973535, "grad_norm": 0.828125, "learning_rate": 0.0001492468134127391, "loss": 0.897, "step": 22290 }, { "epoch": 0.5723703742932753, "grad_norm": 0.8671875, "learning_rate": 0.00014924292803288996, "loss": 0.9954, "step": 22291 }, { "epoch": 0.5723960514891971, "grad_norm": 0.765625, "learning_rate": 0.0001492390425549041, "loss": 0.9595, "step": 22292 }, { "epoch": 0.572421728685119, "grad_norm": 0.80078125, "learning_rate": 0.00014923515697878932, "loss": 0.8747, "step": 22293 }, { "epoch": 0.5724474058810408, "grad_norm": 0.81640625, "learning_rate": 0.00014923127130455328, "loss": 1.0094, "step": 22294 }, { "epoch": 0.5724730830769625, "grad_norm": 0.80859375, "learning_rate": 0.00014922738553220378, "loss": 0.892, "step": 22295 }, { "epoch": 0.5724987602728844, "grad_norm": 0.80078125, "learning_rate": 0.00014922349966174856, "loss": 0.9427, "step": 22296 }, { "epoch": 0.5725244374688062, "grad_norm": 0.7265625, "learning_rate": 0.00014921961369319532, "loss": 0.7936, "step": 22297 }, { "epoch": 0.572550114664728, "grad_norm": 0.69921875, "learning_rate": 0.00014921572762655188, "loss": 0.9053, "step": 22298 }, { "epoch": 0.5725757918606499, "grad_norm": 0.75, "learning_rate": 0.00014921184146182592, "loss": 0.8331, "step": 22299 }, { "epoch": 0.5726014690565717, "grad_norm": 0.7265625, "learning_rate": 0.00014920795519902518, "loss": 0.8232, "step": 22300 }, { "epoch": 0.5726271462524934, "grad_norm": 0.7578125, "learning_rate": 0.00014920406883815745, "loss": 0.7837, "step": 22301 }, { "epoch": 0.5726528234484153, "grad_norm": 0.76953125, "learning_rate": 0.00014920018237923045, "loss": 0.8048, "step": 22302 }, { "epoch": 0.5726785006443371, "grad_norm": 0.7578125, "learning_rate": 0.00014919629582225196, "loss": 0.9123, "step": 22303 }, { "epoch": 0.572704177840259, "grad_norm": 1.140625, "learning_rate": 0.00014919240916722967, "loss": 0.9333, "step": 22304 }, { "epoch": 0.5727298550361808, "grad_norm": 0.78125, "learning_rate": 0.00014918852241417135, "loss": 0.8847, "step": 22305 }, { "epoch": 0.5727555322321026, "grad_norm": 0.84765625, "learning_rate": 0.00014918463556308476, "loss": 0.8816, "step": 22306 }, { "epoch": 0.5727812094280245, "grad_norm": 0.91796875, "learning_rate": 0.00014918074861397762, "loss": 0.903, "step": 22307 }, { "epoch": 0.5728068866239462, "grad_norm": 0.7734375, "learning_rate": 0.00014917686156685768, "loss": 0.816, "step": 22308 }, { "epoch": 0.572832563819868, "grad_norm": 0.76171875, "learning_rate": 0.00014917297442173272, "loss": 0.8664, "step": 22309 }, { "epoch": 0.5728582410157899, "grad_norm": 0.75, "learning_rate": 0.00014916908717861046, "loss": 0.8131, "step": 22310 }, { "epoch": 0.5728839182117117, "grad_norm": 0.7421875, "learning_rate": 0.00014916519983749867, "loss": 0.8611, "step": 22311 }, { "epoch": 0.5729095954076335, "grad_norm": 0.734375, "learning_rate": 0.00014916131239840505, "loss": 0.7636, "step": 22312 }, { "epoch": 0.5729352726035554, "grad_norm": 0.70703125, "learning_rate": 0.00014915742486133738, "loss": 0.8597, "step": 22313 }, { "epoch": 0.5729609497994771, "grad_norm": 0.79296875, "learning_rate": 0.0001491535372263034, "loss": 0.8847, "step": 22314 }, { "epoch": 0.5729866269953989, "grad_norm": 0.7890625, "learning_rate": 0.00014914964949331086, "loss": 0.7955, "step": 22315 }, { "epoch": 0.5730123041913208, "grad_norm": 0.80859375, "learning_rate": 0.0001491457616623675, "loss": 0.9498, "step": 22316 }, { "epoch": 0.5730379813872426, "grad_norm": 0.76953125, "learning_rate": 0.0001491418737334811, "loss": 0.8576, "step": 22317 }, { "epoch": 0.5730636585831644, "grad_norm": 0.7734375, "learning_rate": 0.00014913798570665937, "loss": 0.8614, "step": 22318 }, { "epoch": 0.5730893357790863, "grad_norm": 0.83203125, "learning_rate": 0.0001491340975819101, "loss": 0.8898, "step": 22319 }, { "epoch": 0.5731150129750081, "grad_norm": 0.7890625, "learning_rate": 0.000149130209359241, "loss": 0.8357, "step": 22320 }, { "epoch": 0.5731406901709298, "grad_norm": 0.76171875, "learning_rate": 0.0001491263210386598, "loss": 0.9103, "step": 22321 }, { "epoch": 0.5731663673668517, "grad_norm": 0.70703125, "learning_rate": 0.0001491224326201743, "loss": 0.7679, "step": 22322 }, { "epoch": 0.5731920445627735, "grad_norm": 0.796875, "learning_rate": 0.0001491185441037922, "loss": 0.7262, "step": 22323 }, { "epoch": 0.5732177217586953, "grad_norm": 0.8046875, "learning_rate": 0.00014911465548952133, "loss": 0.8193, "step": 22324 }, { "epoch": 0.5732433989546172, "grad_norm": 0.765625, "learning_rate": 0.00014911076677736937, "loss": 0.8101, "step": 22325 }, { "epoch": 0.573269076150539, "grad_norm": 0.70703125, "learning_rate": 0.00014910687796734407, "loss": 0.8831, "step": 22326 }, { "epoch": 0.5732947533464609, "grad_norm": 0.73046875, "learning_rate": 0.0001491029890594532, "loss": 0.8972, "step": 22327 }, { "epoch": 0.5733204305423826, "grad_norm": 0.73828125, "learning_rate": 0.00014909910005370453, "loss": 0.9628, "step": 22328 }, { "epoch": 0.5733461077383044, "grad_norm": 0.7734375, "learning_rate": 0.00014909521095010577, "loss": 0.8914, "step": 22329 }, { "epoch": 0.5733717849342262, "grad_norm": 0.7734375, "learning_rate": 0.00014909132174866473, "loss": 0.9222, "step": 22330 }, { "epoch": 0.5733974621301481, "grad_norm": 1.625, "learning_rate": 0.00014908743244938905, "loss": 0.9032, "step": 22331 }, { "epoch": 0.5734231393260699, "grad_norm": 0.78515625, "learning_rate": 0.0001490835430522866, "loss": 0.8284, "step": 22332 }, { "epoch": 0.5734488165219918, "grad_norm": 0.79296875, "learning_rate": 0.0001490796535573651, "loss": 0.8344, "step": 22333 }, { "epoch": 0.5734744937179135, "grad_norm": 0.80859375, "learning_rate": 0.0001490757639646322, "loss": 0.9601, "step": 22334 }, { "epoch": 0.5735001709138353, "grad_norm": 0.828125, "learning_rate": 0.00014907187427409582, "loss": 0.8533, "step": 22335 }, { "epoch": 0.5735258481097572, "grad_norm": 0.8359375, "learning_rate": 0.0001490679844857636, "loss": 0.845, "step": 22336 }, { "epoch": 0.573551525305679, "grad_norm": 0.85546875, "learning_rate": 0.00014906409459964333, "loss": 0.8943, "step": 22337 }, { "epoch": 0.5735772025016008, "grad_norm": 0.73046875, "learning_rate": 0.00014906020461574274, "loss": 0.844, "step": 22338 }, { "epoch": 0.5736028796975227, "grad_norm": 0.7734375, "learning_rate": 0.00014905631453406956, "loss": 0.8376, "step": 22339 }, { "epoch": 0.5736285568934445, "grad_norm": 0.83203125, "learning_rate": 0.00014905242435463162, "loss": 0.8061, "step": 22340 }, { "epoch": 0.5736542340893662, "grad_norm": 0.8203125, "learning_rate": 0.0001490485340774366, "loss": 0.8967, "step": 22341 }, { "epoch": 0.5736799112852881, "grad_norm": 0.86328125, "learning_rate": 0.0001490446437024923, "loss": 0.9513, "step": 22342 }, { "epoch": 0.5737055884812099, "grad_norm": 0.7890625, "learning_rate": 0.00014904075322980646, "loss": 0.8826, "step": 22343 }, { "epoch": 0.5737312656771317, "grad_norm": 0.8046875, "learning_rate": 0.00014903686265938684, "loss": 0.9492, "step": 22344 }, { "epoch": 0.5737569428730536, "grad_norm": 0.78515625, "learning_rate": 0.00014903297199124116, "loss": 0.9193, "step": 22345 }, { "epoch": 0.5737826200689754, "grad_norm": 0.76171875, "learning_rate": 0.00014902908122537719, "loss": 0.9318, "step": 22346 }, { "epoch": 0.5738082972648972, "grad_norm": 0.73046875, "learning_rate": 0.00014902519036180271, "loss": 0.9924, "step": 22347 }, { "epoch": 0.573833974460819, "grad_norm": 0.78515625, "learning_rate": 0.00014902129940052542, "loss": 0.8918, "step": 22348 }, { "epoch": 0.5738596516567408, "grad_norm": 0.83203125, "learning_rate": 0.00014901740834155318, "loss": 0.9347, "step": 22349 }, { "epoch": 0.5738853288526626, "grad_norm": 0.73828125, "learning_rate": 0.0001490135171848936, "loss": 0.8044, "step": 22350 }, { "epoch": 0.5739110060485845, "grad_norm": 0.86328125, "learning_rate": 0.00014900962593055454, "loss": 1.1027, "step": 22351 }, { "epoch": 0.5739366832445063, "grad_norm": 0.7734375, "learning_rate": 0.00014900573457854373, "loss": 0.8631, "step": 22352 }, { "epoch": 0.5739623604404281, "grad_norm": 0.78125, "learning_rate": 0.0001490018431288689, "loss": 0.8028, "step": 22353 }, { "epoch": 0.5739880376363499, "grad_norm": 0.73046875, "learning_rate": 0.0001489979515815378, "loss": 0.8339, "step": 22354 }, { "epoch": 0.5740137148322717, "grad_norm": 0.83984375, "learning_rate": 0.00014899405993655824, "loss": 1.0028, "step": 22355 }, { "epoch": 0.5740393920281935, "grad_norm": 0.83203125, "learning_rate": 0.00014899016819393797, "loss": 0.8517, "step": 22356 }, { "epoch": 0.5740650692241154, "grad_norm": 0.84375, "learning_rate": 0.00014898627635368466, "loss": 0.8623, "step": 22357 }, { "epoch": 0.5740907464200372, "grad_norm": 0.7109375, "learning_rate": 0.00014898238441580616, "loss": 0.9531, "step": 22358 }, { "epoch": 0.574116423615959, "grad_norm": 0.78125, "learning_rate": 0.00014897849238031019, "loss": 0.8664, "step": 22359 }, { "epoch": 0.5741421008118809, "grad_norm": 0.77734375, "learning_rate": 0.00014897460024720452, "loss": 0.8968, "step": 22360 }, { "epoch": 0.5741677780078026, "grad_norm": 0.73046875, "learning_rate": 0.00014897070801649686, "loss": 0.7888, "step": 22361 }, { "epoch": 0.5741934552037244, "grad_norm": 0.78125, "learning_rate": 0.00014896681568819504, "loss": 0.898, "step": 22362 }, { "epoch": 0.5742191323996463, "grad_norm": 0.83984375, "learning_rate": 0.00014896292326230676, "loss": 1.0733, "step": 22363 }, { "epoch": 0.5742448095955681, "grad_norm": 0.69921875, "learning_rate": 0.0001489590307388398, "loss": 0.8125, "step": 22364 }, { "epoch": 0.57427048679149, "grad_norm": 0.83203125, "learning_rate": 0.00014895513811780193, "loss": 1.0148, "step": 22365 }, { "epoch": 0.5742961639874118, "grad_norm": 0.765625, "learning_rate": 0.00014895124539920086, "loss": 0.8341, "step": 22366 }, { "epoch": 0.5743218411833336, "grad_norm": 0.80859375, "learning_rate": 0.0001489473525830444, "loss": 0.8573, "step": 22367 }, { "epoch": 0.5743475183792554, "grad_norm": 0.81640625, "learning_rate": 0.00014894345966934028, "loss": 0.8637, "step": 22368 }, { "epoch": 0.5743731955751772, "grad_norm": 0.75, "learning_rate": 0.0001489395666580963, "loss": 1.0015, "step": 22369 }, { "epoch": 0.574398872771099, "grad_norm": 0.76953125, "learning_rate": 0.00014893567354932016, "loss": 0.7962, "step": 22370 }, { "epoch": 0.5744245499670209, "grad_norm": 0.75390625, "learning_rate": 0.0001489317803430196, "loss": 0.7873, "step": 22371 }, { "epoch": 0.5744502271629427, "grad_norm": 0.8515625, "learning_rate": 0.00014892788703920249, "loss": 0.9795, "step": 22372 }, { "epoch": 0.5744759043588645, "grad_norm": 0.83203125, "learning_rate": 0.00014892399363787648, "loss": 0.888, "step": 22373 }, { "epoch": 0.5745015815547863, "grad_norm": 0.76953125, "learning_rate": 0.00014892010013904935, "loss": 0.8653, "step": 22374 }, { "epoch": 0.5745272587507081, "grad_norm": 0.73828125, "learning_rate": 0.00014891620654272893, "loss": 0.8153, "step": 22375 }, { "epoch": 0.5745529359466299, "grad_norm": 0.734375, "learning_rate": 0.00014891231284892294, "loss": 0.9029, "step": 22376 }, { "epoch": 0.5745786131425518, "grad_norm": 0.7578125, "learning_rate": 0.00014890841905763907, "loss": 0.9023, "step": 22377 }, { "epoch": 0.5746042903384736, "grad_norm": 0.8515625, "learning_rate": 0.00014890452516888518, "loss": 0.8439, "step": 22378 }, { "epoch": 0.5746299675343954, "grad_norm": 0.8046875, "learning_rate": 0.00014890063118266894, "loss": 0.8708, "step": 22379 }, { "epoch": 0.5746556447303173, "grad_norm": 0.78125, "learning_rate": 0.00014889673709899822, "loss": 1.0059, "step": 22380 }, { "epoch": 0.574681321926239, "grad_norm": 0.80859375, "learning_rate": 0.00014889284291788067, "loss": 0.8649, "step": 22381 }, { "epoch": 0.5747069991221608, "grad_norm": 0.75, "learning_rate": 0.00014888894863932415, "loss": 0.8444, "step": 22382 }, { "epoch": 0.5747326763180827, "grad_norm": 0.78515625, "learning_rate": 0.00014888505426333633, "loss": 0.8365, "step": 22383 }, { "epoch": 0.5747583535140045, "grad_norm": 0.79296875, "learning_rate": 0.00014888115978992504, "loss": 0.8838, "step": 22384 }, { "epoch": 0.5747840307099263, "grad_norm": 0.72265625, "learning_rate": 0.000148877265219098, "loss": 0.9126, "step": 22385 }, { "epoch": 0.5748097079058482, "grad_norm": 0.7890625, "learning_rate": 0.000148873370550863, "loss": 0.9151, "step": 22386 }, { "epoch": 0.57483538510177, "grad_norm": 0.76171875, "learning_rate": 0.00014886947578522777, "loss": 0.8048, "step": 22387 }, { "epoch": 0.5748610622976917, "grad_norm": 0.76953125, "learning_rate": 0.00014886558092220008, "loss": 0.9052, "step": 22388 }, { "epoch": 0.5748867394936136, "grad_norm": 0.796875, "learning_rate": 0.00014886168596178772, "loss": 0.8853, "step": 22389 }, { "epoch": 0.5749124166895354, "grad_norm": 0.75390625, "learning_rate": 0.00014885779090399845, "loss": 0.8664, "step": 22390 }, { "epoch": 0.5749380938854572, "grad_norm": 0.8359375, "learning_rate": 0.00014885389574884, "loss": 0.8993, "step": 22391 }, { "epoch": 0.5749637710813791, "grad_norm": 0.74609375, "learning_rate": 0.00014885000049632015, "loss": 0.895, "step": 22392 }, { "epoch": 0.5749894482773009, "grad_norm": 0.72265625, "learning_rate": 0.00014884610514644665, "loss": 0.7812, "step": 22393 }, { "epoch": 0.5750151254732226, "grad_norm": 0.7734375, "learning_rate": 0.00014884220969922727, "loss": 0.8654, "step": 22394 }, { "epoch": 0.5750408026691445, "grad_norm": 0.73046875, "learning_rate": 0.00014883831415466977, "loss": 0.9379, "step": 22395 }, { "epoch": 0.5750664798650663, "grad_norm": 0.77734375, "learning_rate": 0.00014883441851278194, "loss": 0.9262, "step": 22396 }, { "epoch": 0.5750921570609882, "grad_norm": 0.7890625, "learning_rate": 0.00014883052277357155, "loss": 1.0534, "step": 22397 }, { "epoch": 0.57511783425691, "grad_norm": 0.8125, "learning_rate": 0.0001488266269370463, "loss": 0.8745, "step": 22398 }, { "epoch": 0.5751435114528318, "grad_norm": 0.78515625, "learning_rate": 0.00014882273100321402, "loss": 0.8597, "step": 22399 }, { "epoch": 0.5751691886487537, "grad_norm": 0.75, "learning_rate": 0.0001488188349720824, "loss": 0.9571, "step": 22400 }, { "epoch": 0.5751948658446754, "grad_norm": 0.78515625, "learning_rate": 0.00014881493884365932, "loss": 0.8516, "step": 22401 }, { "epoch": 0.5752205430405972, "grad_norm": 0.734375, "learning_rate": 0.00014881104261795246, "loss": 1.0052, "step": 22402 }, { "epoch": 0.5752462202365191, "grad_norm": 0.74609375, "learning_rate": 0.00014880714629496956, "loss": 0.7461, "step": 22403 }, { "epoch": 0.5752718974324409, "grad_norm": 0.7890625, "learning_rate": 0.00014880324987471844, "loss": 0.94, "step": 22404 }, { "epoch": 0.5752975746283627, "grad_norm": 0.7734375, "learning_rate": 0.00014879935335720688, "loss": 0.7663, "step": 22405 }, { "epoch": 0.5753232518242846, "grad_norm": 0.828125, "learning_rate": 0.00014879545674244257, "loss": 0.9639, "step": 22406 }, { "epoch": 0.5753489290202064, "grad_norm": 0.79296875, "learning_rate": 0.00014879156003043336, "loss": 0.9522, "step": 22407 }, { "epoch": 0.5753746062161281, "grad_norm": 0.80859375, "learning_rate": 0.00014878766322118698, "loss": 0.7465, "step": 22408 }, { "epoch": 0.57540028341205, "grad_norm": 0.70703125, "learning_rate": 0.0001487837663147112, "loss": 0.8226, "step": 22409 }, { "epoch": 0.5754259606079718, "grad_norm": 0.7734375, "learning_rate": 0.00014877986931101375, "loss": 0.8851, "step": 22410 }, { "epoch": 0.5754516378038936, "grad_norm": 0.8359375, "learning_rate": 0.00014877597221010243, "loss": 0.8164, "step": 22411 }, { "epoch": 0.5754773149998155, "grad_norm": 0.8359375, "learning_rate": 0.000148772075011985, "loss": 0.8742, "step": 22412 }, { "epoch": 0.5755029921957373, "grad_norm": 0.81640625, "learning_rate": 0.00014876817771666926, "loss": 0.8853, "step": 22413 }, { "epoch": 0.575528669391659, "grad_norm": 0.7109375, "learning_rate": 0.00014876428032416293, "loss": 0.7536, "step": 22414 }, { "epoch": 0.5755543465875809, "grad_norm": 0.73828125, "learning_rate": 0.0001487603828344738, "loss": 0.9458, "step": 22415 }, { "epoch": 0.5755800237835027, "grad_norm": 0.70703125, "learning_rate": 0.00014875648524760961, "loss": 1.0239, "step": 22416 }, { "epoch": 0.5756057009794245, "grad_norm": 0.91796875, "learning_rate": 0.00014875258756357818, "loss": 0.8544, "step": 22417 }, { "epoch": 0.5756313781753464, "grad_norm": 0.7578125, "learning_rate": 0.00014874868978238722, "loss": 0.8965, "step": 22418 }, { "epoch": 0.5756570553712682, "grad_norm": 0.8515625, "learning_rate": 0.0001487447919040445, "loss": 0.9034, "step": 22419 }, { "epoch": 0.57568273256719, "grad_norm": 0.73046875, "learning_rate": 0.00014874089392855787, "loss": 0.7984, "step": 22420 }, { "epoch": 0.5757084097631118, "grad_norm": 0.81640625, "learning_rate": 0.00014873699585593502, "loss": 0.92, "step": 22421 }, { "epoch": 0.5757340869590336, "grad_norm": 0.7734375, "learning_rate": 0.00014873309768618375, "loss": 0.8601, "step": 22422 }, { "epoch": 0.5757597641549554, "grad_norm": 0.8125, "learning_rate": 0.00014872919941931183, "loss": 0.9155, "step": 22423 }, { "epoch": 0.5757854413508773, "grad_norm": 0.75390625, "learning_rate": 0.00014872530105532699, "loss": 0.8899, "step": 22424 }, { "epoch": 0.5758111185467991, "grad_norm": 0.8125, "learning_rate": 0.00014872140259423702, "loss": 0.8097, "step": 22425 }, { "epoch": 0.575836795742721, "grad_norm": 0.81640625, "learning_rate": 0.00014871750403604972, "loss": 0.9279, "step": 22426 }, { "epoch": 0.5758624729386428, "grad_norm": 2.015625, "learning_rate": 0.0001487136053807728, "loss": 0.8349, "step": 22427 }, { "epoch": 0.5758881501345645, "grad_norm": 0.75, "learning_rate": 0.0001487097066284141, "loss": 1.0322, "step": 22428 }, { "epoch": 0.5759138273304863, "grad_norm": 0.66015625, "learning_rate": 0.00014870580777898135, "loss": 0.8332, "step": 22429 }, { "epoch": 0.5759395045264082, "grad_norm": 0.77734375, "learning_rate": 0.0001487019088324823, "loss": 0.9713, "step": 22430 }, { "epoch": 0.57596518172233, "grad_norm": 0.82421875, "learning_rate": 0.00014869800978892478, "loss": 0.9855, "step": 22431 }, { "epoch": 0.5759908589182519, "grad_norm": 0.8203125, "learning_rate": 0.0001486941106483165, "loss": 0.8639, "step": 22432 }, { "epoch": 0.5760165361141737, "grad_norm": 0.7890625, "learning_rate": 0.00014869021141066525, "loss": 0.9361, "step": 22433 }, { "epoch": 0.5760422133100954, "grad_norm": 0.75390625, "learning_rate": 0.00014868631207597883, "loss": 0.7821, "step": 22434 }, { "epoch": 0.5760678905060173, "grad_norm": 0.74609375, "learning_rate": 0.00014868241264426497, "loss": 0.7574, "step": 22435 }, { "epoch": 0.5760935677019391, "grad_norm": 0.703125, "learning_rate": 0.00014867851311553147, "loss": 0.8702, "step": 22436 }, { "epoch": 0.5761192448978609, "grad_norm": 0.9453125, "learning_rate": 0.0001486746134897861, "loss": 0.8159, "step": 22437 }, { "epoch": 0.5761449220937828, "grad_norm": 0.87109375, "learning_rate": 0.00014867071376703658, "loss": 0.8299, "step": 22438 }, { "epoch": 0.5761705992897046, "grad_norm": 0.75, "learning_rate": 0.00014866681394729077, "loss": 0.8158, "step": 22439 }, { "epoch": 0.5761962764856264, "grad_norm": 0.91796875, "learning_rate": 0.00014866291403055638, "loss": 0.8746, "step": 22440 }, { "epoch": 0.5762219536815482, "grad_norm": 0.8203125, "learning_rate": 0.00014865901401684122, "loss": 1.0011, "step": 22441 }, { "epoch": 0.57624763087747, "grad_norm": 0.78125, "learning_rate": 0.00014865511390615302, "loss": 0.894, "step": 22442 }, { "epoch": 0.5762733080733918, "grad_norm": 0.69921875, "learning_rate": 0.0001486512136984995, "loss": 0.7689, "step": 22443 }, { "epoch": 0.5762989852693137, "grad_norm": 0.95703125, "learning_rate": 0.00014864731339388863, "loss": 0.8765, "step": 22444 }, { "epoch": 0.5763246624652355, "grad_norm": 0.75, "learning_rate": 0.000148643412992328, "loss": 0.7243, "step": 22445 }, { "epoch": 0.5763503396611573, "grad_norm": 0.6953125, "learning_rate": 0.00014863951249382545, "loss": 0.747, "step": 22446 }, { "epoch": 0.5763760168570792, "grad_norm": 0.75390625, "learning_rate": 0.00014863561189838872, "loss": 0.8847, "step": 22447 }, { "epoch": 0.5764016940530009, "grad_norm": 0.83203125, "learning_rate": 0.00014863171120602564, "loss": 0.936, "step": 22448 }, { "epoch": 0.5764273712489227, "grad_norm": 0.7890625, "learning_rate": 0.00014862781041674394, "loss": 0.958, "step": 22449 }, { "epoch": 0.5764530484448446, "grad_norm": 0.828125, "learning_rate": 0.0001486239095305514, "loss": 0.8594, "step": 22450 }, { "epoch": 0.5764787256407664, "grad_norm": 0.74609375, "learning_rate": 0.00014862000854745582, "loss": 0.8776, "step": 22451 }, { "epoch": 0.5765044028366882, "grad_norm": 0.78125, "learning_rate": 0.00014861610746746496, "loss": 0.947, "step": 22452 }, { "epoch": 0.5765300800326101, "grad_norm": 0.80859375, "learning_rate": 0.00014861220629058656, "loss": 0.9121, "step": 22453 }, { "epoch": 0.5765557572285318, "grad_norm": 0.76953125, "learning_rate": 0.00014860830501682844, "loss": 0.9368, "step": 22454 }, { "epoch": 0.5765814344244536, "grad_norm": 0.73046875, "learning_rate": 0.00014860440364619838, "loss": 0.7777, "step": 22455 }, { "epoch": 0.5766071116203755, "grad_norm": 0.79296875, "learning_rate": 0.00014860050217870406, "loss": 0.8074, "step": 22456 }, { "epoch": 0.5766327888162973, "grad_norm": 0.80859375, "learning_rate": 0.00014859660061435342, "loss": 0.9892, "step": 22457 }, { "epoch": 0.5766584660122192, "grad_norm": 0.76953125, "learning_rate": 0.00014859269895315408, "loss": 0.9228, "step": 22458 }, { "epoch": 0.576684143208141, "grad_norm": 0.84375, "learning_rate": 0.0001485887971951139, "loss": 0.9505, "step": 22459 }, { "epoch": 0.5767098204040628, "grad_norm": 0.73046875, "learning_rate": 0.00014858489534024064, "loss": 0.8186, "step": 22460 }, { "epoch": 0.5767354975999845, "grad_norm": 0.8359375, "learning_rate": 0.0001485809933885421, "loss": 0.7582, "step": 22461 }, { "epoch": 0.5767611747959064, "grad_norm": 0.796875, "learning_rate": 0.000148577091340026, "loss": 0.9281, "step": 22462 }, { "epoch": 0.5767868519918282, "grad_norm": 0.85546875, "learning_rate": 0.00014857318919470012, "loss": 1.0845, "step": 22463 }, { "epoch": 0.5768125291877501, "grad_norm": 0.84765625, "learning_rate": 0.00014856928695257228, "loss": 0.8749, "step": 22464 }, { "epoch": 0.5768382063836719, "grad_norm": 0.82421875, "learning_rate": 0.00014856538461365025, "loss": 1.0501, "step": 22465 }, { "epoch": 0.5768638835795937, "grad_norm": 0.73046875, "learning_rate": 0.00014856148217794182, "loss": 0.769, "step": 22466 }, { "epoch": 0.5768895607755156, "grad_norm": 0.78125, "learning_rate": 0.0001485575796454547, "loss": 0.8434, "step": 22467 }, { "epoch": 0.5769152379714373, "grad_norm": 0.8515625, "learning_rate": 0.00014855367701619674, "loss": 0.8036, "step": 22468 }, { "epoch": 0.5769409151673591, "grad_norm": 0.828125, "learning_rate": 0.00014854977429017567, "loss": 0.9792, "step": 22469 }, { "epoch": 0.576966592363281, "grad_norm": 0.77734375, "learning_rate": 0.00014854587146739928, "loss": 0.9008, "step": 22470 }, { "epoch": 0.5769922695592028, "grad_norm": 0.80859375, "learning_rate": 0.00014854196854787537, "loss": 0.7692, "step": 22471 }, { "epoch": 0.5770179467551246, "grad_norm": 0.74609375, "learning_rate": 0.0001485380655316117, "loss": 0.8605, "step": 22472 }, { "epoch": 0.5770436239510465, "grad_norm": 0.74609375, "learning_rate": 0.00014853416241861602, "loss": 0.8362, "step": 22473 }, { "epoch": 0.5770693011469682, "grad_norm": 0.72265625, "learning_rate": 0.00014853025920889617, "loss": 0.8016, "step": 22474 }, { "epoch": 0.57709497834289, "grad_norm": 0.79296875, "learning_rate": 0.00014852635590245988, "loss": 0.9318, "step": 22475 }, { "epoch": 0.5771206555388119, "grad_norm": 0.79296875, "learning_rate": 0.00014852245249931495, "loss": 0.8037, "step": 22476 }, { "epoch": 0.5771463327347337, "grad_norm": 0.828125, "learning_rate": 0.00014851854899946917, "loss": 0.8317, "step": 22477 }, { "epoch": 0.5771720099306555, "grad_norm": 0.73828125, "learning_rate": 0.00014851464540293027, "loss": 0.7967, "step": 22478 }, { "epoch": 0.5771976871265774, "grad_norm": 0.80859375, "learning_rate": 0.0001485107417097061, "loss": 0.9542, "step": 22479 }, { "epoch": 0.5772233643224992, "grad_norm": 0.73046875, "learning_rate": 0.0001485068379198044, "loss": 0.8535, "step": 22480 }, { "epoch": 0.5772490415184209, "grad_norm": 0.80859375, "learning_rate": 0.00014850293403323296, "loss": 0.9882, "step": 22481 }, { "epoch": 0.5772747187143428, "grad_norm": 0.76171875, "learning_rate": 0.00014849903004999953, "loss": 0.862, "step": 22482 }, { "epoch": 0.5773003959102646, "grad_norm": 0.77734375, "learning_rate": 0.0001484951259701119, "loss": 0.7934, "step": 22483 }, { "epoch": 0.5773260731061864, "grad_norm": 0.76953125, "learning_rate": 0.00014849122179357791, "loss": 0.7442, "step": 22484 }, { "epoch": 0.5773517503021083, "grad_norm": 0.8125, "learning_rate": 0.00014848731752040525, "loss": 0.858, "step": 22485 }, { "epoch": 0.5773774274980301, "grad_norm": 0.73046875, "learning_rate": 0.00014848341315060176, "loss": 0.9237, "step": 22486 }, { "epoch": 0.577403104693952, "grad_norm": 0.7578125, "learning_rate": 0.00014847950868417524, "loss": 0.7946, "step": 22487 }, { "epoch": 0.5774287818898737, "grad_norm": 0.7421875, "learning_rate": 0.0001484756041211334, "loss": 0.8499, "step": 22488 }, { "epoch": 0.5774544590857955, "grad_norm": 0.77734375, "learning_rate": 0.0001484716994614841, "loss": 0.844, "step": 22489 }, { "epoch": 0.5774801362817173, "grad_norm": 0.7890625, "learning_rate": 0.00014846779470523505, "loss": 0.8167, "step": 22490 }, { "epoch": 0.5775058134776392, "grad_norm": 0.8125, "learning_rate": 0.00014846388985239405, "loss": 0.9537, "step": 22491 }, { "epoch": 0.577531490673561, "grad_norm": 0.75390625, "learning_rate": 0.0001484599849029689, "loss": 0.8592, "step": 22492 }, { "epoch": 0.5775571678694829, "grad_norm": 0.7109375, "learning_rate": 0.0001484560798569674, "loss": 1.0361, "step": 22493 }, { "epoch": 0.5775828450654046, "grad_norm": 0.75390625, "learning_rate": 0.0001484521747143973, "loss": 0.9053, "step": 22494 }, { "epoch": 0.5776085222613264, "grad_norm": 0.89453125, "learning_rate": 0.0001484482694752664, "loss": 0.8407, "step": 22495 }, { "epoch": 0.5776341994572483, "grad_norm": 0.765625, "learning_rate": 0.00014844436413958246, "loss": 0.9822, "step": 22496 }, { "epoch": 0.5776598766531701, "grad_norm": 0.8671875, "learning_rate": 0.0001484404587073533, "loss": 0.9622, "step": 22497 }, { "epoch": 0.5776855538490919, "grad_norm": 0.7578125, "learning_rate": 0.00014843655317858668, "loss": 0.8924, "step": 22498 }, { "epoch": 0.5777112310450138, "grad_norm": 0.921875, "learning_rate": 0.00014843264755329033, "loss": 0.9913, "step": 22499 }, { "epoch": 0.5777369082409356, "grad_norm": 0.70703125, "learning_rate": 0.00014842874183147216, "loss": 0.8161, "step": 22500 }, { "epoch": 0.5777625854368573, "grad_norm": 0.765625, "learning_rate": 0.00014842483601313985, "loss": 0.6835, "step": 22501 }, { "epoch": 0.5777882626327792, "grad_norm": 0.77734375, "learning_rate": 0.0001484209300983012, "loss": 0.8564, "step": 22502 }, { "epoch": 0.577813939828701, "grad_norm": 0.75, "learning_rate": 0.00014841702408696406, "loss": 0.8837, "step": 22503 }, { "epoch": 0.5778396170246228, "grad_norm": 0.7734375, "learning_rate": 0.0001484131179791361, "loss": 0.8916, "step": 22504 }, { "epoch": 0.5778652942205447, "grad_norm": 0.7734375, "learning_rate": 0.0001484092117748252, "loss": 0.938, "step": 22505 }, { "epoch": 0.5778909714164665, "grad_norm": 0.84375, "learning_rate": 0.00014840530547403913, "loss": 1.011, "step": 22506 }, { "epoch": 0.5779166486123883, "grad_norm": 0.80078125, "learning_rate": 0.00014840139907678566, "loss": 0.7641, "step": 22507 }, { "epoch": 0.5779423258083101, "grad_norm": 0.75390625, "learning_rate": 0.00014839749258307257, "loss": 0.8679, "step": 22508 }, { "epoch": 0.5779680030042319, "grad_norm": 0.765625, "learning_rate": 0.00014839358599290763, "loss": 0.6797, "step": 22509 }, { "epoch": 0.5779936802001537, "grad_norm": 0.75, "learning_rate": 0.00014838967930629862, "loss": 0.8205, "step": 22510 }, { "epoch": 0.5780193573960756, "grad_norm": 0.81640625, "learning_rate": 0.0001483857725232534, "loss": 0.8588, "step": 22511 }, { "epoch": 0.5780450345919974, "grad_norm": 0.77734375, "learning_rate": 0.0001483818656437797, "loss": 0.8139, "step": 22512 }, { "epoch": 0.5780707117879192, "grad_norm": 0.72265625, "learning_rate": 0.00014837795866788528, "loss": 0.7979, "step": 22513 }, { "epoch": 0.578096388983841, "grad_norm": 0.7265625, "learning_rate": 0.000148374051595578, "loss": 0.7335, "step": 22514 }, { "epoch": 0.5781220661797628, "grad_norm": 0.80859375, "learning_rate": 0.00014837014442686556, "loss": 0.8739, "step": 22515 }, { "epoch": 0.5781477433756846, "grad_norm": 0.765625, "learning_rate": 0.00014836623716175585, "loss": 1.0068, "step": 22516 }, { "epoch": 0.5781734205716065, "grad_norm": 0.7890625, "learning_rate": 0.00014836232980025655, "loss": 0.8082, "step": 22517 }, { "epoch": 0.5781990977675283, "grad_norm": 0.8046875, "learning_rate": 0.0001483584223423755, "loss": 0.9442, "step": 22518 }, { "epoch": 0.5782247749634501, "grad_norm": 0.75, "learning_rate": 0.00014835451478812048, "loss": 0.8191, "step": 22519 }, { "epoch": 0.578250452159372, "grad_norm": 0.69140625, "learning_rate": 0.0001483506071374993, "loss": 0.8892, "step": 22520 }, { "epoch": 0.5782761293552937, "grad_norm": 0.76171875, "learning_rate": 0.00014834669939051972, "loss": 0.8901, "step": 22521 }, { "epoch": 0.5783018065512155, "grad_norm": 0.75, "learning_rate": 0.00014834279154718954, "loss": 0.9316, "step": 22522 }, { "epoch": 0.5783274837471374, "grad_norm": 0.7578125, "learning_rate": 0.00014833888360751653, "loss": 0.7893, "step": 22523 }, { "epoch": 0.5783531609430592, "grad_norm": 0.78515625, "learning_rate": 0.00014833497557150847, "loss": 0.8293, "step": 22524 }, { "epoch": 0.578378838138981, "grad_norm": 0.79296875, "learning_rate": 0.00014833106743917322, "loss": 0.8704, "step": 22525 }, { "epoch": 0.5784045153349029, "grad_norm": 0.7890625, "learning_rate": 0.00014832715921051849, "loss": 0.8835, "step": 22526 }, { "epoch": 0.5784301925308246, "grad_norm": 0.79296875, "learning_rate": 0.00014832325088555212, "loss": 0.7468, "step": 22527 }, { "epoch": 0.5784558697267465, "grad_norm": 0.78125, "learning_rate": 0.00014831934246428184, "loss": 0.8448, "step": 22528 }, { "epoch": 0.5784815469226683, "grad_norm": 0.72265625, "learning_rate": 0.0001483154339467155, "loss": 0.8682, "step": 22529 }, { "epoch": 0.5785072241185901, "grad_norm": 0.84375, "learning_rate": 0.00014831152533286085, "loss": 0.8071, "step": 22530 }, { "epoch": 0.578532901314512, "grad_norm": 0.80859375, "learning_rate": 0.0001483076166227257, "loss": 0.849, "step": 22531 }, { "epoch": 0.5785585785104338, "grad_norm": 0.7578125, "learning_rate": 0.00014830370781631787, "loss": 1.0144, "step": 22532 }, { "epoch": 0.5785842557063556, "grad_norm": 0.7734375, "learning_rate": 0.00014829979891364508, "loss": 0.8437, "step": 22533 }, { "epoch": 0.5786099329022774, "grad_norm": 0.7109375, "learning_rate": 0.00014829588991471514, "loss": 0.7966, "step": 22534 }, { "epoch": 0.5786356100981992, "grad_norm": 0.8046875, "learning_rate": 0.00014829198081953585, "loss": 1.0093, "step": 22535 }, { "epoch": 0.578661287294121, "grad_norm": 0.76171875, "learning_rate": 0.00014828807162811503, "loss": 0.7523, "step": 22536 }, { "epoch": 0.5786869644900429, "grad_norm": 0.75, "learning_rate": 0.00014828416234046046, "loss": 0.8232, "step": 22537 }, { "epoch": 0.5787126416859647, "grad_norm": 0.75390625, "learning_rate": 0.00014828025295657987, "loss": 0.8333, "step": 22538 }, { "epoch": 0.5787383188818865, "grad_norm": 0.9296875, "learning_rate": 0.00014827634347648112, "loss": 0.8531, "step": 22539 }, { "epoch": 0.5787639960778084, "grad_norm": 0.78125, "learning_rate": 0.00014827243390017197, "loss": 0.8192, "step": 22540 }, { "epoch": 0.5787896732737301, "grad_norm": 0.77734375, "learning_rate": 0.00014826852422766023, "loss": 0.8912, "step": 22541 }, { "epoch": 0.5788153504696519, "grad_norm": 0.73046875, "learning_rate": 0.00014826461445895366, "loss": 0.9015, "step": 22542 }, { "epoch": 0.5788410276655738, "grad_norm": 0.7578125, "learning_rate": 0.00014826070459406015, "loss": 0.8799, "step": 22543 }, { "epoch": 0.5788667048614956, "grad_norm": 0.75390625, "learning_rate": 0.00014825679463298733, "loss": 0.8319, "step": 22544 }, { "epoch": 0.5788923820574174, "grad_norm": 0.89453125, "learning_rate": 0.00014825288457574311, "loss": 0.8673, "step": 22545 }, { "epoch": 0.5789180592533393, "grad_norm": 0.7421875, "learning_rate": 0.00014824897442233524, "loss": 0.7855, "step": 22546 }, { "epoch": 0.578943736449261, "grad_norm": 0.72265625, "learning_rate": 0.00014824506417277154, "loss": 0.8927, "step": 22547 }, { "epoch": 0.5789694136451828, "grad_norm": 0.84765625, "learning_rate": 0.00014824115382705975, "loss": 0.8189, "step": 22548 }, { "epoch": 0.5789950908411047, "grad_norm": 0.82421875, "learning_rate": 0.00014823724338520775, "loss": 0.7358, "step": 22549 }, { "epoch": 0.5790207680370265, "grad_norm": 0.78125, "learning_rate": 0.00014823333284722323, "loss": 1.0435, "step": 22550 }, { "epoch": 0.5790464452329483, "grad_norm": 0.796875, "learning_rate": 0.00014822942221311407, "loss": 0.8646, "step": 22551 }, { "epoch": 0.5790721224288702, "grad_norm": 0.86328125, "learning_rate": 0.00014822551148288798, "loss": 0.855, "step": 22552 }, { "epoch": 0.579097799624792, "grad_norm": 0.78125, "learning_rate": 0.00014822160065655287, "loss": 0.9471, "step": 22553 }, { "epoch": 0.5791234768207137, "grad_norm": 0.7578125, "learning_rate": 0.00014821768973411644, "loss": 0.8609, "step": 22554 }, { "epoch": 0.5791491540166356, "grad_norm": 0.79296875, "learning_rate": 0.0001482137787155865, "loss": 0.7335, "step": 22555 }, { "epoch": 0.5791748312125574, "grad_norm": 0.80859375, "learning_rate": 0.00014820986760097083, "loss": 0.8624, "step": 22556 }, { "epoch": 0.5792005084084793, "grad_norm": 0.859375, "learning_rate": 0.00014820595639027727, "loss": 1.0604, "step": 22557 }, { "epoch": 0.5792261856044011, "grad_norm": 0.80859375, "learning_rate": 0.00014820204508351358, "loss": 0.8475, "step": 22558 }, { "epoch": 0.5792518628003229, "grad_norm": 0.74609375, "learning_rate": 0.0001481981336806876, "loss": 0.7878, "step": 22559 }, { "epoch": 0.5792775399962448, "grad_norm": 0.77734375, "learning_rate": 0.00014819422218180706, "loss": 0.9117, "step": 22560 }, { "epoch": 0.5793032171921665, "grad_norm": 0.78515625, "learning_rate": 0.00014819031058687984, "loss": 0.8572, "step": 22561 }, { "epoch": 0.5793288943880883, "grad_norm": 0.76953125, "learning_rate": 0.00014818639889591366, "loss": 0.7904, "step": 22562 }, { "epoch": 0.5793545715840102, "grad_norm": 0.78125, "learning_rate": 0.0001481824871089163, "loss": 0.8631, "step": 22563 }, { "epoch": 0.579380248779932, "grad_norm": 0.8515625, "learning_rate": 0.00014817857522589565, "loss": 0.8639, "step": 22564 }, { "epoch": 0.5794059259758538, "grad_norm": 0.7109375, "learning_rate": 0.00014817466324685942, "loss": 0.848, "step": 22565 }, { "epoch": 0.5794316031717757, "grad_norm": 0.77734375, "learning_rate": 0.00014817075117181544, "loss": 0.7825, "step": 22566 }, { "epoch": 0.5794572803676974, "grad_norm": 0.796875, "learning_rate": 0.00014816683900077153, "loss": 0.9755, "step": 22567 }, { "epoch": 0.5794829575636192, "grad_norm": 0.74609375, "learning_rate": 0.00014816292673373543, "loss": 1.0038, "step": 22568 }, { "epoch": 0.5795086347595411, "grad_norm": 0.80078125, "learning_rate": 0.000148159014370715, "loss": 0.7639, "step": 22569 }, { "epoch": 0.5795343119554629, "grad_norm": 0.7421875, "learning_rate": 0.00014815510191171796, "loss": 0.7618, "step": 22570 }, { "epoch": 0.5795599891513847, "grad_norm": 0.73046875, "learning_rate": 0.00014815118935675216, "loss": 0.7793, "step": 22571 }, { "epoch": 0.5795856663473066, "grad_norm": 0.76171875, "learning_rate": 0.0001481472767058254, "loss": 0.9014, "step": 22572 }, { "epoch": 0.5796113435432284, "grad_norm": 0.78125, "learning_rate": 0.00014814336395894547, "loss": 0.7844, "step": 22573 }, { "epoch": 0.5796370207391501, "grad_norm": 0.81640625, "learning_rate": 0.00014813945111612017, "loss": 0.8406, "step": 22574 }, { "epoch": 0.579662697935072, "grad_norm": 0.75390625, "learning_rate": 0.0001481355381773573, "loss": 0.8002, "step": 22575 }, { "epoch": 0.5796883751309938, "grad_norm": 0.8046875, "learning_rate": 0.00014813162514266463, "loss": 0.9431, "step": 22576 }, { "epoch": 0.5797140523269156, "grad_norm": 0.76171875, "learning_rate": 0.00014812771201204997, "loss": 0.9836, "step": 22577 }, { "epoch": 0.5797397295228375, "grad_norm": 0.8125, "learning_rate": 0.00014812379878552116, "loss": 0.9139, "step": 22578 }, { "epoch": 0.5797654067187593, "grad_norm": 0.7890625, "learning_rate": 0.00014811988546308596, "loss": 0.8713, "step": 22579 }, { "epoch": 0.5797910839146811, "grad_norm": 0.80078125, "learning_rate": 0.00014811597204475216, "loss": 0.922, "step": 22580 }, { "epoch": 0.5798167611106029, "grad_norm": 0.78125, "learning_rate": 0.00014811205853052756, "loss": 0.9547, "step": 22581 }, { "epoch": 0.5798424383065247, "grad_norm": 0.81640625, "learning_rate": 0.00014810814492042, "loss": 0.972, "step": 22582 }, { "epoch": 0.5798681155024465, "grad_norm": 0.73046875, "learning_rate": 0.00014810423121443725, "loss": 0.6983, "step": 22583 }, { "epoch": 0.5798937926983684, "grad_norm": 0.7578125, "learning_rate": 0.0001481003174125871, "loss": 0.9075, "step": 22584 }, { "epoch": 0.5799194698942902, "grad_norm": 0.796875, "learning_rate": 0.00014809640351487737, "loss": 0.9286, "step": 22585 }, { "epoch": 0.579945147090212, "grad_norm": 0.75390625, "learning_rate": 0.00014809248952131588, "loss": 0.823, "step": 22586 }, { "epoch": 0.5799708242861338, "grad_norm": 0.7578125, "learning_rate": 0.00014808857543191034, "loss": 0.8477, "step": 22587 }, { "epoch": 0.5799965014820556, "grad_norm": 0.6875, "learning_rate": 0.00014808466124666865, "loss": 0.9121, "step": 22588 }, { "epoch": 0.5800221786779775, "grad_norm": 0.80078125, "learning_rate": 0.00014808074696559857, "loss": 1.056, "step": 22589 }, { "epoch": 0.5800478558738993, "grad_norm": 0.7890625, "learning_rate": 0.0001480768325887079, "loss": 0.8882, "step": 22590 }, { "epoch": 0.5800735330698211, "grad_norm": 0.7890625, "learning_rate": 0.00014807291811600446, "loss": 0.8583, "step": 22591 }, { "epoch": 0.580099210265743, "grad_norm": 0.73828125, "learning_rate": 0.00014806900354749603, "loss": 1.0149, "step": 22592 }, { "epoch": 0.5801248874616648, "grad_norm": 0.76171875, "learning_rate": 0.00014806508888319043, "loss": 0.8168, "step": 22593 }, { "epoch": 0.5801505646575865, "grad_norm": 0.80859375, "learning_rate": 0.00014806117412309545, "loss": 0.8504, "step": 22594 }, { "epoch": 0.5801762418535084, "grad_norm": 0.7578125, "learning_rate": 0.00014805725926721885, "loss": 0.9285, "step": 22595 }, { "epoch": 0.5802019190494302, "grad_norm": 0.76953125, "learning_rate": 0.0001480533443155685, "loss": 0.925, "step": 22596 }, { "epoch": 0.580227596245352, "grad_norm": 0.78515625, "learning_rate": 0.00014804942926815217, "loss": 0.9185, "step": 22597 }, { "epoch": 0.5802532734412739, "grad_norm": 0.71875, "learning_rate": 0.00014804551412497767, "loss": 0.7491, "step": 22598 }, { "epoch": 0.5802789506371957, "grad_norm": 0.76953125, "learning_rate": 0.00014804159888605282, "loss": 0.7686, "step": 22599 }, { "epoch": 0.5803046278331175, "grad_norm": 0.83203125, "learning_rate": 0.0001480376835513854, "loss": 0.9457, "step": 22600 }, { "epoch": 0.5803303050290393, "grad_norm": 0.81640625, "learning_rate": 0.00014803376812098318, "loss": 0.892, "step": 22601 }, { "epoch": 0.5803559822249611, "grad_norm": 0.765625, "learning_rate": 0.00014802985259485403, "loss": 0.9505, "step": 22602 }, { "epoch": 0.5803816594208829, "grad_norm": 0.92578125, "learning_rate": 0.00014802593697300567, "loss": 0.8569, "step": 22603 }, { "epoch": 0.5804073366168048, "grad_norm": 0.78125, "learning_rate": 0.000148022021255446, "loss": 0.727, "step": 22604 }, { "epoch": 0.5804330138127266, "grad_norm": 0.77734375, "learning_rate": 0.0001480181054421828, "loss": 0.8234, "step": 22605 }, { "epoch": 0.5804586910086484, "grad_norm": 0.83203125, "learning_rate": 0.00014801418953322383, "loss": 0.8918, "step": 22606 }, { "epoch": 0.5804843682045702, "grad_norm": 0.7890625, "learning_rate": 0.00014801027352857692, "loss": 0.8074, "step": 22607 }, { "epoch": 0.580510045400492, "grad_norm": 0.8125, "learning_rate": 0.00014800635742824987, "loss": 0.7839, "step": 22608 }, { "epoch": 0.5805357225964138, "grad_norm": 0.765625, "learning_rate": 0.0001480024412322505, "loss": 0.7783, "step": 22609 }, { "epoch": 0.5805613997923357, "grad_norm": 0.93359375, "learning_rate": 0.0001479985249405866, "loss": 0.7638, "step": 22610 }, { "epoch": 0.5805870769882575, "grad_norm": 0.65234375, "learning_rate": 0.00014799460855326592, "loss": 0.9242, "step": 22611 }, { "epoch": 0.5806127541841793, "grad_norm": 0.75390625, "learning_rate": 0.00014799069207029639, "loss": 0.8344, "step": 22612 }, { "epoch": 0.5806384313801012, "grad_norm": 0.78125, "learning_rate": 0.00014798677549168572, "loss": 0.9113, "step": 22613 }, { "epoch": 0.5806641085760229, "grad_norm": 0.734375, "learning_rate": 0.00014798285881744173, "loss": 1.0157, "step": 22614 }, { "epoch": 0.5806897857719447, "grad_norm": 0.7890625, "learning_rate": 0.00014797894204757225, "loss": 0.8295, "step": 22615 }, { "epoch": 0.5807154629678666, "grad_norm": 0.80859375, "learning_rate": 0.00014797502518208505, "loss": 0.8412, "step": 22616 }, { "epoch": 0.5807411401637884, "grad_norm": 0.73828125, "learning_rate": 0.000147971108220988, "loss": 0.8369, "step": 22617 }, { "epoch": 0.5807668173597103, "grad_norm": 0.72265625, "learning_rate": 0.00014796719116428884, "loss": 0.8747, "step": 22618 }, { "epoch": 0.5807924945556321, "grad_norm": 0.74609375, "learning_rate": 0.00014796327401199542, "loss": 0.899, "step": 22619 }, { "epoch": 0.5808181717515539, "grad_norm": 0.76953125, "learning_rate": 0.0001479593567641155, "loss": 0.9014, "step": 22620 }, { "epoch": 0.5808438489474756, "grad_norm": 0.8671875, "learning_rate": 0.00014795543942065691, "loss": 0.9897, "step": 22621 }, { "epoch": 0.5808695261433975, "grad_norm": 0.83203125, "learning_rate": 0.00014795152198162747, "loss": 0.9147, "step": 22622 }, { "epoch": 0.5808952033393193, "grad_norm": 0.72265625, "learning_rate": 0.00014794760444703498, "loss": 0.7267, "step": 22623 }, { "epoch": 0.5809208805352412, "grad_norm": 0.84375, "learning_rate": 0.00014794368681688726, "loss": 0.9704, "step": 22624 }, { "epoch": 0.580946557731163, "grad_norm": 0.76171875, "learning_rate": 0.00014793976909119208, "loss": 0.8635, "step": 22625 }, { "epoch": 0.5809722349270848, "grad_norm": 0.78515625, "learning_rate": 0.0001479358512699573, "loss": 0.7235, "step": 22626 }, { "epoch": 0.5809979121230066, "grad_norm": 0.79296875, "learning_rate": 0.00014793193335319068, "loss": 0.8851, "step": 22627 }, { "epoch": 0.5810235893189284, "grad_norm": 0.8984375, "learning_rate": 0.00014792801534090004, "loss": 0.8371, "step": 22628 }, { "epoch": 0.5810492665148502, "grad_norm": 0.7578125, "learning_rate": 0.0001479240972330932, "loss": 0.8657, "step": 22629 }, { "epoch": 0.5810749437107721, "grad_norm": 0.9375, "learning_rate": 0.00014792017902977796, "loss": 0.8446, "step": 22630 }, { "epoch": 0.5811006209066939, "grad_norm": 0.7734375, "learning_rate": 0.00014791626073096215, "loss": 0.9261, "step": 22631 }, { "epoch": 0.5811262981026157, "grad_norm": 0.828125, "learning_rate": 0.00014791234233665353, "loss": 0.8585, "step": 22632 }, { "epoch": 0.5811519752985376, "grad_norm": 0.79296875, "learning_rate": 0.00014790842384686, "loss": 0.9749, "step": 22633 }, { "epoch": 0.5811776524944593, "grad_norm": 0.859375, "learning_rate": 0.00014790450526158927, "loss": 1.0179, "step": 22634 }, { "epoch": 0.5812033296903811, "grad_norm": 0.75, "learning_rate": 0.00014790058658084915, "loss": 0.7862, "step": 22635 }, { "epoch": 0.581229006886303, "grad_norm": 0.796875, "learning_rate": 0.00014789666780464753, "loss": 0.8838, "step": 22636 }, { "epoch": 0.5812546840822248, "grad_norm": 0.77734375, "learning_rate": 0.00014789274893299217, "loss": 0.8476, "step": 22637 }, { "epoch": 0.5812803612781466, "grad_norm": 0.74609375, "learning_rate": 0.00014788882996589087, "loss": 0.8502, "step": 22638 }, { "epoch": 0.5813060384740685, "grad_norm": 0.78125, "learning_rate": 0.0001478849109033515, "loss": 0.8991, "step": 22639 }, { "epoch": 0.5813317156699903, "grad_norm": 0.7890625, "learning_rate": 0.00014788099174538178, "loss": 0.9465, "step": 22640 }, { "epoch": 0.581357392865912, "grad_norm": 0.7265625, "learning_rate": 0.0001478770724919896, "loss": 0.7154, "step": 22641 }, { "epoch": 0.5813830700618339, "grad_norm": 0.83203125, "learning_rate": 0.00014787315314318273, "loss": 0.9172, "step": 22642 }, { "epoch": 0.5814087472577557, "grad_norm": 0.84375, "learning_rate": 0.000147869233698969, "loss": 0.8189, "step": 22643 }, { "epoch": 0.5814344244536775, "grad_norm": 0.7734375, "learning_rate": 0.00014786531415935617, "loss": 0.8377, "step": 22644 }, { "epoch": 0.5814601016495994, "grad_norm": 0.80859375, "learning_rate": 0.00014786139452435215, "loss": 0.9237, "step": 22645 }, { "epoch": 0.5814857788455212, "grad_norm": 0.82421875, "learning_rate": 0.0001478574747939647, "loss": 0.9684, "step": 22646 }, { "epoch": 0.5815114560414429, "grad_norm": 0.74609375, "learning_rate": 0.00014785355496820156, "loss": 0.7881, "step": 22647 }, { "epoch": 0.5815371332373648, "grad_norm": 0.73046875, "learning_rate": 0.00014784963504707062, "loss": 0.7999, "step": 22648 }, { "epoch": 0.5815628104332866, "grad_norm": 0.8125, "learning_rate": 0.00014784571503057974, "loss": 0.9233, "step": 22649 }, { "epoch": 0.5815884876292085, "grad_norm": 0.81640625, "learning_rate": 0.0001478417949187366, "loss": 0.912, "step": 22650 }, { "epoch": 0.5816141648251303, "grad_norm": 0.83984375, "learning_rate": 0.00014783787471154912, "loss": 1.0036, "step": 22651 }, { "epoch": 0.5816398420210521, "grad_norm": 0.83984375, "learning_rate": 0.0001478339544090251, "loss": 0.8682, "step": 22652 }, { "epoch": 0.581665519216974, "grad_norm": 0.83203125, "learning_rate": 0.00014783003401117227, "loss": 0.9629, "step": 22653 }, { "epoch": 0.5816911964128957, "grad_norm": 0.78515625, "learning_rate": 0.00014782611351799857, "loss": 0.7797, "step": 22654 }, { "epoch": 0.5817168736088175, "grad_norm": 0.77734375, "learning_rate": 0.0001478221929295117, "loss": 0.8653, "step": 22655 }, { "epoch": 0.5817425508047394, "grad_norm": 0.81640625, "learning_rate": 0.00014781827224571954, "loss": 0.7566, "step": 22656 }, { "epoch": 0.5817682280006612, "grad_norm": 0.80859375, "learning_rate": 0.00014781435146662988, "loss": 0.9018, "step": 22657 }, { "epoch": 0.581793905196583, "grad_norm": 0.82421875, "learning_rate": 0.00014781043059225053, "loss": 0.8035, "step": 22658 }, { "epoch": 0.5818195823925049, "grad_norm": 0.8046875, "learning_rate": 0.0001478065096225893, "loss": 0.8626, "step": 22659 }, { "epoch": 0.5818452595884267, "grad_norm": 0.734375, "learning_rate": 0.00014780258855765402, "loss": 0.889, "step": 22660 }, { "epoch": 0.5818709367843484, "grad_norm": 0.7890625, "learning_rate": 0.00014779866739745253, "loss": 0.8682, "step": 22661 }, { "epoch": 0.5818966139802703, "grad_norm": 0.82421875, "learning_rate": 0.00014779474614199257, "loss": 0.8253, "step": 22662 }, { "epoch": 0.5819222911761921, "grad_norm": 0.85546875, "learning_rate": 0.000147790824791282, "loss": 0.9299, "step": 22663 }, { "epoch": 0.5819479683721139, "grad_norm": 0.79296875, "learning_rate": 0.00014778690334532865, "loss": 0.8551, "step": 22664 }, { "epoch": 0.5819736455680358, "grad_norm": 0.7109375, "learning_rate": 0.00014778298180414035, "loss": 0.8531, "step": 22665 }, { "epoch": 0.5819993227639576, "grad_norm": 0.734375, "learning_rate": 0.00014777906016772482, "loss": 0.8253, "step": 22666 }, { "epoch": 0.5820249999598793, "grad_norm": 0.83203125, "learning_rate": 0.00014777513843608998, "loss": 0.8516, "step": 22667 }, { "epoch": 0.5820506771558012, "grad_norm": 0.78515625, "learning_rate": 0.0001477712166092436, "loss": 0.8541, "step": 22668 }, { "epoch": 0.582076354351723, "grad_norm": 0.77734375, "learning_rate": 0.00014776729468719347, "loss": 0.8646, "step": 22669 }, { "epoch": 0.5821020315476448, "grad_norm": 0.73828125, "learning_rate": 0.00014776337266994747, "loss": 0.7053, "step": 22670 }, { "epoch": 0.5821277087435667, "grad_norm": 0.82421875, "learning_rate": 0.00014775945055751338, "loss": 0.8331, "step": 22671 }, { "epoch": 0.5821533859394885, "grad_norm": 0.74609375, "learning_rate": 0.00014775552834989903, "loss": 0.9914, "step": 22672 }, { "epoch": 0.5821790631354103, "grad_norm": 0.796875, "learning_rate": 0.00014775160604711224, "loss": 0.935, "step": 22673 }, { "epoch": 0.5822047403313321, "grad_norm": 0.765625, "learning_rate": 0.00014774768364916077, "loss": 0.8769, "step": 22674 }, { "epoch": 0.5822304175272539, "grad_norm": 0.85546875, "learning_rate": 0.00014774376115605245, "loss": 0.9922, "step": 22675 }, { "epoch": 0.5822560947231757, "grad_norm": 0.70703125, "learning_rate": 0.00014773983856779518, "loss": 0.7864, "step": 22676 }, { "epoch": 0.5822817719190976, "grad_norm": 1.25, "learning_rate": 0.00014773591588439672, "loss": 0.89, "step": 22677 }, { "epoch": 0.5823074491150194, "grad_norm": 0.7734375, "learning_rate": 0.0001477319931058649, "loss": 0.9164, "step": 22678 }, { "epoch": 0.5823331263109413, "grad_norm": 0.79296875, "learning_rate": 0.00014772807023220752, "loss": 0.8291, "step": 22679 }, { "epoch": 0.5823588035068631, "grad_norm": 0.71484375, "learning_rate": 0.0001477241472634324, "loss": 0.8047, "step": 22680 }, { "epoch": 0.5823844807027848, "grad_norm": 0.796875, "learning_rate": 0.00014772022419954736, "loss": 0.9616, "step": 22681 }, { "epoch": 0.5824101578987066, "grad_norm": 0.8515625, "learning_rate": 0.00014771630104056023, "loss": 0.8793, "step": 22682 }, { "epoch": 0.5824358350946285, "grad_norm": 0.7421875, "learning_rate": 0.0001477123777864788, "loss": 0.7344, "step": 22683 }, { "epoch": 0.5824615122905503, "grad_norm": 0.77734375, "learning_rate": 0.00014770845443731095, "loss": 0.7891, "step": 22684 }, { "epoch": 0.5824871894864722, "grad_norm": 0.75390625, "learning_rate": 0.00014770453099306445, "loss": 0.7162, "step": 22685 }, { "epoch": 0.582512866682394, "grad_norm": 0.7734375, "learning_rate": 0.00014770060745374712, "loss": 0.8706, "step": 22686 }, { "epoch": 0.5825385438783157, "grad_norm": 0.76171875, "learning_rate": 0.00014769668381936678, "loss": 0.8515, "step": 22687 }, { "epoch": 0.5825642210742376, "grad_norm": 0.69921875, "learning_rate": 0.00014769276008993126, "loss": 0.8791, "step": 22688 }, { "epoch": 0.5825898982701594, "grad_norm": 0.75390625, "learning_rate": 0.0001476888362654484, "loss": 0.8377, "step": 22689 }, { "epoch": 0.5826155754660812, "grad_norm": 0.76171875, "learning_rate": 0.000147684912345926, "loss": 0.8401, "step": 22690 }, { "epoch": 0.5826412526620031, "grad_norm": 0.68359375, "learning_rate": 0.00014768098833137184, "loss": 0.868, "step": 22691 }, { "epoch": 0.5826669298579249, "grad_norm": 1.203125, "learning_rate": 0.00014767706422179382, "loss": 0.7672, "step": 22692 }, { "epoch": 0.5826926070538467, "grad_norm": 0.77734375, "learning_rate": 0.00014767314001719965, "loss": 0.9723, "step": 22693 }, { "epoch": 0.5827182842497685, "grad_norm": 0.72265625, "learning_rate": 0.00014766921571759725, "loss": 0.9201, "step": 22694 }, { "epoch": 0.5827439614456903, "grad_norm": 0.76171875, "learning_rate": 0.00014766529132299446, "loss": 0.736, "step": 22695 }, { "epoch": 0.5827696386416121, "grad_norm": 0.8046875, "learning_rate": 0.000147661366833399, "loss": 0.8256, "step": 22696 }, { "epoch": 0.582795315837534, "grad_norm": 0.765625, "learning_rate": 0.00014765744224881874, "loss": 0.7343, "step": 22697 }, { "epoch": 0.5828209930334558, "grad_norm": 0.75390625, "learning_rate": 0.00014765351756926154, "loss": 0.852, "step": 22698 }, { "epoch": 0.5828466702293776, "grad_norm": 0.7578125, "learning_rate": 0.00014764959279473514, "loss": 0.9284, "step": 22699 }, { "epoch": 0.5828723474252995, "grad_norm": 0.703125, "learning_rate": 0.00014764566792524743, "loss": 0.7738, "step": 22700 }, { "epoch": 0.5828980246212212, "grad_norm": 0.78515625, "learning_rate": 0.0001476417429608062, "loss": 0.973, "step": 22701 }, { "epoch": 0.582923701817143, "grad_norm": 0.84765625, "learning_rate": 0.00014763781790141928, "loss": 0.7469, "step": 22702 }, { "epoch": 0.5829493790130649, "grad_norm": 0.7265625, "learning_rate": 0.0001476338927470945, "loss": 0.7573, "step": 22703 }, { "epoch": 0.5829750562089867, "grad_norm": 0.80859375, "learning_rate": 0.0001476299674978397, "loss": 0.8856, "step": 22704 }, { "epoch": 0.5830007334049085, "grad_norm": 0.8046875, "learning_rate": 0.00014762604215366263, "loss": 0.9121, "step": 22705 }, { "epoch": 0.5830264106008304, "grad_norm": 0.8125, "learning_rate": 0.00014762211671457114, "loss": 0.8856, "step": 22706 }, { "epoch": 0.5830520877967521, "grad_norm": 1.109375, "learning_rate": 0.00014761819118057313, "loss": 0.9111, "step": 22707 }, { "epoch": 0.5830777649926739, "grad_norm": 0.84375, "learning_rate": 0.00014761426555167634, "loss": 0.9532, "step": 22708 }, { "epoch": 0.5831034421885958, "grad_norm": 0.765625, "learning_rate": 0.0001476103398278886, "loss": 0.847, "step": 22709 }, { "epoch": 0.5831291193845176, "grad_norm": 0.76953125, "learning_rate": 0.0001476064140092178, "loss": 0.9309, "step": 22710 }, { "epoch": 0.5831547965804394, "grad_norm": 0.765625, "learning_rate": 0.00014760248809567172, "loss": 0.8802, "step": 22711 }, { "epoch": 0.5831804737763613, "grad_norm": 0.74609375, "learning_rate": 0.00014759856208725812, "loss": 0.9573, "step": 22712 }, { "epoch": 0.5832061509722831, "grad_norm": 0.75, "learning_rate": 0.00014759463598398495, "loss": 0.9337, "step": 22713 }, { "epoch": 0.5832318281682048, "grad_norm": 0.78515625, "learning_rate": 0.0001475907097858599, "loss": 0.9, "step": 22714 }, { "epoch": 0.5832575053641267, "grad_norm": 0.73828125, "learning_rate": 0.00014758678349289093, "loss": 0.9487, "step": 22715 }, { "epoch": 0.5832831825600485, "grad_norm": 0.74609375, "learning_rate": 0.00014758285710508577, "loss": 0.8117, "step": 22716 }, { "epoch": 0.5833088597559704, "grad_norm": 0.73828125, "learning_rate": 0.00014757893062245227, "loss": 0.6994, "step": 22717 }, { "epoch": 0.5833345369518922, "grad_norm": 0.6875, "learning_rate": 0.0001475750040449983, "loss": 0.8298, "step": 22718 }, { "epoch": 0.583360214147814, "grad_norm": 0.82421875, "learning_rate": 0.00014757107737273158, "loss": 0.8861, "step": 22719 }, { "epoch": 0.5833858913437358, "grad_norm": 0.69921875, "learning_rate": 0.00014756715060566006, "loss": 0.8398, "step": 22720 }, { "epoch": 0.5834115685396576, "grad_norm": 0.68359375, "learning_rate": 0.00014756322374379145, "loss": 0.7395, "step": 22721 }, { "epoch": 0.5834372457355794, "grad_norm": 0.82421875, "learning_rate": 0.00014755929678713368, "loss": 0.8511, "step": 22722 }, { "epoch": 0.5834629229315013, "grad_norm": 0.85546875, "learning_rate": 0.0001475553697356945, "loss": 0.7319, "step": 22723 }, { "epoch": 0.5834886001274231, "grad_norm": 0.73828125, "learning_rate": 0.00014755144258948177, "loss": 0.8116, "step": 22724 }, { "epoch": 0.5835142773233449, "grad_norm": 0.76953125, "learning_rate": 0.0001475475153485033, "loss": 0.8964, "step": 22725 }, { "epoch": 0.5835399545192668, "grad_norm": 0.7890625, "learning_rate": 0.00014754358801276696, "loss": 0.9996, "step": 22726 }, { "epoch": 0.5835656317151885, "grad_norm": 0.78515625, "learning_rate": 0.00014753966058228052, "loss": 0.893, "step": 22727 }, { "epoch": 0.5835913089111103, "grad_norm": 0.80859375, "learning_rate": 0.00014753573305705182, "loss": 0.9353, "step": 22728 }, { "epoch": 0.5836169861070322, "grad_norm": 0.78515625, "learning_rate": 0.00014753180543708872, "loss": 0.945, "step": 22729 }, { "epoch": 0.583642663302954, "grad_norm": 0.76953125, "learning_rate": 0.000147527877722399, "loss": 0.7606, "step": 22730 }, { "epoch": 0.5836683404988758, "grad_norm": 0.8203125, "learning_rate": 0.00014752394991299051, "loss": 0.9568, "step": 22731 }, { "epoch": 0.5836940176947977, "grad_norm": 0.84765625, "learning_rate": 0.00014752002200887115, "loss": 0.9015, "step": 22732 }, { "epoch": 0.5837196948907195, "grad_norm": 0.8125, "learning_rate": 0.00014751609401004857, "loss": 0.832, "step": 22733 }, { "epoch": 0.5837453720866412, "grad_norm": 0.69140625, "learning_rate": 0.0001475121659165308, "loss": 0.8994, "step": 22734 }, { "epoch": 0.5837710492825631, "grad_norm": 0.7734375, "learning_rate": 0.00014750823772832555, "loss": 0.867, "step": 22735 }, { "epoch": 0.5837967264784849, "grad_norm": 0.8203125, "learning_rate": 0.00014750430944544061, "loss": 0.8526, "step": 22736 }, { "epoch": 0.5838224036744067, "grad_norm": 0.74609375, "learning_rate": 0.000147500381067884, "loss": 0.8389, "step": 22737 }, { "epoch": 0.5838480808703286, "grad_norm": 0.6953125, "learning_rate": 0.00014749645259566328, "loss": 0.7503, "step": 22738 }, { "epoch": 0.5838737580662504, "grad_norm": 0.82421875, "learning_rate": 0.0001474925240287865, "loss": 0.8875, "step": 22739 }, { "epoch": 0.5838994352621721, "grad_norm": 0.7421875, "learning_rate": 0.0001474885953672614, "loss": 0.8811, "step": 22740 }, { "epoch": 0.583925112458094, "grad_norm": 0.76953125, "learning_rate": 0.0001474846666110958, "loss": 0.9631, "step": 22741 }, { "epoch": 0.5839507896540158, "grad_norm": 0.9140625, "learning_rate": 0.0001474807377602976, "loss": 0.9754, "step": 22742 }, { "epoch": 0.5839764668499376, "grad_norm": 0.828125, "learning_rate": 0.00014747680881487451, "loss": 0.8752, "step": 22743 }, { "epoch": 0.5840021440458595, "grad_norm": 0.8203125, "learning_rate": 0.00014747287977483447, "loss": 0.8868, "step": 22744 }, { "epoch": 0.5840278212417813, "grad_norm": 0.703125, "learning_rate": 0.00014746895064018527, "loss": 0.8357, "step": 22745 }, { "epoch": 0.5840534984377032, "grad_norm": 0.80078125, "learning_rate": 0.0001474650214109347, "loss": 0.7249, "step": 22746 }, { "epoch": 0.5840791756336249, "grad_norm": 0.75390625, "learning_rate": 0.00014746109208709066, "loss": 0.8556, "step": 22747 }, { "epoch": 0.5841048528295467, "grad_norm": 0.6953125, "learning_rate": 0.00014745716266866097, "loss": 0.8466, "step": 22748 }, { "epoch": 0.5841305300254686, "grad_norm": 0.74609375, "learning_rate": 0.0001474532331556534, "loss": 0.7211, "step": 22749 }, { "epoch": 0.5841562072213904, "grad_norm": 0.71484375, "learning_rate": 0.00014744930354807585, "loss": 0.8315, "step": 22750 }, { "epoch": 0.5841818844173122, "grad_norm": 0.7578125, "learning_rate": 0.00014744537384593612, "loss": 0.8652, "step": 22751 }, { "epoch": 0.5842075616132341, "grad_norm": 0.828125, "learning_rate": 0.00014744144404924204, "loss": 0.8806, "step": 22752 }, { "epoch": 0.5842332388091559, "grad_norm": 0.80859375, "learning_rate": 0.00014743751415800146, "loss": 0.8951, "step": 22753 }, { "epoch": 0.5842589160050776, "grad_norm": 0.75, "learning_rate": 0.00014743358417222216, "loss": 0.841, "step": 22754 }, { "epoch": 0.5842845932009995, "grad_norm": 0.74609375, "learning_rate": 0.00014742965409191206, "loss": 0.8146, "step": 22755 }, { "epoch": 0.5843102703969213, "grad_norm": 1.0625, "learning_rate": 0.00014742572391707894, "loss": 0.7662, "step": 22756 }, { "epoch": 0.5843359475928431, "grad_norm": 0.75390625, "learning_rate": 0.00014742179364773063, "loss": 0.8914, "step": 22757 }, { "epoch": 0.584361624788765, "grad_norm": 0.76171875, "learning_rate": 0.00014741786328387497, "loss": 0.8978, "step": 22758 }, { "epoch": 0.5843873019846868, "grad_norm": 0.7578125, "learning_rate": 0.00014741393282551976, "loss": 0.9002, "step": 22759 }, { "epoch": 0.5844129791806085, "grad_norm": 0.8203125, "learning_rate": 0.00014741000227267288, "loss": 0.8619, "step": 22760 }, { "epoch": 0.5844386563765304, "grad_norm": 0.8359375, "learning_rate": 0.00014740607162534218, "loss": 0.832, "step": 22761 }, { "epoch": 0.5844643335724522, "grad_norm": 0.73828125, "learning_rate": 0.00014740214088353542, "loss": 0.8152, "step": 22762 }, { "epoch": 0.584490010768374, "grad_norm": 0.73046875, "learning_rate": 0.00014739821004726048, "loss": 0.7629, "step": 22763 }, { "epoch": 0.5845156879642959, "grad_norm": 0.765625, "learning_rate": 0.0001473942791165252, "loss": 0.7701, "step": 22764 }, { "epoch": 0.5845413651602177, "grad_norm": 0.80078125, "learning_rate": 0.0001473903480913374, "loss": 0.9385, "step": 22765 }, { "epoch": 0.5845670423561395, "grad_norm": 0.734375, "learning_rate": 0.00014738641697170491, "loss": 0.8109, "step": 22766 }, { "epoch": 0.5845927195520613, "grad_norm": 0.7421875, "learning_rate": 0.0001473824857576356, "loss": 0.7606, "step": 22767 }, { "epoch": 0.5846183967479831, "grad_norm": 0.78125, "learning_rate": 0.00014737855444913725, "loss": 0.8438, "step": 22768 }, { "epoch": 0.5846440739439049, "grad_norm": 1.1796875, "learning_rate": 0.00014737462304621773, "loss": 0.7132, "step": 22769 }, { "epoch": 0.5846697511398268, "grad_norm": 0.73828125, "learning_rate": 0.00014737069154888488, "loss": 0.8216, "step": 22770 }, { "epoch": 0.5846954283357486, "grad_norm": 0.80078125, "learning_rate": 0.00014736675995714649, "loss": 0.9594, "step": 22771 }, { "epoch": 0.5847211055316704, "grad_norm": 0.73046875, "learning_rate": 0.00014736282827101046, "loss": 0.9642, "step": 22772 }, { "epoch": 0.5847467827275923, "grad_norm": 0.82421875, "learning_rate": 0.00014735889649048454, "loss": 0.9694, "step": 22773 }, { "epoch": 0.584772459923514, "grad_norm": 0.7265625, "learning_rate": 0.00014735496461557666, "loss": 0.8521, "step": 22774 }, { "epoch": 0.5847981371194358, "grad_norm": 0.7421875, "learning_rate": 0.00014735103264629462, "loss": 0.9414, "step": 22775 }, { "epoch": 0.5848238143153577, "grad_norm": 0.73046875, "learning_rate": 0.0001473471005826462, "loss": 0.8605, "step": 22776 }, { "epoch": 0.5848494915112795, "grad_norm": 0.73046875, "learning_rate": 0.00014734316842463936, "loss": 0.8456, "step": 22777 }, { "epoch": 0.5848751687072014, "grad_norm": 0.73828125, "learning_rate": 0.00014733923617228178, "loss": 0.8227, "step": 22778 }, { "epoch": 0.5849008459031232, "grad_norm": 0.80859375, "learning_rate": 0.00014733530382558143, "loss": 0.9428, "step": 22779 }, { "epoch": 0.5849265230990449, "grad_norm": 0.75390625, "learning_rate": 0.00014733137138454606, "loss": 0.7874, "step": 22780 }, { "epoch": 0.5849522002949668, "grad_norm": 0.734375, "learning_rate": 0.00014732743884918358, "loss": 0.8012, "step": 22781 }, { "epoch": 0.5849778774908886, "grad_norm": 0.76953125, "learning_rate": 0.00014732350621950177, "loss": 0.8216, "step": 22782 }, { "epoch": 0.5850035546868104, "grad_norm": 0.8515625, "learning_rate": 0.0001473195734955085, "loss": 0.942, "step": 22783 }, { "epoch": 0.5850292318827323, "grad_norm": 0.83203125, "learning_rate": 0.00014731564067721158, "loss": 0.9541, "step": 22784 }, { "epoch": 0.5850549090786541, "grad_norm": 0.71875, "learning_rate": 0.00014731170776461887, "loss": 0.8061, "step": 22785 }, { "epoch": 0.5850805862745759, "grad_norm": 0.71875, "learning_rate": 0.0001473077747577382, "loss": 0.813, "step": 22786 }, { "epoch": 0.5851062634704977, "grad_norm": 0.76953125, "learning_rate": 0.0001473038416565774, "loss": 0.916, "step": 22787 }, { "epoch": 0.5851319406664195, "grad_norm": 0.8203125, "learning_rate": 0.00014729990846114432, "loss": 0.9105, "step": 22788 }, { "epoch": 0.5851576178623413, "grad_norm": 0.76953125, "learning_rate": 0.00014729597517144677, "loss": 0.7342, "step": 22789 }, { "epoch": 0.5851832950582632, "grad_norm": 0.78515625, "learning_rate": 0.00014729204178749267, "loss": 0.7915, "step": 22790 }, { "epoch": 0.585208972254185, "grad_norm": 0.78515625, "learning_rate": 0.00014728810830928976, "loss": 0.8461, "step": 22791 }, { "epoch": 0.5852346494501068, "grad_norm": 0.80859375, "learning_rate": 0.00014728417473684595, "loss": 0.7981, "step": 22792 }, { "epoch": 0.5852603266460287, "grad_norm": 0.87109375, "learning_rate": 0.00014728024107016902, "loss": 1.0055, "step": 22793 }, { "epoch": 0.5852860038419504, "grad_norm": 0.80078125, "learning_rate": 0.00014727630730926685, "loss": 0.8577, "step": 22794 }, { "epoch": 0.5853116810378722, "grad_norm": 0.75390625, "learning_rate": 0.0001472723734541473, "loss": 0.9996, "step": 22795 }, { "epoch": 0.5853373582337941, "grad_norm": 0.76953125, "learning_rate": 0.00014726843950481813, "loss": 0.8089, "step": 22796 }, { "epoch": 0.5853630354297159, "grad_norm": 0.796875, "learning_rate": 0.00014726450546128728, "loss": 0.9786, "step": 22797 }, { "epoch": 0.5853887126256377, "grad_norm": 0.76171875, "learning_rate": 0.0001472605713235625, "loss": 0.9803, "step": 22798 }, { "epoch": 0.5854143898215596, "grad_norm": 0.83984375, "learning_rate": 0.0001472566370916517, "loss": 0.8156, "step": 22799 }, { "epoch": 0.5854400670174813, "grad_norm": 0.69921875, "learning_rate": 0.00014725270276556265, "loss": 0.8272, "step": 22800 }, { "epoch": 0.5854657442134031, "grad_norm": 0.76953125, "learning_rate": 0.00014724876834530329, "loss": 0.8806, "step": 22801 }, { "epoch": 0.585491421409325, "grad_norm": 0.8828125, "learning_rate": 0.00014724483383088134, "loss": 0.8808, "step": 22802 }, { "epoch": 0.5855170986052468, "grad_norm": 0.7421875, "learning_rate": 0.00014724089922230475, "loss": 0.8577, "step": 22803 }, { "epoch": 0.5855427758011686, "grad_norm": 0.71484375, "learning_rate": 0.0001472369645195813, "loss": 0.9254, "step": 22804 }, { "epoch": 0.5855684529970905, "grad_norm": 0.77734375, "learning_rate": 0.00014723302972271884, "loss": 0.8757, "step": 22805 }, { "epoch": 0.5855941301930123, "grad_norm": 0.765625, "learning_rate": 0.00014722909483172523, "loss": 0.9265, "step": 22806 }, { "epoch": 0.585619807388934, "grad_norm": 0.73828125, "learning_rate": 0.0001472251598466083, "loss": 0.7717, "step": 22807 }, { "epoch": 0.5856454845848559, "grad_norm": 0.78515625, "learning_rate": 0.0001472212247673759, "loss": 0.9767, "step": 22808 }, { "epoch": 0.5856711617807777, "grad_norm": 0.82421875, "learning_rate": 0.00014721728959403584, "loss": 0.9129, "step": 22809 }, { "epoch": 0.5856968389766996, "grad_norm": 0.828125, "learning_rate": 0.000147213354326596, "loss": 0.8188, "step": 22810 }, { "epoch": 0.5857225161726214, "grad_norm": 0.859375, "learning_rate": 0.00014720941896506423, "loss": 0.8984, "step": 22811 }, { "epoch": 0.5857481933685432, "grad_norm": 0.84765625, "learning_rate": 0.00014720548350944832, "loss": 0.7949, "step": 22812 }, { "epoch": 0.5857738705644651, "grad_norm": 0.86328125, "learning_rate": 0.00014720154795975614, "loss": 0.9452, "step": 22813 }, { "epoch": 0.5857995477603868, "grad_norm": 0.8828125, "learning_rate": 0.00014719761231599555, "loss": 0.9491, "step": 22814 }, { "epoch": 0.5858252249563086, "grad_norm": 0.921875, "learning_rate": 0.0001471936765781744, "loss": 0.9075, "step": 22815 }, { "epoch": 0.5858509021522305, "grad_norm": 0.765625, "learning_rate": 0.0001471897407463005, "loss": 0.8005, "step": 22816 }, { "epoch": 0.5858765793481523, "grad_norm": 0.76171875, "learning_rate": 0.0001471858048203817, "loss": 0.8755, "step": 22817 }, { "epoch": 0.5859022565440741, "grad_norm": 0.72265625, "learning_rate": 0.00014718186880042586, "loss": 0.8936, "step": 22818 }, { "epoch": 0.585927933739996, "grad_norm": 0.79296875, "learning_rate": 0.00014717793268644082, "loss": 0.989, "step": 22819 }, { "epoch": 0.5859536109359177, "grad_norm": 0.81640625, "learning_rate": 0.00014717399647843444, "loss": 0.7895, "step": 22820 }, { "epoch": 0.5859792881318395, "grad_norm": 0.75390625, "learning_rate": 0.0001471700601764145, "loss": 0.8423, "step": 22821 }, { "epoch": 0.5860049653277614, "grad_norm": 0.8671875, "learning_rate": 0.00014716612378038894, "loss": 0.8686, "step": 22822 }, { "epoch": 0.5860306425236832, "grad_norm": 0.81640625, "learning_rate": 0.00014716218729036556, "loss": 0.8551, "step": 22823 }, { "epoch": 0.586056319719605, "grad_norm": 0.84765625, "learning_rate": 0.00014715825070635215, "loss": 0.8951, "step": 22824 }, { "epoch": 0.5860819969155269, "grad_norm": 0.7578125, "learning_rate": 0.00014715431402835666, "loss": 0.7385, "step": 22825 }, { "epoch": 0.5861076741114487, "grad_norm": 0.70703125, "learning_rate": 0.00014715037725638683, "loss": 0.8815, "step": 22826 }, { "epoch": 0.5861333513073704, "grad_norm": 0.7578125, "learning_rate": 0.0001471464403904506, "loss": 0.8479, "step": 22827 }, { "epoch": 0.5861590285032923, "grad_norm": 0.7890625, "learning_rate": 0.00014714250343055577, "loss": 0.8487, "step": 22828 }, { "epoch": 0.5861847056992141, "grad_norm": 0.859375, "learning_rate": 0.00014713856637671015, "loss": 0.9969, "step": 22829 }, { "epoch": 0.5862103828951359, "grad_norm": 0.75, "learning_rate": 0.00014713462922892167, "loss": 0.8526, "step": 22830 }, { "epoch": 0.5862360600910578, "grad_norm": 0.859375, "learning_rate": 0.00014713069198719807, "loss": 0.7766, "step": 22831 }, { "epoch": 0.5862617372869796, "grad_norm": 0.76171875, "learning_rate": 0.00014712675465154733, "loss": 0.7156, "step": 22832 }, { "epoch": 0.5862874144829014, "grad_norm": 0.7578125, "learning_rate": 0.0001471228172219772, "loss": 0.8272, "step": 22833 }, { "epoch": 0.5863130916788232, "grad_norm": 0.73828125, "learning_rate": 0.0001471188796984955, "loss": 0.8428, "step": 22834 }, { "epoch": 0.586338768874745, "grad_norm": 0.76953125, "learning_rate": 0.0001471149420811102, "loss": 0.937, "step": 22835 }, { "epoch": 0.5863644460706668, "grad_norm": 0.7421875, "learning_rate": 0.00014711100436982908, "loss": 0.8503, "step": 22836 }, { "epoch": 0.5863901232665887, "grad_norm": 0.734375, "learning_rate": 0.00014710706656465993, "loss": 0.8845, "step": 22837 }, { "epoch": 0.5864158004625105, "grad_norm": 0.734375, "learning_rate": 0.00014710312866561068, "loss": 0.9157, "step": 22838 }, { "epoch": 0.5864414776584324, "grad_norm": 0.73828125, "learning_rate": 0.00014709919067268915, "loss": 0.9055, "step": 22839 }, { "epoch": 0.5864671548543541, "grad_norm": 0.765625, "learning_rate": 0.0001470952525859032, "loss": 0.8747, "step": 22840 }, { "epoch": 0.5864928320502759, "grad_norm": 0.75, "learning_rate": 0.00014709131440526063, "loss": 0.8882, "step": 22841 }, { "epoch": 0.5865185092461978, "grad_norm": 0.8125, "learning_rate": 0.00014708737613076936, "loss": 0.8039, "step": 22842 }, { "epoch": 0.5865441864421196, "grad_norm": 0.78125, "learning_rate": 0.0001470834377624372, "loss": 0.9289, "step": 22843 }, { "epoch": 0.5865698636380414, "grad_norm": 0.7578125, "learning_rate": 0.000147079499300272, "loss": 0.871, "step": 22844 }, { "epoch": 0.5865955408339633, "grad_norm": 0.796875, "learning_rate": 0.00014707556074428156, "loss": 1.0548, "step": 22845 }, { "epoch": 0.5866212180298851, "grad_norm": 0.796875, "learning_rate": 0.00014707162209447384, "loss": 0.8728, "step": 22846 }, { "epoch": 0.5866468952258068, "grad_norm": 0.734375, "learning_rate": 0.00014706768335085658, "loss": 0.8578, "step": 22847 }, { "epoch": 0.5866725724217287, "grad_norm": 0.74609375, "learning_rate": 0.00014706374451343773, "loss": 0.7763, "step": 22848 }, { "epoch": 0.5866982496176505, "grad_norm": 0.76953125, "learning_rate": 0.00014705980558222506, "loss": 0.9587, "step": 22849 }, { "epoch": 0.5867239268135723, "grad_norm": 0.796875, "learning_rate": 0.00014705586655722644, "loss": 0.9199, "step": 22850 }, { "epoch": 0.5867496040094942, "grad_norm": 0.7109375, "learning_rate": 0.00014705192743844975, "loss": 0.8932, "step": 22851 }, { "epoch": 0.586775281205416, "grad_norm": 0.7890625, "learning_rate": 0.00014704798822590283, "loss": 0.78, "step": 22852 }, { "epoch": 0.5868009584013378, "grad_norm": 0.79296875, "learning_rate": 0.00014704404891959346, "loss": 0.8234, "step": 22853 }, { "epoch": 0.5868266355972596, "grad_norm": 0.76953125, "learning_rate": 0.0001470401095195296, "loss": 0.8581, "step": 22854 }, { "epoch": 0.5868523127931814, "grad_norm": 0.80078125, "learning_rate": 0.00014703617002571906, "loss": 0.9372, "step": 22855 }, { "epoch": 0.5868779899891032, "grad_norm": 0.81640625, "learning_rate": 0.00014703223043816967, "loss": 0.8116, "step": 22856 }, { "epoch": 0.5869036671850251, "grad_norm": 0.8203125, "learning_rate": 0.0001470282907568893, "loss": 0.8759, "step": 22857 }, { "epoch": 0.5869293443809469, "grad_norm": 0.78515625, "learning_rate": 0.00014702435098188575, "loss": 0.8245, "step": 22858 }, { "epoch": 0.5869550215768687, "grad_norm": 0.875, "learning_rate": 0.00014702041111316696, "loss": 0.9439, "step": 22859 }, { "epoch": 0.5869806987727905, "grad_norm": 0.7734375, "learning_rate": 0.0001470164711507407, "loss": 0.8417, "step": 22860 }, { "epoch": 0.5870063759687123, "grad_norm": 0.80078125, "learning_rate": 0.00014701253109461487, "loss": 0.9175, "step": 22861 }, { "epoch": 0.5870320531646341, "grad_norm": 0.87109375, "learning_rate": 0.00014700859094479734, "loss": 0.8704, "step": 22862 }, { "epoch": 0.587057730360556, "grad_norm": 0.8046875, "learning_rate": 0.0001470046507012959, "loss": 0.8284, "step": 22863 }, { "epoch": 0.5870834075564778, "grad_norm": 0.76171875, "learning_rate": 0.0001470007103641185, "loss": 0.8304, "step": 22864 }, { "epoch": 0.5871090847523996, "grad_norm": 0.96875, "learning_rate": 0.00014699676993327286, "loss": 0.805, "step": 22865 }, { "epoch": 0.5871347619483215, "grad_norm": 0.75, "learning_rate": 0.00014699282940876693, "loss": 0.8169, "step": 22866 }, { "epoch": 0.5871604391442432, "grad_norm": 0.796875, "learning_rate": 0.00014698888879060854, "loss": 0.8341, "step": 22867 }, { "epoch": 0.587186116340165, "grad_norm": 0.7734375, "learning_rate": 0.00014698494807880555, "loss": 0.9295, "step": 22868 }, { "epoch": 0.5872117935360869, "grad_norm": 0.73828125, "learning_rate": 0.0001469810072733658, "loss": 0.8212, "step": 22869 }, { "epoch": 0.5872374707320087, "grad_norm": 0.77734375, "learning_rate": 0.00014697706637429713, "loss": 0.7839, "step": 22870 }, { "epoch": 0.5872631479279306, "grad_norm": 0.8203125, "learning_rate": 0.0001469731253816074, "loss": 0.8574, "step": 22871 }, { "epoch": 0.5872888251238524, "grad_norm": 0.7421875, "learning_rate": 0.0001469691842953045, "loss": 1.0107, "step": 22872 }, { "epoch": 0.5873145023197742, "grad_norm": 0.77734375, "learning_rate": 0.00014696524311539624, "loss": 0.9343, "step": 22873 }, { "epoch": 0.587340179515696, "grad_norm": 0.75390625, "learning_rate": 0.0001469613018418905, "loss": 0.999, "step": 22874 }, { "epoch": 0.5873658567116178, "grad_norm": 0.87890625, "learning_rate": 0.00014695736047479514, "loss": 0.8387, "step": 22875 }, { "epoch": 0.5873915339075396, "grad_norm": 0.80859375, "learning_rate": 0.00014695341901411796, "loss": 0.9044, "step": 22876 }, { "epoch": 0.5874172111034615, "grad_norm": 0.7578125, "learning_rate": 0.0001469494774598669, "loss": 0.8821, "step": 22877 }, { "epoch": 0.5874428882993833, "grad_norm": 0.80078125, "learning_rate": 0.00014694553581204978, "loss": 0.8704, "step": 22878 }, { "epoch": 0.5874685654953051, "grad_norm": 0.734375, "learning_rate": 0.0001469415940706744, "loss": 0.7906, "step": 22879 }, { "epoch": 0.5874942426912269, "grad_norm": 0.75390625, "learning_rate": 0.00014693765223574868, "loss": 0.8797, "step": 22880 }, { "epoch": 0.5875199198871487, "grad_norm": 0.7421875, "learning_rate": 0.0001469337103072805, "loss": 0.8124, "step": 22881 }, { "epoch": 0.5875455970830705, "grad_norm": 0.8515625, "learning_rate": 0.00014692976828527762, "loss": 0.9457, "step": 22882 }, { "epoch": 0.5875712742789924, "grad_norm": 0.73828125, "learning_rate": 0.000146925826169748, "loss": 0.8831, "step": 22883 }, { "epoch": 0.5875969514749142, "grad_norm": 0.76953125, "learning_rate": 0.00014692188396069942, "loss": 0.9763, "step": 22884 }, { "epoch": 0.587622628670836, "grad_norm": 0.79296875, "learning_rate": 0.00014691794165813975, "loss": 0.9044, "step": 22885 }, { "epoch": 0.5876483058667579, "grad_norm": 0.7578125, "learning_rate": 0.0001469139992620769, "loss": 0.7856, "step": 22886 }, { "epoch": 0.5876739830626796, "grad_norm": 0.765625, "learning_rate": 0.00014691005677251864, "loss": 0.8581, "step": 22887 }, { "epoch": 0.5876996602586014, "grad_norm": 0.7890625, "learning_rate": 0.00014690611418947288, "loss": 0.9954, "step": 22888 }, { "epoch": 0.5877253374545233, "grad_norm": 0.8046875, "learning_rate": 0.0001469021715129475, "loss": 1.0253, "step": 22889 }, { "epoch": 0.5877510146504451, "grad_norm": 0.734375, "learning_rate": 0.00014689822874295035, "loss": 0.9754, "step": 22890 }, { "epoch": 0.5877766918463669, "grad_norm": 0.8125, "learning_rate": 0.00014689428587948922, "loss": 0.9059, "step": 22891 }, { "epoch": 0.5878023690422888, "grad_norm": 0.77734375, "learning_rate": 0.000146890342922572, "loss": 0.8948, "step": 22892 }, { "epoch": 0.5878280462382106, "grad_norm": 0.7578125, "learning_rate": 0.0001468863998722066, "loss": 0.8845, "step": 22893 }, { "epoch": 0.5878537234341323, "grad_norm": 0.76953125, "learning_rate": 0.00014688245672840085, "loss": 0.9821, "step": 22894 }, { "epoch": 0.5878794006300542, "grad_norm": 0.77734375, "learning_rate": 0.00014687851349116257, "loss": 0.9402, "step": 22895 }, { "epoch": 0.587905077825976, "grad_norm": 0.734375, "learning_rate": 0.00014687457016049967, "loss": 0.7864, "step": 22896 }, { "epoch": 0.5879307550218978, "grad_norm": 0.75, "learning_rate": 0.00014687062673641999, "loss": 0.8715, "step": 22897 }, { "epoch": 0.5879564322178197, "grad_norm": 0.76171875, "learning_rate": 0.00014686668321893133, "loss": 0.9135, "step": 22898 }, { "epoch": 0.5879821094137415, "grad_norm": 0.72265625, "learning_rate": 0.00014686273960804163, "loss": 0.7444, "step": 22899 }, { "epoch": 0.5880077866096632, "grad_norm": 0.765625, "learning_rate": 0.00014685879590375876, "loss": 0.955, "step": 22900 }, { "epoch": 0.5880334638055851, "grad_norm": 0.74609375, "learning_rate": 0.0001468548521060905, "loss": 0.7954, "step": 22901 }, { "epoch": 0.5880591410015069, "grad_norm": 0.78125, "learning_rate": 0.0001468509082150448, "loss": 0.8674, "step": 22902 }, { "epoch": 0.5880848181974287, "grad_norm": 0.73828125, "learning_rate": 0.00014684696423062943, "loss": 0.7899, "step": 22903 }, { "epoch": 0.5881104953933506, "grad_norm": 0.86328125, "learning_rate": 0.0001468430201528523, "loss": 0.9699, "step": 22904 }, { "epoch": 0.5881361725892724, "grad_norm": 0.71484375, "learning_rate": 0.00014683907598172128, "loss": 0.7476, "step": 22905 }, { "epoch": 0.5881618497851943, "grad_norm": 0.77734375, "learning_rate": 0.00014683513171724418, "loss": 0.7948, "step": 22906 }, { "epoch": 0.588187526981116, "grad_norm": 0.84765625, "learning_rate": 0.00014683118735942888, "loss": 0.9689, "step": 22907 }, { "epoch": 0.5882132041770378, "grad_norm": 0.7890625, "learning_rate": 0.0001468272429082833, "loss": 0.918, "step": 22908 }, { "epoch": 0.5882388813729597, "grad_norm": 0.69921875, "learning_rate": 0.00014682329836381526, "loss": 0.8673, "step": 22909 }, { "epoch": 0.5882645585688815, "grad_norm": 0.73046875, "learning_rate": 0.0001468193537260326, "loss": 0.841, "step": 22910 }, { "epoch": 0.5882902357648033, "grad_norm": 0.83203125, "learning_rate": 0.00014681540899494317, "loss": 0.8698, "step": 22911 }, { "epoch": 0.5883159129607252, "grad_norm": 0.8125, "learning_rate": 0.00014681146417055486, "loss": 0.8483, "step": 22912 }, { "epoch": 0.588341590156647, "grad_norm": 0.78125, "learning_rate": 0.00014680751925287558, "loss": 0.8891, "step": 22913 }, { "epoch": 0.5883672673525687, "grad_norm": 0.76171875, "learning_rate": 0.0001468035742419131, "loss": 0.8629, "step": 22914 }, { "epoch": 0.5883929445484906, "grad_norm": 0.75, "learning_rate": 0.00014679962913767536, "loss": 0.7449, "step": 22915 }, { "epoch": 0.5884186217444124, "grad_norm": 0.77734375, "learning_rate": 0.00014679568394017014, "loss": 1.0193, "step": 22916 }, { "epoch": 0.5884442989403342, "grad_norm": 0.76953125, "learning_rate": 0.00014679173864940538, "loss": 0.8733, "step": 22917 }, { "epoch": 0.5884699761362561, "grad_norm": 0.76171875, "learning_rate": 0.00014678779326538893, "loss": 0.8709, "step": 22918 }, { "epoch": 0.5884956533321779, "grad_norm": 0.78125, "learning_rate": 0.00014678384778812857, "loss": 0.831, "step": 22919 }, { "epoch": 0.5885213305280996, "grad_norm": 0.79296875, "learning_rate": 0.00014677990221763227, "loss": 0.9353, "step": 22920 }, { "epoch": 0.5885470077240215, "grad_norm": 0.83203125, "learning_rate": 0.0001467759565539079, "loss": 0.8507, "step": 22921 }, { "epoch": 0.5885726849199433, "grad_norm": 0.75, "learning_rate": 0.0001467720107969632, "loss": 0.7982, "step": 22922 }, { "epoch": 0.5885983621158651, "grad_norm": 0.69921875, "learning_rate": 0.0001467680649468061, "loss": 0.7743, "step": 22923 }, { "epoch": 0.588624039311787, "grad_norm": 0.78515625, "learning_rate": 0.00014676411900344452, "loss": 0.9028, "step": 22924 }, { "epoch": 0.5886497165077088, "grad_norm": 0.828125, "learning_rate": 0.00014676017296688624, "loss": 0.9112, "step": 22925 }, { "epoch": 0.5886753937036306, "grad_norm": 0.78515625, "learning_rate": 0.00014675622683713916, "loss": 0.8, "step": 22926 }, { "epoch": 0.5887010708995524, "grad_norm": 0.7578125, "learning_rate": 0.00014675228061421117, "loss": 0.8128, "step": 22927 }, { "epoch": 0.5887267480954742, "grad_norm": 0.73828125, "learning_rate": 0.0001467483342981101, "loss": 0.88, "step": 22928 }, { "epoch": 0.588752425291396, "grad_norm": 0.7890625, "learning_rate": 0.00014674438788884382, "loss": 0.9329, "step": 22929 }, { "epoch": 0.5887781024873179, "grad_norm": 0.9140625, "learning_rate": 0.0001467404413864202, "loss": 0.9823, "step": 22930 }, { "epoch": 0.5888037796832397, "grad_norm": 0.73046875, "learning_rate": 0.0001467364947908471, "loss": 0.9374, "step": 22931 }, { "epoch": 0.5888294568791616, "grad_norm": 0.85546875, "learning_rate": 0.00014673254810213237, "loss": 0.935, "step": 22932 }, { "epoch": 0.5888551340750833, "grad_norm": 0.74609375, "learning_rate": 0.0001467286013202839, "loss": 0.8797, "step": 22933 }, { "epoch": 0.5888808112710051, "grad_norm": 0.8046875, "learning_rate": 0.00014672465444530954, "loss": 0.8957, "step": 22934 }, { "epoch": 0.588906488466927, "grad_norm": 0.78515625, "learning_rate": 0.0001467207074772172, "loss": 0.8319, "step": 22935 }, { "epoch": 0.5889321656628488, "grad_norm": 0.90234375, "learning_rate": 0.0001467167604160147, "loss": 0.9663, "step": 22936 }, { "epoch": 0.5889578428587706, "grad_norm": 0.78515625, "learning_rate": 0.0001467128132617099, "loss": 0.901, "step": 22937 }, { "epoch": 0.5889835200546925, "grad_norm": 0.73046875, "learning_rate": 0.0001467088660143107, "loss": 0.849, "step": 22938 }, { "epoch": 0.5890091972506143, "grad_norm": 0.80078125, "learning_rate": 0.00014670491867382492, "loss": 0.9142, "step": 22939 }, { "epoch": 0.589034874446536, "grad_norm": 0.8125, "learning_rate": 0.0001467009712402605, "loss": 0.9311, "step": 22940 }, { "epoch": 0.5890605516424579, "grad_norm": 0.8046875, "learning_rate": 0.00014669702371362522, "loss": 0.8803, "step": 22941 }, { "epoch": 0.5890862288383797, "grad_norm": 0.84765625, "learning_rate": 0.00014669307609392702, "loss": 1.003, "step": 22942 }, { "epoch": 0.5891119060343015, "grad_norm": 0.8203125, "learning_rate": 0.0001466891283811737, "loss": 0.8222, "step": 22943 }, { "epoch": 0.5891375832302234, "grad_norm": 0.76171875, "learning_rate": 0.0001466851805753732, "loss": 0.8617, "step": 22944 }, { "epoch": 0.5891632604261452, "grad_norm": 0.86328125, "learning_rate": 0.00014668123267653333, "loss": 0.7172, "step": 22945 }, { "epoch": 0.589188937622067, "grad_norm": 0.74609375, "learning_rate": 0.00014667728468466197, "loss": 0.9552, "step": 22946 }, { "epoch": 0.5892146148179888, "grad_norm": 0.74609375, "learning_rate": 0.00014667333659976702, "loss": 0.8354, "step": 22947 }, { "epoch": 0.5892402920139106, "grad_norm": 0.80859375, "learning_rate": 0.00014666938842185635, "loss": 0.879, "step": 22948 }, { "epoch": 0.5892659692098324, "grad_norm": 0.671875, "learning_rate": 0.00014666544015093778, "loss": 0.8641, "step": 22949 }, { "epoch": 0.5892916464057543, "grad_norm": 0.80859375, "learning_rate": 0.0001466614917870192, "loss": 0.7997, "step": 22950 }, { "epoch": 0.5893173236016761, "grad_norm": 0.76953125, "learning_rate": 0.00014665754333010849, "loss": 0.9223, "step": 22951 }, { "epoch": 0.5893430007975979, "grad_norm": 0.8125, "learning_rate": 0.0001466535947802135, "loss": 0.8998, "step": 22952 }, { "epoch": 0.5893686779935197, "grad_norm": 0.8046875, "learning_rate": 0.00014664964613734212, "loss": 0.9446, "step": 22953 }, { "epoch": 0.5893943551894415, "grad_norm": 0.765625, "learning_rate": 0.0001466456974015022, "loss": 1.0392, "step": 22954 }, { "epoch": 0.5894200323853633, "grad_norm": 0.7265625, "learning_rate": 0.00014664174857270164, "loss": 0.8231, "step": 22955 }, { "epoch": 0.5894457095812852, "grad_norm": 0.81640625, "learning_rate": 0.00014663779965094827, "loss": 0.8625, "step": 22956 }, { "epoch": 0.589471386777207, "grad_norm": 0.8203125, "learning_rate": 0.00014663385063624996, "loss": 0.9166, "step": 22957 }, { "epoch": 0.5894970639731288, "grad_norm": 0.76171875, "learning_rate": 0.00014662990152861463, "loss": 0.8304, "step": 22958 }, { "epoch": 0.5895227411690507, "grad_norm": 0.7265625, "learning_rate": 0.00014662595232805008, "loss": 0.8602, "step": 22959 }, { "epoch": 0.5895484183649724, "grad_norm": 0.8359375, "learning_rate": 0.00014662200303456422, "loss": 0.7682, "step": 22960 }, { "epoch": 0.5895740955608942, "grad_norm": 0.79296875, "learning_rate": 0.00014661805364816498, "loss": 0.8305, "step": 22961 }, { "epoch": 0.5895997727568161, "grad_norm": 0.83203125, "learning_rate": 0.00014661410416886013, "loss": 0.8786, "step": 22962 }, { "epoch": 0.5896254499527379, "grad_norm": 0.73828125, "learning_rate": 0.00014661015459665759, "loss": 0.7405, "step": 22963 }, { "epoch": 0.5896511271486597, "grad_norm": 0.765625, "learning_rate": 0.0001466062049315652, "loss": 0.8916, "step": 22964 }, { "epoch": 0.5896768043445816, "grad_norm": 0.7734375, "learning_rate": 0.00014660225517359088, "loss": 0.9308, "step": 22965 }, { "epoch": 0.5897024815405034, "grad_norm": 0.734375, "learning_rate": 0.0001465983053227425, "loss": 0.8032, "step": 22966 }, { "epoch": 0.5897281587364251, "grad_norm": 0.8046875, "learning_rate": 0.00014659435537902783, "loss": 0.8321, "step": 22967 }, { "epoch": 0.589753835932347, "grad_norm": 1.0, "learning_rate": 0.00014659040534245488, "loss": 0.9147, "step": 22968 }, { "epoch": 0.5897795131282688, "grad_norm": 0.6796875, "learning_rate": 0.00014658645521303145, "loss": 0.8422, "step": 22969 }, { "epoch": 0.5898051903241907, "grad_norm": 0.75390625, "learning_rate": 0.00014658250499076538, "loss": 0.7459, "step": 22970 }, { "epoch": 0.5898308675201125, "grad_norm": 0.84765625, "learning_rate": 0.00014657855467566463, "loss": 0.8848, "step": 22971 }, { "epoch": 0.5898565447160343, "grad_norm": 0.78125, "learning_rate": 0.00014657460426773704, "loss": 0.9206, "step": 22972 }, { "epoch": 0.589882221911956, "grad_norm": 0.74609375, "learning_rate": 0.00014657065376699041, "loss": 0.876, "step": 22973 }, { "epoch": 0.5899078991078779, "grad_norm": 0.7890625, "learning_rate": 0.00014656670317343275, "loss": 0.9468, "step": 22974 }, { "epoch": 0.5899335763037997, "grad_norm": 0.8203125, "learning_rate": 0.0001465627524870718, "loss": 0.8129, "step": 22975 }, { "epoch": 0.5899592534997216, "grad_norm": 0.7109375, "learning_rate": 0.00014655880170791552, "loss": 0.7611, "step": 22976 }, { "epoch": 0.5899849306956434, "grad_norm": 0.765625, "learning_rate": 0.00014655485083597174, "loss": 0.7233, "step": 22977 }, { "epoch": 0.5900106078915652, "grad_norm": 0.7578125, "learning_rate": 0.00014655089987124833, "loss": 0.7742, "step": 22978 }, { "epoch": 0.5900362850874871, "grad_norm": 0.75, "learning_rate": 0.0001465469488137532, "loss": 0.9841, "step": 22979 }, { "epoch": 0.5900619622834088, "grad_norm": 0.72265625, "learning_rate": 0.0001465429976634942, "loss": 0.8253, "step": 22980 }, { "epoch": 0.5900876394793306, "grad_norm": 0.70703125, "learning_rate": 0.00014653904642047925, "loss": 0.83, "step": 22981 }, { "epoch": 0.5901133166752525, "grad_norm": 0.74609375, "learning_rate": 0.00014653509508471613, "loss": 0.766, "step": 22982 }, { "epoch": 0.5901389938711743, "grad_norm": 0.8125, "learning_rate": 0.00014653114365621278, "loss": 0.9354, "step": 22983 }, { "epoch": 0.5901646710670961, "grad_norm": 0.73828125, "learning_rate": 0.0001465271921349771, "loss": 0.919, "step": 22984 }, { "epoch": 0.590190348263018, "grad_norm": 0.7890625, "learning_rate": 0.0001465232405210169, "loss": 0.8047, "step": 22985 }, { "epoch": 0.5902160254589398, "grad_norm": 0.69921875, "learning_rate": 0.00014651928881434008, "loss": 0.8444, "step": 22986 }, { "epoch": 0.5902417026548615, "grad_norm": 0.7578125, "learning_rate": 0.00014651533701495455, "loss": 0.8422, "step": 22987 }, { "epoch": 0.5902673798507834, "grad_norm": 0.75390625, "learning_rate": 0.0001465113851228681, "loss": 0.8155, "step": 22988 }, { "epoch": 0.5902930570467052, "grad_norm": 0.765625, "learning_rate": 0.0001465074331380887, "loss": 0.8115, "step": 22989 }, { "epoch": 0.590318734242627, "grad_norm": 0.83203125, "learning_rate": 0.0001465034810606242, "loss": 0.848, "step": 22990 }, { "epoch": 0.5903444114385489, "grad_norm": 0.81640625, "learning_rate": 0.00014649952889048243, "loss": 0.8743, "step": 22991 }, { "epoch": 0.5903700886344707, "grad_norm": 0.81640625, "learning_rate": 0.0001464955766276713, "loss": 0.8121, "step": 22992 }, { "epoch": 0.5903957658303924, "grad_norm": 0.8359375, "learning_rate": 0.00014649162427219872, "loss": 0.8615, "step": 22993 }, { "epoch": 0.5904214430263143, "grad_norm": 0.83203125, "learning_rate": 0.0001464876718240725, "loss": 0.8613, "step": 22994 }, { "epoch": 0.5904471202222361, "grad_norm": 0.7578125, "learning_rate": 0.00014648371928330055, "loss": 0.8944, "step": 22995 }, { "epoch": 0.590472797418158, "grad_norm": 0.7578125, "learning_rate": 0.00014647976664989077, "loss": 0.8189, "step": 22996 }, { "epoch": 0.5904984746140798, "grad_norm": 0.7734375, "learning_rate": 0.000146475813923851, "loss": 0.7911, "step": 22997 }, { "epoch": 0.5905241518100016, "grad_norm": 0.78125, "learning_rate": 0.00014647186110518912, "loss": 0.8511, "step": 22998 }, { "epoch": 0.5905498290059235, "grad_norm": 0.7421875, "learning_rate": 0.000146467908193913, "loss": 0.8178, "step": 22999 }, { "epoch": 0.5905755062018452, "grad_norm": 0.74609375, "learning_rate": 0.0001464639551900306, "loss": 0.8476, "step": 23000 }, { "epoch": 0.5905755062018452, "eval_loss": 0.8621730208396912, "eval_runtime": 391.923, "eval_samples_per_second": 25.515, "eval_steps_per_second": 0.799, "step": 23000 }, { "epoch": 0.590601183397767, "grad_norm": 0.76953125, "learning_rate": 0.00014646000209354968, "loss": 0.746, "step": 23001 }, { "epoch": 0.5906268605936889, "grad_norm": 0.7578125, "learning_rate": 0.0001464560489044782, "loss": 0.7591, "step": 23002 }, { "epoch": 0.5906525377896107, "grad_norm": 0.796875, "learning_rate": 0.00014645209562282402, "loss": 0.9481, "step": 23003 }, { "epoch": 0.5906782149855325, "grad_norm": 0.81640625, "learning_rate": 0.00014644814224859496, "loss": 0.8734, "step": 23004 }, { "epoch": 0.5907038921814544, "grad_norm": 0.73828125, "learning_rate": 0.000146444188781799, "loss": 0.7481, "step": 23005 }, { "epoch": 0.5907295693773762, "grad_norm": 0.78125, "learning_rate": 0.00014644023522244394, "loss": 0.9059, "step": 23006 }, { "epoch": 0.5907552465732979, "grad_norm": 0.79296875, "learning_rate": 0.0001464362815705377, "loss": 0.9199, "step": 23007 }, { "epoch": 0.5907809237692198, "grad_norm": 0.75, "learning_rate": 0.00014643232782608813, "loss": 0.8031, "step": 23008 }, { "epoch": 0.5908066009651416, "grad_norm": 0.75, "learning_rate": 0.00014642837398910316, "loss": 1.0016, "step": 23009 }, { "epoch": 0.5908322781610634, "grad_norm": 0.8515625, "learning_rate": 0.00014642442005959058, "loss": 0.8877, "step": 23010 }, { "epoch": 0.5908579553569853, "grad_norm": 0.73828125, "learning_rate": 0.00014642046603755837, "loss": 0.8038, "step": 23011 }, { "epoch": 0.5908836325529071, "grad_norm": 0.84375, "learning_rate": 0.0001464165119230143, "loss": 0.9201, "step": 23012 }, { "epoch": 0.5909093097488288, "grad_norm": 0.7421875, "learning_rate": 0.00014641255771596638, "loss": 0.8513, "step": 23013 }, { "epoch": 0.5909349869447507, "grad_norm": 0.765625, "learning_rate": 0.00014640860341642243, "loss": 0.9406, "step": 23014 }, { "epoch": 0.5909606641406725, "grad_norm": 0.8125, "learning_rate": 0.0001464046490243903, "loss": 0.9762, "step": 23015 }, { "epoch": 0.5909863413365943, "grad_norm": 0.8203125, "learning_rate": 0.00014640069453987787, "loss": 0.895, "step": 23016 }, { "epoch": 0.5910120185325162, "grad_norm": 0.796875, "learning_rate": 0.00014639673996289312, "loss": 0.8512, "step": 23017 }, { "epoch": 0.591037695728438, "grad_norm": 0.7734375, "learning_rate": 0.0001463927852934438, "loss": 0.8962, "step": 23018 }, { "epoch": 0.5910633729243598, "grad_norm": 0.76953125, "learning_rate": 0.00014638883053153784, "loss": 0.8217, "step": 23019 }, { "epoch": 0.5910890501202816, "grad_norm": 0.80078125, "learning_rate": 0.00014638487567718316, "loss": 0.8719, "step": 23020 }, { "epoch": 0.5911147273162034, "grad_norm": 0.75390625, "learning_rate": 0.00014638092073038762, "loss": 0.7874, "step": 23021 }, { "epoch": 0.5911404045121252, "grad_norm": 0.8125, "learning_rate": 0.00014637696569115907, "loss": 0.9057, "step": 23022 }, { "epoch": 0.5911660817080471, "grad_norm": 0.78125, "learning_rate": 0.00014637301055950543, "loss": 0.8308, "step": 23023 }, { "epoch": 0.5911917589039689, "grad_norm": 0.7890625, "learning_rate": 0.00014636905533543457, "loss": 0.8643, "step": 23024 }, { "epoch": 0.5912174360998907, "grad_norm": 0.6796875, "learning_rate": 0.00014636510001895435, "loss": 0.7303, "step": 23025 }, { "epoch": 0.5912431132958126, "grad_norm": 0.765625, "learning_rate": 0.00014636114461007269, "loss": 0.8082, "step": 23026 }, { "epoch": 0.5912687904917343, "grad_norm": 0.734375, "learning_rate": 0.00014635718910879748, "loss": 0.8215, "step": 23027 }, { "epoch": 0.5912944676876561, "grad_norm": 0.6875, "learning_rate": 0.00014635323351513653, "loss": 0.7981, "step": 23028 }, { "epoch": 0.591320144883578, "grad_norm": 0.796875, "learning_rate": 0.00014634927782909783, "loss": 0.8464, "step": 23029 }, { "epoch": 0.5913458220794998, "grad_norm": 0.75390625, "learning_rate": 0.00014634532205068919, "loss": 0.7013, "step": 23030 }, { "epoch": 0.5913714992754217, "grad_norm": 0.84765625, "learning_rate": 0.00014634136617991845, "loss": 0.9444, "step": 23031 }, { "epoch": 0.5913971764713435, "grad_norm": 0.8828125, "learning_rate": 0.0001463374102167936, "loss": 0.9214, "step": 23032 }, { "epoch": 0.5914228536672652, "grad_norm": 0.8671875, "learning_rate": 0.0001463334541613225, "loss": 0.8499, "step": 23033 }, { "epoch": 0.591448530863187, "grad_norm": 0.87890625, "learning_rate": 0.000146329498013513, "loss": 0.7896, "step": 23034 }, { "epoch": 0.5914742080591089, "grad_norm": 0.72265625, "learning_rate": 0.00014632554177337296, "loss": 0.9047, "step": 23035 }, { "epoch": 0.5914998852550307, "grad_norm": 0.72265625, "learning_rate": 0.00014632158544091033, "loss": 0.8621, "step": 23036 }, { "epoch": 0.5915255624509526, "grad_norm": 0.73828125, "learning_rate": 0.00014631762901613294, "loss": 0.88, "step": 23037 }, { "epoch": 0.5915512396468744, "grad_norm": 0.80859375, "learning_rate": 0.00014631367249904874, "loss": 1.0037, "step": 23038 }, { "epoch": 0.5915769168427962, "grad_norm": 0.78515625, "learning_rate": 0.00014630971588966554, "loss": 0.9264, "step": 23039 }, { "epoch": 0.591602594038718, "grad_norm": 0.78515625, "learning_rate": 0.00014630575918799128, "loss": 0.8267, "step": 23040 }, { "epoch": 0.5916282712346398, "grad_norm": 0.7734375, "learning_rate": 0.0001463018023940338, "loss": 0.7935, "step": 23041 }, { "epoch": 0.5916539484305616, "grad_norm": 0.828125, "learning_rate": 0.00014629784550780105, "loss": 0.8472, "step": 23042 }, { "epoch": 0.5916796256264835, "grad_norm": 0.73046875, "learning_rate": 0.00014629388852930084, "loss": 0.7845, "step": 23043 }, { "epoch": 0.5917053028224053, "grad_norm": 0.8046875, "learning_rate": 0.0001462899314585411, "loss": 0.8555, "step": 23044 }, { "epoch": 0.5917309800183271, "grad_norm": 0.76171875, "learning_rate": 0.00014628597429552968, "loss": 0.9038, "step": 23045 }, { "epoch": 0.591756657214249, "grad_norm": 0.7890625, "learning_rate": 0.00014628201704027457, "loss": 0.9346, "step": 23046 }, { "epoch": 0.5917823344101707, "grad_norm": 0.78515625, "learning_rate": 0.00014627805969278352, "loss": 0.8692, "step": 23047 }, { "epoch": 0.5918080116060925, "grad_norm": 0.7421875, "learning_rate": 0.00014627410225306447, "loss": 0.7117, "step": 23048 }, { "epoch": 0.5918336888020144, "grad_norm": 0.7265625, "learning_rate": 0.00014627014472112538, "loss": 0.81, "step": 23049 }, { "epoch": 0.5918593659979362, "grad_norm": 0.68359375, "learning_rate": 0.000146266187096974, "loss": 0.8483, "step": 23050 }, { "epoch": 0.591885043193858, "grad_norm": 0.78515625, "learning_rate": 0.00014626222938061832, "loss": 0.8484, "step": 23051 }, { "epoch": 0.5919107203897799, "grad_norm": 0.90625, "learning_rate": 0.00014625827157206618, "loss": 0.7581, "step": 23052 }, { "epoch": 0.5919363975857016, "grad_norm": 0.8125, "learning_rate": 0.00014625431367132553, "loss": 0.9994, "step": 23053 }, { "epoch": 0.5919620747816234, "grad_norm": 0.765625, "learning_rate": 0.00014625035567840416, "loss": 0.8771, "step": 23054 }, { "epoch": 0.5919877519775453, "grad_norm": 0.859375, "learning_rate": 0.00014624639759331, "loss": 0.9142, "step": 23055 }, { "epoch": 0.5920134291734671, "grad_norm": 0.82421875, "learning_rate": 0.00014624243941605098, "loss": 0.9578, "step": 23056 }, { "epoch": 0.592039106369389, "grad_norm": 0.75390625, "learning_rate": 0.00014623848114663496, "loss": 0.855, "step": 23057 }, { "epoch": 0.5920647835653108, "grad_norm": 0.7421875, "learning_rate": 0.0001462345227850698, "loss": 0.7136, "step": 23058 }, { "epoch": 0.5920904607612326, "grad_norm": 0.80859375, "learning_rate": 0.00014623056433136342, "loss": 0.9748, "step": 23059 }, { "epoch": 0.5921161379571543, "grad_norm": 0.796875, "learning_rate": 0.00014622660578552372, "loss": 0.7558, "step": 23060 }, { "epoch": 0.5921418151530762, "grad_norm": 0.83984375, "learning_rate": 0.00014622264714755855, "loss": 0.9139, "step": 23061 }, { "epoch": 0.592167492348998, "grad_norm": 0.7890625, "learning_rate": 0.00014621868841747584, "loss": 0.9273, "step": 23062 }, { "epoch": 0.5921931695449199, "grad_norm": 0.71484375, "learning_rate": 0.00014621472959528342, "loss": 0.9422, "step": 23063 }, { "epoch": 0.5922188467408417, "grad_norm": 0.8203125, "learning_rate": 0.00014621077068098924, "loss": 0.8857, "step": 23064 }, { "epoch": 0.5922445239367635, "grad_norm": 0.77734375, "learning_rate": 0.00014620681167460116, "loss": 0.8015, "step": 23065 }, { "epoch": 0.5922702011326854, "grad_norm": 0.80078125, "learning_rate": 0.0001462028525761271, "loss": 0.8262, "step": 23066 }, { "epoch": 0.5922958783286071, "grad_norm": 0.796875, "learning_rate": 0.00014619889338557494, "loss": 0.9073, "step": 23067 }, { "epoch": 0.5923215555245289, "grad_norm": 0.921875, "learning_rate": 0.0001461949341029525, "loss": 0.973, "step": 23068 }, { "epoch": 0.5923472327204508, "grad_norm": 0.75390625, "learning_rate": 0.00014619097472826778, "loss": 0.9525, "step": 23069 }, { "epoch": 0.5923729099163726, "grad_norm": 0.78515625, "learning_rate": 0.0001461870152615286, "loss": 0.9169, "step": 23070 }, { "epoch": 0.5923985871122944, "grad_norm": 0.828125, "learning_rate": 0.00014618305570274285, "loss": 0.9671, "step": 23071 }, { "epoch": 0.5924242643082163, "grad_norm": 0.8125, "learning_rate": 0.00014617909605191848, "loss": 0.8714, "step": 23072 }, { "epoch": 0.592449941504138, "grad_norm": 0.74609375, "learning_rate": 0.00014617513630906334, "loss": 0.8097, "step": 23073 }, { "epoch": 0.5924756187000598, "grad_norm": 0.78125, "learning_rate": 0.00014617117647418532, "loss": 0.9666, "step": 23074 }, { "epoch": 0.5925012958959817, "grad_norm": 0.765625, "learning_rate": 0.0001461672165472923, "loss": 0.8166, "step": 23075 }, { "epoch": 0.5925269730919035, "grad_norm": 0.78515625, "learning_rate": 0.00014616325652839221, "loss": 0.8195, "step": 23076 }, { "epoch": 0.5925526502878253, "grad_norm": 0.79296875, "learning_rate": 0.00014615929641749288, "loss": 0.897, "step": 23077 }, { "epoch": 0.5925783274837472, "grad_norm": 0.77734375, "learning_rate": 0.0001461553362146023, "loss": 1.0052, "step": 23078 }, { "epoch": 0.592604004679669, "grad_norm": 0.828125, "learning_rate": 0.00014615137591972828, "loss": 0.9006, "step": 23079 }, { "epoch": 0.5926296818755907, "grad_norm": 0.7734375, "learning_rate": 0.00014614741553287873, "loss": 0.8106, "step": 23080 }, { "epoch": 0.5926553590715126, "grad_norm": 0.84375, "learning_rate": 0.00014614345505406152, "loss": 1.0553, "step": 23081 }, { "epoch": 0.5926810362674344, "grad_norm": 0.78125, "learning_rate": 0.0001461394944832846, "loss": 0.9198, "step": 23082 }, { "epoch": 0.5927067134633562, "grad_norm": 0.828125, "learning_rate": 0.00014613553382055586, "loss": 0.9052, "step": 23083 }, { "epoch": 0.5927323906592781, "grad_norm": 0.703125, "learning_rate": 0.00014613157306588313, "loss": 0.8815, "step": 23084 }, { "epoch": 0.5927580678551999, "grad_norm": 0.87109375, "learning_rate": 0.00014612761221927435, "loss": 0.9097, "step": 23085 }, { "epoch": 0.5927837450511217, "grad_norm": 0.77734375, "learning_rate": 0.00014612365128073744, "loss": 0.9651, "step": 23086 }, { "epoch": 0.5928094222470435, "grad_norm": 0.7578125, "learning_rate": 0.0001461196902502802, "loss": 1.0846, "step": 23087 }, { "epoch": 0.5928350994429653, "grad_norm": 0.7890625, "learning_rate": 0.00014611572912791063, "loss": 0.8225, "step": 23088 }, { "epoch": 0.5928607766388871, "grad_norm": 0.75390625, "learning_rate": 0.00014611176791363655, "loss": 0.9031, "step": 23089 }, { "epoch": 0.592886453834809, "grad_norm": 0.8203125, "learning_rate": 0.0001461078066074659, "loss": 0.9554, "step": 23090 }, { "epoch": 0.5929121310307308, "grad_norm": 0.7421875, "learning_rate": 0.00014610384520940655, "loss": 0.7522, "step": 23091 }, { "epoch": 0.5929378082266527, "grad_norm": 0.7734375, "learning_rate": 0.0001460998837194664, "loss": 0.9158, "step": 23092 }, { "epoch": 0.5929634854225744, "grad_norm": 0.78125, "learning_rate": 0.00014609592213765335, "loss": 0.8651, "step": 23093 }, { "epoch": 0.5929891626184962, "grad_norm": 0.8203125, "learning_rate": 0.0001460919604639753, "loss": 0.8814, "step": 23094 }, { "epoch": 0.593014839814418, "grad_norm": 0.84765625, "learning_rate": 0.00014608799869844012, "loss": 0.9524, "step": 23095 }, { "epoch": 0.5930405170103399, "grad_norm": 0.80078125, "learning_rate": 0.00014608403684105572, "loss": 0.9232, "step": 23096 }, { "epoch": 0.5930661942062617, "grad_norm": 0.8125, "learning_rate": 0.00014608007489183003, "loss": 0.7519, "step": 23097 }, { "epoch": 0.5930918714021836, "grad_norm": 0.7421875, "learning_rate": 0.00014607611285077086, "loss": 0.6918, "step": 23098 }, { "epoch": 0.5931175485981054, "grad_norm": 0.7890625, "learning_rate": 0.0001460721507178862, "loss": 0.7769, "step": 23099 }, { "epoch": 0.5931432257940271, "grad_norm": 0.734375, "learning_rate": 0.00014606818849318386, "loss": 0.8679, "step": 23100 }, { "epoch": 0.593168902989949, "grad_norm": 0.7890625, "learning_rate": 0.00014606422617667185, "loss": 0.9415, "step": 23101 }, { "epoch": 0.5931945801858708, "grad_norm": 0.7578125, "learning_rate": 0.00014606026376835795, "loss": 1.0096, "step": 23102 }, { "epoch": 0.5932202573817926, "grad_norm": 0.78125, "learning_rate": 0.0001460563012682501, "loss": 0.8326, "step": 23103 }, { "epoch": 0.5932459345777145, "grad_norm": 0.80078125, "learning_rate": 0.00014605233867635625, "loss": 0.9003, "step": 23104 }, { "epoch": 0.5932716117736363, "grad_norm": 0.82421875, "learning_rate": 0.0001460483759926842, "loss": 0.9282, "step": 23105 }, { "epoch": 0.5932972889695581, "grad_norm": 0.734375, "learning_rate": 0.0001460444132172419, "loss": 0.8697, "step": 23106 }, { "epoch": 0.5933229661654799, "grad_norm": 0.75, "learning_rate": 0.00014604045035003727, "loss": 0.7725, "step": 23107 }, { "epoch": 0.5933486433614017, "grad_norm": 0.76171875, "learning_rate": 0.00014603648739107815, "loss": 0.8377, "step": 23108 }, { "epoch": 0.5933743205573235, "grad_norm": 0.80078125, "learning_rate": 0.00014603252434037249, "loss": 0.8976, "step": 23109 }, { "epoch": 0.5933999977532454, "grad_norm": 0.80078125, "learning_rate": 0.0001460285611979282, "loss": 0.8209, "step": 23110 }, { "epoch": 0.5934256749491672, "grad_norm": 0.76171875, "learning_rate": 0.00014602459796375308, "loss": 0.9716, "step": 23111 }, { "epoch": 0.593451352145089, "grad_norm": 0.890625, "learning_rate": 0.00014602063463785514, "loss": 0.941, "step": 23112 }, { "epoch": 0.5934770293410108, "grad_norm": 0.85546875, "learning_rate": 0.0001460166712202422, "loss": 0.9788, "step": 23113 }, { "epoch": 0.5935027065369326, "grad_norm": 0.7734375, "learning_rate": 0.00014601270771092222, "loss": 0.8116, "step": 23114 }, { "epoch": 0.5935283837328544, "grad_norm": 0.7109375, "learning_rate": 0.00014600874410990305, "loss": 0.8317, "step": 23115 }, { "epoch": 0.5935540609287763, "grad_norm": 0.75, "learning_rate": 0.0001460047804171926, "loss": 0.8212, "step": 23116 }, { "epoch": 0.5935797381246981, "grad_norm": 0.72265625, "learning_rate": 0.0001460008166327988, "loss": 0.805, "step": 23117 }, { "epoch": 0.59360541532062, "grad_norm": 0.859375, "learning_rate": 0.00014599685275672953, "loss": 0.8545, "step": 23118 }, { "epoch": 0.5936310925165418, "grad_norm": 0.8125, "learning_rate": 0.00014599288878899266, "loss": 0.7701, "step": 23119 }, { "epoch": 0.5936567697124635, "grad_norm": 0.83203125, "learning_rate": 0.00014598892472959615, "loss": 0.9612, "step": 23120 }, { "epoch": 0.5936824469083853, "grad_norm": 0.76171875, "learning_rate": 0.00014598496057854785, "loss": 0.9143, "step": 23121 }, { "epoch": 0.5937081241043072, "grad_norm": 0.72265625, "learning_rate": 0.00014598099633585568, "loss": 0.8527, "step": 23122 }, { "epoch": 0.593733801300229, "grad_norm": 0.75390625, "learning_rate": 0.00014597703200152756, "loss": 0.8058, "step": 23123 }, { "epoch": 0.5937594784961508, "grad_norm": 0.74609375, "learning_rate": 0.0001459730675755713, "loss": 0.8476, "step": 23124 }, { "epoch": 0.5937851556920727, "grad_norm": 0.8125, "learning_rate": 0.00014596910305799494, "loss": 0.9054, "step": 23125 }, { "epoch": 0.5938108328879945, "grad_norm": 0.75390625, "learning_rate": 0.00014596513844880628, "loss": 0.8998, "step": 23126 }, { "epoch": 0.5938365100839162, "grad_norm": 0.73828125, "learning_rate": 0.00014596117374801324, "loss": 0.8465, "step": 23127 }, { "epoch": 0.5938621872798381, "grad_norm": 0.734375, "learning_rate": 0.00014595720895562377, "loss": 0.8442, "step": 23128 }, { "epoch": 0.5938878644757599, "grad_norm": 0.7265625, "learning_rate": 0.0001459532440716457, "loss": 0.9071, "step": 23129 }, { "epoch": 0.5939135416716818, "grad_norm": 0.828125, "learning_rate": 0.000145949279096087, "loss": 0.7619, "step": 23130 }, { "epoch": 0.5939392188676036, "grad_norm": 0.75390625, "learning_rate": 0.0001459453140289555, "loss": 0.8116, "step": 23131 }, { "epoch": 0.5939648960635254, "grad_norm": 0.7265625, "learning_rate": 0.00014594134887025914, "loss": 0.8488, "step": 23132 }, { "epoch": 0.5939905732594472, "grad_norm": 0.7421875, "learning_rate": 0.00014593738362000583, "loss": 0.8451, "step": 23133 }, { "epoch": 0.594016250455369, "grad_norm": 0.86328125, "learning_rate": 0.0001459334182782035, "loss": 0.8365, "step": 23134 }, { "epoch": 0.5940419276512908, "grad_norm": 0.7734375, "learning_rate": 0.00014592945284485996, "loss": 0.8534, "step": 23135 }, { "epoch": 0.5940676048472127, "grad_norm": 0.85546875, "learning_rate": 0.0001459254873199832, "loss": 0.8525, "step": 23136 }, { "epoch": 0.5940932820431345, "grad_norm": 0.734375, "learning_rate": 0.00014592152170358108, "loss": 0.706, "step": 23137 }, { "epoch": 0.5941189592390563, "grad_norm": 0.7265625, "learning_rate": 0.00014591755599566153, "loss": 0.8278, "step": 23138 }, { "epoch": 0.5941446364349782, "grad_norm": 0.734375, "learning_rate": 0.00014591359019623245, "loss": 0.9142, "step": 23139 }, { "epoch": 0.5941703136308999, "grad_norm": 0.8046875, "learning_rate": 0.0001459096243053017, "loss": 0.7561, "step": 23140 }, { "epoch": 0.5941959908268217, "grad_norm": 0.8046875, "learning_rate": 0.00014590565832287724, "loss": 0.7729, "step": 23141 }, { "epoch": 0.5942216680227436, "grad_norm": 0.7734375, "learning_rate": 0.00014590169224896696, "loss": 0.9258, "step": 23142 }, { "epoch": 0.5942473452186654, "grad_norm": 0.7890625, "learning_rate": 0.0001458977260835787, "loss": 0.7936, "step": 23143 }, { "epoch": 0.5942730224145872, "grad_norm": 0.76953125, "learning_rate": 0.00014589375982672044, "loss": 0.9287, "step": 23144 }, { "epoch": 0.5942986996105091, "grad_norm": 0.80078125, "learning_rate": 0.00014588979347840008, "loss": 0.8055, "step": 23145 }, { "epoch": 0.5943243768064308, "grad_norm": 0.765625, "learning_rate": 0.00014588582703862552, "loss": 0.7808, "step": 23146 }, { "epoch": 0.5943500540023526, "grad_norm": 0.8125, "learning_rate": 0.00014588186050740464, "loss": 0.9362, "step": 23147 }, { "epoch": 0.5943757311982745, "grad_norm": 0.796875, "learning_rate": 0.00014587789388474534, "loss": 0.8029, "step": 23148 }, { "epoch": 0.5944014083941963, "grad_norm": 0.80078125, "learning_rate": 0.00014587392717065552, "loss": 0.8605, "step": 23149 }, { "epoch": 0.5944270855901181, "grad_norm": 0.77734375, "learning_rate": 0.00014586996036514315, "loss": 0.8775, "step": 23150 }, { "epoch": 0.59445276278604, "grad_norm": 0.75, "learning_rate": 0.00014586599346821607, "loss": 0.9038, "step": 23151 }, { "epoch": 0.5944784399819618, "grad_norm": 0.7109375, "learning_rate": 0.00014586202647988222, "loss": 0.8807, "step": 23152 }, { "epoch": 0.5945041171778835, "grad_norm": 0.70703125, "learning_rate": 0.0001458580594001495, "loss": 0.7704, "step": 23153 }, { "epoch": 0.5945297943738054, "grad_norm": 0.77734375, "learning_rate": 0.0001458540922290258, "loss": 0.8325, "step": 23154 }, { "epoch": 0.5945554715697272, "grad_norm": 0.7578125, "learning_rate": 0.00014585012496651903, "loss": 0.7496, "step": 23155 }, { "epoch": 0.594581148765649, "grad_norm": 0.75, "learning_rate": 0.00014584615761263712, "loss": 0.8021, "step": 23156 }, { "epoch": 0.5946068259615709, "grad_norm": 0.8046875, "learning_rate": 0.00014584219016738794, "loss": 0.8547, "step": 23157 }, { "epoch": 0.5946325031574927, "grad_norm": 0.7421875, "learning_rate": 0.00014583822263077942, "loss": 0.9033, "step": 23158 }, { "epoch": 0.5946581803534146, "grad_norm": 0.765625, "learning_rate": 0.0001458342550028195, "loss": 0.8641, "step": 23159 }, { "epoch": 0.5946838575493363, "grad_norm": 0.76953125, "learning_rate": 0.000145830287283516, "loss": 0.8261, "step": 23160 }, { "epoch": 0.5947095347452581, "grad_norm": 0.80859375, "learning_rate": 0.0001458263194728769, "loss": 0.8827, "step": 23161 }, { "epoch": 0.59473521194118, "grad_norm": 0.78125, "learning_rate": 0.00014582235157091007, "loss": 0.8215, "step": 23162 }, { "epoch": 0.5947608891371018, "grad_norm": 0.76953125, "learning_rate": 0.00014581838357762345, "loss": 0.7998, "step": 23163 }, { "epoch": 0.5947865663330236, "grad_norm": 0.765625, "learning_rate": 0.0001458144154930249, "loss": 0.8369, "step": 23164 }, { "epoch": 0.5948122435289455, "grad_norm": 0.79296875, "learning_rate": 0.0001458104473171224, "loss": 0.976, "step": 23165 }, { "epoch": 0.5948379207248672, "grad_norm": 0.89453125, "learning_rate": 0.00014580647904992379, "loss": 1.0442, "step": 23166 }, { "epoch": 0.594863597920789, "grad_norm": 0.7421875, "learning_rate": 0.000145802510691437, "loss": 0.9052, "step": 23167 }, { "epoch": 0.5948892751167109, "grad_norm": 0.76171875, "learning_rate": 0.00014579854224166998, "loss": 0.9363, "step": 23168 }, { "epoch": 0.5949149523126327, "grad_norm": 0.79296875, "learning_rate": 0.00014579457370063055, "loss": 0.8773, "step": 23169 }, { "epoch": 0.5949406295085545, "grad_norm": 0.796875, "learning_rate": 0.0001457906050683267, "loss": 0.8647, "step": 23170 }, { "epoch": 0.5949663067044764, "grad_norm": 0.82421875, "learning_rate": 0.00014578663634476634, "loss": 0.9438, "step": 23171 }, { "epoch": 0.5949919839003982, "grad_norm": 0.75390625, "learning_rate": 0.00014578266752995732, "loss": 0.7805, "step": 23172 }, { "epoch": 0.5950176610963199, "grad_norm": 0.68359375, "learning_rate": 0.00014577869862390756, "loss": 0.8304, "step": 23173 }, { "epoch": 0.5950433382922418, "grad_norm": 0.7734375, "learning_rate": 0.000145774729626625, "loss": 0.8697, "step": 23174 }, { "epoch": 0.5950690154881636, "grad_norm": 0.7890625, "learning_rate": 0.00014577076053811755, "loss": 0.7847, "step": 23175 }, { "epoch": 0.5950946926840854, "grad_norm": 0.71484375, "learning_rate": 0.0001457667913583931, "loss": 0.8541, "step": 23176 }, { "epoch": 0.5951203698800073, "grad_norm": 0.76171875, "learning_rate": 0.00014576282208745957, "loss": 0.9455, "step": 23177 }, { "epoch": 0.5951460470759291, "grad_norm": 0.75390625, "learning_rate": 0.0001457588527253249, "loss": 0.7001, "step": 23178 }, { "epoch": 0.5951717242718509, "grad_norm": 0.9140625, "learning_rate": 0.00014575488327199694, "loss": 0.9108, "step": 23179 }, { "epoch": 0.5951974014677727, "grad_norm": 0.77734375, "learning_rate": 0.00014575091372748361, "loss": 0.8739, "step": 23180 }, { "epoch": 0.5952230786636945, "grad_norm": 0.796875, "learning_rate": 0.00014574694409179287, "loss": 0.9191, "step": 23181 }, { "epoch": 0.5952487558596163, "grad_norm": 0.80078125, "learning_rate": 0.0001457429743649326, "loss": 0.8377, "step": 23182 }, { "epoch": 0.5952744330555382, "grad_norm": 0.796875, "learning_rate": 0.0001457390045469107, "loss": 0.9169, "step": 23183 }, { "epoch": 0.59530011025146, "grad_norm": 0.8125, "learning_rate": 0.00014573503463773512, "loss": 0.8268, "step": 23184 }, { "epoch": 0.5953257874473818, "grad_norm": 0.80859375, "learning_rate": 0.00014573106463741374, "loss": 0.8883, "step": 23185 }, { "epoch": 0.5953514646433036, "grad_norm": 0.85546875, "learning_rate": 0.0001457270945459545, "loss": 0.8776, "step": 23186 }, { "epoch": 0.5953771418392254, "grad_norm": 1.0078125, "learning_rate": 0.00014572312436336523, "loss": 0.7838, "step": 23187 }, { "epoch": 0.5954028190351472, "grad_norm": 0.80859375, "learning_rate": 0.00014571915408965392, "loss": 0.7304, "step": 23188 }, { "epoch": 0.5954284962310691, "grad_norm": 0.859375, "learning_rate": 0.00014571518372482852, "loss": 0.8598, "step": 23189 }, { "epoch": 0.5954541734269909, "grad_norm": 0.75390625, "learning_rate": 0.00014571121326889682, "loss": 0.9159, "step": 23190 }, { "epoch": 0.5954798506229128, "grad_norm": 0.76171875, "learning_rate": 0.00014570724272186685, "loss": 0.9159, "step": 23191 }, { "epoch": 0.5955055278188346, "grad_norm": 0.70703125, "learning_rate": 0.00014570327208374647, "loss": 0.7895, "step": 23192 }, { "epoch": 0.5955312050147563, "grad_norm": 0.80078125, "learning_rate": 0.00014569930135454356, "loss": 0.8975, "step": 23193 }, { "epoch": 0.5955568822106782, "grad_norm": 0.70703125, "learning_rate": 0.0001456953305342661, "loss": 0.7901, "step": 23194 }, { "epoch": 0.5955825594066, "grad_norm": 0.7421875, "learning_rate": 0.00014569135962292198, "loss": 0.7715, "step": 23195 }, { "epoch": 0.5956082366025218, "grad_norm": 0.74609375, "learning_rate": 0.00014568738862051907, "loss": 0.8346, "step": 23196 }, { "epoch": 0.5956339137984437, "grad_norm": 0.78125, "learning_rate": 0.00014568341752706535, "loss": 0.9523, "step": 23197 }, { "epoch": 0.5956595909943655, "grad_norm": 0.75, "learning_rate": 0.0001456794463425687, "loss": 0.8427, "step": 23198 }, { "epoch": 0.5956852681902873, "grad_norm": 0.84375, "learning_rate": 0.00014567547506703704, "loss": 0.9077, "step": 23199 }, { "epoch": 0.5957109453862091, "grad_norm": 0.796875, "learning_rate": 0.0001456715037004783, "loss": 0.8786, "step": 23200 }, { "epoch": 0.5957366225821309, "grad_norm": 0.7265625, "learning_rate": 0.00014566753224290035, "loss": 0.9127, "step": 23201 }, { "epoch": 0.5957622997780527, "grad_norm": 0.79296875, "learning_rate": 0.00014566356069431116, "loss": 0.8439, "step": 23202 }, { "epoch": 0.5957879769739746, "grad_norm": 0.796875, "learning_rate": 0.0001456595890547186, "loss": 0.8513, "step": 23203 }, { "epoch": 0.5958136541698964, "grad_norm": 0.67578125, "learning_rate": 0.00014565561732413063, "loss": 0.8423, "step": 23204 }, { "epoch": 0.5958393313658182, "grad_norm": 0.73828125, "learning_rate": 0.0001456516455025551, "loss": 0.8861, "step": 23205 }, { "epoch": 0.59586500856174, "grad_norm": 0.80859375, "learning_rate": 0.00014564767359, "loss": 0.8486, "step": 23206 }, { "epoch": 0.5958906857576618, "grad_norm": 0.77734375, "learning_rate": 0.00014564370158647318, "loss": 0.9112, "step": 23207 }, { "epoch": 0.5959163629535836, "grad_norm": 0.765625, "learning_rate": 0.0001456397294919826, "loss": 0.8753, "step": 23208 }, { "epoch": 0.5959420401495055, "grad_norm": 0.6953125, "learning_rate": 0.00014563575730653614, "loss": 0.7918, "step": 23209 }, { "epoch": 0.5959677173454273, "grad_norm": 0.66796875, "learning_rate": 0.00014563178503014177, "loss": 0.8367, "step": 23210 }, { "epoch": 0.5959933945413491, "grad_norm": 0.7109375, "learning_rate": 0.0001456278126628074, "loss": 0.8503, "step": 23211 }, { "epoch": 0.596019071737271, "grad_norm": 0.7421875, "learning_rate": 0.00014562384020454084, "loss": 0.7748, "step": 23212 }, { "epoch": 0.5960447489331927, "grad_norm": 0.75390625, "learning_rate": 0.00014561986765535014, "loss": 0.8527, "step": 23213 }, { "epoch": 0.5960704261291145, "grad_norm": 0.765625, "learning_rate": 0.00014561589501524313, "loss": 0.8263, "step": 23214 }, { "epoch": 0.5960961033250364, "grad_norm": 0.74609375, "learning_rate": 0.00014561192228422783, "loss": 0.7245, "step": 23215 }, { "epoch": 0.5961217805209582, "grad_norm": 0.76953125, "learning_rate": 0.000145607949462312, "loss": 0.8288, "step": 23216 }, { "epoch": 0.59614745771688, "grad_norm": 0.76953125, "learning_rate": 0.00014560397654950373, "loss": 0.8042, "step": 23217 }, { "epoch": 0.5961731349128019, "grad_norm": 0.7890625, "learning_rate": 0.0001456000035458108, "loss": 0.915, "step": 23218 }, { "epoch": 0.5961988121087237, "grad_norm": 0.7109375, "learning_rate": 0.00014559603045124123, "loss": 0.8547, "step": 23219 }, { "epoch": 0.5962244893046454, "grad_norm": 0.78125, "learning_rate": 0.00014559205726580283, "loss": 0.9365, "step": 23220 }, { "epoch": 0.5962501665005673, "grad_norm": 0.75390625, "learning_rate": 0.0001455880839895036, "loss": 0.7994, "step": 23221 }, { "epoch": 0.5962758436964891, "grad_norm": 0.75, "learning_rate": 0.00014558411062235146, "loss": 0.8516, "step": 23222 }, { "epoch": 0.596301520892411, "grad_norm": 0.734375, "learning_rate": 0.00014558013716435428, "loss": 0.7813, "step": 23223 }, { "epoch": 0.5963271980883328, "grad_norm": 0.8203125, "learning_rate": 0.00014557616361552002, "loss": 0.9846, "step": 23224 }, { "epoch": 0.5963528752842546, "grad_norm": 0.75390625, "learning_rate": 0.0001455721899758566, "loss": 0.89, "step": 23225 }, { "epoch": 0.5963785524801763, "grad_norm": 0.70703125, "learning_rate": 0.00014556821624537187, "loss": 0.7442, "step": 23226 }, { "epoch": 0.5964042296760982, "grad_norm": 0.8203125, "learning_rate": 0.00014556424242407383, "loss": 0.8421, "step": 23227 }, { "epoch": 0.59642990687202, "grad_norm": 0.7109375, "learning_rate": 0.00014556026851197034, "loss": 0.7783, "step": 23228 }, { "epoch": 0.5964555840679419, "grad_norm": 0.78515625, "learning_rate": 0.0001455562945090694, "loss": 0.9931, "step": 23229 }, { "epoch": 0.5964812612638637, "grad_norm": 0.7734375, "learning_rate": 0.00014555232041537885, "loss": 0.8432, "step": 23230 }, { "epoch": 0.5965069384597855, "grad_norm": 0.79296875, "learning_rate": 0.00014554834623090667, "loss": 0.9104, "step": 23231 }, { "epoch": 0.5965326156557074, "grad_norm": 0.8359375, "learning_rate": 0.00014554437195566072, "loss": 0.9593, "step": 23232 }, { "epoch": 0.5965582928516291, "grad_norm": 0.703125, "learning_rate": 0.00014554039758964894, "loss": 0.7752, "step": 23233 }, { "epoch": 0.5965839700475509, "grad_norm": 0.75390625, "learning_rate": 0.0001455364231328793, "loss": 0.8846, "step": 23234 }, { "epoch": 0.5966096472434728, "grad_norm": 0.80078125, "learning_rate": 0.00014553244858535967, "loss": 0.8447, "step": 23235 }, { "epoch": 0.5966353244393946, "grad_norm": 1.0, "learning_rate": 0.00014552847394709798, "loss": 0.8914, "step": 23236 }, { "epoch": 0.5966610016353164, "grad_norm": 0.734375, "learning_rate": 0.00014552449921810217, "loss": 0.8346, "step": 23237 }, { "epoch": 0.5966866788312383, "grad_norm": 0.765625, "learning_rate": 0.0001455205243983801, "loss": 0.7587, "step": 23238 }, { "epoch": 0.5967123560271601, "grad_norm": 0.796875, "learning_rate": 0.00014551654948793976, "loss": 0.8523, "step": 23239 }, { "epoch": 0.5967380332230818, "grad_norm": 0.84765625, "learning_rate": 0.00014551257448678904, "loss": 1.0012, "step": 23240 }, { "epoch": 0.5967637104190037, "grad_norm": 0.8046875, "learning_rate": 0.0001455085993949359, "loss": 0.8157, "step": 23241 }, { "epoch": 0.5967893876149255, "grad_norm": 0.80859375, "learning_rate": 0.0001455046242123882, "loss": 0.9054, "step": 23242 }, { "epoch": 0.5968150648108473, "grad_norm": 0.8203125, "learning_rate": 0.00014550064893915392, "loss": 0.9572, "step": 23243 }, { "epoch": 0.5968407420067692, "grad_norm": 0.8671875, "learning_rate": 0.00014549667357524094, "loss": 0.9327, "step": 23244 }, { "epoch": 0.596866419202691, "grad_norm": 0.7578125, "learning_rate": 0.0001454926981206572, "loss": 0.7808, "step": 23245 }, { "epoch": 0.5968920963986127, "grad_norm": 0.76953125, "learning_rate": 0.00014548872257541062, "loss": 0.8634, "step": 23246 }, { "epoch": 0.5969177735945346, "grad_norm": 0.7734375, "learning_rate": 0.00014548474693950914, "loss": 1.0206, "step": 23247 }, { "epoch": 0.5969434507904564, "grad_norm": 0.85546875, "learning_rate": 0.00014548077121296068, "loss": 0.8181, "step": 23248 }, { "epoch": 0.5969691279863782, "grad_norm": 0.74609375, "learning_rate": 0.0001454767953957731, "loss": 0.7918, "step": 23249 }, { "epoch": 0.5969948051823001, "grad_norm": 0.7265625, "learning_rate": 0.00014547281948795442, "loss": 0.8765, "step": 23250 }, { "epoch": 0.5970204823782219, "grad_norm": 0.68359375, "learning_rate": 0.00014546884348951247, "loss": 0.7348, "step": 23251 }, { "epoch": 0.5970461595741438, "grad_norm": 0.7734375, "learning_rate": 0.0001454648674004553, "loss": 0.9442, "step": 23252 }, { "epoch": 0.5970718367700655, "grad_norm": 0.77734375, "learning_rate": 0.0001454608912207907, "loss": 0.8584, "step": 23253 }, { "epoch": 0.5970975139659873, "grad_norm": 0.8046875, "learning_rate": 0.00014545691495052664, "loss": 0.8447, "step": 23254 }, { "epoch": 0.5971231911619092, "grad_norm": 0.7890625, "learning_rate": 0.0001454529385896711, "loss": 0.8273, "step": 23255 }, { "epoch": 0.597148868357831, "grad_norm": 0.79296875, "learning_rate": 0.0001454489621382319, "loss": 0.8778, "step": 23256 }, { "epoch": 0.5971745455537528, "grad_norm": 0.890625, "learning_rate": 0.00014544498559621708, "loss": 0.9051, "step": 23257 }, { "epoch": 0.5972002227496747, "grad_norm": 0.796875, "learning_rate": 0.00014544100896363447, "loss": 0.7942, "step": 23258 }, { "epoch": 0.5972258999455965, "grad_norm": 0.7578125, "learning_rate": 0.00014543703224049207, "loss": 0.8532, "step": 23259 }, { "epoch": 0.5972515771415182, "grad_norm": 0.734375, "learning_rate": 0.00014543305542679774, "loss": 0.8932, "step": 23260 }, { "epoch": 0.5972772543374401, "grad_norm": 0.7578125, "learning_rate": 0.00014542907852255945, "loss": 1.0044, "step": 23261 }, { "epoch": 0.5973029315333619, "grad_norm": 0.85546875, "learning_rate": 0.0001454251015277851, "loss": 0.8754, "step": 23262 }, { "epoch": 0.5973286087292837, "grad_norm": 0.77734375, "learning_rate": 0.00014542112444248265, "loss": 0.9286, "step": 23263 }, { "epoch": 0.5973542859252056, "grad_norm": 0.7421875, "learning_rate": 0.00014541714726665997, "loss": 0.8782, "step": 23264 }, { "epoch": 0.5973799631211274, "grad_norm": 0.73828125, "learning_rate": 0.00014541317000032504, "loss": 0.8697, "step": 23265 }, { "epoch": 0.5974056403170491, "grad_norm": 0.75390625, "learning_rate": 0.00014540919264348573, "loss": 0.7967, "step": 23266 }, { "epoch": 0.597431317512971, "grad_norm": 0.859375, "learning_rate": 0.00014540521519615005, "loss": 0.8493, "step": 23267 }, { "epoch": 0.5974569947088928, "grad_norm": 0.73828125, "learning_rate": 0.00014540123765832585, "loss": 0.8135, "step": 23268 }, { "epoch": 0.5974826719048146, "grad_norm": 0.8359375, "learning_rate": 0.00014539726003002108, "loss": 0.8748, "step": 23269 }, { "epoch": 0.5975083491007365, "grad_norm": 0.828125, "learning_rate": 0.0001453932823112437, "loss": 0.9235, "step": 23270 }, { "epoch": 0.5975340262966583, "grad_norm": 0.79296875, "learning_rate": 0.00014538930450200158, "loss": 0.8732, "step": 23271 }, { "epoch": 0.5975597034925801, "grad_norm": 0.828125, "learning_rate": 0.00014538532660230268, "loss": 0.868, "step": 23272 }, { "epoch": 0.5975853806885019, "grad_norm": 0.82421875, "learning_rate": 0.00014538134861215491, "loss": 0.8916, "step": 23273 }, { "epoch": 0.5976110578844237, "grad_norm": 0.87890625, "learning_rate": 0.00014537737053156623, "loss": 0.9053, "step": 23274 }, { "epoch": 0.5976367350803455, "grad_norm": 0.77734375, "learning_rate": 0.00014537339236054457, "loss": 0.7913, "step": 23275 }, { "epoch": 0.5976624122762674, "grad_norm": 0.8046875, "learning_rate": 0.0001453694140990978, "loss": 0.9069, "step": 23276 }, { "epoch": 0.5976880894721892, "grad_norm": 0.8203125, "learning_rate": 0.00014536543574723392, "loss": 0.8794, "step": 23277 }, { "epoch": 0.597713766668111, "grad_norm": 0.78125, "learning_rate": 0.0001453614573049608, "loss": 0.8094, "step": 23278 }, { "epoch": 0.5977394438640329, "grad_norm": 0.73046875, "learning_rate": 0.0001453574787722864, "loss": 0.8693, "step": 23279 }, { "epoch": 0.5977651210599546, "grad_norm": 0.7265625, "learning_rate": 0.00014535350014921865, "loss": 0.7505, "step": 23280 }, { "epoch": 0.5977907982558764, "grad_norm": 0.78125, "learning_rate": 0.00014534952143576544, "loss": 0.8998, "step": 23281 }, { "epoch": 0.5978164754517983, "grad_norm": 0.75390625, "learning_rate": 0.00014534554263193476, "loss": 0.9534, "step": 23282 }, { "epoch": 0.5978421526477201, "grad_norm": 0.83203125, "learning_rate": 0.0001453415637377345, "loss": 0.8865, "step": 23283 }, { "epoch": 0.597867829843642, "grad_norm": 0.70703125, "learning_rate": 0.0001453375847531726, "loss": 0.7374, "step": 23284 }, { "epoch": 0.5978935070395638, "grad_norm": 0.78515625, "learning_rate": 0.000145333605678257, "loss": 0.8768, "step": 23285 }, { "epoch": 0.5979191842354855, "grad_norm": 0.8515625, "learning_rate": 0.0001453296265129956, "loss": 0.9043, "step": 23286 }, { "epoch": 0.5979448614314073, "grad_norm": 0.7265625, "learning_rate": 0.00014532564725739636, "loss": 0.8675, "step": 23287 }, { "epoch": 0.5979705386273292, "grad_norm": 0.84375, "learning_rate": 0.0001453216679114672, "loss": 0.8636, "step": 23288 }, { "epoch": 0.597996215823251, "grad_norm": 0.7890625, "learning_rate": 0.00014531768847521602, "loss": 0.8064, "step": 23289 }, { "epoch": 0.5980218930191729, "grad_norm": 0.75, "learning_rate": 0.00014531370894865084, "loss": 0.8057, "step": 23290 }, { "epoch": 0.5980475702150947, "grad_norm": 0.72265625, "learning_rate": 0.0001453097293317795, "loss": 0.9123, "step": 23291 }, { "epoch": 0.5980732474110165, "grad_norm": 0.78125, "learning_rate": 0.00014530574962460998, "loss": 0.8646, "step": 23292 }, { "epoch": 0.5980989246069383, "grad_norm": 0.80859375, "learning_rate": 0.00014530176982715016, "loss": 0.8733, "step": 23293 }, { "epoch": 0.5981246018028601, "grad_norm": 0.90234375, "learning_rate": 0.00014529778993940798, "loss": 0.8175, "step": 23294 }, { "epoch": 0.5981502789987819, "grad_norm": 0.8046875, "learning_rate": 0.00014529380996139145, "loss": 0.8918, "step": 23295 }, { "epoch": 0.5981759561947038, "grad_norm": 0.8046875, "learning_rate": 0.00014528982989310847, "loss": 0.8095, "step": 23296 }, { "epoch": 0.5982016333906256, "grad_norm": 0.73828125, "learning_rate": 0.0001452858497345669, "loss": 0.7931, "step": 23297 }, { "epoch": 0.5982273105865474, "grad_norm": 0.78125, "learning_rate": 0.00014528186948577473, "loss": 0.8438, "step": 23298 }, { "epoch": 0.5982529877824693, "grad_norm": 0.75, "learning_rate": 0.00014527788914673988, "loss": 0.7399, "step": 23299 }, { "epoch": 0.598278664978391, "grad_norm": 0.78125, "learning_rate": 0.00014527390871747032, "loss": 0.7914, "step": 23300 }, { "epoch": 0.5983043421743128, "grad_norm": 0.765625, "learning_rate": 0.00014526992819797392, "loss": 0.9919, "step": 23301 }, { "epoch": 0.5983300193702347, "grad_norm": 0.7734375, "learning_rate": 0.00014526594758825862, "loss": 0.7493, "step": 23302 }, { "epoch": 0.5983556965661565, "grad_norm": 0.8046875, "learning_rate": 0.00014526196688833244, "loss": 0.8804, "step": 23303 }, { "epoch": 0.5983813737620783, "grad_norm": 0.796875, "learning_rate": 0.0001452579860982032, "loss": 0.7727, "step": 23304 }, { "epoch": 0.5984070509580002, "grad_norm": 0.75, "learning_rate": 0.00014525400521787888, "loss": 0.9135, "step": 23305 }, { "epoch": 0.5984327281539219, "grad_norm": 0.77734375, "learning_rate": 0.00014525002424736744, "loss": 0.8286, "step": 23306 }, { "epoch": 0.5984584053498437, "grad_norm": 0.8203125, "learning_rate": 0.0001452460431866768, "loss": 0.8362, "step": 23307 }, { "epoch": 0.5984840825457656, "grad_norm": 0.7578125, "learning_rate": 0.00014524206203581484, "loss": 0.8278, "step": 23308 }, { "epoch": 0.5985097597416874, "grad_norm": 0.8046875, "learning_rate": 0.00014523808079478957, "loss": 0.7552, "step": 23309 }, { "epoch": 0.5985354369376092, "grad_norm": 0.80859375, "learning_rate": 0.0001452340994636089, "loss": 0.8911, "step": 23310 }, { "epoch": 0.5985611141335311, "grad_norm": 0.78125, "learning_rate": 0.00014523011804228072, "loss": 0.7939, "step": 23311 }, { "epoch": 0.5985867913294529, "grad_norm": 0.77734375, "learning_rate": 0.00014522613653081303, "loss": 0.8601, "step": 23312 }, { "epoch": 0.5986124685253746, "grad_norm": 0.76953125, "learning_rate": 0.0001452221549292137, "loss": 0.9154, "step": 23313 }, { "epoch": 0.5986381457212965, "grad_norm": 0.75, "learning_rate": 0.00014521817323749072, "loss": 0.7316, "step": 23314 }, { "epoch": 0.5986638229172183, "grad_norm": 0.7578125, "learning_rate": 0.00014521419145565201, "loss": 0.7656, "step": 23315 }, { "epoch": 0.5986895001131401, "grad_norm": 0.71875, "learning_rate": 0.0001452102095837055, "loss": 0.7886, "step": 23316 }, { "epoch": 0.598715177309062, "grad_norm": 0.765625, "learning_rate": 0.00014520622762165914, "loss": 0.7707, "step": 23317 }, { "epoch": 0.5987408545049838, "grad_norm": 0.88671875, "learning_rate": 0.0001452022455695208, "loss": 0.7976, "step": 23318 }, { "epoch": 0.5987665317009057, "grad_norm": 0.72265625, "learning_rate": 0.00014519826342729853, "loss": 0.819, "step": 23319 }, { "epoch": 0.5987922088968274, "grad_norm": 0.83984375, "learning_rate": 0.0001451942811950002, "loss": 0.8003, "step": 23320 }, { "epoch": 0.5988178860927492, "grad_norm": 0.8359375, "learning_rate": 0.00014519029887263368, "loss": 0.8722, "step": 23321 }, { "epoch": 0.598843563288671, "grad_norm": 0.81640625, "learning_rate": 0.00014518631646020703, "loss": 1.0247, "step": 23322 }, { "epoch": 0.5988692404845929, "grad_norm": 0.78125, "learning_rate": 0.00014518233395772814, "loss": 0.8639, "step": 23323 }, { "epoch": 0.5988949176805147, "grad_norm": 0.7421875, "learning_rate": 0.00014517835136520493, "loss": 0.8675, "step": 23324 }, { "epoch": 0.5989205948764366, "grad_norm": 0.80078125, "learning_rate": 0.00014517436868264537, "loss": 0.7865, "step": 23325 }, { "epoch": 0.5989462720723583, "grad_norm": 0.7578125, "learning_rate": 0.00014517038591005732, "loss": 0.8495, "step": 23326 }, { "epoch": 0.5989719492682801, "grad_norm": 0.7265625, "learning_rate": 0.0001451664030474488, "loss": 0.847, "step": 23327 }, { "epoch": 0.598997626464202, "grad_norm": 0.68359375, "learning_rate": 0.0001451624200948277, "loss": 0.7112, "step": 23328 }, { "epoch": 0.5990233036601238, "grad_norm": 0.69140625, "learning_rate": 0.00014515843705220201, "loss": 0.9072, "step": 23329 }, { "epoch": 0.5990489808560456, "grad_norm": 0.7734375, "learning_rate": 0.00014515445391957962, "loss": 0.9089, "step": 23330 }, { "epoch": 0.5990746580519675, "grad_norm": 0.828125, "learning_rate": 0.00014515047069696847, "loss": 0.7738, "step": 23331 }, { "epoch": 0.5991003352478893, "grad_norm": 0.86328125, "learning_rate": 0.0001451464873843765, "loss": 0.8417, "step": 23332 }, { "epoch": 0.599126012443811, "grad_norm": 0.8203125, "learning_rate": 0.00014514250398181167, "loss": 0.9999, "step": 23333 }, { "epoch": 0.5991516896397329, "grad_norm": 0.7578125, "learning_rate": 0.00014513852048928194, "loss": 0.9047, "step": 23334 }, { "epoch": 0.5991773668356547, "grad_norm": 0.7578125, "learning_rate": 0.00014513453690679517, "loss": 0.8703, "step": 23335 }, { "epoch": 0.5992030440315765, "grad_norm": 0.7890625, "learning_rate": 0.0001451305532343594, "loss": 0.8755, "step": 23336 }, { "epoch": 0.5992287212274984, "grad_norm": 0.75, "learning_rate": 0.00014512656947198246, "loss": 0.7051, "step": 23337 }, { "epoch": 0.5992543984234202, "grad_norm": 0.796875, "learning_rate": 0.00014512258561967238, "loss": 0.8336, "step": 23338 }, { "epoch": 0.5992800756193419, "grad_norm": 0.74609375, "learning_rate": 0.00014511860167743703, "loss": 0.8335, "step": 23339 }, { "epoch": 0.5993057528152638, "grad_norm": 0.79296875, "learning_rate": 0.0001451146176452844, "loss": 0.8224, "step": 23340 }, { "epoch": 0.5993314300111856, "grad_norm": 0.828125, "learning_rate": 0.0001451106335232224, "loss": 1.015, "step": 23341 }, { "epoch": 0.5993571072071074, "grad_norm": 0.8203125, "learning_rate": 0.00014510664931125902, "loss": 0.875, "step": 23342 }, { "epoch": 0.5993827844030293, "grad_norm": 0.74609375, "learning_rate": 0.00014510266500940213, "loss": 0.9334, "step": 23343 }, { "epoch": 0.5994084615989511, "grad_norm": 0.7265625, "learning_rate": 0.00014509868061765972, "loss": 0.8123, "step": 23344 }, { "epoch": 0.599434138794873, "grad_norm": 0.78515625, "learning_rate": 0.00014509469613603967, "loss": 0.8607, "step": 23345 }, { "epoch": 0.5994598159907947, "grad_norm": 0.90234375, "learning_rate": 0.00014509071156455002, "loss": 0.9291, "step": 23346 }, { "epoch": 0.5994854931867165, "grad_norm": 0.72265625, "learning_rate": 0.00014508672690319863, "loss": 0.8621, "step": 23347 }, { "epoch": 0.5995111703826383, "grad_norm": 0.796875, "learning_rate": 0.00014508274215199344, "loss": 0.8664, "step": 23348 }, { "epoch": 0.5995368475785602, "grad_norm": 0.73828125, "learning_rate": 0.00014507875731094248, "loss": 0.871, "step": 23349 }, { "epoch": 0.599562524774482, "grad_norm": 0.8046875, "learning_rate": 0.00014507477238005358, "loss": 0.9054, "step": 23350 }, { "epoch": 0.5995882019704039, "grad_norm": 0.74609375, "learning_rate": 0.00014507078735933476, "loss": 0.8527, "step": 23351 }, { "epoch": 0.5996138791663257, "grad_norm": 0.72265625, "learning_rate": 0.0001450668022487939, "loss": 0.8699, "step": 23352 }, { "epoch": 0.5996395563622474, "grad_norm": 0.78515625, "learning_rate": 0.00014506281704843897, "loss": 0.8618, "step": 23353 }, { "epoch": 0.5996652335581693, "grad_norm": 0.75, "learning_rate": 0.00014505883175827794, "loss": 0.8746, "step": 23354 }, { "epoch": 0.5996909107540911, "grad_norm": 0.75, "learning_rate": 0.00014505484637831873, "loss": 0.846, "step": 23355 }, { "epoch": 0.5997165879500129, "grad_norm": 0.83203125, "learning_rate": 0.00014505086090856926, "loss": 1.0232, "step": 23356 }, { "epoch": 0.5997422651459348, "grad_norm": 0.75390625, "learning_rate": 0.00014504687534903752, "loss": 0.9108, "step": 23357 }, { "epoch": 0.5997679423418566, "grad_norm": 0.796875, "learning_rate": 0.0001450428896997314, "loss": 0.8448, "step": 23358 }, { "epoch": 0.5997936195377783, "grad_norm": 0.83984375, "learning_rate": 0.00014503890396065888, "loss": 0.9082, "step": 23359 }, { "epoch": 0.5998192967337002, "grad_norm": 0.8046875, "learning_rate": 0.0001450349181318279, "loss": 0.884, "step": 23360 }, { "epoch": 0.599844973929622, "grad_norm": 0.76171875, "learning_rate": 0.0001450309322132464, "loss": 0.8952, "step": 23361 }, { "epoch": 0.5998706511255438, "grad_norm": 0.7890625, "learning_rate": 0.00014502694620492233, "loss": 0.8392, "step": 23362 }, { "epoch": 0.5998963283214657, "grad_norm": 0.859375, "learning_rate": 0.00014502296010686358, "loss": 0.9688, "step": 23363 }, { "epoch": 0.5999220055173875, "grad_norm": 0.8046875, "learning_rate": 0.00014501897391907818, "loss": 0.8365, "step": 23364 }, { "epoch": 0.5999476827133093, "grad_norm": 0.7890625, "learning_rate": 0.00014501498764157402, "loss": 0.7104, "step": 23365 }, { "epoch": 0.5999733599092311, "grad_norm": 0.74609375, "learning_rate": 0.00014501100127435902, "loss": 0.8445, "step": 23366 }, { "epoch": 0.5999990371051529, "grad_norm": 0.72265625, "learning_rate": 0.00014500701481744122, "loss": 0.877, "step": 23367 }, { "epoch": 0.6000247143010747, "grad_norm": 0.765625, "learning_rate": 0.0001450030282708285, "loss": 0.8621, "step": 23368 }, { "epoch": 0.6000503914969966, "grad_norm": 0.88671875, "learning_rate": 0.00014499904163452876, "loss": 0.805, "step": 23369 }, { "epoch": 0.6000760686929184, "grad_norm": 0.78515625, "learning_rate": 0.00014499505490855001, "loss": 0.8873, "step": 23370 }, { "epoch": 0.6001017458888402, "grad_norm": 0.7734375, "learning_rate": 0.00014499106809290018, "loss": 0.7673, "step": 23371 }, { "epoch": 0.6001274230847621, "grad_norm": 0.8671875, "learning_rate": 0.00014498708118758724, "loss": 0.9577, "step": 23372 }, { "epoch": 0.6001531002806838, "grad_norm": 0.85546875, "learning_rate": 0.0001449830941926191, "loss": 0.7974, "step": 23373 }, { "epoch": 0.6001787774766056, "grad_norm": 0.81640625, "learning_rate": 0.0001449791071080037, "loss": 0.8812, "step": 23374 }, { "epoch": 0.6002044546725275, "grad_norm": 0.80078125, "learning_rate": 0.00014497511993374904, "loss": 0.8343, "step": 23375 }, { "epoch": 0.6002301318684493, "grad_norm": 0.72265625, "learning_rate": 0.00014497113266986297, "loss": 0.9001, "step": 23376 }, { "epoch": 0.6002558090643711, "grad_norm": 0.75390625, "learning_rate": 0.00014496714531635354, "loss": 0.8985, "step": 23377 }, { "epoch": 0.600281486260293, "grad_norm": 0.7421875, "learning_rate": 0.00014496315787322864, "loss": 0.9547, "step": 23378 }, { "epoch": 0.6003071634562147, "grad_norm": 0.72265625, "learning_rate": 0.00014495917034049622, "loss": 0.8742, "step": 23379 }, { "epoch": 0.6003328406521365, "grad_norm": 0.796875, "learning_rate": 0.00014495518271816424, "loss": 1.0577, "step": 23380 }, { "epoch": 0.6003585178480584, "grad_norm": 0.8671875, "learning_rate": 0.00014495119500624066, "loss": 0.8353, "step": 23381 }, { "epoch": 0.6003841950439802, "grad_norm": 0.703125, "learning_rate": 0.0001449472072047334, "loss": 0.9049, "step": 23382 }, { "epoch": 0.600409872239902, "grad_norm": 0.734375, "learning_rate": 0.0001449432193136504, "loss": 0.8914, "step": 23383 }, { "epoch": 0.6004355494358239, "grad_norm": 0.79296875, "learning_rate": 0.00014493923133299965, "loss": 0.8183, "step": 23384 }, { "epoch": 0.6004612266317457, "grad_norm": 0.796875, "learning_rate": 0.00014493524326278902, "loss": 0.9175, "step": 23385 }, { "epoch": 0.6004869038276675, "grad_norm": 0.8203125, "learning_rate": 0.00014493125510302657, "loss": 0.9686, "step": 23386 }, { "epoch": 0.6005125810235893, "grad_norm": 0.7421875, "learning_rate": 0.00014492726685372013, "loss": 0.8426, "step": 23387 }, { "epoch": 0.6005382582195111, "grad_norm": 0.79296875, "learning_rate": 0.00014492327851487778, "loss": 0.9168, "step": 23388 }, { "epoch": 0.600563935415433, "grad_norm": 0.75, "learning_rate": 0.00014491929008650733, "loss": 0.918, "step": 23389 }, { "epoch": 0.6005896126113548, "grad_norm": 0.76171875, "learning_rate": 0.0001449153015686168, "loss": 0.9017, "step": 23390 }, { "epoch": 0.6006152898072766, "grad_norm": 0.84765625, "learning_rate": 0.00014491131296121416, "loss": 0.76, "step": 23391 }, { "epoch": 0.6006409670031985, "grad_norm": 0.84765625, "learning_rate": 0.0001449073242643073, "loss": 0.8577, "step": 23392 }, { "epoch": 0.6006666441991202, "grad_norm": 0.7734375, "learning_rate": 0.0001449033354779042, "loss": 0.8712, "step": 23393 }, { "epoch": 0.600692321395042, "grad_norm": 0.77734375, "learning_rate": 0.00014489934660201283, "loss": 0.8829, "step": 23394 }, { "epoch": 0.6007179985909639, "grad_norm": 0.77734375, "learning_rate": 0.00014489535763664111, "loss": 0.9193, "step": 23395 }, { "epoch": 0.6007436757868857, "grad_norm": 0.9375, "learning_rate": 0.00014489136858179702, "loss": 0.8058, "step": 23396 }, { "epoch": 0.6007693529828075, "grad_norm": 0.8515625, "learning_rate": 0.00014488737943748844, "loss": 0.9832, "step": 23397 }, { "epoch": 0.6007950301787294, "grad_norm": 0.765625, "learning_rate": 0.0001448833902037234, "loss": 0.7628, "step": 23398 }, { "epoch": 0.6008207073746511, "grad_norm": 0.7109375, "learning_rate": 0.0001448794008805098, "loss": 0.8989, "step": 23399 }, { "epoch": 0.6008463845705729, "grad_norm": 0.80078125, "learning_rate": 0.00014487541146785562, "loss": 0.7966, "step": 23400 }, { "epoch": 0.6008720617664948, "grad_norm": 0.7578125, "learning_rate": 0.0001448714219657688, "loss": 0.8734, "step": 23401 }, { "epoch": 0.6008977389624166, "grad_norm": 0.76953125, "learning_rate": 0.0001448674323742573, "loss": 0.905, "step": 23402 }, { "epoch": 0.6009234161583384, "grad_norm": 0.73828125, "learning_rate": 0.00014486344269332903, "loss": 0.831, "step": 23403 }, { "epoch": 0.6009490933542603, "grad_norm": 0.80078125, "learning_rate": 0.000144859452922992, "loss": 0.9701, "step": 23404 }, { "epoch": 0.6009747705501821, "grad_norm": 0.7578125, "learning_rate": 0.0001448554630632541, "loss": 0.7835, "step": 23405 }, { "epoch": 0.6010004477461038, "grad_norm": 0.69921875, "learning_rate": 0.00014485147311412333, "loss": 0.8016, "step": 23406 }, { "epoch": 0.6010261249420257, "grad_norm": 0.796875, "learning_rate": 0.00014484748307560762, "loss": 0.8203, "step": 23407 }, { "epoch": 0.6010518021379475, "grad_norm": 0.7421875, "learning_rate": 0.00014484349294771495, "loss": 0.9104, "step": 23408 }, { "epoch": 0.6010774793338693, "grad_norm": 0.82421875, "learning_rate": 0.00014483950273045323, "loss": 0.8449, "step": 23409 }, { "epoch": 0.6011031565297912, "grad_norm": 0.7890625, "learning_rate": 0.00014483551242383045, "loss": 0.8649, "step": 23410 }, { "epoch": 0.601128833725713, "grad_norm": 0.72265625, "learning_rate": 0.0001448315220278545, "loss": 0.8516, "step": 23411 }, { "epoch": 0.6011545109216349, "grad_norm": 0.8203125, "learning_rate": 0.00014482753154253342, "loss": 0.8927, "step": 23412 }, { "epoch": 0.6011801881175566, "grad_norm": 0.7265625, "learning_rate": 0.0001448235409678751, "loss": 0.7768, "step": 23413 }, { "epoch": 0.6012058653134784, "grad_norm": 0.7109375, "learning_rate": 0.00014481955030388753, "loss": 0.8034, "step": 23414 }, { "epoch": 0.6012315425094003, "grad_norm": 0.74609375, "learning_rate": 0.00014481555955057863, "loss": 0.9366, "step": 23415 }, { "epoch": 0.6012572197053221, "grad_norm": 0.76171875, "learning_rate": 0.00014481156870795636, "loss": 0.8346, "step": 23416 }, { "epoch": 0.6012828969012439, "grad_norm": 0.796875, "learning_rate": 0.00014480757777602868, "loss": 0.888, "step": 23417 }, { "epoch": 0.6013085740971658, "grad_norm": 0.8203125, "learning_rate": 0.00014480358675480356, "loss": 0.9571, "step": 23418 }, { "epoch": 0.6013342512930875, "grad_norm": 0.78515625, "learning_rate": 0.00014479959564428895, "loss": 0.7987, "step": 23419 }, { "epoch": 0.6013599284890093, "grad_norm": 0.73828125, "learning_rate": 0.00014479560444449275, "loss": 0.8436, "step": 23420 }, { "epoch": 0.6013856056849312, "grad_norm": 0.82421875, "learning_rate": 0.00014479161315542302, "loss": 0.8907, "step": 23421 }, { "epoch": 0.601411282880853, "grad_norm": 0.86328125, "learning_rate": 0.0001447876217770876, "loss": 0.7635, "step": 23422 }, { "epoch": 0.6014369600767748, "grad_norm": 0.7578125, "learning_rate": 0.0001447836303094945, "loss": 0.8823, "step": 23423 }, { "epoch": 0.6014626372726967, "grad_norm": 0.8671875, "learning_rate": 0.00014477963875265167, "loss": 0.865, "step": 23424 }, { "epoch": 0.6014883144686185, "grad_norm": 0.734375, "learning_rate": 0.00014477564710656707, "loss": 0.9064, "step": 23425 }, { "epoch": 0.6015139916645402, "grad_norm": 0.74609375, "learning_rate": 0.00014477165537124867, "loss": 0.9235, "step": 23426 }, { "epoch": 0.6015396688604621, "grad_norm": 0.796875, "learning_rate": 0.0001447676635467044, "loss": 0.9126, "step": 23427 }, { "epoch": 0.6015653460563839, "grad_norm": 0.78515625, "learning_rate": 0.00014476367163294222, "loss": 0.9153, "step": 23428 }, { "epoch": 0.6015910232523057, "grad_norm": 0.73828125, "learning_rate": 0.0001447596796299701, "loss": 0.7824, "step": 23429 }, { "epoch": 0.6016167004482276, "grad_norm": 0.75, "learning_rate": 0.00014475568753779592, "loss": 0.8522, "step": 23430 }, { "epoch": 0.6016423776441494, "grad_norm": 0.703125, "learning_rate": 0.00014475169535642775, "loss": 0.7166, "step": 23431 }, { "epoch": 0.6016680548400712, "grad_norm": 0.80078125, "learning_rate": 0.00014474770308587346, "loss": 0.9812, "step": 23432 }, { "epoch": 0.601693732035993, "grad_norm": 0.87890625, "learning_rate": 0.00014474371072614105, "loss": 0.8679, "step": 23433 }, { "epoch": 0.6017194092319148, "grad_norm": 0.6875, "learning_rate": 0.0001447397182772385, "loss": 0.6627, "step": 23434 }, { "epoch": 0.6017450864278366, "grad_norm": 0.8046875, "learning_rate": 0.0001447357257391737, "loss": 0.7953, "step": 23435 }, { "epoch": 0.6017707636237585, "grad_norm": 0.79296875, "learning_rate": 0.00014473173311195465, "loss": 0.7718, "step": 23436 }, { "epoch": 0.6017964408196803, "grad_norm": 0.77734375, "learning_rate": 0.0001447277403955893, "loss": 0.7774, "step": 23437 }, { "epoch": 0.6018221180156021, "grad_norm": 0.75, "learning_rate": 0.00014472374759008557, "loss": 0.8043, "step": 23438 }, { "epoch": 0.6018477952115239, "grad_norm": 0.9140625, "learning_rate": 0.0001447197546954515, "loss": 0.786, "step": 23439 }, { "epoch": 0.6018734724074457, "grad_norm": 0.78125, "learning_rate": 0.00014471576171169498, "loss": 0.8342, "step": 23440 }, { "epoch": 0.6018991496033675, "grad_norm": 0.8046875, "learning_rate": 0.00014471176863882398, "loss": 0.8524, "step": 23441 }, { "epoch": 0.6019248267992894, "grad_norm": 0.84765625, "learning_rate": 0.00014470777547684647, "loss": 0.8997, "step": 23442 }, { "epoch": 0.6019505039952112, "grad_norm": 0.91796875, "learning_rate": 0.0001447037822257704, "loss": 0.8375, "step": 23443 }, { "epoch": 0.601976181191133, "grad_norm": 0.79296875, "learning_rate": 0.0001446997888856037, "loss": 0.9144, "step": 23444 }, { "epoch": 0.6020018583870549, "grad_norm": 0.80078125, "learning_rate": 0.0001446957954563544, "loss": 0.8413, "step": 23445 }, { "epoch": 0.6020275355829766, "grad_norm": 0.74609375, "learning_rate": 0.0001446918019380304, "loss": 0.8602, "step": 23446 }, { "epoch": 0.6020532127788984, "grad_norm": 0.72265625, "learning_rate": 0.00014468780833063968, "loss": 0.8295, "step": 23447 }, { "epoch": 0.6020788899748203, "grad_norm": 0.828125, "learning_rate": 0.0001446838146341902, "loss": 0.9579, "step": 23448 }, { "epoch": 0.6021045671707421, "grad_norm": 0.859375, "learning_rate": 0.00014467982084868992, "loss": 0.823, "step": 23449 }, { "epoch": 0.602130244366664, "grad_norm": 0.796875, "learning_rate": 0.0001446758269741468, "loss": 0.902, "step": 23450 }, { "epoch": 0.6021559215625858, "grad_norm": 0.87109375, "learning_rate": 0.00014467183301056875, "loss": 0.9288, "step": 23451 }, { "epoch": 0.6021815987585076, "grad_norm": 0.8046875, "learning_rate": 0.0001446678389579638, "loss": 0.9151, "step": 23452 }, { "epoch": 0.6022072759544294, "grad_norm": 0.75, "learning_rate": 0.00014466384481633986, "loss": 0.8835, "step": 23453 }, { "epoch": 0.6022329531503512, "grad_norm": 0.79296875, "learning_rate": 0.00014465985058570495, "loss": 0.852, "step": 23454 }, { "epoch": 0.602258630346273, "grad_norm": 0.79296875, "learning_rate": 0.00014465585626606697, "loss": 0.7351, "step": 23455 }, { "epoch": 0.6022843075421949, "grad_norm": 0.73046875, "learning_rate": 0.0001446518618574339, "loss": 0.7695, "step": 23456 }, { "epoch": 0.6023099847381167, "grad_norm": 0.74609375, "learning_rate": 0.0001446478673598137, "loss": 0.794, "step": 23457 }, { "epoch": 0.6023356619340385, "grad_norm": 0.70703125, "learning_rate": 0.00014464387277321434, "loss": 0.7213, "step": 23458 }, { "epoch": 0.6023613391299603, "grad_norm": 0.77734375, "learning_rate": 0.00014463987809764376, "loss": 0.7774, "step": 23459 }, { "epoch": 0.6023870163258821, "grad_norm": 0.734375, "learning_rate": 0.00014463588333310995, "loss": 0.6975, "step": 23460 }, { "epoch": 0.6024126935218039, "grad_norm": 0.7734375, "learning_rate": 0.00014463188847962087, "loss": 0.9045, "step": 23461 }, { "epoch": 0.6024383707177258, "grad_norm": 0.734375, "learning_rate": 0.00014462789353718444, "loss": 0.7834, "step": 23462 }, { "epoch": 0.6024640479136476, "grad_norm": 0.75390625, "learning_rate": 0.00014462389850580868, "loss": 0.8079, "step": 23463 }, { "epoch": 0.6024897251095694, "grad_norm": 0.76953125, "learning_rate": 0.0001446199033855015, "loss": 0.7146, "step": 23464 }, { "epoch": 0.6025154023054913, "grad_norm": 0.78515625, "learning_rate": 0.00014461590817627088, "loss": 0.7575, "step": 23465 }, { "epoch": 0.602541079501413, "grad_norm": 0.73046875, "learning_rate": 0.0001446119128781248, "loss": 1.0124, "step": 23466 }, { "epoch": 0.6025667566973348, "grad_norm": 0.70703125, "learning_rate": 0.00014460791749107117, "loss": 0.7624, "step": 23467 }, { "epoch": 0.6025924338932567, "grad_norm": 0.8046875, "learning_rate": 0.00014460392201511805, "loss": 0.8239, "step": 23468 }, { "epoch": 0.6026181110891785, "grad_norm": 0.77734375, "learning_rate": 0.00014459992645027333, "loss": 0.7729, "step": 23469 }, { "epoch": 0.6026437882851003, "grad_norm": 0.76953125, "learning_rate": 0.00014459593079654495, "loss": 0.9347, "step": 23470 }, { "epoch": 0.6026694654810222, "grad_norm": 0.70703125, "learning_rate": 0.00014459193505394092, "loss": 0.7462, "step": 23471 }, { "epoch": 0.602695142676944, "grad_norm": 0.7421875, "learning_rate": 0.0001445879392224692, "loss": 0.8462, "step": 23472 }, { "epoch": 0.6027208198728657, "grad_norm": 0.8359375, "learning_rate": 0.0001445839433021377, "loss": 1.0053, "step": 23473 }, { "epoch": 0.6027464970687876, "grad_norm": 0.7265625, "learning_rate": 0.0001445799472929545, "loss": 0.8835, "step": 23474 }, { "epoch": 0.6027721742647094, "grad_norm": 0.8515625, "learning_rate": 0.00014457595119492746, "loss": 0.903, "step": 23475 }, { "epoch": 0.6027978514606313, "grad_norm": 0.81640625, "learning_rate": 0.00014457195500806457, "loss": 0.9062, "step": 23476 }, { "epoch": 0.6028235286565531, "grad_norm": 0.90234375, "learning_rate": 0.00014456795873237383, "loss": 0.933, "step": 23477 }, { "epoch": 0.6028492058524749, "grad_norm": 0.75, "learning_rate": 0.00014456396236786314, "loss": 0.8142, "step": 23478 }, { "epoch": 0.6028748830483966, "grad_norm": 0.7734375, "learning_rate": 0.0001445599659145405, "loss": 0.8684, "step": 23479 }, { "epoch": 0.6029005602443185, "grad_norm": 0.74609375, "learning_rate": 0.0001445559693724139, "loss": 0.8985, "step": 23480 }, { "epoch": 0.6029262374402403, "grad_norm": 0.7890625, "learning_rate": 0.00014455197274149126, "loss": 0.8932, "step": 23481 }, { "epoch": 0.6029519146361622, "grad_norm": 0.8046875, "learning_rate": 0.00014454797602178056, "loss": 0.8146, "step": 23482 }, { "epoch": 0.602977591832084, "grad_norm": 0.734375, "learning_rate": 0.00014454397921328976, "loss": 0.9146, "step": 23483 }, { "epoch": 0.6030032690280058, "grad_norm": 0.74609375, "learning_rate": 0.00014453998231602687, "loss": 0.8482, "step": 23484 }, { "epoch": 0.6030289462239277, "grad_norm": 0.78125, "learning_rate": 0.00014453598532999977, "loss": 0.9448, "step": 23485 }, { "epoch": 0.6030546234198494, "grad_norm": 0.87109375, "learning_rate": 0.00014453198825521647, "loss": 0.929, "step": 23486 }, { "epoch": 0.6030803006157712, "grad_norm": 0.80078125, "learning_rate": 0.000144527991091685, "loss": 0.8558, "step": 23487 }, { "epoch": 0.6031059778116931, "grad_norm": 0.796875, "learning_rate": 0.00014452399383941322, "loss": 0.7749, "step": 23488 }, { "epoch": 0.6031316550076149, "grad_norm": 0.76171875, "learning_rate": 0.00014451999649840914, "loss": 0.9622, "step": 23489 }, { "epoch": 0.6031573322035367, "grad_norm": 0.7265625, "learning_rate": 0.00014451599906868077, "loss": 1.0348, "step": 23490 }, { "epoch": 0.6031830093994586, "grad_norm": 0.80859375, "learning_rate": 0.00014451200155023597, "loss": 0.8585, "step": 23491 }, { "epoch": 0.6032086865953804, "grad_norm": 0.75, "learning_rate": 0.00014450800394308285, "loss": 0.9669, "step": 23492 }, { "epoch": 0.6032343637913021, "grad_norm": 0.71875, "learning_rate": 0.00014450400624722924, "loss": 0.8323, "step": 23493 }, { "epoch": 0.603260040987224, "grad_norm": 0.69140625, "learning_rate": 0.00014450000846268318, "loss": 0.751, "step": 23494 }, { "epoch": 0.6032857181831458, "grad_norm": 0.890625, "learning_rate": 0.00014449601058945266, "loss": 0.7946, "step": 23495 }, { "epoch": 0.6033113953790676, "grad_norm": 0.828125, "learning_rate": 0.00014449201262754555, "loss": 0.797, "step": 23496 }, { "epoch": 0.6033370725749895, "grad_norm": 0.66796875, "learning_rate": 0.0001444880145769699, "loss": 0.8353, "step": 23497 }, { "epoch": 0.6033627497709113, "grad_norm": 0.765625, "learning_rate": 0.00014448401643773367, "loss": 0.8841, "step": 23498 }, { "epoch": 0.603388426966833, "grad_norm": 0.80859375, "learning_rate": 0.0001444800182098448, "loss": 0.9013, "step": 23499 }, { "epoch": 0.6034141041627549, "grad_norm": 0.71875, "learning_rate": 0.00014447601989331127, "loss": 0.9441, "step": 23500 }, { "epoch": 0.6034397813586767, "grad_norm": 0.74609375, "learning_rate": 0.00014447202148814105, "loss": 0.8567, "step": 23501 }, { "epoch": 0.6034654585545985, "grad_norm": 0.7421875, "learning_rate": 0.00014446802299434213, "loss": 0.8201, "step": 23502 }, { "epoch": 0.6034911357505204, "grad_norm": 0.828125, "learning_rate": 0.00014446402441192243, "loss": 0.9339, "step": 23503 }, { "epoch": 0.6035168129464422, "grad_norm": 0.77734375, "learning_rate": 0.00014446002574088995, "loss": 0.8334, "step": 23504 }, { "epoch": 0.603542490142364, "grad_norm": 0.73828125, "learning_rate": 0.00014445602698125265, "loss": 0.9343, "step": 23505 }, { "epoch": 0.6035681673382858, "grad_norm": 0.90625, "learning_rate": 0.00014445202813301853, "loss": 0.9394, "step": 23506 }, { "epoch": 0.6035938445342076, "grad_norm": 0.7578125, "learning_rate": 0.00014444802919619552, "loss": 0.9003, "step": 23507 }, { "epoch": 0.6036195217301294, "grad_norm": 0.81640625, "learning_rate": 0.00014444403017079162, "loss": 0.7661, "step": 23508 }, { "epoch": 0.6036451989260513, "grad_norm": 0.7265625, "learning_rate": 0.00014444003105681477, "loss": 0.7767, "step": 23509 }, { "epoch": 0.6036708761219731, "grad_norm": 0.80078125, "learning_rate": 0.00014443603185427292, "loss": 0.9284, "step": 23510 }, { "epoch": 0.603696553317895, "grad_norm": 0.81640625, "learning_rate": 0.0001444320325631741, "loss": 0.7329, "step": 23511 }, { "epoch": 0.6037222305138168, "grad_norm": 0.80078125, "learning_rate": 0.00014442803318352623, "loss": 0.7911, "step": 23512 }, { "epoch": 0.6037479077097385, "grad_norm": 0.84765625, "learning_rate": 0.00014442403371533734, "loss": 1.099, "step": 23513 }, { "epoch": 0.6037735849056604, "grad_norm": 0.79296875, "learning_rate": 0.00014442003415861537, "loss": 0.9188, "step": 23514 }, { "epoch": 0.6037992621015822, "grad_norm": 0.76171875, "learning_rate": 0.00014441603451336826, "loss": 0.8195, "step": 23515 }, { "epoch": 0.603824939297504, "grad_norm": 0.73828125, "learning_rate": 0.000144412034779604, "loss": 0.7624, "step": 23516 }, { "epoch": 0.6038506164934259, "grad_norm": 0.76953125, "learning_rate": 0.00014440803495733057, "loss": 0.9374, "step": 23517 }, { "epoch": 0.6038762936893477, "grad_norm": 0.7421875, "learning_rate": 0.00014440403504655593, "loss": 0.8217, "step": 23518 }, { "epoch": 0.6039019708852694, "grad_norm": 1.3984375, "learning_rate": 0.00014440003504728808, "loss": 0.848, "step": 23519 }, { "epoch": 0.6039276480811913, "grad_norm": 0.83984375, "learning_rate": 0.00014439603495953495, "loss": 1.0887, "step": 23520 }, { "epoch": 0.6039533252771131, "grad_norm": 0.69921875, "learning_rate": 0.00014439203478330454, "loss": 0.8481, "step": 23521 }, { "epoch": 0.6039790024730349, "grad_norm": 0.7265625, "learning_rate": 0.00014438803451860484, "loss": 0.841, "step": 23522 }, { "epoch": 0.6040046796689568, "grad_norm": 0.69921875, "learning_rate": 0.00014438403416544375, "loss": 0.8586, "step": 23523 }, { "epoch": 0.6040303568648786, "grad_norm": 0.75390625, "learning_rate": 0.00014438003372382934, "loss": 0.803, "step": 23524 }, { "epoch": 0.6040560340608004, "grad_norm": 0.7578125, "learning_rate": 0.0001443760331937695, "loss": 0.9392, "step": 23525 }, { "epoch": 0.6040817112567222, "grad_norm": 0.78515625, "learning_rate": 0.0001443720325752722, "loss": 0.7961, "step": 23526 }, { "epoch": 0.604107388452644, "grad_norm": 0.7578125, "learning_rate": 0.00014436803186834553, "loss": 0.8249, "step": 23527 }, { "epoch": 0.6041330656485658, "grad_norm": 0.73046875, "learning_rate": 0.00014436403107299733, "loss": 0.7732, "step": 23528 }, { "epoch": 0.6041587428444877, "grad_norm": 0.7890625, "learning_rate": 0.00014436003018923564, "loss": 0.872, "step": 23529 }, { "epoch": 0.6041844200404095, "grad_norm": 0.8125, "learning_rate": 0.00014435602921706838, "loss": 0.8637, "step": 23530 }, { "epoch": 0.6042100972363313, "grad_norm": 0.79296875, "learning_rate": 0.0001443520281565036, "loss": 0.7904, "step": 23531 }, { "epoch": 0.6042357744322532, "grad_norm": 0.84375, "learning_rate": 0.00014434802700754923, "loss": 0.7443, "step": 23532 }, { "epoch": 0.6042614516281749, "grad_norm": 0.80859375, "learning_rate": 0.00014434402577021325, "loss": 0.8989, "step": 23533 }, { "epoch": 0.6042871288240967, "grad_norm": 0.859375, "learning_rate": 0.0001443400244445036, "loss": 0.7901, "step": 23534 }, { "epoch": 0.6043128060200186, "grad_norm": 0.73046875, "learning_rate": 0.00014433602303042832, "loss": 0.8248, "step": 23535 }, { "epoch": 0.6043384832159404, "grad_norm": 0.71875, "learning_rate": 0.00014433202152799532, "loss": 0.81, "step": 23536 }, { "epoch": 0.6043641604118623, "grad_norm": 0.83203125, "learning_rate": 0.00014432801993721262, "loss": 0.875, "step": 23537 }, { "epoch": 0.6043898376077841, "grad_norm": 0.7734375, "learning_rate": 0.00014432401825808822, "loss": 0.7121, "step": 23538 }, { "epoch": 0.6044155148037058, "grad_norm": 0.8125, "learning_rate": 0.00014432001649063, "loss": 0.9134, "step": 23539 }, { "epoch": 0.6044411919996276, "grad_norm": 0.7890625, "learning_rate": 0.00014431601463484603, "loss": 0.8163, "step": 23540 }, { "epoch": 0.6044668691955495, "grad_norm": 0.81640625, "learning_rate": 0.00014431201269074422, "loss": 0.7659, "step": 23541 }, { "epoch": 0.6044925463914713, "grad_norm": 1.0390625, "learning_rate": 0.00014430801065833256, "loss": 1.0016, "step": 23542 }, { "epoch": 0.6045182235873932, "grad_norm": 0.86328125, "learning_rate": 0.00014430400853761906, "loss": 0.9281, "step": 23543 }, { "epoch": 0.604543900783315, "grad_norm": 0.7890625, "learning_rate": 0.00014430000632861166, "loss": 1.0529, "step": 23544 }, { "epoch": 0.6045695779792368, "grad_norm": 0.73046875, "learning_rate": 0.00014429600403131839, "loss": 0.9453, "step": 23545 }, { "epoch": 0.6045952551751586, "grad_norm": 0.72265625, "learning_rate": 0.00014429200164574717, "loss": 0.7868, "step": 23546 }, { "epoch": 0.6046209323710804, "grad_norm": 0.80859375, "learning_rate": 0.00014428799917190594, "loss": 1.078, "step": 23547 }, { "epoch": 0.6046466095670022, "grad_norm": 0.7578125, "learning_rate": 0.00014428399660980278, "loss": 0.8813, "step": 23548 }, { "epoch": 0.6046722867629241, "grad_norm": 0.765625, "learning_rate": 0.00014427999395944557, "loss": 0.9135, "step": 23549 }, { "epoch": 0.6046979639588459, "grad_norm": 0.765625, "learning_rate": 0.00014427599122084235, "loss": 0.7611, "step": 23550 }, { "epoch": 0.6047236411547677, "grad_norm": 0.77734375, "learning_rate": 0.0001442719883940011, "loss": 0.9976, "step": 23551 }, { "epoch": 0.6047493183506895, "grad_norm": 0.70703125, "learning_rate": 0.00014426798547892978, "loss": 0.903, "step": 23552 }, { "epoch": 0.6047749955466113, "grad_norm": 0.86328125, "learning_rate": 0.00014426398247563635, "loss": 0.7943, "step": 23553 }, { "epoch": 0.6048006727425331, "grad_norm": 0.8046875, "learning_rate": 0.0001442599793841288, "loss": 0.7886, "step": 23554 }, { "epoch": 0.604826349938455, "grad_norm": 0.7109375, "learning_rate": 0.0001442559762044151, "loss": 0.9368, "step": 23555 }, { "epoch": 0.6048520271343768, "grad_norm": 0.75390625, "learning_rate": 0.00014425197293650325, "loss": 0.8803, "step": 23556 }, { "epoch": 0.6048777043302986, "grad_norm": 0.82421875, "learning_rate": 0.00014424796958040123, "loss": 0.9271, "step": 23557 }, { "epoch": 0.6049033815262205, "grad_norm": 0.828125, "learning_rate": 0.00014424396613611697, "loss": 0.8705, "step": 23558 }, { "epoch": 0.6049290587221422, "grad_norm": 0.86328125, "learning_rate": 0.0001442399626036585, "loss": 0.9042, "step": 23559 }, { "epoch": 0.604954735918064, "grad_norm": 0.73046875, "learning_rate": 0.00014423595898303376, "loss": 0.8102, "step": 23560 }, { "epoch": 0.6049804131139859, "grad_norm": 0.77734375, "learning_rate": 0.0001442319552742508, "loss": 0.7532, "step": 23561 }, { "epoch": 0.6050060903099077, "grad_norm": 0.80078125, "learning_rate": 0.0001442279514773175, "loss": 0.8936, "step": 23562 }, { "epoch": 0.6050317675058295, "grad_norm": 0.79296875, "learning_rate": 0.0001442239475922419, "loss": 0.9799, "step": 23563 }, { "epoch": 0.6050574447017514, "grad_norm": 0.73828125, "learning_rate": 0.00014421994361903196, "loss": 0.8644, "step": 23564 }, { "epoch": 0.6050831218976732, "grad_norm": 0.78515625, "learning_rate": 0.0001442159395576957, "loss": 0.9065, "step": 23565 }, { "epoch": 0.6051087990935949, "grad_norm": 3.625, "learning_rate": 0.00014421193540824104, "loss": 0.9882, "step": 23566 }, { "epoch": 0.6051344762895168, "grad_norm": 0.8046875, "learning_rate": 0.000144207931170676, "loss": 0.9292, "step": 23567 }, { "epoch": 0.6051601534854386, "grad_norm": 0.78125, "learning_rate": 0.0001442039268450085, "loss": 0.719, "step": 23568 }, { "epoch": 0.6051858306813604, "grad_norm": 0.83984375, "learning_rate": 0.0001441999224312466, "loss": 0.956, "step": 23569 }, { "epoch": 0.6052115078772823, "grad_norm": 0.69921875, "learning_rate": 0.00014419591792939827, "loss": 0.8105, "step": 23570 }, { "epoch": 0.6052371850732041, "grad_norm": 0.78515625, "learning_rate": 0.00014419191333947144, "loss": 0.8475, "step": 23571 }, { "epoch": 0.6052628622691258, "grad_norm": 0.73828125, "learning_rate": 0.00014418790866147413, "loss": 0.8006, "step": 23572 }, { "epoch": 0.6052885394650477, "grad_norm": 0.76171875, "learning_rate": 0.0001441839038954143, "loss": 0.8909, "step": 23573 }, { "epoch": 0.6053142166609695, "grad_norm": 0.80859375, "learning_rate": 0.00014417989904129996, "loss": 1.0197, "step": 23574 }, { "epoch": 0.6053398938568914, "grad_norm": 0.8125, "learning_rate": 0.00014417589409913907, "loss": 0.9002, "step": 23575 }, { "epoch": 0.6053655710528132, "grad_norm": 0.68359375, "learning_rate": 0.0001441718890689396, "loss": 0.8222, "step": 23576 }, { "epoch": 0.605391248248735, "grad_norm": 0.7421875, "learning_rate": 0.00014416788395070954, "loss": 0.8794, "step": 23577 }, { "epoch": 0.6054169254446569, "grad_norm": 0.76953125, "learning_rate": 0.0001441638787444569, "loss": 0.774, "step": 23578 }, { "epoch": 0.6054426026405786, "grad_norm": 0.80078125, "learning_rate": 0.00014415987345018963, "loss": 0.9633, "step": 23579 }, { "epoch": 0.6054682798365004, "grad_norm": 0.7734375, "learning_rate": 0.0001441558680679157, "loss": 0.972, "step": 23580 }, { "epoch": 0.6054939570324223, "grad_norm": 0.73828125, "learning_rate": 0.0001441518625976431, "loss": 0.9172, "step": 23581 }, { "epoch": 0.6055196342283441, "grad_norm": 0.7890625, "learning_rate": 0.00014414785703937988, "loss": 0.8642, "step": 23582 }, { "epoch": 0.6055453114242659, "grad_norm": 0.78125, "learning_rate": 0.00014414385139313395, "loss": 0.8903, "step": 23583 }, { "epoch": 0.6055709886201878, "grad_norm": 0.86328125, "learning_rate": 0.00014413984565891328, "loss": 0.8597, "step": 23584 }, { "epoch": 0.6055966658161096, "grad_norm": 0.70703125, "learning_rate": 0.00014413583983672592, "loss": 0.8739, "step": 23585 }, { "epoch": 0.6056223430120313, "grad_norm": 0.80859375, "learning_rate": 0.00014413183392657982, "loss": 0.9216, "step": 23586 }, { "epoch": 0.6056480202079532, "grad_norm": 0.86328125, "learning_rate": 0.00014412782792848293, "loss": 0.915, "step": 23587 }, { "epoch": 0.605673697403875, "grad_norm": 0.82421875, "learning_rate": 0.00014412382184244332, "loss": 0.9253, "step": 23588 }, { "epoch": 0.6056993745997968, "grad_norm": 0.78125, "learning_rate": 0.00014411981566846884, "loss": 0.9169, "step": 23589 }, { "epoch": 0.6057250517957187, "grad_norm": 0.72265625, "learning_rate": 0.00014411580940656762, "loss": 0.7249, "step": 23590 }, { "epoch": 0.6057507289916405, "grad_norm": 0.74609375, "learning_rate": 0.00014411180305674753, "loss": 0.7553, "step": 23591 }, { "epoch": 0.6057764061875622, "grad_norm": 0.7265625, "learning_rate": 0.00014410779661901666, "loss": 0.7947, "step": 23592 }, { "epoch": 0.6058020833834841, "grad_norm": 0.765625, "learning_rate": 0.0001441037900933829, "loss": 0.9413, "step": 23593 }, { "epoch": 0.6058277605794059, "grad_norm": 0.82421875, "learning_rate": 0.00014409978347985425, "loss": 0.9546, "step": 23594 }, { "epoch": 0.6058534377753277, "grad_norm": 0.76171875, "learning_rate": 0.00014409577677843873, "loss": 0.8844, "step": 23595 }, { "epoch": 0.6058791149712496, "grad_norm": 0.75, "learning_rate": 0.0001440917699891443, "loss": 0.8791, "step": 23596 }, { "epoch": 0.6059047921671714, "grad_norm": 0.7734375, "learning_rate": 0.00014408776311197898, "loss": 0.8864, "step": 23597 }, { "epoch": 0.6059304693630932, "grad_norm": 0.81640625, "learning_rate": 0.0001440837561469507, "loss": 0.9435, "step": 23598 }, { "epoch": 0.605956146559015, "grad_norm": 0.74609375, "learning_rate": 0.00014407974909406754, "loss": 0.7866, "step": 23599 }, { "epoch": 0.6059818237549368, "grad_norm": 0.7734375, "learning_rate": 0.00014407574195333736, "loss": 0.7575, "step": 23600 }, { "epoch": 0.6060075009508586, "grad_norm": 0.8203125, "learning_rate": 0.00014407173472476823, "loss": 0.9226, "step": 23601 }, { "epoch": 0.6060331781467805, "grad_norm": 0.73828125, "learning_rate": 0.0001440677274083681, "loss": 0.9301, "step": 23602 }, { "epoch": 0.6060588553427023, "grad_norm": 0.75390625, "learning_rate": 0.00014406372000414498, "loss": 0.8521, "step": 23603 }, { "epoch": 0.6060845325386242, "grad_norm": 0.796875, "learning_rate": 0.00014405971251210683, "loss": 0.9027, "step": 23604 }, { "epoch": 0.606110209734546, "grad_norm": 0.72265625, "learning_rate": 0.00014405570493226168, "loss": 0.8838, "step": 23605 }, { "epoch": 0.6061358869304677, "grad_norm": 0.76171875, "learning_rate": 0.00014405169726461748, "loss": 0.8185, "step": 23606 }, { "epoch": 0.6061615641263896, "grad_norm": 0.76953125, "learning_rate": 0.00014404768950918224, "loss": 0.9273, "step": 23607 }, { "epoch": 0.6061872413223114, "grad_norm": 0.8359375, "learning_rate": 0.00014404368166596389, "loss": 0.8971, "step": 23608 }, { "epoch": 0.6062129185182332, "grad_norm": 0.75390625, "learning_rate": 0.0001440396737349705, "loss": 0.8885, "step": 23609 }, { "epoch": 0.6062385957141551, "grad_norm": 0.82421875, "learning_rate": 0.00014403566571621, "loss": 0.8918, "step": 23610 }, { "epoch": 0.6062642729100769, "grad_norm": 0.75, "learning_rate": 0.0001440316576096904, "loss": 0.9083, "step": 23611 }, { "epoch": 0.6062899501059986, "grad_norm": 0.71484375, "learning_rate": 0.0001440276494154197, "loss": 0.8661, "step": 23612 }, { "epoch": 0.6063156273019205, "grad_norm": 0.703125, "learning_rate": 0.00014402364113340585, "loss": 0.8781, "step": 23613 }, { "epoch": 0.6063413044978423, "grad_norm": 0.75390625, "learning_rate": 0.00014401963276365688, "loss": 0.8477, "step": 23614 }, { "epoch": 0.6063669816937641, "grad_norm": 0.75390625, "learning_rate": 0.00014401562430618075, "loss": 0.7514, "step": 23615 }, { "epoch": 0.606392658889686, "grad_norm": 0.73046875, "learning_rate": 0.00014401161576098543, "loss": 0.9122, "step": 23616 }, { "epoch": 0.6064183360856078, "grad_norm": 0.703125, "learning_rate": 0.00014400760712807896, "loss": 0.7628, "step": 23617 }, { "epoch": 0.6064440132815296, "grad_norm": 0.78515625, "learning_rate": 0.00014400359840746933, "loss": 0.94, "step": 23618 }, { "epoch": 0.6064696904774514, "grad_norm": 0.72265625, "learning_rate": 0.00014399958959916446, "loss": 0.8409, "step": 23619 }, { "epoch": 0.6064953676733732, "grad_norm": 0.73046875, "learning_rate": 0.0001439955807031724, "loss": 0.8406, "step": 23620 }, { "epoch": 0.606521044869295, "grad_norm": 0.8125, "learning_rate": 0.0001439915717195011, "loss": 0.8205, "step": 23621 }, { "epoch": 0.6065467220652169, "grad_norm": 0.72265625, "learning_rate": 0.0001439875626481586, "loss": 0.7737, "step": 23622 }, { "epoch": 0.6065723992611387, "grad_norm": 0.7265625, "learning_rate": 0.00014398355348915287, "loss": 0.79, "step": 23623 }, { "epoch": 0.6065980764570605, "grad_norm": 0.7109375, "learning_rate": 0.00014397954424249184, "loss": 0.7617, "step": 23624 }, { "epoch": 0.6066237536529824, "grad_norm": 0.77734375, "learning_rate": 0.00014397553490818363, "loss": 0.9388, "step": 23625 }, { "epoch": 0.6066494308489041, "grad_norm": 0.79296875, "learning_rate": 0.0001439715254862361, "loss": 0.8064, "step": 23626 }, { "epoch": 0.6066751080448259, "grad_norm": 0.8046875, "learning_rate": 0.0001439675159766573, "loss": 0.8846, "step": 23627 }, { "epoch": 0.6067007852407478, "grad_norm": 0.78125, "learning_rate": 0.0001439635063794552, "loss": 0.9024, "step": 23628 }, { "epoch": 0.6067264624366696, "grad_norm": 0.7734375, "learning_rate": 0.0001439594966946378, "loss": 0.8827, "step": 23629 }, { "epoch": 0.6067521396325914, "grad_norm": 0.75390625, "learning_rate": 0.0001439554869222131, "loss": 0.9958, "step": 23630 }, { "epoch": 0.6067778168285133, "grad_norm": 0.83984375, "learning_rate": 0.0001439514770621891, "loss": 0.8664, "step": 23631 }, { "epoch": 0.606803494024435, "grad_norm": 0.8203125, "learning_rate": 0.00014394746711457378, "loss": 0.937, "step": 23632 }, { "epoch": 0.6068291712203568, "grad_norm": 0.79296875, "learning_rate": 0.0001439434570793751, "loss": 1.0113, "step": 23633 }, { "epoch": 0.6068548484162787, "grad_norm": 0.75390625, "learning_rate": 0.00014393944695660112, "loss": 0.8352, "step": 23634 }, { "epoch": 0.6068805256122005, "grad_norm": 0.8125, "learning_rate": 0.00014393543674625975, "loss": 0.893, "step": 23635 }, { "epoch": 0.6069062028081224, "grad_norm": 0.73046875, "learning_rate": 0.00014393142644835905, "loss": 0.7334, "step": 23636 }, { "epoch": 0.6069318800040442, "grad_norm": 0.71484375, "learning_rate": 0.00014392741606290694, "loss": 0.8051, "step": 23637 }, { "epoch": 0.606957557199966, "grad_norm": 0.75, "learning_rate": 0.00014392340558991152, "loss": 0.7268, "step": 23638 }, { "epoch": 0.6069832343958877, "grad_norm": 0.8125, "learning_rate": 0.0001439193950293807, "loss": 0.8763, "step": 23639 }, { "epoch": 0.6070089115918096, "grad_norm": 0.75, "learning_rate": 0.00014391538438132247, "loss": 0.8061, "step": 23640 }, { "epoch": 0.6070345887877314, "grad_norm": 0.76953125, "learning_rate": 0.00014391137364574486, "loss": 0.8963, "step": 23641 }, { "epoch": 0.6070602659836533, "grad_norm": 0.7734375, "learning_rate": 0.00014390736282265587, "loss": 0.9038, "step": 23642 }, { "epoch": 0.6070859431795751, "grad_norm": 0.77734375, "learning_rate": 0.00014390335191206342, "loss": 0.8655, "step": 23643 }, { "epoch": 0.6071116203754969, "grad_norm": 0.79296875, "learning_rate": 0.00014389934091397558, "loss": 0.9044, "step": 23644 }, { "epoch": 0.6071372975714188, "grad_norm": 0.75390625, "learning_rate": 0.00014389532982840035, "loss": 0.7556, "step": 23645 }, { "epoch": 0.6071629747673405, "grad_norm": 0.73828125, "learning_rate": 0.00014389131865534568, "loss": 0.9005, "step": 23646 }, { "epoch": 0.6071886519632623, "grad_norm": 0.8203125, "learning_rate": 0.00014388730739481957, "loss": 0.9461, "step": 23647 }, { "epoch": 0.6072143291591842, "grad_norm": 0.7734375, "learning_rate": 0.00014388329604682999, "loss": 0.8506, "step": 23648 }, { "epoch": 0.607240006355106, "grad_norm": 1.125, "learning_rate": 0.000143879284611385, "loss": 0.9591, "step": 23649 }, { "epoch": 0.6072656835510278, "grad_norm": 0.85546875, "learning_rate": 0.00014387527308849256, "loss": 0.9326, "step": 23650 }, { "epoch": 0.6072913607469497, "grad_norm": 0.73828125, "learning_rate": 0.00014387126147816066, "loss": 0.8206, "step": 23651 }, { "epoch": 0.6073170379428714, "grad_norm": 0.75390625, "learning_rate": 0.0001438672497803973, "loss": 0.9387, "step": 23652 }, { "epoch": 0.6073427151387932, "grad_norm": 0.7890625, "learning_rate": 0.00014386323799521044, "loss": 0.9166, "step": 23653 }, { "epoch": 0.6073683923347151, "grad_norm": 0.75, "learning_rate": 0.00014385922612260817, "loss": 0.8084, "step": 23654 }, { "epoch": 0.6073940695306369, "grad_norm": 0.72265625, "learning_rate": 0.00014385521416259838, "loss": 0.7543, "step": 23655 }, { "epoch": 0.6074197467265587, "grad_norm": 0.79296875, "learning_rate": 0.0001438512021151891, "loss": 0.8456, "step": 23656 }, { "epoch": 0.6074454239224806, "grad_norm": 0.79296875, "learning_rate": 0.00014384718998038836, "loss": 1.0281, "step": 23657 }, { "epoch": 0.6074711011184024, "grad_norm": 0.765625, "learning_rate": 0.00014384317775820412, "loss": 0.8723, "step": 23658 }, { "epoch": 0.6074967783143241, "grad_norm": 1.09375, "learning_rate": 0.00014383916544864438, "loss": 0.8538, "step": 23659 }, { "epoch": 0.607522455510246, "grad_norm": 0.73046875, "learning_rate": 0.0001438351530517172, "loss": 0.8889, "step": 23660 }, { "epoch": 0.6075481327061678, "grad_norm": 0.8046875, "learning_rate": 0.00014383114056743044, "loss": 0.8578, "step": 23661 }, { "epoch": 0.6075738099020896, "grad_norm": 0.73046875, "learning_rate": 0.00014382712799579223, "loss": 0.7513, "step": 23662 }, { "epoch": 0.6075994870980115, "grad_norm": 0.78125, "learning_rate": 0.0001438231153368105, "loss": 0.8968, "step": 23663 }, { "epoch": 0.6076251642939333, "grad_norm": 0.828125, "learning_rate": 0.00014381910259049323, "loss": 0.9697, "step": 23664 }, { "epoch": 0.6076508414898552, "grad_norm": 0.73828125, "learning_rate": 0.0001438150897568485, "loss": 0.693, "step": 23665 }, { "epoch": 0.6076765186857769, "grad_norm": 0.74609375, "learning_rate": 0.0001438110768358842, "loss": 0.9423, "step": 23666 }, { "epoch": 0.6077021958816987, "grad_norm": 0.73828125, "learning_rate": 0.00014380706382760843, "loss": 0.8172, "step": 23667 }, { "epoch": 0.6077278730776206, "grad_norm": 0.75390625, "learning_rate": 0.0001438030507320291, "loss": 0.8261, "step": 23668 }, { "epoch": 0.6077535502735424, "grad_norm": 0.72265625, "learning_rate": 0.00014379903754915425, "loss": 0.7365, "step": 23669 }, { "epoch": 0.6077792274694642, "grad_norm": 0.77734375, "learning_rate": 0.0001437950242789919, "loss": 0.841, "step": 23670 }, { "epoch": 0.6078049046653861, "grad_norm": 0.796875, "learning_rate": 0.00014379101092155003, "loss": 0.861, "step": 23671 }, { "epoch": 0.6078305818613078, "grad_norm": 0.74609375, "learning_rate": 0.00014378699747683658, "loss": 0.7722, "step": 23672 }, { "epoch": 0.6078562590572296, "grad_norm": 0.71484375, "learning_rate": 0.00014378298394485966, "loss": 0.741, "step": 23673 }, { "epoch": 0.6078819362531515, "grad_norm": 0.8046875, "learning_rate": 0.00014377897032562714, "loss": 0.8701, "step": 23674 }, { "epoch": 0.6079076134490733, "grad_norm": 0.8125, "learning_rate": 0.00014377495661914712, "loss": 0.7948, "step": 23675 }, { "epoch": 0.6079332906449951, "grad_norm": 0.72265625, "learning_rate": 0.0001437709428254276, "loss": 0.7827, "step": 23676 }, { "epoch": 0.607958967840917, "grad_norm": 0.84375, "learning_rate": 0.0001437669289444765, "loss": 0.8444, "step": 23677 }, { "epoch": 0.6079846450368388, "grad_norm": 0.765625, "learning_rate": 0.0001437629149763019, "loss": 0.8568, "step": 23678 }, { "epoch": 0.6080103222327605, "grad_norm": 0.77734375, "learning_rate": 0.00014375890092091175, "loss": 0.8782, "step": 23679 }, { "epoch": 0.6080359994286824, "grad_norm": 0.80859375, "learning_rate": 0.00014375488677831403, "loss": 0.8597, "step": 23680 }, { "epoch": 0.6080616766246042, "grad_norm": 0.75, "learning_rate": 0.0001437508725485168, "loss": 0.799, "step": 23681 }, { "epoch": 0.608087353820526, "grad_norm": 0.83984375, "learning_rate": 0.00014374685823152804, "loss": 0.9119, "step": 23682 }, { "epoch": 0.6081130310164479, "grad_norm": 0.7578125, "learning_rate": 0.00014374284382735573, "loss": 0.8158, "step": 23683 }, { "epoch": 0.6081387082123697, "grad_norm": 0.7890625, "learning_rate": 0.0001437388293360079, "loss": 0.9207, "step": 23684 }, { "epoch": 0.6081643854082915, "grad_norm": 0.7890625, "learning_rate": 0.0001437348147574925, "loss": 0.7764, "step": 23685 }, { "epoch": 0.6081900626042133, "grad_norm": 0.79296875, "learning_rate": 0.0001437308000918176, "loss": 0.8378, "step": 23686 }, { "epoch": 0.6082157398001351, "grad_norm": 0.81640625, "learning_rate": 0.00014372678533899116, "loss": 0.871, "step": 23687 }, { "epoch": 0.6082414169960569, "grad_norm": 0.71484375, "learning_rate": 0.00014372277049902116, "loss": 0.8259, "step": 23688 }, { "epoch": 0.6082670941919788, "grad_norm": 0.73046875, "learning_rate": 0.00014371875557191564, "loss": 0.7836, "step": 23689 }, { "epoch": 0.6082927713879006, "grad_norm": 0.7734375, "learning_rate": 0.00014371474055768261, "loss": 0.9272, "step": 23690 }, { "epoch": 0.6083184485838224, "grad_norm": 0.8046875, "learning_rate": 0.00014371072545633004, "loss": 0.8238, "step": 23691 }, { "epoch": 0.6083441257797442, "grad_norm": 0.8359375, "learning_rate": 0.00014370671026786596, "loss": 0.8447, "step": 23692 }, { "epoch": 0.608369802975666, "grad_norm": 0.80859375, "learning_rate": 0.0001437026949922983, "loss": 0.9223, "step": 23693 }, { "epoch": 0.6083954801715878, "grad_norm": 0.7890625, "learning_rate": 0.00014369867962963517, "loss": 0.8559, "step": 23694 }, { "epoch": 0.6084211573675097, "grad_norm": 0.73828125, "learning_rate": 0.0001436946641798845, "loss": 0.7912, "step": 23695 }, { "epoch": 0.6084468345634315, "grad_norm": 0.8359375, "learning_rate": 0.00014369064864305428, "loss": 0.9342, "step": 23696 }, { "epoch": 0.6084725117593534, "grad_norm": 0.7421875, "learning_rate": 0.0001436866330191526, "loss": 0.8712, "step": 23697 }, { "epoch": 0.6084981889552752, "grad_norm": 0.78515625, "learning_rate": 0.00014368261730818738, "loss": 0.7947, "step": 23698 }, { "epoch": 0.6085238661511969, "grad_norm": 0.78125, "learning_rate": 0.00014367860151016663, "loss": 0.945, "step": 23699 }, { "epoch": 0.6085495433471187, "grad_norm": 0.66796875, "learning_rate": 0.0001436745856250984, "loss": 0.7737, "step": 23700 }, { "epoch": 0.6085752205430406, "grad_norm": 0.81640625, "learning_rate": 0.00014367056965299067, "loss": 1.0066, "step": 23701 }, { "epoch": 0.6086008977389624, "grad_norm": 0.76953125, "learning_rate": 0.00014366655359385143, "loss": 0.8487, "step": 23702 }, { "epoch": 0.6086265749348843, "grad_norm": 0.7734375, "learning_rate": 0.00014366253744768867, "loss": 0.7924, "step": 23703 }, { "epoch": 0.6086522521308061, "grad_norm": 0.80859375, "learning_rate": 0.00014365852121451047, "loss": 0.9206, "step": 23704 }, { "epoch": 0.6086779293267279, "grad_norm": 0.8125, "learning_rate": 0.00014365450489432476, "loss": 0.8365, "step": 23705 }, { "epoch": 0.6087036065226497, "grad_norm": 0.8125, "learning_rate": 0.00014365048848713951, "loss": 0.8664, "step": 23706 }, { "epoch": 0.6087292837185715, "grad_norm": 0.76953125, "learning_rate": 0.00014364647199296285, "loss": 0.8641, "step": 23707 }, { "epoch": 0.6087549609144933, "grad_norm": 0.796875, "learning_rate": 0.00014364245541180267, "loss": 0.7807, "step": 23708 }, { "epoch": 0.6087806381104152, "grad_norm": 0.78125, "learning_rate": 0.00014363843874366702, "loss": 0.87, "step": 23709 }, { "epoch": 0.608806315306337, "grad_norm": 0.8359375, "learning_rate": 0.00014363442198856393, "loss": 0.8401, "step": 23710 }, { "epoch": 0.6088319925022588, "grad_norm": 0.80078125, "learning_rate": 0.00014363040514650137, "loss": 0.8454, "step": 23711 }, { "epoch": 0.6088576696981806, "grad_norm": 0.77734375, "learning_rate": 0.00014362638821748734, "loss": 0.8721, "step": 23712 }, { "epoch": 0.6088833468941024, "grad_norm": 0.78125, "learning_rate": 0.00014362237120152987, "loss": 0.7337, "step": 23713 }, { "epoch": 0.6089090240900242, "grad_norm": 0.8203125, "learning_rate": 0.00014361835409863697, "loss": 0.946, "step": 23714 }, { "epoch": 0.6089347012859461, "grad_norm": 0.796875, "learning_rate": 0.00014361433690881662, "loss": 0.969, "step": 23715 }, { "epoch": 0.6089603784818679, "grad_norm": 0.8203125, "learning_rate": 0.00014361031963207682, "loss": 0.814, "step": 23716 }, { "epoch": 0.6089860556777897, "grad_norm": 0.7890625, "learning_rate": 0.00014360630226842563, "loss": 0.7944, "step": 23717 }, { "epoch": 0.6090117328737116, "grad_norm": 0.8125, "learning_rate": 0.000143602284817871, "loss": 0.9396, "step": 23718 }, { "epoch": 0.6090374100696333, "grad_norm": 0.76953125, "learning_rate": 0.00014359826728042093, "loss": 0.8113, "step": 23719 }, { "epoch": 0.6090630872655551, "grad_norm": 0.8125, "learning_rate": 0.00014359424965608347, "loss": 0.8872, "step": 23720 }, { "epoch": 0.609088764461477, "grad_norm": 0.828125, "learning_rate": 0.0001435902319448666, "loss": 0.9282, "step": 23721 }, { "epoch": 0.6091144416573988, "grad_norm": 0.93359375, "learning_rate": 0.00014358621414677834, "loss": 0.9652, "step": 23722 }, { "epoch": 0.6091401188533206, "grad_norm": 0.859375, "learning_rate": 0.0001435821962618267, "loss": 1.0336, "step": 23723 }, { "epoch": 0.6091657960492425, "grad_norm": 0.73828125, "learning_rate": 0.00014357817829001968, "loss": 0.7516, "step": 23724 }, { "epoch": 0.6091914732451643, "grad_norm": 0.7890625, "learning_rate": 0.00014357416023136528, "loss": 0.8517, "step": 23725 }, { "epoch": 0.609217150441086, "grad_norm": 0.81640625, "learning_rate": 0.0001435701420858715, "loss": 0.7906, "step": 23726 }, { "epoch": 0.6092428276370079, "grad_norm": 0.71875, "learning_rate": 0.0001435661238535464, "loss": 0.8614, "step": 23727 }, { "epoch": 0.6092685048329297, "grad_norm": 0.9609375, "learning_rate": 0.00014356210553439792, "loss": 0.8609, "step": 23728 }, { "epoch": 0.6092941820288515, "grad_norm": 0.75390625, "learning_rate": 0.00014355808712843412, "loss": 0.8248, "step": 23729 }, { "epoch": 0.6093198592247734, "grad_norm": 0.73828125, "learning_rate": 0.00014355406863566296, "loss": 0.8028, "step": 23730 }, { "epoch": 0.6093455364206952, "grad_norm": 0.734375, "learning_rate": 0.0001435500500560925, "loss": 0.8071, "step": 23731 }, { "epoch": 0.609371213616617, "grad_norm": 0.73828125, "learning_rate": 0.0001435460313897307, "loss": 0.786, "step": 23732 }, { "epoch": 0.6093968908125388, "grad_norm": 0.796875, "learning_rate": 0.0001435420126365856, "loss": 0.7896, "step": 23733 }, { "epoch": 0.6094225680084606, "grad_norm": 0.765625, "learning_rate": 0.00014353799379666522, "loss": 0.8689, "step": 23734 }, { "epoch": 0.6094482452043825, "grad_norm": 0.76953125, "learning_rate": 0.00014353397486997755, "loss": 0.8838, "step": 23735 }, { "epoch": 0.6094739224003043, "grad_norm": 0.7578125, "learning_rate": 0.00014352995585653058, "loss": 0.6839, "step": 23736 }, { "epoch": 0.6094995995962261, "grad_norm": 0.73046875, "learning_rate": 0.00014352593675633237, "loss": 0.758, "step": 23737 }, { "epoch": 0.609525276792148, "grad_norm": 0.70703125, "learning_rate": 0.00014352191756939085, "loss": 0.7625, "step": 23738 }, { "epoch": 0.6095509539880697, "grad_norm": 0.7578125, "learning_rate": 0.00014351789829571411, "loss": 0.8367, "step": 23739 }, { "epoch": 0.6095766311839915, "grad_norm": 0.8046875, "learning_rate": 0.00014351387893531015, "loss": 0.8926, "step": 23740 }, { "epoch": 0.6096023083799134, "grad_norm": 0.76171875, "learning_rate": 0.00014350985948818692, "loss": 0.9099, "step": 23741 }, { "epoch": 0.6096279855758352, "grad_norm": 0.72265625, "learning_rate": 0.0001435058399543525, "loss": 0.7713, "step": 23742 }, { "epoch": 0.609653662771757, "grad_norm": 0.7890625, "learning_rate": 0.00014350182033381485, "loss": 0.9796, "step": 23743 }, { "epoch": 0.6096793399676789, "grad_norm": 0.80859375, "learning_rate": 0.000143497800626582, "loss": 0.936, "step": 23744 }, { "epoch": 0.6097050171636007, "grad_norm": 0.7578125, "learning_rate": 0.000143493780832662, "loss": 0.7795, "step": 23745 }, { "epoch": 0.6097306943595224, "grad_norm": 0.8203125, "learning_rate": 0.0001434897609520628, "loss": 1.0038, "step": 23746 }, { "epoch": 0.6097563715554443, "grad_norm": 0.72265625, "learning_rate": 0.0001434857409847924, "loss": 0.7233, "step": 23747 }, { "epoch": 0.6097820487513661, "grad_norm": 0.74609375, "learning_rate": 0.0001434817209308589, "loss": 0.8328, "step": 23748 }, { "epoch": 0.6098077259472879, "grad_norm": 0.734375, "learning_rate": 0.00014347770079027022, "loss": 0.8181, "step": 23749 }, { "epoch": 0.6098334031432098, "grad_norm": 0.7578125, "learning_rate": 0.00014347368056303448, "loss": 0.7553, "step": 23750 }, { "epoch": 0.6098590803391316, "grad_norm": 0.83203125, "learning_rate": 0.00014346966024915954, "loss": 0.8575, "step": 23751 }, { "epoch": 0.6098847575350533, "grad_norm": 0.80078125, "learning_rate": 0.00014346563984865354, "loss": 0.8628, "step": 23752 }, { "epoch": 0.6099104347309752, "grad_norm": 0.83984375, "learning_rate": 0.00014346161936152442, "loss": 1.0265, "step": 23753 }, { "epoch": 0.609936111926897, "grad_norm": 0.8125, "learning_rate": 0.0001434575987877802, "loss": 1.0427, "step": 23754 }, { "epoch": 0.6099617891228188, "grad_norm": 0.77734375, "learning_rate": 0.00014345357812742894, "loss": 0.8214, "step": 23755 }, { "epoch": 0.6099874663187407, "grad_norm": 0.76953125, "learning_rate": 0.00014344955738047866, "loss": 0.9012, "step": 23756 }, { "epoch": 0.6100131435146625, "grad_norm": 0.73828125, "learning_rate": 0.0001434455365469373, "loss": 0.8174, "step": 23757 }, { "epoch": 0.6100388207105844, "grad_norm": 0.6875, "learning_rate": 0.0001434415156268129, "loss": 0.7777, "step": 23758 }, { "epoch": 0.6100644979065061, "grad_norm": 0.8359375, "learning_rate": 0.00014343749462011348, "loss": 0.8814, "step": 23759 }, { "epoch": 0.6100901751024279, "grad_norm": 0.7265625, "learning_rate": 0.00014343347352684707, "loss": 0.7857, "step": 23760 }, { "epoch": 0.6101158522983497, "grad_norm": 0.83203125, "learning_rate": 0.00014342945234702164, "loss": 0.8356, "step": 23761 }, { "epoch": 0.6101415294942716, "grad_norm": 0.75390625, "learning_rate": 0.00014342543108064528, "loss": 0.7752, "step": 23762 }, { "epoch": 0.6101672066901934, "grad_norm": 0.796875, "learning_rate": 0.00014342140972772594, "loss": 0.8165, "step": 23763 }, { "epoch": 0.6101928838861153, "grad_norm": 0.70703125, "learning_rate": 0.00014341738828827167, "loss": 0.7685, "step": 23764 }, { "epoch": 0.610218561082037, "grad_norm": 0.76171875, "learning_rate": 0.00014341336676229044, "loss": 1.0174, "step": 23765 }, { "epoch": 0.6102442382779588, "grad_norm": 0.76953125, "learning_rate": 0.0001434093451497903, "loss": 0.73, "step": 23766 }, { "epoch": 0.6102699154738807, "grad_norm": 0.7890625, "learning_rate": 0.00014340532345077928, "loss": 0.8522, "step": 23767 }, { "epoch": 0.6102955926698025, "grad_norm": 0.7265625, "learning_rate": 0.00014340130166526534, "loss": 0.867, "step": 23768 }, { "epoch": 0.6103212698657243, "grad_norm": 0.796875, "learning_rate": 0.00014339727979325654, "loss": 1.0468, "step": 23769 }, { "epoch": 0.6103469470616462, "grad_norm": 0.75390625, "learning_rate": 0.00014339325783476088, "loss": 0.9242, "step": 23770 }, { "epoch": 0.610372624257568, "grad_norm": 0.828125, "learning_rate": 0.00014338923578978638, "loss": 0.9147, "step": 23771 }, { "epoch": 0.6103983014534897, "grad_norm": 0.85546875, "learning_rate": 0.00014338521365834104, "loss": 0.8476, "step": 23772 }, { "epoch": 0.6104239786494116, "grad_norm": 0.6875, "learning_rate": 0.00014338119144043288, "loss": 0.8872, "step": 23773 }, { "epoch": 0.6104496558453334, "grad_norm": 0.8125, "learning_rate": 0.00014337716913606996, "loss": 0.9632, "step": 23774 }, { "epoch": 0.6104753330412552, "grad_norm": 0.765625, "learning_rate": 0.00014337314674526023, "loss": 0.8361, "step": 23775 }, { "epoch": 0.6105010102371771, "grad_norm": 0.78125, "learning_rate": 0.00014336912426801178, "loss": 0.9681, "step": 23776 }, { "epoch": 0.6105266874330989, "grad_norm": 0.7734375, "learning_rate": 0.00014336510170433254, "loss": 0.8173, "step": 23777 }, { "epoch": 0.6105523646290207, "grad_norm": 0.796875, "learning_rate": 0.00014336107905423058, "loss": 0.8216, "step": 23778 }, { "epoch": 0.6105780418249425, "grad_norm": 0.80078125, "learning_rate": 0.0001433570563177139, "loss": 0.7298, "step": 23779 }, { "epoch": 0.6106037190208643, "grad_norm": 0.8984375, "learning_rate": 0.00014335303349479053, "loss": 0.9855, "step": 23780 }, { "epoch": 0.6106293962167861, "grad_norm": 0.8203125, "learning_rate": 0.00014334901058546845, "loss": 0.8321, "step": 23781 }, { "epoch": 0.610655073412708, "grad_norm": 0.71484375, "learning_rate": 0.00014334498758975577, "loss": 0.8113, "step": 23782 }, { "epoch": 0.6106807506086298, "grad_norm": 0.80859375, "learning_rate": 0.0001433409645076604, "loss": 0.8203, "step": 23783 }, { "epoch": 0.6107064278045516, "grad_norm": 0.71484375, "learning_rate": 0.00014333694133919042, "loss": 0.8603, "step": 23784 }, { "epoch": 0.6107321050004734, "grad_norm": 0.78125, "learning_rate": 0.00014333291808435382, "loss": 0.8284, "step": 23785 }, { "epoch": 0.6107577821963952, "grad_norm": 0.8046875, "learning_rate": 0.00014332889474315863, "loss": 0.9514, "step": 23786 }, { "epoch": 0.610783459392317, "grad_norm": 0.73828125, "learning_rate": 0.00014332487131561288, "loss": 0.8316, "step": 23787 }, { "epoch": 0.6108091365882389, "grad_norm": 0.7421875, "learning_rate": 0.00014332084780172455, "loss": 0.8984, "step": 23788 }, { "epoch": 0.6108348137841607, "grad_norm": 0.76171875, "learning_rate": 0.0001433168242015017, "loss": 0.8286, "step": 23789 }, { "epoch": 0.6108604909800825, "grad_norm": 0.8046875, "learning_rate": 0.00014331280051495235, "loss": 0.9535, "step": 23790 }, { "epoch": 0.6108861681760044, "grad_norm": 0.828125, "learning_rate": 0.00014330877674208447, "loss": 0.7915, "step": 23791 }, { "epoch": 0.6109118453719261, "grad_norm": 0.81640625, "learning_rate": 0.00014330475288290613, "loss": 0.8302, "step": 23792 }, { "epoch": 0.610937522567848, "grad_norm": 0.7890625, "learning_rate": 0.00014330072893742533, "loss": 0.9261, "step": 23793 }, { "epoch": 0.6109631997637698, "grad_norm": 0.8203125, "learning_rate": 0.00014329670490565002, "loss": 0.9067, "step": 23794 }, { "epoch": 0.6109888769596916, "grad_norm": 0.80859375, "learning_rate": 0.00014329268078758838, "loss": 0.8045, "step": 23795 }, { "epoch": 0.6110145541556135, "grad_norm": 0.72265625, "learning_rate": 0.00014328865658324832, "loss": 0.7663, "step": 23796 }, { "epoch": 0.6110402313515353, "grad_norm": 0.80078125, "learning_rate": 0.00014328463229263788, "loss": 0.942, "step": 23797 }, { "epoch": 0.6110659085474571, "grad_norm": 0.7890625, "learning_rate": 0.00014328060791576506, "loss": 0.8341, "step": 23798 }, { "epoch": 0.6110915857433789, "grad_norm": 0.8671875, "learning_rate": 0.00014327658345263786, "loss": 0.9214, "step": 23799 }, { "epoch": 0.6111172629393007, "grad_norm": 0.76953125, "learning_rate": 0.0001432725589032644, "loss": 0.8713, "step": 23800 }, { "epoch": 0.6111429401352225, "grad_norm": 0.70703125, "learning_rate": 0.0001432685342676526, "loss": 0.7993, "step": 23801 }, { "epoch": 0.6111686173311444, "grad_norm": 0.74609375, "learning_rate": 0.0001432645095458105, "loss": 0.9252, "step": 23802 }, { "epoch": 0.6111942945270662, "grad_norm": 0.7734375, "learning_rate": 0.00014326048473774623, "loss": 1.0657, "step": 23803 }, { "epoch": 0.611219971722988, "grad_norm": 0.76953125, "learning_rate": 0.00014325645984346766, "loss": 0.9577, "step": 23804 }, { "epoch": 0.6112456489189098, "grad_norm": 0.79296875, "learning_rate": 0.00014325243486298288, "loss": 0.8021, "step": 23805 }, { "epoch": 0.6112713261148316, "grad_norm": 0.859375, "learning_rate": 0.0001432484097962999, "loss": 0.9251, "step": 23806 }, { "epoch": 0.6112970033107534, "grad_norm": 0.78125, "learning_rate": 0.00014324438464342676, "loss": 0.8279, "step": 23807 }, { "epoch": 0.6113226805066753, "grad_norm": 0.7578125, "learning_rate": 0.00014324035940437143, "loss": 0.8651, "step": 23808 }, { "epoch": 0.6113483577025971, "grad_norm": 1.0, "learning_rate": 0.00014323633407914204, "loss": 0.7724, "step": 23809 }, { "epoch": 0.6113740348985189, "grad_norm": 0.75390625, "learning_rate": 0.00014323230866774647, "loss": 0.8766, "step": 23810 }, { "epoch": 0.6113997120944408, "grad_norm": 0.75390625, "learning_rate": 0.00014322828317019285, "loss": 0.8302, "step": 23811 }, { "epoch": 0.6114253892903625, "grad_norm": 0.78125, "learning_rate": 0.00014322425758648918, "loss": 0.8015, "step": 23812 }, { "epoch": 0.6114510664862843, "grad_norm": 0.69921875, "learning_rate": 0.00014322023191664342, "loss": 0.8653, "step": 23813 }, { "epoch": 0.6114767436822062, "grad_norm": 0.71484375, "learning_rate": 0.00014321620616066367, "loss": 0.8061, "step": 23814 }, { "epoch": 0.611502420878128, "grad_norm": 0.80078125, "learning_rate": 0.00014321218031855795, "loss": 0.8892, "step": 23815 }, { "epoch": 0.6115280980740498, "grad_norm": 0.75390625, "learning_rate": 0.0001432081543903342, "loss": 0.7901, "step": 23816 }, { "epoch": 0.6115537752699717, "grad_norm": 0.796875, "learning_rate": 0.00014320412837600058, "loss": 0.9256, "step": 23817 }, { "epoch": 0.6115794524658935, "grad_norm": 0.71484375, "learning_rate": 0.00014320010227556495, "loss": 0.7163, "step": 23818 }, { "epoch": 0.6116051296618152, "grad_norm": 0.76953125, "learning_rate": 0.00014319607608903547, "loss": 0.8926, "step": 23819 }, { "epoch": 0.6116308068577371, "grad_norm": 0.78125, "learning_rate": 0.0001431920498164201, "loss": 0.7668, "step": 23820 }, { "epoch": 0.6116564840536589, "grad_norm": 0.78125, "learning_rate": 0.00014318802345772688, "loss": 0.8242, "step": 23821 }, { "epoch": 0.6116821612495807, "grad_norm": 0.82421875, "learning_rate": 0.00014318399701296385, "loss": 0.8779, "step": 23822 }, { "epoch": 0.6117078384455026, "grad_norm": 0.73046875, "learning_rate": 0.00014317997048213897, "loss": 0.8245, "step": 23823 }, { "epoch": 0.6117335156414244, "grad_norm": 0.82421875, "learning_rate": 0.00014317594386526033, "loss": 0.8988, "step": 23824 }, { "epoch": 0.6117591928373461, "grad_norm": 0.7734375, "learning_rate": 0.00014317191716233592, "loss": 0.857, "step": 23825 }, { "epoch": 0.611784870033268, "grad_norm": 0.76171875, "learning_rate": 0.0001431678903733738, "loss": 0.9419, "step": 23826 }, { "epoch": 0.6118105472291898, "grad_norm": 0.8671875, "learning_rate": 0.00014316386349838198, "loss": 0.8681, "step": 23827 }, { "epoch": 0.6118362244251117, "grad_norm": 0.7890625, "learning_rate": 0.00014315983653736847, "loss": 0.9692, "step": 23828 }, { "epoch": 0.6118619016210335, "grad_norm": 0.7578125, "learning_rate": 0.0001431558094903413, "loss": 0.8646, "step": 23829 }, { "epoch": 0.6118875788169553, "grad_norm": 0.73828125, "learning_rate": 0.0001431517823573085, "loss": 0.8675, "step": 23830 }, { "epoch": 0.6119132560128772, "grad_norm": 0.765625, "learning_rate": 0.0001431477551382781, "loss": 0.8311, "step": 23831 }, { "epoch": 0.6119389332087989, "grad_norm": 0.9140625, "learning_rate": 0.0001431437278332581, "loss": 0.9592, "step": 23832 }, { "epoch": 0.6119646104047207, "grad_norm": 0.796875, "learning_rate": 0.00014313970044225658, "loss": 0.896, "step": 23833 }, { "epoch": 0.6119902876006426, "grad_norm": 0.7421875, "learning_rate": 0.0001431356729652815, "loss": 0.8311, "step": 23834 }, { "epoch": 0.6120159647965644, "grad_norm": 0.76953125, "learning_rate": 0.00014313164540234095, "loss": 0.9673, "step": 23835 }, { "epoch": 0.6120416419924862, "grad_norm": 0.796875, "learning_rate": 0.0001431276177534429, "loss": 0.921, "step": 23836 }, { "epoch": 0.6120673191884081, "grad_norm": 0.7578125, "learning_rate": 0.00014312359001859544, "loss": 0.7974, "step": 23837 }, { "epoch": 0.6120929963843299, "grad_norm": 0.796875, "learning_rate": 0.00014311956219780655, "loss": 0.9459, "step": 23838 }, { "epoch": 0.6121186735802516, "grad_norm": 0.75390625, "learning_rate": 0.00014311553429108425, "loss": 0.8298, "step": 23839 }, { "epoch": 0.6121443507761735, "grad_norm": 0.7890625, "learning_rate": 0.0001431115062984366, "loss": 0.74, "step": 23840 }, { "epoch": 0.6121700279720953, "grad_norm": 0.76171875, "learning_rate": 0.00014310747821987162, "loss": 0.8641, "step": 23841 }, { "epoch": 0.6121957051680171, "grad_norm": 0.78515625, "learning_rate": 0.0001431034500553973, "loss": 0.8385, "step": 23842 }, { "epoch": 0.612221382363939, "grad_norm": 0.828125, "learning_rate": 0.0001430994218050217, "loss": 0.844, "step": 23843 }, { "epoch": 0.6122470595598608, "grad_norm": 0.78125, "learning_rate": 0.00014309539346875287, "loss": 0.82, "step": 23844 }, { "epoch": 0.6122727367557825, "grad_norm": 0.78515625, "learning_rate": 0.0001430913650465988, "loss": 0.7853, "step": 23845 }, { "epoch": 0.6122984139517044, "grad_norm": 0.7109375, "learning_rate": 0.00014308733653856752, "loss": 0.8881, "step": 23846 }, { "epoch": 0.6123240911476262, "grad_norm": 0.84375, "learning_rate": 0.0001430833079446671, "loss": 0.8634, "step": 23847 }, { "epoch": 0.612349768343548, "grad_norm": 0.77734375, "learning_rate": 0.00014307927926490553, "loss": 1.1508, "step": 23848 }, { "epoch": 0.6123754455394699, "grad_norm": 0.75, "learning_rate": 0.00014307525049929085, "loss": 0.9155, "step": 23849 }, { "epoch": 0.6124011227353917, "grad_norm": 0.67578125, "learning_rate": 0.00014307122164783107, "loss": 0.87, "step": 23850 }, { "epoch": 0.6124267999313135, "grad_norm": 0.76171875, "learning_rate": 0.00014306719271053425, "loss": 0.8448, "step": 23851 }, { "epoch": 0.6124524771272353, "grad_norm": 0.73046875, "learning_rate": 0.00014306316368740838, "loss": 0.845, "step": 23852 }, { "epoch": 0.6124781543231571, "grad_norm": 0.7734375, "learning_rate": 0.0001430591345784615, "loss": 0.8363, "step": 23853 }, { "epoch": 0.612503831519079, "grad_norm": 0.71484375, "learning_rate": 0.00014305510538370172, "loss": 0.8548, "step": 23854 }, { "epoch": 0.6125295087150008, "grad_norm": 0.80859375, "learning_rate": 0.00014305107610313697, "loss": 0.9292, "step": 23855 }, { "epoch": 0.6125551859109226, "grad_norm": 0.78125, "learning_rate": 0.00014304704673677533, "loss": 0.9033, "step": 23856 }, { "epoch": 0.6125808631068445, "grad_norm": 0.6875, "learning_rate": 0.0001430430172846248, "loss": 0.7634, "step": 23857 }, { "epoch": 0.6126065403027663, "grad_norm": 0.78515625, "learning_rate": 0.0001430389877466934, "loss": 0.8985, "step": 23858 }, { "epoch": 0.612632217498688, "grad_norm": 0.7421875, "learning_rate": 0.0001430349581229892, "loss": 0.9281, "step": 23859 }, { "epoch": 0.6126578946946099, "grad_norm": 0.7421875, "learning_rate": 0.00014303092841352023, "loss": 0.9385, "step": 23860 }, { "epoch": 0.6126835718905317, "grad_norm": 0.80078125, "learning_rate": 0.0001430268986182945, "loss": 0.8637, "step": 23861 }, { "epoch": 0.6127092490864535, "grad_norm": 0.7578125, "learning_rate": 0.00014302286873732006, "loss": 0.8288, "step": 23862 }, { "epoch": 0.6127349262823754, "grad_norm": 0.734375, "learning_rate": 0.0001430188387706049, "loss": 0.757, "step": 23863 }, { "epoch": 0.6127606034782972, "grad_norm": 0.80859375, "learning_rate": 0.00014301480871815712, "loss": 0.899, "step": 23864 }, { "epoch": 0.6127862806742189, "grad_norm": 0.84375, "learning_rate": 0.00014301077857998472, "loss": 0.8449, "step": 23865 }, { "epoch": 0.6128119578701408, "grad_norm": 0.8203125, "learning_rate": 0.00014300674835609564, "loss": 1.002, "step": 23866 }, { "epoch": 0.6128376350660626, "grad_norm": 0.796875, "learning_rate": 0.00014300271804649809, "loss": 0.8432, "step": 23867 }, { "epoch": 0.6128633122619844, "grad_norm": 0.76171875, "learning_rate": 0.00014299868765119995, "loss": 0.7391, "step": 23868 }, { "epoch": 0.6128889894579063, "grad_norm": 0.765625, "learning_rate": 0.00014299465717020933, "loss": 0.905, "step": 23869 }, { "epoch": 0.6129146666538281, "grad_norm": 0.73046875, "learning_rate": 0.00014299062660353423, "loss": 0.8968, "step": 23870 }, { "epoch": 0.6129403438497499, "grad_norm": 0.7890625, "learning_rate": 0.0001429865959511827, "loss": 0.8766, "step": 23871 }, { "epoch": 0.6129660210456717, "grad_norm": 0.76171875, "learning_rate": 0.00014298256521316278, "loss": 0.8146, "step": 23872 }, { "epoch": 0.6129916982415935, "grad_norm": 0.828125, "learning_rate": 0.00014297853438948247, "loss": 1.0314, "step": 23873 }, { "epoch": 0.6130173754375153, "grad_norm": 0.74609375, "learning_rate": 0.00014297450348014983, "loss": 0.801, "step": 23874 }, { "epoch": 0.6130430526334372, "grad_norm": 0.75390625, "learning_rate": 0.0001429704724851729, "loss": 0.8538, "step": 23875 }, { "epoch": 0.613068729829359, "grad_norm": 0.765625, "learning_rate": 0.00014296644140455967, "loss": 0.8617, "step": 23876 }, { "epoch": 0.6130944070252808, "grad_norm": 0.7578125, "learning_rate": 0.00014296241023831826, "loss": 0.7339, "step": 23877 }, { "epoch": 0.6131200842212027, "grad_norm": 0.75, "learning_rate": 0.00014295837898645658, "loss": 0.7521, "step": 23878 }, { "epoch": 0.6131457614171244, "grad_norm": 0.7421875, "learning_rate": 0.00014295434764898275, "loss": 0.7661, "step": 23879 }, { "epoch": 0.6131714386130462, "grad_norm": 0.71484375, "learning_rate": 0.0001429503162259048, "loss": 0.8971, "step": 23880 }, { "epoch": 0.6131971158089681, "grad_norm": 0.75, "learning_rate": 0.00014294628471723075, "loss": 0.8428, "step": 23881 }, { "epoch": 0.6132227930048899, "grad_norm": 0.7890625, "learning_rate": 0.00014294225312296862, "loss": 0.8838, "step": 23882 }, { "epoch": 0.6132484702008117, "grad_norm": 0.7734375, "learning_rate": 0.0001429382214431265, "loss": 0.7773, "step": 23883 }, { "epoch": 0.6132741473967336, "grad_norm": 0.734375, "learning_rate": 0.00014293418967771234, "loss": 0.7066, "step": 23884 }, { "epoch": 0.6132998245926553, "grad_norm": 0.82421875, "learning_rate": 0.00014293015782673423, "loss": 0.9433, "step": 23885 }, { "epoch": 0.6133255017885771, "grad_norm": 0.75390625, "learning_rate": 0.0001429261258902002, "loss": 0.8607, "step": 23886 }, { "epoch": 0.613351178984499, "grad_norm": 0.703125, "learning_rate": 0.00014292209386811825, "loss": 0.8917, "step": 23887 }, { "epoch": 0.6133768561804208, "grad_norm": 0.79296875, "learning_rate": 0.0001429180617604965, "loss": 0.8175, "step": 23888 }, { "epoch": 0.6134025333763427, "grad_norm": 0.81640625, "learning_rate": 0.00014291402956734289, "loss": 1.0187, "step": 23889 }, { "epoch": 0.6134282105722645, "grad_norm": 0.80859375, "learning_rate": 0.00014290999728866548, "loss": 0.893, "step": 23890 }, { "epoch": 0.6134538877681863, "grad_norm": 0.75390625, "learning_rate": 0.00014290596492447236, "loss": 0.8377, "step": 23891 }, { "epoch": 0.613479564964108, "grad_norm": 0.74609375, "learning_rate": 0.00014290193247477152, "loss": 0.8097, "step": 23892 }, { "epoch": 0.6135052421600299, "grad_norm": 0.82421875, "learning_rate": 0.00014289789993957095, "loss": 0.778, "step": 23893 }, { "epoch": 0.6135309193559517, "grad_norm": 0.71875, "learning_rate": 0.0001428938673188788, "loss": 0.8354, "step": 23894 }, { "epoch": 0.6135565965518736, "grad_norm": 0.796875, "learning_rate": 0.00014288983461270305, "loss": 0.8823, "step": 23895 }, { "epoch": 0.6135822737477954, "grad_norm": 0.82421875, "learning_rate": 0.0001428858018210517, "loss": 0.9909, "step": 23896 }, { "epoch": 0.6136079509437172, "grad_norm": 0.7734375, "learning_rate": 0.00014288176894393284, "loss": 0.8594, "step": 23897 }, { "epoch": 0.6136336281396391, "grad_norm": 0.71484375, "learning_rate": 0.00014287773598135447, "loss": 0.8005, "step": 23898 }, { "epoch": 0.6136593053355608, "grad_norm": 0.77734375, "learning_rate": 0.00014287370293332465, "loss": 0.8369, "step": 23899 }, { "epoch": 0.6136849825314826, "grad_norm": 0.76953125, "learning_rate": 0.00014286966979985144, "loss": 0.808, "step": 23900 }, { "epoch": 0.6137106597274045, "grad_norm": 0.8046875, "learning_rate": 0.00014286563658094282, "loss": 0.7828, "step": 23901 }, { "epoch": 0.6137363369233263, "grad_norm": 0.77734375, "learning_rate": 0.0001428616032766069, "loss": 0.7648, "step": 23902 }, { "epoch": 0.6137620141192481, "grad_norm": 0.6953125, "learning_rate": 0.0001428575698868516, "loss": 0.7267, "step": 23903 }, { "epoch": 0.61378769131517, "grad_norm": 0.79296875, "learning_rate": 0.0001428535364116851, "loss": 0.936, "step": 23904 }, { "epoch": 0.6138133685110917, "grad_norm": 0.73828125, "learning_rate": 0.00014284950285111536, "loss": 0.8981, "step": 23905 }, { "epoch": 0.6138390457070135, "grad_norm": 0.7421875, "learning_rate": 0.0001428454692051504, "loss": 0.8234, "step": 23906 }, { "epoch": 0.6138647229029354, "grad_norm": 0.859375, "learning_rate": 0.00014284143547379835, "loss": 0.7524, "step": 23907 }, { "epoch": 0.6138904000988572, "grad_norm": 0.75390625, "learning_rate": 0.00014283740165706713, "loss": 0.9312, "step": 23908 }, { "epoch": 0.613916077294779, "grad_norm": 0.7578125, "learning_rate": 0.0001428333677549649, "loss": 0.8051, "step": 23909 }, { "epoch": 0.6139417544907009, "grad_norm": 0.765625, "learning_rate": 0.00014282933376749958, "loss": 0.995, "step": 23910 }, { "epoch": 0.6139674316866227, "grad_norm": 0.7578125, "learning_rate": 0.00014282529969467927, "loss": 0.9025, "step": 23911 }, { "epoch": 0.6139931088825444, "grad_norm": 0.7109375, "learning_rate": 0.00014282126553651203, "loss": 0.7645, "step": 23912 }, { "epoch": 0.6140187860784663, "grad_norm": 0.78125, "learning_rate": 0.00014281723129300588, "loss": 0.8651, "step": 23913 }, { "epoch": 0.6140444632743881, "grad_norm": 0.7421875, "learning_rate": 0.00014281319696416881, "loss": 0.8193, "step": 23914 }, { "epoch": 0.6140701404703099, "grad_norm": 0.84375, "learning_rate": 0.00014280916255000897, "loss": 0.8387, "step": 23915 }, { "epoch": 0.6140958176662318, "grad_norm": 0.76953125, "learning_rate": 0.00014280512805053429, "loss": 0.7469, "step": 23916 }, { "epoch": 0.6141214948621536, "grad_norm": 0.70703125, "learning_rate": 0.00014280109346575287, "loss": 0.8944, "step": 23917 }, { "epoch": 0.6141471720580755, "grad_norm": 0.75390625, "learning_rate": 0.00014279705879567277, "loss": 0.8032, "step": 23918 }, { "epoch": 0.6141728492539972, "grad_norm": 0.69921875, "learning_rate": 0.00014279302404030192, "loss": 0.8211, "step": 23919 }, { "epoch": 0.614198526449919, "grad_norm": 0.71875, "learning_rate": 0.00014278898919964852, "loss": 0.9442, "step": 23920 }, { "epoch": 0.6142242036458408, "grad_norm": 0.80859375, "learning_rate": 0.00014278495427372053, "loss": 0.8086, "step": 23921 }, { "epoch": 0.6142498808417627, "grad_norm": 0.7265625, "learning_rate": 0.00014278091926252592, "loss": 0.8237, "step": 23922 }, { "epoch": 0.6142755580376845, "grad_norm": 0.75390625, "learning_rate": 0.00014277688416607285, "loss": 0.6878, "step": 23923 }, { "epoch": 0.6143012352336064, "grad_norm": 0.76171875, "learning_rate": 0.00014277284898436926, "loss": 0.8397, "step": 23924 }, { "epoch": 0.6143269124295281, "grad_norm": 0.76953125, "learning_rate": 0.0001427688137174233, "loss": 0.8872, "step": 23925 }, { "epoch": 0.6143525896254499, "grad_norm": 0.75, "learning_rate": 0.00014276477836524295, "loss": 0.7989, "step": 23926 }, { "epoch": 0.6143782668213718, "grad_norm": 0.83203125, "learning_rate": 0.00014276074292783624, "loss": 0.8652, "step": 23927 }, { "epoch": 0.6144039440172936, "grad_norm": 0.7734375, "learning_rate": 0.00014275670740521127, "loss": 0.7569, "step": 23928 }, { "epoch": 0.6144296212132154, "grad_norm": 0.72265625, "learning_rate": 0.00014275267179737603, "loss": 0.7923, "step": 23929 }, { "epoch": 0.6144552984091373, "grad_norm": 0.75, "learning_rate": 0.00014274863610433855, "loss": 0.9358, "step": 23930 }, { "epoch": 0.6144809756050591, "grad_norm": 0.71875, "learning_rate": 0.00014274460032610693, "loss": 0.8424, "step": 23931 }, { "epoch": 0.6145066528009808, "grad_norm": 0.7734375, "learning_rate": 0.0001427405644626892, "loss": 0.8341, "step": 23932 }, { "epoch": 0.6145323299969027, "grad_norm": 0.78515625, "learning_rate": 0.00014273652851409333, "loss": 0.8215, "step": 23933 }, { "epoch": 0.6145580071928245, "grad_norm": 0.8046875, "learning_rate": 0.00014273249248032745, "loss": 0.8093, "step": 23934 }, { "epoch": 0.6145836843887463, "grad_norm": 0.8125, "learning_rate": 0.00014272845636139955, "loss": 0.9178, "step": 23935 }, { "epoch": 0.6146093615846682, "grad_norm": 0.7890625, "learning_rate": 0.00014272442015731774, "loss": 0.8021, "step": 23936 }, { "epoch": 0.61463503878059, "grad_norm": 0.7734375, "learning_rate": 0.00014272038386809, "loss": 0.89, "step": 23937 }, { "epoch": 0.6146607159765118, "grad_norm": 0.81640625, "learning_rate": 0.00014271634749372437, "loss": 0.7541, "step": 23938 }, { "epoch": 0.6146863931724336, "grad_norm": 0.875, "learning_rate": 0.00014271231103422892, "loss": 0.812, "step": 23939 }, { "epoch": 0.6147120703683554, "grad_norm": 0.91796875, "learning_rate": 0.00014270827448961172, "loss": 0.9719, "step": 23940 }, { "epoch": 0.6147377475642772, "grad_norm": 0.8828125, "learning_rate": 0.00014270423785988075, "loss": 0.7722, "step": 23941 }, { "epoch": 0.6147634247601991, "grad_norm": 0.765625, "learning_rate": 0.00014270020114504414, "loss": 0.9643, "step": 23942 }, { "epoch": 0.6147891019561209, "grad_norm": 0.828125, "learning_rate": 0.00014269616434510986, "loss": 0.844, "step": 23943 }, { "epoch": 0.6148147791520427, "grad_norm": 0.703125, "learning_rate": 0.00014269212746008598, "loss": 0.7306, "step": 23944 }, { "epoch": 0.6148404563479645, "grad_norm": 0.69921875, "learning_rate": 0.00014268809048998055, "loss": 0.8183, "step": 23945 }, { "epoch": 0.6148661335438863, "grad_norm": 0.80078125, "learning_rate": 0.0001426840534348016, "loss": 0.7511, "step": 23946 }, { "epoch": 0.6148918107398081, "grad_norm": 0.83984375, "learning_rate": 0.00014268001629455724, "loss": 0.9205, "step": 23947 }, { "epoch": 0.61491748793573, "grad_norm": 0.81640625, "learning_rate": 0.0001426759790692554, "loss": 0.7823, "step": 23948 }, { "epoch": 0.6149431651316518, "grad_norm": 0.8203125, "learning_rate": 0.00014267194175890418, "loss": 0.8354, "step": 23949 }, { "epoch": 0.6149688423275737, "grad_norm": 0.77734375, "learning_rate": 0.0001426679043635117, "loss": 0.8542, "step": 23950 }, { "epoch": 0.6149945195234955, "grad_norm": 0.7421875, "learning_rate": 0.0001426638668830859, "loss": 0.8327, "step": 23951 }, { "epoch": 0.6150201967194172, "grad_norm": 0.80859375, "learning_rate": 0.00014265982931763483, "loss": 0.9049, "step": 23952 }, { "epoch": 0.615045873915339, "grad_norm": 0.73828125, "learning_rate": 0.00014265579166716666, "loss": 0.7834, "step": 23953 }, { "epoch": 0.6150715511112609, "grad_norm": 0.80859375, "learning_rate": 0.0001426517539316893, "loss": 0.9434, "step": 23954 }, { "epoch": 0.6150972283071827, "grad_norm": 0.734375, "learning_rate": 0.00014264771611121086, "loss": 0.8221, "step": 23955 }, { "epoch": 0.6151229055031046, "grad_norm": 0.7421875, "learning_rate": 0.00014264367820573934, "loss": 0.8473, "step": 23956 }, { "epoch": 0.6151485826990264, "grad_norm": 0.8125, "learning_rate": 0.00014263964021528287, "loss": 0.8623, "step": 23957 }, { "epoch": 0.6151742598949482, "grad_norm": 0.77734375, "learning_rate": 0.0001426356021398494, "loss": 0.952, "step": 23958 }, { "epoch": 0.61519993709087, "grad_norm": 0.74609375, "learning_rate": 0.00014263156397944707, "loss": 0.8413, "step": 23959 }, { "epoch": 0.6152256142867918, "grad_norm": 0.75390625, "learning_rate": 0.00014262752573408386, "loss": 0.7085, "step": 23960 }, { "epoch": 0.6152512914827136, "grad_norm": 0.7734375, "learning_rate": 0.00014262348740376785, "loss": 0.8494, "step": 23961 }, { "epoch": 0.6152769686786355, "grad_norm": 0.875, "learning_rate": 0.0001426194489885071, "loss": 0.8497, "step": 23962 }, { "epoch": 0.6153026458745573, "grad_norm": 0.78125, "learning_rate": 0.0001426154104883096, "loss": 0.86, "step": 23963 }, { "epoch": 0.6153283230704791, "grad_norm": 0.796875, "learning_rate": 0.00014261137190318347, "loss": 1.0108, "step": 23964 }, { "epoch": 0.6153540002664009, "grad_norm": 0.6953125, "learning_rate": 0.0001426073332331367, "loss": 0.7511, "step": 23965 }, { "epoch": 0.6153796774623227, "grad_norm": 2.734375, "learning_rate": 0.00014260329447817735, "loss": 0.863, "step": 23966 }, { "epoch": 0.6154053546582445, "grad_norm": 0.7578125, "learning_rate": 0.0001425992556383135, "loss": 0.7872, "step": 23967 }, { "epoch": 0.6154310318541664, "grad_norm": 0.83203125, "learning_rate": 0.00014259521671355322, "loss": 0.8755, "step": 23968 }, { "epoch": 0.6154567090500882, "grad_norm": 0.80859375, "learning_rate": 0.00014259117770390448, "loss": 0.9195, "step": 23969 }, { "epoch": 0.61548238624601, "grad_norm": 0.796875, "learning_rate": 0.00014258713860937537, "loss": 0.9195, "step": 23970 }, { "epoch": 0.6155080634419319, "grad_norm": 0.8046875, "learning_rate": 0.00014258309942997394, "loss": 0.754, "step": 23971 }, { "epoch": 0.6155337406378536, "grad_norm": 0.796875, "learning_rate": 0.00014257906016570824, "loss": 0.7847, "step": 23972 }, { "epoch": 0.6155594178337754, "grad_norm": 0.78515625, "learning_rate": 0.0001425750208165863, "loss": 0.8725, "step": 23973 }, { "epoch": 0.6155850950296973, "grad_norm": 0.734375, "learning_rate": 0.00014257098138261627, "loss": 0.8409, "step": 23974 }, { "epoch": 0.6156107722256191, "grad_norm": 0.78515625, "learning_rate": 0.00014256694186380603, "loss": 0.8206, "step": 23975 }, { "epoch": 0.6156364494215409, "grad_norm": 0.7890625, "learning_rate": 0.00014256290226016377, "loss": 0.8796, "step": 23976 }, { "epoch": 0.6156621266174628, "grad_norm": 0.7421875, "learning_rate": 0.00014255886257169749, "loss": 0.7652, "step": 23977 }, { "epoch": 0.6156878038133845, "grad_norm": 0.7734375, "learning_rate": 0.0001425548227984152, "loss": 0.7988, "step": 23978 }, { "epoch": 0.6157134810093063, "grad_norm": 0.78515625, "learning_rate": 0.00014255078294032504, "loss": 0.9619, "step": 23979 }, { "epoch": 0.6157391582052282, "grad_norm": 0.7578125, "learning_rate": 0.00014254674299743497, "loss": 0.9292, "step": 23980 }, { "epoch": 0.61576483540115, "grad_norm": 0.74609375, "learning_rate": 0.0001425427029697531, "loss": 0.857, "step": 23981 }, { "epoch": 0.6157905125970718, "grad_norm": 0.8515625, "learning_rate": 0.0001425386628572875, "loss": 0.9754, "step": 23982 }, { "epoch": 0.6158161897929937, "grad_norm": 0.74609375, "learning_rate": 0.00014253462266004612, "loss": 0.8452, "step": 23983 }, { "epoch": 0.6158418669889155, "grad_norm": 0.734375, "learning_rate": 0.00014253058237803713, "loss": 0.7938, "step": 23984 }, { "epoch": 0.6158675441848372, "grad_norm": 0.77734375, "learning_rate": 0.00014252654201126852, "loss": 0.7936, "step": 23985 }, { "epoch": 0.6158932213807591, "grad_norm": 0.7421875, "learning_rate": 0.00014252250155974837, "loss": 0.8302, "step": 23986 }, { "epoch": 0.6159188985766809, "grad_norm": 0.69140625, "learning_rate": 0.0001425184610234847, "loss": 0.7964, "step": 23987 }, { "epoch": 0.6159445757726028, "grad_norm": 0.77734375, "learning_rate": 0.00014251442040248555, "loss": 0.9662, "step": 23988 }, { "epoch": 0.6159702529685246, "grad_norm": 0.80078125, "learning_rate": 0.00014251037969675902, "loss": 0.8869, "step": 23989 }, { "epoch": 0.6159959301644464, "grad_norm": 0.78125, "learning_rate": 0.00014250633890631318, "loss": 0.9177, "step": 23990 }, { "epoch": 0.6160216073603683, "grad_norm": 0.79296875, "learning_rate": 0.000142502298031156, "loss": 0.9413, "step": 23991 }, { "epoch": 0.61604728455629, "grad_norm": 0.76171875, "learning_rate": 0.00014249825707129558, "loss": 0.754, "step": 23992 }, { "epoch": 0.6160729617522118, "grad_norm": 0.76953125, "learning_rate": 0.00014249421602674, "loss": 0.823, "step": 23993 }, { "epoch": 0.6160986389481337, "grad_norm": 0.796875, "learning_rate": 0.00014249017489749728, "loss": 0.9117, "step": 23994 }, { "epoch": 0.6161243161440555, "grad_norm": 0.75390625, "learning_rate": 0.00014248613368357546, "loss": 0.8413, "step": 23995 }, { "epoch": 0.6161499933399773, "grad_norm": 0.734375, "learning_rate": 0.00014248209238498262, "loss": 0.7889, "step": 23996 }, { "epoch": 0.6161756705358992, "grad_norm": 0.74609375, "learning_rate": 0.00014247805100172682, "loss": 0.7985, "step": 23997 }, { "epoch": 0.6162013477318209, "grad_norm": 0.7734375, "learning_rate": 0.00014247400953381608, "loss": 0.8393, "step": 23998 }, { "epoch": 0.6162270249277427, "grad_norm": 0.78515625, "learning_rate": 0.00014246996798125848, "loss": 0.8489, "step": 23999 }, { "epoch": 0.6162527021236646, "grad_norm": 0.73828125, "learning_rate": 0.0001424659263440621, "loss": 0.7069, "step": 24000 }, { "epoch": 0.6162527021236646, "eval_loss": 0.8535081744194031, "eval_runtime": 388.1277, "eval_samples_per_second": 25.765, "eval_steps_per_second": 0.806, "step": 24000 }, { "epoch": 0.6162783793195864, "grad_norm": 0.7265625, "learning_rate": 0.00014246188462223492, "loss": 0.955, "step": 24001 }, { "epoch": 0.6163040565155082, "grad_norm": 0.73046875, "learning_rate": 0.0001424578428157851, "loss": 0.8169, "step": 24002 }, { "epoch": 0.6163297337114301, "grad_norm": 0.75, "learning_rate": 0.00014245380092472057, "loss": 0.8568, "step": 24003 }, { "epoch": 0.6163554109073519, "grad_norm": 0.796875, "learning_rate": 0.00014244975894904947, "loss": 0.7798, "step": 24004 }, { "epoch": 0.6163810881032736, "grad_norm": 0.71875, "learning_rate": 0.00014244571688877986, "loss": 0.8201, "step": 24005 }, { "epoch": 0.6164067652991955, "grad_norm": 0.81640625, "learning_rate": 0.00014244167474391977, "loss": 0.9603, "step": 24006 }, { "epoch": 0.6164324424951173, "grad_norm": 0.8359375, "learning_rate": 0.00014243763251447724, "loss": 0.8757, "step": 24007 }, { "epoch": 0.6164581196910391, "grad_norm": 0.765625, "learning_rate": 0.00014243359020046033, "loss": 0.786, "step": 24008 }, { "epoch": 0.616483796886961, "grad_norm": 0.828125, "learning_rate": 0.00014242954780187712, "loss": 1.0145, "step": 24009 }, { "epoch": 0.6165094740828828, "grad_norm": 0.75390625, "learning_rate": 0.00014242550531873565, "loss": 0.8242, "step": 24010 }, { "epoch": 0.6165351512788046, "grad_norm": 0.68359375, "learning_rate": 0.00014242146275104397, "loss": 0.7463, "step": 24011 }, { "epoch": 0.6165608284747264, "grad_norm": 0.734375, "learning_rate": 0.00014241742009881017, "loss": 0.9347, "step": 24012 }, { "epoch": 0.6165865056706482, "grad_norm": 0.77734375, "learning_rate": 0.00014241337736204228, "loss": 0.9205, "step": 24013 }, { "epoch": 0.61661218286657, "grad_norm": 0.68359375, "learning_rate": 0.00014240933454074837, "loss": 0.9183, "step": 24014 }, { "epoch": 0.6166378600624919, "grad_norm": 0.8046875, "learning_rate": 0.00014240529163493646, "loss": 0.8968, "step": 24015 }, { "epoch": 0.6166635372584137, "grad_norm": 0.7890625, "learning_rate": 0.00014240124864461464, "loss": 1.0386, "step": 24016 }, { "epoch": 0.6166892144543356, "grad_norm": 0.828125, "learning_rate": 0.00014239720556979098, "loss": 0.9445, "step": 24017 }, { "epoch": 0.6167148916502573, "grad_norm": 0.80859375, "learning_rate": 0.00014239316241047346, "loss": 0.9104, "step": 24018 }, { "epoch": 0.6167405688461791, "grad_norm": 0.7890625, "learning_rate": 0.00014238911916667025, "loss": 0.8307, "step": 24019 }, { "epoch": 0.616766246042101, "grad_norm": 0.96484375, "learning_rate": 0.00014238507583838936, "loss": 0.8825, "step": 24020 }, { "epoch": 0.6167919232380228, "grad_norm": 0.78125, "learning_rate": 0.00014238103242563883, "loss": 0.785, "step": 24021 }, { "epoch": 0.6168176004339446, "grad_norm": 0.71484375, "learning_rate": 0.00014237698892842674, "loss": 0.9341, "step": 24022 }, { "epoch": 0.6168432776298665, "grad_norm": 0.75, "learning_rate": 0.00014237294534676112, "loss": 0.7622, "step": 24023 }, { "epoch": 0.6168689548257883, "grad_norm": 0.828125, "learning_rate": 0.00014236890168065004, "loss": 0.8937, "step": 24024 }, { "epoch": 0.61689463202171, "grad_norm": 0.76953125, "learning_rate": 0.0001423648579301016, "loss": 0.727, "step": 24025 }, { "epoch": 0.6169203092176319, "grad_norm": 0.76171875, "learning_rate": 0.00014236081409512382, "loss": 0.9327, "step": 24026 }, { "epoch": 0.6169459864135537, "grad_norm": 0.8203125, "learning_rate": 0.00014235677017572474, "loss": 0.8425, "step": 24027 }, { "epoch": 0.6169716636094755, "grad_norm": 0.796875, "learning_rate": 0.00014235272617191245, "loss": 0.8411, "step": 24028 }, { "epoch": 0.6169973408053974, "grad_norm": 0.76953125, "learning_rate": 0.000142348682083695, "loss": 0.943, "step": 24029 }, { "epoch": 0.6170230180013192, "grad_norm": 0.6640625, "learning_rate": 0.00014234463791108046, "loss": 0.763, "step": 24030 }, { "epoch": 0.617048695197241, "grad_norm": 0.71875, "learning_rate": 0.00014234059365407685, "loss": 0.8596, "step": 24031 }, { "epoch": 0.6170743723931628, "grad_norm": 0.7578125, "learning_rate": 0.00014233654931269233, "loss": 0.8665, "step": 24032 }, { "epoch": 0.6171000495890846, "grad_norm": 0.78515625, "learning_rate": 0.00014233250488693484, "loss": 0.7613, "step": 24033 }, { "epoch": 0.6171257267850064, "grad_norm": 0.78515625, "learning_rate": 0.00014232846037681252, "loss": 0.8755, "step": 24034 }, { "epoch": 0.6171514039809283, "grad_norm": 0.75390625, "learning_rate": 0.00014232441578233338, "loss": 0.8267, "step": 24035 }, { "epoch": 0.6171770811768501, "grad_norm": 0.88671875, "learning_rate": 0.0001423203711035055, "loss": 0.8681, "step": 24036 }, { "epoch": 0.6172027583727719, "grad_norm": 0.7890625, "learning_rate": 0.00014231632634033697, "loss": 0.7878, "step": 24037 }, { "epoch": 0.6172284355686937, "grad_norm": 0.7734375, "learning_rate": 0.00014231228149283578, "loss": 0.7909, "step": 24038 }, { "epoch": 0.6172541127646155, "grad_norm": 0.765625, "learning_rate": 0.00014230823656101007, "loss": 0.9142, "step": 24039 }, { "epoch": 0.6172797899605373, "grad_norm": 0.81640625, "learning_rate": 0.00014230419154486788, "loss": 0.8563, "step": 24040 }, { "epoch": 0.6173054671564592, "grad_norm": 0.71875, "learning_rate": 0.0001423001464444172, "loss": 0.7778, "step": 24041 }, { "epoch": 0.617331144352381, "grad_norm": 0.8125, "learning_rate": 0.0001422961012596662, "loss": 0.9206, "step": 24042 }, { "epoch": 0.6173568215483028, "grad_norm": 0.69921875, "learning_rate": 0.00014229205599062286, "loss": 0.8704, "step": 24043 }, { "epoch": 0.6173824987442247, "grad_norm": 0.7421875, "learning_rate": 0.0001422880106372953, "loss": 0.9154, "step": 24044 }, { "epoch": 0.6174081759401464, "grad_norm": 0.78125, "learning_rate": 0.00014228396519969154, "loss": 0.9271, "step": 24045 }, { "epoch": 0.6174338531360682, "grad_norm": 0.74609375, "learning_rate": 0.0001422799196778197, "loss": 0.8856, "step": 24046 }, { "epoch": 0.6174595303319901, "grad_norm": 0.765625, "learning_rate": 0.00014227587407168773, "loss": 0.872, "step": 24047 }, { "epoch": 0.6174852075279119, "grad_norm": 0.7734375, "learning_rate": 0.00014227182838130379, "loss": 0.9717, "step": 24048 }, { "epoch": 0.6175108847238338, "grad_norm": 0.80078125, "learning_rate": 0.00014226778260667587, "loss": 0.9607, "step": 24049 }, { "epoch": 0.6175365619197556, "grad_norm": 0.765625, "learning_rate": 0.00014226373674781215, "loss": 0.9681, "step": 24050 }, { "epoch": 0.6175622391156774, "grad_norm": 0.74609375, "learning_rate": 0.0001422596908047206, "loss": 0.9095, "step": 24051 }, { "epoch": 0.6175879163115991, "grad_norm": 0.765625, "learning_rate": 0.00014225564477740927, "loss": 0.7881, "step": 24052 }, { "epoch": 0.617613593507521, "grad_norm": 0.74609375, "learning_rate": 0.00014225159866588632, "loss": 0.8113, "step": 24053 }, { "epoch": 0.6176392707034428, "grad_norm": 0.91796875, "learning_rate": 0.0001422475524701597, "loss": 1.0303, "step": 24054 }, { "epoch": 0.6176649478993647, "grad_norm": 0.76171875, "learning_rate": 0.00014224350619023753, "loss": 0.8097, "step": 24055 }, { "epoch": 0.6176906250952865, "grad_norm": 0.7421875, "learning_rate": 0.00014223945982612786, "loss": 0.8139, "step": 24056 }, { "epoch": 0.6177163022912083, "grad_norm": 0.72265625, "learning_rate": 0.0001422354133778388, "loss": 0.7276, "step": 24057 }, { "epoch": 0.61774197948713, "grad_norm": 0.7421875, "learning_rate": 0.00014223136684537833, "loss": 0.9204, "step": 24058 }, { "epoch": 0.6177676566830519, "grad_norm": 0.7890625, "learning_rate": 0.0001422273202287546, "loss": 0.8448, "step": 24059 }, { "epoch": 0.6177933338789737, "grad_norm": 0.671875, "learning_rate": 0.0001422232735279756, "loss": 0.7436, "step": 24060 }, { "epoch": 0.6178190110748956, "grad_norm": 0.80859375, "learning_rate": 0.00014221922674304945, "loss": 0.8206, "step": 24061 }, { "epoch": 0.6178446882708174, "grad_norm": 0.8359375, "learning_rate": 0.0001422151798739842, "loss": 0.8919, "step": 24062 }, { "epoch": 0.6178703654667392, "grad_norm": 0.7734375, "learning_rate": 0.0001422111329207879, "loss": 0.8378, "step": 24063 }, { "epoch": 0.6178960426626611, "grad_norm": 0.796875, "learning_rate": 0.00014220708588346862, "loss": 0.8014, "step": 24064 }, { "epoch": 0.6179217198585828, "grad_norm": 0.73046875, "learning_rate": 0.00014220303876203444, "loss": 0.7597, "step": 24065 }, { "epoch": 0.6179473970545046, "grad_norm": 0.75, "learning_rate": 0.00014219899155649344, "loss": 0.6869, "step": 24066 }, { "epoch": 0.6179730742504265, "grad_norm": 0.7734375, "learning_rate": 0.00014219494426685362, "loss": 0.7804, "step": 24067 }, { "epoch": 0.6179987514463483, "grad_norm": 0.796875, "learning_rate": 0.0001421908968931231, "loss": 0.8078, "step": 24068 }, { "epoch": 0.6180244286422701, "grad_norm": 0.796875, "learning_rate": 0.00014218684943530994, "loss": 0.8935, "step": 24069 }, { "epoch": 0.618050105838192, "grad_norm": 0.8125, "learning_rate": 0.0001421828018934222, "loss": 0.8353, "step": 24070 }, { "epoch": 0.6180757830341138, "grad_norm": 0.82421875, "learning_rate": 0.00014217875426746795, "loss": 0.8178, "step": 24071 }, { "epoch": 0.6181014602300355, "grad_norm": 0.78125, "learning_rate": 0.00014217470655745524, "loss": 0.8116, "step": 24072 }, { "epoch": 0.6181271374259574, "grad_norm": 0.75, "learning_rate": 0.00014217065876339215, "loss": 0.7541, "step": 24073 }, { "epoch": 0.6181528146218792, "grad_norm": 0.7890625, "learning_rate": 0.00014216661088528677, "loss": 0.8726, "step": 24074 }, { "epoch": 0.618178491817801, "grad_norm": 0.7734375, "learning_rate": 0.0001421625629231471, "loss": 0.8922, "step": 24075 }, { "epoch": 0.6182041690137229, "grad_norm": 0.78125, "learning_rate": 0.00014215851487698128, "loss": 0.9765, "step": 24076 }, { "epoch": 0.6182298462096447, "grad_norm": 0.7734375, "learning_rate": 0.00014215446674679732, "loss": 0.876, "step": 24077 }, { "epoch": 0.6182555234055664, "grad_norm": 0.75390625, "learning_rate": 0.0001421504185326034, "loss": 0.8905, "step": 24078 }, { "epoch": 0.6182812006014883, "grad_norm": 0.765625, "learning_rate": 0.00014214637023440743, "loss": 0.7847, "step": 24079 }, { "epoch": 0.6183068777974101, "grad_norm": 0.7734375, "learning_rate": 0.00014214232185221753, "loss": 0.8508, "step": 24080 }, { "epoch": 0.618332554993332, "grad_norm": 0.77734375, "learning_rate": 0.00014213827338604182, "loss": 0.9267, "step": 24081 }, { "epoch": 0.6183582321892538, "grad_norm": 0.69921875, "learning_rate": 0.00014213422483588836, "loss": 0.7335, "step": 24082 }, { "epoch": 0.6183839093851756, "grad_norm": 0.78125, "learning_rate": 0.0001421301762017652, "loss": 0.823, "step": 24083 }, { "epoch": 0.6184095865810975, "grad_norm": 0.71875, "learning_rate": 0.00014212612748368034, "loss": 0.6867, "step": 24084 }, { "epoch": 0.6184352637770192, "grad_norm": 0.7734375, "learning_rate": 0.000142122078681642, "loss": 0.8424, "step": 24085 }, { "epoch": 0.618460940972941, "grad_norm": 0.84765625, "learning_rate": 0.0001421180297956581, "loss": 0.85, "step": 24086 }, { "epoch": 0.6184866181688629, "grad_norm": 0.7421875, "learning_rate": 0.00014211398082573676, "loss": 0.7758, "step": 24087 }, { "epoch": 0.6185122953647847, "grad_norm": 0.8203125, "learning_rate": 0.0001421099317718861, "loss": 0.8025, "step": 24088 }, { "epoch": 0.6185379725607065, "grad_norm": 0.66015625, "learning_rate": 0.00014210588263411413, "loss": 0.7958, "step": 24089 }, { "epoch": 0.6185636497566284, "grad_norm": 0.90234375, "learning_rate": 0.00014210183341242894, "loss": 0.9067, "step": 24090 }, { "epoch": 0.6185893269525502, "grad_norm": 0.80078125, "learning_rate": 0.00014209778410683864, "loss": 0.9677, "step": 24091 }, { "epoch": 0.6186150041484719, "grad_norm": 0.77734375, "learning_rate": 0.0001420937347173512, "loss": 0.7912, "step": 24092 }, { "epoch": 0.6186406813443938, "grad_norm": 0.75390625, "learning_rate": 0.0001420896852439748, "loss": 0.9731, "step": 24093 }, { "epoch": 0.6186663585403156, "grad_norm": 0.796875, "learning_rate": 0.00014208563568671742, "loss": 0.8258, "step": 24094 }, { "epoch": 0.6186920357362374, "grad_norm": 0.75390625, "learning_rate": 0.00014208158604558718, "loss": 0.7907, "step": 24095 }, { "epoch": 0.6187177129321593, "grad_norm": 0.71875, "learning_rate": 0.00014207753632059213, "loss": 0.7692, "step": 24096 }, { "epoch": 0.6187433901280811, "grad_norm": 0.72265625, "learning_rate": 0.00014207348651174034, "loss": 0.7722, "step": 24097 }, { "epoch": 0.6187690673240028, "grad_norm": 0.66015625, "learning_rate": 0.00014206943661903993, "loss": 0.8462, "step": 24098 }, { "epoch": 0.6187947445199247, "grad_norm": 0.7421875, "learning_rate": 0.00014206538664249894, "loss": 0.9011, "step": 24099 }, { "epoch": 0.6188204217158465, "grad_norm": 0.76171875, "learning_rate": 0.00014206133658212542, "loss": 0.8164, "step": 24100 }, { "epoch": 0.6188460989117683, "grad_norm": 0.76171875, "learning_rate": 0.00014205728643792745, "loss": 0.8746, "step": 24101 }, { "epoch": 0.6188717761076902, "grad_norm": 0.7890625, "learning_rate": 0.0001420532362099131, "loss": 0.8844, "step": 24102 }, { "epoch": 0.618897453303612, "grad_norm": 0.7890625, "learning_rate": 0.00014204918589809045, "loss": 0.8886, "step": 24103 }, { "epoch": 0.6189231304995338, "grad_norm": 0.796875, "learning_rate": 0.00014204513550246758, "loss": 0.7275, "step": 24104 }, { "epoch": 0.6189488076954556, "grad_norm": 0.76953125, "learning_rate": 0.00014204108502305254, "loss": 0.8238, "step": 24105 }, { "epoch": 0.6189744848913774, "grad_norm": 0.8125, "learning_rate": 0.00014203703445985342, "loss": 1.0573, "step": 24106 }, { "epoch": 0.6190001620872992, "grad_norm": 0.75, "learning_rate": 0.0001420329838128783, "loss": 0.909, "step": 24107 }, { "epoch": 0.6190258392832211, "grad_norm": 0.76953125, "learning_rate": 0.0001420289330821352, "loss": 0.6738, "step": 24108 }, { "epoch": 0.6190515164791429, "grad_norm": 0.79296875, "learning_rate": 0.00014202488226763228, "loss": 0.6968, "step": 24109 }, { "epoch": 0.6190771936750648, "grad_norm": 0.86328125, "learning_rate": 0.00014202083136937756, "loss": 0.7834, "step": 24110 }, { "epoch": 0.6191028708709866, "grad_norm": 0.7109375, "learning_rate": 0.00014201678038737908, "loss": 0.7251, "step": 24111 }, { "epoch": 0.6191285480669083, "grad_norm": 0.7890625, "learning_rate": 0.00014201272932164502, "loss": 0.8858, "step": 24112 }, { "epoch": 0.6191542252628301, "grad_norm": 0.74609375, "learning_rate": 0.00014200867817218334, "loss": 0.8791, "step": 24113 }, { "epoch": 0.619179902458752, "grad_norm": 0.8046875, "learning_rate": 0.00014200462693900217, "loss": 0.9492, "step": 24114 }, { "epoch": 0.6192055796546738, "grad_norm": 0.7734375, "learning_rate": 0.00014200057562210956, "loss": 0.852, "step": 24115 }, { "epoch": 0.6192312568505957, "grad_norm": 0.78515625, "learning_rate": 0.00014199652422151358, "loss": 0.8957, "step": 24116 }, { "epoch": 0.6192569340465175, "grad_norm": 0.76171875, "learning_rate": 0.00014199247273722233, "loss": 0.8908, "step": 24117 }, { "epoch": 0.6192826112424392, "grad_norm": 0.75, "learning_rate": 0.0001419884211692439, "loss": 0.8311, "step": 24118 }, { "epoch": 0.619308288438361, "grad_norm": 0.80078125, "learning_rate": 0.00014198436951758634, "loss": 0.9164, "step": 24119 }, { "epoch": 0.6193339656342829, "grad_norm": 0.734375, "learning_rate": 0.00014198031778225772, "loss": 0.8523, "step": 24120 }, { "epoch": 0.6193596428302047, "grad_norm": 0.8515625, "learning_rate": 0.0001419762659632661, "loss": 0.8844, "step": 24121 }, { "epoch": 0.6193853200261266, "grad_norm": 0.73828125, "learning_rate": 0.00014197221406061957, "loss": 0.8315, "step": 24122 }, { "epoch": 0.6194109972220484, "grad_norm": 0.79296875, "learning_rate": 0.00014196816207432623, "loss": 0.7732, "step": 24123 }, { "epoch": 0.6194366744179702, "grad_norm": 0.78515625, "learning_rate": 0.00014196411000439414, "loss": 0.869, "step": 24124 }, { "epoch": 0.619462351613892, "grad_norm": 0.78515625, "learning_rate": 0.0001419600578508314, "loss": 0.901, "step": 24125 }, { "epoch": 0.6194880288098138, "grad_norm": 0.75390625, "learning_rate": 0.00014195600561364596, "loss": 0.8406, "step": 24126 }, { "epoch": 0.6195137060057356, "grad_norm": 0.8046875, "learning_rate": 0.00014195195329284605, "loss": 0.896, "step": 24127 }, { "epoch": 0.6195393832016575, "grad_norm": 0.79296875, "learning_rate": 0.0001419479008884397, "loss": 0.9681, "step": 24128 }, { "epoch": 0.6195650603975793, "grad_norm": 0.83203125, "learning_rate": 0.00014194384840043493, "loss": 0.9114, "step": 24129 }, { "epoch": 0.6195907375935011, "grad_norm": 0.75390625, "learning_rate": 0.0001419397958288399, "loss": 0.9282, "step": 24130 }, { "epoch": 0.619616414789423, "grad_norm": 0.81640625, "learning_rate": 0.00014193574317366263, "loss": 1.0562, "step": 24131 }, { "epoch": 0.6196420919853447, "grad_norm": 0.796875, "learning_rate": 0.00014193169043491118, "loss": 1.0004, "step": 24132 }, { "epoch": 0.6196677691812665, "grad_norm": 0.76953125, "learning_rate": 0.00014192763761259372, "loss": 0.8772, "step": 24133 }, { "epoch": 0.6196934463771884, "grad_norm": 0.76171875, "learning_rate": 0.0001419235847067182, "loss": 0.8123, "step": 24134 }, { "epoch": 0.6197191235731102, "grad_norm": 0.75, "learning_rate": 0.00014191953171729282, "loss": 0.7759, "step": 24135 }, { "epoch": 0.619744800769032, "grad_norm": 0.76171875, "learning_rate": 0.00014191547864432556, "loss": 0.7693, "step": 24136 }, { "epoch": 0.6197704779649539, "grad_norm": 0.734375, "learning_rate": 0.00014191142548782454, "loss": 0.7583, "step": 24137 }, { "epoch": 0.6197961551608756, "grad_norm": 0.72265625, "learning_rate": 0.0001419073722477979, "loss": 0.871, "step": 24138 }, { "epoch": 0.6198218323567974, "grad_norm": 0.8046875, "learning_rate": 0.00014190331892425357, "loss": 0.6903, "step": 24139 }, { "epoch": 0.6198475095527193, "grad_norm": 0.7890625, "learning_rate": 0.00014189926551719974, "loss": 0.847, "step": 24140 }, { "epoch": 0.6198731867486411, "grad_norm": 0.7265625, "learning_rate": 0.00014189521202664446, "loss": 0.7369, "step": 24141 }, { "epoch": 0.619898863944563, "grad_norm": 0.80859375, "learning_rate": 0.0001418911584525958, "loss": 0.979, "step": 24142 }, { "epoch": 0.6199245411404848, "grad_norm": 0.77734375, "learning_rate": 0.00014188710479506183, "loss": 0.8958, "step": 24143 }, { "epoch": 0.6199502183364066, "grad_norm": 0.75, "learning_rate": 0.00014188305105405067, "loss": 0.8082, "step": 24144 }, { "epoch": 0.6199758955323283, "grad_norm": 0.77734375, "learning_rate": 0.0001418789972295704, "loss": 0.7373, "step": 24145 }, { "epoch": 0.6200015727282502, "grad_norm": 0.7734375, "learning_rate": 0.00014187494332162904, "loss": 0.8214, "step": 24146 }, { "epoch": 0.620027249924172, "grad_norm": 0.7578125, "learning_rate": 0.0001418708893302347, "loss": 0.8463, "step": 24147 }, { "epoch": 0.6200529271200939, "grad_norm": 0.796875, "learning_rate": 0.00014186683525539543, "loss": 0.7425, "step": 24148 }, { "epoch": 0.6200786043160157, "grad_norm": 0.76953125, "learning_rate": 0.00014186278109711936, "loss": 0.8083, "step": 24149 }, { "epoch": 0.6201042815119375, "grad_norm": 0.734375, "learning_rate": 0.00014185872685541456, "loss": 0.922, "step": 24150 }, { "epoch": 0.6201299587078594, "grad_norm": 0.8046875, "learning_rate": 0.00014185467253028912, "loss": 0.8633, "step": 24151 }, { "epoch": 0.6201556359037811, "grad_norm": 0.8359375, "learning_rate": 0.00014185061812175105, "loss": 0.9772, "step": 24152 }, { "epoch": 0.6201813130997029, "grad_norm": 0.734375, "learning_rate": 0.0001418465636298085, "loss": 0.8045, "step": 24153 }, { "epoch": 0.6202069902956248, "grad_norm": 0.83984375, "learning_rate": 0.00014184250905446955, "loss": 0.8826, "step": 24154 }, { "epoch": 0.6202326674915466, "grad_norm": 0.80859375, "learning_rate": 0.00014183845439574223, "loss": 1.018, "step": 24155 }, { "epoch": 0.6202583446874684, "grad_norm": 0.78515625, "learning_rate": 0.00014183439965363465, "loss": 0.7751, "step": 24156 }, { "epoch": 0.6202840218833903, "grad_norm": 0.75, "learning_rate": 0.00014183034482815492, "loss": 0.8843, "step": 24157 }, { "epoch": 0.620309699079312, "grad_norm": 0.796875, "learning_rate": 0.00014182628991931106, "loss": 0.8724, "step": 24158 }, { "epoch": 0.6203353762752338, "grad_norm": 0.81640625, "learning_rate": 0.0001418222349271112, "loss": 0.9328, "step": 24159 }, { "epoch": 0.6203610534711557, "grad_norm": 0.76171875, "learning_rate": 0.0001418181798515634, "loss": 0.8166, "step": 24160 }, { "epoch": 0.6203867306670775, "grad_norm": 0.7578125, "learning_rate": 0.0001418141246926757, "loss": 0.7853, "step": 24161 }, { "epoch": 0.6204124078629993, "grad_norm": 0.7421875, "learning_rate": 0.00014181006945045632, "loss": 0.8433, "step": 24162 }, { "epoch": 0.6204380850589212, "grad_norm": 0.73828125, "learning_rate": 0.0001418060141249132, "loss": 0.8394, "step": 24163 }, { "epoch": 0.620463762254843, "grad_norm": 0.9921875, "learning_rate": 0.00014180195871605445, "loss": 0.9202, "step": 24164 }, { "epoch": 0.6204894394507647, "grad_norm": 0.71484375, "learning_rate": 0.0001417979032238882, "loss": 0.8508, "step": 24165 }, { "epoch": 0.6205151166466866, "grad_norm": 0.82421875, "learning_rate": 0.00014179384764842248, "loss": 0.8218, "step": 24166 }, { "epoch": 0.6205407938426084, "grad_norm": 0.7578125, "learning_rate": 0.0001417897919896654, "loss": 0.922, "step": 24167 }, { "epoch": 0.6205664710385302, "grad_norm": 0.734375, "learning_rate": 0.00014178573624762505, "loss": 0.7886, "step": 24168 }, { "epoch": 0.6205921482344521, "grad_norm": 0.7421875, "learning_rate": 0.0001417816804223095, "loss": 0.8247, "step": 24169 }, { "epoch": 0.6206178254303739, "grad_norm": 0.79296875, "learning_rate": 0.00014177762451372686, "loss": 0.9198, "step": 24170 }, { "epoch": 0.6206435026262956, "grad_norm": 0.7421875, "learning_rate": 0.00014177356852188518, "loss": 0.8279, "step": 24171 }, { "epoch": 0.6206691798222175, "grad_norm": 0.7421875, "learning_rate": 0.0001417695124467925, "loss": 0.7155, "step": 24172 }, { "epoch": 0.6206948570181393, "grad_norm": 0.80078125, "learning_rate": 0.00014176545628845702, "loss": 0.8452, "step": 24173 }, { "epoch": 0.6207205342140611, "grad_norm": 0.7578125, "learning_rate": 0.00014176140004688672, "loss": 0.8729, "step": 24174 }, { "epoch": 0.620746211409983, "grad_norm": 0.8515625, "learning_rate": 0.00014175734372208972, "loss": 0.8587, "step": 24175 }, { "epoch": 0.6207718886059048, "grad_norm": 0.77734375, "learning_rate": 0.0001417532873140741, "loss": 0.8745, "step": 24176 }, { "epoch": 0.6207975658018267, "grad_norm": 0.76171875, "learning_rate": 0.00014174923082284796, "loss": 0.7521, "step": 24177 }, { "epoch": 0.6208232429977484, "grad_norm": 0.75390625, "learning_rate": 0.0001417451742484194, "loss": 0.8374, "step": 24178 }, { "epoch": 0.6208489201936702, "grad_norm": 0.76171875, "learning_rate": 0.0001417411175907965, "loss": 0.8777, "step": 24179 }, { "epoch": 0.620874597389592, "grad_norm": 0.80078125, "learning_rate": 0.00014173706084998726, "loss": 0.8511, "step": 24180 }, { "epoch": 0.6209002745855139, "grad_norm": 0.80078125, "learning_rate": 0.00014173300402599984, "loss": 0.7418, "step": 24181 }, { "epoch": 0.6209259517814357, "grad_norm": 0.74609375, "learning_rate": 0.00014172894711884232, "loss": 0.7679, "step": 24182 }, { "epoch": 0.6209516289773576, "grad_norm": 0.71875, "learning_rate": 0.00014172489012852276, "loss": 0.8518, "step": 24183 }, { "epoch": 0.6209773061732794, "grad_norm": 0.83984375, "learning_rate": 0.00014172083305504933, "loss": 1.0647, "step": 24184 }, { "epoch": 0.6210029833692011, "grad_norm": 0.83984375, "learning_rate": 0.00014171677589843, "loss": 0.8026, "step": 24185 }, { "epoch": 0.621028660565123, "grad_norm": 0.78125, "learning_rate": 0.0001417127186586729, "loss": 0.8576, "step": 24186 }, { "epoch": 0.6210543377610448, "grad_norm": 0.8359375, "learning_rate": 0.00014170866133578612, "loss": 0.8124, "step": 24187 }, { "epoch": 0.6210800149569666, "grad_norm": 0.83984375, "learning_rate": 0.00014170460392977775, "loss": 0.8787, "step": 24188 }, { "epoch": 0.6211056921528885, "grad_norm": 0.796875, "learning_rate": 0.00014170054644065586, "loss": 0.8988, "step": 24189 }, { "epoch": 0.6211313693488103, "grad_norm": 0.734375, "learning_rate": 0.0001416964888684286, "loss": 0.8365, "step": 24190 }, { "epoch": 0.621157046544732, "grad_norm": 0.69921875, "learning_rate": 0.00014169243121310397, "loss": 0.6968, "step": 24191 }, { "epoch": 0.6211827237406539, "grad_norm": 0.859375, "learning_rate": 0.00014168837347469006, "loss": 0.8961, "step": 24192 }, { "epoch": 0.6212084009365757, "grad_norm": 0.7578125, "learning_rate": 0.000141684315653195, "loss": 0.7515, "step": 24193 }, { "epoch": 0.6212340781324975, "grad_norm": 0.78125, "learning_rate": 0.00014168025774862687, "loss": 0.9541, "step": 24194 }, { "epoch": 0.6212597553284194, "grad_norm": 0.8359375, "learning_rate": 0.00014167619976099377, "loss": 0.9487, "step": 24195 }, { "epoch": 0.6212854325243412, "grad_norm": 0.765625, "learning_rate": 0.00014167214169030377, "loss": 0.9544, "step": 24196 }, { "epoch": 0.621311109720263, "grad_norm": 0.80078125, "learning_rate": 0.00014166808353656497, "loss": 0.8636, "step": 24197 }, { "epoch": 0.6213367869161848, "grad_norm": 0.7578125, "learning_rate": 0.00014166402529978538, "loss": 0.7669, "step": 24198 }, { "epoch": 0.6213624641121066, "grad_norm": 0.74609375, "learning_rate": 0.00014165996697997318, "loss": 0.8034, "step": 24199 }, { "epoch": 0.6213881413080284, "grad_norm": 0.7578125, "learning_rate": 0.00014165590857713646, "loss": 0.8043, "step": 24200 }, { "epoch": 0.6214138185039503, "grad_norm": 0.78515625, "learning_rate": 0.00014165185009128324, "loss": 0.7653, "step": 24201 }, { "epoch": 0.6214394956998721, "grad_norm": 0.76953125, "learning_rate": 0.00014164779152242164, "loss": 0.8248, "step": 24202 }, { "epoch": 0.621465172895794, "grad_norm": 0.7890625, "learning_rate": 0.00014164373287055978, "loss": 0.8311, "step": 24203 }, { "epoch": 0.6214908500917158, "grad_norm": 0.87109375, "learning_rate": 0.00014163967413570574, "loss": 0.9767, "step": 24204 }, { "epoch": 0.6215165272876375, "grad_norm": 0.7890625, "learning_rate": 0.00014163561531786755, "loss": 0.883, "step": 24205 }, { "epoch": 0.6215422044835593, "grad_norm": 0.80859375, "learning_rate": 0.00014163155641705336, "loss": 0.8805, "step": 24206 }, { "epoch": 0.6215678816794812, "grad_norm": 0.75390625, "learning_rate": 0.00014162749743327124, "loss": 0.8574, "step": 24207 }, { "epoch": 0.621593558875403, "grad_norm": 0.7578125, "learning_rate": 0.00014162343836652928, "loss": 0.9026, "step": 24208 }, { "epoch": 0.6216192360713249, "grad_norm": 0.75390625, "learning_rate": 0.00014161937921683553, "loss": 0.8156, "step": 24209 }, { "epoch": 0.6216449132672467, "grad_norm": 0.78125, "learning_rate": 0.00014161531998419817, "loss": 0.8751, "step": 24210 }, { "epoch": 0.6216705904631684, "grad_norm": 0.83984375, "learning_rate": 0.0001416112606686252, "loss": 0.8589, "step": 24211 }, { "epoch": 0.6216962676590903, "grad_norm": 0.70703125, "learning_rate": 0.00014160720127012475, "loss": 0.7776, "step": 24212 }, { "epoch": 0.6217219448550121, "grad_norm": 0.79296875, "learning_rate": 0.00014160314178870493, "loss": 0.8419, "step": 24213 }, { "epoch": 0.6217476220509339, "grad_norm": 0.81640625, "learning_rate": 0.00014159908222437378, "loss": 0.7544, "step": 24214 }, { "epoch": 0.6217732992468558, "grad_norm": 0.7578125, "learning_rate": 0.0001415950225771394, "loss": 0.8246, "step": 24215 }, { "epoch": 0.6217989764427776, "grad_norm": 0.71875, "learning_rate": 0.00014159096284700994, "loss": 0.9198, "step": 24216 }, { "epoch": 0.6218246536386994, "grad_norm": 0.7734375, "learning_rate": 0.00014158690303399344, "loss": 0.8772, "step": 24217 }, { "epoch": 0.6218503308346212, "grad_norm": 0.796875, "learning_rate": 0.000141582843138098, "loss": 0.9149, "step": 24218 }, { "epoch": 0.621876008030543, "grad_norm": 0.77734375, "learning_rate": 0.0001415787831593317, "loss": 0.8626, "step": 24219 }, { "epoch": 0.6219016852264648, "grad_norm": 0.875, "learning_rate": 0.00014157472309770262, "loss": 0.9073, "step": 24220 }, { "epoch": 0.6219273624223867, "grad_norm": 0.77734375, "learning_rate": 0.0001415706629532189, "loss": 0.7848, "step": 24221 }, { "epoch": 0.6219530396183085, "grad_norm": 0.73828125, "learning_rate": 0.00014156660272588857, "loss": 0.8437, "step": 24222 }, { "epoch": 0.6219787168142303, "grad_norm": 0.75, "learning_rate": 0.0001415625424157198, "loss": 0.9148, "step": 24223 }, { "epoch": 0.6220043940101522, "grad_norm": 0.8359375, "learning_rate": 0.00014155848202272064, "loss": 0.9787, "step": 24224 }, { "epoch": 0.6220300712060739, "grad_norm": 0.76171875, "learning_rate": 0.00014155442154689915, "loss": 0.7762, "step": 24225 }, { "epoch": 0.6220557484019957, "grad_norm": 0.84765625, "learning_rate": 0.00014155036098826348, "loss": 0.8013, "step": 24226 }, { "epoch": 0.6220814255979176, "grad_norm": 0.765625, "learning_rate": 0.00014154630034682165, "loss": 0.895, "step": 24227 }, { "epoch": 0.6221071027938394, "grad_norm": 0.7890625, "learning_rate": 0.00014154223962258182, "loss": 0.8735, "step": 24228 }, { "epoch": 0.6221327799897612, "grad_norm": 0.75390625, "learning_rate": 0.00014153817881555207, "loss": 0.7943, "step": 24229 }, { "epoch": 0.6221584571856831, "grad_norm": 0.83203125, "learning_rate": 0.00014153411792574048, "loss": 0.9595, "step": 24230 }, { "epoch": 0.6221841343816048, "grad_norm": 0.9140625, "learning_rate": 0.00014153005695315515, "loss": 0.9026, "step": 24231 }, { "epoch": 0.6222098115775266, "grad_norm": 0.7578125, "learning_rate": 0.00014152599589780412, "loss": 0.8, "step": 24232 }, { "epoch": 0.6222354887734485, "grad_norm": 0.75, "learning_rate": 0.00014152193475969557, "loss": 0.8399, "step": 24233 }, { "epoch": 0.6222611659693703, "grad_norm": 0.76953125, "learning_rate": 0.00014151787353883756, "loss": 0.9052, "step": 24234 }, { "epoch": 0.6222868431652921, "grad_norm": 0.75390625, "learning_rate": 0.00014151381223523817, "loss": 0.855, "step": 24235 }, { "epoch": 0.622312520361214, "grad_norm": 0.77734375, "learning_rate": 0.0001415097508489055, "loss": 0.9464, "step": 24236 }, { "epoch": 0.6223381975571358, "grad_norm": 0.73828125, "learning_rate": 0.00014150568937984766, "loss": 0.7827, "step": 24237 }, { "epoch": 0.6223638747530575, "grad_norm": 0.7734375, "learning_rate": 0.0001415016278280727, "loss": 0.8737, "step": 24238 }, { "epoch": 0.6223895519489794, "grad_norm": 0.671875, "learning_rate": 0.00014149756619358876, "loss": 0.691, "step": 24239 }, { "epoch": 0.6224152291449012, "grad_norm": 0.859375, "learning_rate": 0.00014149350447640392, "loss": 0.8493, "step": 24240 }, { "epoch": 0.622440906340823, "grad_norm": 0.98046875, "learning_rate": 0.00014148944267652625, "loss": 0.8988, "step": 24241 }, { "epoch": 0.6224665835367449, "grad_norm": 0.80078125, "learning_rate": 0.0001414853807939639, "loss": 0.8879, "step": 24242 }, { "epoch": 0.6224922607326667, "grad_norm": 0.7578125, "learning_rate": 0.00014148131882872495, "loss": 0.8266, "step": 24243 }, { "epoch": 0.6225179379285886, "grad_norm": 0.796875, "learning_rate": 0.00014147725678081744, "loss": 0.8632, "step": 24244 }, { "epoch": 0.6225436151245103, "grad_norm": 0.81640625, "learning_rate": 0.00014147319465024954, "loss": 0.8252, "step": 24245 }, { "epoch": 0.6225692923204321, "grad_norm": 0.796875, "learning_rate": 0.00014146913243702925, "loss": 0.8221, "step": 24246 }, { "epoch": 0.622594969516354, "grad_norm": 0.765625, "learning_rate": 0.00014146507014116478, "loss": 0.836, "step": 24247 }, { "epoch": 0.6226206467122758, "grad_norm": 0.74609375, "learning_rate": 0.00014146100776266416, "loss": 0.8048, "step": 24248 }, { "epoch": 0.6226463239081976, "grad_norm": 0.76953125, "learning_rate": 0.0001414569453015355, "loss": 0.779, "step": 24249 }, { "epoch": 0.6226720011041195, "grad_norm": 0.76953125, "learning_rate": 0.00014145288275778687, "loss": 0.9531, "step": 24250 }, { "epoch": 0.6226976783000412, "grad_norm": 0.765625, "learning_rate": 0.00014144882013142639, "loss": 0.9299, "step": 24251 }, { "epoch": 0.622723355495963, "grad_norm": 0.7578125, "learning_rate": 0.00014144475742246216, "loss": 0.8666, "step": 24252 }, { "epoch": 0.6227490326918849, "grad_norm": 0.77734375, "learning_rate": 0.0001414406946309023, "loss": 0.8893, "step": 24253 }, { "epoch": 0.6227747098878067, "grad_norm": 0.74609375, "learning_rate": 0.00014143663175675487, "loss": 0.8922, "step": 24254 }, { "epoch": 0.6228003870837285, "grad_norm": 0.71484375, "learning_rate": 0.00014143256880002795, "loss": 0.9105, "step": 24255 }, { "epoch": 0.6228260642796504, "grad_norm": 0.7265625, "learning_rate": 0.0001414285057607297, "loss": 0.8911, "step": 24256 }, { "epoch": 0.6228517414755722, "grad_norm": 0.94140625, "learning_rate": 0.00014142444263886812, "loss": 0.9166, "step": 24257 }, { "epoch": 0.6228774186714939, "grad_norm": 0.734375, "learning_rate": 0.00014142037943445143, "loss": 0.7381, "step": 24258 }, { "epoch": 0.6229030958674158, "grad_norm": 0.703125, "learning_rate": 0.0001414163161474876, "loss": 0.7561, "step": 24259 }, { "epoch": 0.6229287730633376, "grad_norm": 0.78515625, "learning_rate": 0.00014141225277798487, "loss": 0.8264, "step": 24260 }, { "epoch": 0.6229544502592594, "grad_norm": 0.80078125, "learning_rate": 0.00014140818932595122, "loss": 0.9615, "step": 24261 }, { "epoch": 0.6229801274551813, "grad_norm": 0.73046875, "learning_rate": 0.0001414041257913948, "loss": 0.9645, "step": 24262 }, { "epoch": 0.6230058046511031, "grad_norm": 0.80078125, "learning_rate": 0.0001414000621743237, "loss": 0.8702, "step": 24263 }, { "epoch": 0.623031481847025, "grad_norm": 0.76171875, "learning_rate": 0.000141395998474746, "loss": 0.9448, "step": 24264 }, { "epoch": 0.6230571590429467, "grad_norm": 0.828125, "learning_rate": 0.00014139193469266982, "loss": 0.8084, "step": 24265 }, { "epoch": 0.6230828362388685, "grad_norm": 0.8046875, "learning_rate": 0.00014138787082810327, "loss": 0.9772, "step": 24266 }, { "epoch": 0.6231085134347903, "grad_norm": 0.70703125, "learning_rate": 0.0001413838068810544, "loss": 0.9775, "step": 24267 }, { "epoch": 0.6231341906307122, "grad_norm": 0.859375, "learning_rate": 0.00014137974285153136, "loss": 1.0677, "step": 24268 }, { "epoch": 0.623159867826634, "grad_norm": 0.875, "learning_rate": 0.00014137567873954223, "loss": 0.8087, "step": 24269 }, { "epoch": 0.6231855450225559, "grad_norm": 0.73046875, "learning_rate": 0.00014137161454509512, "loss": 0.8105, "step": 24270 }, { "epoch": 0.6232112222184776, "grad_norm": 0.765625, "learning_rate": 0.0001413675502681981, "loss": 0.8656, "step": 24271 }, { "epoch": 0.6232368994143994, "grad_norm": 0.83984375, "learning_rate": 0.00014136348590885927, "loss": 0.9144, "step": 24272 }, { "epoch": 0.6232625766103213, "grad_norm": 0.83984375, "learning_rate": 0.0001413594214670868, "loss": 0.7969, "step": 24273 }, { "epoch": 0.6232882538062431, "grad_norm": 0.76953125, "learning_rate": 0.0001413553569428887, "loss": 0.944, "step": 24274 }, { "epoch": 0.6233139310021649, "grad_norm": 0.74609375, "learning_rate": 0.00014135129233627315, "loss": 0.817, "step": 24275 }, { "epoch": 0.6233396081980868, "grad_norm": 0.7734375, "learning_rate": 0.00014134722764724817, "loss": 0.782, "step": 24276 }, { "epoch": 0.6233652853940086, "grad_norm": 0.7578125, "learning_rate": 0.00014134316287582193, "loss": 0.7299, "step": 24277 }, { "epoch": 0.6233909625899303, "grad_norm": 0.875, "learning_rate": 0.00014133909802200244, "loss": 1.019, "step": 24278 }, { "epoch": 0.6234166397858522, "grad_norm": 0.8203125, "learning_rate": 0.0001413350330857979, "loss": 0.8571, "step": 24279 }, { "epoch": 0.623442316981774, "grad_norm": 0.796875, "learning_rate": 0.0001413309680672164, "loss": 0.9532, "step": 24280 }, { "epoch": 0.6234679941776958, "grad_norm": 0.86328125, "learning_rate": 0.000141326902966266, "loss": 0.8578, "step": 24281 }, { "epoch": 0.6234936713736177, "grad_norm": 0.81640625, "learning_rate": 0.00014132283778295479, "loss": 0.8752, "step": 24282 }, { "epoch": 0.6235193485695395, "grad_norm": 0.83203125, "learning_rate": 0.00014131877251729092, "loss": 0.8811, "step": 24283 }, { "epoch": 0.6235450257654613, "grad_norm": 0.87109375, "learning_rate": 0.00014131470716928247, "loss": 0.7433, "step": 24284 }, { "epoch": 0.6235707029613831, "grad_norm": 0.7578125, "learning_rate": 0.00014131064173893753, "loss": 0.816, "step": 24285 }, { "epoch": 0.6235963801573049, "grad_norm": 0.8359375, "learning_rate": 0.0001413065762262642, "loss": 0.9858, "step": 24286 }, { "epoch": 0.6236220573532267, "grad_norm": 0.80078125, "learning_rate": 0.00014130251063127064, "loss": 0.7614, "step": 24287 }, { "epoch": 0.6236477345491486, "grad_norm": 0.78125, "learning_rate": 0.00014129844495396488, "loss": 0.8698, "step": 24288 }, { "epoch": 0.6236734117450704, "grad_norm": 0.7890625, "learning_rate": 0.00014129437919435504, "loss": 0.84, "step": 24289 }, { "epoch": 0.6236990889409922, "grad_norm": 0.8125, "learning_rate": 0.00014129031335244924, "loss": 0.8216, "step": 24290 }, { "epoch": 0.623724766136914, "grad_norm": 0.74609375, "learning_rate": 0.00014128624742825555, "loss": 0.8642, "step": 24291 }, { "epoch": 0.6237504433328358, "grad_norm": 0.83203125, "learning_rate": 0.00014128218142178215, "loss": 0.8959, "step": 24292 }, { "epoch": 0.6237761205287576, "grad_norm": 0.78125, "learning_rate": 0.00014127811533303706, "loss": 0.8511, "step": 24293 }, { "epoch": 0.6238017977246795, "grad_norm": 0.84765625, "learning_rate": 0.0001412740491620284, "loss": 0.8815, "step": 24294 }, { "epoch": 0.6238274749206013, "grad_norm": 0.8046875, "learning_rate": 0.00014126998290876433, "loss": 0.7994, "step": 24295 }, { "epoch": 0.6238531521165231, "grad_norm": 0.8046875, "learning_rate": 0.00014126591657325288, "loss": 0.8149, "step": 24296 }, { "epoch": 0.623878829312445, "grad_norm": 0.76171875, "learning_rate": 0.0001412618501555022, "loss": 0.9334, "step": 24297 }, { "epoch": 0.6239045065083667, "grad_norm": 0.875, "learning_rate": 0.00014125778365552038, "loss": 0.8418, "step": 24298 }, { "epoch": 0.6239301837042885, "grad_norm": 0.765625, "learning_rate": 0.0001412537170733155, "loss": 0.7598, "step": 24299 }, { "epoch": 0.6239558609002104, "grad_norm": 0.83203125, "learning_rate": 0.00014124965040889572, "loss": 0.9901, "step": 24300 }, { "epoch": 0.6239815380961322, "grad_norm": 0.796875, "learning_rate": 0.00014124558366226912, "loss": 0.8324, "step": 24301 }, { "epoch": 0.624007215292054, "grad_norm": 0.79296875, "learning_rate": 0.00014124151683344376, "loss": 0.8303, "step": 24302 }, { "epoch": 0.6240328924879759, "grad_norm": 0.77734375, "learning_rate": 0.00014123744992242782, "loss": 0.7856, "step": 24303 }, { "epoch": 0.6240585696838977, "grad_norm": 0.78125, "learning_rate": 0.00014123338292922934, "loss": 0.9087, "step": 24304 }, { "epoch": 0.6240842468798194, "grad_norm": 0.7578125, "learning_rate": 0.00014122931585385646, "loss": 0.9604, "step": 24305 }, { "epoch": 0.6241099240757413, "grad_norm": 0.8203125, "learning_rate": 0.0001412252486963173, "loss": 0.8544, "step": 24306 }, { "epoch": 0.6241356012716631, "grad_norm": 0.7265625, "learning_rate": 0.0001412211814566199, "loss": 0.791, "step": 24307 }, { "epoch": 0.624161278467585, "grad_norm": 0.78515625, "learning_rate": 0.00014121711413477242, "loss": 0.9505, "step": 24308 }, { "epoch": 0.6241869556635068, "grad_norm": 0.73046875, "learning_rate": 0.00014121304673078298, "loss": 0.8155, "step": 24309 }, { "epoch": 0.6242126328594286, "grad_norm": 0.78125, "learning_rate": 0.00014120897924465962, "loss": 0.9, "step": 24310 }, { "epoch": 0.6242383100553504, "grad_norm": 0.75, "learning_rate": 0.00014120491167641052, "loss": 0.8261, "step": 24311 }, { "epoch": 0.6242639872512722, "grad_norm": 0.73046875, "learning_rate": 0.00014120084402604376, "loss": 0.8075, "step": 24312 }, { "epoch": 0.624289664447194, "grad_norm": 0.7890625, "learning_rate": 0.00014119677629356741, "loss": 0.8057, "step": 24313 }, { "epoch": 0.6243153416431159, "grad_norm": 0.8125, "learning_rate": 0.00014119270847898964, "loss": 0.9433, "step": 24314 }, { "epoch": 0.6243410188390377, "grad_norm": 0.828125, "learning_rate": 0.0001411886405823185, "loss": 0.7985, "step": 24315 }, { "epoch": 0.6243666960349595, "grad_norm": 0.83203125, "learning_rate": 0.00014118457260356212, "loss": 0.9378, "step": 24316 }, { "epoch": 0.6243923732308814, "grad_norm": 0.859375, "learning_rate": 0.00014118050454272861, "loss": 0.7966, "step": 24317 }, { "epoch": 0.6244180504268031, "grad_norm": 0.765625, "learning_rate": 0.00014117643639982607, "loss": 0.86, "step": 24318 }, { "epoch": 0.6244437276227249, "grad_norm": 0.80078125, "learning_rate": 0.00014117236817486262, "loss": 0.8725, "step": 24319 }, { "epoch": 0.6244694048186468, "grad_norm": 0.8359375, "learning_rate": 0.00014116829986784638, "loss": 0.9039, "step": 24320 }, { "epoch": 0.6244950820145686, "grad_norm": 0.7734375, "learning_rate": 0.00014116423147878538, "loss": 0.9289, "step": 24321 }, { "epoch": 0.6245207592104904, "grad_norm": 0.74609375, "learning_rate": 0.00014116016300768784, "loss": 0.8957, "step": 24322 }, { "epoch": 0.6245464364064123, "grad_norm": 0.75390625, "learning_rate": 0.0001411560944545618, "loss": 0.8335, "step": 24323 }, { "epoch": 0.6245721136023341, "grad_norm": 0.83203125, "learning_rate": 0.00014115202581941536, "loss": 0.8996, "step": 24324 }, { "epoch": 0.6245977907982558, "grad_norm": 0.78515625, "learning_rate": 0.00014114795710225666, "loss": 0.8349, "step": 24325 }, { "epoch": 0.6246234679941777, "grad_norm": 0.78515625, "learning_rate": 0.0001411438883030938, "loss": 0.8432, "step": 24326 }, { "epoch": 0.6246491451900995, "grad_norm": 0.76953125, "learning_rate": 0.00014113981942193488, "loss": 0.7524, "step": 24327 }, { "epoch": 0.6246748223860213, "grad_norm": 0.765625, "learning_rate": 0.00014113575045878802, "loss": 0.8496, "step": 24328 }, { "epoch": 0.6247004995819432, "grad_norm": 0.890625, "learning_rate": 0.00014113168141366135, "loss": 0.9547, "step": 24329 }, { "epoch": 0.624726176777865, "grad_norm": 0.78515625, "learning_rate": 0.00014112761228656293, "loss": 0.7956, "step": 24330 }, { "epoch": 0.6247518539737867, "grad_norm": 0.75, "learning_rate": 0.00014112354307750088, "loss": 0.6684, "step": 24331 }, { "epoch": 0.6247775311697086, "grad_norm": 0.80078125, "learning_rate": 0.00014111947378648334, "loss": 0.8303, "step": 24332 }, { "epoch": 0.6248032083656304, "grad_norm": 0.84765625, "learning_rate": 0.00014111540441351841, "loss": 0.8181, "step": 24333 }, { "epoch": 0.6248288855615522, "grad_norm": 0.84765625, "learning_rate": 0.00014111133495861417, "loss": 1.0613, "step": 24334 }, { "epoch": 0.6248545627574741, "grad_norm": 0.7890625, "learning_rate": 0.00014110726542177878, "loss": 0.9302, "step": 24335 }, { "epoch": 0.6248802399533959, "grad_norm": 0.76953125, "learning_rate": 0.00014110319580302032, "loss": 0.88, "step": 24336 }, { "epoch": 0.6249059171493178, "grad_norm": 0.8203125, "learning_rate": 0.0001410991261023469, "loss": 0.9552, "step": 24337 }, { "epoch": 0.6249315943452395, "grad_norm": 0.77734375, "learning_rate": 0.00014109505631976663, "loss": 0.896, "step": 24338 }, { "epoch": 0.6249572715411613, "grad_norm": 0.78515625, "learning_rate": 0.00014109098645528762, "loss": 0.8813, "step": 24339 }, { "epoch": 0.6249829487370832, "grad_norm": 0.73828125, "learning_rate": 0.00014108691650891798, "loss": 0.9414, "step": 24340 }, { "epoch": 0.625008625933005, "grad_norm": 0.8359375, "learning_rate": 0.00014108284648066586, "loss": 0.9295, "step": 24341 }, { "epoch": 0.6250343031289268, "grad_norm": 0.82421875, "learning_rate": 0.00014107877637053933, "loss": 0.8952, "step": 24342 }, { "epoch": 0.6250599803248487, "grad_norm": 0.71875, "learning_rate": 0.00014107470617854648, "loss": 0.9043, "step": 24343 }, { "epoch": 0.6250856575207705, "grad_norm": 0.7734375, "learning_rate": 0.00014107063590469547, "loss": 0.7999, "step": 24344 }, { "epoch": 0.6251113347166922, "grad_norm": 0.73046875, "learning_rate": 0.00014106656554899438, "loss": 0.8254, "step": 24345 }, { "epoch": 0.6251370119126141, "grad_norm": 0.703125, "learning_rate": 0.00014106249511145133, "loss": 0.8416, "step": 24346 }, { "epoch": 0.6251626891085359, "grad_norm": 0.859375, "learning_rate": 0.00014105842459207442, "loss": 0.9014, "step": 24347 }, { "epoch": 0.6251883663044577, "grad_norm": 0.77734375, "learning_rate": 0.00014105435399087185, "loss": 0.8443, "step": 24348 }, { "epoch": 0.6252140435003796, "grad_norm": 0.84765625, "learning_rate": 0.00014105028330785164, "loss": 0.8565, "step": 24349 }, { "epoch": 0.6252397206963014, "grad_norm": 0.7265625, "learning_rate": 0.00014104621254302187, "loss": 0.9235, "step": 24350 }, { "epoch": 0.6252653978922231, "grad_norm": 0.7421875, "learning_rate": 0.00014104214169639073, "loss": 0.8314, "step": 24351 }, { "epoch": 0.625291075088145, "grad_norm": 0.7890625, "learning_rate": 0.00014103807076796634, "loss": 0.7892, "step": 24352 }, { "epoch": 0.6253167522840668, "grad_norm": 0.73046875, "learning_rate": 0.00014103399975775674, "loss": 0.9154, "step": 24353 }, { "epoch": 0.6253424294799886, "grad_norm": 0.81640625, "learning_rate": 0.0001410299286657701, "loss": 0.8248, "step": 24354 }, { "epoch": 0.6253681066759105, "grad_norm": 0.796875, "learning_rate": 0.00014102585749201454, "loss": 0.8294, "step": 24355 }, { "epoch": 0.6253937838718323, "grad_norm": 0.78515625, "learning_rate": 0.00014102178623649815, "loss": 0.8897, "step": 24356 }, { "epoch": 0.6254194610677541, "grad_norm": 0.82421875, "learning_rate": 0.00014101771489922902, "loss": 0.8767, "step": 24357 }, { "epoch": 0.6254451382636759, "grad_norm": 0.7890625, "learning_rate": 0.0001410136434802153, "loss": 0.8511, "step": 24358 }, { "epoch": 0.6254708154595977, "grad_norm": 0.8046875, "learning_rate": 0.00014100957197946508, "loss": 0.9645, "step": 24359 }, { "epoch": 0.6254964926555195, "grad_norm": 0.76953125, "learning_rate": 0.00014100550039698652, "loss": 0.9936, "step": 24360 }, { "epoch": 0.6255221698514414, "grad_norm": 0.79296875, "learning_rate": 0.00014100142873278766, "loss": 0.9003, "step": 24361 }, { "epoch": 0.6255478470473632, "grad_norm": 0.73828125, "learning_rate": 0.0001409973569868767, "loss": 0.7884, "step": 24362 }, { "epoch": 0.625573524243285, "grad_norm": 0.71484375, "learning_rate": 0.00014099328515926167, "loss": 0.7705, "step": 24363 }, { "epoch": 0.6255992014392069, "grad_norm": 0.73046875, "learning_rate": 0.00014098921324995077, "loss": 0.78, "step": 24364 }, { "epoch": 0.6256248786351286, "grad_norm": 0.80078125, "learning_rate": 0.00014098514125895204, "loss": 0.8605, "step": 24365 }, { "epoch": 0.6256505558310504, "grad_norm": 0.7890625, "learning_rate": 0.0001409810691862736, "loss": 0.9364, "step": 24366 }, { "epoch": 0.6256762330269723, "grad_norm": 0.80859375, "learning_rate": 0.00014097699703192364, "loss": 0.8436, "step": 24367 }, { "epoch": 0.6257019102228941, "grad_norm": 0.84765625, "learning_rate": 0.0001409729247959102, "loss": 0.8707, "step": 24368 }, { "epoch": 0.625727587418816, "grad_norm": 0.84375, "learning_rate": 0.00014096885247824142, "loss": 0.7894, "step": 24369 }, { "epoch": 0.6257532646147378, "grad_norm": 0.76171875, "learning_rate": 0.00014096478007892546, "loss": 0.8423, "step": 24370 }, { "epoch": 0.6257789418106595, "grad_norm": 0.6953125, "learning_rate": 0.00014096070759797032, "loss": 0.7757, "step": 24371 }, { "epoch": 0.6258046190065814, "grad_norm": 0.79296875, "learning_rate": 0.00014095663503538425, "loss": 0.9246, "step": 24372 }, { "epoch": 0.6258302962025032, "grad_norm": 0.734375, "learning_rate": 0.00014095256239117528, "loss": 0.7786, "step": 24373 }, { "epoch": 0.625855973398425, "grad_norm": 0.84765625, "learning_rate": 0.00014094848966535154, "loss": 0.8711, "step": 24374 }, { "epoch": 0.6258816505943469, "grad_norm": 0.73046875, "learning_rate": 0.00014094441685792123, "loss": 0.731, "step": 24375 }, { "epoch": 0.6259073277902687, "grad_norm": 0.88671875, "learning_rate": 0.00014094034396889234, "loss": 0.9169, "step": 24376 }, { "epoch": 0.6259330049861905, "grad_norm": 0.71875, "learning_rate": 0.000140936270998273, "loss": 0.8921, "step": 24377 }, { "epoch": 0.6259586821821123, "grad_norm": 0.87109375, "learning_rate": 0.00014093219794607145, "loss": 0.881, "step": 24378 }, { "epoch": 0.6259843593780341, "grad_norm": 0.80859375, "learning_rate": 0.00014092812481229566, "loss": 0.8627, "step": 24379 }, { "epoch": 0.6260100365739559, "grad_norm": 0.82421875, "learning_rate": 0.00014092405159695388, "loss": 1.0435, "step": 24380 }, { "epoch": 0.6260357137698778, "grad_norm": 0.8359375, "learning_rate": 0.00014091997830005415, "loss": 0.8762, "step": 24381 }, { "epoch": 0.6260613909657996, "grad_norm": 0.8203125, "learning_rate": 0.00014091590492160456, "loss": 0.9776, "step": 24382 }, { "epoch": 0.6260870681617214, "grad_norm": 0.74609375, "learning_rate": 0.00014091183146161328, "loss": 0.8507, "step": 24383 }, { "epoch": 0.6261127453576432, "grad_norm": 0.75, "learning_rate": 0.0001409077579200884, "loss": 0.7035, "step": 24384 }, { "epoch": 0.626138422553565, "grad_norm": 0.75, "learning_rate": 0.0001409036842970381, "loss": 0.8573, "step": 24385 }, { "epoch": 0.6261640997494868, "grad_norm": 0.76171875, "learning_rate": 0.00014089961059247043, "loss": 0.8451, "step": 24386 }, { "epoch": 0.6261897769454087, "grad_norm": 0.796875, "learning_rate": 0.0001408955368063935, "loss": 0.8771, "step": 24387 }, { "epoch": 0.6262154541413305, "grad_norm": 0.76953125, "learning_rate": 0.0001408914629388155, "loss": 0.692, "step": 24388 }, { "epoch": 0.6262411313372523, "grad_norm": 0.7734375, "learning_rate": 0.00014088738898974452, "loss": 0.8508, "step": 24389 }, { "epoch": 0.6262668085331742, "grad_norm": 0.78515625, "learning_rate": 0.0001408833149591886, "loss": 0.9726, "step": 24390 }, { "epoch": 0.6262924857290959, "grad_norm": 0.7265625, "learning_rate": 0.00014087924084715597, "loss": 0.7891, "step": 24391 }, { "epoch": 0.6263181629250177, "grad_norm": 0.72265625, "learning_rate": 0.0001408751666536547, "loss": 0.8388, "step": 24392 }, { "epoch": 0.6263438401209396, "grad_norm": 0.8203125, "learning_rate": 0.00014087109237869293, "loss": 0.8717, "step": 24393 }, { "epoch": 0.6263695173168614, "grad_norm": 0.875, "learning_rate": 0.00014086701802227877, "loss": 0.9285, "step": 24394 }, { "epoch": 0.6263951945127832, "grad_norm": 0.6953125, "learning_rate": 0.00014086294358442032, "loss": 0.8531, "step": 24395 }, { "epoch": 0.6264208717087051, "grad_norm": 0.77734375, "learning_rate": 0.00014085886906512572, "loss": 0.8943, "step": 24396 }, { "epoch": 0.6264465489046269, "grad_norm": 0.76953125, "learning_rate": 0.00014085479446440305, "loss": 0.851, "step": 24397 }, { "epoch": 0.6264722261005486, "grad_norm": 0.7734375, "learning_rate": 0.0001408507197822605, "loss": 0.8154, "step": 24398 }, { "epoch": 0.6264979032964705, "grad_norm": 0.7109375, "learning_rate": 0.00014084664501870614, "loss": 0.7864, "step": 24399 }, { "epoch": 0.6265235804923923, "grad_norm": 0.7421875, "learning_rate": 0.00014084257017374813, "loss": 0.7032, "step": 24400 }, { "epoch": 0.6265492576883142, "grad_norm": 0.765625, "learning_rate": 0.00014083849524739456, "loss": 0.7901, "step": 24401 }, { "epoch": 0.626574934884236, "grad_norm": 0.78125, "learning_rate": 0.00014083442023965357, "loss": 0.937, "step": 24402 }, { "epoch": 0.6266006120801578, "grad_norm": 0.77734375, "learning_rate": 0.00014083034515053323, "loss": 0.9043, "step": 24403 }, { "epoch": 0.6266262892760796, "grad_norm": 0.77734375, "learning_rate": 0.00014082626998004172, "loss": 0.7283, "step": 24404 }, { "epoch": 0.6266519664720014, "grad_norm": 1.2109375, "learning_rate": 0.00014082219472818714, "loss": 0.8051, "step": 24405 }, { "epoch": 0.6266776436679232, "grad_norm": 0.80078125, "learning_rate": 0.00014081811939497757, "loss": 0.792, "step": 24406 }, { "epoch": 0.6267033208638451, "grad_norm": 0.79296875, "learning_rate": 0.00014081404398042123, "loss": 0.9146, "step": 24407 }, { "epoch": 0.6267289980597669, "grad_norm": 0.640625, "learning_rate": 0.00014080996848452618, "loss": 0.738, "step": 24408 }, { "epoch": 0.6267546752556887, "grad_norm": 0.75390625, "learning_rate": 0.00014080589290730057, "loss": 0.7455, "step": 24409 }, { "epoch": 0.6267803524516106, "grad_norm": 0.78515625, "learning_rate": 0.00014080181724875247, "loss": 0.8952, "step": 24410 }, { "epoch": 0.6268060296475323, "grad_norm": 0.78125, "learning_rate": 0.00014079774150889003, "loss": 0.8441, "step": 24411 }, { "epoch": 0.6268317068434541, "grad_norm": 0.78125, "learning_rate": 0.0001407936656877214, "loss": 0.7441, "step": 24412 }, { "epoch": 0.626857384039376, "grad_norm": 0.73828125, "learning_rate": 0.00014078958978525465, "loss": 0.9081, "step": 24413 }, { "epoch": 0.6268830612352978, "grad_norm": 0.74609375, "learning_rate": 0.00014078551380149797, "loss": 0.8055, "step": 24414 }, { "epoch": 0.6269087384312196, "grad_norm": 0.74609375, "learning_rate": 0.00014078143773645942, "loss": 0.8299, "step": 24415 }, { "epoch": 0.6269344156271415, "grad_norm": 0.734375, "learning_rate": 0.00014077736159014714, "loss": 0.7619, "step": 24416 }, { "epoch": 0.6269600928230633, "grad_norm": 0.7734375, "learning_rate": 0.00014077328536256926, "loss": 0.7798, "step": 24417 }, { "epoch": 0.626985770018985, "grad_norm": 0.7734375, "learning_rate": 0.00014076920905373394, "loss": 0.8642, "step": 24418 }, { "epoch": 0.6270114472149069, "grad_norm": 0.76953125, "learning_rate": 0.00014076513266364922, "loss": 0.8838, "step": 24419 }, { "epoch": 0.6270371244108287, "grad_norm": 0.8046875, "learning_rate": 0.00014076105619232333, "loss": 0.9241, "step": 24420 }, { "epoch": 0.6270628016067505, "grad_norm": 0.76953125, "learning_rate": 0.0001407569796397643, "loss": 0.8855, "step": 24421 }, { "epoch": 0.6270884788026724, "grad_norm": 0.81640625, "learning_rate": 0.0001407529030059803, "loss": 0.8016, "step": 24422 }, { "epoch": 0.6271141559985942, "grad_norm": 0.734375, "learning_rate": 0.00014074882629097943, "loss": 0.8214, "step": 24423 }, { "epoch": 0.6271398331945159, "grad_norm": 0.7578125, "learning_rate": 0.00014074474949476984, "loss": 0.7402, "step": 24424 }, { "epoch": 0.6271655103904378, "grad_norm": 0.8359375, "learning_rate": 0.00014074067261735966, "loss": 0.8628, "step": 24425 }, { "epoch": 0.6271911875863596, "grad_norm": 0.77734375, "learning_rate": 0.000140736595658757, "loss": 0.7869, "step": 24426 }, { "epoch": 0.6272168647822814, "grad_norm": 0.75, "learning_rate": 0.00014073251861896997, "loss": 0.9645, "step": 24427 }, { "epoch": 0.6272425419782033, "grad_norm": 0.75390625, "learning_rate": 0.0001407284414980067, "loss": 0.8106, "step": 24428 }, { "epoch": 0.6272682191741251, "grad_norm": 0.76953125, "learning_rate": 0.00014072436429587534, "loss": 0.9283, "step": 24429 }, { "epoch": 0.627293896370047, "grad_norm": 0.76171875, "learning_rate": 0.000140720287012584, "loss": 0.8579, "step": 24430 }, { "epoch": 0.6273195735659687, "grad_norm": 0.75390625, "learning_rate": 0.00014071620964814082, "loss": 0.8949, "step": 24431 }, { "epoch": 0.6273452507618905, "grad_norm": 0.78125, "learning_rate": 0.0001407121322025539, "loss": 1.0064, "step": 24432 }, { "epoch": 0.6273709279578124, "grad_norm": 0.79296875, "learning_rate": 0.00014070805467583135, "loss": 0.8225, "step": 24433 }, { "epoch": 0.6273966051537342, "grad_norm": 0.78125, "learning_rate": 0.00014070397706798134, "loss": 0.9745, "step": 24434 }, { "epoch": 0.627422282349656, "grad_norm": 0.8359375, "learning_rate": 0.000140699899379012, "loss": 0.9223, "step": 24435 }, { "epoch": 0.6274479595455779, "grad_norm": 0.7890625, "learning_rate": 0.00014069582160893142, "loss": 0.787, "step": 24436 }, { "epoch": 0.6274736367414997, "grad_norm": 0.7109375, "learning_rate": 0.00014069174375774777, "loss": 0.9523, "step": 24437 }, { "epoch": 0.6274993139374214, "grad_norm": 0.80078125, "learning_rate": 0.0001406876658254691, "loss": 0.9326, "step": 24438 }, { "epoch": 0.6275249911333433, "grad_norm": 0.78125, "learning_rate": 0.00014068358781210363, "loss": 0.8521, "step": 24439 }, { "epoch": 0.6275506683292651, "grad_norm": 0.80859375, "learning_rate": 0.00014067950971765944, "loss": 0.799, "step": 24440 }, { "epoch": 0.6275763455251869, "grad_norm": 0.7890625, "learning_rate": 0.00014067543154214467, "loss": 0.8527, "step": 24441 }, { "epoch": 0.6276020227211088, "grad_norm": 0.8203125, "learning_rate": 0.00014067135328556742, "loss": 0.7793, "step": 24442 }, { "epoch": 0.6276276999170306, "grad_norm": 0.78125, "learning_rate": 0.0001406672749479358, "loss": 0.9426, "step": 24443 }, { "epoch": 0.6276533771129523, "grad_norm": 0.81640625, "learning_rate": 0.00014066319652925803, "loss": 1.0014, "step": 24444 }, { "epoch": 0.6276790543088742, "grad_norm": 0.77734375, "learning_rate": 0.00014065911802954218, "loss": 0.7542, "step": 24445 }, { "epoch": 0.627704731504796, "grad_norm": 0.796875, "learning_rate": 0.00014065503944879634, "loss": 0.7471, "step": 24446 }, { "epoch": 0.6277304087007178, "grad_norm": 0.7734375, "learning_rate": 0.00014065096078702872, "loss": 0.7923, "step": 24447 }, { "epoch": 0.6277560858966397, "grad_norm": 0.8046875, "learning_rate": 0.0001406468820442474, "loss": 0.9969, "step": 24448 }, { "epoch": 0.6277817630925615, "grad_norm": 0.82421875, "learning_rate": 0.0001406428032204605, "loss": 0.794, "step": 24449 }, { "epoch": 0.6278074402884833, "grad_norm": 0.79296875, "learning_rate": 0.00014063872431567618, "loss": 0.7874, "step": 24450 }, { "epoch": 0.6278331174844051, "grad_norm": 0.81640625, "learning_rate": 0.00014063464532990255, "loss": 0.7871, "step": 24451 }, { "epoch": 0.6278587946803269, "grad_norm": 0.88671875, "learning_rate": 0.00014063056626314771, "loss": 0.7434, "step": 24452 }, { "epoch": 0.6278844718762487, "grad_norm": 0.8671875, "learning_rate": 0.0001406264871154199, "loss": 1.0572, "step": 24453 }, { "epoch": 0.6279101490721706, "grad_norm": 0.7265625, "learning_rate": 0.0001406224078867271, "loss": 0.7982, "step": 24454 }, { "epoch": 0.6279358262680924, "grad_norm": 0.82421875, "learning_rate": 0.00014061832857707753, "loss": 1.0864, "step": 24455 }, { "epoch": 0.6279615034640142, "grad_norm": 0.73828125, "learning_rate": 0.0001406142491864793, "loss": 0.7428, "step": 24456 }, { "epoch": 0.6279871806599361, "grad_norm": 0.734375, "learning_rate": 0.00014061016971494053, "loss": 0.9193, "step": 24457 }, { "epoch": 0.6280128578558578, "grad_norm": 0.75, "learning_rate": 0.00014060609016246938, "loss": 0.873, "step": 24458 }, { "epoch": 0.6280385350517796, "grad_norm": 0.84765625, "learning_rate": 0.00014060201052907393, "loss": 0.8576, "step": 24459 }, { "epoch": 0.6280642122477015, "grad_norm": 0.859375, "learning_rate": 0.00014059793081476238, "loss": 0.9509, "step": 24460 }, { "epoch": 0.6280898894436233, "grad_norm": 0.875, "learning_rate": 0.00014059385101954281, "loss": 0.9432, "step": 24461 }, { "epoch": 0.6281155666395452, "grad_norm": 0.7421875, "learning_rate": 0.00014058977114342336, "loss": 0.8341, "step": 24462 }, { "epoch": 0.628141243835467, "grad_norm": 0.8359375, "learning_rate": 0.00014058569118641215, "loss": 0.9027, "step": 24463 }, { "epoch": 0.6281669210313887, "grad_norm": 0.82421875, "learning_rate": 0.00014058161114851732, "loss": 0.8031, "step": 24464 }, { "epoch": 0.6281925982273106, "grad_norm": 0.75390625, "learning_rate": 0.00014057753102974702, "loss": 0.7975, "step": 24465 }, { "epoch": 0.6282182754232324, "grad_norm": 0.78125, "learning_rate": 0.00014057345083010937, "loss": 0.9196, "step": 24466 }, { "epoch": 0.6282439526191542, "grad_norm": 0.83203125, "learning_rate": 0.00014056937054961248, "loss": 1.012, "step": 24467 }, { "epoch": 0.6282696298150761, "grad_norm": 0.80078125, "learning_rate": 0.00014056529018826452, "loss": 0.9244, "step": 24468 }, { "epoch": 0.6282953070109979, "grad_norm": 0.90625, "learning_rate": 0.00014056120974607358, "loss": 0.9083, "step": 24469 }, { "epoch": 0.6283209842069197, "grad_norm": 0.7734375, "learning_rate": 0.00014055712922304782, "loss": 0.8945, "step": 24470 }, { "epoch": 0.6283466614028415, "grad_norm": 0.6484375, "learning_rate": 0.00014055304861919536, "loss": 0.5537, "step": 24471 }, { "epoch": 0.6283723385987633, "grad_norm": 0.796875, "learning_rate": 0.00014054896793452436, "loss": 0.8216, "step": 24472 }, { "epoch": 0.6283980157946851, "grad_norm": 0.8203125, "learning_rate": 0.00014054488716904292, "loss": 0.7972, "step": 24473 }, { "epoch": 0.628423692990607, "grad_norm": 0.75390625, "learning_rate": 0.00014054080632275916, "loss": 0.8212, "step": 24474 }, { "epoch": 0.6284493701865288, "grad_norm": 0.79296875, "learning_rate": 0.00014053672539568126, "loss": 0.8929, "step": 24475 }, { "epoch": 0.6284750473824506, "grad_norm": 0.78125, "learning_rate": 0.00014053264438781732, "loss": 0.7911, "step": 24476 }, { "epoch": 0.6285007245783725, "grad_norm": 0.76953125, "learning_rate": 0.0001405285632991755, "loss": 0.828, "step": 24477 }, { "epoch": 0.6285264017742942, "grad_norm": 0.7265625, "learning_rate": 0.00014052448212976385, "loss": 0.71, "step": 24478 }, { "epoch": 0.628552078970216, "grad_norm": 0.78125, "learning_rate": 0.0001405204008795906, "loss": 0.9157, "step": 24479 }, { "epoch": 0.6285777561661379, "grad_norm": 0.703125, "learning_rate": 0.00014051631954866388, "loss": 0.7946, "step": 24480 }, { "epoch": 0.6286034333620597, "grad_norm": 0.83984375, "learning_rate": 0.0001405122381369918, "loss": 0.867, "step": 24481 }, { "epoch": 0.6286291105579815, "grad_norm": 1.1875, "learning_rate": 0.00014050815664458247, "loss": 0.8523, "step": 24482 }, { "epoch": 0.6286547877539034, "grad_norm": 0.73828125, "learning_rate": 0.00014050407507144402, "loss": 0.9618, "step": 24483 }, { "epoch": 0.6286804649498251, "grad_norm": 0.78515625, "learning_rate": 0.00014049999341758464, "loss": 0.8374, "step": 24484 }, { "epoch": 0.6287061421457469, "grad_norm": 0.79296875, "learning_rate": 0.00014049591168301242, "loss": 0.9075, "step": 24485 }, { "epoch": 0.6287318193416688, "grad_norm": 0.7578125, "learning_rate": 0.00014049182986773548, "loss": 0.946, "step": 24486 }, { "epoch": 0.6287574965375906, "grad_norm": 0.75390625, "learning_rate": 0.00014048774797176204, "loss": 0.8623, "step": 24487 }, { "epoch": 0.6287831737335124, "grad_norm": 0.76171875, "learning_rate": 0.0001404836659951001, "loss": 0.8637, "step": 24488 }, { "epoch": 0.6288088509294343, "grad_norm": 0.84375, "learning_rate": 0.00014047958393775795, "loss": 0.9578, "step": 24489 }, { "epoch": 0.6288345281253561, "grad_norm": 0.7734375, "learning_rate": 0.00014047550179974362, "loss": 0.78, "step": 24490 }, { "epoch": 0.6288602053212778, "grad_norm": 0.76953125, "learning_rate": 0.00014047141958106524, "loss": 0.859, "step": 24491 }, { "epoch": 0.6288858825171997, "grad_norm": 0.76953125, "learning_rate": 0.00014046733728173097, "loss": 0.8154, "step": 24492 }, { "epoch": 0.6289115597131215, "grad_norm": 0.8125, "learning_rate": 0.000140463254901749, "loss": 0.8412, "step": 24493 }, { "epoch": 0.6289372369090434, "grad_norm": 0.9765625, "learning_rate": 0.0001404591724411274, "loss": 1.0551, "step": 24494 }, { "epoch": 0.6289629141049652, "grad_norm": 0.72265625, "learning_rate": 0.00014045508989987432, "loss": 0.8524, "step": 24495 }, { "epoch": 0.628988591300887, "grad_norm": 0.8125, "learning_rate": 0.00014045100727799788, "loss": 0.7497, "step": 24496 }, { "epoch": 0.6290142684968089, "grad_norm": 0.80859375, "learning_rate": 0.0001404469245755063, "loss": 0.9722, "step": 24497 }, { "epoch": 0.6290399456927306, "grad_norm": 0.7578125, "learning_rate": 0.0001404428417924076, "loss": 0.8619, "step": 24498 }, { "epoch": 0.6290656228886524, "grad_norm": 0.80078125, "learning_rate": 0.00014043875892870994, "loss": 0.9146, "step": 24499 }, { "epoch": 0.6290913000845743, "grad_norm": 0.76953125, "learning_rate": 0.00014043467598442156, "loss": 0.918, "step": 24500 }, { "epoch": 0.6291169772804961, "grad_norm": 0.69921875, "learning_rate": 0.00014043059295955045, "loss": 0.7359, "step": 24501 }, { "epoch": 0.6291426544764179, "grad_norm": 0.7421875, "learning_rate": 0.00014042650985410487, "loss": 0.7632, "step": 24502 }, { "epoch": 0.6291683316723398, "grad_norm": 0.71484375, "learning_rate": 0.0001404224266680929, "loss": 0.7431, "step": 24503 }, { "epoch": 0.6291940088682615, "grad_norm": 0.84375, "learning_rate": 0.00014041834340152267, "loss": 0.897, "step": 24504 }, { "epoch": 0.6292196860641833, "grad_norm": 0.7734375, "learning_rate": 0.00014041426005440236, "loss": 0.87, "step": 24505 }, { "epoch": 0.6292453632601052, "grad_norm": 0.75, "learning_rate": 0.00014041017662674008, "loss": 0.7628, "step": 24506 }, { "epoch": 0.629271040456027, "grad_norm": 0.94140625, "learning_rate": 0.00014040609311854395, "loss": 0.914, "step": 24507 }, { "epoch": 0.6292967176519488, "grad_norm": 0.8203125, "learning_rate": 0.0001404020095298221, "loss": 0.755, "step": 24508 }, { "epoch": 0.6293223948478707, "grad_norm": 0.74609375, "learning_rate": 0.0001403979258605827, "loss": 0.9047, "step": 24509 }, { "epoch": 0.6293480720437925, "grad_norm": 0.73046875, "learning_rate": 0.00014039384211083394, "loss": 0.8247, "step": 24510 }, { "epoch": 0.6293737492397142, "grad_norm": 0.82421875, "learning_rate": 0.00014038975828058387, "loss": 0.8841, "step": 24511 }, { "epoch": 0.6293994264356361, "grad_norm": 0.76953125, "learning_rate": 0.00014038567436984063, "loss": 0.9118, "step": 24512 }, { "epoch": 0.6294251036315579, "grad_norm": 0.734375, "learning_rate": 0.00014038159037861245, "loss": 0.8733, "step": 24513 }, { "epoch": 0.6294507808274797, "grad_norm": 0.7265625, "learning_rate": 0.00014037750630690738, "loss": 0.8245, "step": 24514 }, { "epoch": 0.6294764580234016, "grad_norm": 0.8125, "learning_rate": 0.00014037342215473357, "loss": 0.9181, "step": 24515 }, { "epoch": 0.6295021352193234, "grad_norm": 0.8203125, "learning_rate": 0.00014036933792209918, "loss": 0.9199, "step": 24516 }, { "epoch": 0.6295278124152452, "grad_norm": 0.75, "learning_rate": 0.00014036525360901236, "loss": 0.9168, "step": 24517 }, { "epoch": 0.629553489611167, "grad_norm": 0.7421875, "learning_rate": 0.0001403611692154812, "loss": 0.8676, "step": 24518 }, { "epoch": 0.6295791668070888, "grad_norm": 0.8203125, "learning_rate": 0.00014035708474151393, "loss": 0.8514, "step": 24519 }, { "epoch": 0.6296048440030106, "grad_norm": 0.8125, "learning_rate": 0.00014035300018711862, "loss": 0.8184, "step": 24520 }, { "epoch": 0.6296305211989325, "grad_norm": 0.828125, "learning_rate": 0.0001403489155523034, "loss": 0.9168, "step": 24521 }, { "epoch": 0.6296561983948543, "grad_norm": 0.74609375, "learning_rate": 0.00014034483083707645, "loss": 0.6503, "step": 24522 }, { "epoch": 0.6296818755907762, "grad_norm": 0.77734375, "learning_rate": 0.0001403407460414459, "loss": 0.9104, "step": 24523 }, { "epoch": 0.6297075527866979, "grad_norm": 0.79296875, "learning_rate": 0.0001403366611654199, "loss": 0.9351, "step": 24524 }, { "epoch": 0.6297332299826197, "grad_norm": 0.82421875, "learning_rate": 0.00014033257620900655, "loss": 0.8123, "step": 24525 }, { "epoch": 0.6297589071785415, "grad_norm": 0.7109375, "learning_rate": 0.00014032849117221404, "loss": 0.7982, "step": 24526 }, { "epoch": 0.6297845843744634, "grad_norm": 0.78125, "learning_rate": 0.0001403244060550505, "loss": 0.8673, "step": 24527 }, { "epoch": 0.6298102615703852, "grad_norm": 0.80078125, "learning_rate": 0.00014032032085752402, "loss": 0.9323, "step": 24528 }, { "epoch": 0.6298359387663071, "grad_norm": 0.78515625, "learning_rate": 0.00014031623557964282, "loss": 0.8521, "step": 24529 }, { "epoch": 0.6298616159622289, "grad_norm": 0.76171875, "learning_rate": 0.000140312150221415, "loss": 0.8215, "step": 24530 }, { "epoch": 0.6298872931581506, "grad_norm": 0.75390625, "learning_rate": 0.00014030806478284867, "loss": 0.8361, "step": 24531 }, { "epoch": 0.6299129703540725, "grad_norm": 0.80859375, "learning_rate": 0.00014030397926395203, "loss": 0.9533, "step": 24532 }, { "epoch": 0.6299386475499943, "grad_norm": 0.80859375, "learning_rate": 0.0001402998936647332, "loss": 0.7879, "step": 24533 }, { "epoch": 0.6299643247459161, "grad_norm": 0.77734375, "learning_rate": 0.00014029580798520034, "loss": 0.934, "step": 24534 }, { "epoch": 0.629990001941838, "grad_norm": 0.78125, "learning_rate": 0.0001402917222253616, "loss": 0.8667, "step": 24535 }, { "epoch": 0.6300156791377598, "grad_norm": 0.70703125, "learning_rate": 0.00014028763638522501, "loss": 0.7079, "step": 24536 }, { "epoch": 0.6300413563336816, "grad_norm": 0.78515625, "learning_rate": 0.00014028355046479885, "loss": 0.824, "step": 24537 }, { "epoch": 0.6300670335296034, "grad_norm": 0.8671875, "learning_rate": 0.00014027946446409123, "loss": 0.9322, "step": 24538 }, { "epoch": 0.6300927107255252, "grad_norm": 0.73046875, "learning_rate": 0.00014027537838311025, "loss": 0.8466, "step": 24539 }, { "epoch": 0.630118387921447, "grad_norm": 0.8046875, "learning_rate": 0.0001402712922218641, "loss": 0.8209, "step": 24540 }, { "epoch": 0.6301440651173689, "grad_norm": 0.828125, "learning_rate": 0.00014026720598036088, "loss": 0.7459, "step": 24541 }, { "epoch": 0.6301697423132907, "grad_norm": 0.7890625, "learning_rate": 0.00014026311965860877, "loss": 0.8379, "step": 24542 }, { "epoch": 0.6301954195092125, "grad_norm": 0.71875, "learning_rate": 0.00014025903325661589, "loss": 0.8066, "step": 24543 }, { "epoch": 0.6302210967051343, "grad_norm": 0.78515625, "learning_rate": 0.00014025494677439036, "loss": 0.8796, "step": 24544 }, { "epoch": 0.6302467739010561, "grad_norm": 0.8515625, "learning_rate": 0.00014025086021194044, "loss": 0.8543, "step": 24545 }, { "epoch": 0.6302724510969779, "grad_norm": 0.82421875, "learning_rate": 0.0001402467735692741, "loss": 0.8297, "step": 24546 }, { "epoch": 0.6302981282928998, "grad_norm": 0.7578125, "learning_rate": 0.00014024268684639966, "loss": 0.7455, "step": 24547 }, { "epoch": 0.6303238054888216, "grad_norm": 0.69921875, "learning_rate": 0.00014023860004332513, "loss": 0.8255, "step": 24548 }, { "epoch": 0.6303494826847434, "grad_norm": 0.7890625, "learning_rate": 0.0001402345131600587, "loss": 0.8484, "step": 24549 }, { "epoch": 0.6303751598806653, "grad_norm": 0.80078125, "learning_rate": 0.00014023042619660855, "loss": 0.9114, "step": 24550 }, { "epoch": 0.630400837076587, "grad_norm": 0.8046875, "learning_rate": 0.00014022633915298277, "loss": 0.8474, "step": 24551 }, { "epoch": 0.6304265142725088, "grad_norm": 0.78125, "learning_rate": 0.00014022225202918955, "loss": 0.821, "step": 24552 }, { "epoch": 0.6304521914684307, "grad_norm": 0.7890625, "learning_rate": 0.00014021816482523701, "loss": 0.8924, "step": 24553 }, { "epoch": 0.6304778686643525, "grad_norm": 0.80859375, "learning_rate": 0.00014021407754113331, "loss": 0.8352, "step": 24554 }, { "epoch": 0.6305035458602744, "grad_norm": 0.74609375, "learning_rate": 0.00014020999017688655, "loss": 0.8709, "step": 24555 }, { "epoch": 0.6305292230561962, "grad_norm": 0.71484375, "learning_rate": 0.00014020590273250492, "loss": 0.7928, "step": 24556 }, { "epoch": 0.630554900252118, "grad_norm": 0.7734375, "learning_rate": 0.0001402018152079966, "loss": 0.8661, "step": 24557 }, { "epoch": 0.6305805774480397, "grad_norm": 0.7734375, "learning_rate": 0.00014019772760336964, "loss": 0.8766, "step": 24558 }, { "epoch": 0.6306062546439616, "grad_norm": 0.75, "learning_rate": 0.00014019363991863232, "loss": 0.9129, "step": 24559 }, { "epoch": 0.6306319318398834, "grad_norm": 0.73046875, "learning_rate": 0.0001401895521537926, "loss": 0.8161, "step": 24560 }, { "epoch": 0.6306576090358053, "grad_norm": 0.75390625, "learning_rate": 0.0001401854643088588, "loss": 0.8878, "step": 24561 }, { "epoch": 0.6306832862317271, "grad_norm": 0.74609375, "learning_rate": 0.000140181376383839, "loss": 0.8077, "step": 24562 }, { "epoch": 0.6307089634276489, "grad_norm": 0.6640625, "learning_rate": 0.00014017728837874128, "loss": 0.7964, "step": 24563 }, { "epoch": 0.6307346406235707, "grad_norm": 0.80078125, "learning_rate": 0.00014017320029357394, "loss": 0.8855, "step": 24564 }, { "epoch": 0.6307603178194925, "grad_norm": 0.765625, "learning_rate": 0.000140169112128345, "loss": 0.7473, "step": 24565 }, { "epoch": 0.6307859950154143, "grad_norm": 0.80078125, "learning_rate": 0.00014016502388306264, "loss": 0.8243, "step": 24566 }, { "epoch": 0.6308116722113362, "grad_norm": 0.74609375, "learning_rate": 0.00014016093555773503, "loss": 0.7906, "step": 24567 }, { "epoch": 0.630837349407258, "grad_norm": 0.79296875, "learning_rate": 0.00014015684715237028, "loss": 0.8902, "step": 24568 }, { "epoch": 0.6308630266031798, "grad_norm": 0.83984375, "learning_rate": 0.0001401527586669766, "loss": 0.8426, "step": 24569 }, { "epoch": 0.6308887037991017, "grad_norm": 0.8046875, "learning_rate": 0.0001401486701015621, "loss": 0.8495, "step": 24570 }, { "epoch": 0.6309143809950234, "grad_norm": 0.76953125, "learning_rate": 0.0001401445814561349, "loss": 0.8001, "step": 24571 }, { "epoch": 0.6309400581909452, "grad_norm": 0.80859375, "learning_rate": 0.00014014049273070319, "loss": 0.9166, "step": 24572 }, { "epoch": 0.6309657353868671, "grad_norm": 0.76171875, "learning_rate": 0.00014013640392527508, "loss": 0.814, "step": 24573 }, { "epoch": 0.6309914125827889, "grad_norm": 0.71484375, "learning_rate": 0.00014013231503985876, "loss": 0.7683, "step": 24574 }, { "epoch": 0.6310170897787107, "grad_norm": 0.8515625, "learning_rate": 0.00014012822607446237, "loss": 0.89, "step": 24575 }, { "epoch": 0.6310427669746326, "grad_norm": 0.71484375, "learning_rate": 0.000140124137029094, "loss": 0.8057, "step": 24576 }, { "epoch": 0.6310684441705544, "grad_norm": 0.8046875, "learning_rate": 0.0001401200479037619, "loss": 0.8365, "step": 24577 }, { "epoch": 0.6310941213664761, "grad_norm": 0.7890625, "learning_rate": 0.0001401159586984742, "loss": 0.8573, "step": 24578 }, { "epoch": 0.631119798562398, "grad_norm": 0.796875, "learning_rate": 0.00014011186941323897, "loss": 0.811, "step": 24579 }, { "epoch": 0.6311454757583198, "grad_norm": 0.828125, "learning_rate": 0.0001401077800480644, "loss": 0.9418, "step": 24580 }, { "epoch": 0.6311711529542416, "grad_norm": 0.87890625, "learning_rate": 0.00014010369060295865, "loss": 0.8585, "step": 24581 }, { "epoch": 0.6311968301501635, "grad_norm": 0.7734375, "learning_rate": 0.0001400996010779299, "loss": 0.9736, "step": 24582 }, { "epoch": 0.6312225073460853, "grad_norm": 0.78515625, "learning_rate": 0.00014009551147298622, "loss": 0.9077, "step": 24583 }, { "epoch": 0.631248184542007, "grad_norm": 0.8515625, "learning_rate": 0.00014009142178813584, "loss": 0.9429, "step": 24584 }, { "epoch": 0.6312738617379289, "grad_norm": 0.75390625, "learning_rate": 0.0001400873320233869, "loss": 0.8965, "step": 24585 }, { "epoch": 0.6312995389338507, "grad_norm": 0.75390625, "learning_rate": 0.00014008324217874745, "loss": 0.7412, "step": 24586 }, { "epoch": 0.6313252161297725, "grad_norm": 0.75, "learning_rate": 0.00014007915225422579, "loss": 0.8793, "step": 24587 }, { "epoch": 0.6313508933256944, "grad_norm": 0.76953125, "learning_rate": 0.00014007506224983, "loss": 0.8328, "step": 24588 }, { "epoch": 0.6313765705216162, "grad_norm": 0.75, "learning_rate": 0.00014007097216556819, "loss": 0.8499, "step": 24589 }, { "epoch": 0.6314022477175381, "grad_norm": 0.80078125, "learning_rate": 0.00014006688200144857, "loss": 0.8627, "step": 24590 }, { "epoch": 0.6314279249134598, "grad_norm": 0.73046875, "learning_rate": 0.00014006279175747928, "loss": 0.9046, "step": 24591 }, { "epoch": 0.6314536021093816, "grad_norm": 0.71875, "learning_rate": 0.00014005870143366845, "loss": 0.8345, "step": 24592 }, { "epoch": 0.6314792793053035, "grad_norm": 0.74609375, "learning_rate": 0.00014005461103002426, "loss": 0.9249, "step": 24593 }, { "epoch": 0.6315049565012253, "grad_norm": 0.80859375, "learning_rate": 0.00014005052054655485, "loss": 0.9506, "step": 24594 }, { "epoch": 0.6315306336971471, "grad_norm": 0.828125, "learning_rate": 0.00014004642998326834, "loss": 0.9573, "step": 24595 }, { "epoch": 0.631556310893069, "grad_norm": 0.80078125, "learning_rate": 0.00014004233934017296, "loss": 0.8981, "step": 24596 }, { "epoch": 0.6315819880889907, "grad_norm": 0.7421875, "learning_rate": 0.0001400382486172768, "loss": 0.8264, "step": 24597 }, { "epoch": 0.6316076652849125, "grad_norm": 0.78125, "learning_rate": 0.000140034157814588, "loss": 0.8742, "step": 24598 }, { "epoch": 0.6316333424808344, "grad_norm": 0.71484375, "learning_rate": 0.00014003006693211477, "loss": 0.8214, "step": 24599 }, { "epoch": 0.6316590196767562, "grad_norm": 0.7890625, "learning_rate": 0.00014002597596986523, "loss": 0.9855, "step": 24600 }, { "epoch": 0.631684696872678, "grad_norm": 0.77734375, "learning_rate": 0.00014002188492784755, "loss": 0.9422, "step": 24601 }, { "epoch": 0.6317103740685999, "grad_norm": 0.796875, "learning_rate": 0.00014001779380606982, "loss": 0.8455, "step": 24602 }, { "epoch": 0.6317360512645217, "grad_norm": 0.74609375, "learning_rate": 0.00014001370260454025, "loss": 0.8582, "step": 24603 }, { "epoch": 0.6317617284604434, "grad_norm": 0.796875, "learning_rate": 0.00014000961132326699, "loss": 0.9861, "step": 24604 }, { "epoch": 0.6317874056563653, "grad_norm": 0.67578125, "learning_rate": 0.0001400055199622582, "loss": 0.8414, "step": 24605 }, { "epoch": 0.6318130828522871, "grad_norm": 0.80078125, "learning_rate": 0.00014000142852152204, "loss": 0.9278, "step": 24606 }, { "epoch": 0.6318387600482089, "grad_norm": 0.7890625, "learning_rate": 0.00013999733700106664, "loss": 0.9317, "step": 24607 }, { "epoch": 0.6318644372441308, "grad_norm": 0.71484375, "learning_rate": 0.00013999324540090013, "loss": 0.7144, "step": 24608 }, { "epoch": 0.6318901144400526, "grad_norm": 0.76171875, "learning_rate": 0.00013998915372103073, "loss": 0.7821, "step": 24609 }, { "epoch": 0.6319157916359744, "grad_norm": 0.7578125, "learning_rate": 0.00013998506196146655, "loss": 0.7276, "step": 24610 }, { "epoch": 0.6319414688318962, "grad_norm": 0.77734375, "learning_rate": 0.0001399809701222157, "loss": 0.8253, "step": 24611 }, { "epoch": 0.631967146027818, "grad_norm": 0.79296875, "learning_rate": 0.00013997687820328648, "loss": 0.7747, "step": 24612 }, { "epoch": 0.6319928232237398, "grad_norm": 0.84375, "learning_rate": 0.0001399727862046869, "loss": 0.8962, "step": 24613 }, { "epoch": 0.6320185004196617, "grad_norm": 0.76953125, "learning_rate": 0.00013996869412642517, "loss": 0.8482, "step": 24614 }, { "epoch": 0.6320441776155835, "grad_norm": 0.82421875, "learning_rate": 0.00013996460196850948, "loss": 0.8679, "step": 24615 }, { "epoch": 0.6320698548115053, "grad_norm": 0.82421875, "learning_rate": 0.00013996050973094786, "loss": 0.8403, "step": 24616 }, { "epoch": 0.6320955320074271, "grad_norm": 0.8125, "learning_rate": 0.00013995641741374864, "loss": 0.8737, "step": 24617 }, { "epoch": 0.6321212092033489, "grad_norm": 0.79296875, "learning_rate": 0.00013995232501691986, "loss": 0.8341, "step": 24618 }, { "epoch": 0.6321468863992707, "grad_norm": 0.8046875, "learning_rate": 0.0001399482325404697, "loss": 0.7401, "step": 24619 }, { "epoch": 0.6321725635951926, "grad_norm": 0.79296875, "learning_rate": 0.00013994413998440634, "loss": 0.8562, "step": 24620 }, { "epoch": 0.6321982407911144, "grad_norm": 0.71875, "learning_rate": 0.00013994004734873788, "loss": 0.8372, "step": 24621 }, { "epoch": 0.6322239179870363, "grad_norm": 0.87890625, "learning_rate": 0.00013993595463347255, "loss": 0.9861, "step": 24622 }, { "epoch": 0.6322495951829581, "grad_norm": 0.77734375, "learning_rate": 0.00013993186183861847, "loss": 0.8969, "step": 24623 }, { "epoch": 0.6322752723788798, "grad_norm": 0.80078125, "learning_rate": 0.00013992776896418375, "loss": 0.8749, "step": 24624 }, { "epoch": 0.6323009495748017, "grad_norm": 0.76953125, "learning_rate": 0.00013992367601017665, "loss": 0.8954, "step": 24625 }, { "epoch": 0.6323266267707235, "grad_norm": 0.734375, "learning_rate": 0.00013991958297660524, "loss": 0.7387, "step": 24626 }, { "epoch": 0.6323523039666453, "grad_norm": 0.75390625, "learning_rate": 0.00013991548986347772, "loss": 0.8787, "step": 24627 }, { "epoch": 0.6323779811625672, "grad_norm": 0.7421875, "learning_rate": 0.0001399113966708022, "loss": 0.7712, "step": 24628 }, { "epoch": 0.632403658358489, "grad_norm": 0.7890625, "learning_rate": 0.0001399073033985869, "loss": 0.9084, "step": 24629 }, { "epoch": 0.6324293355544108, "grad_norm": 0.83203125, "learning_rate": 0.00013990321004683995, "loss": 0.9877, "step": 24630 }, { "epoch": 0.6324550127503326, "grad_norm": 0.75, "learning_rate": 0.00013989911661556955, "loss": 0.8948, "step": 24631 }, { "epoch": 0.6324806899462544, "grad_norm": 0.7109375, "learning_rate": 0.00013989502310478373, "loss": 0.6447, "step": 24632 }, { "epoch": 0.6325063671421762, "grad_norm": 0.78515625, "learning_rate": 0.00013989092951449079, "loss": 0.8179, "step": 24633 }, { "epoch": 0.6325320443380981, "grad_norm": 0.77734375, "learning_rate": 0.0001398868358446988, "loss": 0.7741, "step": 24634 }, { "epoch": 0.6325577215340199, "grad_norm": 0.828125, "learning_rate": 0.00013988274209541593, "loss": 0.9271, "step": 24635 }, { "epoch": 0.6325833987299417, "grad_norm": 0.6875, "learning_rate": 0.0001398786482666504, "loss": 0.8735, "step": 24636 }, { "epoch": 0.6326090759258635, "grad_norm": 0.8046875, "learning_rate": 0.00013987455435841031, "loss": 0.9137, "step": 24637 }, { "epoch": 0.6326347531217853, "grad_norm": 0.84375, "learning_rate": 0.00013987046037070385, "loss": 0.834, "step": 24638 }, { "epoch": 0.6326604303177071, "grad_norm": 0.7265625, "learning_rate": 0.00013986636630353917, "loss": 0.8521, "step": 24639 }, { "epoch": 0.632686107513629, "grad_norm": 0.77734375, "learning_rate": 0.0001398622721569244, "loss": 0.8998, "step": 24640 }, { "epoch": 0.6327117847095508, "grad_norm": 0.78515625, "learning_rate": 0.00013985817793086774, "loss": 0.8911, "step": 24641 }, { "epoch": 0.6327374619054726, "grad_norm": 0.7421875, "learning_rate": 0.0001398540836253773, "loss": 0.9181, "step": 24642 }, { "epoch": 0.6327631391013945, "grad_norm": 0.8046875, "learning_rate": 0.0001398499892404613, "loss": 0.8371, "step": 24643 }, { "epoch": 0.6327888162973162, "grad_norm": 0.76953125, "learning_rate": 0.00013984589477612788, "loss": 0.8041, "step": 24644 }, { "epoch": 0.632814493493238, "grad_norm": 0.76171875, "learning_rate": 0.0001398418002323852, "loss": 0.8066, "step": 24645 }, { "epoch": 0.6328401706891599, "grad_norm": 0.73828125, "learning_rate": 0.00013983770560924133, "loss": 1.0044, "step": 24646 }, { "epoch": 0.6328658478850817, "grad_norm": 0.81640625, "learning_rate": 0.0001398336109067046, "loss": 0.9514, "step": 24647 }, { "epoch": 0.6328915250810035, "grad_norm": 0.7421875, "learning_rate": 0.00013982951612478303, "loss": 0.8274, "step": 24648 }, { "epoch": 0.6329172022769254, "grad_norm": 0.78125, "learning_rate": 0.00013982542126348487, "loss": 0.8888, "step": 24649 }, { "epoch": 0.6329428794728472, "grad_norm": 0.7734375, "learning_rate": 0.0001398213263228182, "loss": 0.9012, "step": 24650 }, { "epoch": 0.632968556668769, "grad_norm": 0.80078125, "learning_rate": 0.00013981723130279127, "loss": 0.8855, "step": 24651 }, { "epoch": 0.6329942338646908, "grad_norm": 0.81640625, "learning_rate": 0.00013981313620341215, "loss": 0.9481, "step": 24652 }, { "epoch": 0.6330199110606126, "grad_norm": 0.69921875, "learning_rate": 0.0001398090410246891, "loss": 0.8604, "step": 24653 }, { "epoch": 0.6330455882565345, "grad_norm": 0.68359375, "learning_rate": 0.00013980494576663018, "loss": 0.8457, "step": 24654 }, { "epoch": 0.6330712654524563, "grad_norm": 0.765625, "learning_rate": 0.0001398008504292436, "loss": 0.8438, "step": 24655 }, { "epoch": 0.6330969426483781, "grad_norm": 0.828125, "learning_rate": 0.00013979675501253752, "loss": 0.8448, "step": 24656 }, { "epoch": 0.6331226198442998, "grad_norm": 0.84375, "learning_rate": 0.0001397926595165201, "loss": 0.8547, "step": 24657 }, { "epoch": 0.6331482970402217, "grad_norm": 0.7734375, "learning_rate": 0.00013978856394119955, "loss": 0.7906, "step": 24658 }, { "epoch": 0.6331739742361435, "grad_norm": 0.79296875, "learning_rate": 0.00013978446828658396, "loss": 0.8336, "step": 24659 }, { "epoch": 0.6331996514320654, "grad_norm": 0.7734375, "learning_rate": 0.00013978037255268151, "loss": 0.8769, "step": 24660 }, { "epoch": 0.6332253286279872, "grad_norm": 0.8359375, "learning_rate": 0.00013977627673950039, "loss": 0.8281, "step": 24661 }, { "epoch": 0.633251005823909, "grad_norm": 0.83203125, "learning_rate": 0.00013977218084704874, "loss": 0.9133, "step": 24662 }, { "epoch": 0.6332766830198309, "grad_norm": 0.86328125, "learning_rate": 0.0001397680848753347, "loss": 1.0172, "step": 24663 }, { "epoch": 0.6333023602157526, "grad_norm": 0.765625, "learning_rate": 0.0001397639888243665, "loss": 0.9491, "step": 24664 }, { "epoch": 0.6333280374116744, "grad_norm": 0.796875, "learning_rate": 0.00013975989269415223, "loss": 0.8657, "step": 24665 }, { "epoch": 0.6333537146075963, "grad_norm": 0.734375, "learning_rate": 0.00013975579648470006, "loss": 0.8394, "step": 24666 }, { "epoch": 0.6333793918035181, "grad_norm": 0.79296875, "learning_rate": 0.00013975170019601822, "loss": 0.7663, "step": 24667 }, { "epoch": 0.6334050689994399, "grad_norm": 0.80859375, "learning_rate": 0.00013974760382811483, "loss": 0.7635, "step": 24668 }, { "epoch": 0.6334307461953618, "grad_norm": 0.82421875, "learning_rate": 0.000139743507380998, "loss": 0.8651, "step": 24669 }, { "epoch": 0.6334564233912836, "grad_norm": 0.7890625, "learning_rate": 0.00013973941085467604, "loss": 0.8098, "step": 24670 }, { "epoch": 0.6334821005872053, "grad_norm": 0.8515625, "learning_rate": 0.00013973531424915699, "loss": 0.8218, "step": 24671 }, { "epoch": 0.6335077777831272, "grad_norm": 0.78125, "learning_rate": 0.00013973121756444905, "loss": 0.844, "step": 24672 }, { "epoch": 0.633533454979049, "grad_norm": 0.70703125, "learning_rate": 0.00013972712080056036, "loss": 0.9001, "step": 24673 }, { "epoch": 0.6335591321749708, "grad_norm": 0.71484375, "learning_rate": 0.0001397230239574991, "loss": 0.6969, "step": 24674 }, { "epoch": 0.6335848093708927, "grad_norm": 0.8125, "learning_rate": 0.00013971892703527346, "loss": 0.9096, "step": 24675 }, { "epoch": 0.6336104865668145, "grad_norm": 0.78515625, "learning_rate": 0.00013971483003389162, "loss": 0.9846, "step": 24676 }, { "epoch": 0.6336361637627362, "grad_norm": 0.7421875, "learning_rate": 0.00013971073295336166, "loss": 0.8333, "step": 24677 }, { "epoch": 0.6336618409586581, "grad_norm": 0.73046875, "learning_rate": 0.0001397066357936918, "loss": 0.9491, "step": 24678 }, { "epoch": 0.6336875181545799, "grad_norm": 0.71484375, "learning_rate": 0.00013970253855489023, "loss": 0.9016, "step": 24679 }, { "epoch": 0.6337131953505017, "grad_norm": 0.82421875, "learning_rate": 0.00013969844123696507, "loss": 0.9646, "step": 24680 }, { "epoch": 0.6337388725464236, "grad_norm": 0.73828125, "learning_rate": 0.00013969434383992452, "loss": 0.7868, "step": 24681 }, { "epoch": 0.6337645497423454, "grad_norm": 0.78515625, "learning_rate": 0.00013969024636377667, "loss": 0.9155, "step": 24682 }, { "epoch": 0.6337902269382673, "grad_norm": 0.74609375, "learning_rate": 0.00013968614880852982, "loss": 0.7287, "step": 24683 }, { "epoch": 0.633815904134189, "grad_norm": 0.8046875, "learning_rate": 0.00013968205117419205, "loss": 0.7896, "step": 24684 }, { "epoch": 0.6338415813301108, "grad_norm": 0.828125, "learning_rate": 0.00013967795346077152, "loss": 0.9028, "step": 24685 }, { "epoch": 0.6338672585260327, "grad_norm": 0.8046875, "learning_rate": 0.0001396738556682764, "loss": 0.8943, "step": 24686 }, { "epoch": 0.6338929357219545, "grad_norm": 0.8125, "learning_rate": 0.00013966975779671487, "loss": 0.8987, "step": 24687 }, { "epoch": 0.6339186129178763, "grad_norm": 0.84765625, "learning_rate": 0.00013966565984609508, "loss": 0.8489, "step": 24688 }, { "epoch": 0.6339442901137982, "grad_norm": 0.70703125, "learning_rate": 0.00013966156181642524, "loss": 0.7454, "step": 24689 }, { "epoch": 0.63396996730972, "grad_norm": 0.828125, "learning_rate": 0.00013965746370771348, "loss": 0.8238, "step": 24690 }, { "epoch": 0.6339956445056417, "grad_norm": 0.8046875, "learning_rate": 0.00013965336551996803, "loss": 0.8642, "step": 24691 }, { "epoch": 0.6340213217015636, "grad_norm": 0.78515625, "learning_rate": 0.00013964926725319694, "loss": 0.8363, "step": 24692 }, { "epoch": 0.6340469988974854, "grad_norm": 0.87109375, "learning_rate": 0.00013964516890740844, "loss": 0.8518, "step": 24693 }, { "epoch": 0.6340726760934072, "grad_norm": 0.76953125, "learning_rate": 0.00013964107048261071, "loss": 0.7632, "step": 24694 }, { "epoch": 0.6340983532893291, "grad_norm": 0.7890625, "learning_rate": 0.00013963697197881195, "loss": 1.0131, "step": 24695 }, { "epoch": 0.6341240304852509, "grad_norm": 0.73828125, "learning_rate": 0.00013963287339602021, "loss": 0.7929, "step": 24696 }, { "epoch": 0.6341497076811726, "grad_norm": 0.7265625, "learning_rate": 0.0001396287747342438, "loss": 0.7695, "step": 24697 }, { "epoch": 0.6341753848770945, "grad_norm": 0.7421875, "learning_rate": 0.0001396246759934908, "loss": 0.7849, "step": 24698 }, { "epoch": 0.6342010620730163, "grad_norm": 0.70703125, "learning_rate": 0.00013962057717376939, "loss": 0.8016, "step": 24699 }, { "epoch": 0.6342267392689381, "grad_norm": 0.8203125, "learning_rate": 0.00013961647827508776, "loss": 0.9331, "step": 24700 }, { "epoch": 0.63425241646486, "grad_norm": 0.7890625, "learning_rate": 0.00013961237929745405, "loss": 0.8691, "step": 24701 }, { "epoch": 0.6342780936607818, "grad_norm": 0.80859375, "learning_rate": 0.00013960828024087645, "loss": 0.9435, "step": 24702 }, { "epoch": 0.6343037708567036, "grad_norm": 0.765625, "learning_rate": 0.00013960418110536314, "loss": 0.8557, "step": 24703 }, { "epoch": 0.6343294480526254, "grad_norm": 0.8046875, "learning_rate": 0.00013960008189092227, "loss": 0.9063, "step": 24704 }, { "epoch": 0.6343551252485472, "grad_norm": 0.76171875, "learning_rate": 0.00013959598259756203, "loss": 0.8563, "step": 24705 }, { "epoch": 0.634380802444469, "grad_norm": 0.83203125, "learning_rate": 0.00013959188322529054, "loss": 0.7841, "step": 24706 }, { "epoch": 0.6344064796403909, "grad_norm": 0.796875, "learning_rate": 0.00013958778377411602, "loss": 0.8516, "step": 24707 }, { "epoch": 0.6344321568363127, "grad_norm": 0.80859375, "learning_rate": 0.00013958368424404663, "loss": 0.958, "step": 24708 }, { "epoch": 0.6344578340322345, "grad_norm": 0.80078125, "learning_rate": 0.0001395795846350905, "loss": 0.8228, "step": 24709 }, { "epoch": 0.6344835112281564, "grad_norm": 0.79296875, "learning_rate": 0.00013957548494725587, "loss": 0.7977, "step": 24710 }, { "epoch": 0.6345091884240781, "grad_norm": 0.73828125, "learning_rate": 0.00013957138518055088, "loss": 0.8498, "step": 24711 }, { "epoch": 0.6345348656199999, "grad_norm": 0.734375, "learning_rate": 0.00013956728533498364, "loss": 0.9139, "step": 24712 }, { "epoch": 0.6345605428159218, "grad_norm": 0.80859375, "learning_rate": 0.00013956318541056244, "loss": 0.801, "step": 24713 }, { "epoch": 0.6345862200118436, "grad_norm": 0.73046875, "learning_rate": 0.00013955908540729531, "loss": 0.7661, "step": 24714 }, { "epoch": 0.6346118972077655, "grad_norm": 0.828125, "learning_rate": 0.00013955498532519057, "loss": 0.7193, "step": 24715 }, { "epoch": 0.6346375744036873, "grad_norm": 0.7890625, "learning_rate": 0.00013955088516425627, "loss": 0.7833, "step": 24716 }, { "epoch": 0.634663251599609, "grad_norm": 0.765625, "learning_rate": 0.00013954678492450066, "loss": 1.0295, "step": 24717 }, { "epoch": 0.6346889287955308, "grad_norm": 0.87109375, "learning_rate": 0.00013954268460593188, "loss": 0.86, "step": 24718 }, { "epoch": 0.6347146059914527, "grad_norm": 0.8203125, "learning_rate": 0.0001395385842085581, "loss": 0.9835, "step": 24719 }, { "epoch": 0.6347402831873745, "grad_norm": 0.796875, "learning_rate": 0.00013953448373238745, "loss": 0.7315, "step": 24720 }, { "epoch": 0.6347659603832964, "grad_norm": 0.78125, "learning_rate": 0.00013953038317742817, "loss": 0.9203, "step": 24721 }, { "epoch": 0.6347916375792182, "grad_norm": 0.6875, "learning_rate": 0.0001395262825436884, "loss": 0.7878, "step": 24722 }, { "epoch": 0.63481731477514, "grad_norm": 0.796875, "learning_rate": 0.00013952218183117636, "loss": 0.9123, "step": 24723 }, { "epoch": 0.6348429919710618, "grad_norm": 0.71875, "learning_rate": 0.00013951808103990017, "loss": 0.7586, "step": 24724 }, { "epoch": 0.6348686691669836, "grad_norm": 0.796875, "learning_rate": 0.00013951398016986798, "loss": 0.9161, "step": 24725 }, { "epoch": 0.6348943463629054, "grad_norm": 0.78515625, "learning_rate": 0.000139509879221088, "loss": 0.8997, "step": 24726 }, { "epoch": 0.6349200235588273, "grad_norm": 0.7265625, "learning_rate": 0.00013950577819356842, "loss": 0.8463, "step": 24727 }, { "epoch": 0.6349457007547491, "grad_norm": 0.765625, "learning_rate": 0.00013950167708731738, "loss": 0.868, "step": 24728 }, { "epoch": 0.6349713779506709, "grad_norm": 0.72265625, "learning_rate": 0.00013949757590234308, "loss": 0.8319, "step": 24729 }, { "epoch": 0.6349970551465928, "grad_norm": 0.87890625, "learning_rate": 0.00013949347463865368, "loss": 0.9655, "step": 24730 }, { "epoch": 0.6350227323425145, "grad_norm": 0.85546875, "learning_rate": 0.00013948937329625734, "loss": 0.9435, "step": 24731 }, { "epoch": 0.6350484095384363, "grad_norm": 0.7109375, "learning_rate": 0.00013948527187516226, "loss": 0.7671, "step": 24732 }, { "epoch": 0.6350740867343582, "grad_norm": 0.87890625, "learning_rate": 0.00013948117037537657, "loss": 0.9403, "step": 24733 }, { "epoch": 0.63509976393028, "grad_norm": 0.77734375, "learning_rate": 0.00013947706879690848, "loss": 0.8484, "step": 24734 }, { "epoch": 0.6351254411262018, "grad_norm": 0.69140625, "learning_rate": 0.0001394729671397662, "loss": 0.7164, "step": 24735 }, { "epoch": 0.6351511183221237, "grad_norm": 0.87890625, "learning_rate": 0.0001394688654039578, "loss": 0.8685, "step": 24736 }, { "epoch": 0.6351767955180454, "grad_norm": 0.76171875, "learning_rate": 0.00013946476358949158, "loss": 0.8479, "step": 24737 }, { "epoch": 0.6352024727139672, "grad_norm": 0.70703125, "learning_rate": 0.0001394606616963756, "loss": 0.7931, "step": 24738 }, { "epoch": 0.6352281499098891, "grad_norm": 0.8046875, "learning_rate": 0.00013945655972461812, "loss": 0.923, "step": 24739 }, { "epoch": 0.6352538271058109, "grad_norm": 0.69921875, "learning_rate": 0.00013945245767422727, "loss": 0.8519, "step": 24740 }, { "epoch": 0.6352795043017327, "grad_norm": 0.76953125, "learning_rate": 0.00013944835554521122, "loss": 0.8999, "step": 24741 }, { "epoch": 0.6353051814976546, "grad_norm": 0.83203125, "learning_rate": 0.00013944425333757817, "loss": 0.8237, "step": 24742 }, { "epoch": 0.6353308586935764, "grad_norm": 0.75390625, "learning_rate": 0.0001394401510513363, "loss": 0.8202, "step": 24743 }, { "epoch": 0.6353565358894981, "grad_norm": 0.87109375, "learning_rate": 0.00013943604868649377, "loss": 0.8713, "step": 24744 }, { "epoch": 0.63538221308542, "grad_norm": 0.86328125, "learning_rate": 0.00013943194624305876, "loss": 0.7803, "step": 24745 }, { "epoch": 0.6354078902813418, "grad_norm": 0.82421875, "learning_rate": 0.0001394278437210394, "loss": 0.7929, "step": 24746 }, { "epoch": 0.6354335674772636, "grad_norm": 0.86328125, "learning_rate": 0.00013942374112044394, "loss": 0.9366, "step": 24747 }, { "epoch": 0.6354592446731855, "grad_norm": 0.7890625, "learning_rate": 0.00013941963844128053, "loss": 0.9538, "step": 24748 }, { "epoch": 0.6354849218691073, "grad_norm": 0.87109375, "learning_rate": 0.00013941553568355732, "loss": 0.8826, "step": 24749 }, { "epoch": 0.6355105990650292, "grad_norm": 0.81640625, "learning_rate": 0.00013941143284728255, "loss": 0.8922, "step": 24750 }, { "epoch": 0.6355362762609509, "grad_norm": 0.84375, "learning_rate": 0.00013940732993246433, "loss": 1.0224, "step": 24751 }, { "epoch": 0.6355619534568727, "grad_norm": 0.8515625, "learning_rate": 0.00013940322693911086, "loss": 0.8316, "step": 24752 }, { "epoch": 0.6355876306527946, "grad_norm": 0.79296875, "learning_rate": 0.00013939912386723034, "loss": 0.9174, "step": 24753 }, { "epoch": 0.6356133078487164, "grad_norm": 0.75390625, "learning_rate": 0.00013939502071683087, "loss": 0.8322, "step": 24754 }, { "epoch": 0.6356389850446382, "grad_norm": 0.78125, "learning_rate": 0.0001393909174879207, "loss": 0.748, "step": 24755 }, { "epoch": 0.6356646622405601, "grad_norm": 0.78515625, "learning_rate": 0.00013938681418050806, "loss": 0.8646, "step": 24756 }, { "epoch": 0.6356903394364818, "grad_norm": 0.83203125, "learning_rate": 0.00013938271079460098, "loss": 0.9922, "step": 24757 }, { "epoch": 0.6357160166324036, "grad_norm": 0.72265625, "learning_rate": 0.00013937860733020772, "loss": 0.8291, "step": 24758 }, { "epoch": 0.6357416938283255, "grad_norm": 0.75, "learning_rate": 0.00013937450378733647, "loss": 0.7776, "step": 24759 }, { "epoch": 0.6357673710242473, "grad_norm": 0.87890625, "learning_rate": 0.0001393704001659954, "loss": 0.8791, "step": 24760 }, { "epoch": 0.6357930482201691, "grad_norm": 0.7734375, "learning_rate": 0.0001393662964661927, "loss": 0.8948, "step": 24761 }, { "epoch": 0.635818725416091, "grad_norm": 0.828125, "learning_rate": 0.00013936219268793643, "loss": 0.8137, "step": 24762 }, { "epoch": 0.6358444026120128, "grad_norm": 0.83984375, "learning_rate": 0.00013935808883123497, "loss": 0.895, "step": 24763 }, { "epoch": 0.6358700798079345, "grad_norm": 0.77734375, "learning_rate": 0.00013935398489609637, "loss": 0.7704, "step": 24764 }, { "epoch": 0.6358957570038564, "grad_norm": 0.84375, "learning_rate": 0.0001393498808825288, "loss": 0.8691, "step": 24765 }, { "epoch": 0.6359214341997782, "grad_norm": 0.7734375, "learning_rate": 0.00013934577679054048, "loss": 0.8922, "step": 24766 }, { "epoch": 0.6359471113957, "grad_norm": 0.73828125, "learning_rate": 0.00013934167262013962, "loss": 0.7609, "step": 24767 }, { "epoch": 0.6359727885916219, "grad_norm": 0.875, "learning_rate": 0.0001393375683713343, "loss": 0.9691, "step": 24768 }, { "epoch": 0.6359984657875437, "grad_norm": 0.78515625, "learning_rate": 0.00013933346404413282, "loss": 0.7625, "step": 24769 }, { "epoch": 0.6360241429834655, "grad_norm": 0.76953125, "learning_rate": 0.00013932935963854325, "loss": 0.8688, "step": 24770 }, { "epoch": 0.6360498201793873, "grad_norm": 0.75390625, "learning_rate": 0.00013932525515457383, "loss": 0.9614, "step": 24771 }, { "epoch": 0.6360754973753091, "grad_norm": 0.83984375, "learning_rate": 0.00013932115059223275, "loss": 0.8408, "step": 24772 }, { "epoch": 0.6361011745712309, "grad_norm": 0.72265625, "learning_rate": 0.00013931704595152814, "loss": 0.8997, "step": 24773 }, { "epoch": 0.6361268517671528, "grad_norm": 0.70703125, "learning_rate": 0.00013931294123246826, "loss": 0.8467, "step": 24774 }, { "epoch": 0.6361525289630746, "grad_norm": 1.1171875, "learning_rate": 0.00013930883643506118, "loss": 0.9053, "step": 24775 }, { "epoch": 0.6361782061589965, "grad_norm": 0.78515625, "learning_rate": 0.0001393047315593152, "loss": 0.9444, "step": 24776 }, { "epoch": 0.6362038833549182, "grad_norm": 0.7734375, "learning_rate": 0.0001393006266052384, "loss": 0.861, "step": 24777 }, { "epoch": 0.63622956055084, "grad_norm": 0.859375, "learning_rate": 0.00013929652157283897, "loss": 0.7412, "step": 24778 }, { "epoch": 0.6362552377467618, "grad_norm": 0.82421875, "learning_rate": 0.00013929241646212516, "loss": 1.0427, "step": 24779 }, { "epoch": 0.6362809149426837, "grad_norm": 0.7734375, "learning_rate": 0.00013928831127310514, "loss": 0.7937, "step": 24780 }, { "epoch": 0.6363065921386055, "grad_norm": 1.1171875, "learning_rate": 0.00013928420600578702, "loss": 0.8663, "step": 24781 }, { "epoch": 0.6363322693345274, "grad_norm": 0.8125, "learning_rate": 0.000139280100660179, "loss": 0.8128, "step": 24782 }, { "epoch": 0.6363579465304492, "grad_norm": 0.85546875, "learning_rate": 0.0001392759952362894, "loss": 0.9224, "step": 24783 }, { "epoch": 0.6363836237263709, "grad_norm": 0.7109375, "learning_rate": 0.0001392718897341262, "loss": 0.7956, "step": 24784 }, { "epoch": 0.6364093009222928, "grad_norm": 0.76171875, "learning_rate": 0.00013926778415369767, "loss": 0.8668, "step": 24785 }, { "epoch": 0.6364349781182146, "grad_norm": 0.859375, "learning_rate": 0.000139263678495012, "loss": 0.8552, "step": 24786 }, { "epoch": 0.6364606553141364, "grad_norm": 0.765625, "learning_rate": 0.0001392595727580774, "loss": 0.8366, "step": 24787 }, { "epoch": 0.6364863325100583, "grad_norm": 0.71484375, "learning_rate": 0.00013925546694290198, "loss": 0.7696, "step": 24788 }, { "epoch": 0.6365120097059801, "grad_norm": 0.75, "learning_rate": 0.000139251361049494, "loss": 0.8225, "step": 24789 }, { "epoch": 0.6365376869019018, "grad_norm": 0.8828125, "learning_rate": 0.0001392472550778616, "loss": 0.8067, "step": 24790 }, { "epoch": 0.6365633640978237, "grad_norm": 0.81640625, "learning_rate": 0.0001392431490280129, "loss": 1.0387, "step": 24791 }, { "epoch": 0.6365890412937455, "grad_norm": 0.84375, "learning_rate": 0.00013923904289995622, "loss": 0.8587, "step": 24792 }, { "epoch": 0.6366147184896673, "grad_norm": 0.71484375, "learning_rate": 0.00013923493669369964, "loss": 0.7751, "step": 24793 }, { "epoch": 0.6366403956855892, "grad_norm": 0.7578125, "learning_rate": 0.00013923083040925138, "loss": 0.7656, "step": 24794 }, { "epoch": 0.636666072881511, "grad_norm": 0.7265625, "learning_rate": 0.00013922672404661961, "loss": 0.6701, "step": 24795 }, { "epoch": 0.6366917500774328, "grad_norm": 0.734375, "learning_rate": 0.00013922261760581254, "loss": 0.7746, "step": 24796 }, { "epoch": 0.6367174272733546, "grad_norm": 0.7734375, "learning_rate": 0.00013921851108683833, "loss": 0.8174, "step": 24797 }, { "epoch": 0.6367431044692764, "grad_norm": 0.77734375, "learning_rate": 0.00013921440448970518, "loss": 0.9979, "step": 24798 }, { "epoch": 0.6367687816651982, "grad_norm": 0.6953125, "learning_rate": 0.00013921029781442123, "loss": 0.7318, "step": 24799 }, { "epoch": 0.6367944588611201, "grad_norm": 0.81640625, "learning_rate": 0.00013920619106099472, "loss": 0.9635, "step": 24800 }, { "epoch": 0.6368201360570419, "grad_norm": 0.81640625, "learning_rate": 0.00013920208422943384, "loss": 0.8934, "step": 24801 }, { "epoch": 0.6368458132529637, "grad_norm": 0.72265625, "learning_rate": 0.0001391979773197467, "loss": 0.7813, "step": 24802 }, { "epoch": 0.6368714904488856, "grad_norm": 0.78515625, "learning_rate": 0.00013919387033194157, "loss": 0.8566, "step": 24803 }, { "epoch": 0.6368971676448073, "grad_norm": 0.78515625, "learning_rate": 0.00013918976326602657, "loss": 1.0198, "step": 24804 }, { "epoch": 0.6369228448407291, "grad_norm": 0.734375, "learning_rate": 0.00013918565612200992, "loss": 0.7631, "step": 24805 }, { "epoch": 0.636948522036651, "grad_norm": 0.80078125, "learning_rate": 0.0001391815488998998, "loss": 0.8625, "step": 24806 }, { "epoch": 0.6369741992325728, "grad_norm": 0.82421875, "learning_rate": 0.00013917744159970438, "loss": 0.9533, "step": 24807 }, { "epoch": 0.6369998764284946, "grad_norm": 0.7578125, "learning_rate": 0.00013917333422143187, "loss": 0.8649, "step": 24808 }, { "epoch": 0.6370255536244165, "grad_norm": 0.75, "learning_rate": 0.00013916922676509045, "loss": 0.7305, "step": 24809 }, { "epoch": 0.6370512308203382, "grad_norm": 0.69921875, "learning_rate": 0.0001391651192306883, "loss": 0.6638, "step": 24810 }, { "epoch": 0.63707690801626, "grad_norm": 0.75390625, "learning_rate": 0.0001391610116182336, "loss": 0.8968, "step": 24811 }, { "epoch": 0.6371025852121819, "grad_norm": 0.7578125, "learning_rate": 0.00013915690392773454, "loss": 0.8447, "step": 24812 }, { "epoch": 0.6371282624081037, "grad_norm": 0.796875, "learning_rate": 0.00013915279615919928, "loss": 0.8441, "step": 24813 }, { "epoch": 0.6371539396040256, "grad_norm": 0.75390625, "learning_rate": 0.00013914868831263604, "loss": 0.8694, "step": 24814 }, { "epoch": 0.6371796167999474, "grad_norm": 0.87890625, "learning_rate": 0.00013914458038805306, "loss": 0.8386, "step": 24815 }, { "epoch": 0.6372052939958692, "grad_norm": 0.8671875, "learning_rate": 0.0001391404723854584, "loss": 0.9995, "step": 24816 }, { "epoch": 0.637230971191791, "grad_norm": 0.7578125, "learning_rate": 0.00013913636430486036, "loss": 0.811, "step": 24817 }, { "epoch": 0.6372566483877128, "grad_norm": 0.765625, "learning_rate": 0.00013913225614626705, "loss": 0.8836, "step": 24818 }, { "epoch": 0.6372823255836346, "grad_norm": 0.76953125, "learning_rate": 0.00013912814790968672, "loss": 0.8666, "step": 24819 }, { "epoch": 0.6373080027795565, "grad_norm": 0.7109375, "learning_rate": 0.0001391240395951275, "loss": 0.8169, "step": 24820 }, { "epoch": 0.6373336799754783, "grad_norm": 0.8671875, "learning_rate": 0.00013911993120259758, "loss": 0.7867, "step": 24821 }, { "epoch": 0.6373593571714001, "grad_norm": 0.8203125, "learning_rate": 0.00013911582273210525, "loss": 0.8394, "step": 24822 }, { "epoch": 0.637385034367322, "grad_norm": 0.7421875, "learning_rate": 0.00013911171418365853, "loss": 0.7505, "step": 24823 }, { "epoch": 0.6374107115632437, "grad_norm": 0.7578125, "learning_rate": 0.00013910760555726573, "loss": 0.8039, "step": 24824 }, { "epoch": 0.6374363887591655, "grad_norm": 0.77734375, "learning_rate": 0.00013910349685293506, "loss": 0.7345, "step": 24825 }, { "epoch": 0.6374620659550874, "grad_norm": 0.73046875, "learning_rate": 0.00013909938807067456, "loss": 0.8351, "step": 24826 }, { "epoch": 0.6374877431510092, "grad_norm": 0.76171875, "learning_rate": 0.00013909527921049256, "loss": 0.822, "step": 24827 }, { "epoch": 0.637513420346931, "grad_norm": 0.78515625, "learning_rate": 0.0001390911702723972, "loss": 0.9052, "step": 24828 }, { "epoch": 0.6375390975428529, "grad_norm": 0.8203125, "learning_rate": 0.00013908706125639666, "loss": 0.8949, "step": 24829 }, { "epoch": 0.6375647747387746, "grad_norm": 0.77734375, "learning_rate": 0.00013908295216249914, "loss": 0.7083, "step": 24830 }, { "epoch": 0.6375904519346964, "grad_norm": 0.7578125, "learning_rate": 0.0001390788429907128, "loss": 0.7835, "step": 24831 }, { "epoch": 0.6376161291306183, "grad_norm": 0.80078125, "learning_rate": 0.00013907473374104589, "loss": 0.9204, "step": 24832 }, { "epoch": 0.6376418063265401, "grad_norm": 0.78125, "learning_rate": 0.00013907062441350659, "loss": 0.7764, "step": 24833 }, { "epoch": 0.6376674835224619, "grad_norm": 0.77734375, "learning_rate": 0.000139066515008103, "loss": 0.7908, "step": 24834 }, { "epoch": 0.6376931607183838, "grad_norm": 0.76171875, "learning_rate": 0.00013906240552484343, "loss": 0.8834, "step": 24835 }, { "epoch": 0.6377188379143056, "grad_norm": 0.76953125, "learning_rate": 0.00013905829596373598, "loss": 0.6344, "step": 24836 }, { "epoch": 0.6377445151102273, "grad_norm": 0.7890625, "learning_rate": 0.0001390541863247889, "loss": 0.7641, "step": 24837 }, { "epoch": 0.6377701923061492, "grad_norm": 0.80859375, "learning_rate": 0.00013905007660801032, "loss": 0.8934, "step": 24838 }, { "epoch": 0.637795869502071, "grad_norm": 0.91015625, "learning_rate": 0.0001390459668134085, "loss": 1.095, "step": 24839 }, { "epoch": 0.6378215466979928, "grad_norm": 0.7734375, "learning_rate": 0.00013904185694099157, "loss": 0.9742, "step": 24840 }, { "epoch": 0.6378472238939147, "grad_norm": 0.76953125, "learning_rate": 0.00013903774699076778, "loss": 0.9655, "step": 24841 }, { "epoch": 0.6378729010898365, "grad_norm": 0.74609375, "learning_rate": 0.00013903363696274524, "loss": 0.841, "step": 24842 }, { "epoch": 0.6378985782857584, "grad_norm": 0.78125, "learning_rate": 0.00013902952685693223, "loss": 0.8266, "step": 24843 }, { "epoch": 0.6379242554816801, "grad_norm": 0.828125, "learning_rate": 0.00013902541667333687, "loss": 0.867, "step": 24844 }, { "epoch": 0.6379499326776019, "grad_norm": 0.78125, "learning_rate": 0.00013902130641196738, "loss": 0.8601, "step": 24845 }, { "epoch": 0.6379756098735238, "grad_norm": 0.78515625, "learning_rate": 0.00013901719607283198, "loss": 0.9478, "step": 24846 }, { "epoch": 0.6380012870694456, "grad_norm": 0.85546875, "learning_rate": 0.00013901308565593878, "loss": 0.925, "step": 24847 }, { "epoch": 0.6380269642653674, "grad_norm": 0.75390625, "learning_rate": 0.00013900897516129607, "loss": 0.8921, "step": 24848 }, { "epoch": 0.6380526414612893, "grad_norm": 0.73046875, "learning_rate": 0.000139004864588912, "loss": 0.7944, "step": 24849 }, { "epoch": 0.638078318657211, "grad_norm": 0.72265625, "learning_rate": 0.00013900075393879471, "loss": 0.8242, "step": 24850 }, { "epoch": 0.6381039958531328, "grad_norm": 0.73828125, "learning_rate": 0.0001389966432109525, "loss": 0.9163, "step": 24851 }, { "epoch": 0.6381296730490547, "grad_norm": 0.80859375, "learning_rate": 0.00013899253240539348, "loss": 0.8903, "step": 24852 }, { "epoch": 0.6381553502449765, "grad_norm": 0.73828125, "learning_rate": 0.00013898842152212584, "loss": 0.7839, "step": 24853 }, { "epoch": 0.6381810274408983, "grad_norm": 0.79296875, "learning_rate": 0.00013898431056115783, "loss": 0.6807, "step": 24854 }, { "epoch": 0.6382067046368202, "grad_norm": 0.75, "learning_rate": 0.00013898019952249756, "loss": 0.8621, "step": 24855 }, { "epoch": 0.638232381832742, "grad_norm": 0.76953125, "learning_rate": 0.00013897608840615332, "loss": 0.8717, "step": 24856 }, { "epoch": 0.6382580590286637, "grad_norm": 0.79296875, "learning_rate": 0.00013897197721213326, "loss": 0.9622, "step": 24857 }, { "epoch": 0.6382837362245856, "grad_norm": 0.79296875, "learning_rate": 0.00013896786594044556, "loss": 0.8985, "step": 24858 }, { "epoch": 0.6383094134205074, "grad_norm": 0.8125, "learning_rate": 0.0001389637545910984, "loss": 0.9275, "step": 24859 }, { "epoch": 0.6383350906164292, "grad_norm": 0.78515625, "learning_rate": 0.00013895964316410004, "loss": 0.8407, "step": 24860 }, { "epoch": 0.6383607678123511, "grad_norm": 0.84375, "learning_rate": 0.00013895553165945858, "loss": 0.8768, "step": 24861 }, { "epoch": 0.6383864450082729, "grad_norm": 0.765625, "learning_rate": 0.00013895142007718233, "loss": 0.7267, "step": 24862 }, { "epoch": 0.6384121222041947, "grad_norm": 0.81640625, "learning_rate": 0.00013894730841727934, "loss": 0.9651, "step": 24863 }, { "epoch": 0.6384377994001165, "grad_norm": 0.73828125, "learning_rate": 0.00013894319667975794, "loss": 0.7543, "step": 24864 }, { "epoch": 0.6384634765960383, "grad_norm": 0.8046875, "learning_rate": 0.00013893908486462624, "loss": 0.839, "step": 24865 }, { "epoch": 0.6384891537919601, "grad_norm": 0.78125, "learning_rate": 0.00013893497297189246, "loss": 0.8401, "step": 24866 }, { "epoch": 0.638514830987882, "grad_norm": 0.7109375, "learning_rate": 0.0001389308610015648, "loss": 0.7894, "step": 24867 }, { "epoch": 0.6385405081838038, "grad_norm": 0.70703125, "learning_rate": 0.00013892674895365148, "loss": 0.8679, "step": 24868 }, { "epoch": 0.6385661853797256, "grad_norm": 0.80859375, "learning_rate": 0.00013892263682816064, "loss": 0.8656, "step": 24869 }, { "epoch": 0.6385918625756474, "grad_norm": 0.78515625, "learning_rate": 0.0001389185246251005, "loss": 0.9246, "step": 24870 }, { "epoch": 0.6386175397715692, "grad_norm": 0.83203125, "learning_rate": 0.00013891441234447925, "loss": 0.9122, "step": 24871 }, { "epoch": 0.638643216967491, "grad_norm": 0.7890625, "learning_rate": 0.0001389102999863051, "loss": 0.79, "step": 24872 }, { "epoch": 0.6386688941634129, "grad_norm": 0.87109375, "learning_rate": 0.00013890618755058626, "loss": 0.9725, "step": 24873 }, { "epoch": 0.6386945713593347, "grad_norm": 0.74609375, "learning_rate": 0.00013890207503733083, "loss": 0.8718, "step": 24874 }, { "epoch": 0.6387202485552566, "grad_norm": 0.72265625, "learning_rate": 0.00013889796244654718, "loss": 0.8329, "step": 24875 }, { "epoch": 0.6387459257511784, "grad_norm": 0.75390625, "learning_rate": 0.0001388938497782433, "loss": 0.8464, "step": 24876 }, { "epoch": 0.6387716029471001, "grad_norm": 0.8359375, "learning_rate": 0.00013888973703242758, "loss": 0.947, "step": 24877 }, { "epoch": 0.638797280143022, "grad_norm": 0.734375, "learning_rate": 0.0001388856242091081, "loss": 0.7921, "step": 24878 }, { "epoch": 0.6388229573389438, "grad_norm": 0.828125, "learning_rate": 0.00013888151130829303, "loss": 0.8355, "step": 24879 }, { "epoch": 0.6388486345348656, "grad_norm": 0.78515625, "learning_rate": 0.00013887739832999066, "loss": 0.8264, "step": 24880 }, { "epoch": 0.6388743117307875, "grad_norm": 0.75, "learning_rate": 0.0001388732852742092, "loss": 0.7042, "step": 24881 }, { "epoch": 0.6388999889267093, "grad_norm": 0.765625, "learning_rate": 0.00013886917214095672, "loss": 0.7957, "step": 24882 }, { "epoch": 0.6389256661226311, "grad_norm": 0.7578125, "learning_rate": 0.0001388650589302415, "loss": 0.7711, "step": 24883 }, { "epoch": 0.6389513433185529, "grad_norm": 0.82421875, "learning_rate": 0.00013886094564207174, "loss": 0.877, "step": 24884 }, { "epoch": 0.6389770205144747, "grad_norm": 0.7578125, "learning_rate": 0.00013885683227645563, "loss": 0.8188, "step": 24885 }, { "epoch": 0.6390026977103965, "grad_norm": 0.78515625, "learning_rate": 0.00013885271883340138, "loss": 0.7266, "step": 24886 }, { "epoch": 0.6390283749063184, "grad_norm": 0.8828125, "learning_rate": 0.00013884860531291712, "loss": 1.044, "step": 24887 }, { "epoch": 0.6390540521022402, "grad_norm": 0.73046875, "learning_rate": 0.00013884449171501118, "loss": 0.8701, "step": 24888 }, { "epoch": 0.639079729298162, "grad_norm": 0.7421875, "learning_rate": 0.00013884037803969164, "loss": 0.7798, "step": 24889 }, { "epoch": 0.6391054064940838, "grad_norm": 0.953125, "learning_rate": 0.00013883626428696667, "loss": 0.9032, "step": 24890 }, { "epoch": 0.6391310836900056, "grad_norm": 0.78125, "learning_rate": 0.0001388321504568446, "loss": 0.9256, "step": 24891 }, { "epoch": 0.6391567608859274, "grad_norm": 0.79296875, "learning_rate": 0.00013882803654933357, "loss": 0.9384, "step": 24892 }, { "epoch": 0.6391824380818493, "grad_norm": 0.84375, "learning_rate": 0.00013882392256444172, "loss": 0.9053, "step": 24893 }, { "epoch": 0.6392081152777711, "grad_norm": 0.82421875, "learning_rate": 0.00013881980850217738, "loss": 0.7956, "step": 24894 }, { "epoch": 0.6392337924736929, "grad_norm": 0.828125, "learning_rate": 0.00013881569436254858, "loss": 0.8644, "step": 24895 }, { "epoch": 0.6392594696696148, "grad_norm": 0.9296875, "learning_rate": 0.00013881158014556367, "loss": 0.8383, "step": 24896 }, { "epoch": 0.6392851468655365, "grad_norm": 0.8046875, "learning_rate": 0.00013880746585123073, "loss": 0.847, "step": 24897 }, { "epoch": 0.6393108240614583, "grad_norm": 0.90234375, "learning_rate": 0.00013880335147955806, "loss": 0.9024, "step": 24898 }, { "epoch": 0.6393365012573802, "grad_norm": 0.83984375, "learning_rate": 0.0001387992370305538, "loss": 1.0401, "step": 24899 }, { "epoch": 0.639362178453302, "grad_norm": 0.6953125, "learning_rate": 0.00013879512250422618, "loss": 0.7956, "step": 24900 }, { "epoch": 0.6393878556492238, "grad_norm": 0.80859375, "learning_rate": 0.00013879100790058338, "loss": 0.9358, "step": 24901 }, { "epoch": 0.6394135328451457, "grad_norm": 0.88671875, "learning_rate": 0.00013878689321963357, "loss": 0.9416, "step": 24902 }, { "epoch": 0.6394392100410675, "grad_norm": 0.81640625, "learning_rate": 0.000138782778461385, "loss": 0.9516, "step": 24903 }, { "epoch": 0.6394648872369892, "grad_norm": 0.7890625, "learning_rate": 0.0001387786636258459, "loss": 0.9392, "step": 24904 }, { "epoch": 0.6394905644329111, "grad_norm": 0.72265625, "learning_rate": 0.00013877454871302438, "loss": 0.7631, "step": 24905 }, { "epoch": 0.6395162416288329, "grad_norm": 0.69921875, "learning_rate": 0.0001387704337229287, "loss": 0.7991, "step": 24906 }, { "epoch": 0.6395419188247548, "grad_norm": 0.73828125, "learning_rate": 0.00013876631865556704, "loss": 0.8079, "step": 24907 }, { "epoch": 0.6395675960206766, "grad_norm": 0.75390625, "learning_rate": 0.0001387622035109476, "loss": 0.6531, "step": 24908 }, { "epoch": 0.6395932732165984, "grad_norm": 0.7265625, "learning_rate": 0.00013875808828907863, "loss": 0.847, "step": 24909 }, { "epoch": 0.6396189504125201, "grad_norm": 0.7421875, "learning_rate": 0.00013875397298996828, "loss": 0.793, "step": 24910 }, { "epoch": 0.639644627608442, "grad_norm": 0.76953125, "learning_rate": 0.00013874985761362472, "loss": 0.7649, "step": 24911 }, { "epoch": 0.6396703048043638, "grad_norm": 0.7578125, "learning_rate": 0.00013874574216005624, "loss": 0.8272, "step": 24912 }, { "epoch": 0.6396959820002857, "grad_norm": 0.7890625, "learning_rate": 0.000138741626629271, "loss": 0.8842, "step": 24913 }, { "epoch": 0.6397216591962075, "grad_norm": 0.8359375, "learning_rate": 0.00013873751102127718, "loss": 0.8881, "step": 24914 }, { "epoch": 0.6397473363921293, "grad_norm": 0.75390625, "learning_rate": 0.000138733395336083, "loss": 0.9095, "step": 24915 }, { "epoch": 0.6397730135880512, "grad_norm": 0.71875, "learning_rate": 0.00013872927957369666, "loss": 0.8272, "step": 24916 }, { "epoch": 0.6397986907839729, "grad_norm": 0.7890625, "learning_rate": 0.00013872516373412635, "loss": 0.8398, "step": 24917 }, { "epoch": 0.6398243679798947, "grad_norm": 0.78515625, "learning_rate": 0.0001387210478173803, "loss": 0.9307, "step": 24918 }, { "epoch": 0.6398500451758166, "grad_norm": 0.76171875, "learning_rate": 0.0001387169318234667, "loss": 0.7743, "step": 24919 }, { "epoch": 0.6398757223717384, "grad_norm": 0.765625, "learning_rate": 0.00013871281575239376, "loss": 0.8108, "step": 24920 }, { "epoch": 0.6399013995676602, "grad_norm": 0.765625, "learning_rate": 0.00013870869960416968, "loss": 0.8807, "step": 24921 }, { "epoch": 0.6399270767635821, "grad_norm": 0.79296875, "learning_rate": 0.00013870458337880264, "loss": 0.8448, "step": 24922 }, { "epoch": 0.6399527539595039, "grad_norm": 0.765625, "learning_rate": 0.00013870046707630088, "loss": 0.8136, "step": 24923 }, { "epoch": 0.6399784311554256, "grad_norm": 0.765625, "learning_rate": 0.00013869635069667255, "loss": 0.7996, "step": 24924 }, { "epoch": 0.6400041083513475, "grad_norm": 0.7890625, "learning_rate": 0.00013869223423992592, "loss": 0.8936, "step": 24925 }, { "epoch": 0.6400297855472693, "grad_norm": 0.7890625, "learning_rate": 0.00013868811770606916, "loss": 0.803, "step": 24926 }, { "epoch": 0.6400554627431911, "grad_norm": 0.75390625, "learning_rate": 0.0001386840010951105, "loss": 0.922, "step": 24927 }, { "epoch": 0.640081139939113, "grad_norm": 0.8203125, "learning_rate": 0.00013867988440705806, "loss": 0.8159, "step": 24928 }, { "epoch": 0.6401068171350348, "grad_norm": 0.7890625, "learning_rate": 0.00013867576764192013, "loss": 0.9242, "step": 24929 }, { "epoch": 0.6401324943309565, "grad_norm": 0.8125, "learning_rate": 0.00013867165079970489, "loss": 0.8704, "step": 24930 }, { "epoch": 0.6401581715268784, "grad_norm": 0.79296875, "learning_rate": 0.00013866753388042056, "loss": 0.8169, "step": 24931 }, { "epoch": 0.6401838487228002, "grad_norm": 0.78125, "learning_rate": 0.0001386634168840753, "loss": 0.7643, "step": 24932 }, { "epoch": 0.640209525918722, "grad_norm": 0.7578125, "learning_rate": 0.00013865929981067736, "loss": 0.8341, "step": 24933 }, { "epoch": 0.6402352031146439, "grad_norm": 0.75390625, "learning_rate": 0.00013865518266023493, "loss": 0.8687, "step": 24934 }, { "epoch": 0.6402608803105657, "grad_norm": 0.80859375, "learning_rate": 0.0001386510654327562, "loss": 0.8346, "step": 24935 }, { "epoch": 0.6402865575064876, "grad_norm": 0.80859375, "learning_rate": 0.0001386469481282494, "loss": 0.8798, "step": 24936 }, { "epoch": 0.6403122347024093, "grad_norm": 0.734375, "learning_rate": 0.00013864283074672273, "loss": 0.8621, "step": 24937 }, { "epoch": 0.6403379118983311, "grad_norm": 0.734375, "learning_rate": 0.00013863871328818433, "loss": 0.8111, "step": 24938 }, { "epoch": 0.640363589094253, "grad_norm": 0.75390625, "learning_rate": 0.00013863459575264253, "loss": 0.8218, "step": 24939 }, { "epoch": 0.6403892662901748, "grad_norm": 0.7578125, "learning_rate": 0.00013863047814010546, "loss": 0.9322, "step": 24940 }, { "epoch": 0.6404149434860966, "grad_norm": 1.25, "learning_rate": 0.00013862636045058134, "loss": 0.7907, "step": 24941 }, { "epoch": 0.6404406206820185, "grad_norm": 0.78125, "learning_rate": 0.00013862224268407837, "loss": 0.8085, "step": 24942 }, { "epoch": 0.6404662978779403, "grad_norm": 0.8671875, "learning_rate": 0.00013861812484060473, "loss": 0.8951, "step": 24943 }, { "epoch": 0.640491975073862, "grad_norm": 0.75, "learning_rate": 0.0001386140069201687, "loss": 0.7893, "step": 24944 }, { "epoch": 0.6405176522697839, "grad_norm": 0.7734375, "learning_rate": 0.0001386098889227784, "loss": 0.7397, "step": 24945 }, { "epoch": 0.6405433294657057, "grad_norm": 0.8359375, "learning_rate": 0.00013860577084844208, "loss": 0.9002, "step": 24946 }, { "epoch": 0.6405690066616275, "grad_norm": 0.8046875, "learning_rate": 0.00013860165269716798, "loss": 0.9332, "step": 24947 }, { "epoch": 0.6405946838575494, "grad_norm": 0.88671875, "learning_rate": 0.00013859753446896425, "loss": 0.8046, "step": 24948 }, { "epoch": 0.6406203610534712, "grad_norm": 0.734375, "learning_rate": 0.00013859341616383913, "loss": 0.8029, "step": 24949 }, { "epoch": 0.6406460382493929, "grad_norm": 0.7109375, "learning_rate": 0.00013858929778180082, "loss": 0.8296, "step": 24950 }, { "epoch": 0.6406717154453148, "grad_norm": 0.82421875, "learning_rate": 0.0001385851793228575, "loss": 0.8663, "step": 24951 }, { "epoch": 0.6406973926412366, "grad_norm": 0.94921875, "learning_rate": 0.00013858106078701743, "loss": 0.7386, "step": 24952 }, { "epoch": 0.6407230698371584, "grad_norm": 0.72265625, "learning_rate": 0.0001385769421742888, "loss": 0.8298, "step": 24953 }, { "epoch": 0.6407487470330803, "grad_norm": 0.87890625, "learning_rate": 0.00013857282348467978, "loss": 0.8703, "step": 24954 }, { "epoch": 0.6407744242290021, "grad_norm": 0.7109375, "learning_rate": 0.00013856870471819864, "loss": 0.8758, "step": 24955 }, { "epoch": 0.6408001014249239, "grad_norm": 0.84375, "learning_rate": 0.0001385645858748535, "loss": 0.9068, "step": 24956 }, { "epoch": 0.6408257786208457, "grad_norm": 0.82421875, "learning_rate": 0.00013856046695465265, "loss": 0.8158, "step": 24957 }, { "epoch": 0.6408514558167675, "grad_norm": 0.73828125, "learning_rate": 0.0001385563479576043, "loss": 0.8702, "step": 24958 }, { "epoch": 0.6408771330126893, "grad_norm": 0.8515625, "learning_rate": 0.0001385522288837166, "loss": 0.8047, "step": 24959 }, { "epoch": 0.6409028102086112, "grad_norm": 0.9375, "learning_rate": 0.00013854810973299784, "loss": 0.9074, "step": 24960 }, { "epoch": 0.640928487404533, "grad_norm": 0.859375, "learning_rate": 0.00013854399050545612, "loss": 0.8421, "step": 24961 }, { "epoch": 0.6409541646004548, "grad_norm": 0.80078125, "learning_rate": 0.00013853987120109973, "loss": 0.8721, "step": 24962 }, { "epoch": 0.6409798417963767, "grad_norm": 0.796875, "learning_rate": 0.0001385357518199369, "loss": 0.8766, "step": 24963 }, { "epoch": 0.6410055189922984, "grad_norm": 0.75, "learning_rate": 0.0001385316323619757, "loss": 0.9027, "step": 24964 }, { "epoch": 0.6410311961882202, "grad_norm": 0.87890625, "learning_rate": 0.0001385275128272245, "loss": 0.8869, "step": 24965 }, { "epoch": 0.6410568733841421, "grad_norm": 0.84765625, "learning_rate": 0.00013852339321569144, "loss": 0.8005, "step": 24966 }, { "epoch": 0.6410825505800639, "grad_norm": 0.75390625, "learning_rate": 0.00013851927352738475, "loss": 0.8691, "step": 24967 }, { "epoch": 0.6411082277759858, "grad_norm": 0.7109375, "learning_rate": 0.0001385151537623126, "loss": 0.8325, "step": 24968 }, { "epoch": 0.6411339049719076, "grad_norm": 0.71875, "learning_rate": 0.00013851103392048325, "loss": 0.693, "step": 24969 }, { "epoch": 0.6411595821678293, "grad_norm": 0.78125, "learning_rate": 0.0001385069140019049, "loss": 0.8065, "step": 24970 }, { "epoch": 0.6411852593637511, "grad_norm": 0.7109375, "learning_rate": 0.00013850279400658572, "loss": 0.8881, "step": 24971 }, { "epoch": 0.641210936559673, "grad_norm": 0.76953125, "learning_rate": 0.00013849867393453394, "loss": 0.8886, "step": 24972 }, { "epoch": 0.6412366137555948, "grad_norm": 0.82421875, "learning_rate": 0.00013849455378575783, "loss": 0.8603, "step": 24973 }, { "epoch": 0.6412622909515167, "grad_norm": 0.796875, "learning_rate": 0.00013849043356026553, "loss": 0.9791, "step": 24974 }, { "epoch": 0.6412879681474385, "grad_norm": 0.79296875, "learning_rate": 0.00013848631325806521, "loss": 0.7771, "step": 24975 }, { "epoch": 0.6413136453433603, "grad_norm": 0.8203125, "learning_rate": 0.00013848219287916522, "loss": 1.0107, "step": 24976 }, { "epoch": 0.641339322539282, "grad_norm": 0.7421875, "learning_rate": 0.0001384780724235737, "loss": 0.9431, "step": 24977 }, { "epoch": 0.6413649997352039, "grad_norm": 0.71484375, "learning_rate": 0.0001384739518912988, "loss": 0.782, "step": 24978 }, { "epoch": 0.6413906769311257, "grad_norm": 0.828125, "learning_rate": 0.00013846983128234882, "loss": 0.8653, "step": 24979 }, { "epoch": 0.6414163541270476, "grad_norm": 0.80078125, "learning_rate": 0.00013846571059673195, "loss": 0.8462, "step": 24980 }, { "epoch": 0.6414420313229694, "grad_norm": 0.765625, "learning_rate": 0.00013846158983445638, "loss": 0.9144, "step": 24981 }, { "epoch": 0.6414677085188912, "grad_norm": 0.78125, "learning_rate": 0.00013845746899553032, "loss": 0.8392, "step": 24982 }, { "epoch": 0.6414933857148131, "grad_norm": 0.7421875, "learning_rate": 0.00013845334807996204, "loss": 0.9596, "step": 24983 }, { "epoch": 0.6415190629107348, "grad_norm": 0.77734375, "learning_rate": 0.0001384492270877597, "loss": 0.8884, "step": 24984 }, { "epoch": 0.6415447401066566, "grad_norm": 0.7890625, "learning_rate": 0.00013844510601893152, "loss": 0.9264, "step": 24985 }, { "epoch": 0.6415704173025785, "grad_norm": 0.74609375, "learning_rate": 0.0001384409848734857, "loss": 0.9151, "step": 24986 }, { "epoch": 0.6415960944985003, "grad_norm": 0.75390625, "learning_rate": 0.00013843686365143048, "loss": 0.9301, "step": 24987 }, { "epoch": 0.6416217716944221, "grad_norm": 0.8203125, "learning_rate": 0.00013843274235277403, "loss": 0.7932, "step": 24988 }, { "epoch": 0.641647448890344, "grad_norm": 0.765625, "learning_rate": 0.00013842862097752465, "loss": 0.9124, "step": 24989 }, { "epoch": 0.6416731260862657, "grad_norm": 0.75390625, "learning_rate": 0.00013842449952569048, "loss": 0.7777, "step": 24990 }, { "epoch": 0.6416988032821875, "grad_norm": 0.73828125, "learning_rate": 0.00013842037799727974, "loss": 0.8239, "step": 24991 }, { "epoch": 0.6417244804781094, "grad_norm": 0.796875, "learning_rate": 0.00013841625639230066, "loss": 0.9103, "step": 24992 }, { "epoch": 0.6417501576740312, "grad_norm": 0.7734375, "learning_rate": 0.00013841213471076146, "loss": 0.8549, "step": 24993 }, { "epoch": 0.641775834869953, "grad_norm": 0.78125, "learning_rate": 0.00013840801295267037, "loss": 0.8183, "step": 24994 }, { "epoch": 0.6418015120658749, "grad_norm": 1.0546875, "learning_rate": 0.00013840389111803553, "loss": 0.7827, "step": 24995 }, { "epoch": 0.6418271892617967, "grad_norm": 0.78125, "learning_rate": 0.0001383997692068652, "loss": 1.0176, "step": 24996 }, { "epoch": 0.6418528664577184, "grad_norm": 0.73828125, "learning_rate": 0.00013839564721916764, "loss": 0.8375, "step": 24997 }, { "epoch": 0.6418785436536403, "grad_norm": 0.7734375, "learning_rate": 0.00013839152515495102, "loss": 0.881, "step": 24998 }, { "epoch": 0.6419042208495621, "grad_norm": 0.765625, "learning_rate": 0.00013838740301422354, "loss": 0.759, "step": 24999 }, { "epoch": 0.641929898045484, "grad_norm": 0.76171875, "learning_rate": 0.00013838328079699347, "loss": 0.904, "step": 25000 }, { "epoch": 0.641929898045484, "eval_loss": 0.8474920988082886, "eval_runtime": 384.9961, "eval_samples_per_second": 25.974, "eval_steps_per_second": 0.813, "step": 25000 }, { "epoch": 0.6419555752414058, "grad_norm": 0.74609375, "learning_rate": 0.00013837915850326894, "loss": 0.792, "step": 25001 }, { "epoch": 0.6419812524373276, "grad_norm": 0.8671875, "learning_rate": 0.00013837503613305823, "loss": 0.8383, "step": 25002 }, { "epoch": 0.6420069296332493, "grad_norm": 0.83203125, "learning_rate": 0.00013837091368636956, "loss": 0.9426, "step": 25003 }, { "epoch": 0.6420326068291712, "grad_norm": 0.73046875, "learning_rate": 0.00013836679116321107, "loss": 0.8764, "step": 25004 }, { "epoch": 0.642058284025093, "grad_norm": 0.79296875, "learning_rate": 0.00013836266856359107, "loss": 0.9005, "step": 25005 }, { "epoch": 0.6420839612210149, "grad_norm": 0.94921875, "learning_rate": 0.00013835854588751777, "loss": 0.8923, "step": 25006 }, { "epoch": 0.6421096384169367, "grad_norm": 0.77734375, "learning_rate": 0.0001383544231349993, "loss": 0.7546, "step": 25007 }, { "epoch": 0.6421353156128585, "grad_norm": 0.70703125, "learning_rate": 0.00013835030030604398, "loss": 0.8194, "step": 25008 }, { "epoch": 0.6421609928087804, "grad_norm": 0.80859375, "learning_rate": 0.00013834617740065988, "loss": 0.9364, "step": 25009 }, { "epoch": 0.6421866700047021, "grad_norm": 0.828125, "learning_rate": 0.0001383420544188554, "loss": 0.8578, "step": 25010 }, { "epoch": 0.6422123472006239, "grad_norm": 0.8515625, "learning_rate": 0.00013833793136063866, "loss": 0.9584, "step": 25011 }, { "epoch": 0.6422380243965458, "grad_norm": 0.83203125, "learning_rate": 0.00013833380822601786, "loss": 0.7816, "step": 25012 }, { "epoch": 0.6422637015924676, "grad_norm": 0.7578125, "learning_rate": 0.0001383296850150013, "loss": 0.9183, "step": 25013 }, { "epoch": 0.6422893787883894, "grad_norm": 1.0703125, "learning_rate": 0.00013832556172759704, "loss": 0.7789, "step": 25014 }, { "epoch": 0.6423150559843113, "grad_norm": 0.7734375, "learning_rate": 0.00013832143836381346, "loss": 0.8996, "step": 25015 }, { "epoch": 0.6423407331802331, "grad_norm": 0.80859375, "learning_rate": 0.0001383173149236587, "loss": 0.8965, "step": 25016 }, { "epoch": 0.6423664103761548, "grad_norm": 0.7578125, "learning_rate": 0.00013831319140714102, "loss": 0.7865, "step": 25017 }, { "epoch": 0.6423920875720767, "grad_norm": 0.7734375, "learning_rate": 0.00013830906781426858, "loss": 0.8328, "step": 25018 }, { "epoch": 0.6424177647679985, "grad_norm": 0.75, "learning_rate": 0.00013830494414504964, "loss": 0.7054, "step": 25019 }, { "epoch": 0.6424434419639203, "grad_norm": 0.765625, "learning_rate": 0.0001383008203994924, "loss": 0.8392, "step": 25020 }, { "epoch": 0.6424691191598422, "grad_norm": 0.796875, "learning_rate": 0.00013829669657760508, "loss": 0.9371, "step": 25021 }, { "epoch": 0.642494796355764, "grad_norm": 0.6953125, "learning_rate": 0.00013829257267939594, "loss": 0.7808, "step": 25022 }, { "epoch": 0.6425204735516857, "grad_norm": 0.76171875, "learning_rate": 0.0001382884487048731, "loss": 0.9282, "step": 25023 }, { "epoch": 0.6425461507476076, "grad_norm": 0.79296875, "learning_rate": 0.00013828432465404488, "loss": 0.8297, "step": 25024 }, { "epoch": 0.6425718279435294, "grad_norm": 0.7734375, "learning_rate": 0.00013828020052691946, "loss": 0.8177, "step": 25025 }, { "epoch": 0.6425975051394512, "grad_norm": 0.81640625, "learning_rate": 0.00013827607632350505, "loss": 0.8816, "step": 25026 }, { "epoch": 0.6426231823353731, "grad_norm": 0.734375, "learning_rate": 0.00013827195204380987, "loss": 0.8499, "step": 25027 }, { "epoch": 0.6426488595312949, "grad_norm": 0.71875, "learning_rate": 0.00013826782768784214, "loss": 0.7515, "step": 25028 }, { "epoch": 0.6426745367272167, "grad_norm": 0.7890625, "learning_rate": 0.00013826370325561012, "loss": 0.7683, "step": 25029 }, { "epoch": 0.6427002139231385, "grad_norm": 0.734375, "learning_rate": 0.00013825957874712196, "loss": 0.8735, "step": 25030 }, { "epoch": 0.6427258911190603, "grad_norm": 0.77734375, "learning_rate": 0.00013825545416238592, "loss": 0.8805, "step": 25031 }, { "epoch": 0.6427515683149821, "grad_norm": 0.984375, "learning_rate": 0.00013825132950141028, "loss": 0.9207, "step": 25032 }, { "epoch": 0.642777245510904, "grad_norm": 0.80078125, "learning_rate": 0.0001382472047642031, "loss": 0.9315, "step": 25033 }, { "epoch": 0.6428029227068258, "grad_norm": 1.1171875, "learning_rate": 0.00013824307995077275, "loss": 0.9103, "step": 25034 }, { "epoch": 0.6428285999027477, "grad_norm": 0.75390625, "learning_rate": 0.00013823895506112738, "loss": 0.8657, "step": 25035 }, { "epoch": 0.6428542770986695, "grad_norm": 0.73046875, "learning_rate": 0.00013823483009527522, "loss": 0.7459, "step": 25036 }, { "epoch": 0.6428799542945912, "grad_norm": 0.75390625, "learning_rate": 0.0001382307050532245, "loss": 0.8434, "step": 25037 }, { "epoch": 0.642905631490513, "grad_norm": 0.76953125, "learning_rate": 0.00013822657993498347, "loss": 0.7914, "step": 25038 }, { "epoch": 0.6429313086864349, "grad_norm": 0.73828125, "learning_rate": 0.0001382224547405603, "loss": 0.692, "step": 25039 }, { "epoch": 0.6429569858823567, "grad_norm": 0.81640625, "learning_rate": 0.00013821832946996323, "loss": 0.8697, "step": 25040 }, { "epoch": 0.6429826630782786, "grad_norm": 0.7421875, "learning_rate": 0.00013821420412320044, "loss": 0.8541, "step": 25041 }, { "epoch": 0.6430083402742004, "grad_norm": 0.765625, "learning_rate": 0.00013821007870028025, "loss": 0.8687, "step": 25042 }, { "epoch": 0.6430340174701221, "grad_norm": 0.78515625, "learning_rate": 0.0001382059532012108, "loss": 0.8522, "step": 25043 }, { "epoch": 0.643059694666044, "grad_norm": 0.72265625, "learning_rate": 0.00013820182762600033, "loss": 0.7915, "step": 25044 }, { "epoch": 0.6430853718619658, "grad_norm": 0.78125, "learning_rate": 0.0001381977019746571, "loss": 0.8885, "step": 25045 }, { "epoch": 0.6431110490578876, "grad_norm": 0.765625, "learning_rate": 0.00013819357624718928, "loss": 0.8746, "step": 25046 }, { "epoch": 0.6431367262538095, "grad_norm": 0.8125, "learning_rate": 0.00013818945044360512, "loss": 0.9158, "step": 25047 }, { "epoch": 0.6431624034497313, "grad_norm": 0.9921875, "learning_rate": 0.00013818532456391283, "loss": 0.9226, "step": 25048 }, { "epoch": 0.6431880806456531, "grad_norm": 0.8359375, "learning_rate": 0.00013818119860812062, "loss": 0.7716, "step": 25049 }, { "epoch": 0.6432137578415749, "grad_norm": 0.82421875, "learning_rate": 0.00013817707257623676, "loss": 0.9057, "step": 25050 }, { "epoch": 0.6432394350374967, "grad_norm": 0.796875, "learning_rate": 0.00013817294646826942, "loss": 0.7789, "step": 25051 }, { "epoch": 0.6432651122334185, "grad_norm": 0.7421875, "learning_rate": 0.00013816882028422686, "loss": 0.8207, "step": 25052 }, { "epoch": 0.6432907894293404, "grad_norm": 0.76171875, "learning_rate": 0.00013816469402411728, "loss": 0.7757, "step": 25053 }, { "epoch": 0.6433164666252622, "grad_norm": 0.77734375, "learning_rate": 0.00013816056768794894, "loss": 0.7825, "step": 25054 }, { "epoch": 0.643342143821184, "grad_norm": 0.8046875, "learning_rate": 0.00013815644127572997, "loss": 0.8232, "step": 25055 }, { "epoch": 0.6433678210171059, "grad_norm": 0.75, "learning_rate": 0.00013815231478746872, "loss": 0.9622, "step": 25056 }, { "epoch": 0.6433934982130276, "grad_norm": 0.78515625, "learning_rate": 0.00013814818822317331, "loss": 0.9579, "step": 25057 }, { "epoch": 0.6434191754089494, "grad_norm": 0.84375, "learning_rate": 0.00013814406158285203, "loss": 1.0074, "step": 25058 }, { "epoch": 0.6434448526048713, "grad_norm": 0.74609375, "learning_rate": 0.0001381399348665131, "loss": 0.8178, "step": 25059 }, { "epoch": 0.6434705298007931, "grad_norm": 0.79296875, "learning_rate": 0.00013813580807416472, "loss": 0.8631, "step": 25060 }, { "epoch": 0.643496206996715, "grad_norm": 0.796875, "learning_rate": 0.0001381316812058151, "loss": 0.8654, "step": 25061 }, { "epoch": 0.6435218841926368, "grad_norm": 0.734375, "learning_rate": 0.00013812755426147248, "loss": 0.824, "step": 25062 }, { "epoch": 0.6435475613885585, "grad_norm": 0.77734375, "learning_rate": 0.00013812342724114508, "loss": 0.7897, "step": 25063 }, { "epoch": 0.6435732385844803, "grad_norm": 0.75390625, "learning_rate": 0.00013811930014484116, "loss": 0.9345, "step": 25064 }, { "epoch": 0.6435989157804022, "grad_norm": 0.81640625, "learning_rate": 0.0001381151729725689, "loss": 0.9837, "step": 25065 }, { "epoch": 0.643624592976324, "grad_norm": 0.77734375, "learning_rate": 0.00013811104572433657, "loss": 0.8734, "step": 25066 }, { "epoch": 0.6436502701722459, "grad_norm": 0.7265625, "learning_rate": 0.00013810691840015235, "loss": 0.7954, "step": 25067 }, { "epoch": 0.6436759473681677, "grad_norm": 0.73046875, "learning_rate": 0.00013810279100002446, "loss": 0.814, "step": 25068 }, { "epoch": 0.6437016245640895, "grad_norm": 0.7265625, "learning_rate": 0.0001380986635239612, "loss": 0.7705, "step": 25069 }, { "epoch": 0.6437273017600113, "grad_norm": 0.7734375, "learning_rate": 0.0001380945359719707, "loss": 0.7278, "step": 25070 }, { "epoch": 0.6437529789559331, "grad_norm": 0.73046875, "learning_rate": 0.00013809040834406123, "loss": 0.7941, "step": 25071 }, { "epoch": 0.6437786561518549, "grad_norm": 0.75, "learning_rate": 0.00013808628064024107, "loss": 0.8291, "step": 25072 }, { "epoch": 0.6438043333477768, "grad_norm": 0.9375, "learning_rate": 0.00013808215286051836, "loss": 0.826, "step": 25073 }, { "epoch": 0.6438300105436986, "grad_norm": 0.8359375, "learning_rate": 0.00013807802500490134, "loss": 0.9419, "step": 25074 }, { "epoch": 0.6438556877396204, "grad_norm": 0.7578125, "learning_rate": 0.0001380738970733983, "loss": 0.7796, "step": 25075 }, { "epoch": 0.6438813649355423, "grad_norm": 0.765625, "learning_rate": 0.00013806976906601735, "loss": 0.7788, "step": 25076 }, { "epoch": 0.643907042131464, "grad_norm": 0.8203125, "learning_rate": 0.00013806564098276685, "loss": 0.8536, "step": 25077 }, { "epoch": 0.6439327193273858, "grad_norm": 0.8828125, "learning_rate": 0.00013806151282365497, "loss": 0.8229, "step": 25078 }, { "epoch": 0.6439583965233077, "grad_norm": 0.76953125, "learning_rate": 0.0001380573845886899, "loss": 0.9013, "step": 25079 }, { "epoch": 0.6439840737192295, "grad_norm": 0.8671875, "learning_rate": 0.00013805325627787993, "loss": 0.9898, "step": 25080 }, { "epoch": 0.6440097509151513, "grad_norm": 0.6875, "learning_rate": 0.00013804912789123323, "loss": 0.731, "step": 25081 }, { "epoch": 0.6440354281110732, "grad_norm": 0.82421875, "learning_rate": 0.00013804499942875808, "loss": 0.8439, "step": 25082 }, { "epoch": 0.6440611053069949, "grad_norm": 0.734375, "learning_rate": 0.00013804087089046266, "loss": 0.8486, "step": 25083 }, { "epoch": 0.6440867825029167, "grad_norm": 0.87890625, "learning_rate": 0.00013803674227635523, "loss": 0.934, "step": 25084 }, { "epoch": 0.6441124596988386, "grad_norm": 0.69921875, "learning_rate": 0.000138032613586444, "loss": 0.7008, "step": 25085 }, { "epoch": 0.6441381368947604, "grad_norm": 0.8203125, "learning_rate": 0.00013802848482073721, "loss": 0.9467, "step": 25086 }, { "epoch": 0.6441638140906822, "grad_norm": 0.76171875, "learning_rate": 0.0001380243559792431, "loss": 0.7205, "step": 25087 }, { "epoch": 0.6441894912866041, "grad_norm": 0.7578125, "learning_rate": 0.00013802022706196988, "loss": 0.827, "step": 25088 }, { "epoch": 0.6442151684825259, "grad_norm": 0.76953125, "learning_rate": 0.00013801609806892576, "loss": 0.8682, "step": 25089 }, { "epoch": 0.6442408456784476, "grad_norm": 0.73828125, "learning_rate": 0.000138011969000119, "loss": 0.8373, "step": 25090 }, { "epoch": 0.6442665228743695, "grad_norm": 0.796875, "learning_rate": 0.00013800783985555784, "loss": 0.8356, "step": 25091 }, { "epoch": 0.6442922000702913, "grad_norm": 0.7734375, "learning_rate": 0.00013800371063525047, "loss": 0.9684, "step": 25092 }, { "epoch": 0.6443178772662131, "grad_norm": 0.87109375, "learning_rate": 0.00013799958133920516, "loss": 0.8851, "step": 25093 }, { "epoch": 0.644343554462135, "grad_norm": 0.73046875, "learning_rate": 0.00013799545196743006, "loss": 0.6948, "step": 25094 }, { "epoch": 0.6443692316580568, "grad_norm": 0.8359375, "learning_rate": 0.00013799132251993352, "loss": 0.8573, "step": 25095 }, { "epoch": 0.6443949088539787, "grad_norm": 0.91015625, "learning_rate": 0.0001379871929967237, "loss": 0.7979, "step": 25096 }, { "epoch": 0.6444205860499004, "grad_norm": 0.76953125, "learning_rate": 0.00013798306339780878, "loss": 0.9127, "step": 25097 }, { "epoch": 0.6444462632458222, "grad_norm": 0.8125, "learning_rate": 0.0001379789337231971, "loss": 0.7129, "step": 25098 }, { "epoch": 0.644471940441744, "grad_norm": 0.890625, "learning_rate": 0.00013797480397289685, "loss": 0.9269, "step": 25099 }, { "epoch": 0.6444976176376659, "grad_norm": 0.81640625, "learning_rate": 0.00013797067414691617, "loss": 0.9873, "step": 25100 }, { "epoch": 0.6445232948335877, "grad_norm": 0.75390625, "learning_rate": 0.00013796654424526344, "loss": 0.7991, "step": 25101 }, { "epoch": 0.6445489720295096, "grad_norm": 0.7890625, "learning_rate": 0.0001379624142679468, "loss": 0.9966, "step": 25102 }, { "epoch": 0.6445746492254313, "grad_norm": 0.74609375, "learning_rate": 0.00013795828421497443, "loss": 0.7595, "step": 25103 }, { "epoch": 0.6446003264213531, "grad_norm": 0.76953125, "learning_rate": 0.0001379541540863547, "loss": 0.8369, "step": 25104 }, { "epoch": 0.644626003617275, "grad_norm": 0.7265625, "learning_rate": 0.00013795002388209576, "loss": 0.7474, "step": 25105 }, { "epoch": 0.6446516808131968, "grad_norm": 0.8359375, "learning_rate": 0.00013794589360220584, "loss": 0.7917, "step": 25106 }, { "epoch": 0.6446773580091186, "grad_norm": 0.73828125, "learning_rate": 0.00013794176324669318, "loss": 0.7819, "step": 25107 }, { "epoch": 0.6447030352050405, "grad_norm": 0.828125, "learning_rate": 0.000137937632815566, "loss": 0.9319, "step": 25108 }, { "epoch": 0.6447287124009623, "grad_norm": 0.91015625, "learning_rate": 0.00013793350230883257, "loss": 0.8049, "step": 25109 }, { "epoch": 0.644754389596884, "grad_norm": 0.8046875, "learning_rate": 0.0001379293717265011, "loss": 0.8189, "step": 25110 }, { "epoch": 0.6447800667928059, "grad_norm": 0.8125, "learning_rate": 0.0001379252410685798, "loss": 0.917, "step": 25111 }, { "epoch": 0.6448057439887277, "grad_norm": 0.80859375, "learning_rate": 0.00013792111033507695, "loss": 0.8252, "step": 25112 }, { "epoch": 0.6448314211846495, "grad_norm": 0.94921875, "learning_rate": 0.0001379169795260007, "loss": 0.783, "step": 25113 }, { "epoch": 0.6448570983805714, "grad_norm": 0.7734375, "learning_rate": 0.00013791284864135938, "loss": 0.743, "step": 25114 }, { "epoch": 0.6448827755764932, "grad_norm": 0.69140625, "learning_rate": 0.00013790871768116117, "loss": 0.8098, "step": 25115 }, { "epoch": 0.644908452772415, "grad_norm": 0.76171875, "learning_rate": 0.00013790458664541427, "loss": 0.8458, "step": 25116 }, { "epoch": 0.6449341299683368, "grad_norm": 0.98046875, "learning_rate": 0.000137900455534127, "loss": 0.8252, "step": 25117 }, { "epoch": 0.6449598071642586, "grad_norm": 0.83984375, "learning_rate": 0.00013789632434730754, "loss": 0.902, "step": 25118 }, { "epoch": 0.6449854843601804, "grad_norm": 0.8515625, "learning_rate": 0.00013789219308496411, "loss": 0.7989, "step": 25119 }, { "epoch": 0.6450111615561023, "grad_norm": 0.82421875, "learning_rate": 0.00013788806174710498, "loss": 0.8998, "step": 25120 }, { "epoch": 0.6450368387520241, "grad_norm": 0.796875, "learning_rate": 0.00013788393033373835, "loss": 0.9596, "step": 25121 }, { "epoch": 0.645062515947946, "grad_norm": 0.734375, "learning_rate": 0.00013787979884487248, "loss": 0.8665, "step": 25122 }, { "epoch": 0.6450881931438677, "grad_norm": 0.79296875, "learning_rate": 0.00013787566728051557, "loss": 0.7832, "step": 25123 }, { "epoch": 0.6451138703397895, "grad_norm": 0.74609375, "learning_rate": 0.0001378715356406759, "loss": 0.9074, "step": 25124 }, { "epoch": 0.6451395475357113, "grad_norm": 0.78125, "learning_rate": 0.00013786740392536168, "loss": 0.8672, "step": 25125 }, { "epoch": 0.6451652247316332, "grad_norm": 0.7890625, "learning_rate": 0.00013786327213458112, "loss": 0.8528, "step": 25126 }, { "epoch": 0.645190901927555, "grad_norm": 0.77734375, "learning_rate": 0.0001378591402683425, "loss": 0.8145, "step": 25127 }, { "epoch": 0.6452165791234769, "grad_norm": 0.75390625, "learning_rate": 0.00013785500832665405, "loss": 0.9143, "step": 25128 }, { "epoch": 0.6452422563193987, "grad_norm": 0.765625, "learning_rate": 0.00013785087630952396, "loss": 1.0317, "step": 25129 }, { "epoch": 0.6452679335153204, "grad_norm": 0.83984375, "learning_rate": 0.0001378467442169605, "loss": 0.7844, "step": 25130 }, { "epoch": 0.6452936107112422, "grad_norm": 0.69140625, "learning_rate": 0.00013784261204897191, "loss": 0.7177, "step": 25131 }, { "epoch": 0.6453192879071641, "grad_norm": 0.72265625, "learning_rate": 0.0001378384798055664, "loss": 0.7269, "step": 25132 }, { "epoch": 0.6453449651030859, "grad_norm": 0.75, "learning_rate": 0.00013783434748675222, "loss": 0.8651, "step": 25133 }, { "epoch": 0.6453706422990078, "grad_norm": 0.73828125, "learning_rate": 0.00013783021509253759, "loss": 0.8662, "step": 25134 }, { "epoch": 0.6453963194949296, "grad_norm": 0.79296875, "learning_rate": 0.0001378260826229308, "loss": 0.792, "step": 25135 }, { "epoch": 0.6454219966908514, "grad_norm": 0.80859375, "learning_rate": 0.00013782195007793998, "loss": 0.9707, "step": 25136 }, { "epoch": 0.6454476738867732, "grad_norm": 0.8046875, "learning_rate": 0.00013781781745757346, "loss": 0.7156, "step": 25137 }, { "epoch": 0.645473351082695, "grad_norm": 0.70703125, "learning_rate": 0.00013781368476183947, "loss": 0.8004, "step": 25138 }, { "epoch": 0.6454990282786168, "grad_norm": 0.73828125, "learning_rate": 0.0001378095519907462, "loss": 0.7658, "step": 25139 }, { "epoch": 0.6455247054745387, "grad_norm": 0.73828125, "learning_rate": 0.0001378054191443019, "loss": 0.7994, "step": 25140 }, { "epoch": 0.6455503826704605, "grad_norm": 0.6953125, "learning_rate": 0.00013780128622251482, "loss": 0.7634, "step": 25141 }, { "epoch": 0.6455760598663823, "grad_norm": 0.71484375, "learning_rate": 0.00013779715322539323, "loss": 0.7435, "step": 25142 }, { "epoch": 0.6456017370623041, "grad_norm": 0.78515625, "learning_rate": 0.00013779302015294524, "loss": 0.8891, "step": 25143 }, { "epoch": 0.6456274142582259, "grad_norm": 0.8125, "learning_rate": 0.00013778888700517926, "loss": 0.8722, "step": 25144 }, { "epoch": 0.6456530914541477, "grad_norm": 0.765625, "learning_rate": 0.0001377847537821034, "loss": 0.7993, "step": 25145 }, { "epoch": 0.6456787686500696, "grad_norm": 0.78125, "learning_rate": 0.00013778062048372595, "loss": 0.7797, "step": 25146 }, { "epoch": 0.6457044458459914, "grad_norm": 0.7890625, "learning_rate": 0.00013777648711005514, "loss": 0.817, "step": 25147 }, { "epoch": 0.6457301230419132, "grad_norm": 0.75, "learning_rate": 0.00013777235366109917, "loss": 0.9045, "step": 25148 }, { "epoch": 0.6457558002378351, "grad_norm": 0.8203125, "learning_rate": 0.00013776822013686636, "loss": 0.9062, "step": 25149 }, { "epoch": 0.6457814774337568, "grad_norm": 0.76953125, "learning_rate": 0.00013776408653736488, "loss": 0.8028, "step": 25150 }, { "epoch": 0.6458071546296786, "grad_norm": 0.828125, "learning_rate": 0.000137759952862603, "loss": 0.8185, "step": 25151 }, { "epoch": 0.6458328318256005, "grad_norm": 0.8046875, "learning_rate": 0.0001377558191125889, "loss": 0.8625, "step": 25152 }, { "epoch": 0.6458585090215223, "grad_norm": 0.83984375, "learning_rate": 0.0001377516852873309, "loss": 0.9987, "step": 25153 }, { "epoch": 0.6458841862174441, "grad_norm": 0.77734375, "learning_rate": 0.0001377475513868372, "loss": 0.8259, "step": 25154 }, { "epoch": 0.645909863413366, "grad_norm": 0.796875, "learning_rate": 0.00013774341741111602, "loss": 0.9701, "step": 25155 }, { "epoch": 0.6459355406092878, "grad_norm": 0.796875, "learning_rate": 0.00013773928336017563, "loss": 0.8583, "step": 25156 }, { "epoch": 0.6459612178052095, "grad_norm": 0.796875, "learning_rate": 0.00013773514923402428, "loss": 0.9051, "step": 25157 }, { "epoch": 0.6459868950011314, "grad_norm": 0.75390625, "learning_rate": 0.00013773101503267016, "loss": 0.8709, "step": 25158 }, { "epoch": 0.6460125721970532, "grad_norm": 0.79296875, "learning_rate": 0.00013772688075612153, "loss": 0.8519, "step": 25159 }, { "epoch": 0.646038249392975, "grad_norm": 1.1875, "learning_rate": 0.00013772274640438668, "loss": 0.8306, "step": 25160 }, { "epoch": 0.6460639265888969, "grad_norm": 0.76171875, "learning_rate": 0.00013771861197747374, "loss": 0.9317, "step": 25161 }, { "epoch": 0.6460896037848187, "grad_norm": 0.77734375, "learning_rate": 0.00013771447747539106, "loss": 0.8018, "step": 25162 }, { "epoch": 0.6461152809807404, "grad_norm": 0.8125, "learning_rate": 0.00013771034289814682, "loss": 0.8556, "step": 25163 }, { "epoch": 0.6461409581766623, "grad_norm": 0.76171875, "learning_rate": 0.00013770620824574926, "loss": 0.8477, "step": 25164 }, { "epoch": 0.6461666353725841, "grad_norm": 0.6875, "learning_rate": 0.00013770207351820665, "loss": 0.7179, "step": 25165 }, { "epoch": 0.646192312568506, "grad_norm": 0.82421875, "learning_rate": 0.00013769793871552718, "loss": 0.7515, "step": 25166 }, { "epoch": 0.6462179897644278, "grad_norm": 0.76953125, "learning_rate": 0.00013769380383771916, "loss": 0.8149, "step": 25167 }, { "epoch": 0.6462436669603496, "grad_norm": 0.7734375, "learning_rate": 0.0001376896688847908, "loss": 0.8605, "step": 25168 }, { "epoch": 0.6462693441562715, "grad_norm": 0.77734375, "learning_rate": 0.0001376855338567503, "loss": 0.8216, "step": 25169 }, { "epoch": 0.6462950213521932, "grad_norm": 0.71875, "learning_rate": 0.00013768139875360594, "loss": 0.7876, "step": 25170 }, { "epoch": 0.646320698548115, "grad_norm": 0.76953125, "learning_rate": 0.000137677263575366, "loss": 0.9225, "step": 25171 }, { "epoch": 0.6463463757440369, "grad_norm": 0.72265625, "learning_rate": 0.00013767312832203862, "loss": 0.7184, "step": 25172 }, { "epoch": 0.6463720529399587, "grad_norm": 0.8359375, "learning_rate": 0.00013766899299363214, "loss": 0.7527, "step": 25173 }, { "epoch": 0.6463977301358805, "grad_norm": 0.70703125, "learning_rate": 0.00013766485759015472, "loss": 0.8769, "step": 25174 }, { "epoch": 0.6464234073318024, "grad_norm": 0.9453125, "learning_rate": 0.00013766072211161468, "loss": 0.8256, "step": 25175 }, { "epoch": 0.6464490845277242, "grad_norm": 0.7421875, "learning_rate": 0.0001376565865580202, "loss": 0.9438, "step": 25176 }, { "epoch": 0.6464747617236459, "grad_norm": 0.9609375, "learning_rate": 0.00013765245092937955, "loss": 0.7397, "step": 25177 }, { "epoch": 0.6465004389195678, "grad_norm": 0.78125, "learning_rate": 0.000137648315225701, "loss": 0.7263, "step": 25178 }, { "epoch": 0.6465261161154896, "grad_norm": 0.7890625, "learning_rate": 0.0001376441794469927, "loss": 0.8494, "step": 25179 }, { "epoch": 0.6465517933114114, "grad_norm": 0.78125, "learning_rate": 0.00013764004359326297, "loss": 0.8722, "step": 25180 }, { "epoch": 0.6465774705073333, "grad_norm": 0.73828125, "learning_rate": 0.00013763590766452003, "loss": 1.0094, "step": 25181 }, { "epoch": 0.6466031477032551, "grad_norm": 0.8125, "learning_rate": 0.00013763177166077216, "loss": 0.9433, "step": 25182 }, { "epoch": 0.6466288248991768, "grad_norm": 0.765625, "learning_rate": 0.00013762763558202755, "loss": 1.0309, "step": 25183 }, { "epoch": 0.6466545020950987, "grad_norm": 0.828125, "learning_rate": 0.00013762349942829448, "loss": 0.8462, "step": 25184 }, { "epoch": 0.6466801792910205, "grad_norm": 0.7578125, "learning_rate": 0.0001376193631995811, "loss": 0.9336, "step": 25185 }, { "epoch": 0.6467058564869423, "grad_norm": 0.7890625, "learning_rate": 0.0001376152268958958, "loss": 0.7647, "step": 25186 }, { "epoch": 0.6467315336828642, "grad_norm": 0.7734375, "learning_rate": 0.00013761109051724671, "loss": 0.7401, "step": 25187 }, { "epoch": 0.646757210878786, "grad_norm": 0.78125, "learning_rate": 0.00013760695406364213, "loss": 0.8149, "step": 25188 }, { "epoch": 0.6467828880747079, "grad_norm": 0.76171875, "learning_rate": 0.0001376028175350903, "loss": 0.9836, "step": 25189 }, { "epoch": 0.6468085652706296, "grad_norm": 0.67578125, "learning_rate": 0.00013759868093159944, "loss": 0.7256, "step": 25190 }, { "epoch": 0.6468342424665514, "grad_norm": 0.78515625, "learning_rate": 0.00013759454425317781, "loss": 0.7426, "step": 25191 }, { "epoch": 0.6468599196624732, "grad_norm": 0.75, "learning_rate": 0.00013759040749983362, "loss": 0.8309, "step": 25192 }, { "epoch": 0.6468855968583951, "grad_norm": 0.73828125, "learning_rate": 0.00013758627067157517, "loss": 0.8531, "step": 25193 }, { "epoch": 0.6469112740543169, "grad_norm": 0.8125, "learning_rate": 0.00013758213376841068, "loss": 0.967, "step": 25194 }, { "epoch": 0.6469369512502388, "grad_norm": 0.78515625, "learning_rate": 0.0001375779967903484, "loss": 0.8181, "step": 25195 }, { "epoch": 0.6469626284461606, "grad_norm": 0.67578125, "learning_rate": 0.00013757385973739653, "loss": 0.7432, "step": 25196 }, { "epoch": 0.6469883056420823, "grad_norm": 0.7109375, "learning_rate": 0.0001375697226095634, "loss": 0.9185, "step": 25197 }, { "epoch": 0.6470139828380042, "grad_norm": 0.73828125, "learning_rate": 0.0001375655854068572, "loss": 0.8082, "step": 25198 }, { "epoch": 0.647039660033926, "grad_norm": 0.76953125, "learning_rate": 0.00013756144812928614, "loss": 0.7711, "step": 25199 }, { "epoch": 0.6470653372298478, "grad_norm": 0.74609375, "learning_rate": 0.00013755731077685855, "loss": 0.8645, "step": 25200 }, { "epoch": 0.6470910144257697, "grad_norm": 0.84375, "learning_rate": 0.00013755317334958258, "loss": 0.8453, "step": 25201 }, { "epoch": 0.6471166916216915, "grad_norm": 0.89453125, "learning_rate": 0.00013754903584746656, "loss": 0.8532, "step": 25202 }, { "epoch": 0.6471423688176132, "grad_norm": 0.92578125, "learning_rate": 0.0001375448982705187, "loss": 0.758, "step": 25203 }, { "epoch": 0.6471680460135351, "grad_norm": 0.796875, "learning_rate": 0.00013754076061874728, "loss": 0.8418, "step": 25204 }, { "epoch": 0.6471937232094569, "grad_norm": 0.75390625, "learning_rate": 0.0001375366228921605, "loss": 0.7524, "step": 25205 }, { "epoch": 0.6472194004053787, "grad_norm": 0.91015625, "learning_rate": 0.00013753248509076657, "loss": 0.7678, "step": 25206 }, { "epoch": 0.6472450776013006, "grad_norm": 0.78515625, "learning_rate": 0.00013752834721457384, "loss": 0.7911, "step": 25207 }, { "epoch": 0.6472707547972224, "grad_norm": 0.7890625, "learning_rate": 0.00013752420926359049, "loss": 0.8406, "step": 25208 }, { "epoch": 0.6472964319931442, "grad_norm": 0.84375, "learning_rate": 0.00013752007123782477, "loss": 0.8892, "step": 25209 }, { "epoch": 0.647322109189066, "grad_norm": 0.98828125, "learning_rate": 0.00013751593313728497, "loss": 0.7886, "step": 25210 }, { "epoch": 0.6473477863849878, "grad_norm": 0.73046875, "learning_rate": 0.00013751179496197927, "loss": 0.902, "step": 25211 }, { "epoch": 0.6473734635809096, "grad_norm": 0.765625, "learning_rate": 0.00013750765671191598, "loss": 0.8989, "step": 25212 }, { "epoch": 0.6473991407768315, "grad_norm": 0.84375, "learning_rate": 0.0001375035183871033, "loss": 0.927, "step": 25213 }, { "epoch": 0.6474248179727533, "grad_norm": 0.77734375, "learning_rate": 0.00013749937998754948, "loss": 0.916, "step": 25214 }, { "epoch": 0.6474504951686751, "grad_norm": 0.77734375, "learning_rate": 0.0001374952415132628, "loss": 0.8348, "step": 25215 }, { "epoch": 0.6474761723645969, "grad_norm": 0.76953125, "learning_rate": 0.0001374911029642515, "loss": 0.8636, "step": 25216 }, { "epoch": 0.6475018495605187, "grad_norm": 0.8359375, "learning_rate": 0.0001374869643405238, "loss": 0.9451, "step": 25217 }, { "epoch": 0.6475275267564405, "grad_norm": 0.859375, "learning_rate": 0.000137482825642088, "loss": 0.8917, "step": 25218 }, { "epoch": 0.6475532039523624, "grad_norm": 0.71875, "learning_rate": 0.00013747868686895227, "loss": 0.8309, "step": 25219 }, { "epoch": 0.6475788811482842, "grad_norm": 0.75390625, "learning_rate": 0.00013747454802112493, "loss": 0.7224, "step": 25220 }, { "epoch": 0.647604558344206, "grad_norm": 0.80078125, "learning_rate": 0.0001374704090986142, "loss": 0.7609, "step": 25221 }, { "epoch": 0.6476302355401279, "grad_norm": 0.81640625, "learning_rate": 0.0001374662701014283, "loss": 0.9017, "step": 25222 }, { "epoch": 0.6476559127360496, "grad_norm": 0.8125, "learning_rate": 0.00013746213102957557, "loss": 0.8444, "step": 25223 }, { "epoch": 0.6476815899319714, "grad_norm": 0.8125, "learning_rate": 0.00013745799188306416, "loss": 0.8554, "step": 25224 }, { "epoch": 0.6477072671278933, "grad_norm": 0.7578125, "learning_rate": 0.00013745385266190235, "loss": 0.7776, "step": 25225 }, { "epoch": 0.6477329443238151, "grad_norm": 0.80859375, "learning_rate": 0.0001374497133660984, "loss": 0.9426, "step": 25226 }, { "epoch": 0.647758621519737, "grad_norm": 0.79296875, "learning_rate": 0.00013744557399566053, "loss": 0.8544, "step": 25227 }, { "epoch": 0.6477842987156588, "grad_norm": 0.7734375, "learning_rate": 0.00013744143455059703, "loss": 0.8346, "step": 25228 }, { "epoch": 0.6478099759115806, "grad_norm": 0.7578125, "learning_rate": 0.00013743729503091614, "loss": 0.7173, "step": 25229 }, { "epoch": 0.6478356531075024, "grad_norm": 0.7890625, "learning_rate": 0.00013743315543662612, "loss": 0.7846, "step": 25230 }, { "epoch": 0.6478613303034242, "grad_norm": 0.75390625, "learning_rate": 0.00013742901576773518, "loss": 0.8756, "step": 25231 }, { "epoch": 0.647887007499346, "grad_norm": 0.75390625, "learning_rate": 0.0001374248760242516, "loss": 0.8379, "step": 25232 }, { "epoch": 0.6479126846952679, "grad_norm": 0.76953125, "learning_rate": 0.00013742073620618358, "loss": 0.8419, "step": 25233 }, { "epoch": 0.6479383618911897, "grad_norm": 0.8359375, "learning_rate": 0.00013741659631353947, "loss": 0.863, "step": 25234 }, { "epoch": 0.6479640390871115, "grad_norm": 0.7734375, "learning_rate": 0.00013741245634632746, "loss": 0.8429, "step": 25235 }, { "epoch": 0.6479897162830333, "grad_norm": 0.74609375, "learning_rate": 0.00013740831630455579, "loss": 0.778, "step": 25236 }, { "epoch": 0.6480153934789551, "grad_norm": 0.796875, "learning_rate": 0.00013740417618823272, "loss": 0.8313, "step": 25237 }, { "epoch": 0.6480410706748769, "grad_norm": 0.73828125, "learning_rate": 0.0001374000359973665, "loss": 0.8314, "step": 25238 }, { "epoch": 0.6480667478707988, "grad_norm": 0.75390625, "learning_rate": 0.00013739589573196542, "loss": 0.7156, "step": 25239 }, { "epoch": 0.6480924250667206, "grad_norm": 0.75390625, "learning_rate": 0.00013739175539203766, "loss": 0.7901, "step": 25240 }, { "epoch": 0.6481181022626424, "grad_norm": 0.82421875, "learning_rate": 0.0001373876149775915, "loss": 0.8591, "step": 25241 }, { "epoch": 0.6481437794585643, "grad_norm": 0.8125, "learning_rate": 0.00013738347448863521, "loss": 0.8449, "step": 25242 }, { "epoch": 0.648169456654486, "grad_norm": 0.8203125, "learning_rate": 0.0001373793339251771, "loss": 0.853, "step": 25243 }, { "epoch": 0.6481951338504078, "grad_norm": 0.74609375, "learning_rate": 0.00013737519328722527, "loss": 0.8895, "step": 25244 }, { "epoch": 0.6482208110463297, "grad_norm": 0.72265625, "learning_rate": 0.0001373710525747881, "loss": 0.7283, "step": 25245 }, { "epoch": 0.6482464882422515, "grad_norm": 0.69921875, "learning_rate": 0.00013736691178787379, "loss": 0.7821, "step": 25246 }, { "epoch": 0.6482721654381733, "grad_norm": 0.859375, "learning_rate": 0.00013736277092649056, "loss": 0.8915, "step": 25247 }, { "epoch": 0.6482978426340952, "grad_norm": 0.765625, "learning_rate": 0.00013735862999064678, "loss": 0.9375, "step": 25248 }, { "epoch": 0.648323519830017, "grad_norm": 0.83984375, "learning_rate": 0.00013735448898035055, "loss": 0.91, "step": 25249 }, { "epoch": 0.6483491970259387, "grad_norm": 0.74609375, "learning_rate": 0.00013735034789561025, "loss": 0.7986, "step": 25250 }, { "epoch": 0.6483748742218606, "grad_norm": 0.8203125, "learning_rate": 0.00013734620673643404, "loss": 0.8174, "step": 25251 }, { "epoch": 0.6484005514177824, "grad_norm": 0.75, "learning_rate": 0.00013734206550283025, "loss": 0.8746, "step": 25252 }, { "epoch": 0.6484262286137042, "grad_norm": 0.87109375, "learning_rate": 0.00013733792419480707, "loss": 0.8863, "step": 25253 }, { "epoch": 0.6484519058096261, "grad_norm": 0.796875, "learning_rate": 0.00013733378281237277, "loss": 0.8321, "step": 25254 }, { "epoch": 0.6484775830055479, "grad_norm": 0.75390625, "learning_rate": 0.00013732964135553566, "loss": 0.9151, "step": 25255 }, { "epoch": 0.6485032602014696, "grad_norm": 0.75390625, "learning_rate": 0.00013732549982430392, "loss": 0.8445, "step": 25256 }, { "epoch": 0.6485289373973915, "grad_norm": 0.7890625, "learning_rate": 0.00013732135821868582, "loss": 0.8493, "step": 25257 }, { "epoch": 0.6485546145933133, "grad_norm": 0.86328125, "learning_rate": 0.00013731721653868962, "loss": 0.8554, "step": 25258 }, { "epoch": 0.6485802917892352, "grad_norm": 0.84375, "learning_rate": 0.0001373130747843236, "loss": 0.9813, "step": 25259 }, { "epoch": 0.648605968985157, "grad_norm": 0.84375, "learning_rate": 0.00013730893295559596, "loss": 0.9134, "step": 25260 }, { "epoch": 0.6486316461810788, "grad_norm": 0.76171875, "learning_rate": 0.00013730479105251504, "loss": 0.7726, "step": 25261 }, { "epoch": 0.6486573233770007, "grad_norm": 0.71484375, "learning_rate": 0.00013730064907508898, "loss": 0.8127, "step": 25262 }, { "epoch": 0.6486830005729224, "grad_norm": 0.74609375, "learning_rate": 0.00013729650702332618, "loss": 0.7979, "step": 25263 }, { "epoch": 0.6487086777688442, "grad_norm": 0.78515625, "learning_rate": 0.00013729236489723476, "loss": 0.9021, "step": 25264 }, { "epoch": 0.6487343549647661, "grad_norm": 0.8125, "learning_rate": 0.000137288222696823, "loss": 0.9618, "step": 25265 }, { "epoch": 0.6487600321606879, "grad_norm": 0.7734375, "learning_rate": 0.0001372840804220992, "loss": 0.8003, "step": 25266 }, { "epoch": 0.6487857093566097, "grad_norm": 0.75390625, "learning_rate": 0.0001372799380730716, "loss": 0.7921, "step": 25267 }, { "epoch": 0.6488113865525316, "grad_norm": 0.77734375, "learning_rate": 0.00013727579564974844, "loss": 0.837, "step": 25268 }, { "epoch": 0.6488370637484534, "grad_norm": 0.8828125, "learning_rate": 0.00013727165315213804, "loss": 0.8806, "step": 25269 }, { "epoch": 0.6488627409443751, "grad_norm": 0.796875, "learning_rate": 0.00013726751058024854, "loss": 0.7885, "step": 25270 }, { "epoch": 0.648888418140297, "grad_norm": 0.85546875, "learning_rate": 0.00013726336793408827, "loss": 0.9729, "step": 25271 }, { "epoch": 0.6489140953362188, "grad_norm": 0.79296875, "learning_rate": 0.0001372592252136655, "loss": 0.8562, "step": 25272 }, { "epoch": 0.6489397725321406, "grad_norm": 0.83203125, "learning_rate": 0.0001372550824189884, "loss": 0.8278, "step": 25273 }, { "epoch": 0.6489654497280625, "grad_norm": 0.79296875, "learning_rate": 0.00013725093955006533, "loss": 0.8107, "step": 25274 }, { "epoch": 0.6489911269239843, "grad_norm": 0.78515625, "learning_rate": 0.0001372467966069045, "loss": 0.7904, "step": 25275 }, { "epoch": 0.649016804119906, "grad_norm": 0.83203125, "learning_rate": 0.0001372426535895142, "loss": 0.7392, "step": 25276 }, { "epoch": 0.6490424813158279, "grad_norm": 0.76953125, "learning_rate": 0.0001372385104979026, "loss": 0.9405, "step": 25277 }, { "epoch": 0.6490681585117497, "grad_norm": 0.7109375, "learning_rate": 0.00013723436733207805, "loss": 0.9727, "step": 25278 }, { "epoch": 0.6490938357076715, "grad_norm": 0.83203125, "learning_rate": 0.00013723022409204875, "loss": 0.9506, "step": 25279 }, { "epoch": 0.6491195129035934, "grad_norm": 0.765625, "learning_rate": 0.000137226080777823, "loss": 0.7834, "step": 25280 }, { "epoch": 0.6491451900995152, "grad_norm": 0.8125, "learning_rate": 0.000137221937389409, "loss": 1.003, "step": 25281 }, { "epoch": 0.649170867295437, "grad_norm": 0.828125, "learning_rate": 0.00013721779392681508, "loss": 0.8163, "step": 25282 }, { "epoch": 0.6491965444913588, "grad_norm": 0.79296875, "learning_rate": 0.0001372136503900494, "loss": 0.8513, "step": 25283 }, { "epoch": 0.6492222216872806, "grad_norm": 0.875, "learning_rate": 0.00013720950677912033, "loss": 0.8631, "step": 25284 }, { "epoch": 0.6492478988832024, "grad_norm": 0.76953125, "learning_rate": 0.00013720536309403604, "loss": 0.8273, "step": 25285 }, { "epoch": 0.6492735760791243, "grad_norm": 0.78515625, "learning_rate": 0.00013720121933480484, "loss": 0.9312, "step": 25286 }, { "epoch": 0.6492992532750461, "grad_norm": 0.87109375, "learning_rate": 0.00013719707550143495, "loss": 0.8187, "step": 25287 }, { "epoch": 0.649324930470968, "grad_norm": 0.76953125, "learning_rate": 0.00013719293159393468, "loss": 0.8737, "step": 25288 }, { "epoch": 0.6493506076668898, "grad_norm": 0.7421875, "learning_rate": 0.00013718878761231223, "loss": 0.8214, "step": 25289 }, { "epoch": 0.6493762848628115, "grad_norm": 0.67578125, "learning_rate": 0.00013718464355657592, "loss": 0.7403, "step": 25290 }, { "epoch": 0.6494019620587334, "grad_norm": 0.70703125, "learning_rate": 0.00013718049942673395, "loss": 0.7515, "step": 25291 }, { "epoch": 0.6494276392546552, "grad_norm": 0.859375, "learning_rate": 0.0001371763552227946, "loss": 0.8756, "step": 25292 }, { "epoch": 0.649453316450577, "grad_norm": 0.7890625, "learning_rate": 0.00013717221094476612, "loss": 1.0629, "step": 25293 }, { "epoch": 0.6494789936464989, "grad_norm": 0.80078125, "learning_rate": 0.0001371680665926568, "loss": 0.8477, "step": 25294 }, { "epoch": 0.6495046708424207, "grad_norm": 0.75390625, "learning_rate": 0.0001371639221664749, "loss": 0.7958, "step": 25295 }, { "epoch": 0.6495303480383424, "grad_norm": 0.8046875, "learning_rate": 0.00013715977766622862, "loss": 0.8357, "step": 25296 }, { "epoch": 0.6495560252342643, "grad_norm": 0.7734375, "learning_rate": 0.0001371556330919263, "loss": 0.8375, "step": 25297 }, { "epoch": 0.6495817024301861, "grad_norm": 0.76953125, "learning_rate": 0.00013715148844357616, "loss": 0.7745, "step": 25298 }, { "epoch": 0.6496073796261079, "grad_norm": 0.8203125, "learning_rate": 0.0001371473437211864, "loss": 0.7372, "step": 25299 }, { "epoch": 0.6496330568220298, "grad_norm": 0.83984375, "learning_rate": 0.00013714319892476538, "loss": 0.8717, "step": 25300 }, { "epoch": 0.6496587340179516, "grad_norm": 0.7578125, "learning_rate": 0.00013713905405432134, "loss": 0.7514, "step": 25301 }, { "epoch": 0.6496844112138734, "grad_norm": 0.73046875, "learning_rate": 0.0001371349091098625, "loss": 0.9245, "step": 25302 }, { "epoch": 0.6497100884097952, "grad_norm": 0.82421875, "learning_rate": 0.00013713076409139712, "loss": 0.9084, "step": 25303 }, { "epoch": 0.649735765605717, "grad_norm": 0.83203125, "learning_rate": 0.00013712661899893352, "loss": 0.8907, "step": 25304 }, { "epoch": 0.6497614428016388, "grad_norm": 0.7734375, "learning_rate": 0.0001371224738324799, "loss": 0.8802, "step": 25305 }, { "epoch": 0.6497871199975607, "grad_norm": 0.7578125, "learning_rate": 0.0001371183285920445, "loss": 0.8881, "step": 25306 }, { "epoch": 0.6498127971934825, "grad_norm": 0.73828125, "learning_rate": 0.00013711418327763566, "loss": 0.7871, "step": 25307 }, { "epoch": 0.6498384743894043, "grad_norm": 0.7578125, "learning_rate": 0.00013711003788926165, "loss": 0.9131, "step": 25308 }, { "epoch": 0.6498641515853262, "grad_norm": 0.77734375, "learning_rate": 0.00013710589242693066, "loss": 0.915, "step": 25309 }, { "epoch": 0.6498898287812479, "grad_norm": 0.85546875, "learning_rate": 0.00013710174689065096, "loss": 0.8808, "step": 25310 }, { "epoch": 0.6499155059771697, "grad_norm": 0.80859375, "learning_rate": 0.00013709760128043083, "loss": 0.903, "step": 25311 }, { "epoch": 0.6499411831730916, "grad_norm": 0.80859375, "learning_rate": 0.00013709345559627856, "loss": 0.8343, "step": 25312 }, { "epoch": 0.6499668603690134, "grad_norm": 0.828125, "learning_rate": 0.00013708930983820234, "loss": 0.7951, "step": 25313 }, { "epoch": 0.6499925375649352, "grad_norm": 0.86328125, "learning_rate": 0.0001370851640062105, "loss": 0.8855, "step": 25314 }, { "epoch": 0.6500182147608571, "grad_norm": 0.9296875, "learning_rate": 0.00013708101810031129, "loss": 0.8466, "step": 25315 }, { "epoch": 0.6500438919567788, "grad_norm": 0.80078125, "learning_rate": 0.00013707687212051293, "loss": 0.8539, "step": 25316 }, { "epoch": 0.6500695691527006, "grad_norm": 0.8046875, "learning_rate": 0.00013707272606682376, "loss": 0.8718, "step": 25317 }, { "epoch": 0.6500952463486225, "grad_norm": 0.87890625, "learning_rate": 0.00013706857993925196, "loss": 0.977, "step": 25318 }, { "epoch": 0.6501209235445443, "grad_norm": 0.75390625, "learning_rate": 0.00013706443373780582, "loss": 0.6853, "step": 25319 }, { "epoch": 0.6501466007404662, "grad_norm": 0.796875, "learning_rate": 0.00013706028746249363, "loss": 0.9151, "step": 25320 }, { "epoch": 0.650172277936388, "grad_norm": 0.703125, "learning_rate": 0.00013705614111332365, "loss": 0.727, "step": 25321 }, { "epoch": 0.6501979551323098, "grad_norm": 0.76171875, "learning_rate": 0.0001370519946903041, "loss": 0.9129, "step": 25322 }, { "epoch": 0.6502236323282315, "grad_norm": 0.8203125, "learning_rate": 0.00013704784819344327, "loss": 0.8574, "step": 25323 }, { "epoch": 0.6502493095241534, "grad_norm": 0.796875, "learning_rate": 0.00013704370162274945, "loss": 0.891, "step": 25324 }, { "epoch": 0.6502749867200752, "grad_norm": 0.7109375, "learning_rate": 0.00013703955497823084, "loss": 0.77, "step": 25325 }, { "epoch": 0.6503006639159971, "grad_norm": 0.72265625, "learning_rate": 0.00013703540825989578, "loss": 0.8782, "step": 25326 }, { "epoch": 0.6503263411119189, "grad_norm": 0.74609375, "learning_rate": 0.00013703126146775247, "loss": 0.7488, "step": 25327 }, { "epoch": 0.6503520183078407, "grad_norm": 0.78515625, "learning_rate": 0.0001370271146018092, "loss": 0.871, "step": 25328 }, { "epoch": 0.6503776955037626, "grad_norm": 0.7734375, "learning_rate": 0.00013702296766207427, "loss": 0.8812, "step": 25329 }, { "epoch": 0.6504033726996843, "grad_norm": 0.7421875, "learning_rate": 0.0001370188206485559, "loss": 0.7828, "step": 25330 }, { "epoch": 0.6504290498956061, "grad_norm": 0.78515625, "learning_rate": 0.00013701467356126235, "loss": 0.9096, "step": 25331 }, { "epoch": 0.650454727091528, "grad_norm": 0.8046875, "learning_rate": 0.0001370105264002019, "loss": 0.8746, "step": 25332 }, { "epoch": 0.6504804042874498, "grad_norm": 0.86328125, "learning_rate": 0.00013700637916538283, "loss": 0.8504, "step": 25333 }, { "epoch": 0.6505060814833716, "grad_norm": 0.80078125, "learning_rate": 0.00013700223185681334, "loss": 0.8321, "step": 25334 }, { "epoch": 0.6505317586792935, "grad_norm": 0.80078125, "learning_rate": 0.00013699808447450182, "loss": 0.8822, "step": 25335 }, { "epoch": 0.6505574358752152, "grad_norm": 0.75, "learning_rate": 0.0001369939370184564, "loss": 0.9152, "step": 25336 }, { "epoch": 0.650583113071137, "grad_norm": 0.8515625, "learning_rate": 0.00013698978948868543, "loss": 0.87, "step": 25337 }, { "epoch": 0.6506087902670589, "grad_norm": 0.78125, "learning_rate": 0.00013698564188519715, "loss": 0.7638, "step": 25338 }, { "epoch": 0.6506344674629807, "grad_norm": 0.84765625, "learning_rate": 0.00013698149420799982, "loss": 0.8468, "step": 25339 }, { "epoch": 0.6506601446589025, "grad_norm": 0.78125, "learning_rate": 0.0001369773464571017, "loss": 0.8516, "step": 25340 }, { "epoch": 0.6506858218548244, "grad_norm": 0.7890625, "learning_rate": 0.0001369731986325111, "loss": 0.9131, "step": 25341 }, { "epoch": 0.6507114990507462, "grad_norm": 0.76171875, "learning_rate": 0.00013696905073423625, "loss": 0.7637, "step": 25342 }, { "epoch": 0.6507371762466679, "grad_norm": 0.7109375, "learning_rate": 0.00013696490276228542, "loss": 0.7535, "step": 25343 }, { "epoch": 0.6507628534425898, "grad_norm": 0.74609375, "learning_rate": 0.00013696075471666684, "loss": 0.8304, "step": 25344 }, { "epoch": 0.6507885306385116, "grad_norm": 0.7578125, "learning_rate": 0.00013695660659738884, "loss": 0.7963, "step": 25345 }, { "epoch": 0.6508142078344334, "grad_norm": 0.67578125, "learning_rate": 0.00013695245840445968, "loss": 0.8172, "step": 25346 }, { "epoch": 0.6508398850303553, "grad_norm": 0.83984375, "learning_rate": 0.00013694831013788758, "loss": 0.7258, "step": 25347 }, { "epoch": 0.6508655622262771, "grad_norm": 0.76953125, "learning_rate": 0.00013694416179768087, "loss": 0.8585, "step": 25348 }, { "epoch": 0.650891239422199, "grad_norm": 0.74609375, "learning_rate": 0.00013694001338384777, "loss": 0.966, "step": 25349 }, { "epoch": 0.6509169166181207, "grad_norm": 0.74609375, "learning_rate": 0.00013693586489639653, "loss": 0.7769, "step": 25350 }, { "epoch": 0.6509425938140425, "grad_norm": 0.74609375, "learning_rate": 0.00013693171633533547, "loss": 0.899, "step": 25351 }, { "epoch": 0.6509682710099643, "grad_norm": 0.75, "learning_rate": 0.00013692756770067283, "loss": 0.7928, "step": 25352 }, { "epoch": 0.6509939482058862, "grad_norm": 0.8203125, "learning_rate": 0.00013692341899241688, "loss": 0.8696, "step": 25353 }, { "epoch": 0.651019625401808, "grad_norm": 0.79296875, "learning_rate": 0.0001369192702105759, "loss": 0.8896, "step": 25354 }, { "epoch": 0.6510453025977299, "grad_norm": 0.85546875, "learning_rate": 0.00013691512135515817, "loss": 0.8301, "step": 25355 }, { "epoch": 0.6510709797936516, "grad_norm": 0.73046875, "learning_rate": 0.0001369109724261719, "loss": 0.8542, "step": 25356 }, { "epoch": 0.6510966569895734, "grad_norm": 0.796875, "learning_rate": 0.0001369068234236254, "loss": 0.8312, "step": 25357 }, { "epoch": 0.6511223341854953, "grad_norm": 0.76953125, "learning_rate": 0.00013690267434752695, "loss": 0.8596, "step": 25358 }, { "epoch": 0.6511480113814171, "grad_norm": 0.765625, "learning_rate": 0.0001368985251978848, "loss": 0.7708, "step": 25359 }, { "epoch": 0.6511736885773389, "grad_norm": 0.77734375, "learning_rate": 0.0001368943759747072, "loss": 0.7824, "step": 25360 }, { "epoch": 0.6511993657732608, "grad_norm": 0.8125, "learning_rate": 0.0001368902266780025, "loss": 0.803, "step": 25361 }, { "epoch": 0.6512250429691826, "grad_norm": 0.796875, "learning_rate": 0.00013688607730777888, "loss": 0.952, "step": 25362 }, { "epoch": 0.6512507201651043, "grad_norm": 0.734375, "learning_rate": 0.00013688192786404462, "loss": 0.7274, "step": 25363 }, { "epoch": 0.6512763973610262, "grad_norm": 0.703125, "learning_rate": 0.00013687777834680802, "loss": 0.7084, "step": 25364 }, { "epoch": 0.651302074556948, "grad_norm": 0.83203125, "learning_rate": 0.00013687362875607734, "loss": 0.9834, "step": 25365 }, { "epoch": 0.6513277517528698, "grad_norm": 0.7890625, "learning_rate": 0.00013686947909186085, "loss": 1.0087, "step": 25366 }, { "epoch": 0.6513534289487917, "grad_norm": 0.796875, "learning_rate": 0.0001368653293541668, "loss": 0.8499, "step": 25367 }, { "epoch": 0.6513791061447135, "grad_norm": 0.7578125, "learning_rate": 0.00013686117954300351, "loss": 0.7777, "step": 25368 }, { "epoch": 0.6514047833406353, "grad_norm": 0.9609375, "learning_rate": 0.0001368570296583792, "loss": 0.8412, "step": 25369 }, { "epoch": 0.6514304605365571, "grad_norm": 0.7578125, "learning_rate": 0.00013685287970030218, "loss": 0.7913, "step": 25370 }, { "epoch": 0.6514561377324789, "grad_norm": 0.77734375, "learning_rate": 0.00013684872966878068, "loss": 0.745, "step": 25371 }, { "epoch": 0.6514818149284007, "grad_norm": 0.75390625, "learning_rate": 0.000136844579563823, "loss": 0.8515, "step": 25372 }, { "epoch": 0.6515074921243226, "grad_norm": 0.78515625, "learning_rate": 0.0001368404293854374, "loss": 0.898, "step": 25373 }, { "epoch": 0.6515331693202444, "grad_norm": 0.78125, "learning_rate": 0.00013683627913363216, "loss": 0.8524, "step": 25374 }, { "epoch": 0.6515588465161662, "grad_norm": 0.734375, "learning_rate": 0.00013683212880841552, "loss": 0.6683, "step": 25375 }, { "epoch": 0.651584523712088, "grad_norm": 0.7890625, "learning_rate": 0.00013682797840979576, "loss": 0.8587, "step": 25376 }, { "epoch": 0.6516102009080098, "grad_norm": 0.80078125, "learning_rate": 0.00013682382793778121, "loss": 0.8764, "step": 25377 }, { "epoch": 0.6516358781039316, "grad_norm": 0.84765625, "learning_rate": 0.00013681967739238008, "loss": 0.932, "step": 25378 }, { "epoch": 0.6516615552998535, "grad_norm": 0.76953125, "learning_rate": 0.00013681552677360066, "loss": 0.8378, "step": 25379 }, { "epoch": 0.6516872324957753, "grad_norm": 0.78515625, "learning_rate": 0.0001368113760814512, "loss": 0.862, "step": 25380 }, { "epoch": 0.6517129096916972, "grad_norm": 0.77734375, "learning_rate": 0.00013680722531594003, "loss": 0.7325, "step": 25381 }, { "epoch": 0.651738586887619, "grad_norm": 0.67578125, "learning_rate": 0.00013680307447707537, "loss": 0.8017, "step": 25382 }, { "epoch": 0.6517642640835407, "grad_norm": 0.765625, "learning_rate": 0.0001367989235648655, "loss": 0.8831, "step": 25383 }, { "epoch": 0.6517899412794625, "grad_norm": 0.8125, "learning_rate": 0.0001367947725793187, "loss": 0.8214, "step": 25384 }, { "epoch": 0.6518156184753844, "grad_norm": 0.73046875, "learning_rate": 0.00013679062152044323, "loss": 0.7757, "step": 25385 }, { "epoch": 0.6518412956713062, "grad_norm": 0.8125, "learning_rate": 0.0001367864703882474, "loss": 0.6855, "step": 25386 }, { "epoch": 0.6518669728672281, "grad_norm": 0.71484375, "learning_rate": 0.00013678231918273944, "loss": 0.8605, "step": 25387 }, { "epoch": 0.6518926500631499, "grad_norm": 0.78125, "learning_rate": 0.00013677816790392766, "loss": 0.7933, "step": 25388 }, { "epoch": 0.6519183272590717, "grad_norm": 0.7421875, "learning_rate": 0.00013677401655182031, "loss": 0.794, "step": 25389 }, { "epoch": 0.6519440044549935, "grad_norm": 0.77734375, "learning_rate": 0.00013676986512642565, "loss": 0.9102, "step": 25390 }, { "epoch": 0.6519696816509153, "grad_norm": 0.70703125, "learning_rate": 0.00013676571362775198, "loss": 0.8297, "step": 25391 }, { "epoch": 0.6519953588468371, "grad_norm": 0.7109375, "learning_rate": 0.00013676156205580755, "loss": 0.6215, "step": 25392 }, { "epoch": 0.652021036042759, "grad_norm": 0.67578125, "learning_rate": 0.00013675741041060065, "loss": 0.7614, "step": 25393 }, { "epoch": 0.6520467132386808, "grad_norm": 0.796875, "learning_rate": 0.00013675325869213958, "loss": 0.9036, "step": 25394 }, { "epoch": 0.6520723904346026, "grad_norm": 0.73828125, "learning_rate": 0.00013674910690043254, "loss": 0.9698, "step": 25395 }, { "epoch": 0.6520980676305244, "grad_norm": 0.80078125, "learning_rate": 0.00013674495503548787, "loss": 0.7786, "step": 25396 }, { "epoch": 0.6521237448264462, "grad_norm": 0.72265625, "learning_rate": 0.00013674080309731385, "loss": 0.8018, "step": 25397 }, { "epoch": 0.652149422022368, "grad_norm": 0.7109375, "learning_rate": 0.0001367366510859187, "loss": 0.86, "step": 25398 }, { "epoch": 0.6521750992182899, "grad_norm": 0.8125, "learning_rate": 0.00013673249900131074, "loss": 0.9284, "step": 25399 }, { "epoch": 0.6522007764142117, "grad_norm": 0.7421875, "learning_rate": 0.0001367283468434982, "loss": 0.7815, "step": 25400 }, { "epoch": 0.6522264536101335, "grad_norm": 0.70703125, "learning_rate": 0.0001367241946124894, "loss": 0.853, "step": 25401 }, { "epoch": 0.6522521308060554, "grad_norm": 0.76953125, "learning_rate": 0.00013672004230829262, "loss": 0.7796, "step": 25402 }, { "epoch": 0.6522778080019771, "grad_norm": 0.76171875, "learning_rate": 0.00013671588993091608, "loss": 0.924, "step": 25403 }, { "epoch": 0.6523034851978989, "grad_norm": 0.734375, "learning_rate": 0.0001367117374803681, "loss": 0.8069, "step": 25404 }, { "epoch": 0.6523291623938208, "grad_norm": 0.84765625, "learning_rate": 0.00013670758495665694, "loss": 0.8412, "step": 25405 }, { "epoch": 0.6523548395897426, "grad_norm": 0.8203125, "learning_rate": 0.00013670343235979083, "loss": 0.924, "step": 25406 }, { "epoch": 0.6523805167856644, "grad_norm": 0.78125, "learning_rate": 0.00013669927968977817, "loss": 0.6844, "step": 25407 }, { "epoch": 0.6524061939815863, "grad_norm": 0.8515625, "learning_rate": 0.00013669512694662714, "loss": 0.9204, "step": 25408 }, { "epoch": 0.652431871177508, "grad_norm": 0.7421875, "learning_rate": 0.00013669097413034604, "loss": 0.6929, "step": 25409 }, { "epoch": 0.6524575483734298, "grad_norm": 0.765625, "learning_rate": 0.00013668682124094315, "loss": 0.8871, "step": 25410 }, { "epoch": 0.6524832255693517, "grad_norm": 0.7265625, "learning_rate": 0.00013668266827842669, "loss": 0.7748, "step": 25411 }, { "epoch": 0.6525089027652735, "grad_norm": 0.7265625, "learning_rate": 0.00013667851524280504, "loss": 0.8742, "step": 25412 }, { "epoch": 0.6525345799611953, "grad_norm": 0.7421875, "learning_rate": 0.0001366743621340864, "loss": 0.7503, "step": 25413 }, { "epoch": 0.6525602571571172, "grad_norm": 0.73046875, "learning_rate": 0.00013667020895227907, "loss": 0.8798, "step": 25414 }, { "epoch": 0.652585934353039, "grad_norm": 0.7421875, "learning_rate": 0.00013666605569739133, "loss": 0.7904, "step": 25415 }, { "epoch": 0.6526116115489607, "grad_norm": 0.7265625, "learning_rate": 0.00013666190236943143, "loss": 0.796, "step": 25416 }, { "epoch": 0.6526372887448826, "grad_norm": 0.7578125, "learning_rate": 0.00013665774896840768, "loss": 0.8594, "step": 25417 }, { "epoch": 0.6526629659408044, "grad_norm": 0.7734375, "learning_rate": 0.00013665359549432836, "loss": 0.7487, "step": 25418 }, { "epoch": 0.6526886431367263, "grad_norm": 1.03125, "learning_rate": 0.00013664944194720173, "loss": 0.9537, "step": 25419 }, { "epoch": 0.6527143203326481, "grad_norm": 0.796875, "learning_rate": 0.00013664528832703612, "loss": 0.7627, "step": 25420 }, { "epoch": 0.6527399975285699, "grad_norm": 1.1328125, "learning_rate": 0.00013664113463383968, "loss": 0.8332, "step": 25421 }, { "epoch": 0.6527656747244918, "grad_norm": 0.76171875, "learning_rate": 0.0001366369808676208, "loss": 0.8911, "step": 25422 }, { "epoch": 0.6527913519204135, "grad_norm": 0.77734375, "learning_rate": 0.00013663282702838772, "loss": 0.8457, "step": 25423 }, { "epoch": 0.6528170291163353, "grad_norm": 0.6953125, "learning_rate": 0.00013662867311614875, "loss": 0.7155, "step": 25424 }, { "epoch": 0.6528427063122572, "grad_norm": 0.80859375, "learning_rate": 0.00013662451913091212, "loss": 0.9981, "step": 25425 }, { "epoch": 0.652868383508179, "grad_norm": 0.73046875, "learning_rate": 0.00013662036507268614, "loss": 0.8473, "step": 25426 }, { "epoch": 0.6528940607041008, "grad_norm": 0.79296875, "learning_rate": 0.0001366162109414791, "loss": 0.8208, "step": 25427 }, { "epoch": 0.6529197379000227, "grad_norm": 0.75390625, "learning_rate": 0.00013661205673729925, "loss": 0.8326, "step": 25428 }, { "epoch": 0.6529454150959444, "grad_norm": 0.859375, "learning_rate": 0.00013660790246015485, "loss": 0.8468, "step": 25429 }, { "epoch": 0.6529710922918662, "grad_norm": 0.79296875, "learning_rate": 0.00013660374811005422, "loss": 0.8579, "step": 25430 }, { "epoch": 0.6529967694877881, "grad_norm": 0.77734375, "learning_rate": 0.00013659959368700564, "loss": 0.896, "step": 25431 }, { "epoch": 0.6530224466837099, "grad_norm": 0.7578125, "learning_rate": 0.00013659543919101734, "loss": 0.7598, "step": 25432 }, { "epoch": 0.6530481238796317, "grad_norm": 0.734375, "learning_rate": 0.00013659128462209768, "loss": 0.7557, "step": 25433 }, { "epoch": 0.6530738010755536, "grad_norm": 0.71484375, "learning_rate": 0.00013658712998025488, "loss": 0.8349, "step": 25434 }, { "epoch": 0.6530994782714754, "grad_norm": 0.78515625, "learning_rate": 0.00013658297526549724, "loss": 0.8321, "step": 25435 }, { "epoch": 0.6531251554673971, "grad_norm": 0.77734375, "learning_rate": 0.00013657882047783302, "loss": 0.8802, "step": 25436 }, { "epoch": 0.653150832663319, "grad_norm": 0.75390625, "learning_rate": 0.0001365746656172705, "loss": 0.8313, "step": 25437 }, { "epoch": 0.6531765098592408, "grad_norm": 0.76171875, "learning_rate": 0.00013657051068381802, "loss": 0.8839, "step": 25438 }, { "epoch": 0.6532021870551626, "grad_norm": 0.796875, "learning_rate": 0.0001365663556774838, "loss": 0.8775, "step": 25439 }, { "epoch": 0.6532278642510845, "grad_norm": 0.8359375, "learning_rate": 0.00013656220059827612, "loss": 0.8998, "step": 25440 }, { "epoch": 0.6532535414470063, "grad_norm": 0.8125, "learning_rate": 0.0001365580454462033, "loss": 0.7981, "step": 25441 }, { "epoch": 0.6532792186429282, "grad_norm": 0.80078125, "learning_rate": 0.00013655389022127357, "loss": 0.7489, "step": 25442 }, { "epoch": 0.6533048958388499, "grad_norm": 0.71875, "learning_rate": 0.00013654973492349527, "loss": 0.7028, "step": 25443 }, { "epoch": 0.6533305730347717, "grad_norm": 0.84765625, "learning_rate": 0.00013654557955287662, "loss": 0.8907, "step": 25444 }, { "epoch": 0.6533562502306935, "grad_norm": 0.77734375, "learning_rate": 0.00013654142410942594, "loss": 0.906, "step": 25445 }, { "epoch": 0.6533819274266154, "grad_norm": 0.84375, "learning_rate": 0.0001365372685931515, "loss": 0.8997, "step": 25446 }, { "epoch": 0.6534076046225372, "grad_norm": 0.75, "learning_rate": 0.0001365331130040616, "loss": 0.7619, "step": 25447 }, { "epoch": 0.653433281818459, "grad_norm": 0.78125, "learning_rate": 0.00013652895734216446, "loss": 0.9015, "step": 25448 }, { "epoch": 0.6534589590143808, "grad_norm": 0.78125, "learning_rate": 0.00013652480160746846, "loss": 0.8512, "step": 25449 }, { "epoch": 0.6534846362103026, "grad_norm": 1.2421875, "learning_rate": 0.00013652064579998182, "loss": 0.8376, "step": 25450 }, { "epoch": 0.6535103134062245, "grad_norm": 0.76953125, "learning_rate": 0.0001365164899197128, "loss": 0.8335, "step": 25451 }, { "epoch": 0.6535359906021463, "grad_norm": 0.75, "learning_rate": 0.00013651233396666976, "loss": 0.9078, "step": 25452 }, { "epoch": 0.6535616677980681, "grad_norm": 0.71875, "learning_rate": 0.00013650817794086091, "loss": 0.8774, "step": 25453 }, { "epoch": 0.65358734499399, "grad_norm": 0.80078125, "learning_rate": 0.00013650402184229457, "loss": 0.7892, "step": 25454 }, { "epoch": 0.6536130221899118, "grad_norm": 0.79296875, "learning_rate": 0.00013649986567097901, "loss": 0.8263, "step": 25455 }, { "epoch": 0.6536386993858335, "grad_norm": 0.828125, "learning_rate": 0.0001364957094269225, "loss": 0.8426, "step": 25456 }, { "epoch": 0.6536643765817554, "grad_norm": 0.80078125, "learning_rate": 0.00013649155311013336, "loss": 0.8135, "step": 25457 }, { "epoch": 0.6536900537776772, "grad_norm": 0.75390625, "learning_rate": 0.00013648739672061984, "loss": 0.8121, "step": 25458 }, { "epoch": 0.653715730973599, "grad_norm": 0.72265625, "learning_rate": 0.0001364832402583902, "loss": 0.7698, "step": 25459 }, { "epoch": 0.6537414081695209, "grad_norm": 0.7890625, "learning_rate": 0.00013647908372345285, "loss": 0.8363, "step": 25460 }, { "epoch": 0.6537670853654427, "grad_norm": 0.71484375, "learning_rate": 0.00013647492711581588, "loss": 0.7311, "step": 25461 }, { "epoch": 0.6537927625613645, "grad_norm": 0.84375, "learning_rate": 0.00013647077043548773, "loss": 0.7691, "step": 25462 }, { "epoch": 0.6538184397572863, "grad_norm": 0.8046875, "learning_rate": 0.00013646661368247663, "loss": 0.8763, "step": 25463 }, { "epoch": 0.6538441169532081, "grad_norm": 0.765625, "learning_rate": 0.00013646245685679082, "loss": 0.755, "step": 25464 }, { "epoch": 0.6538697941491299, "grad_norm": 0.73828125, "learning_rate": 0.00013645829995843865, "loss": 0.8856, "step": 25465 }, { "epoch": 0.6538954713450518, "grad_norm": 0.81640625, "learning_rate": 0.0001364541429874284, "loss": 0.8195, "step": 25466 }, { "epoch": 0.6539211485409736, "grad_norm": 0.80078125, "learning_rate": 0.00013644998594376835, "loss": 0.909, "step": 25467 }, { "epoch": 0.6539468257368954, "grad_norm": 0.86328125, "learning_rate": 0.00013644582882746671, "loss": 0.8429, "step": 25468 }, { "epoch": 0.6539725029328172, "grad_norm": 0.7890625, "learning_rate": 0.00013644167163853188, "loss": 0.824, "step": 25469 }, { "epoch": 0.653998180128739, "grad_norm": 0.8125, "learning_rate": 0.00013643751437697205, "loss": 0.9275, "step": 25470 }, { "epoch": 0.6540238573246608, "grad_norm": 0.7421875, "learning_rate": 0.0001364333570427956, "loss": 0.8452, "step": 25471 }, { "epoch": 0.6540495345205827, "grad_norm": 0.71484375, "learning_rate": 0.00013642919963601072, "loss": 0.8607, "step": 25472 }, { "epoch": 0.6540752117165045, "grad_norm": 0.81640625, "learning_rate": 0.00013642504215662576, "loss": 0.9245, "step": 25473 }, { "epoch": 0.6541008889124263, "grad_norm": 0.77734375, "learning_rate": 0.00013642088460464894, "loss": 0.8505, "step": 25474 }, { "epoch": 0.6541265661083482, "grad_norm": 0.734375, "learning_rate": 0.0001364167269800886, "loss": 0.8338, "step": 25475 }, { "epoch": 0.6541522433042699, "grad_norm": 0.765625, "learning_rate": 0.00013641256928295303, "loss": 0.8112, "step": 25476 }, { "epoch": 0.6541779205001917, "grad_norm": 0.77734375, "learning_rate": 0.0001364084115132505, "loss": 0.8603, "step": 25477 }, { "epoch": 0.6542035976961136, "grad_norm": 0.75, "learning_rate": 0.00013640425367098926, "loss": 0.8957, "step": 25478 }, { "epoch": 0.6542292748920354, "grad_norm": 0.79296875, "learning_rate": 0.00013640009575617768, "loss": 0.8923, "step": 25479 }, { "epoch": 0.6542549520879573, "grad_norm": 0.80859375, "learning_rate": 0.00013639593776882395, "loss": 0.8647, "step": 25480 }, { "epoch": 0.6542806292838791, "grad_norm": 0.80078125, "learning_rate": 0.00013639177970893644, "loss": 0.8771, "step": 25481 }, { "epoch": 0.6543063064798009, "grad_norm": 0.82421875, "learning_rate": 0.0001363876215765234, "loss": 0.7829, "step": 25482 }, { "epoch": 0.6543319836757227, "grad_norm": 0.703125, "learning_rate": 0.0001363834633715931, "loss": 0.8555, "step": 25483 }, { "epoch": 0.6543576608716445, "grad_norm": 0.79296875, "learning_rate": 0.00013637930509415383, "loss": 0.8296, "step": 25484 }, { "epoch": 0.6543833380675663, "grad_norm": 0.80078125, "learning_rate": 0.00013637514674421392, "loss": 0.9583, "step": 25485 }, { "epoch": 0.6544090152634882, "grad_norm": 0.7578125, "learning_rate": 0.00013637098832178164, "loss": 0.7909, "step": 25486 }, { "epoch": 0.65443469245941, "grad_norm": 0.8359375, "learning_rate": 0.00013636682982686522, "loss": 1.0382, "step": 25487 }, { "epoch": 0.6544603696553318, "grad_norm": 0.7734375, "learning_rate": 0.000136362671259473, "loss": 0.7375, "step": 25488 }, { "epoch": 0.6544860468512536, "grad_norm": 0.78515625, "learning_rate": 0.0001363585126196133, "loss": 0.8039, "step": 25489 }, { "epoch": 0.6545117240471754, "grad_norm": 0.859375, "learning_rate": 0.00013635435390729433, "loss": 0.9324, "step": 25490 }, { "epoch": 0.6545374012430972, "grad_norm": 0.73046875, "learning_rate": 0.00013635019512252443, "loss": 0.8521, "step": 25491 }, { "epoch": 0.6545630784390191, "grad_norm": 0.765625, "learning_rate": 0.00013634603626531188, "loss": 0.9122, "step": 25492 }, { "epoch": 0.6545887556349409, "grad_norm": 0.76171875, "learning_rate": 0.00013634187733566497, "loss": 0.9341, "step": 25493 }, { "epoch": 0.6546144328308627, "grad_norm": 0.82421875, "learning_rate": 0.00013633771833359196, "loss": 0.9222, "step": 25494 }, { "epoch": 0.6546401100267846, "grad_norm": 0.765625, "learning_rate": 0.00013633355925910117, "loss": 0.7926, "step": 25495 }, { "epoch": 0.6546657872227063, "grad_norm": 0.8203125, "learning_rate": 0.0001363294001122009, "loss": 0.777, "step": 25496 }, { "epoch": 0.6546914644186281, "grad_norm": 0.78515625, "learning_rate": 0.00013632524089289937, "loss": 0.9196, "step": 25497 }, { "epoch": 0.65471714161455, "grad_norm": 0.81640625, "learning_rate": 0.00013632108160120497, "loss": 0.9141, "step": 25498 }, { "epoch": 0.6547428188104718, "grad_norm": 0.78515625, "learning_rate": 0.0001363169222371259, "loss": 0.9346, "step": 25499 }, { "epoch": 0.6547684960063936, "grad_norm": 0.7734375, "learning_rate": 0.00013631276280067048, "loss": 0.8939, "step": 25500 }, { "epoch": 0.6547941732023155, "grad_norm": 0.7734375, "learning_rate": 0.00013630860329184703, "loss": 0.9627, "step": 25501 }, { "epoch": 0.6548198503982373, "grad_norm": 0.73046875, "learning_rate": 0.0001363044437106638, "loss": 0.7876, "step": 25502 }, { "epoch": 0.654845527594159, "grad_norm": 0.83984375, "learning_rate": 0.0001363002840571291, "loss": 0.8848, "step": 25503 }, { "epoch": 0.6548712047900809, "grad_norm": 0.74609375, "learning_rate": 0.00013629612433125118, "loss": 0.8252, "step": 25504 }, { "epoch": 0.6548968819860027, "grad_norm": 0.7890625, "learning_rate": 0.00013629196453303837, "loss": 0.9021, "step": 25505 }, { "epoch": 0.6549225591819245, "grad_norm": 0.77734375, "learning_rate": 0.000136287804662499, "loss": 0.7657, "step": 25506 }, { "epoch": 0.6549482363778464, "grad_norm": 0.79296875, "learning_rate": 0.0001362836447196413, "loss": 0.856, "step": 25507 }, { "epoch": 0.6549739135737682, "grad_norm": 0.76953125, "learning_rate": 0.00013627948470447353, "loss": 0.7844, "step": 25508 }, { "epoch": 0.6549995907696899, "grad_norm": 0.78515625, "learning_rate": 0.00013627532461700406, "loss": 0.8667, "step": 25509 }, { "epoch": 0.6550252679656118, "grad_norm": 0.76953125, "learning_rate": 0.00013627116445724113, "loss": 0.8562, "step": 25510 }, { "epoch": 0.6550509451615336, "grad_norm": 0.80078125, "learning_rate": 0.00013626700422519306, "loss": 0.8307, "step": 25511 }, { "epoch": 0.6550766223574555, "grad_norm": 0.72265625, "learning_rate": 0.0001362628439208681, "loss": 0.8399, "step": 25512 }, { "epoch": 0.6551022995533773, "grad_norm": 0.7890625, "learning_rate": 0.00013625868354427462, "loss": 0.7886, "step": 25513 }, { "epoch": 0.6551279767492991, "grad_norm": 0.87890625, "learning_rate": 0.0001362545230954208, "loss": 0.6971, "step": 25514 }, { "epoch": 0.655153653945221, "grad_norm": 0.79296875, "learning_rate": 0.00013625036257431503, "loss": 0.684, "step": 25515 }, { "epoch": 0.6551793311411427, "grad_norm": 0.796875, "learning_rate": 0.00013624620198096556, "loss": 0.849, "step": 25516 }, { "epoch": 0.6552050083370645, "grad_norm": 1.09375, "learning_rate": 0.00013624204131538066, "loss": 0.8005, "step": 25517 }, { "epoch": 0.6552306855329864, "grad_norm": 0.796875, "learning_rate": 0.00013623788057756864, "loss": 0.7337, "step": 25518 }, { "epoch": 0.6552563627289082, "grad_norm": 0.82421875, "learning_rate": 0.00013623371976753784, "loss": 0.7276, "step": 25519 }, { "epoch": 0.65528203992483, "grad_norm": 0.8203125, "learning_rate": 0.00013622955888529647, "loss": 0.8791, "step": 25520 }, { "epoch": 0.6553077171207519, "grad_norm": 0.8046875, "learning_rate": 0.0001362253979308529, "loss": 0.9121, "step": 25521 }, { "epoch": 0.6553333943166737, "grad_norm": 0.76953125, "learning_rate": 0.00013622123690421536, "loss": 0.7913, "step": 25522 }, { "epoch": 0.6553590715125954, "grad_norm": 0.7890625, "learning_rate": 0.00013621707580539213, "loss": 0.902, "step": 25523 }, { "epoch": 0.6553847487085173, "grad_norm": 0.83203125, "learning_rate": 0.00013621291463439158, "loss": 0.7166, "step": 25524 }, { "epoch": 0.6554104259044391, "grad_norm": 0.83203125, "learning_rate": 0.00013620875339122196, "loss": 0.8008, "step": 25525 }, { "epoch": 0.6554361031003609, "grad_norm": 0.80859375, "learning_rate": 0.00013620459207589158, "loss": 0.8952, "step": 25526 }, { "epoch": 0.6554617802962828, "grad_norm": 0.765625, "learning_rate": 0.00013620043068840871, "loss": 0.6721, "step": 25527 }, { "epoch": 0.6554874574922046, "grad_norm": 0.76953125, "learning_rate": 0.00013619626922878162, "loss": 0.897, "step": 25528 }, { "epoch": 0.6555131346881263, "grad_norm": 0.74609375, "learning_rate": 0.00013619210769701866, "loss": 0.8233, "step": 25529 }, { "epoch": 0.6555388118840482, "grad_norm": 0.87890625, "learning_rate": 0.00013618794609312812, "loss": 0.92, "step": 25530 }, { "epoch": 0.65556448907997, "grad_norm": 0.7890625, "learning_rate": 0.00013618378441711824, "loss": 0.8461, "step": 25531 }, { "epoch": 0.6555901662758918, "grad_norm": 1.171875, "learning_rate": 0.00013617962266899736, "loss": 0.8406, "step": 25532 }, { "epoch": 0.6556158434718137, "grad_norm": 0.84375, "learning_rate": 0.00013617546084877374, "loss": 0.7566, "step": 25533 }, { "epoch": 0.6556415206677355, "grad_norm": 0.8203125, "learning_rate": 0.0001361712989564557, "loss": 0.8248, "step": 25534 }, { "epoch": 0.6556671978636573, "grad_norm": 0.69921875, "learning_rate": 0.00013616713699205155, "loss": 0.8019, "step": 25535 }, { "epoch": 0.6556928750595791, "grad_norm": 0.765625, "learning_rate": 0.00013616297495556954, "loss": 0.8019, "step": 25536 }, { "epoch": 0.6557185522555009, "grad_norm": 0.703125, "learning_rate": 0.000136158812847018, "loss": 0.6367, "step": 25537 }, { "epoch": 0.6557442294514227, "grad_norm": 0.80859375, "learning_rate": 0.00013615465066640523, "loss": 0.8964, "step": 25538 }, { "epoch": 0.6557699066473446, "grad_norm": 0.7109375, "learning_rate": 0.00013615048841373947, "loss": 0.8376, "step": 25539 }, { "epoch": 0.6557955838432664, "grad_norm": 0.69921875, "learning_rate": 0.0001361463260890291, "loss": 0.7495, "step": 25540 }, { "epoch": 0.6558212610391883, "grad_norm": 0.69140625, "learning_rate": 0.0001361421636922823, "loss": 0.8548, "step": 25541 }, { "epoch": 0.6558469382351101, "grad_norm": 0.76171875, "learning_rate": 0.0001361380012235075, "loss": 0.9724, "step": 25542 }, { "epoch": 0.6558726154310318, "grad_norm": 0.78515625, "learning_rate": 0.00013613383868271288, "loss": 0.9534, "step": 25543 }, { "epoch": 0.6558982926269536, "grad_norm": 0.84375, "learning_rate": 0.0001361296760699068, "loss": 0.8691, "step": 25544 }, { "epoch": 0.6559239698228755, "grad_norm": 0.8828125, "learning_rate": 0.00013612551338509755, "loss": 0.8515, "step": 25545 }, { "epoch": 0.6559496470187973, "grad_norm": 0.97265625, "learning_rate": 0.0001361213506282934, "loss": 0.9065, "step": 25546 }, { "epoch": 0.6559753242147192, "grad_norm": 0.82421875, "learning_rate": 0.00013611718779950267, "loss": 0.8624, "step": 25547 }, { "epoch": 0.656001001410641, "grad_norm": 0.80859375, "learning_rate": 0.00013611302489873364, "loss": 0.8034, "step": 25548 }, { "epoch": 0.6560266786065627, "grad_norm": 0.7421875, "learning_rate": 0.0001361088619259946, "loss": 0.8043, "step": 25549 }, { "epoch": 0.6560523558024846, "grad_norm": 0.8359375, "learning_rate": 0.00013610469888129386, "loss": 0.8244, "step": 25550 }, { "epoch": 0.6560780329984064, "grad_norm": 0.7734375, "learning_rate": 0.00013610053576463974, "loss": 0.9414, "step": 25551 }, { "epoch": 0.6561037101943282, "grad_norm": 0.7265625, "learning_rate": 0.00013609637257604054, "loss": 0.7471, "step": 25552 }, { "epoch": 0.6561293873902501, "grad_norm": 0.8125, "learning_rate": 0.00013609220931550447, "loss": 0.8064, "step": 25553 }, { "epoch": 0.6561550645861719, "grad_norm": 0.7734375, "learning_rate": 0.00013608804598303994, "loss": 0.8551, "step": 25554 }, { "epoch": 0.6561807417820937, "grad_norm": 0.76953125, "learning_rate": 0.00013608388257865513, "loss": 0.9071, "step": 25555 }, { "epoch": 0.6562064189780155, "grad_norm": 0.78125, "learning_rate": 0.00013607971910235847, "loss": 0.7457, "step": 25556 }, { "epoch": 0.6562320961739373, "grad_norm": 0.765625, "learning_rate": 0.00013607555555415814, "loss": 0.7585, "step": 25557 }, { "epoch": 0.6562577733698591, "grad_norm": 0.796875, "learning_rate": 0.00013607139193406252, "loss": 0.7372, "step": 25558 }, { "epoch": 0.656283450565781, "grad_norm": 0.76171875, "learning_rate": 0.00013606722824207988, "loss": 0.8071, "step": 25559 }, { "epoch": 0.6563091277617028, "grad_norm": 0.82421875, "learning_rate": 0.00013606306447821846, "loss": 0.8588, "step": 25560 }, { "epoch": 0.6563348049576246, "grad_norm": 0.734375, "learning_rate": 0.00013605890064248666, "loss": 0.7999, "step": 25561 }, { "epoch": 0.6563604821535465, "grad_norm": 0.85546875, "learning_rate": 0.0001360547367348927, "loss": 0.8295, "step": 25562 }, { "epoch": 0.6563861593494682, "grad_norm": 0.796875, "learning_rate": 0.0001360505727554449, "loss": 0.9251, "step": 25563 }, { "epoch": 0.65641183654539, "grad_norm": 0.73828125, "learning_rate": 0.00013604640870415157, "loss": 0.7227, "step": 25564 }, { "epoch": 0.6564375137413119, "grad_norm": 0.87890625, "learning_rate": 0.00013604224458102103, "loss": 0.8035, "step": 25565 }, { "epoch": 0.6564631909372337, "grad_norm": 0.8671875, "learning_rate": 0.00013603808038606155, "loss": 0.8197, "step": 25566 }, { "epoch": 0.6564888681331555, "grad_norm": 0.80859375, "learning_rate": 0.0001360339161192814, "loss": 0.8398, "step": 25567 }, { "epoch": 0.6565145453290774, "grad_norm": 1.140625, "learning_rate": 0.0001360297517806889, "loss": 0.8974, "step": 25568 }, { "epoch": 0.6565402225249991, "grad_norm": 0.76953125, "learning_rate": 0.00013602558737029242, "loss": 0.899, "step": 25569 }, { "epoch": 0.6565658997209209, "grad_norm": 0.8125, "learning_rate": 0.00013602142288810017, "loss": 0.8617, "step": 25570 }, { "epoch": 0.6565915769168428, "grad_norm": 0.75, "learning_rate": 0.00013601725833412045, "loss": 0.8659, "step": 25571 }, { "epoch": 0.6566172541127646, "grad_norm": 0.8125, "learning_rate": 0.00013601309370836164, "loss": 0.8027, "step": 25572 }, { "epoch": 0.6566429313086865, "grad_norm": 0.765625, "learning_rate": 0.00013600892901083193, "loss": 0.786, "step": 25573 }, { "epoch": 0.6566686085046083, "grad_norm": 0.80859375, "learning_rate": 0.00013600476424153972, "loss": 0.8731, "step": 25574 }, { "epoch": 0.6566942857005301, "grad_norm": 0.8203125, "learning_rate": 0.00013600059940049327, "loss": 0.8126, "step": 25575 }, { "epoch": 0.6567199628964518, "grad_norm": 0.70703125, "learning_rate": 0.0001359964344877008, "loss": 0.7638, "step": 25576 }, { "epoch": 0.6567456400923737, "grad_norm": 0.81640625, "learning_rate": 0.00013599226950317077, "loss": 0.9317, "step": 25577 }, { "epoch": 0.6567713172882955, "grad_norm": 0.734375, "learning_rate": 0.00013598810444691138, "loss": 0.7859, "step": 25578 }, { "epoch": 0.6567969944842174, "grad_norm": 0.7265625, "learning_rate": 0.00013598393931893093, "loss": 0.7665, "step": 25579 }, { "epoch": 0.6568226716801392, "grad_norm": 0.8203125, "learning_rate": 0.00013597977411923778, "loss": 0.8678, "step": 25580 }, { "epoch": 0.656848348876061, "grad_norm": 0.796875, "learning_rate": 0.00013597560884784016, "loss": 0.7489, "step": 25581 }, { "epoch": 0.6568740260719829, "grad_norm": 0.796875, "learning_rate": 0.00013597144350474639, "loss": 0.7766, "step": 25582 }, { "epoch": 0.6568997032679046, "grad_norm": 0.8359375, "learning_rate": 0.00013596727808996481, "loss": 0.7766, "step": 25583 }, { "epoch": 0.6569253804638264, "grad_norm": 0.83203125, "learning_rate": 0.00013596311260350366, "loss": 0.8359, "step": 25584 }, { "epoch": 0.6569510576597483, "grad_norm": 0.8359375, "learning_rate": 0.0001359589470453713, "loss": 0.9078, "step": 25585 }, { "epoch": 0.6569767348556701, "grad_norm": 0.7734375, "learning_rate": 0.000135954781415576, "loss": 0.7915, "step": 25586 }, { "epoch": 0.6570024120515919, "grad_norm": 0.77734375, "learning_rate": 0.00013595061571412609, "loss": 0.9107, "step": 25587 }, { "epoch": 0.6570280892475138, "grad_norm": 0.76953125, "learning_rate": 0.00013594644994102982, "loss": 0.8064, "step": 25588 }, { "epoch": 0.6570537664434355, "grad_norm": 0.70703125, "learning_rate": 0.00013594228409629551, "loss": 0.865, "step": 25589 }, { "epoch": 0.6570794436393573, "grad_norm": 0.78515625, "learning_rate": 0.00013593811817993152, "loss": 0.736, "step": 25590 }, { "epoch": 0.6571051208352792, "grad_norm": 0.70703125, "learning_rate": 0.00013593395219194611, "loss": 0.827, "step": 25591 }, { "epoch": 0.657130798031201, "grad_norm": 0.76171875, "learning_rate": 0.00013592978613234753, "loss": 0.8767, "step": 25592 }, { "epoch": 0.6571564752271228, "grad_norm": 0.78515625, "learning_rate": 0.00013592562000114416, "loss": 0.8366, "step": 25593 }, { "epoch": 0.6571821524230447, "grad_norm": 1.2421875, "learning_rate": 0.00013592145379834425, "loss": 0.7554, "step": 25594 }, { "epoch": 0.6572078296189665, "grad_norm": 0.796875, "learning_rate": 0.00013591728752395616, "loss": 0.8209, "step": 25595 }, { "epoch": 0.6572335068148882, "grad_norm": 0.79296875, "learning_rate": 0.00013591312117798815, "loss": 0.861, "step": 25596 }, { "epoch": 0.6572591840108101, "grad_norm": 0.76953125, "learning_rate": 0.0001359089547604485, "loss": 0.7638, "step": 25597 }, { "epoch": 0.6572848612067319, "grad_norm": 0.765625, "learning_rate": 0.0001359047882713456, "loss": 0.8931, "step": 25598 }, { "epoch": 0.6573105384026537, "grad_norm": 0.796875, "learning_rate": 0.0001359006217106877, "loss": 0.8482, "step": 25599 }, { "epoch": 0.6573362155985756, "grad_norm": 0.8125, "learning_rate": 0.00013589645507848305, "loss": 0.8979, "step": 25600 }, { "epoch": 0.6573618927944974, "grad_norm": 0.82421875, "learning_rate": 0.00013589228837474005, "loss": 0.851, "step": 25601 }, { "epoch": 0.6573875699904193, "grad_norm": 0.78515625, "learning_rate": 0.00013588812159946696, "loss": 0.7947, "step": 25602 }, { "epoch": 0.657413247186341, "grad_norm": 0.78125, "learning_rate": 0.00013588395475267205, "loss": 0.9239, "step": 25603 }, { "epoch": 0.6574389243822628, "grad_norm": 0.76171875, "learning_rate": 0.0001358797878343637, "loss": 0.8734, "step": 25604 }, { "epoch": 0.6574646015781846, "grad_norm": 0.78125, "learning_rate": 0.00013587562084455018, "loss": 0.8259, "step": 25605 }, { "epoch": 0.6574902787741065, "grad_norm": 0.81640625, "learning_rate": 0.00013587145378323974, "loss": 0.8445, "step": 25606 }, { "epoch": 0.6575159559700283, "grad_norm": 0.69140625, "learning_rate": 0.0001358672866504408, "loss": 0.9848, "step": 25607 }, { "epoch": 0.6575416331659502, "grad_norm": 0.8125, "learning_rate": 0.00013586311944616152, "loss": 0.8696, "step": 25608 }, { "epoch": 0.6575673103618719, "grad_norm": 0.78125, "learning_rate": 0.00013585895217041035, "loss": 0.7143, "step": 25609 }, { "epoch": 0.6575929875577937, "grad_norm": 0.7734375, "learning_rate": 0.0001358547848231955, "loss": 0.8156, "step": 25610 }, { "epoch": 0.6576186647537156, "grad_norm": 0.75, "learning_rate": 0.0001358506174045253, "loss": 0.8822, "step": 25611 }, { "epoch": 0.6576443419496374, "grad_norm": 0.77734375, "learning_rate": 0.00013584644991440804, "loss": 0.9456, "step": 25612 }, { "epoch": 0.6576700191455592, "grad_norm": 0.80859375, "learning_rate": 0.00013584228235285207, "loss": 0.8427, "step": 25613 }, { "epoch": 0.6576956963414811, "grad_norm": 0.765625, "learning_rate": 0.00013583811471986566, "loss": 0.7437, "step": 25614 }, { "epoch": 0.6577213735374029, "grad_norm": 0.74609375, "learning_rate": 0.00013583394701545713, "loss": 0.9078, "step": 25615 }, { "epoch": 0.6577470507333246, "grad_norm": 0.7734375, "learning_rate": 0.00013582977923963475, "loss": 0.7373, "step": 25616 }, { "epoch": 0.6577727279292465, "grad_norm": 0.78125, "learning_rate": 0.0001358256113924069, "loss": 0.8509, "step": 25617 }, { "epoch": 0.6577984051251683, "grad_norm": 0.71875, "learning_rate": 0.00013582144347378184, "loss": 0.7965, "step": 25618 }, { "epoch": 0.6578240823210901, "grad_norm": 0.90625, "learning_rate": 0.00013581727548376787, "loss": 0.9578, "step": 25619 }, { "epoch": 0.657849759517012, "grad_norm": 0.796875, "learning_rate": 0.00013581310742237328, "loss": 0.8443, "step": 25620 }, { "epoch": 0.6578754367129338, "grad_norm": 0.8359375, "learning_rate": 0.00013580893928960642, "loss": 1.0388, "step": 25621 }, { "epoch": 0.6579011139088555, "grad_norm": 0.77734375, "learning_rate": 0.00013580477108547558, "loss": 0.8751, "step": 25622 }, { "epoch": 0.6579267911047774, "grad_norm": 0.73828125, "learning_rate": 0.00013580060280998907, "loss": 0.7617, "step": 25623 }, { "epoch": 0.6579524683006992, "grad_norm": 0.8046875, "learning_rate": 0.0001357964344631552, "loss": 0.8926, "step": 25624 }, { "epoch": 0.657978145496621, "grad_norm": 0.72265625, "learning_rate": 0.00013579226604498224, "loss": 0.8376, "step": 25625 }, { "epoch": 0.6580038226925429, "grad_norm": 0.796875, "learning_rate": 0.00013578809755547856, "loss": 0.8405, "step": 25626 }, { "epoch": 0.6580294998884647, "grad_norm": 0.765625, "learning_rate": 0.0001357839289946524, "loss": 0.8423, "step": 25627 }, { "epoch": 0.6580551770843865, "grad_norm": 0.8125, "learning_rate": 0.00013577976036251212, "loss": 0.9177, "step": 25628 }, { "epoch": 0.6580808542803083, "grad_norm": 0.7421875, "learning_rate": 0.00013577559165906599, "loss": 0.7035, "step": 25629 }, { "epoch": 0.6581065314762301, "grad_norm": 0.74609375, "learning_rate": 0.00013577142288432237, "loss": 0.8574, "step": 25630 }, { "epoch": 0.6581322086721519, "grad_norm": 0.80859375, "learning_rate": 0.00013576725403828953, "loss": 0.9205, "step": 25631 }, { "epoch": 0.6581578858680738, "grad_norm": 0.8125, "learning_rate": 0.0001357630851209758, "loss": 0.8618, "step": 25632 }, { "epoch": 0.6581835630639956, "grad_norm": 0.8125, "learning_rate": 0.00013575891613238942, "loss": 0.9491, "step": 25633 }, { "epoch": 0.6582092402599174, "grad_norm": 0.76953125, "learning_rate": 0.00013575474707253876, "loss": 0.8183, "step": 25634 }, { "epoch": 0.6582349174558393, "grad_norm": 0.83984375, "learning_rate": 0.00013575057794143214, "loss": 0.9315, "step": 25635 }, { "epoch": 0.658260594651761, "grad_norm": 0.79296875, "learning_rate": 0.00013574640873907786, "loss": 0.7968, "step": 25636 }, { "epoch": 0.6582862718476828, "grad_norm": 0.7734375, "learning_rate": 0.0001357422394654842, "loss": 0.9135, "step": 25637 }, { "epoch": 0.6583119490436047, "grad_norm": 0.79296875, "learning_rate": 0.00013573807012065952, "loss": 0.9402, "step": 25638 }, { "epoch": 0.6583376262395265, "grad_norm": 0.7578125, "learning_rate": 0.00013573390070461205, "loss": 0.8787, "step": 25639 }, { "epoch": 0.6583633034354484, "grad_norm": 0.828125, "learning_rate": 0.00013572973121735014, "loss": 0.7565, "step": 25640 }, { "epoch": 0.6583889806313702, "grad_norm": 0.71484375, "learning_rate": 0.00013572556165888212, "loss": 0.7647, "step": 25641 }, { "epoch": 0.6584146578272919, "grad_norm": 0.84375, "learning_rate": 0.00013572139202921627, "loss": 0.8588, "step": 25642 }, { "epoch": 0.6584403350232138, "grad_norm": 0.859375, "learning_rate": 0.00013571722232836092, "loss": 0.788, "step": 25643 }, { "epoch": 0.6584660122191356, "grad_norm": 0.78515625, "learning_rate": 0.0001357130525563244, "loss": 0.9333, "step": 25644 }, { "epoch": 0.6584916894150574, "grad_norm": 0.8203125, "learning_rate": 0.00013570888271311496, "loss": 0.8321, "step": 25645 }, { "epoch": 0.6585173666109793, "grad_norm": 0.80078125, "learning_rate": 0.00013570471279874096, "loss": 0.8535, "step": 25646 }, { "epoch": 0.6585430438069011, "grad_norm": 0.85546875, "learning_rate": 0.00013570054281321069, "loss": 0.9919, "step": 25647 }, { "epoch": 0.6585687210028229, "grad_norm": 0.78125, "learning_rate": 0.0001356963727565324, "loss": 0.9916, "step": 25648 }, { "epoch": 0.6585943981987447, "grad_norm": 0.7265625, "learning_rate": 0.00013569220262871454, "loss": 0.8211, "step": 25649 }, { "epoch": 0.6586200753946665, "grad_norm": 0.796875, "learning_rate": 0.00013568803242976533, "loss": 0.7977, "step": 25650 }, { "epoch": 0.6586457525905883, "grad_norm": 0.81640625, "learning_rate": 0.00013568386215969308, "loss": 0.9089, "step": 25651 }, { "epoch": 0.6586714297865102, "grad_norm": 0.71484375, "learning_rate": 0.00013567969181850614, "loss": 0.8681, "step": 25652 }, { "epoch": 0.658697106982432, "grad_norm": 0.75390625, "learning_rate": 0.00013567552140621278, "loss": 0.8417, "step": 25653 }, { "epoch": 0.6587227841783538, "grad_norm": 0.734375, "learning_rate": 0.00013567135092282132, "loss": 0.7049, "step": 25654 }, { "epoch": 0.6587484613742757, "grad_norm": 0.796875, "learning_rate": 0.0001356671803683401, "loss": 0.9245, "step": 25655 }, { "epoch": 0.6587741385701974, "grad_norm": 0.953125, "learning_rate": 0.0001356630097427774, "loss": 0.8029, "step": 25656 }, { "epoch": 0.6587998157661192, "grad_norm": 0.74609375, "learning_rate": 0.00013565883904614157, "loss": 0.7321, "step": 25657 }, { "epoch": 0.6588254929620411, "grad_norm": 0.75390625, "learning_rate": 0.00013565466827844083, "loss": 0.8703, "step": 25658 }, { "epoch": 0.6588511701579629, "grad_norm": 0.84765625, "learning_rate": 0.0001356504974396836, "loss": 0.92, "step": 25659 }, { "epoch": 0.6588768473538847, "grad_norm": 0.80078125, "learning_rate": 0.00013564632652987815, "loss": 0.7837, "step": 25660 }, { "epoch": 0.6589025245498066, "grad_norm": 0.8359375, "learning_rate": 0.00013564215554903277, "loss": 0.8987, "step": 25661 }, { "epoch": 0.6589282017457283, "grad_norm": 0.71875, "learning_rate": 0.0001356379844971558, "loss": 0.9242, "step": 25662 }, { "epoch": 0.6589538789416501, "grad_norm": 0.8046875, "learning_rate": 0.00013563381337425558, "loss": 1.0749, "step": 25663 }, { "epoch": 0.658979556137572, "grad_norm": 0.8125, "learning_rate": 0.00013562964218034036, "loss": 0.9238, "step": 25664 }, { "epoch": 0.6590052333334938, "grad_norm": 0.75390625, "learning_rate": 0.00013562547091541848, "loss": 0.7948, "step": 25665 }, { "epoch": 0.6590309105294156, "grad_norm": 0.87109375, "learning_rate": 0.00013562129957949825, "loss": 0.7906, "step": 25666 }, { "epoch": 0.6590565877253375, "grad_norm": 0.72265625, "learning_rate": 0.000135617128172588, "loss": 0.7191, "step": 25667 }, { "epoch": 0.6590822649212593, "grad_norm": 0.75, "learning_rate": 0.00013561295669469603, "loss": 0.8556, "step": 25668 }, { "epoch": 0.659107942117181, "grad_norm": 0.76171875, "learning_rate": 0.00013560878514583065, "loss": 0.9281, "step": 25669 }, { "epoch": 0.6591336193131029, "grad_norm": 0.828125, "learning_rate": 0.0001356046135260002, "loss": 0.8755, "step": 25670 }, { "epoch": 0.6591592965090247, "grad_norm": 0.828125, "learning_rate": 0.00013560044183521294, "loss": 0.8529, "step": 25671 }, { "epoch": 0.6591849737049466, "grad_norm": 0.75, "learning_rate": 0.0001355962700734772, "loss": 0.7357, "step": 25672 }, { "epoch": 0.6592106509008684, "grad_norm": 0.78125, "learning_rate": 0.00013559209824080137, "loss": 0.9173, "step": 25673 }, { "epoch": 0.6592363280967902, "grad_norm": 0.7421875, "learning_rate": 0.00013558792633719367, "loss": 0.7893, "step": 25674 }, { "epoch": 0.6592620052927121, "grad_norm": 0.796875, "learning_rate": 0.00013558375436266244, "loss": 0.9103, "step": 25675 }, { "epoch": 0.6592876824886338, "grad_norm": 0.796875, "learning_rate": 0.000135579582317216, "loss": 0.9198, "step": 25676 }, { "epoch": 0.6593133596845556, "grad_norm": 0.76171875, "learning_rate": 0.00013557541020086267, "loss": 0.9461, "step": 25677 }, { "epoch": 0.6593390368804775, "grad_norm": 0.75390625, "learning_rate": 0.00013557123801361077, "loss": 0.8916, "step": 25678 }, { "epoch": 0.6593647140763993, "grad_norm": 1.0859375, "learning_rate": 0.00013556706575546863, "loss": 0.979, "step": 25679 }, { "epoch": 0.6593903912723211, "grad_norm": 0.81640625, "learning_rate": 0.0001355628934264445, "loss": 0.7707, "step": 25680 }, { "epoch": 0.659416068468243, "grad_norm": 0.84375, "learning_rate": 0.00013555872102654674, "loss": 0.908, "step": 25681 }, { "epoch": 0.6594417456641647, "grad_norm": 0.73046875, "learning_rate": 0.00013555454855578364, "loss": 0.7824, "step": 25682 }, { "epoch": 0.6594674228600865, "grad_norm": 0.78515625, "learning_rate": 0.0001355503760141636, "loss": 0.811, "step": 25683 }, { "epoch": 0.6594931000560084, "grad_norm": 0.828125, "learning_rate": 0.00013554620340169483, "loss": 0.8659, "step": 25684 }, { "epoch": 0.6595187772519302, "grad_norm": 0.7734375, "learning_rate": 0.0001355420307183857, "loss": 0.7821, "step": 25685 }, { "epoch": 0.659544454447852, "grad_norm": 0.7734375, "learning_rate": 0.0001355378579642445, "loss": 0.8256, "step": 25686 }, { "epoch": 0.6595701316437739, "grad_norm": 0.83203125, "learning_rate": 0.00013553368513927958, "loss": 0.8457, "step": 25687 }, { "epoch": 0.6595958088396957, "grad_norm": 0.7578125, "learning_rate": 0.0001355295122434992, "loss": 0.9246, "step": 25688 }, { "epoch": 0.6596214860356174, "grad_norm": 0.80859375, "learning_rate": 0.00013552533927691174, "loss": 0.8852, "step": 25689 }, { "epoch": 0.6596471632315393, "grad_norm": 0.81640625, "learning_rate": 0.0001355211662395255, "loss": 0.9251, "step": 25690 }, { "epoch": 0.6596728404274611, "grad_norm": 0.71875, "learning_rate": 0.00013551699313134875, "loss": 0.8519, "step": 25691 }, { "epoch": 0.6596985176233829, "grad_norm": 0.7890625, "learning_rate": 0.00013551281995238988, "loss": 0.8813, "step": 25692 }, { "epoch": 0.6597241948193048, "grad_norm": 0.83984375, "learning_rate": 0.00013550864670265711, "loss": 0.8083, "step": 25693 }, { "epoch": 0.6597498720152266, "grad_norm": 0.78515625, "learning_rate": 0.00013550447338215885, "loss": 0.7517, "step": 25694 }, { "epoch": 0.6597755492111484, "grad_norm": 0.8125, "learning_rate": 0.0001355002999909034, "loss": 0.8995, "step": 25695 }, { "epoch": 0.6598012264070702, "grad_norm": 0.7578125, "learning_rate": 0.00013549612652889902, "loss": 0.8192, "step": 25696 }, { "epoch": 0.659826903602992, "grad_norm": 0.74609375, "learning_rate": 0.0001354919529961541, "loss": 0.903, "step": 25697 }, { "epoch": 0.6598525807989138, "grad_norm": 0.83203125, "learning_rate": 0.0001354877793926769, "loss": 0.8829, "step": 25698 }, { "epoch": 0.6598782579948357, "grad_norm": 0.76953125, "learning_rate": 0.00013548360571847575, "loss": 0.8324, "step": 25699 }, { "epoch": 0.6599039351907575, "grad_norm": 0.75, "learning_rate": 0.00013547943197355904, "loss": 0.7717, "step": 25700 }, { "epoch": 0.6599296123866794, "grad_norm": 0.83203125, "learning_rate": 0.00013547525815793497, "loss": 0.8133, "step": 25701 }, { "epoch": 0.6599552895826011, "grad_norm": 0.75390625, "learning_rate": 0.00013547108427161192, "loss": 0.8104, "step": 25702 }, { "epoch": 0.6599809667785229, "grad_norm": 0.74609375, "learning_rate": 0.00013546691031459822, "loss": 0.8196, "step": 25703 }, { "epoch": 0.6600066439744448, "grad_norm": 0.8359375, "learning_rate": 0.00013546273628690217, "loss": 0.8905, "step": 25704 }, { "epoch": 0.6600323211703666, "grad_norm": 0.796875, "learning_rate": 0.0001354585621885321, "loss": 0.9, "step": 25705 }, { "epoch": 0.6600579983662884, "grad_norm": 0.75, "learning_rate": 0.00013545438801949628, "loss": 0.6657, "step": 25706 }, { "epoch": 0.6600836755622103, "grad_norm": 0.859375, "learning_rate": 0.0001354502137798031, "loss": 0.7707, "step": 25707 }, { "epoch": 0.6601093527581321, "grad_norm": 0.83203125, "learning_rate": 0.00013544603946946085, "loss": 0.8718, "step": 25708 }, { "epoch": 0.6601350299540538, "grad_norm": 0.78515625, "learning_rate": 0.0001354418650884778, "loss": 0.9238, "step": 25709 }, { "epoch": 0.6601607071499757, "grad_norm": 0.77734375, "learning_rate": 0.00013543769063686237, "loss": 0.7713, "step": 25710 }, { "epoch": 0.6601863843458975, "grad_norm": 0.8359375, "learning_rate": 0.0001354335161146228, "loss": 0.8273, "step": 25711 }, { "epoch": 0.6602120615418193, "grad_norm": 0.78125, "learning_rate": 0.00013542934152176742, "loss": 0.8279, "step": 25712 }, { "epoch": 0.6602377387377412, "grad_norm": 0.78125, "learning_rate": 0.0001354251668583046, "loss": 0.884, "step": 25713 }, { "epoch": 0.660263415933663, "grad_norm": 0.765625, "learning_rate": 0.00013542099212424257, "loss": 0.7808, "step": 25714 }, { "epoch": 0.6602890931295848, "grad_norm": 0.76171875, "learning_rate": 0.00013541681731958974, "loss": 0.966, "step": 25715 }, { "epoch": 0.6603147703255066, "grad_norm": 0.80859375, "learning_rate": 0.0001354126424443544, "loss": 0.8486, "step": 25716 }, { "epoch": 0.6603404475214284, "grad_norm": 0.75, "learning_rate": 0.00013540846749854484, "loss": 0.7742, "step": 25717 }, { "epoch": 0.6603661247173502, "grad_norm": 0.8046875, "learning_rate": 0.00013540429248216942, "loss": 0.8734, "step": 25718 }, { "epoch": 0.6603918019132721, "grad_norm": 0.79296875, "learning_rate": 0.00013540011739523642, "loss": 0.7164, "step": 25719 }, { "epoch": 0.6604174791091939, "grad_norm": 0.78515625, "learning_rate": 0.0001353959422377542, "loss": 0.9115, "step": 25720 }, { "epoch": 0.6604431563051157, "grad_norm": 0.8203125, "learning_rate": 0.00013539176700973107, "loss": 0.7116, "step": 25721 }, { "epoch": 0.6604688335010375, "grad_norm": 0.7578125, "learning_rate": 0.0001353875917111753, "loss": 0.7923, "step": 25722 }, { "epoch": 0.6604945106969593, "grad_norm": 0.75390625, "learning_rate": 0.00013538341634209534, "loss": 0.7967, "step": 25723 }, { "epoch": 0.6605201878928811, "grad_norm": 0.8671875, "learning_rate": 0.00013537924090249937, "loss": 0.8475, "step": 25724 }, { "epoch": 0.660545865088803, "grad_norm": 0.88671875, "learning_rate": 0.00013537506539239576, "loss": 0.884, "step": 25725 }, { "epoch": 0.6605715422847248, "grad_norm": 0.734375, "learning_rate": 0.00013537088981179288, "loss": 0.8427, "step": 25726 }, { "epoch": 0.6605972194806466, "grad_norm": 0.8203125, "learning_rate": 0.00013536671416069897, "loss": 0.8941, "step": 25727 }, { "epoch": 0.6606228966765685, "grad_norm": 0.7421875, "learning_rate": 0.0001353625384391224, "loss": 0.7128, "step": 25728 }, { "epoch": 0.6606485738724902, "grad_norm": 0.75390625, "learning_rate": 0.0001353583626470715, "loss": 0.6822, "step": 25729 }, { "epoch": 0.660674251068412, "grad_norm": 0.79296875, "learning_rate": 0.00013535418678455458, "loss": 0.972, "step": 25730 }, { "epoch": 0.6606999282643339, "grad_norm": 0.79296875, "learning_rate": 0.00013535001085157998, "loss": 0.8549, "step": 25731 }, { "epoch": 0.6607256054602557, "grad_norm": 0.8125, "learning_rate": 0.00013534583484815595, "loss": 0.7556, "step": 25732 }, { "epoch": 0.6607512826561776, "grad_norm": 0.74609375, "learning_rate": 0.00013534165877429087, "loss": 0.8583, "step": 25733 }, { "epoch": 0.6607769598520994, "grad_norm": 0.7890625, "learning_rate": 0.0001353374826299931, "loss": 1.0131, "step": 25734 }, { "epoch": 0.6608026370480212, "grad_norm": 0.765625, "learning_rate": 0.0001353333064152709, "loss": 0.8174, "step": 25735 }, { "epoch": 0.660828314243943, "grad_norm": 0.77734375, "learning_rate": 0.0001353291301301326, "loss": 0.8458, "step": 25736 }, { "epoch": 0.6608539914398648, "grad_norm": 0.75390625, "learning_rate": 0.00013532495377458655, "loss": 0.7704, "step": 25737 }, { "epoch": 0.6608796686357866, "grad_norm": 0.7421875, "learning_rate": 0.00013532077734864105, "loss": 0.8757, "step": 25738 }, { "epoch": 0.6609053458317085, "grad_norm": 0.75390625, "learning_rate": 0.00013531660085230442, "loss": 0.8956, "step": 25739 }, { "epoch": 0.6609310230276303, "grad_norm": 0.7578125, "learning_rate": 0.00013531242428558503, "loss": 0.8516, "step": 25740 }, { "epoch": 0.6609567002235521, "grad_norm": 0.828125, "learning_rate": 0.00013530824764849112, "loss": 0.9739, "step": 25741 }, { "epoch": 0.6609823774194739, "grad_norm": 0.8125, "learning_rate": 0.0001353040709410311, "loss": 0.8385, "step": 25742 }, { "epoch": 0.6610080546153957, "grad_norm": 0.84375, "learning_rate": 0.00013529989416321322, "loss": 0.8525, "step": 25743 }, { "epoch": 0.6610337318113175, "grad_norm": 0.7734375, "learning_rate": 0.0001352957173150459, "loss": 0.9517, "step": 25744 }, { "epoch": 0.6610594090072394, "grad_norm": 0.8203125, "learning_rate": 0.00013529154039653736, "loss": 0.9762, "step": 25745 }, { "epoch": 0.6610850862031612, "grad_norm": 0.7421875, "learning_rate": 0.00013528736340769594, "loss": 0.7462, "step": 25746 }, { "epoch": 0.661110763399083, "grad_norm": 0.78515625, "learning_rate": 0.00013528318634853004, "loss": 0.8333, "step": 25747 }, { "epoch": 0.6611364405950049, "grad_norm": 0.765625, "learning_rate": 0.00013527900921904792, "loss": 0.8793, "step": 25748 }, { "epoch": 0.6611621177909266, "grad_norm": 0.75390625, "learning_rate": 0.0001352748320192579, "loss": 0.7812, "step": 25749 }, { "epoch": 0.6611877949868484, "grad_norm": 0.75, "learning_rate": 0.00013527065474916837, "loss": 0.7192, "step": 25750 }, { "epoch": 0.6612134721827703, "grad_norm": 0.78515625, "learning_rate": 0.0001352664774087876, "loss": 0.8636, "step": 25751 }, { "epoch": 0.6612391493786921, "grad_norm": 0.74609375, "learning_rate": 0.0001352622999981239, "loss": 0.9525, "step": 25752 }, { "epoch": 0.6612648265746139, "grad_norm": 0.875, "learning_rate": 0.00013525812251718566, "loss": 0.831, "step": 25753 }, { "epoch": 0.6612905037705358, "grad_norm": 0.81640625, "learning_rate": 0.00013525394496598112, "loss": 0.8325, "step": 25754 }, { "epoch": 0.6613161809664576, "grad_norm": 0.74609375, "learning_rate": 0.00013524976734451867, "loss": 0.7839, "step": 25755 }, { "epoch": 0.6613418581623793, "grad_norm": 0.7734375, "learning_rate": 0.00013524558965280665, "loss": 0.8264, "step": 25756 }, { "epoch": 0.6613675353583012, "grad_norm": 1.109375, "learning_rate": 0.00013524141189085332, "loss": 0.8477, "step": 25757 }, { "epoch": 0.661393212554223, "grad_norm": 0.7421875, "learning_rate": 0.00013523723405866705, "loss": 0.8825, "step": 25758 }, { "epoch": 0.6614188897501448, "grad_norm": 0.80859375, "learning_rate": 0.00013523305615625614, "loss": 0.7006, "step": 25759 }, { "epoch": 0.6614445669460667, "grad_norm": 0.85546875, "learning_rate": 0.00013522887818362894, "loss": 0.796, "step": 25760 }, { "epoch": 0.6614702441419885, "grad_norm": 0.78125, "learning_rate": 0.0001352247001407938, "loss": 0.8029, "step": 25761 }, { "epoch": 0.6614959213379102, "grad_norm": 1.09375, "learning_rate": 0.00013522052202775896, "loss": 0.9925, "step": 25762 }, { "epoch": 0.6615215985338321, "grad_norm": 0.8359375, "learning_rate": 0.00013521634384453286, "loss": 0.916, "step": 25763 }, { "epoch": 0.6615472757297539, "grad_norm": 0.7890625, "learning_rate": 0.00013521216559112372, "loss": 0.829, "step": 25764 }, { "epoch": 0.6615729529256758, "grad_norm": 0.734375, "learning_rate": 0.00013520798726753995, "loss": 0.752, "step": 25765 }, { "epoch": 0.6615986301215976, "grad_norm": 0.84765625, "learning_rate": 0.0001352038088737898, "loss": 0.8807, "step": 25766 }, { "epoch": 0.6616243073175194, "grad_norm": 0.765625, "learning_rate": 0.00013519963040988165, "loss": 0.8579, "step": 25767 }, { "epoch": 0.6616499845134413, "grad_norm": 0.78125, "learning_rate": 0.00013519545187582382, "loss": 0.78, "step": 25768 }, { "epoch": 0.661675661709363, "grad_norm": 0.8125, "learning_rate": 0.00013519127327162468, "loss": 0.8542, "step": 25769 }, { "epoch": 0.6617013389052848, "grad_norm": 0.76171875, "learning_rate": 0.00013518709459729245, "loss": 0.9472, "step": 25770 }, { "epoch": 0.6617270161012067, "grad_norm": 0.80078125, "learning_rate": 0.00013518291585283554, "loss": 0.984, "step": 25771 }, { "epoch": 0.6617526932971285, "grad_norm": 0.796875, "learning_rate": 0.00013517873703826228, "loss": 0.9862, "step": 25772 }, { "epoch": 0.6617783704930503, "grad_norm": 0.765625, "learning_rate": 0.00013517455815358092, "loss": 0.8502, "step": 25773 }, { "epoch": 0.6618040476889722, "grad_norm": 0.76953125, "learning_rate": 0.00013517037919879988, "loss": 0.7846, "step": 25774 }, { "epoch": 0.661829724884894, "grad_norm": 0.78515625, "learning_rate": 0.00013516620017392747, "loss": 0.769, "step": 25775 }, { "epoch": 0.6618554020808157, "grad_norm": 0.79296875, "learning_rate": 0.00013516202107897196, "loss": 0.8483, "step": 25776 }, { "epoch": 0.6618810792767376, "grad_norm": 0.86328125, "learning_rate": 0.00013515784191394173, "loss": 0.8713, "step": 25777 }, { "epoch": 0.6619067564726594, "grad_norm": 1.109375, "learning_rate": 0.00013515366267884512, "loss": 0.8186, "step": 25778 }, { "epoch": 0.6619324336685812, "grad_norm": 0.7265625, "learning_rate": 0.0001351494833736904, "loss": 0.7436, "step": 25779 }, { "epoch": 0.6619581108645031, "grad_norm": 0.8359375, "learning_rate": 0.00013514530399848598, "loss": 0.826, "step": 25780 }, { "epoch": 0.6619837880604249, "grad_norm": 0.6875, "learning_rate": 0.0001351411245532401, "loss": 0.722, "step": 25781 }, { "epoch": 0.6620094652563466, "grad_norm": 0.828125, "learning_rate": 0.00013513694503796114, "loss": 0.799, "step": 25782 }, { "epoch": 0.6620351424522685, "grad_norm": 0.71875, "learning_rate": 0.00013513276545265741, "loss": 0.7094, "step": 25783 }, { "epoch": 0.6620608196481903, "grad_norm": 0.796875, "learning_rate": 0.0001351285857973373, "loss": 0.8282, "step": 25784 }, { "epoch": 0.6620864968441121, "grad_norm": 0.73828125, "learning_rate": 0.00013512440607200906, "loss": 0.7689, "step": 25785 }, { "epoch": 0.662112174040034, "grad_norm": 0.7734375, "learning_rate": 0.00013512022627668103, "loss": 0.8808, "step": 25786 }, { "epoch": 0.6621378512359558, "grad_norm": 0.72265625, "learning_rate": 0.0001351160464113616, "loss": 0.8885, "step": 25787 }, { "epoch": 0.6621635284318776, "grad_norm": 0.8046875, "learning_rate": 0.00013511186647605908, "loss": 0.794, "step": 25788 }, { "epoch": 0.6621892056277994, "grad_norm": 0.82421875, "learning_rate": 0.00013510768647078173, "loss": 0.8239, "step": 25789 }, { "epoch": 0.6622148828237212, "grad_norm": 0.703125, "learning_rate": 0.00013510350639553797, "loss": 0.7798, "step": 25790 }, { "epoch": 0.662240560019643, "grad_norm": 0.71875, "learning_rate": 0.00013509932625033606, "loss": 0.7513, "step": 25791 }, { "epoch": 0.6622662372155649, "grad_norm": 0.859375, "learning_rate": 0.00013509514603518436, "loss": 0.8747, "step": 25792 }, { "epoch": 0.6622919144114867, "grad_norm": 0.75, "learning_rate": 0.00013509096575009123, "loss": 0.7926, "step": 25793 }, { "epoch": 0.6623175916074086, "grad_norm": 0.76953125, "learning_rate": 0.00013508678539506496, "loss": 0.8346, "step": 25794 }, { "epoch": 0.6623432688033304, "grad_norm": 0.7890625, "learning_rate": 0.00013508260497011393, "loss": 0.8208, "step": 25795 }, { "epoch": 0.6623689459992521, "grad_norm": 0.74609375, "learning_rate": 0.0001350784244752464, "loss": 0.8652, "step": 25796 }, { "epoch": 0.662394623195174, "grad_norm": 0.828125, "learning_rate": 0.00013507424391047076, "loss": 0.8935, "step": 25797 }, { "epoch": 0.6624203003910958, "grad_norm": 0.8125, "learning_rate": 0.0001350700632757953, "loss": 0.9544, "step": 25798 }, { "epoch": 0.6624459775870176, "grad_norm": 0.8046875, "learning_rate": 0.00013506588257122836, "loss": 0.8308, "step": 25799 }, { "epoch": 0.6624716547829395, "grad_norm": 0.83203125, "learning_rate": 0.00013506170179677834, "loss": 1.0129, "step": 25800 }, { "epoch": 0.6624973319788613, "grad_norm": 0.81640625, "learning_rate": 0.00013505752095245346, "loss": 0.9229, "step": 25801 }, { "epoch": 0.662523009174783, "grad_norm": 0.75390625, "learning_rate": 0.00013505334003826214, "loss": 0.9207, "step": 25802 }, { "epoch": 0.6625486863707049, "grad_norm": 0.78515625, "learning_rate": 0.00013504915905421266, "loss": 0.8027, "step": 25803 }, { "epoch": 0.6625743635666267, "grad_norm": 0.85546875, "learning_rate": 0.00013504497800031337, "loss": 0.7828, "step": 25804 }, { "epoch": 0.6626000407625485, "grad_norm": 0.7734375, "learning_rate": 0.00013504079687657263, "loss": 0.8478, "step": 25805 }, { "epoch": 0.6626257179584704, "grad_norm": 0.83984375, "learning_rate": 0.00013503661568299872, "loss": 0.8814, "step": 25806 }, { "epoch": 0.6626513951543922, "grad_norm": 0.734375, "learning_rate": 0.0001350324344196, "loss": 0.896, "step": 25807 }, { "epoch": 0.662677072350314, "grad_norm": 0.90625, "learning_rate": 0.00013502825308638484, "loss": 0.8947, "step": 25808 }, { "epoch": 0.6627027495462358, "grad_norm": 0.73046875, "learning_rate": 0.0001350240716833615, "loss": 0.7848, "step": 25809 }, { "epoch": 0.6627284267421576, "grad_norm": 0.859375, "learning_rate": 0.00013501989021053837, "loss": 0.7885, "step": 25810 }, { "epoch": 0.6627541039380794, "grad_norm": 0.765625, "learning_rate": 0.00013501570866792374, "loss": 0.9409, "step": 25811 }, { "epoch": 0.6627797811340013, "grad_norm": 0.8125, "learning_rate": 0.00013501152705552597, "loss": 0.8995, "step": 25812 }, { "epoch": 0.6628054583299231, "grad_norm": 0.8203125, "learning_rate": 0.0001350073453733534, "loss": 0.8507, "step": 25813 }, { "epoch": 0.6628311355258449, "grad_norm": 0.78515625, "learning_rate": 0.00013500316362141435, "loss": 0.824, "step": 25814 }, { "epoch": 0.6628568127217668, "grad_norm": 0.7890625, "learning_rate": 0.00013499898179971718, "loss": 0.7345, "step": 25815 }, { "epoch": 0.6628824899176885, "grad_norm": 0.75390625, "learning_rate": 0.00013499479990827014, "loss": 0.8928, "step": 25816 }, { "epoch": 0.6629081671136103, "grad_norm": 0.76171875, "learning_rate": 0.00013499061794708167, "loss": 0.9776, "step": 25817 }, { "epoch": 0.6629338443095322, "grad_norm": 0.8046875, "learning_rate": 0.00013498643591616005, "loss": 0.9846, "step": 25818 }, { "epoch": 0.662959521505454, "grad_norm": 0.875, "learning_rate": 0.00013498225381551363, "loss": 0.8493, "step": 25819 }, { "epoch": 0.6629851987013758, "grad_norm": 0.84765625, "learning_rate": 0.00013497807164515073, "loss": 0.9105, "step": 25820 }, { "epoch": 0.6630108758972977, "grad_norm": 0.80078125, "learning_rate": 0.00013497388940507968, "loss": 0.7653, "step": 25821 }, { "epoch": 0.6630365530932194, "grad_norm": 0.76171875, "learning_rate": 0.00013496970709530887, "loss": 0.8447, "step": 25822 }, { "epoch": 0.6630622302891412, "grad_norm": 0.78515625, "learning_rate": 0.00013496552471584653, "loss": 0.8635, "step": 25823 }, { "epoch": 0.6630879074850631, "grad_norm": 0.765625, "learning_rate": 0.0001349613422667011, "loss": 0.9066, "step": 25824 }, { "epoch": 0.6631135846809849, "grad_norm": 0.84765625, "learning_rate": 0.0001349571597478809, "loss": 0.8167, "step": 25825 }, { "epoch": 0.6631392618769067, "grad_norm": 0.82421875, "learning_rate": 0.00013495297715939418, "loss": 0.8194, "step": 25826 }, { "epoch": 0.6631649390728286, "grad_norm": 0.81640625, "learning_rate": 0.00013494879450124935, "loss": 0.8599, "step": 25827 }, { "epoch": 0.6631906162687504, "grad_norm": 0.80859375, "learning_rate": 0.00013494461177345474, "loss": 0.884, "step": 25828 }, { "epoch": 0.6632162934646721, "grad_norm": 0.77734375, "learning_rate": 0.0001349404289760187, "loss": 0.8485, "step": 25829 }, { "epoch": 0.663241970660594, "grad_norm": 0.80859375, "learning_rate": 0.0001349362461089495, "loss": 0.848, "step": 25830 }, { "epoch": 0.6632676478565158, "grad_norm": 0.7734375, "learning_rate": 0.0001349320631722555, "loss": 0.8748, "step": 25831 }, { "epoch": 0.6632933250524377, "grad_norm": 0.7890625, "learning_rate": 0.00013492788016594508, "loss": 0.8494, "step": 25832 }, { "epoch": 0.6633190022483595, "grad_norm": 0.75390625, "learning_rate": 0.00013492369709002658, "loss": 0.8623, "step": 25833 }, { "epoch": 0.6633446794442813, "grad_norm": 0.84375, "learning_rate": 0.00013491951394450826, "loss": 0.9083, "step": 25834 }, { "epoch": 0.663370356640203, "grad_norm": 0.77734375, "learning_rate": 0.00013491533072939856, "loss": 0.8198, "step": 25835 }, { "epoch": 0.6633960338361249, "grad_norm": 0.796875, "learning_rate": 0.0001349111474447057, "loss": 0.7569, "step": 25836 }, { "epoch": 0.6634217110320467, "grad_norm": 0.83203125, "learning_rate": 0.00013490696409043808, "loss": 0.9288, "step": 25837 }, { "epoch": 0.6634473882279686, "grad_norm": 0.76953125, "learning_rate": 0.00013490278066660406, "loss": 0.7842, "step": 25838 }, { "epoch": 0.6634730654238904, "grad_norm": 0.8125, "learning_rate": 0.00013489859717321194, "loss": 0.7827, "step": 25839 }, { "epoch": 0.6634987426198122, "grad_norm": 0.76953125, "learning_rate": 0.00013489441361027007, "loss": 0.8637, "step": 25840 }, { "epoch": 0.6635244198157341, "grad_norm": 0.75, "learning_rate": 0.00013489022997778682, "loss": 0.9282, "step": 25841 }, { "epoch": 0.6635500970116558, "grad_norm": 0.73046875, "learning_rate": 0.00013488604627577043, "loss": 0.7217, "step": 25842 }, { "epoch": 0.6635757742075776, "grad_norm": 0.7265625, "learning_rate": 0.00013488186250422933, "loss": 0.7066, "step": 25843 }, { "epoch": 0.6636014514034995, "grad_norm": 0.765625, "learning_rate": 0.00013487767866317182, "loss": 0.8645, "step": 25844 }, { "epoch": 0.6636271285994213, "grad_norm": 0.765625, "learning_rate": 0.00013487349475260627, "loss": 0.8147, "step": 25845 }, { "epoch": 0.6636528057953431, "grad_norm": 0.83984375, "learning_rate": 0.000134869310772541, "loss": 0.9545, "step": 25846 }, { "epoch": 0.663678482991265, "grad_norm": 0.75, "learning_rate": 0.0001348651267229843, "loss": 0.8899, "step": 25847 }, { "epoch": 0.6637041601871868, "grad_norm": 0.796875, "learning_rate": 0.0001348609426039446, "loss": 0.8827, "step": 25848 }, { "epoch": 0.6637298373831085, "grad_norm": 0.8515625, "learning_rate": 0.0001348567584154302, "loss": 1.0587, "step": 25849 }, { "epoch": 0.6637555145790304, "grad_norm": 0.80859375, "learning_rate": 0.00013485257415744937, "loss": 0.8487, "step": 25850 }, { "epoch": 0.6637811917749522, "grad_norm": 0.796875, "learning_rate": 0.00013484838983001052, "loss": 0.9043, "step": 25851 }, { "epoch": 0.663806868970874, "grad_norm": 0.82421875, "learning_rate": 0.000134844205433122, "loss": 0.8325, "step": 25852 }, { "epoch": 0.6638325461667959, "grad_norm": 0.7734375, "learning_rate": 0.0001348400209667921, "loss": 0.8508, "step": 25853 }, { "epoch": 0.6638582233627177, "grad_norm": 0.73046875, "learning_rate": 0.0001348358364310292, "loss": 0.8507, "step": 25854 }, { "epoch": 0.6638839005586394, "grad_norm": 0.8125, "learning_rate": 0.00013483165182584164, "loss": 0.961, "step": 25855 }, { "epoch": 0.6639095777545613, "grad_norm": 0.76953125, "learning_rate": 0.00013482746715123774, "loss": 0.8525, "step": 25856 }, { "epoch": 0.6639352549504831, "grad_norm": 0.77734375, "learning_rate": 0.00013482328240722584, "loss": 0.9089, "step": 25857 }, { "epoch": 0.663960932146405, "grad_norm": 0.84375, "learning_rate": 0.00013481909759381427, "loss": 0.9026, "step": 25858 }, { "epoch": 0.6639866093423268, "grad_norm": 0.7890625, "learning_rate": 0.0001348149127110114, "loss": 0.8321, "step": 25859 }, { "epoch": 0.6640122865382486, "grad_norm": 0.84375, "learning_rate": 0.00013481072775882555, "loss": 0.83, "step": 25860 }, { "epoch": 0.6640379637341705, "grad_norm": 0.70703125, "learning_rate": 0.00013480654273726508, "loss": 0.7562, "step": 25861 }, { "epoch": 0.6640636409300922, "grad_norm": 0.734375, "learning_rate": 0.00013480235764633827, "loss": 0.805, "step": 25862 }, { "epoch": 0.664089318126014, "grad_norm": 0.734375, "learning_rate": 0.00013479817248605354, "loss": 0.9422, "step": 25863 }, { "epoch": 0.6641149953219359, "grad_norm": 0.7890625, "learning_rate": 0.0001347939872564192, "loss": 0.7428, "step": 25864 }, { "epoch": 0.6641406725178577, "grad_norm": 0.72265625, "learning_rate": 0.00013478980195744355, "loss": 0.8752, "step": 25865 }, { "epoch": 0.6641663497137795, "grad_norm": 0.828125, "learning_rate": 0.00013478561658913498, "loss": 0.854, "step": 25866 }, { "epoch": 0.6641920269097014, "grad_norm": 0.81640625, "learning_rate": 0.00013478143115150184, "loss": 0.9429, "step": 25867 }, { "epoch": 0.6642177041056232, "grad_norm": 0.7421875, "learning_rate": 0.00013477724564455243, "loss": 0.822, "step": 25868 }, { "epoch": 0.6642433813015449, "grad_norm": 0.72265625, "learning_rate": 0.00013477306006829512, "loss": 0.6943, "step": 25869 }, { "epoch": 0.6642690584974668, "grad_norm": 0.6875, "learning_rate": 0.00013476887442273825, "loss": 0.8764, "step": 25870 }, { "epoch": 0.6642947356933886, "grad_norm": 0.82421875, "learning_rate": 0.00013476468870789011, "loss": 0.7987, "step": 25871 }, { "epoch": 0.6643204128893104, "grad_norm": 0.71875, "learning_rate": 0.00013476050292375913, "loss": 0.8248, "step": 25872 }, { "epoch": 0.6643460900852323, "grad_norm": 0.72265625, "learning_rate": 0.0001347563170703536, "loss": 0.7467, "step": 25873 }, { "epoch": 0.6643717672811541, "grad_norm": 0.80078125, "learning_rate": 0.00013475213114768188, "loss": 0.8277, "step": 25874 }, { "epoch": 0.6643974444770758, "grad_norm": 0.734375, "learning_rate": 0.0001347479451557523, "loss": 0.8588, "step": 25875 }, { "epoch": 0.6644231216729977, "grad_norm": 0.76171875, "learning_rate": 0.00013474375909457316, "loss": 0.8636, "step": 25876 }, { "epoch": 0.6644487988689195, "grad_norm": 0.75390625, "learning_rate": 0.00013473957296415291, "loss": 0.905, "step": 25877 }, { "epoch": 0.6644744760648413, "grad_norm": 0.84375, "learning_rate": 0.0001347353867644998, "loss": 0.8017, "step": 25878 }, { "epoch": 0.6645001532607632, "grad_norm": 0.80859375, "learning_rate": 0.0001347312004956222, "loss": 0.8788, "step": 25879 }, { "epoch": 0.664525830456685, "grad_norm": 0.765625, "learning_rate": 0.00013472701415752843, "loss": 0.8126, "step": 25880 }, { "epoch": 0.6645515076526068, "grad_norm": 0.8203125, "learning_rate": 0.0001347228277502269, "loss": 0.8781, "step": 25881 }, { "epoch": 0.6645771848485286, "grad_norm": 0.7890625, "learning_rate": 0.0001347186412737259, "loss": 0.7736, "step": 25882 }, { "epoch": 0.6646028620444504, "grad_norm": 0.76953125, "learning_rate": 0.0001347144547280338, "loss": 0.9665, "step": 25883 }, { "epoch": 0.6646285392403722, "grad_norm": 0.7734375, "learning_rate": 0.0001347102681131589, "loss": 0.8136, "step": 25884 }, { "epoch": 0.6646542164362941, "grad_norm": 0.7109375, "learning_rate": 0.00013470608142910953, "loss": 0.8221, "step": 25885 }, { "epoch": 0.6646798936322159, "grad_norm": 0.8203125, "learning_rate": 0.00013470189467589414, "loss": 0.9821, "step": 25886 }, { "epoch": 0.6647055708281377, "grad_norm": 0.8359375, "learning_rate": 0.000134697707853521, "loss": 0.9764, "step": 25887 }, { "epoch": 0.6647312480240596, "grad_norm": 0.77734375, "learning_rate": 0.00013469352096199844, "loss": 0.8837, "step": 25888 }, { "epoch": 0.6647569252199813, "grad_norm": 0.71875, "learning_rate": 0.00013468933400133487, "loss": 0.7343, "step": 25889 }, { "epoch": 0.6647826024159031, "grad_norm": 0.7890625, "learning_rate": 0.00013468514697153854, "loss": 0.8306, "step": 25890 }, { "epoch": 0.664808279611825, "grad_norm": 0.7890625, "learning_rate": 0.00013468095987261786, "loss": 0.781, "step": 25891 }, { "epoch": 0.6648339568077468, "grad_norm": 0.79296875, "learning_rate": 0.00013467677270458114, "loss": 0.8499, "step": 25892 }, { "epoch": 0.6648596340036687, "grad_norm": 0.77734375, "learning_rate": 0.0001346725854674368, "loss": 0.8351, "step": 25893 }, { "epoch": 0.6648853111995905, "grad_norm": 0.75390625, "learning_rate": 0.0001346683981611931, "loss": 0.7862, "step": 25894 }, { "epoch": 0.6649109883955122, "grad_norm": 0.83203125, "learning_rate": 0.0001346642107858584, "loss": 0.747, "step": 25895 }, { "epoch": 0.664936665591434, "grad_norm": 0.84375, "learning_rate": 0.00013466002334144107, "loss": 0.8638, "step": 25896 }, { "epoch": 0.6649623427873559, "grad_norm": 0.81640625, "learning_rate": 0.00013465583582794946, "loss": 0.9118, "step": 25897 }, { "epoch": 0.6649880199832777, "grad_norm": 0.75390625, "learning_rate": 0.00013465164824539186, "loss": 0.8671, "step": 25898 }, { "epoch": 0.6650136971791996, "grad_norm": 0.76171875, "learning_rate": 0.0001346474605937767, "loss": 0.8652, "step": 25899 }, { "epoch": 0.6650393743751214, "grad_norm": 0.89453125, "learning_rate": 0.00013464327287311228, "loss": 0.8958, "step": 25900 }, { "epoch": 0.6650650515710432, "grad_norm": 0.73046875, "learning_rate": 0.0001346390850834069, "loss": 0.7894, "step": 25901 }, { "epoch": 0.665090728766965, "grad_norm": 0.75, "learning_rate": 0.000134634897224669, "loss": 0.8777, "step": 25902 }, { "epoch": 0.6651164059628868, "grad_norm": 0.84765625, "learning_rate": 0.00013463070929690683, "loss": 0.8025, "step": 25903 }, { "epoch": 0.6651420831588086, "grad_norm": 0.80859375, "learning_rate": 0.00013462652130012882, "loss": 0.7733, "step": 25904 }, { "epoch": 0.6651677603547305, "grad_norm": 0.7265625, "learning_rate": 0.0001346223332343433, "loss": 0.7527, "step": 25905 }, { "epoch": 0.6651934375506523, "grad_norm": 0.796875, "learning_rate": 0.00013461814509955854, "loss": 0.8003, "step": 25906 }, { "epoch": 0.6652191147465741, "grad_norm": 0.69140625, "learning_rate": 0.000134613956895783, "loss": 0.8026, "step": 25907 }, { "epoch": 0.665244791942496, "grad_norm": 0.953125, "learning_rate": 0.00013460976862302493, "loss": 0.8802, "step": 25908 }, { "epoch": 0.6652704691384177, "grad_norm": 0.81640625, "learning_rate": 0.00013460558028129273, "loss": 0.7465, "step": 25909 }, { "epoch": 0.6652961463343395, "grad_norm": 0.828125, "learning_rate": 0.00013460139187059474, "loss": 0.7065, "step": 25910 }, { "epoch": 0.6653218235302614, "grad_norm": 0.81640625, "learning_rate": 0.0001345972033909393, "loss": 0.9346, "step": 25911 }, { "epoch": 0.6653475007261832, "grad_norm": 0.75390625, "learning_rate": 0.00013459301484233476, "loss": 0.948, "step": 25912 }, { "epoch": 0.665373177922105, "grad_norm": 0.75390625, "learning_rate": 0.00013458882622478945, "loss": 0.8321, "step": 25913 }, { "epoch": 0.6653988551180269, "grad_norm": 0.80078125, "learning_rate": 0.00013458463753831176, "loss": 0.7826, "step": 25914 }, { "epoch": 0.6654245323139486, "grad_norm": 0.78515625, "learning_rate": 0.00013458044878291, "loss": 0.8232, "step": 25915 }, { "epoch": 0.6654502095098704, "grad_norm": 0.828125, "learning_rate": 0.00013457625995859251, "loss": 0.8743, "step": 25916 }, { "epoch": 0.6654758867057923, "grad_norm": 1.0546875, "learning_rate": 0.0001345720710653677, "loss": 0.9277, "step": 25917 }, { "epoch": 0.6655015639017141, "grad_norm": 0.9140625, "learning_rate": 0.00013456788210324384, "loss": 0.7684, "step": 25918 }, { "epoch": 0.665527241097636, "grad_norm": 0.73046875, "learning_rate": 0.0001345636930722293, "loss": 0.959, "step": 25919 }, { "epoch": 0.6655529182935578, "grad_norm": 0.765625, "learning_rate": 0.0001345595039723325, "loss": 0.8366, "step": 25920 }, { "epoch": 0.6655785954894796, "grad_norm": 0.72265625, "learning_rate": 0.0001345553148035617, "loss": 0.8473, "step": 25921 }, { "epoch": 0.6656042726854013, "grad_norm": 0.88671875, "learning_rate": 0.00013455112556592526, "loss": 0.8486, "step": 25922 }, { "epoch": 0.6656299498813232, "grad_norm": 0.7890625, "learning_rate": 0.00013454693625943156, "loss": 0.8213, "step": 25923 }, { "epoch": 0.665655627077245, "grad_norm": 0.8203125, "learning_rate": 0.00013454274688408892, "loss": 0.8263, "step": 25924 }, { "epoch": 0.6656813042731669, "grad_norm": 0.796875, "learning_rate": 0.00013453855743990572, "loss": 0.8228, "step": 25925 }, { "epoch": 0.6657069814690887, "grad_norm": 0.8046875, "learning_rate": 0.00013453436792689033, "loss": 0.7956, "step": 25926 }, { "epoch": 0.6657326586650105, "grad_norm": 0.76171875, "learning_rate": 0.00013453017834505105, "loss": 0.8085, "step": 25927 }, { "epoch": 0.6657583358609324, "grad_norm": 0.7890625, "learning_rate": 0.00013452598869439621, "loss": 0.7926, "step": 25928 }, { "epoch": 0.6657840130568541, "grad_norm": 0.75390625, "learning_rate": 0.0001345217989749342, "loss": 0.8449, "step": 25929 }, { "epoch": 0.6658096902527759, "grad_norm": 0.8828125, "learning_rate": 0.00013451760918667337, "loss": 0.8296, "step": 25930 }, { "epoch": 0.6658353674486978, "grad_norm": 0.7109375, "learning_rate": 0.00013451341932962208, "loss": 0.73, "step": 25931 }, { "epoch": 0.6658610446446196, "grad_norm": 0.83203125, "learning_rate": 0.00013450922940378863, "loss": 0.8265, "step": 25932 }, { "epoch": 0.6658867218405414, "grad_norm": 0.921875, "learning_rate": 0.00013450503940918144, "loss": 0.8324, "step": 25933 }, { "epoch": 0.6659123990364633, "grad_norm": 0.8203125, "learning_rate": 0.00013450084934580882, "loss": 0.79, "step": 25934 }, { "epoch": 0.665938076232385, "grad_norm": 0.7734375, "learning_rate": 0.00013449665921367912, "loss": 0.7155, "step": 25935 }, { "epoch": 0.6659637534283068, "grad_norm": 0.8046875, "learning_rate": 0.00013449246901280066, "loss": 0.9473, "step": 25936 }, { "epoch": 0.6659894306242287, "grad_norm": 0.87109375, "learning_rate": 0.00013448827874318187, "loss": 1.0054, "step": 25937 }, { "epoch": 0.6660151078201505, "grad_norm": 0.7578125, "learning_rate": 0.000134484088404831, "loss": 0.8229, "step": 25938 }, { "epoch": 0.6660407850160723, "grad_norm": 0.77734375, "learning_rate": 0.00013447989799775652, "loss": 0.8343, "step": 25939 }, { "epoch": 0.6660664622119942, "grad_norm": 0.80078125, "learning_rate": 0.0001344757075219667, "loss": 0.8612, "step": 25940 }, { "epoch": 0.666092139407916, "grad_norm": 0.7890625, "learning_rate": 0.00013447151697746986, "loss": 0.8849, "step": 25941 }, { "epoch": 0.6661178166038377, "grad_norm": 0.84765625, "learning_rate": 0.00013446732636427446, "loss": 0.9461, "step": 25942 }, { "epoch": 0.6661434937997596, "grad_norm": 0.7421875, "learning_rate": 0.00013446313568238875, "loss": 0.7852, "step": 25943 }, { "epoch": 0.6661691709956814, "grad_norm": 0.8125, "learning_rate": 0.00013445894493182116, "loss": 0.8535, "step": 25944 }, { "epoch": 0.6661948481916032, "grad_norm": 0.83203125, "learning_rate": 0.00013445475411258, "loss": 0.8565, "step": 25945 }, { "epoch": 0.6662205253875251, "grad_norm": 0.7890625, "learning_rate": 0.0001344505632246736, "loss": 0.8271, "step": 25946 }, { "epoch": 0.6662462025834469, "grad_norm": 0.7578125, "learning_rate": 0.00013444637226811036, "loss": 0.8003, "step": 25947 }, { "epoch": 0.6662718797793687, "grad_norm": 0.71484375, "learning_rate": 0.0001344421812428986, "loss": 0.7726, "step": 25948 }, { "epoch": 0.6662975569752905, "grad_norm": 0.8359375, "learning_rate": 0.0001344379901490467, "loss": 0.8752, "step": 25949 }, { "epoch": 0.6663232341712123, "grad_norm": 0.78125, "learning_rate": 0.000134433798986563, "loss": 0.8175, "step": 25950 }, { "epoch": 0.6663489113671341, "grad_norm": 0.8671875, "learning_rate": 0.0001344296077554558, "loss": 0.8853, "step": 25951 }, { "epoch": 0.666374588563056, "grad_norm": 0.75, "learning_rate": 0.00013442541645573356, "loss": 0.899, "step": 25952 }, { "epoch": 0.6664002657589778, "grad_norm": 0.77734375, "learning_rate": 0.00013442122508740455, "loss": 1.0328, "step": 25953 }, { "epoch": 0.6664259429548997, "grad_norm": 0.75, "learning_rate": 0.00013441703365047716, "loss": 0.9021, "step": 25954 }, { "epoch": 0.6664516201508214, "grad_norm": 0.75, "learning_rate": 0.00013441284214495974, "loss": 0.7801, "step": 25955 }, { "epoch": 0.6664772973467432, "grad_norm": 0.72265625, "learning_rate": 0.00013440865057086062, "loss": 0.8387, "step": 25956 }, { "epoch": 0.666502974542665, "grad_norm": 0.83203125, "learning_rate": 0.00013440445892818815, "loss": 0.9245, "step": 25957 }, { "epoch": 0.6665286517385869, "grad_norm": 0.78125, "learning_rate": 0.0001344002672169507, "loss": 0.8231, "step": 25958 }, { "epoch": 0.6665543289345087, "grad_norm": 0.734375, "learning_rate": 0.00013439607543715664, "loss": 0.8297, "step": 25959 }, { "epoch": 0.6665800061304306, "grad_norm": 0.80859375, "learning_rate": 0.00013439188358881432, "loss": 0.9012, "step": 25960 }, { "epoch": 0.6666056833263524, "grad_norm": 0.7734375, "learning_rate": 0.00013438769167193205, "loss": 0.8257, "step": 25961 }, { "epoch": 0.6666313605222741, "grad_norm": 0.77734375, "learning_rate": 0.00013438349968651825, "loss": 0.7993, "step": 25962 }, { "epoch": 0.666657037718196, "grad_norm": 0.8671875, "learning_rate": 0.00013437930763258123, "loss": 0.9162, "step": 25963 }, { "epoch": 0.6666827149141178, "grad_norm": 0.796875, "learning_rate": 0.00013437511551012935, "loss": 0.8129, "step": 25964 }, { "epoch": 0.6667083921100396, "grad_norm": 0.8125, "learning_rate": 0.000134370923319171, "loss": 0.8456, "step": 25965 }, { "epoch": 0.6667340693059615, "grad_norm": 0.859375, "learning_rate": 0.0001343667310597145, "loss": 0.9872, "step": 25966 }, { "epoch": 0.6667597465018833, "grad_norm": 0.84765625, "learning_rate": 0.00013436253873176814, "loss": 0.8319, "step": 25967 }, { "epoch": 0.6667854236978051, "grad_norm": 0.81640625, "learning_rate": 0.0001343583463353404, "loss": 0.7869, "step": 25968 }, { "epoch": 0.6668111008937269, "grad_norm": 0.828125, "learning_rate": 0.00013435415387043958, "loss": 0.8768, "step": 25969 }, { "epoch": 0.6668367780896487, "grad_norm": 0.8828125, "learning_rate": 0.00013434996133707402, "loss": 0.8208, "step": 25970 }, { "epoch": 0.6668624552855705, "grad_norm": 0.78515625, "learning_rate": 0.00013434576873525212, "loss": 0.8658, "step": 25971 }, { "epoch": 0.6668881324814924, "grad_norm": 0.796875, "learning_rate": 0.00013434157606498215, "loss": 0.8108, "step": 25972 }, { "epoch": 0.6669138096774142, "grad_norm": 2.96875, "learning_rate": 0.00013433738332627258, "loss": 0.7483, "step": 25973 }, { "epoch": 0.666939486873336, "grad_norm": 0.796875, "learning_rate": 0.00013433319051913166, "loss": 0.9029, "step": 25974 }, { "epoch": 0.6669651640692578, "grad_norm": 0.73828125, "learning_rate": 0.00013432899764356782, "loss": 0.7651, "step": 25975 }, { "epoch": 0.6669908412651796, "grad_norm": 0.83984375, "learning_rate": 0.00013432480469958936, "loss": 0.7685, "step": 25976 }, { "epoch": 0.6670165184611014, "grad_norm": 0.78125, "learning_rate": 0.00013432061168720471, "loss": 0.8565, "step": 25977 }, { "epoch": 0.6670421956570233, "grad_norm": 0.84765625, "learning_rate": 0.00013431641860642211, "loss": 0.8557, "step": 25978 }, { "epoch": 0.6670678728529451, "grad_norm": 0.73828125, "learning_rate": 0.00013431222545725008, "loss": 0.803, "step": 25979 }, { "epoch": 0.667093550048867, "grad_norm": 0.89453125, "learning_rate": 0.00013430803223969684, "loss": 0.9118, "step": 25980 }, { "epoch": 0.6671192272447888, "grad_norm": 0.7421875, "learning_rate": 0.0001343038389537708, "loss": 0.7777, "step": 25981 }, { "epoch": 0.6671449044407105, "grad_norm": 0.78515625, "learning_rate": 0.00013429964559948028, "loss": 0.7917, "step": 25982 }, { "epoch": 0.6671705816366323, "grad_norm": 0.796875, "learning_rate": 0.00013429545217683367, "loss": 0.7904, "step": 25983 }, { "epoch": 0.6671962588325542, "grad_norm": 0.74609375, "learning_rate": 0.00013429125868583933, "loss": 0.8979, "step": 25984 }, { "epoch": 0.667221936028476, "grad_norm": 0.828125, "learning_rate": 0.0001342870651265056, "loss": 1.0113, "step": 25985 }, { "epoch": 0.6672476132243979, "grad_norm": 0.76171875, "learning_rate": 0.00013428287149884087, "loss": 0.8752, "step": 25986 }, { "epoch": 0.6672732904203197, "grad_norm": 0.7265625, "learning_rate": 0.0001342786778028535, "loss": 0.7966, "step": 25987 }, { "epoch": 0.6672989676162415, "grad_norm": 0.7578125, "learning_rate": 0.00013427448403855174, "loss": 0.8328, "step": 25988 }, { "epoch": 0.6673246448121632, "grad_norm": 0.71875, "learning_rate": 0.00013427029020594407, "loss": 0.7805, "step": 25989 }, { "epoch": 0.6673503220080851, "grad_norm": 0.90625, "learning_rate": 0.0001342660963050388, "loss": 0.8608, "step": 25990 }, { "epoch": 0.6673759992040069, "grad_norm": 0.87109375, "learning_rate": 0.0001342619023358443, "loss": 0.8476, "step": 25991 }, { "epoch": 0.6674016763999288, "grad_norm": 0.7578125, "learning_rate": 0.00013425770829836894, "loss": 0.8197, "step": 25992 }, { "epoch": 0.6674273535958506, "grad_norm": 0.7578125, "learning_rate": 0.00013425351419262108, "loss": 0.8893, "step": 25993 }, { "epoch": 0.6674530307917724, "grad_norm": 0.72265625, "learning_rate": 0.00013424932001860902, "loss": 0.7713, "step": 25994 }, { "epoch": 0.6674787079876942, "grad_norm": 0.765625, "learning_rate": 0.00013424512577634115, "loss": 0.8835, "step": 25995 }, { "epoch": 0.667504385183616, "grad_norm": 0.76171875, "learning_rate": 0.00013424093146582585, "loss": 0.7312, "step": 25996 }, { "epoch": 0.6675300623795378, "grad_norm": 0.74609375, "learning_rate": 0.0001342367370870715, "loss": 0.6951, "step": 25997 }, { "epoch": 0.6675557395754597, "grad_norm": 0.828125, "learning_rate": 0.00013423254264008637, "loss": 0.8904, "step": 25998 }, { "epoch": 0.6675814167713815, "grad_norm": 0.7734375, "learning_rate": 0.00013422834812487893, "loss": 0.8393, "step": 25999 }, { "epoch": 0.6676070939673033, "grad_norm": 0.73828125, "learning_rate": 0.00013422415354145744, "loss": 0.7981, "step": 26000 }, { "epoch": 0.6676070939673033, "eval_loss": 0.8428560495376587, "eval_runtime": 384.8264, "eval_samples_per_second": 25.986, "eval_steps_per_second": 0.813, "step": 26000 }, { "epoch": 0.6676327711632252, "grad_norm": 0.71484375, "learning_rate": 0.00013421995888983031, "loss": 0.7936, "step": 26001 }, { "epoch": 0.6676584483591469, "grad_norm": 0.80078125, "learning_rate": 0.00013421576417000592, "loss": 0.7887, "step": 26002 }, { "epoch": 0.6676841255550687, "grad_norm": 0.73046875, "learning_rate": 0.00013421156938199258, "loss": 0.6647, "step": 26003 }, { "epoch": 0.6677098027509906, "grad_norm": 0.80859375, "learning_rate": 0.00013420737452579868, "loss": 0.9763, "step": 26004 }, { "epoch": 0.6677354799469124, "grad_norm": 0.74609375, "learning_rate": 0.00013420317960143258, "loss": 0.7615, "step": 26005 }, { "epoch": 0.6677611571428342, "grad_norm": 0.78125, "learning_rate": 0.00013419898460890263, "loss": 0.8959, "step": 26006 }, { "epoch": 0.6677868343387561, "grad_norm": 0.83203125, "learning_rate": 0.0001341947895482172, "loss": 0.852, "step": 26007 }, { "epoch": 0.6678125115346779, "grad_norm": 0.75390625, "learning_rate": 0.0001341905944193846, "loss": 0.8634, "step": 26008 }, { "epoch": 0.6678381887305996, "grad_norm": 0.7890625, "learning_rate": 0.00013418639922241327, "loss": 0.9053, "step": 26009 }, { "epoch": 0.6678638659265215, "grad_norm": 0.87109375, "learning_rate": 0.00013418220395731154, "loss": 0.9855, "step": 26010 }, { "epoch": 0.6678895431224433, "grad_norm": 0.82421875, "learning_rate": 0.00013417800862408776, "loss": 0.792, "step": 26011 }, { "epoch": 0.6679152203183651, "grad_norm": 0.78125, "learning_rate": 0.00013417381322275028, "loss": 0.9099, "step": 26012 }, { "epoch": 0.667940897514287, "grad_norm": 0.76953125, "learning_rate": 0.0001341696177533075, "loss": 0.9016, "step": 26013 }, { "epoch": 0.6679665747102088, "grad_norm": 0.82421875, "learning_rate": 0.00013416542221576775, "loss": 0.8905, "step": 26014 }, { "epoch": 0.6679922519061305, "grad_norm": 0.77734375, "learning_rate": 0.0001341612266101394, "loss": 0.8712, "step": 26015 }, { "epoch": 0.6680179291020524, "grad_norm": 0.78125, "learning_rate": 0.00013415703093643078, "loss": 0.9079, "step": 26016 }, { "epoch": 0.6680436062979742, "grad_norm": 0.7890625, "learning_rate": 0.0001341528351946503, "loss": 0.8828, "step": 26017 }, { "epoch": 0.668069283493896, "grad_norm": 0.734375, "learning_rate": 0.00013414863938480633, "loss": 0.834, "step": 26018 }, { "epoch": 0.6680949606898179, "grad_norm": 0.84765625, "learning_rate": 0.0001341444435069072, "loss": 0.9479, "step": 26019 }, { "epoch": 0.6681206378857397, "grad_norm": 0.80859375, "learning_rate": 0.00013414024756096127, "loss": 0.7778, "step": 26020 }, { "epoch": 0.6681463150816616, "grad_norm": 0.75, "learning_rate": 0.0001341360515469769, "loss": 0.8422, "step": 26021 }, { "epoch": 0.6681719922775833, "grad_norm": 0.79296875, "learning_rate": 0.00013413185546496247, "loss": 0.8179, "step": 26022 }, { "epoch": 0.6681976694735051, "grad_norm": 0.76953125, "learning_rate": 0.00013412765931492633, "loss": 0.8081, "step": 26023 }, { "epoch": 0.668223346669427, "grad_norm": 0.7578125, "learning_rate": 0.00013412346309687686, "loss": 0.9196, "step": 26024 }, { "epoch": 0.6682490238653488, "grad_norm": 0.8203125, "learning_rate": 0.0001341192668108224, "loss": 1.0207, "step": 26025 }, { "epoch": 0.6682747010612706, "grad_norm": 0.74609375, "learning_rate": 0.00013411507045677133, "loss": 0.8243, "step": 26026 }, { "epoch": 0.6683003782571925, "grad_norm": 0.828125, "learning_rate": 0.00013411087403473202, "loss": 0.7471, "step": 26027 }, { "epoch": 0.6683260554531142, "grad_norm": 0.83203125, "learning_rate": 0.0001341066775447128, "loss": 0.9201, "step": 26028 }, { "epoch": 0.668351732649036, "grad_norm": 0.8203125, "learning_rate": 0.00013410248098672203, "loss": 0.8743, "step": 26029 }, { "epoch": 0.6683774098449579, "grad_norm": 2.96875, "learning_rate": 0.00013409828436076814, "loss": 0.7889, "step": 26030 }, { "epoch": 0.6684030870408797, "grad_norm": 1.1640625, "learning_rate": 0.00013409408766685942, "loss": 0.8608, "step": 26031 }, { "epoch": 0.6684287642368015, "grad_norm": 0.75, "learning_rate": 0.0001340898909050043, "loss": 0.8012, "step": 26032 }, { "epoch": 0.6684544414327234, "grad_norm": 0.74609375, "learning_rate": 0.00013408569407521108, "loss": 0.8274, "step": 26033 }, { "epoch": 0.6684801186286452, "grad_norm": 0.82421875, "learning_rate": 0.00013408149717748815, "loss": 0.81, "step": 26034 }, { "epoch": 0.6685057958245669, "grad_norm": 0.98828125, "learning_rate": 0.00013407730021184388, "loss": 0.9413, "step": 26035 }, { "epoch": 0.6685314730204888, "grad_norm": 0.87890625, "learning_rate": 0.0001340731031782866, "loss": 0.9493, "step": 26036 }, { "epoch": 0.6685571502164106, "grad_norm": 0.78125, "learning_rate": 0.00013406890607682475, "loss": 0.7982, "step": 26037 }, { "epoch": 0.6685828274123324, "grad_norm": 0.75, "learning_rate": 0.00013406470890746662, "loss": 0.7834, "step": 26038 }, { "epoch": 0.6686085046082543, "grad_norm": 0.75, "learning_rate": 0.00013406051167022063, "loss": 0.8567, "step": 26039 }, { "epoch": 0.6686341818041761, "grad_norm": 0.8125, "learning_rate": 0.00013405631436509506, "loss": 0.9336, "step": 26040 }, { "epoch": 0.668659859000098, "grad_norm": 0.7890625, "learning_rate": 0.0001340521169920984, "loss": 0.888, "step": 26041 }, { "epoch": 0.6686855361960197, "grad_norm": 0.73046875, "learning_rate": 0.0001340479195512389, "loss": 0.771, "step": 26042 }, { "epoch": 0.6687112133919415, "grad_norm": 0.84375, "learning_rate": 0.00013404372204252498, "loss": 0.8565, "step": 26043 }, { "epoch": 0.6687368905878633, "grad_norm": 0.75, "learning_rate": 0.00013403952446596498, "loss": 0.809, "step": 26044 }, { "epoch": 0.6687625677837852, "grad_norm": 0.95703125, "learning_rate": 0.00013403532682156735, "loss": 0.9777, "step": 26045 }, { "epoch": 0.668788244979707, "grad_norm": 0.7421875, "learning_rate": 0.0001340311291093403, "loss": 0.7878, "step": 26046 }, { "epoch": 0.6688139221756288, "grad_norm": 0.8125, "learning_rate": 0.00013402693132929234, "loss": 0.8521, "step": 26047 }, { "epoch": 0.6688395993715506, "grad_norm": 0.82421875, "learning_rate": 0.00013402273348143179, "loss": 0.8076, "step": 26048 }, { "epoch": 0.6688652765674724, "grad_norm": 0.77734375, "learning_rate": 0.00013401853556576696, "loss": 0.8893, "step": 26049 }, { "epoch": 0.6688909537633942, "grad_norm": 0.86328125, "learning_rate": 0.00013401433758230627, "loss": 0.9369, "step": 26050 }, { "epoch": 0.6689166309593161, "grad_norm": 0.7421875, "learning_rate": 0.0001340101395310581, "loss": 0.7725, "step": 26051 }, { "epoch": 0.6689423081552379, "grad_norm": 0.78125, "learning_rate": 0.0001340059414120308, "loss": 0.9071, "step": 26052 }, { "epoch": 0.6689679853511598, "grad_norm": 0.75, "learning_rate": 0.0001340017432252327, "loss": 0.9261, "step": 26053 }, { "epoch": 0.6689936625470816, "grad_norm": 0.81640625, "learning_rate": 0.0001339975449706722, "loss": 0.7855, "step": 26054 }, { "epoch": 0.6690193397430033, "grad_norm": 0.76953125, "learning_rate": 0.0001339933466483577, "loss": 0.7539, "step": 26055 }, { "epoch": 0.6690450169389252, "grad_norm": 0.76953125, "learning_rate": 0.0001339891482582975, "loss": 0.6943, "step": 26056 }, { "epoch": 0.669070694134847, "grad_norm": 0.78125, "learning_rate": 0.0001339849498005, "loss": 0.8073, "step": 26057 }, { "epoch": 0.6690963713307688, "grad_norm": 0.7734375, "learning_rate": 0.00013398075127497362, "loss": 0.8166, "step": 26058 }, { "epoch": 0.6691220485266907, "grad_norm": 0.7734375, "learning_rate": 0.0001339765526817266, "loss": 0.7849, "step": 26059 }, { "epoch": 0.6691477257226125, "grad_norm": 0.83984375, "learning_rate": 0.0001339723540207674, "loss": 1.0086, "step": 26060 }, { "epoch": 0.6691734029185343, "grad_norm": 0.734375, "learning_rate": 0.00013396815529210436, "loss": 0.735, "step": 26061 }, { "epoch": 0.6691990801144561, "grad_norm": 1.1171875, "learning_rate": 0.00013396395649574588, "loss": 0.7684, "step": 26062 }, { "epoch": 0.6692247573103779, "grad_norm": 0.80078125, "learning_rate": 0.00013395975763170025, "loss": 0.8982, "step": 26063 }, { "epoch": 0.6692504345062997, "grad_norm": 0.72265625, "learning_rate": 0.00013395555869997593, "loss": 0.8446, "step": 26064 }, { "epoch": 0.6692761117022216, "grad_norm": 0.73828125, "learning_rate": 0.00013395135970058127, "loss": 0.7509, "step": 26065 }, { "epoch": 0.6693017888981434, "grad_norm": 0.7578125, "learning_rate": 0.00013394716063352459, "loss": 0.7541, "step": 26066 }, { "epoch": 0.6693274660940652, "grad_norm": 0.7265625, "learning_rate": 0.00013394296149881428, "loss": 0.8932, "step": 26067 }, { "epoch": 0.669353143289987, "grad_norm": 0.8203125, "learning_rate": 0.0001339387622964587, "loss": 0.792, "step": 26068 }, { "epoch": 0.6693788204859088, "grad_norm": 0.8125, "learning_rate": 0.00013393456302646626, "loss": 0.9226, "step": 26069 }, { "epoch": 0.6694044976818306, "grad_norm": 0.75390625, "learning_rate": 0.00013393036368884533, "loss": 0.7573, "step": 26070 }, { "epoch": 0.6694301748777525, "grad_norm": 0.765625, "learning_rate": 0.0001339261642836042, "loss": 0.898, "step": 26071 }, { "epoch": 0.6694558520736743, "grad_norm": 0.71875, "learning_rate": 0.00013392196481075132, "loss": 0.8069, "step": 26072 }, { "epoch": 0.6694815292695961, "grad_norm": 0.84765625, "learning_rate": 0.000133917765270295, "loss": 1.039, "step": 26073 }, { "epoch": 0.669507206465518, "grad_norm": 0.82421875, "learning_rate": 0.00013391356566224365, "loss": 0.9047, "step": 26074 }, { "epoch": 0.6695328836614397, "grad_norm": 0.890625, "learning_rate": 0.0001339093659866056, "loss": 0.8636, "step": 26075 }, { "epoch": 0.6695585608573615, "grad_norm": 0.6875, "learning_rate": 0.00013390516624338928, "loss": 0.7832, "step": 26076 }, { "epoch": 0.6695842380532834, "grad_norm": 0.77734375, "learning_rate": 0.00013390096643260303, "loss": 0.7874, "step": 26077 }, { "epoch": 0.6696099152492052, "grad_norm": 0.70703125, "learning_rate": 0.0001338967665542552, "loss": 0.7735, "step": 26078 }, { "epoch": 0.669635592445127, "grad_norm": 0.796875, "learning_rate": 0.00013389256660835421, "loss": 0.7964, "step": 26079 }, { "epoch": 0.6696612696410489, "grad_norm": 0.828125, "learning_rate": 0.00013388836659490835, "loss": 0.8452, "step": 26080 }, { "epoch": 0.6696869468369707, "grad_norm": 0.76171875, "learning_rate": 0.00013388416651392607, "loss": 0.8318, "step": 26081 }, { "epoch": 0.6697126240328924, "grad_norm": 0.8359375, "learning_rate": 0.0001338799663654157, "loss": 0.8627, "step": 26082 }, { "epoch": 0.6697383012288143, "grad_norm": 0.7734375, "learning_rate": 0.0001338757661493856, "loss": 0.7606, "step": 26083 }, { "epoch": 0.6697639784247361, "grad_norm": 0.8515625, "learning_rate": 0.00013387156586584415, "loss": 0.8776, "step": 26084 }, { "epoch": 0.669789655620658, "grad_norm": 0.75, "learning_rate": 0.00013386736551479977, "loss": 0.9072, "step": 26085 }, { "epoch": 0.6698153328165798, "grad_norm": 0.71875, "learning_rate": 0.00013386316509626076, "loss": 0.7279, "step": 26086 }, { "epoch": 0.6698410100125016, "grad_norm": 0.7421875, "learning_rate": 0.00013385896461023554, "loss": 0.8001, "step": 26087 }, { "epoch": 0.6698666872084234, "grad_norm": 0.8125, "learning_rate": 0.00013385476405673242, "loss": 0.7714, "step": 26088 }, { "epoch": 0.6698923644043452, "grad_norm": 1.890625, "learning_rate": 0.00013385056343575985, "loss": 0.8339, "step": 26089 }, { "epoch": 0.669918041600267, "grad_norm": 0.86328125, "learning_rate": 0.00013384636274732615, "loss": 0.8902, "step": 26090 }, { "epoch": 0.6699437187961889, "grad_norm": 0.7890625, "learning_rate": 0.00013384216199143972, "loss": 0.8835, "step": 26091 }, { "epoch": 0.6699693959921107, "grad_norm": 0.7578125, "learning_rate": 0.0001338379611681089, "loss": 0.9276, "step": 26092 }, { "epoch": 0.6699950731880325, "grad_norm": 0.80859375, "learning_rate": 0.00013383376027734212, "loss": 0.7834, "step": 26093 }, { "epoch": 0.6700207503839544, "grad_norm": 0.82421875, "learning_rate": 0.00013382955931914766, "loss": 0.8661, "step": 26094 }, { "epoch": 0.6700464275798761, "grad_norm": 0.6953125, "learning_rate": 0.000133825358293534, "loss": 0.8233, "step": 26095 }, { "epoch": 0.6700721047757979, "grad_norm": 0.7578125, "learning_rate": 0.0001338211572005094, "loss": 0.7954, "step": 26096 }, { "epoch": 0.6700977819717198, "grad_norm": 0.765625, "learning_rate": 0.0001338169560400823, "loss": 0.871, "step": 26097 }, { "epoch": 0.6701234591676416, "grad_norm": 0.79296875, "learning_rate": 0.0001338127548122611, "loss": 0.7942, "step": 26098 }, { "epoch": 0.6701491363635634, "grad_norm": 0.71484375, "learning_rate": 0.00013380855351705412, "loss": 0.8372, "step": 26099 }, { "epoch": 0.6701748135594853, "grad_norm": 0.7578125, "learning_rate": 0.0001338043521544697, "loss": 0.8091, "step": 26100 }, { "epoch": 0.6702004907554071, "grad_norm": 0.8046875, "learning_rate": 0.0001338001507245163, "loss": 0.9792, "step": 26101 }, { "epoch": 0.6702261679513288, "grad_norm": 0.76171875, "learning_rate": 0.00013379594922720227, "loss": 0.9328, "step": 26102 }, { "epoch": 0.6702518451472507, "grad_norm": 0.76171875, "learning_rate": 0.00013379174766253593, "loss": 0.8843, "step": 26103 }, { "epoch": 0.6702775223431725, "grad_norm": 0.890625, "learning_rate": 0.0001337875460305257, "loss": 0.9591, "step": 26104 }, { "epoch": 0.6703031995390943, "grad_norm": 0.7734375, "learning_rate": 0.00013378334433117997, "loss": 0.7536, "step": 26105 }, { "epoch": 0.6703288767350162, "grad_norm": 0.73046875, "learning_rate": 0.00013377914256450704, "loss": 0.959, "step": 26106 }, { "epoch": 0.670354553930938, "grad_norm": 0.8515625, "learning_rate": 0.00013377494073051537, "loss": 0.9222, "step": 26107 }, { "epoch": 0.6703802311268597, "grad_norm": 0.73046875, "learning_rate": 0.00013377073882921324, "loss": 0.8312, "step": 26108 }, { "epoch": 0.6704059083227816, "grad_norm": 0.7421875, "learning_rate": 0.00013376653686060914, "loss": 0.7764, "step": 26109 }, { "epoch": 0.6704315855187034, "grad_norm": 0.796875, "learning_rate": 0.00013376233482471136, "loss": 0.8749, "step": 26110 }, { "epoch": 0.6704572627146252, "grad_norm": 0.8203125, "learning_rate": 0.00013375813272152828, "loss": 0.8759, "step": 26111 }, { "epoch": 0.6704829399105471, "grad_norm": 0.84765625, "learning_rate": 0.0001337539305510683, "loss": 0.8419, "step": 26112 }, { "epoch": 0.6705086171064689, "grad_norm": 0.78515625, "learning_rate": 0.0001337497283133398, "loss": 0.8228, "step": 26113 }, { "epoch": 0.6705342943023908, "grad_norm": 0.78515625, "learning_rate": 0.00013374552600835113, "loss": 0.7438, "step": 26114 }, { "epoch": 0.6705599714983125, "grad_norm": 0.7421875, "learning_rate": 0.00013374132363611066, "loss": 0.7605, "step": 26115 }, { "epoch": 0.6705856486942343, "grad_norm": 0.8203125, "learning_rate": 0.00013373712119662677, "loss": 0.9047, "step": 26116 }, { "epoch": 0.6706113258901562, "grad_norm": 0.83203125, "learning_rate": 0.0001337329186899079, "loss": 0.781, "step": 26117 }, { "epoch": 0.670637003086078, "grad_norm": 0.74609375, "learning_rate": 0.00013372871611596235, "loss": 0.8303, "step": 26118 }, { "epoch": 0.6706626802819998, "grad_norm": 0.73046875, "learning_rate": 0.0001337245134747985, "loss": 0.8807, "step": 26119 }, { "epoch": 0.6706883574779217, "grad_norm": 0.75, "learning_rate": 0.00013372031076642476, "loss": 0.9097, "step": 26120 }, { "epoch": 0.6707140346738435, "grad_norm": 0.8515625, "learning_rate": 0.00013371610799084945, "loss": 0.8257, "step": 26121 }, { "epoch": 0.6707397118697652, "grad_norm": 0.80859375, "learning_rate": 0.00013371190514808098, "loss": 0.8818, "step": 26122 }, { "epoch": 0.6707653890656871, "grad_norm": 0.76953125, "learning_rate": 0.00013370770223812775, "loss": 0.8234, "step": 26123 }, { "epoch": 0.6707910662616089, "grad_norm": 0.78515625, "learning_rate": 0.00013370349926099815, "loss": 0.8695, "step": 26124 }, { "epoch": 0.6708167434575307, "grad_norm": 0.8203125, "learning_rate": 0.00013369929621670048, "loss": 0.8102, "step": 26125 }, { "epoch": 0.6708424206534526, "grad_norm": 0.859375, "learning_rate": 0.00013369509310524316, "loss": 0.9363, "step": 26126 }, { "epoch": 0.6708680978493744, "grad_norm": 0.81640625, "learning_rate": 0.00013369088992663456, "loss": 0.8639, "step": 26127 }, { "epoch": 0.6708937750452961, "grad_norm": 0.80078125, "learning_rate": 0.00013368668668088307, "loss": 0.8237, "step": 26128 }, { "epoch": 0.670919452241218, "grad_norm": 0.765625, "learning_rate": 0.00013368248336799703, "loss": 0.7234, "step": 26129 }, { "epoch": 0.6709451294371398, "grad_norm": 0.78515625, "learning_rate": 0.00013367827998798487, "loss": 0.9015, "step": 26130 }, { "epoch": 0.6709708066330616, "grad_norm": 0.7265625, "learning_rate": 0.00013367407654085495, "loss": 0.7741, "step": 26131 }, { "epoch": 0.6709964838289835, "grad_norm": 0.80078125, "learning_rate": 0.00013366987302661562, "loss": 0.8099, "step": 26132 }, { "epoch": 0.6710221610249053, "grad_norm": 0.78515625, "learning_rate": 0.0001336656694452753, "loss": 0.8692, "step": 26133 }, { "epoch": 0.6710478382208271, "grad_norm": 0.76171875, "learning_rate": 0.0001336614657968423, "loss": 0.8551, "step": 26134 }, { "epoch": 0.6710735154167489, "grad_norm": 0.75, "learning_rate": 0.00013365726208132506, "loss": 0.8369, "step": 26135 }, { "epoch": 0.6710991926126707, "grad_norm": 0.83203125, "learning_rate": 0.00013365305829873193, "loss": 0.822, "step": 26136 }, { "epoch": 0.6711248698085925, "grad_norm": 0.80859375, "learning_rate": 0.0001336488544490713, "loss": 0.9702, "step": 26137 }, { "epoch": 0.6711505470045144, "grad_norm": 0.703125, "learning_rate": 0.00013364465053235155, "loss": 0.8104, "step": 26138 }, { "epoch": 0.6711762242004362, "grad_norm": 0.76171875, "learning_rate": 0.00013364044654858105, "loss": 0.8946, "step": 26139 }, { "epoch": 0.671201901396358, "grad_norm": 0.765625, "learning_rate": 0.00013363624249776817, "loss": 0.8135, "step": 26140 }, { "epoch": 0.6712275785922799, "grad_norm": 0.78125, "learning_rate": 0.00013363203837992132, "loss": 0.8112, "step": 26141 }, { "epoch": 0.6712532557882016, "grad_norm": 0.7109375, "learning_rate": 0.0001336278341950488, "loss": 0.7355, "step": 26142 }, { "epoch": 0.6712789329841234, "grad_norm": 0.7578125, "learning_rate": 0.00013362362994315908, "loss": 0.6864, "step": 26143 }, { "epoch": 0.6713046101800453, "grad_norm": 0.78125, "learning_rate": 0.00013361942562426052, "loss": 1.0435, "step": 26144 }, { "epoch": 0.6713302873759671, "grad_norm": 0.75390625, "learning_rate": 0.00013361522123836147, "loss": 0.7383, "step": 26145 }, { "epoch": 0.671355964571889, "grad_norm": 0.75390625, "learning_rate": 0.0001336110167854703, "loss": 0.8218, "step": 26146 }, { "epoch": 0.6713816417678108, "grad_norm": 0.73046875, "learning_rate": 0.00013360681226559545, "loss": 0.773, "step": 26147 }, { "epoch": 0.6714073189637325, "grad_norm": 0.765625, "learning_rate": 0.00013360260767874519, "loss": 0.9, "step": 26148 }, { "epoch": 0.6714329961596543, "grad_norm": 0.81640625, "learning_rate": 0.00013359840302492804, "loss": 0.8475, "step": 26149 }, { "epoch": 0.6714586733555762, "grad_norm": 0.84375, "learning_rate": 0.00013359419830415227, "loss": 0.8462, "step": 26150 }, { "epoch": 0.671484350551498, "grad_norm": 0.80859375, "learning_rate": 0.0001335899935164263, "loss": 0.8975, "step": 26151 }, { "epoch": 0.6715100277474199, "grad_norm": 0.78125, "learning_rate": 0.00013358578866175851, "loss": 0.8731, "step": 26152 }, { "epoch": 0.6715357049433417, "grad_norm": 0.765625, "learning_rate": 0.00013358158374015728, "loss": 0.7614, "step": 26153 }, { "epoch": 0.6715613821392635, "grad_norm": 0.88671875, "learning_rate": 0.00013357737875163098, "loss": 0.8326, "step": 26154 }, { "epoch": 0.6715870593351853, "grad_norm": 0.76953125, "learning_rate": 0.000133573173696188, "loss": 0.8459, "step": 26155 }, { "epoch": 0.6716127365311071, "grad_norm": 0.9921875, "learning_rate": 0.00013356896857383673, "loss": 0.7246, "step": 26156 }, { "epoch": 0.6716384137270289, "grad_norm": 0.85546875, "learning_rate": 0.00013356476338458555, "loss": 0.811, "step": 26157 }, { "epoch": 0.6716640909229508, "grad_norm": 0.7734375, "learning_rate": 0.00013356055812844278, "loss": 0.7807, "step": 26158 }, { "epoch": 0.6716897681188726, "grad_norm": 0.85546875, "learning_rate": 0.0001335563528054169, "loss": 0.9988, "step": 26159 }, { "epoch": 0.6717154453147944, "grad_norm": 0.91796875, "learning_rate": 0.0001335521474155162, "loss": 0.979, "step": 26160 }, { "epoch": 0.6717411225107163, "grad_norm": 0.765625, "learning_rate": 0.00013354794195874912, "loss": 0.7775, "step": 26161 }, { "epoch": 0.671766799706638, "grad_norm": 0.83203125, "learning_rate": 0.000133543736435124, "loss": 0.7728, "step": 26162 }, { "epoch": 0.6717924769025598, "grad_norm": 0.81640625, "learning_rate": 0.0001335395308446493, "loss": 0.7759, "step": 26163 }, { "epoch": 0.6718181540984817, "grad_norm": 0.75390625, "learning_rate": 0.00013353532518733328, "loss": 0.7066, "step": 26164 }, { "epoch": 0.6718438312944035, "grad_norm": 0.8125, "learning_rate": 0.00013353111946318443, "loss": 0.7983, "step": 26165 }, { "epoch": 0.6718695084903253, "grad_norm": 0.8125, "learning_rate": 0.00013352691367221107, "loss": 0.8607, "step": 26166 }, { "epoch": 0.6718951856862472, "grad_norm": 0.9921875, "learning_rate": 0.0001335227078144216, "loss": 0.7652, "step": 26167 }, { "epoch": 0.6719208628821689, "grad_norm": 0.80078125, "learning_rate": 0.0001335185018898244, "loss": 0.8934, "step": 26168 }, { "epoch": 0.6719465400780907, "grad_norm": 0.9140625, "learning_rate": 0.00013351429589842785, "loss": 0.8406, "step": 26169 }, { "epoch": 0.6719722172740126, "grad_norm": 0.80859375, "learning_rate": 0.00013351008984024035, "loss": 0.8643, "step": 26170 }, { "epoch": 0.6719978944699344, "grad_norm": 1.265625, "learning_rate": 0.00013350588371527025, "loss": 0.8144, "step": 26171 }, { "epoch": 0.6720235716658562, "grad_norm": 0.83203125, "learning_rate": 0.00013350167752352595, "loss": 0.8348, "step": 26172 }, { "epoch": 0.6720492488617781, "grad_norm": 0.7265625, "learning_rate": 0.00013349747126501586, "loss": 0.754, "step": 26173 }, { "epoch": 0.6720749260576999, "grad_norm": 0.75, "learning_rate": 0.0001334932649397483, "loss": 0.8768, "step": 26174 }, { "epoch": 0.6721006032536216, "grad_norm": 0.828125, "learning_rate": 0.0001334890585477317, "loss": 0.8986, "step": 26175 }, { "epoch": 0.6721262804495435, "grad_norm": 0.70703125, "learning_rate": 0.00013348485208897445, "loss": 0.8051, "step": 26176 }, { "epoch": 0.6721519576454653, "grad_norm": 0.69140625, "learning_rate": 0.00013348064556348491, "loss": 0.7261, "step": 26177 }, { "epoch": 0.6721776348413872, "grad_norm": 0.765625, "learning_rate": 0.00013347643897127146, "loss": 0.8981, "step": 26178 }, { "epoch": 0.672203312037309, "grad_norm": 0.78125, "learning_rate": 0.00013347223231234247, "loss": 0.8231, "step": 26179 }, { "epoch": 0.6722289892332308, "grad_norm": 0.75390625, "learning_rate": 0.0001334680255867064, "loss": 0.7588, "step": 26180 }, { "epoch": 0.6722546664291527, "grad_norm": 0.83203125, "learning_rate": 0.00013346381879437153, "loss": 0.8007, "step": 26181 }, { "epoch": 0.6722803436250744, "grad_norm": 0.875, "learning_rate": 0.00013345961193534628, "loss": 0.9615, "step": 26182 }, { "epoch": 0.6723060208209962, "grad_norm": 0.7265625, "learning_rate": 0.00013345540500963912, "loss": 0.8129, "step": 26183 }, { "epoch": 0.6723316980169181, "grad_norm": 0.85546875, "learning_rate": 0.0001334511980172583, "loss": 0.9984, "step": 26184 }, { "epoch": 0.6723573752128399, "grad_norm": 0.74609375, "learning_rate": 0.00013344699095821228, "loss": 0.9372, "step": 26185 }, { "epoch": 0.6723830524087617, "grad_norm": 0.79296875, "learning_rate": 0.0001334427838325094, "loss": 0.8464, "step": 26186 }, { "epoch": 0.6724087296046836, "grad_norm": 0.7734375, "learning_rate": 0.00013343857664015811, "loss": 0.8968, "step": 26187 }, { "epoch": 0.6724344068006053, "grad_norm": 0.72265625, "learning_rate": 0.00013343436938116672, "loss": 0.7526, "step": 26188 }, { "epoch": 0.6724600839965271, "grad_norm": 0.84375, "learning_rate": 0.00013343016205554368, "loss": 0.8181, "step": 26189 }, { "epoch": 0.672485761192449, "grad_norm": 0.83203125, "learning_rate": 0.00013342595466329734, "loss": 0.9861, "step": 26190 }, { "epoch": 0.6725114383883708, "grad_norm": 0.80859375, "learning_rate": 0.00013342174720443612, "loss": 0.7553, "step": 26191 }, { "epoch": 0.6725371155842926, "grad_norm": 0.8125, "learning_rate": 0.00013341753967896836, "loss": 0.9368, "step": 26192 }, { "epoch": 0.6725627927802145, "grad_norm": 0.76953125, "learning_rate": 0.00013341333208690244, "loss": 0.846, "step": 26193 }, { "epoch": 0.6725884699761363, "grad_norm": 0.79296875, "learning_rate": 0.00013340912442824678, "loss": 0.8791, "step": 26194 }, { "epoch": 0.672614147172058, "grad_norm": 0.90625, "learning_rate": 0.00013340491670300975, "loss": 0.7482, "step": 26195 }, { "epoch": 0.6726398243679799, "grad_norm": 0.73046875, "learning_rate": 0.00013340070891119975, "loss": 0.7628, "step": 26196 }, { "epoch": 0.6726655015639017, "grad_norm": 0.7734375, "learning_rate": 0.00013339650105282516, "loss": 0.8387, "step": 26197 }, { "epoch": 0.6726911787598235, "grad_norm": 0.734375, "learning_rate": 0.00013339229312789433, "loss": 0.8092, "step": 26198 }, { "epoch": 0.6727168559557454, "grad_norm": 0.73046875, "learning_rate": 0.00013338808513641568, "loss": 0.8238, "step": 26199 }, { "epoch": 0.6727425331516672, "grad_norm": 0.7578125, "learning_rate": 0.00013338387707839761, "loss": 0.8639, "step": 26200 }, { "epoch": 0.672768210347589, "grad_norm": 0.74609375, "learning_rate": 0.00013337966895384846, "loss": 0.7962, "step": 26201 }, { "epoch": 0.6727938875435108, "grad_norm": 0.8046875, "learning_rate": 0.00013337546076277668, "loss": 0.8196, "step": 26202 }, { "epoch": 0.6728195647394326, "grad_norm": 0.75390625, "learning_rate": 0.00013337125250519063, "loss": 0.8327, "step": 26203 }, { "epoch": 0.6728452419353544, "grad_norm": 0.81640625, "learning_rate": 0.00013336704418109868, "loss": 0.9455, "step": 26204 }, { "epoch": 0.6728709191312763, "grad_norm": 0.796875, "learning_rate": 0.0001333628357905092, "loss": 0.713, "step": 26205 }, { "epoch": 0.6728965963271981, "grad_norm": 0.82421875, "learning_rate": 0.0001333586273334306, "loss": 0.8583, "step": 26206 }, { "epoch": 0.67292227352312, "grad_norm": 0.7578125, "learning_rate": 0.00013335441880987128, "loss": 0.8253, "step": 26207 }, { "epoch": 0.6729479507190417, "grad_norm": 0.8359375, "learning_rate": 0.00013335021021983963, "loss": 0.933, "step": 26208 }, { "epoch": 0.6729736279149635, "grad_norm": 0.83203125, "learning_rate": 0.000133346001563344, "loss": 0.8161, "step": 26209 }, { "epoch": 0.6729993051108853, "grad_norm": 0.86328125, "learning_rate": 0.00013334179284039282, "loss": 0.9011, "step": 26210 }, { "epoch": 0.6730249823068072, "grad_norm": 0.7421875, "learning_rate": 0.00013333758405099444, "loss": 0.8165, "step": 26211 }, { "epoch": 0.673050659502729, "grad_norm": 0.7578125, "learning_rate": 0.0001333333751951573, "loss": 0.7488, "step": 26212 }, { "epoch": 0.6730763366986509, "grad_norm": 0.7578125, "learning_rate": 0.00013332916627288973, "loss": 0.7511, "step": 26213 }, { "epoch": 0.6731020138945727, "grad_norm": 0.765625, "learning_rate": 0.00013332495728420014, "loss": 0.7946, "step": 26214 }, { "epoch": 0.6731276910904944, "grad_norm": 0.828125, "learning_rate": 0.00013332074822909693, "loss": 0.7961, "step": 26215 }, { "epoch": 0.6731533682864163, "grad_norm": 0.71875, "learning_rate": 0.00013331653910758848, "loss": 0.8272, "step": 26216 }, { "epoch": 0.6731790454823381, "grad_norm": 0.72265625, "learning_rate": 0.00013331232991968317, "loss": 0.8322, "step": 26217 }, { "epoch": 0.6732047226782599, "grad_norm": 0.73046875, "learning_rate": 0.00013330812066538942, "loss": 0.8718, "step": 26218 }, { "epoch": 0.6732303998741818, "grad_norm": 0.7578125, "learning_rate": 0.00013330391134471552, "loss": 0.7709, "step": 26219 }, { "epoch": 0.6732560770701036, "grad_norm": 0.765625, "learning_rate": 0.00013329970195767002, "loss": 0.8379, "step": 26220 }, { "epoch": 0.6732817542660254, "grad_norm": 0.78125, "learning_rate": 0.00013329549250426116, "loss": 0.8876, "step": 26221 }, { "epoch": 0.6733074314619472, "grad_norm": 0.74609375, "learning_rate": 0.0001332912829844974, "loss": 0.7567, "step": 26222 }, { "epoch": 0.673333108657869, "grad_norm": 0.984375, "learning_rate": 0.00013328707339838717, "loss": 0.8785, "step": 26223 }, { "epoch": 0.6733587858537908, "grad_norm": 0.80078125, "learning_rate": 0.00013328286374593876, "loss": 0.8268, "step": 26224 }, { "epoch": 0.6733844630497127, "grad_norm": 0.8203125, "learning_rate": 0.00013327865402716063, "loss": 0.8809, "step": 26225 }, { "epoch": 0.6734101402456345, "grad_norm": 0.74609375, "learning_rate": 0.00013327444424206114, "loss": 0.8358, "step": 26226 }, { "epoch": 0.6734358174415563, "grad_norm": 0.7421875, "learning_rate": 0.0001332702343906487, "loss": 0.7992, "step": 26227 }, { "epoch": 0.6734614946374781, "grad_norm": 0.81640625, "learning_rate": 0.00013326602447293165, "loss": 1.0073, "step": 26228 }, { "epoch": 0.6734871718333999, "grad_norm": 0.76171875, "learning_rate": 0.0001332618144889185, "loss": 0.8745, "step": 26229 }, { "epoch": 0.6735128490293217, "grad_norm": 0.97265625, "learning_rate": 0.00013325760443861747, "loss": 0.8212, "step": 26230 }, { "epoch": 0.6735385262252436, "grad_norm": 0.74609375, "learning_rate": 0.0001332533943220371, "loss": 0.7265, "step": 26231 }, { "epoch": 0.6735642034211654, "grad_norm": 0.70703125, "learning_rate": 0.0001332491841391857, "loss": 0.8545, "step": 26232 }, { "epoch": 0.6735898806170872, "grad_norm": 0.734375, "learning_rate": 0.00013324497389007167, "loss": 0.7439, "step": 26233 }, { "epoch": 0.6736155578130091, "grad_norm": 0.79296875, "learning_rate": 0.0001332407635747034, "loss": 0.8633, "step": 26234 }, { "epoch": 0.6736412350089308, "grad_norm": 0.828125, "learning_rate": 0.00013323655319308933, "loss": 0.9243, "step": 26235 }, { "epoch": 0.6736669122048526, "grad_norm": 0.8203125, "learning_rate": 0.00013323234274523776, "loss": 0.8236, "step": 26236 }, { "epoch": 0.6736925894007745, "grad_norm": 0.8046875, "learning_rate": 0.00013322813223115717, "loss": 1.0274, "step": 26237 }, { "epoch": 0.6737182665966963, "grad_norm": 0.82421875, "learning_rate": 0.00013322392165085592, "loss": 0.8453, "step": 26238 }, { "epoch": 0.6737439437926181, "grad_norm": 0.83984375, "learning_rate": 0.00013321971100434236, "loss": 0.7608, "step": 26239 }, { "epoch": 0.67376962098854, "grad_norm": 0.8203125, "learning_rate": 0.00013321550029162492, "loss": 0.7949, "step": 26240 }, { "epoch": 0.6737952981844617, "grad_norm": 0.73046875, "learning_rate": 0.000133211289512712, "loss": 0.7772, "step": 26241 }, { "epoch": 0.6738209753803835, "grad_norm": 0.734375, "learning_rate": 0.000133207078667612, "loss": 0.7252, "step": 26242 }, { "epoch": 0.6738466525763054, "grad_norm": 0.8203125, "learning_rate": 0.00013320286775633328, "loss": 0.8736, "step": 26243 }, { "epoch": 0.6738723297722272, "grad_norm": 0.83984375, "learning_rate": 0.00013319865677888424, "loss": 0.8403, "step": 26244 }, { "epoch": 0.673898006968149, "grad_norm": 0.76953125, "learning_rate": 0.00013319444573527326, "loss": 0.9831, "step": 26245 }, { "epoch": 0.6739236841640709, "grad_norm": 1.046875, "learning_rate": 0.00013319023462550876, "loss": 0.827, "step": 26246 }, { "epoch": 0.6739493613599927, "grad_norm": 0.81640625, "learning_rate": 0.00013318602344959915, "loss": 0.9503, "step": 26247 }, { "epoch": 0.6739750385559145, "grad_norm": 0.75390625, "learning_rate": 0.00013318181220755274, "loss": 0.7221, "step": 26248 }, { "epoch": 0.6740007157518363, "grad_norm": 0.78515625, "learning_rate": 0.000133177600899378, "loss": 0.8203, "step": 26249 }, { "epoch": 0.6740263929477581, "grad_norm": 0.87890625, "learning_rate": 0.00013317338952508332, "loss": 0.817, "step": 26250 }, { "epoch": 0.67405207014368, "grad_norm": 0.73046875, "learning_rate": 0.00013316917808467704, "loss": 0.8067, "step": 26251 }, { "epoch": 0.6740777473396018, "grad_norm": 0.796875, "learning_rate": 0.0001331649665781676, "loss": 0.7974, "step": 26252 }, { "epoch": 0.6741034245355236, "grad_norm": 0.90625, "learning_rate": 0.00013316075500556336, "loss": 0.7912, "step": 26253 }, { "epoch": 0.6741291017314455, "grad_norm": 0.8125, "learning_rate": 0.00013315654336687276, "loss": 0.8372, "step": 26254 }, { "epoch": 0.6741547789273672, "grad_norm": 0.83984375, "learning_rate": 0.00013315233166210415, "loss": 0.9095, "step": 26255 }, { "epoch": 0.674180456123289, "grad_norm": 0.7890625, "learning_rate": 0.00013314811989126592, "loss": 0.8245, "step": 26256 }, { "epoch": 0.6742061333192109, "grad_norm": 0.9609375, "learning_rate": 0.00013314390805436652, "loss": 0.7326, "step": 26257 }, { "epoch": 0.6742318105151327, "grad_norm": 0.8203125, "learning_rate": 0.0001331396961514143, "loss": 0.9882, "step": 26258 }, { "epoch": 0.6742574877110545, "grad_norm": 0.76171875, "learning_rate": 0.0001331354841824176, "loss": 0.9018, "step": 26259 }, { "epoch": 0.6742831649069764, "grad_norm": 0.77734375, "learning_rate": 0.00013313127214738493, "loss": 0.8371, "step": 26260 }, { "epoch": 0.6743088421028981, "grad_norm": 0.8125, "learning_rate": 0.0001331270600463246, "loss": 0.8273, "step": 26261 }, { "epoch": 0.6743345192988199, "grad_norm": 0.78125, "learning_rate": 0.00013312284787924506, "loss": 0.8745, "step": 26262 }, { "epoch": 0.6743601964947418, "grad_norm": 0.734375, "learning_rate": 0.00013311863564615466, "loss": 0.7393, "step": 26263 }, { "epoch": 0.6743858736906636, "grad_norm": 0.79296875, "learning_rate": 0.00013311442334706182, "loss": 0.8561, "step": 26264 }, { "epoch": 0.6744115508865854, "grad_norm": 0.74609375, "learning_rate": 0.0001331102109819749, "loss": 0.7767, "step": 26265 }, { "epoch": 0.6744372280825073, "grad_norm": 0.734375, "learning_rate": 0.00013310599855090235, "loss": 0.7769, "step": 26266 }, { "epoch": 0.6744629052784291, "grad_norm": 0.8671875, "learning_rate": 0.00013310178605385249, "loss": 0.8452, "step": 26267 }, { "epoch": 0.6744885824743508, "grad_norm": 0.8046875, "learning_rate": 0.00013309757349083378, "loss": 0.745, "step": 26268 }, { "epoch": 0.6745142596702727, "grad_norm": 0.7109375, "learning_rate": 0.00013309336086185465, "loss": 0.8636, "step": 26269 }, { "epoch": 0.6745399368661945, "grad_norm": 0.83203125, "learning_rate": 0.00013308914816692336, "loss": 0.8853, "step": 26270 }, { "epoch": 0.6745656140621163, "grad_norm": 0.8125, "learning_rate": 0.00013308493540604844, "loss": 0.8558, "step": 26271 }, { "epoch": 0.6745912912580382, "grad_norm": 0.72265625, "learning_rate": 0.00013308072257923822, "loss": 0.8637, "step": 26272 }, { "epoch": 0.67461696845396, "grad_norm": 0.84765625, "learning_rate": 0.0001330765096865011, "loss": 0.7888, "step": 26273 }, { "epoch": 0.6746426456498819, "grad_norm": 0.8046875, "learning_rate": 0.0001330722967278455, "loss": 0.8689, "step": 26274 }, { "epoch": 0.6746683228458036, "grad_norm": 0.6953125, "learning_rate": 0.0001330680837032798, "loss": 0.842, "step": 26275 }, { "epoch": 0.6746940000417254, "grad_norm": 0.85546875, "learning_rate": 0.0001330638706128124, "loss": 0.8305, "step": 26276 }, { "epoch": 0.6747196772376473, "grad_norm": 0.94140625, "learning_rate": 0.00013305965745645165, "loss": 0.9713, "step": 26277 }, { "epoch": 0.6747453544335691, "grad_norm": 0.78515625, "learning_rate": 0.000133055444234206, "loss": 0.8242, "step": 26278 }, { "epoch": 0.6747710316294909, "grad_norm": 0.77734375, "learning_rate": 0.00013305123094608387, "loss": 0.8117, "step": 26279 }, { "epoch": 0.6747967088254128, "grad_norm": 0.83203125, "learning_rate": 0.00013304701759209362, "loss": 0.8256, "step": 26280 }, { "epoch": 0.6748223860213345, "grad_norm": 0.80859375, "learning_rate": 0.00013304280417224363, "loss": 0.8423, "step": 26281 }, { "epoch": 0.6748480632172563, "grad_norm": 0.859375, "learning_rate": 0.00013303859068654233, "loss": 0.8456, "step": 26282 }, { "epoch": 0.6748737404131782, "grad_norm": 0.71484375, "learning_rate": 0.00013303437713499806, "loss": 0.8412, "step": 26283 }, { "epoch": 0.6748994176091, "grad_norm": 0.8046875, "learning_rate": 0.0001330301635176193, "loss": 0.7872, "step": 26284 }, { "epoch": 0.6749250948050218, "grad_norm": 0.9453125, "learning_rate": 0.0001330259498344144, "loss": 0.9143, "step": 26285 }, { "epoch": 0.6749507720009437, "grad_norm": 0.84375, "learning_rate": 0.00013302173608539176, "loss": 0.9663, "step": 26286 }, { "epoch": 0.6749764491968655, "grad_norm": 0.79296875, "learning_rate": 0.00013301752227055978, "loss": 0.8508, "step": 26287 }, { "epoch": 0.6750021263927872, "grad_norm": 0.8125, "learning_rate": 0.0001330133083899269, "loss": 0.8787, "step": 26288 }, { "epoch": 0.6750278035887091, "grad_norm": 0.75, "learning_rate": 0.00013300909444350145, "loss": 0.826, "step": 26289 }, { "epoch": 0.6750534807846309, "grad_norm": 0.76953125, "learning_rate": 0.00013300488043129188, "loss": 0.6995, "step": 26290 }, { "epoch": 0.6750791579805527, "grad_norm": 0.7265625, "learning_rate": 0.0001330006663533065, "loss": 0.6995, "step": 26291 }, { "epoch": 0.6751048351764746, "grad_norm": 0.99609375, "learning_rate": 0.00013299645220955382, "loss": 0.8698, "step": 26292 }, { "epoch": 0.6751305123723964, "grad_norm": 0.80078125, "learning_rate": 0.0001329922380000422, "loss": 0.797, "step": 26293 }, { "epoch": 0.6751561895683182, "grad_norm": 0.72265625, "learning_rate": 0.00013298802372478, "loss": 0.8622, "step": 26294 }, { "epoch": 0.67518186676424, "grad_norm": 0.75, "learning_rate": 0.00013298380938377567, "loss": 0.8679, "step": 26295 }, { "epoch": 0.6752075439601618, "grad_norm": 0.80859375, "learning_rate": 0.00013297959497703756, "loss": 0.9301, "step": 26296 }, { "epoch": 0.6752332211560836, "grad_norm": 0.765625, "learning_rate": 0.00013297538050457413, "loss": 0.7999, "step": 26297 }, { "epoch": 0.6752588983520055, "grad_norm": 0.734375, "learning_rate": 0.00013297116596639374, "loss": 0.7503, "step": 26298 }, { "epoch": 0.6752845755479273, "grad_norm": 0.7265625, "learning_rate": 0.00013296695136250474, "loss": 0.7202, "step": 26299 }, { "epoch": 0.6753102527438491, "grad_norm": 0.703125, "learning_rate": 0.00013296273669291566, "loss": 0.7772, "step": 26300 }, { "epoch": 0.6753359299397709, "grad_norm": 0.75390625, "learning_rate": 0.0001329585219576348, "loss": 0.8496, "step": 26301 }, { "epoch": 0.6753616071356927, "grad_norm": 1.2109375, "learning_rate": 0.00013295430715667056, "loss": 0.8493, "step": 26302 }, { "epoch": 0.6753872843316145, "grad_norm": 0.8984375, "learning_rate": 0.0001329500922900314, "loss": 0.8821, "step": 26303 }, { "epoch": 0.6754129615275364, "grad_norm": 0.86328125, "learning_rate": 0.00013294587735772562, "loss": 0.9405, "step": 26304 }, { "epoch": 0.6754386387234582, "grad_norm": 0.7421875, "learning_rate": 0.00013294166235976174, "loss": 0.7986, "step": 26305 }, { "epoch": 0.67546431591938, "grad_norm": 0.8046875, "learning_rate": 0.0001329374472961481, "loss": 0.9417, "step": 26306 }, { "epoch": 0.6754899931153019, "grad_norm": 0.65625, "learning_rate": 0.00013293323216689306, "loss": 0.7499, "step": 26307 }, { "epoch": 0.6755156703112236, "grad_norm": 0.83984375, "learning_rate": 0.00013292901697200513, "loss": 0.8893, "step": 26308 }, { "epoch": 0.6755413475071455, "grad_norm": 0.70703125, "learning_rate": 0.0001329248017114926, "loss": 0.6777, "step": 26309 }, { "epoch": 0.6755670247030673, "grad_norm": 0.82421875, "learning_rate": 0.0001329205863853639, "loss": 0.8337, "step": 26310 }, { "epoch": 0.6755927018989891, "grad_norm": 0.80859375, "learning_rate": 0.00013291637099362746, "loss": 0.8527, "step": 26311 }, { "epoch": 0.675618379094911, "grad_norm": 0.78125, "learning_rate": 0.0001329121555362917, "loss": 0.9142, "step": 26312 }, { "epoch": 0.6756440562908328, "grad_norm": 0.74609375, "learning_rate": 0.00013290794001336492, "loss": 0.8851, "step": 26313 }, { "epoch": 0.6756697334867546, "grad_norm": 0.81640625, "learning_rate": 0.00013290372442485563, "loss": 0.8, "step": 26314 }, { "epoch": 0.6756954106826764, "grad_norm": 0.80859375, "learning_rate": 0.0001328995087707722, "loss": 0.9511, "step": 26315 }, { "epoch": 0.6757210878785982, "grad_norm": 0.76171875, "learning_rate": 0.000132895293051123, "loss": 0.7039, "step": 26316 }, { "epoch": 0.67574676507452, "grad_norm": 0.8671875, "learning_rate": 0.00013289107726591648, "loss": 0.9427, "step": 26317 }, { "epoch": 0.6757724422704419, "grad_norm": 0.78125, "learning_rate": 0.00013288686141516096, "loss": 0.8268, "step": 26318 }, { "epoch": 0.6757981194663637, "grad_norm": 0.796875, "learning_rate": 0.00013288264549886494, "loss": 1.0028, "step": 26319 }, { "epoch": 0.6758237966622855, "grad_norm": 0.7890625, "learning_rate": 0.00013287842951703678, "loss": 0.8569, "step": 26320 }, { "epoch": 0.6758494738582073, "grad_norm": 0.82421875, "learning_rate": 0.00013287421346968486, "loss": 0.8812, "step": 26321 }, { "epoch": 0.6758751510541291, "grad_norm": 0.76953125, "learning_rate": 0.0001328699973568176, "loss": 0.7661, "step": 26322 }, { "epoch": 0.6759008282500509, "grad_norm": 0.73046875, "learning_rate": 0.0001328657811784434, "loss": 0.7708, "step": 26323 }, { "epoch": 0.6759265054459728, "grad_norm": 0.86328125, "learning_rate": 0.00013286156493457068, "loss": 0.8384, "step": 26324 }, { "epoch": 0.6759521826418946, "grad_norm": 0.81640625, "learning_rate": 0.00013285734862520784, "loss": 0.8229, "step": 26325 }, { "epoch": 0.6759778598378164, "grad_norm": 0.8671875, "learning_rate": 0.00013285313225036325, "loss": 0.8987, "step": 26326 }, { "epoch": 0.6760035370337383, "grad_norm": 0.7734375, "learning_rate": 0.00013284891581004534, "loss": 0.8226, "step": 26327 }, { "epoch": 0.67602921422966, "grad_norm": 0.77734375, "learning_rate": 0.00013284469930426252, "loss": 0.8646, "step": 26328 }, { "epoch": 0.6760548914255818, "grad_norm": 0.76953125, "learning_rate": 0.0001328404827330232, "loss": 0.8904, "step": 26329 }, { "epoch": 0.6760805686215037, "grad_norm": 0.8515625, "learning_rate": 0.00013283626609633572, "loss": 0.8195, "step": 26330 }, { "epoch": 0.6761062458174255, "grad_norm": 0.7578125, "learning_rate": 0.00013283204939420852, "loss": 0.922, "step": 26331 }, { "epoch": 0.6761319230133473, "grad_norm": 0.8515625, "learning_rate": 0.00013282783262665008, "loss": 0.8737, "step": 26332 }, { "epoch": 0.6761576002092692, "grad_norm": 1.0078125, "learning_rate": 0.00013282361579366867, "loss": 1.0637, "step": 26333 }, { "epoch": 0.676183277405191, "grad_norm": 0.7734375, "learning_rate": 0.00013281939889527277, "loss": 0.8686, "step": 26334 }, { "epoch": 0.6762089546011127, "grad_norm": 0.8671875, "learning_rate": 0.0001328151819314708, "loss": 0.8041, "step": 26335 }, { "epoch": 0.6762346317970346, "grad_norm": 0.8203125, "learning_rate": 0.00013281096490227112, "loss": 0.9102, "step": 26336 }, { "epoch": 0.6762603089929564, "grad_norm": 0.8203125, "learning_rate": 0.00013280674780768214, "loss": 0.846, "step": 26337 }, { "epoch": 0.6762859861888783, "grad_norm": 1.6875, "learning_rate": 0.0001328025306477123, "loss": 0.7601, "step": 26338 }, { "epoch": 0.6763116633848001, "grad_norm": 0.7890625, "learning_rate": 0.00013279831342236995, "loss": 0.7935, "step": 26339 }, { "epoch": 0.6763373405807219, "grad_norm": 0.734375, "learning_rate": 0.00013279409613166355, "loss": 0.9336, "step": 26340 }, { "epoch": 0.6763630177766436, "grad_norm": 0.78515625, "learning_rate": 0.00013278987877560146, "loss": 0.8131, "step": 26341 }, { "epoch": 0.6763886949725655, "grad_norm": 0.79296875, "learning_rate": 0.00013278566135419212, "loss": 0.856, "step": 26342 }, { "epoch": 0.6764143721684873, "grad_norm": 0.796875, "learning_rate": 0.00013278144386744391, "loss": 0.7829, "step": 26343 }, { "epoch": 0.6764400493644092, "grad_norm": 0.73046875, "learning_rate": 0.00013277722631536523, "loss": 0.9097, "step": 26344 }, { "epoch": 0.676465726560331, "grad_norm": 1.0859375, "learning_rate": 0.0001327730086979645, "loss": 0.861, "step": 26345 }, { "epoch": 0.6764914037562528, "grad_norm": 0.75, "learning_rate": 0.00013276879101525017, "loss": 0.8151, "step": 26346 }, { "epoch": 0.6765170809521747, "grad_norm": 0.75390625, "learning_rate": 0.00013276457326723054, "loss": 0.8984, "step": 26347 }, { "epoch": 0.6765427581480964, "grad_norm": 0.73828125, "learning_rate": 0.00013276035545391413, "loss": 0.866, "step": 26348 }, { "epoch": 0.6765684353440182, "grad_norm": 0.76953125, "learning_rate": 0.00013275613757530926, "loss": 0.9318, "step": 26349 }, { "epoch": 0.6765941125399401, "grad_norm": 0.796875, "learning_rate": 0.00013275191963142436, "loss": 0.7921, "step": 26350 }, { "epoch": 0.6766197897358619, "grad_norm": 0.7890625, "learning_rate": 0.00013274770162226786, "loss": 0.7839, "step": 26351 }, { "epoch": 0.6766454669317837, "grad_norm": 0.7890625, "learning_rate": 0.00013274348354784814, "loss": 0.9844, "step": 26352 }, { "epoch": 0.6766711441277056, "grad_norm": 0.81640625, "learning_rate": 0.00013273926540817362, "loss": 0.7455, "step": 26353 }, { "epoch": 0.6766968213236274, "grad_norm": 0.765625, "learning_rate": 0.00013273504720325268, "loss": 0.8465, "step": 26354 }, { "epoch": 0.6767224985195491, "grad_norm": 0.76953125, "learning_rate": 0.00013273082893309375, "loss": 0.7937, "step": 26355 }, { "epoch": 0.676748175715471, "grad_norm": 0.7421875, "learning_rate": 0.0001327266105977053, "loss": 0.7638, "step": 26356 }, { "epoch": 0.6767738529113928, "grad_norm": 0.76171875, "learning_rate": 0.00013272239219709562, "loss": 0.8708, "step": 26357 }, { "epoch": 0.6767995301073146, "grad_norm": 0.78125, "learning_rate": 0.00013271817373127314, "loss": 0.8147, "step": 26358 }, { "epoch": 0.6768252073032365, "grad_norm": 0.80078125, "learning_rate": 0.0001327139552002463, "loss": 0.8061, "step": 26359 }, { "epoch": 0.6768508844991583, "grad_norm": 0.75, "learning_rate": 0.00013270973660402355, "loss": 0.7523, "step": 26360 }, { "epoch": 0.67687656169508, "grad_norm": 0.875, "learning_rate": 0.00013270551794261322, "loss": 0.9512, "step": 26361 }, { "epoch": 0.6769022388910019, "grad_norm": 0.80859375, "learning_rate": 0.00013270129921602377, "loss": 0.8222, "step": 26362 }, { "epoch": 0.6769279160869237, "grad_norm": 0.90234375, "learning_rate": 0.00013269708042426356, "loss": 0.9443, "step": 26363 }, { "epoch": 0.6769535932828455, "grad_norm": 0.765625, "learning_rate": 0.00013269286156734103, "loss": 0.8212, "step": 26364 }, { "epoch": 0.6769792704787674, "grad_norm": 0.8203125, "learning_rate": 0.00013268864264526456, "loss": 0.9028, "step": 26365 }, { "epoch": 0.6770049476746892, "grad_norm": 0.78515625, "learning_rate": 0.00013268442365804259, "loss": 0.8296, "step": 26366 }, { "epoch": 0.677030624870611, "grad_norm": 0.75390625, "learning_rate": 0.00013268020460568356, "loss": 0.8024, "step": 26367 }, { "epoch": 0.6770563020665328, "grad_norm": 0.80859375, "learning_rate": 0.00013267598548819578, "loss": 0.8062, "step": 26368 }, { "epoch": 0.6770819792624546, "grad_norm": 0.7734375, "learning_rate": 0.00013267176630558773, "loss": 0.9007, "step": 26369 }, { "epoch": 0.6771076564583765, "grad_norm": 0.984375, "learning_rate": 0.0001326675470578678, "loss": 0.806, "step": 26370 }, { "epoch": 0.6771333336542983, "grad_norm": 0.74609375, "learning_rate": 0.0001326633277450444, "loss": 0.8198, "step": 26371 }, { "epoch": 0.6771590108502201, "grad_norm": 0.73046875, "learning_rate": 0.00013265910836712593, "loss": 0.7871, "step": 26372 }, { "epoch": 0.677184688046142, "grad_norm": 0.76953125, "learning_rate": 0.00013265488892412082, "loss": 0.8797, "step": 26373 }, { "epoch": 0.6772103652420638, "grad_norm": 0.76171875, "learning_rate": 0.00013265066941603747, "loss": 0.9356, "step": 26374 }, { "epoch": 0.6772360424379855, "grad_norm": 0.78125, "learning_rate": 0.0001326464498428843, "loss": 0.8842, "step": 26375 }, { "epoch": 0.6772617196339074, "grad_norm": 0.921875, "learning_rate": 0.00013264223020466965, "loss": 0.6711, "step": 26376 }, { "epoch": 0.6772873968298292, "grad_norm": 0.7578125, "learning_rate": 0.00013263801050140202, "loss": 0.7952, "step": 26377 }, { "epoch": 0.677313074025751, "grad_norm": 0.7265625, "learning_rate": 0.00013263379073308977, "loss": 0.7431, "step": 26378 }, { "epoch": 0.6773387512216729, "grad_norm": 0.79296875, "learning_rate": 0.00013262957089974132, "loss": 0.8564, "step": 26379 }, { "epoch": 0.6773644284175947, "grad_norm": 0.80078125, "learning_rate": 0.0001326253510013651, "loss": 0.8393, "step": 26380 }, { "epoch": 0.6773901056135164, "grad_norm": 0.7421875, "learning_rate": 0.00013262113103796951, "loss": 0.91, "step": 26381 }, { "epoch": 0.6774157828094383, "grad_norm": 0.83984375, "learning_rate": 0.00013261691100956293, "loss": 0.8512, "step": 26382 }, { "epoch": 0.6774414600053601, "grad_norm": 0.7890625, "learning_rate": 0.0001326126909161538, "loss": 1.0009, "step": 26383 }, { "epoch": 0.6774671372012819, "grad_norm": 0.80859375, "learning_rate": 0.00013260847075775053, "loss": 0.7716, "step": 26384 }, { "epoch": 0.6774928143972038, "grad_norm": 0.82421875, "learning_rate": 0.00013260425053436152, "loss": 0.8009, "step": 26385 }, { "epoch": 0.6775184915931256, "grad_norm": 0.79296875, "learning_rate": 0.00013260003024599519, "loss": 0.8583, "step": 26386 }, { "epoch": 0.6775441687890474, "grad_norm": 0.8125, "learning_rate": 0.00013259580989265994, "loss": 0.8845, "step": 26387 }, { "epoch": 0.6775698459849692, "grad_norm": 0.75, "learning_rate": 0.0001325915894743642, "loss": 0.8026, "step": 26388 }, { "epoch": 0.677595523180891, "grad_norm": 0.74609375, "learning_rate": 0.00013258736899111634, "loss": 0.7554, "step": 26389 }, { "epoch": 0.6776212003768128, "grad_norm": 0.73046875, "learning_rate": 0.00013258314844292478, "loss": 0.7782, "step": 26390 }, { "epoch": 0.6776468775727347, "grad_norm": 0.7734375, "learning_rate": 0.00013257892782979797, "loss": 0.8095, "step": 26391 }, { "epoch": 0.6776725547686565, "grad_norm": 0.8828125, "learning_rate": 0.0001325747071517443, "loss": 0.9718, "step": 26392 }, { "epoch": 0.6776982319645783, "grad_norm": 0.7578125, "learning_rate": 0.0001325704864087722, "loss": 0.8521, "step": 26393 }, { "epoch": 0.6777239091605002, "grad_norm": 0.828125, "learning_rate": 0.00013256626560089005, "loss": 0.9129, "step": 26394 }, { "epoch": 0.6777495863564219, "grad_norm": 0.765625, "learning_rate": 0.00013256204472810628, "loss": 0.7453, "step": 26395 }, { "epoch": 0.6777752635523437, "grad_norm": 0.83203125, "learning_rate": 0.00013255782379042926, "loss": 0.8577, "step": 26396 }, { "epoch": 0.6778009407482656, "grad_norm": 0.75, "learning_rate": 0.0001325536027878675, "loss": 0.8216, "step": 26397 }, { "epoch": 0.6778266179441874, "grad_norm": 2.3125, "learning_rate": 0.00013254938172042928, "loss": 0.7033, "step": 26398 }, { "epoch": 0.6778522951401093, "grad_norm": 0.7265625, "learning_rate": 0.00013254516058812312, "loss": 0.8142, "step": 26399 }, { "epoch": 0.6778779723360311, "grad_norm": 0.80078125, "learning_rate": 0.00013254093939095738, "loss": 0.8953, "step": 26400 }, { "epoch": 0.6779036495319528, "grad_norm": 0.74609375, "learning_rate": 0.00013253671812894053, "loss": 0.8454, "step": 26401 }, { "epoch": 0.6779293267278746, "grad_norm": 0.7734375, "learning_rate": 0.0001325324968020809, "loss": 0.8391, "step": 26402 }, { "epoch": 0.6779550039237965, "grad_norm": 0.76171875, "learning_rate": 0.00013252827541038694, "loss": 0.8294, "step": 26403 }, { "epoch": 0.6779806811197183, "grad_norm": 0.7578125, "learning_rate": 0.00013252405395386708, "loss": 0.7372, "step": 26404 }, { "epoch": 0.6780063583156402, "grad_norm": 0.671875, "learning_rate": 0.00013251983243252972, "loss": 0.7605, "step": 26405 }, { "epoch": 0.678032035511562, "grad_norm": 0.74609375, "learning_rate": 0.00013251561084638326, "loss": 0.7064, "step": 26406 }, { "epoch": 0.6780577127074838, "grad_norm": 0.69921875, "learning_rate": 0.00013251138919543617, "loss": 0.7891, "step": 26407 }, { "epoch": 0.6780833899034056, "grad_norm": 0.79296875, "learning_rate": 0.00013250716747969675, "loss": 0.9035, "step": 26408 }, { "epoch": 0.6781090670993274, "grad_norm": 0.85546875, "learning_rate": 0.0001325029456991735, "loss": 0.8373, "step": 26409 }, { "epoch": 0.6781347442952492, "grad_norm": 0.83203125, "learning_rate": 0.00013249872385387486, "loss": 0.8872, "step": 26410 }, { "epoch": 0.6781604214911711, "grad_norm": 0.73046875, "learning_rate": 0.00013249450194380912, "loss": 0.8434, "step": 26411 }, { "epoch": 0.6781860986870929, "grad_norm": 1.0625, "learning_rate": 0.00013249027996898484, "loss": 0.7786, "step": 26412 }, { "epoch": 0.6782117758830147, "grad_norm": 0.80078125, "learning_rate": 0.00013248605792941037, "loss": 0.8559, "step": 26413 }, { "epoch": 0.6782374530789366, "grad_norm": 0.80859375, "learning_rate": 0.00013248183582509408, "loss": 0.8142, "step": 26414 }, { "epoch": 0.6782631302748583, "grad_norm": 0.7421875, "learning_rate": 0.00013247761365604445, "loss": 0.8209, "step": 26415 }, { "epoch": 0.6782888074707801, "grad_norm": 0.8203125, "learning_rate": 0.00013247339142226984, "loss": 0.8859, "step": 26416 }, { "epoch": 0.678314484666702, "grad_norm": 0.8984375, "learning_rate": 0.00013246916912377872, "loss": 0.8432, "step": 26417 }, { "epoch": 0.6783401618626238, "grad_norm": 0.79296875, "learning_rate": 0.0001324649467605795, "loss": 0.803, "step": 26418 }, { "epoch": 0.6783658390585456, "grad_norm": 0.72265625, "learning_rate": 0.00013246072433268052, "loss": 0.8241, "step": 26419 }, { "epoch": 0.6783915162544675, "grad_norm": 2.71875, "learning_rate": 0.0001324565018400903, "loss": 0.768, "step": 26420 }, { "epoch": 0.6784171934503892, "grad_norm": 0.73046875, "learning_rate": 0.00013245227928281717, "loss": 0.827, "step": 26421 }, { "epoch": 0.678442870646311, "grad_norm": 0.765625, "learning_rate": 0.0001324480566608696, "loss": 0.7488, "step": 26422 }, { "epoch": 0.6784685478422329, "grad_norm": 0.81640625, "learning_rate": 0.00013244383397425597, "loss": 0.8931, "step": 26423 }, { "epoch": 0.6784942250381547, "grad_norm": 0.73828125, "learning_rate": 0.0001324396112229847, "loss": 0.7704, "step": 26424 }, { "epoch": 0.6785199022340765, "grad_norm": 0.7890625, "learning_rate": 0.00013243538840706422, "loss": 0.8609, "step": 26425 }, { "epoch": 0.6785455794299984, "grad_norm": 0.69921875, "learning_rate": 0.00013243116552650295, "loss": 0.8505, "step": 26426 }, { "epoch": 0.6785712566259202, "grad_norm": 0.78515625, "learning_rate": 0.0001324269425813093, "loss": 0.9167, "step": 26427 }, { "epoch": 0.6785969338218419, "grad_norm": 0.796875, "learning_rate": 0.00013242271957149168, "loss": 0.7041, "step": 26428 }, { "epoch": 0.6786226110177638, "grad_norm": 0.78125, "learning_rate": 0.0001324184964970585, "loss": 0.8577, "step": 26429 }, { "epoch": 0.6786482882136856, "grad_norm": 0.9453125, "learning_rate": 0.00013241427335801818, "loss": 0.8841, "step": 26430 }, { "epoch": 0.6786739654096074, "grad_norm": 0.73046875, "learning_rate": 0.00013241005015437917, "loss": 0.7001, "step": 26431 }, { "epoch": 0.6786996426055293, "grad_norm": 0.7734375, "learning_rate": 0.00013240582688614982, "loss": 0.8988, "step": 26432 }, { "epoch": 0.6787253198014511, "grad_norm": 0.80859375, "learning_rate": 0.00013240160355333864, "loss": 0.8752, "step": 26433 }, { "epoch": 0.678750996997373, "grad_norm": 0.7578125, "learning_rate": 0.00013239738015595394, "loss": 0.9008, "step": 26434 }, { "epoch": 0.6787766741932947, "grad_norm": 0.7265625, "learning_rate": 0.0001323931566940042, "loss": 0.7536, "step": 26435 }, { "epoch": 0.6788023513892165, "grad_norm": 0.796875, "learning_rate": 0.00013238893316749783, "loss": 0.9062, "step": 26436 }, { "epoch": 0.6788280285851384, "grad_norm": 0.78125, "learning_rate": 0.00013238470957644324, "loss": 0.8462, "step": 26437 }, { "epoch": 0.6788537057810602, "grad_norm": 0.79296875, "learning_rate": 0.00013238048592084884, "loss": 0.9023, "step": 26438 }, { "epoch": 0.678879382976982, "grad_norm": 0.8203125, "learning_rate": 0.00013237626220072307, "loss": 0.6853, "step": 26439 }, { "epoch": 0.6789050601729039, "grad_norm": 0.83984375, "learning_rate": 0.00013237203841607432, "loss": 0.8985, "step": 26440 }, { "epoch": 0.6789307373688256, "grad_norm": 0.828125, "learning_rate": 0.00013236781456691106, "loss": 0.8881, "step": 26441 }, { "epoch": 0.6789564145647474, "grad_norm": 0.7265625, "learning_rate": 0.00013236359065324163, "loss": 0.8059, "step": 26442 }, { "epoch": 0.6789820917606693, "grad_norm": 0.8046875, "learning_rate": 0.0001323593666750745, "loss": 0.8224, "step": 26443 }, { "epoch": 0.6790077689565911, "grad_norm": 0.9296875, "learning_rate": 0.00013235514263241807, "loss": 1.032, "step": 26444 }, { "epoch": 0.6790334461525129, "grad_norm": 0.828125, "learning_rate": 0.00013235091852528076, "loss": 0.8024, "step": 26445 }, { "epoch": 0.6790591233484348, "grad_norm": 0.76171875, "learning_rate": 0.000132346694353671, "loss": 0.9076, "step": 26446 }, { "epoch": 0.6790848005443566, "grad_norm": 0.80859375, "learning_rate": 0.00013234247011759722, "loss": 0.9643, "step": 26447 }, { "epoch": 0.6791104777402783, "grad_norm": 0.80078125, "learning_rate": 0.00013233824581706778, "loss": 0.8361, "step": 26448 }, { "epoch": 0.6791361549362002, "grad_norm": 0.73828125, "learning_rate": 0.00013233402145209114, "loss": 0.8154, "step": 26449 }, { "epoch": 0.679161832132122, "grad_norm": 0.74609375, "learning_rate": 0.00013232979702267574, "loss": 0.7559, "step": 26450 }, { "epoch": 0.6791875093280438, "grad_norm": 0.78515625, "learning_rate": 0.00013232557252882993, "loss": 0.928, "step": 26451 }, { "epoch": 0.6792131865239657, "grad_norm": 0.72265625, "learning_rate": 0.00013232134797056225, "loss": 0.8635, "step": 26452 }, { "epoch": 0.6792388637198875, "grad_norm": 0.8125, "learning_rate": 0.00013231712334788097, "loss": 0.8475, "step": 26453 }, { "epoch": 0.6792645409158092, "grad_norm": 0.78125, "learning_rate": 0.00013231289866079464, "loss": 0.9353, "step": 26454 }, { "epoch": 0.6792902181117311, "grad_norm": 0.77734375, "learning_rate": 0.00013230867390931158, "loss": 0.9021, "step": 26455 }, { "epoch": 0.6793158953076529, "grad_norm": 0.828125, "learning_rate": 0.00013230444909344025, "loss": 0.9222, "step": 26456 }, { "epoch": 0.6793415725035747, "grad_norm": 0.76171875, "learning_rate": 0.0001323002242131891, "loss": 0.7592, "step": 26457 }, { "epoch": 0.6793672496994966, "grad_norm": 0.875, "learning_rate": 0.00013229599926856654, "loss": 0.8372, "step": 26458 }, { "epoch": 0.6793929268954184, "grad_norm": 0.75390625, "learning_rate": 0.0001322917742595809, "loss": 0.6468, "step": 26459 }, { "epoch": 0.6794186040913403, "grad_norm": 0.8359375, "learning_rate": 0.00013228754918624073, "loss": 0.9766, "step": 26460 }, { "epoch": 0.679444281287262, "grad_norm": 0.73828125, "learning_rate": 0.00013228332404855437, "loss": 0.7536, "step": 26461 }, { "epoch": 0.6794699584831838, "grad_norm": 0.76953125, "learning_rate": 0.00013227909884653026, "loss": 0.769, "step": 26462 }, { "epoch": 0.6794956356791056, "grad_norm": 0.9921875, "learning_rate": 0.00013227487358017683, "loss": 0.7995, "step": 26463 }, { "epoch": 0.6795213128750275, "grad_norm": 0.8125, "learning_rate": 0.00013227064824950245, "loss": 0.9038, "step": 26464 }, { "epoch": 0.6795469900709493, "grad_norm": 0.73046875, "learning_rate": 0.00013226642285451564, "loss": 0.8772, "step": 26465 }, { "epoch": 0.6795726672668712, "grad_norm": 0.796875, "learning_rate": 0.00013226219739522475, "loss": 0.8252, "step": 26466 }, { "epoch": 0.679598344462793, "grad_norm": 0.77734375, "learning_rate": 0.00013225797187163822, "loss": 0.7834, "step": 26467 }, { "epoch": 0.6796240216587147, "grad_norm": 0.84375, "learning_rate": 0.00013225374628376445, "loss": 0.8435, "step": 26468 }, { "epoch": 0.6796496988546366, "grad_norm": 0.8828125, "learning_rate": 0.00013224952063161188, "loss": 0.8045, "step": 26469 }, { "epoch": 0.6796753760505584, "grad_norm": 0.78125, "learning_rate": 0.00013224529491518891, "loss": 0.8489, "step": 26470 }, { "epoch": 0.6797010532464802, "grad_norm": 0.76171875, "learning_rate": 0.000132241069134504, "loss": 0.7246, "step": 26471 }, { "epoch": 0.6797267304424021, "grad_norm": 0.7578125, "learning_rate": 0.00013223684328956555, "loss": 0.8116, "step": 26472 }, { "epoch": 0.6797524076383239, "grad_norm": 0.84375, "learning_rate": 0.000132232617380382, "loss": 0.931, "step": 26473 }, { "epoch": 0.6797780848342456, "grad_norm": 0.88671875, "learning_rate": 0.00013222839140696174, "loss": 0.8288, "step": 26474 }, { "epoch": 0.6798037620301675, "grad_norm": 0.875, "learning_rate": 0.0001322241653693132, "loss": 0.9639, "step": 26475 }, { "epoch": 0.6798294392260893, "grad_norm": 0.84375, "learning_rate": 0.00013221993926744482, "loss": 0.8692, "step": 26476 }, { "epoch": 0.6798551164220111, "grad_norm": 0.7890625, "learning_rate": 0.000132215713101365, "loss": 0.9845, "step": 26477 }, { "epoch": 0.679880793617933, "grad_norm": 0.73046875, "learning_rate": 0.00013221148687108219, "loss": 0.8727, "step": 26478 }, { "epoch": 0.6799064708138548, "grad_norm": 0.79296875, "learning_rate": 0.0001322072605766048, "loss": 0.9201, "step": 26479 }, { "epoch": 0.6799321480097766, "grad_norm": 0.78515625, "learning_rate": 0.00013220303421794123, "loss": 0.9149, "step": 26480 }, { "epoch": 0.6799578252056984, "grad_norm": 0.80859375, "learning_rate": 0.00013219880779509992, "loss": 0.7766, "step": 26481 }, { "epoch": 0.6799835024016202, "grad_norm": 0.80078125, "learning_rate": 0.0001321945813080893, "loss": 0.9169, "step": 26482 }, { "epoch": 0.680009179597542, "grad_norm": 0.6875, "learning_rate": 0.0001321903547569178, "loss": 0.672, "step": 26483 }, { "epoch": 0.6800348567934639, "grad_norm": 0.8125, "learning_rate": 0.0001321861281415938, "loss": 0.8868, "step": 26484 }, { "epoch": 0.6800605339893857, "grad_norm": 0.79296875, "learning_rate": 0.0001321819014621258, "loss": 0.8782, "step": 26485 }, { "epoch": 0.6800862111853075, "grad_norm": 0.76171875, "learning_rate": 0.00013217767471852216, "loss": 0.7666, "step": 26486 }, { "epoch": 0.6801118883812294, "grad_norm": 0.80078125, "learning_rate": 0.00013217344791079133, "loss": 0.8865, "step": 26487 }, { "epoch": 0.6801375655771511, "grad_norm": 0.89453125, "learning_rate": 0.00013216922103894167, "loss": 0.957, "step": 26488 }, { "epoch": 0.6801632427730729, "grad_norm": 0.75, "learning_rate": 0.0001321649941029817, "loss": 0.899, "step": 26489 }, { "epoch": 0.6801889199689948, "grad_norm": 0.73046875, "learning_rate": 0.0001321607671029198, "loss": 0.7215, "step": 26490 }, { "epoch": 0.6802145971649166, "grad_norm": 0.88671875, "learning_rate": 0.00013215654003876436, "loss": 0.7672, "step": 26491 }, { "epoch": 0.6802402743608384, "grad_norm": 0.8046875, "learning_rate": 0.00013215231291052393, "loss": 0.8127, "step": 26492 }, { "epoch": 0.6802659515567603, "grad_norm": 0.9140625, "learning_rate": 0.00013214808571820677, "loss": 1.1086, "step": 26493 }, { "epoch": 0.680291628752682, "grad_norm": 0.76171875, "learning_rate": 0.0001321438584618214, "loss": 0.8897, "step": 26494 }, { "epoch": 0.6803173059486038, "grad_norm": 0.76953125, "learning_rate": 0.00013213963114137623, "loss": 0.7848, "step": 26495 }, { "epoch": 0.6803429831445257, "grad_norm": 0.7890625, "learning_rate": 0.00013213540375687965, "loss": 0.8391, "step": 26496 }, { "epoch": 0.6803686603404475, "grad_norm": 0.71875, "learning_rate": 0.00013213117630834012, "loss": 0.7138, "step": 26497 }, { "epoch": 0.6803943375363694, "grad_norm": 0.8203125, "learning_rate": 0.0001321269487957661, "loss": 0.8476, "step": 26498 }, { "epoch": 0.6804200147322912, "grad_norm": 0.7421875, "learning_rate": 0.00013212272121916594, "loss": 0.8102, "step": 26499 }, { "epoch": 0.680445691928213, "grad_norm": 0.76171875, "learning_rate": 0.00013211849357854812, "loss": 0.7082, "step": 26500 }, { "epoch": 0.6804713691241348, "grad_norm": 0.7421875, "learning_rate": 0.00013211426587392099, "loss": 0.6846, "step": 26501 }, { "epoch": 0.6804970463200566, "grad_norm": 0.765625, "learning_rate": 0.00013211003810529307, "loss": 0.8405, "step": 26502 }, { "epoch": 0.6805227235159784, "grad_norm": 0.75390625, "learning_rate": 0.00013210581027267275, "loss": 0.7175, "step": 26503 }, { "epoch": 0.6805484007119003, "grad_norm": 0.8046875, "learning_rate": 0.00013210158237606843, "loss": 0.945, "step": 26504 }, { "epoch": 0.6805740779078221, "grad_norm": 0.77734375, "learning_rate": 0.00013209735441548857, "loss": 0.8477, "step": 26505 }, { "epoch": 0.6805997551037439, "grad_norm": 0.80078125, "learning_rate": 0.0001320931263909416, "loss": 0.8236, "step": 26506 }, { "epoch": 0.6806254322996658, "grad_norm": 0.7734375, "learning_rate": 0.0001320888983024359, "loss": 0.7678, "step": 26507 }, { "epoch": 0.6806511094955875, "grad_norm": 0.828125, "learning_rate": 0.00013208467014997992, "loss": 0.9368, "step": 26508 }, { "epoch": 0.6806767866915093, "grad_norm": 0.78125, "learning_rate": 0.0001320804419335821, "loss": 0.7824, "step": 26509 }, { "epoch": 0.6807024638874312, "grad_norm": 0.8046875, "learning_rate": 0.00013207621365325084, "loss": 0.8165, "step": 26510 }, { "epoch": 0.680728141083353, "grad_norm": 0.75390625, "learning_rate": 0.00013207198530899462, "loss": 0.7544, "step": 26511 }, { "epoch": 0.6807538182792748, "grad_norm": 0.82421875, "learning_rate": 0.00013206775690082184, "loss": 0.7286, "step": 26512 }, { "epoch": 0.6807794954751967, "grad_norm": 0.78515625, "learning_rate": 0.00013206352842874087, "loss": 0.8101, "step": 26513 }, { "epoch": 0.6808051726711184, "grad_norm": 0.73828125, "learning_rate": 0.0001320592998927602, "loss": 0.7763, "step": 26514 }, { "epoch": 0.6808308498670402, "grad_norm": 0.80078125, "learning_rate": 0.00013205507129288824, "loss": 0.7776, "step": 26515 }, { "epoch": 0.6808565270629621, "grad_norm": 0.73046875, "learning_rate": 0.00013205084262913342, "loss": 0.7959, "step": 26516 }, { "epoch": 0.6808822042588839, "grad_norm": 0.76953125, "learning_rate": 0.00013204661390150415, "loss": 0.8107, "step": 26517 }, { "epoch": 0.6809078814548057, "grad_norm": 0.765625, "learning_rate": 0.0001320423851100089, "loss": 0.8549, "step": 26518 }, { "epoch": 0.6809335586507276, "grad_norm": 0.7265625, "learning_rate": 0.00013203815625465606, "loss": 0.796, "step": 26519 }, { "epoch": 0.6809592358466494, "grad_norm": 0.69140625, "learning_rate": 0.00013203392733545407, "loss": 0.6583, "step": 26520 }, { "epoch": 0.6809849130425711, "grad_norm": 0.7578125, "learning_rate": 0.00013202969835241136, "loss": 0.7927, "step": 26521 }, { "epoch": 0.681010590238493, "grad_norm": 0.734375, "learning_rate": 0.00013202546930553635, "loss": 0.7441, "step": 26522 }, { "epoch": 0.6810362674344148, "grad_norm": 0.80078125, "learning_rate": 0.00013202124019483748, "loss": 0.8918, "step": 26523 }, { "epoch": 0.6810619446303366, "grad_norm": 0.76953125, "learning_rate": 0.00013201701102032317, "loss": 0.9145, "step": 26524 }, { "epoch": 0.6810876218262585, "grad_norm": 0.77734375, "learning_rate": 0.00013201278178200186, "loss": 0.9161, "step": 26525 }, { "epoch": 0.6811132990221803, "grad_norm": 0.88671875, "learning_rate": 0.00013200855247988194, "loss": 0.8058, "step": 26526 }, { "epoch": 0.6811389762181022, "grad_norm": 0.82421875, "learning_rate": 0.00013200432311397188, "loss": 0.8467, "step": 26527 }, { "epoch": 0.6811646534140239, "grad_norm": 0.7890625, "learning_rate": 0.00013200009368428007, "loss": 0.8118, "step": 26528 }, { "epoch": 0.6811903306099457, "grad_norm": 0.8046875, "learning_rate": 0.000131995864190815, "loss": 0.9785, "step": 26529 }, { "epoch": 0.6812160078058676, "grad_norm": 0.7578125, "learning_rate": 0.00013199163463358505, "loss": 0.7786, "step": 26530 }, { "epoch": 0.6812416850017894, "grad_norm": 0.82421875, "learning_rate": 0.00013198740501259865, "loss": 0.7613, "step": 26531 }, { "epoch": 0.6812673621977112, "grad_norm": 0.73828125, "learning_rate": 0.00013198317532786427, "loss": 0.8499, "step": 26532 }, { "epoch": 0.6812930393936331, "grad_norm": 0.87890625, "learning_rate": 0.0001319789455793903, "loss": 0.9403, "step": 26533 }, { "epoch": 0.6813187165895548, "grad_norm": 0.8203125, "learning_rate": 0.00013197471576718517, "loss": 0.8673, "step": 26534 }, { "epoch": 0.6813443937854766, "grad_norm": 0.84375, "learning_rate": 0.00013197048589125732, "loss": 0.9986, "step": 26535 }, { "epoch": 0.6813700709813985, "grad_norm": 0.7109375, "learning_rate": 0.00013196625595161517, "loss": 0.8294, "step": 26536 }, { "epoch": 0.6813957481773203, "grad_norm": 0.703125, "learning_rate": 0.0001319620259482672, "loss": 0.7357, "step": 26537 }, { "epoch": 0.6814214253732421, "grad_norm": 0.890625, "learning_rate": 0.00013195779588122178, "loss": 0.9094, "step": 26538 }, { "epoch": 0.681447102569164, "grad_norm": 0.82421875, "learning_rate": 0.00013195356575048734, "loss": 0.7872, "step": 26539 }, { "epoch": 0.6814727797650858, "grad_norm": 0.84375, "learning_rate": 0.00013194933555607234, "loss": 0.7227, "step": 26540 }, { "epoch": 0.6814984569610075, "grad_norm": 0.78125, "learning_rate": 0.00013194510529798521, "loss": 0.7131, "step": 26541 }, { "epoch": 0.6815241341569294, "grad_norm": 0.72265625, "learning_rate": 0.00013194087497623436, "loss": 0.8768, "step": 26542 }, { "epoch": 0.6815498113528512, "grad_norm": 0.77734375, "learning_rate": 0.00013193664459082823, "loss": 0.9291, "step": 26543 }, { "epoch": 0.681575488548773, "grad_norm": 0.77734375, "learning_rate": 0.00013193241414177523, "loss": 0.9402, "step": 26544 }, { "epoch": 0.6816011657446949, "grad_norm": 0.79296875, "learning_rate": 0.00013192818362908388, "loss": 0.8536, "step": 26545 }, { "epoch": 0.6816268429406167, "grad_norm": 0.80859375, "learning_rate": 0.00013192395305276249, "loss": 0.843, "step": 26546 }, { "epoch": 0.6816525201365385, "grad_norm": 0.83203125, "learning_rate": 0.00013191972241281957, "loss": 0.8622, "step": 26547 }, { "epoch": 0.6816781973324603, "grad_norm": 0.8125, "learning_rate": 0.0001319154917092635, "loss": 0.8977, "step": 26548 }, { "epoch": 0.6817038745283821, "grad_norm": 0.91015625, "learning_rate": 0.00013191126094210274, "loss": 0.8523, "step": 26549 }, { "epoch": 0.6817295517243039, "grad_norm": 0.9453125, "learning_rate": 0.00013190703011134572, "loss": 0.9071, "step": 26550 }, { "epoch": 0.6817552289202258, "grad_norm": 0.7265625, "learning_rate": 0.00013190279921700091, "loss": 0.9197, "step": 26551 }, { "epoch": 0.6817809061161476, "grad_norm": 0.81640625, "learning_rate": 0.00013189856825907668, "loss": 0.8649, "step": 26552 }, { "epoch": 0.6818065833120694, "grad_norm": 0.77734375, "learning_rate": 0.00013189433723758147, "loss": 0.8129, "step": 26553 }, { "epoch": 0.6818322605079912, "grad_norm": 0.78515625, "learning_rate": 0.00013189010615252374, "loss": 0.9473, "step": 26554 }, { "epoch": 0.681857937703913, "grad_norm": 0.796875, "learning_rate": 0.0001318858750039119, "loss": 0.7475, "step": 26555 }, { "epoch": 0.6818836148998348, "grad_norm": 0.8046875, "learning_rate": 0.0001318816437917544, "loss": 1.0872, "step": 26556 }, { "epoch": 0.6819092920957567, "grad_norm": 0.7734375, "learning_rate": 0.00013187741251605964, "loss": 0.8448, "step": 26557 }, { "epoch": 0.6819349692916785, "grad_norm": 0.79296875, "learning_rate": 0.00013187318117683614, "loss": 0.7932, "step": 26558 }, { "epoch": 0.6819606464876004, "grad_norm": 0.9609375, "learning_rate": 0.00013186894977409224, "loss": 0.9481, "step": 26559 }, { "epoch": 0.6819863236835222, "grad_norm": 0.76171875, "learning_rate": 0.00013186471830783634, "loss": 0.8829, "step": 26560 }, { "epoch": 0.6820120008794439, "grad_norm": 0.73828125, "learning_rate": 0.000131860486778077, "loss": 0.758, "step": 26561 }, { "epoch": 0.6820376780753657, "grad_norm": 0.81640625, "learning_rate": 0.0001318562551848226, "loss": 0.7714, "step": 26562 }, { "epoch": 0.6820633552712876, "grad_norm": 0.8046875, "learning_rate": 0.0001318520235280815, "loss": 0.948, "step": 26563 }, { "epoch": 0.6820890324672094, "grad_norm": 0.78125, "learning_rate": 0.00013184779180786224, "loss": 0.8053, "step": 26564 }, { "epoch": 0.6821147096631313, "grad_norm": 0.81640625, "learning_rate": 0.0001318435600241732, "loss": 0.8987, "step": 26565 }, { "epoch": 0.6821403868590531, "grad_norm": 0.7265625, "learning_rate": 0.0001318393281770228, "loss": 0.716, "step": 26566 }, { "epoch": 0.6821660640549749, "grad_norm": 0.7734375, "learning_rate": 0.00013183509626641952, "loss": 0.9491, "step": 26567 }, { "epoch": 0.6821917412508967, "grad_norm": 0.74609375, "learning_rate": 0.00013183086429237174, "loss": 0.8562, "step": 26568 }, { "epoch": 0.6822174184468185, "grad_norm": 0.7890625, "learning_rate": 0.00013182663225488797, "loss": 0.8369, "step": 26569 }, { "epoch": 0.6822430956427403, "grad_norm": 0.73046875, "learning_rate": 0.00013182240015397655, "loss": 0.7419, "step": 26570 }, { "epoch": 0.6822687728386622, "grad_norm": 0.71875, "learning_rate": 0.000131818167989646, "loss": 0.8221, "step": 26571 }, { "epoch": 0.682294450034584, "grad_norm": 0.76953125, "learning_rate": 0.0001318139357619047, "loss": 0.7978, "step": 26572 }, { "epoch": 0.6823201272305058, "grad_norm": 0.76171875, "learning_rate": 0.0001318097034707611, "loss": 0.7413, "step": 26573 }, { "epoch": 0.6823458044264276, "grad_norm": 0.79296875, "learning_rate": 0.00013180547111622362, "loss": 0.9105, "step": 26574 }, { "epoch": 0.6823714816223494, "grad_norm": 0.7109375, "learning_rate": 0.00013180123869830073, "loss": 0.8445, "step": 26575 }, { "epoch": 0.6823971588182712, "grad_norm": 0.89453125, "learning_rate": 0.00013179700621700083, "loss": 0.8169, "step": 26576 }, { "epoch": 0.6824228360141931, "grad_norm": 0.80078125, "learning_rate": 0.00013179277367233235, "loss": 0.8167, "step": 26577 }, { "epoch": 0.6824485132101149, "grad_norm": 0.7890625, "learning_rate": 0.00013178854106430377, "loss": 0.7722, "step": 26578 }, { "epoch": 0.6824741904060367, "grad_norm": 0.828125, "learning_rate": 0.0001317843083929235, "loss": 0.9609, "step": 26579 }, { "epoch": 0.6824998676019586, "grad_norm": 0.76953125, "learning_rate": 0.0001317800756582, "loss": 0.9489, "step": 26580 }, { "epoch": 0.6825255447978803, "grad_norm": 0.83984375, "learning_rate": 0.00013177584286014162, "loss": 0.9862, "step": 26581 }, { "epoch": 0.6825512219938021, "grad_norm": 0.7109375, "learning_rate": 0.0001317716099987569, "loss": 0.7465, "step": 26582 }, { "epoch": 0.682576899189724, "grad_norm": 0.80078125, "learning_rate": 0.0001317673770740542, "loss": 0.7929, "step": 26583 }, { "epoch": 0.6826025763856458, "grad_norm": 0.8828125, "learning_rate": 0.000131763144086042, "loss": 0.7724, "step": 26584 }, { "epoch": 0.6826282535815676, "grad_norm": 0.71875, "learning_rate": 0.00013175891103472872, "loss": 0.7361, "step": 26585 }, { "epoch": 0.6826539307774895, "grad_norm": 0.796875, "learning_rate": 0.00013175467792012283, "loss": 0.8296, "step": 26586 }, { "epoch": 0.6826796079734113, "grad_norm": 0.703125, "learning_rate": 0.00013175044474223268, "loss": 0.8802, "step": 26587 }, { "epoch": 0.682705285169333, "grad_norm": 0.78515625, "learning_rate": 0.00013174621150106682, "loss": 0.8134, "step": 26588 }, { "epoch": 0.6827309623652549, "grad_norm": 0.8359375, "learning_rate": 0.00013174197819663356, "loss": 0.8591, "step": 26589 }, { "epoch": 0.6827566395611767, "grad_norm": 0.8203125, "learning_rate": 0.00013173774482894146, "loss": 0.8457, "step": 26590 }, { "epoch": 0.6827823167570986, "grad_norm": 0.75390625, "learning_rate": 0.0001317335113979989, "loss": 0.8545, "step": 26591 }, { "epoch": 0.6828079939530204, "grad_norm": 0.8203125, "learning_rate": 0.0001317292779038143, "loss": 0.8631, "step": 26592 }, { "epoch": 0.6828336711489422, "grad_norm": 0.79296875, "learning_rate": 0.00013172504434639612, "loss": 0.8374, "step": 26593 }, { "epoch": 0.682859348344864, "grad_norm": 0.75390625, "learning_rate": 0.00013172081072575277, "loss": 0.7184, "step": 26594 }, { "epoch": 0.6828850255407858, "grad_norm": 0.796875, "learning_rate": 0.00013171657704189274, "loss": 0.7093, "step": 26595 }, { "epoch": 0.6829107027367076, "grad_norm": 0.8046875, "learning_rate": 0.00013171234329482445, "loss": 0.7977, "step": 26596 }, { "epoch": 0.6829363799326295, "grad_norm": 0.765625, "learning_rate": 0.00013170810948455626, "loss": 0.7155, "step": 26597 }, { "epoch": 0.6829620571285513, "grad_norm": 0.80078125, "learning_rate": 0.00013170387561109673, "loss": 0.7622, "step": 26598 }, { "epoch": 0.6829877343244731, "grad_norm": 0.8828125, "learning_rate": 0.00013169964167445425, "loss": 0.8445, "step": 26599 }, { "epoch": 0.683013411520395, "grad_norm": 0.765625, "learning_rate": 0.0001316954076746372, "loss": 0.845, "step": 26600 }, { "epoch": 0.6830390887163167, "grad_norm": 0.9765625, "learning_rate": 0.0001316911736116541, "loss": 0.8214, "step": 26601 }, { "epoch": 0.6830647659122385, "grad_norm": 0.80859375, "learning_rate": 0.0001316869394855133, "loss": 0.8129, "step": 26602 }, { "epoch": 0.6830904431081604, "grad_norm": 0.796875, "learning_rate": 0.00013168270529622338, "loss": 0.8227, "step": 26603 }, { "epoch": 0.6831161203040822, "grad_norm": 0.7109375, "learning_rate": 0.00013167847104379266, "loss": 0.8564, "step": 26604 }, { "epoch": 0.683141797500004, "grad_norm": 0.86328125, "learning_rate": 0.00013167423672822955, "loss": 0.8463, "step": 26605 }, { "epoch": 0.6831674746959259, "grad_norm": 0.75, "learning_rate": 0.00013167000234954262, "loss": 0.8819, "step": 26606 }, { "epoch": 0.6831931518918477, "grad_norm": 0.7265625, "learning_rate": 0.0001316657679077402, "loss": 0.8634, "step": 26607 }, { "epoch": 0.6832188290877694, "grad_norm": 0.9375, "learning_rate": 0.00013166153340283073, "loss": 0.8305, "step": 26608 }, { "epoch": 0.6832445062836913, "grad_norm": 0.86328125, "learning_rate": 0.00013165729883482275, "loss": 0.844, "step": 26609 }, { "epoch": 0.6832701834796131, "grad_norm": 0.859375, "learning_rate": 0.0001316530642037246, "loss": 0.8213, "step": 26610 }, { "epoch": 0.6832958606755349, "grad_norm": 0.8671875, "learning_rate": 0.0001316488295095448, "loss": 0.7456, "step": 26611 }, { "epoch": 0.6833215378714568, "grad_norm": 0.8828125, "learning_rate": 0.0001316445947522917, "loss": 0.9356, "step": 26612 }, { "epoch": 0.6833472150673786, "grad_norm": 0.75390625, "learning_rate": 0.00013164035993197375, "loss": 0.7296, "step": 26613 }, { "epoch": 0.6833728922633003, "grad_norm": 0.76171875, "learning_rate": 0.00013163612504859946, "loss": 0.8054, "step": 26614 }, { "epoch": 0.6833985694592222, "grad_norm": 0.77734375, "learning_rate": 0.00013163189010217721, "loss": 0.8234, "step": 26615 }, { "epoch": 0.683424246655144, "grad_norm": 0.828125, "learning_rate": 0.00013162765509271547, "loss": 0.8836, "step": 26616 }, { "epoch": 0.6834499238510658, "grad_norm": 0.94921875, "learning_rate": 0.0001316234200202227, "loss": 0.8398, "step": 26617 }, { "epoch": 0.6834756010469877, "grad_norm": 0.76171875, "learning_rate": 0.00013161918488470726, "loss": 0.7819, "step": 26618 }, { "epoch": 0.6835012782429095, "grad_norm": 0.77734375, "learning_rate": 0.00013161494968617768, "loss": 0.8147, "step": 26619 }, { "epoch": 0.6835269554388314, "grad_norm": 0.76171875, "learning_rate": 0.00013161071442464235, "loss": 0.8438, "step": 26620 }, { "epoch": 0.6835526326347531, "grad_norm": 1.140625, "learning_rate": 0.0001316064791001097, "loss": 0.8383, "step": 26621 }, { "epoch": 0.6835783098306749, "grad_norm": 0.65625, "learning_rate": 0.0001316022437125882, "loss": 0.7027, "step": 26622 }, { "epoch": 0.6836039870265967, "grad_norm": 0.79296875, "learning_rate": 0.0001315980082620863, "loss": 0.8089, "step": 26623 }, { "epoch": 0.6836296642225186, "grad_norm": 0.81640625, "learning_rate": 0.0001315937727486124, "loss": 0.7193, "step": 26624 }, { "epoch": 0.6836553414184404, "grad_norm": 0.7734375, "learning_rate": 0.000131589537172175, "loss": 0.7554, "step": 26625 }, { "epoch": 0.6836810186143623, "grad_norm": 0.796875, "learning_rate": 0.0001315853015327825, "loss": 0.8394, "step": 26626 }, { "epoch": 0.6837066958102841, "grad_norm": 0.84765625, "learning_rate": 0.0001315810658304433, "loss": 0.9157, "step": 26627 }, { "epoch": 0.6837323730062058, "grad_norm": 0.78125, "learning_rate": 0.00013157683006516594, "loss": 0.833, "step": 26628 }, { "epoch": 0.6837580502021277, "grad_norm": 0.72265625, "learning_rate": 0.0001315725942369588, "loss": 0.7206, "step": 26629 }, { "epoch": 0.6837837273980495, "grad_norm": 0.77734375, "learning_rate": 0.00013156835834583032, "loss": 0.8397, "step": 26630 }, { "epoch": 0.6838094045939713, "grad_norm": 0.734375, "learning_rate": 0.00013156412239178898, "loss": 0.8554, "step": 26631 }, { "epoch": 0.6838350817898932, "grad_norm": 0.796875, "learning_rate": 0.00013155988637484316, "loss": 0.855, "step": 26632 }, { "epoch": 0.683860758985815, "grad_norm": 0.7421875, "learning_rate": 0.00013155565029500136, "loss": 0.8896, "step": 26633 }, { "epoch": 0.6838864361817367, "grad_norm": 0.796875, "learning_rate": 0.00013155141415227198, "loss": 0.8109, "step": 26634 }, { "epoch": 0.6839121133776586, "grad_norm": 0.765625, "learning_rate": 0.0001315471779466635, "loss": 0.8312, "step": 26635 }, { "epoch": 0.6839377905735804, "grad_norm": 0.84375, "learning_rate": 0.00013154294167818433, "loss": 0.9622, "step": 26636 }, { "epoch": 0.6839634677695022, "grad_norm": 0.80078125, "learning_rate": 0.00013153870534684292, "loss": 0.851, "step": 26637 }, { "epoch": 0.6839891449654241, "grad_norm": 0.78125, "learning_rate": 0.00013153446895264773, "loss": 0.9827, "step": 26638 }, { "epoch": 0.6840148221613459, "grad_norm": 0.73046875, "learning_rate": 0.0001315302324956072, "loss": 0.8658, "step": 26639 }, { "epoch": 0.6840404993572677, "grad_norm": 0.79296875, "learning_rate": 0.00013152599597572974, "loss": 0.8773, "step": 26640 }, { "epoch": 0.6840661765531895, "grad_norm": 0.83203125, "learning_rate": 0.00013152175939302384, "loss": 0.9731, "step": 26641 }, { "epoch": 0.6840918537491113, "grad_norm": 0.69921875, "learning_rate": 0.00013151752274749793, "loss": 0.7186, "step": 26642 }, { "epoch": 0.6841175309450331, "grad_norm": 0.8125, "learning_rate": 0.00013151328603916042, "loss": 0.8385, "step": 26643 }, { "epoch": 0.684143208140955, "grad_norm": 0.72265625, "learning_rate": 0.00013150904926801982, "loss": 0.7045, "step": 26644 }, { "epoch": 0.6841688853368768, "grad_norm": 0.8359375, "learning_rate": 0.00013150481243408448, "loss": 0.7759, "step": 26645 }, { "epoch": 0.6841945625327986, "grad_norm": 0.84375, "learning_rate": 0.0001315005755373629, "loss": 0.7838, "step": 26646 }, { "epoch": 0.6842202397287204, "grad_norm": 0.7578125, "learning_rate": 0.00013149633857786354, "loss": 0.8104, "step": 26647 }, { "epoch": 0.6842459169246422, "grad_norm": 0.859375, "learning_rate": 0.0001314921015555948, "loss": 0.8589, "step": 26648 }, { "epoch": 0.684271594120564, "grad_norm": 0.72265625, "learning_rate": 0.00013148786447056518, "loss": 0.9313, "step": 26649 }, { "epoch": 0.6842972713164859, "grad_norm": 0.734375, "learning_rate": 0.00013148362732278308, "loss": 0.8147, "step": 26650 }, { "epoch": 0.6843229485124077, "grad_norm": 0.79296875, "learning_rate": 0.00013147939011225694, "loss": 0.8166, "step": 26651 }, { "epoch": 0.6843486257083295, "grad_norm": 0.8515625, "learning_rate": 0.00013147515283899521, "loss": 0.9459, "step": 26652 }, { "epoch": 0.6843743029042514, "grad_norm": 0.71875, "learning_rate": 0.00013147091550300637, "loss": 0.7479, "step": 26653 }, { "epoch": 0.6843999801001731, "grad_norm": 0.76953125, "learning_rate": 0.0001314666781042988, "loss": 0.8571, "step": 26654 }, { "epoch": 0.684425657296095, "grad_norm": 0.8515625, "learning_rate": 0.000131462440642881, "loss": 0.9417, "step": 26655 }, { "epoch": 0.6844513344920168, "grad_norm": 0.8671875, "learning_rate": 0.00013145820311876144, "loss": 0.8204, "step": 26656 }, { "epoch": 0.6844770116879386, "grad_norm": 0.7578125, "learning_rate": 0.0001314539655319485, "loss": 0.8752, "step": 26657 }, { "epoch": 0.6845026888838605, "grad_norm": 0.84765625, "learning_rate": 0.0001314497278824506, "loss": 0.8753, "step": 26658 }, { "epoch": 0.6845283660797823, "grad_norm": 0.83203125, "learning_rate": 0.00013144549017027628, "loss": 0.9382, "step": 26659 }, { "epoch": 0.6845540432757041, "grad_norm": 0.75390625, "learning_rate": 0.00013144125239543394, "loss": 0.8271, "step": 26660 }, { "epoch": 0.6845797204716259, "grad_norm": 0.796875, "learning_rate": 0.000131437014557932, "loss": 0.8969, "step": 26661 }, { "epoch": 0.6846053976675477, "grad_norm": 0.76171875, "learning_rate": 0.00013143277665777893, "loss": 0.8532, "step": 26662 }, { "epoch": 0.6846310748634695, "grad_norm": 0.74609375, "learning_rate": 0.0001314285386949832, "loss": 0.7375, "step": 26663 }, { "epoch": 0.6846567520593914, "grad_norm": 0.71484375, "learning_rate": 0.00013142430066955324, "loss": 0.7495, "step": 26664 }, { "epoch": 0.6846824292553132, "grad_norm": 0.7421875, "learning_rate": 0.00013142006258149745, "loss": 0.6703, "step": 26665 }, { "epoch": 0.684708106451235, "grad_norm": 0.73828125, "learning_rate": 0.00013141582443082435, "loss": 0.9208, "step": 26666 }, { "epoch": 0.6847337836471568, "grad_norm": 0.80859375, "learning_rate": 0.00013141158621754232, "loss": 0.9397, "step": 26667 }, { "epoch": 0.6847594608430786, "grad_norm": 0.8046875, "learning_rate": 0.00013140734794165986, "loss": 0.7836, "step": 26668 }, { "epoch": 0.6847851380390004, "grad_norm": 0.90625, "learning_rate": 0.00013140310960318535, "loss": 0.8126, "step": 26669 }, { "epoch": 0.6848108152349223, "grad_norm": 0.7890625, "learning_rate": 0.00013139887120212733, "loss": 0.853, "step": 26670 }, { "epoch": 0.6848364924308441, "grad_norm": 0.7578125, "learning_rate": 0.00013139463273849415, "loss": 0.7712, "step": 26671 }, { "epoch": 0.6848621696267659, "grad_norm": 0.80078125, "learning_rate": 0.00013139039421229432, "loss": 0.8154, "step": 26672 }, { "epoch": 0.6848878468226878, "grad_norm": 0.78515625, "learning_rate": 0.0001313861556235363, "loss": 0.7949, "step": 26673 }, { "epoch": 0.6849135240186095, "grad_norm": 0.7421875, "learning_rate": 0.00013138191697222845, "loss": 0.7206, "step": 26674 }, { "epoch": 0.6849392012145313, "grad_norm": 0.73828125, "learning_rate": 0.00013137767825837932, "loss": 0.759, "step": 26675 }, { "epoch": 0.6849648784104532, "grad_norm": 0.8046875, "learning_rate": 0.00013137343948199732, "loss": 0.8588, "step": 26676 }, { "epoch": 0.684990555606375, "grad_norm": 0.82421875, "learning_rate": 0.00013136920064309087, "loss": 0.8185, "step": 26677 }, { "epoch": 0.6850162328022968, "grad_norm": 0.7734375, "learning_rate": 0.00013136496174166843, "loss": 0.8553, "step": 26678 }, { "epoch": 0.6850419099982187, "grad_norm": 0.703125, "learning_rate": 0.00013136072277773845, "loss": 0.7599, "step": 26679 }, { "epoch": 0.6850675871941405, "grad_norm": 0.84375, "learning_rate": 0.0001313564837513094, "loss": 0.8678, "step": 26680 }, { "epoch": 0.6850932643900622, "grad_norm": 0.73046875, "learning_rate": 0.00013135224466238968, "loss": 0.8276, "step": 26681 }, { "epoch": 0.6851189415859841, "grad_norm": 0.78515625, "learning_rate": 0.0001313480055109878, "loss": 0.8819, "step": 26682 }, { "epoch": 0.6851446187819059, "grad_norm": 0.7109375, "learning_rate": 0.00013134376629711218, "loss": 0.8196, "step": 26683 }, { "epoch": 0.6851702959778277, "grad_norm": 0.74609375, "learning_rate": 0.00013133952702077125, "loss": 0.8836, "step": 26684 }, { "epoch": 0.6851959731737496, "grad_norm": 0.77734375, "learning_rate": 0.00013133528768197346, "loss": 0.7223, "step": 26685 }, { "epoch": 0.6852216503696714, "grad_norm": 0.765625, "learning_rate": 0.0001313310482807273, "loss": 0.8283, "step": 26686 }, { "epoch": 0.6852473275655931, "grad_norm": 0.7890625, "learning_rate": 0.00013132680881704117, "loss": 0.8003, "step": 26687 }, { "epoch": 0.685273004761515, "grad_norm": 0.77734375, "learning_rate": 0.00013132256929092355, "loss": 0.8494, "step": 26688 }, { "epoch": 0.6852986819574368, "grad_norm": 0.7578125, "learning_rate": 0.00013131832970238286, "loss": 0.8497, "step": 26689 }, { "epoch": 0.6853243591533587, "grad_norm": 0.73828125, "learning_rate": 0.0001313140900514276, "loss": 0.8335, "step": 26690 }, { "epoch": 0.6853500363492805, "grad_norm": 0.734375, "learning_rate": 0.00013130985033806618, "loss": 0.869, "step": 26691 }, { "epoch": 0.6853757135452023, "grad_norm": 0.7890625, "learning_rate": 0.00013130561056230706, "loss": 0.7838, "step": 26692 }, { "epoch": 0.6854013907411242, "grad_norm": 0.78515625, "learning_rate": 0.00013130137072415866, "loss": 0.9254, "step": 26693 }, { "epoch": 0.6854270679370459, "grad_norm": 0.72265625, "learning_rate": 0.00013129713082362946, "loss": 0.7513, "step": 26694 }, { "epoch": 0.6854527451329677, "grad_norm": 0.87109375, "learning_rate": 0.00013129289086072794, "loss": 0.8283, "step": 26695 }, { "epoch": 0.6854784223288896, "grad_norm": 0.84765625, "learning_rate": 0.0001312886508354625, "loss": 0.6823, "step": 26696 }, { "epoch": 0.6855040995248114, "grad_norm": 0.84765625, "learning_rate": 0.0001312844107478416, "loss": 0.9094, "step": 26697 }, { "epoch": 0.6855297767207332, "grad_norm": 0.7890625, "learning_rate": 0.00013128017059787367, "loss": 0.6803, "step": 26698 }, { "epoch": 0.6855554539166551, "grad_norm": 0.77734375, "learning_rate": 0.0001312759303855672, "loss": 0.8522, "step": 26699 }, { "epoch": 0.6855811311125769, "grad_norm": 0.75390625, "learning_rate": 0.00013127169011093064, "loss": 0.7814, "step": 26700 }, { "epoch": 0.6856068083084986, "grad_norm": 0.78125, "learning_rate": 0.0001312674497739724, "loss": 0.9144, "step": 26701 }, { "epoch": 0.6856324855044205, "grad_norm": 0.78515625, "learning_rate": 0.000131263209374701, "loss": 0.8447, "step": 26702 }, { "epoch": 0.6856581627003423, "grad_norm": 0.80078125, "learning_rate": 0.00013125896891312482, "loss": 0.9791, "step": 26703 }, { "epoch": 0.6856838398962641, "grad_norm": 0.6796875, "learning_rate": 0.00013125472838925234, "loss": 0.8586, "step": 26704 }, { "epoch": 0.685709517092186, "grad_norm": 0.90625, "learning_rate": 0.00013125048780309202, "loss": 0.8, "step": 26705 }, { "epoch": 0.6857351942881078, "grad_norm": 0.76171875, "learning_rate": 0.00013124624715465228, "loss": 0.8624, "step": 26706 }, { "epoch": 0.6857608714840295, "grad_norm": 0.75390625, "learning_rate": 0.0001312420064439416, "loss": 0.8979, "step": 26707 }, { "epoch": 0.6857865486799514, "grad_norm": 0.70703125, "learning_rate": 0.00013123776567096845, "loss": 0.8023, "step": 26708 }, { "epoch": 0.6858122258758732, "grad_norm": 0.74609375, "learning_rate": 0.00013123352483574121, "loss": 0.7861, "step": 26709 }, { "epoch": 0.685837903071795, "grad_norm": 0.77734375, "learning_rate": 0.00013122928393826842, "loss": 0.8613, "step": 26710 }, { "epoch": 0.6858635802677169, "grad_norm": 0.796875, "learning_rate": 0.00013122504297855843, "loss": 0.8087, "step": 26711 }, { "epoch": 0.6858892574636387, "grad_norm": 0.77734375, "learning_rate": 0.0001312208019566198, "loss": 0.6896, "step": 26712 }, { "epoch": 0.6859149346595605, "grad_norm": 0.78515625, "learning_rate": 0.00013121656087246091, "loss": 0.8923, "step": 26713 }, { "epoch": 0.6859406118554823, "grad_norm": 0.83203125, "learning_rate": 0.00013121231972609024, "loss": 0.8425, "step": 26714 }, { "epoch": 0.6859662890514041, "grad_norm": 0.84765625, "learning_rate": 0.00013120807851751625, "loss": 0.905, "step": 26715 }, { "epoch": 0.685991966247326, "grad_norm": 0.99609375, "learning_rate": 0.00013120383724674737, "loss": 0.8624, "step": 26716 }, { "epoch": 0.6860176434432478, "grad_norm": 0.74609375, "learning_rate": 0.00013119959591379208, "loss": 0.8075, "step": 26717 }, { "epoch": 0.6860433206391696, "grad_norm": 0.7578125, "learning_rate": 0.00013119535451865878, "loss": 0.7922, "step": 26718 }, { "epoch": 0.6860689978350915, "grad_norm": 0.76953125, "learning_rate": 0.00013119111306135594, "loss": 0.8075, "step": 26719 }, { "epoch": 0.6860946750310133, "grad_norm": 0.8046875, "learning_rate": 0.00013118687154189206, "loss": 0.9635, "step": 26720 }, { "epoch": 0.686120352226935, "grad_norm": 0.73828125, "learning_rate": 0.00013118262996027557, "loss": 0.9683, "step": 26721 }, { "epoch": 0.6861460294228569, "grad_norm": 0.74609375, "learning_rate": 0.00013117838831651487, "loss": 0.7599, "step": 26722 }, { "epoch": 0.6861717066187787, "grad_norm": 0.73828125, "learning_rate": 0.00013117414661061854, "loss": 0.7276, "step": 26723 }, { "epoch": 0.6861973838147005, "grad_norm": 0.7734375, "learning_rate": 0.0001311699048425949, "loss": 0.7434, "step": 26724 }, { "epoch": 0.6862230610106224, "grad_norm": 0.70703125, "learning_rate": 0.00013116566301245246, "loss": 0.764, "step": 26725 }, { "epoch": 0.6862487382065442, "grad_norm": 0.78515625, "learning_rate": 0.00013116142112019967, "loss": 0.8057, "step": 26726 }, { "epoch": 0.6862744154024659, "grad_norm": 0.75, "learning_rate": 0.00013115717916584497, "loss": 0.8803, "step": 26727 }, { "epoch": 0.6863000925983878, "grad_norm": 0.734375, "learning_rate": 0.00013115293714939682, "loss": 0.7659, "step": 26728 }, { "epoch": 0.6863257697943096, "grad_norm": 0.78125, "learning_rate": 0.00013114869507086372, "loss": 0.7385, "step": 26729 }, { "epoch": 0.6863514469902314, "grad_norm": 0.8125, "learning_rate": 0.00013114445293025407, "loss": 0.9016, "step": 26730 }, { "epoch": 0.6863771241861533, "grad_norm": 0.72265625, "learning_rate": 0.0001311402107275763, "loss": 0.7456, "step": 26731 }, { "epoch": 0.6864028013820751, "grad_norm": 0.7734375, "learning_rate": 0.00013113596846283895, "loss": 0.7849, "step": 26732 }, { "epoch": 0.6864284785779969, "grad_norm": 0.80859375, "learning_rate": 0.0001311317261360504, "loss": 0.8104, "step": 26733 }, { "epoch": 0.6864541557739187, "grad_norm": 0.88671875, "learning_rate": 0.00013112748374721916, "loss": 0.9181, "step": 26734 }, { "epoch": 0.6864798329698405, "grad_norm": 0.72265625, "learning_rate": 0.00013112324129635364, "loss": 0.8595, "step": 26735 }, { "epoch": 0.6865055101657623, "grad_norm": 0.7890625, "learning_rate": 0.00013111899878346232, "loss": 0.7788, "step": 26736 }, { "epoch": 0.6865311873616842, "grad_norm": 0.80859375, "learning_rate": 0.00013111475620855364, "loss": 0.8111, "step": 26737 }, { "epoch": 0.686556864557606, "grad_norm": 0.72265625, "learning_rate": 0.00013111051357163605, "loss": 0.8616, "step": 26738 }, { "epoch": 0.6865825417535278, "grad_norm": 0.796875, "learning_rate": 0.00013110627087271803, "loss": 0.8381, "step": 26739 }, { "epoch": 0.6866082189494497, "grad_norm": 0.7734375, "learning_rate": 0.000131102028111808, "loss": 0.8351, "step": 26740 }, { "epoch": 0.6866338961453714, "grad_norm": 0.796875, "learning_rate": 0.00013109778528891446, "loss": 0.9152, "step": 26741 }, { "epoch": 0.6866595733412932, "grad_norm": 0.81640625, "learning_rate": 0.00013109354240404585, "loss": 0.8425, "step": 26742 }, { "epoch": 0.6866852505372151, "grad_norm": 0.8203125, "learning_rate": 0.0001310892994572106, "loss": 0.8364, "step": 26743 }, { "epoch": 0.6867109277331369, "grad_norm": 0.79296875, "learning_rate": 0.00013108505644841717, "loss": 0.7685, "step": 26744 }, { "epoch": 0.6867366049290587, "grad_norm": 0.77734375, "learning_rate": 0.00013108081337767405, "loss": 0.971, "step": 26745 }, { "epoch": 0.6867622821249806, "grad_norm": 0.828125, "learning_rate": 0.0001310765702449897, "loss": 0.8035, "step": 26746 }, { "epoch": 0.6867879593209023, "grad_norm": 0.79296875, "learning_rate": 0.00013107232705037252, "loss": 0.8641, "step": 26747 }, { "epoch": 0.6868136365168241, "grad_norm": 0.796875, "learning_rate": 0.00013106808379383103, "loss": 0.8245, "step": 26748 }, { "epoch": 0.686839313712746, "grad_norm": 1.0, "learning_rate": 0.00013106384047537363, "loss": 0.8224, "step": 26749 }, { "epoch": 0.6868649909086678, "grad_norm": 0.75390625, "learning_rate": 0.00013105959709500882, "loss": 0.8726, "step": 26750 }, { "epoch": 0.6868906681045897, "grad_norm": 0.859375, "learning_rate": 0.00013105535365274499, "loss": 0.9096, "step": 26751 }, { "epoch": 0.6869163453005115, "grad_norm": 0.8828125, "learning_rate": 0.0001310511101485907, "loss": 0.9887, "step": 26752 }, { "epoch": 0.6869420224964333, "grad_norm": 0.8125, "learning_rate": 0.00013104686658255434, "loss": 0.8489, "step": 26753 }, { "epoch": 0.686967699692355, "grad_norm": 0.78125, "learning_rate": 0.00013104262295464433, "loss": 0.9027, "step": 26754 }, { "epoch": 0.6869933768882769, "grad_norm": 1.0078125, "learning_rate": 0.00013103837926486925, "loss": 0.9083, "step": 26755 }, { "epoch": 0.6870190540841987, "grad_norm": 0.71484375, "learning_rate": 0.00013103413551323744, "loss": 0.8962, "step": 26756 }, { "epoch": 0.6870447312801206, "grad_norm": 0.76171875, "learning_rate": 0.00013102989169975742, "loss": 0.8, "step": 26757 }, { "epoch": 0.6870704084760424, "grad_norm": 0.80078125, "learning_rate": 0.00013102564782443762, "loss": 0.8349, "step": 26758 }, { "epoch": 0.6870960856719642, "grad_norm": 0.8125, "learning_rate": 0.0001310214038872865, "loss": 0.7807, "step": 26759 }, { "epoch": 0.6871217628678861, "grad_norm": 0.75390625, "learning_rate": 0.00013101715988831253, "loss": 0.8322, "step": 26760 }, { "epoch": 0.6871474400638078, "grad_norm": 0.84375, "learning_rate": 0.00013101291582752418, "loss": 0.9378, "step": 26761 }, { "epoch": 0.6871731172597296, "grad_norm": 0.8515625, "learning_rate": 0.00013100867170492988, "loss": 0.9034, "step": 26762 }, { "epoch": 0.6871987944556515, "grad_norm": 0.86328125, "learning_rate": 0.0001310044275205381, "loss": 0.9549, "step": 26763 }, { "epoch": 0.6872244716515733, "grad_norm": 0.75, "learning_rate": 0.00013100018327435726, "loss": 0.8164, "step": 26764 }, { "epoch": 0.6872501488474951, "grad_norm": 0.796875, "learning_rate": 0.00013099593896639592, "loss": 0.7626, "step": 26765 }, { "epoch": 0.687275826043417, "grad_norm": 0.82421875, "learning_rate": 0.00013099169459666242, "loss": 0.9608, "step": 26766 }, { "epoch": 0.6873015032393387, "grad_norm": 0.80859375, "learning_rate": 0.00013098745016516526, "loss": 0.8403, "step": 26767 }, { "epoch": 0.6873271804352605, "grad_norm": 0.7421875, "learning_rate": 0.00013098320567191295, "loss": 0.7987, "step": 26768 }, { "epoch": 0.6873528576311824, "grad_norm": 0.76953125, "learning_rate": 0.00013097896111691394, "loss": 0.8291, "step": 26769 }, { "epoch": 0.6873785348271042, "grad_norm": 0.69140625, "learning_rate": 0.0001309747165001766, "loss": 0.8359, "step": 26770 }, { "epoch": 0.687404212023026, "grad_norm": 0.77734375, "learning_rate": 0.00013097047182170947, "loss": 0.8726, "step": 26771 }, { "epoch": 0.6874298892189479, "grad_norm": 0.8046875, "learning_rate": 0.000130966227081521, "loss": 0.7616, "step": 26772 }, { "epoch": 0.6874555664148697, "grad_norm": 0.80859375, "learning_rate": 0.0001309619822796196, "loss": 0.8936, "step": 26773 }, { "epoch": 0.6874812436107914, "grad_norm": 0.765625, "learning_rate": 0.0001309577374160138, "loss": 0.7312, "step": 26774 }, { "epoch": 0.6875069208067133, "grad_norm": 1.0703125, "learning_rate": 0.000130953492490712, "loss": 0.8168, "step": 26775 }, { "epoch": 0.6875325980026351, "grad_norm": 0.74609375, "learning_rate": 0.0001309492475037227, "loss": 0.7474, "step": 26776 }, { "epoch": 0.687558275198557, "grad_norm": 0.74609375, "learning_rate": 0.00013094500245505438, "loss": 0.8113, "step": 26777 }, { "epoch": 0.6875839523944788, "grad_norm": 0.74609375, "learning_rate": 0.00013094075734471541, "loss": 0.7377, "step": 26778 }, { "epoch": 0.6876096295904006, "grad_norm": 0.7890625, "learning_rate": 0.00013093651217271432, "loss": 0.9297, "step": 26779 }, { "epoch": 0.6876353067863225, "grad_norm": 0.74609375, "learning_rate": 0.0001309322669390596, "loss": 0.8293, "step": 26780 }, { "epoch": 0.6876609839822442, "grad_norm": 0.72265625, "learning_rate": 0.00013092802164375956, "loss": 0.7746, "step": 26781 }, { "epoch": 0.687686661178166, "grad_norm": 0.78125, "learning_rate": 0.00013092377628682288, "loss": 0.7933, "step": 26782 }, { "epoch": 0.6877123383740879, "grad_norm": 0.85546875, "learning_rate": 0.00013091953086825787, "loss": 0.7091, "step": 26783 }, { "epoch": 0.6877380155700097, "grad_norm": 0.77734375, "learning_rate": 0.00013091528538807299, "loss": 0.76, "step": 26784 }, { "epoch": 0.6877636927659315, "grad_norm": 0.80859375, "learning_rate": 0.00013091103984627677, "loss": 0.8538, "step": 26785 }, { "epoch": 0.6877893699618534, "grad_norm": 0.83203125, "learning_rate": 0.00013090679424287763, "loss": 0.875, "step": 26786 }, { "epoch": 0.6878150471577751, "grad_norm": 0.81640625, "learning_rate": 0.00013090254857788403, "loss": 0.8452, "step": 26787 }, { "epoch": 0.6878407243536969, "grad_norm": 0.79296875, "learning_rate": 0.00013089830285130448, "loss": 0.7725, "step": 26788 }, { "epoch": 0.6878664015496188, "grad_norm": 0.82421875, "learning_rate": 0.0001308940570631474, "loss": 0.8668, "step": 26789 }, { "epoch": 0.6878920787455406, "grad_norm": 0.78125, "learning_rate": 0.00013088981121342123, "loss": 0.8852, "step": 26790 }, { "epoch": 0.6879177559414624, "grad_norm": 0.7734375, "learning_rate": 0.00013088556530213444, "loss": 0.7253, "step": 26791 }, { "epoch": 0.6879434331373843, "grad_norm": 0.81640625, "learning_rate": 0.00013088131932929557, "loss": 0.8883, "step": 26792 }, { "epoch": 0.6879691103333061, "grad_norm": 0.83203125, "learning_rate": 0.000130877073294913, "loss": 0.8523, "step": 26793 }, { "epoch": 0.6879947875292278, "grad_norm": 0.8359375, "learning_rate": 0.00013087282719899515, "loss": 1.027, "step": 26794 }, { "epoch": 0.6880204647251497, "grad_norm": 0.76953125, "learning_rate": 0.00013086858104155062, "loss": 0.919, "step": 26795 }, { "epoch": 0.6880461419210715, "grad_norm": 0.7890625, "learning_rate": 0.00013086433482258775, "loss": 0.8703, "step": 26796 }, { "epoch": 0.6880718191169933, "grad_norm": 0.76953125, "learning_rate": 0.00013086008854211508, "loss": 0.861, "step": 26797 }, { "epoch": 0.6880974963129152, "grad_norm": 0.72265625, "learning_rate": 0.00013085584220014101, "loss": 0.7316, "step": 26798 }, { "epoch": 0.688123173508837, "grad_norm": 0.7265625, "learning_rate": 0.00013085159579667407, "loss": 0.8574, "step": 26799 }, { "epoch": 0.6881488507047588, "grad_norm": 0.8125, "learning_rate": 0.00013084734933172264, "loss": 0.8409, "step": 26800 }, { "epoch": 0.6881745279006806, "grad_norm": 0.76953125, "learning_rate": 0.00013084310280529526, "loss": 0.8665, "step": 26801 }, { "epoch": 0.6882002050966024, "grad_norm": 0.73046875, "learning_rate": 0.00013083885621740038, "loss": 0.8155, "step": 26802 }, { "epoch": 0.6882258822925242, "grad_norm": 0.84375, "learning_rate": 0.00013083460956804643, "loss": 0.799, "step": 26803 }, { "epoch": 0.6882515594884461, "grad_norm": 0.80078125, "learning_rate": 0.00013083036285724186, "loss": 0.854, "step": 26804 }, { "epoch": 0.6882772366843679, "grad_norm": 0.796875, "learning_rate": 0.00013082611608499517, "loss": 0.6999, "step": 26805 }, { "epoch": 0.6883029138802897, "grad_norm": 0.85546875, "learning_rate": 0.00013082186925131482, "loss": 0.8933, "step": 26806 }, { "epoch": 0.6883285910762115, "grad_norm": 0.828125, "learning_rate": 0.0001308176223562093, "loss": 0.7693, "step": 26807 }, { "epoch": 0.6883542682721333, "grad_norm": 0.765625, "learning_rate": 0.00013081337539968703, "loss": 0.7167, "step": 26808 }, { "epoch": 0.6883799454680551, "grad_norm": 0.80859375, "learning_rate": 0.00013080912838175647, "loss": 0.907, "step": 26809 }, { "epoch": 0.688405622663977, "grad_norm": 0.83984375, "learning_rate": 0.0001308048813024261, "loss": 0.8408, "step": 26810 }, { "epoch": 0.6884312998598988, "grad_norm": 0.74609375, "learning_rate": 0.00013080063416170439, "loss": 0.7567, "step": 26811 }, { "epoch": 0.6884569770558207, "grad_norm": 0.7265625, "learning_rate": 0.0001307963869595998, "loss": 0.8393, "step": 26812 }, { "epoch": 0.6884826542517425, "grad_norm": 0.796875, "learning_rate": 0.0001307921396961208, "loss": 0.7639, "step": 26813 }, { "epoch": 0.6885083314476642, "grad_norm": 0.78515625, "learning_rate": 0.00013078789237127582, "loss": 0.7936, "step": 26814 }, { "epoch": 0.688534008643586, "grad_norm": 0.828125, "learning_rate": 0.00013078364498507336, "loss": 0.919, "step": 26815 }, { "epoch": 0.6885596858395079, "grad_norm": 0.73046875, "learning_rate": 0.00013077939753752192, "loss": 0.7389, "step": 26816 }, { "epoch": 0.6885853630354297, "grad_norm": 0.8359375, "learning_rate": 0.00013077515002862987, "loss": 0.9264, "step": 26817 }, { "epoch": 0.6886110402313516, "grad_norm": 0.75390625, "learning_rate": 0.00013077090245840572, "loss": 0.7664, "step": 26818 }, { "epoch": 0.6886367174272734, "grad_norm": 0.83203125, "learning_rate": 0.00013076665482685798, "loss": 0.8087, "step": 26819 }, { "epoch": 0.6886623946231952, "grad_norm": 0.75, "learning_rate": 0.00013076240713399506, "loss": 0.8247, "step": 26820 }, { "epoch": 0.688688071819117, "grad_norm": 0.8203125, "learning_rate": 0.00013075815937982544, "loss": 0.9767, "step": 26821 }, { "epoch": 0.6887137490150388, "grad_norm": 0.80859375, "learning_rate": 0.00013075391156435758, "loss": 0.8503, "step": 26822 }, { "epoch": 0.6887394262109606, "grad_norm": 0.7578125, "learning_rate": 0.00013074966368759996, "loss": 0.69, "step": 26823 }, { "epoch": 0.6887651034068825, "grad_norm": 0.84375, "learning_rate": 0.00013074541574956106, "loss": 0.7128, "step": 26824 }, { "epoch": 0.6887907806028043, "grad_norm": 0.76171875, "learning_rate": 0.0001307411677502493, "loss": 0.8301, "step": 26825 }, { "epoch": 0.6888164577987261, "grad_norm": 0.7578125, "learning_rate": 0.00013073691968967314, "loss": 0.833, "step": 26826 }, { "epoch": 0.6888421349946479, "grad_norm": 0.78515625, "learning_rate": 0.0001307326715678411, "loss": 0.7984, "step": 26827 }, { "epoch": 0.6888678121905697, "grad_norm": 0.77734375, "learning_rate": 0.00013072842338476164, "loss": 0.9096, "step": 26828 }, { "epoch": 0.6888934893864915, "grad_norm": 0.79296875, "learning_rate": 0.0001307241751404432, "loss": 0.841, "step": 26829 }, { "epoch": 0.6889191665824134, "grad_norm": 0.76171875, "learning_rate": 0.00013071992683489427, "loss": 0.9461, "step": 26830 }, { "epoch": 0.6889448437783352, "grad_norm": 0.74609375, "learning_rate": 0.00013071567846812326, "loss": 0.7744, "step": 26831 }, { "epoch": 0.688970520974257, "grad_norm": 0.71484375, "learning_rate": 0.00013071143004013872, "loss": 0.8555, "step": 26832 }, { "epoch": 0.6889961981701789, "grad_norm": 1.3125, "learning_rate": 0.00013070718155094903, "loss": 1.0153, "step": 26833 }, { "epoch": 0.6890218753661006, "grad_norm": 0.76953125, "learning_rate": 0.00013070293300056272, "loss": 0.8674, "step": 26834 }, { "epoch": 0.6890475525620224, "grad_norm": 0.71484375, "learning_rate": 0.00013069868438898827, "loss": 0.8737, "step": 26835 }, { "epoch": 0.6890732297579443, "grad_norm": 0.703125, "learning_rate": 0.00013069443571623405, "loss": 0.8302, "step": 26836 }, { "epoch": 0.6890989069538661, "grad_norm": 0.82421875, "learning_rate": 0.00013069018698230863, "loss": 0.8477, "step": 26837 }, { "epoch": 0.6891245841497879, "grad_norm": 0.80859375, "learning_rate": 0.00013068593818722047, "loss": 0.8388, "step": 26838 }, { "epoch": 0.6891502613457098, "grad_norm": 0.734375, "learning_rate": 0.00013068168933097796, "loss": 0.7828, "step": 26839 }, { "epoch": 0.6891759385416316, "grad_norm": 0.75390625, "learning_rate": 0.00013067744041358963, "loss": 0.7653, "step": 26840 }, { "epoch": 0.6892016157375533, "grad_norm": 0.8671875, "learning_rate": 0.00013067319143506391, "loss": 0.8623, "step": 26841 }, { "epoch": 0.6892272929334752, "grad_norm": 0.84375, "learning_rate": 0.0001306689423954093, "loss": 0.7802, "step": 26842 }, { "epoch": 0.689252970129397, "grad_norm": 0.78125, "learning_rate": 0.0001306646932946343, "loss": 0.7063, "step": 26843 }, { "epoch": 0.6892786473253188, "grad_norm": 0.79296875, "learning_rate": 0.0001306604441327473, "loss": 0.8229, "step": 26844 }, { "epoch": 0.6893043245212407, "grad_norm": 0.7890625, "learning_rate": 0.0001306561949097568, "loss": 0.8729, "step": 26845 }, { "epoch": 0.6893300017171625, "grad_norm": 0.796875, "learning_rate": 0.0001306519456256713, "loss": 0.9071, "step": 26846 }, { "epoch": 0.6893556789130842, "grad_norm": 0.72265625, "learning_rate": 0.0001306476962804992, "loss": 0.8865, "step": 26847 }, { "epoch": 0.6893813561090061, "grad_norm": 0.7890625, "learning_rate": 0.00013064344687424906, "loss": 0.7769, "step": 26848 }, { "epoch": 0.6894070333049279, "grad_norm": 0.77734375, "learning_rate": 0.00013063919740692928, "loss": 0.8785, "step": 26849 }, { "epoch": 0.6894327105008498, "grad_norm": 0.92578125, "learning_rate": 0.00013063494787854833, "loss": 0.937, "step": 26850 }, { "epoch": 0.6894583876967716, "grad_norm": 0.8203125, "learning_rate": 0.0001306306982891147, "loss": 0.9039, "step": 26851 }, { "epoch": 0.6894840648926934, "grad_norm": 0.76171875, "learning_rate": 0.00013062644863863685, "loss": 0.7444, "step": 26852 }, { "epoch": 0.6895097420886153, "grad_norm": 0.7109375, "learning_rate": 0.00013062219892712325, "loss": 0.7462, "step": 26853 }, { "epoch": 0.689535419284537, "grad_norm": 0.7578125, "learning_rate": 0.00013061794915458243, "loss": 0.8565, "step": 26854 }, { "epoch": 0.6895610964804588, "grad_norm": 1.0078125, "learning_rate": 0.00013061369932102274, "loss": 0.8604, "step": 26855 }, { "epoch": 0.6895867736763807, "grad_norm": 0.8359375, "learning_rate": 0.00013060944942645274, "loss": 0.7634, "step": 26856 }, { "epoch": 0.6896124508723025, "grad_norm": 0.8671875, "learning_rate": 0.00013060519947088087, "loss": 0.8992, "step": 26857 }, { "epoch": 0.6896381280682243, "grad_norm": 0.82421875, "learning_rate": 0.00013060094945431557, "loss": 0.8791, "step": 26858 }, { "epoch": 0.6896638052641462, "grad_norm": 0.86328125, "learning_rate": 0.0001305966993767654, "loss": 0.8493, "step": 26859 }, { "epoch": 0.6896894824600679, "grad_norm": 0.76953125, "learning_rate": 0.00013059244923823874, "loss": 0.7878, "step": 26860 }, { "epoch": 0.6897151596559897, "grad_norm": 0.74609375, "learning_rate": 0.0001305881990387441, "loss": 0.8174, "step": 26861 }, { "epoch": 0.6897408368519116, "grad_norm": 0.734375, "learning_rate": 0.00013058394877828993, "loss": 0.9012, "step": 26862 }, { "epoch": 0.6897665140478334, "grad_norm": 0.86328125, "learning_rate": 0.00013057969845688472, "loss": 0.9825, "step": 26863 }, { "epoch": 0.6897921912437552, "grad_norm": 0.78125, "learning_rate": 0.0001305754480745369, "loss": 0.818, "step": 26864 }, { "epoch": 0.6898178684396771, "grad_norm": 0.89453125, "learning_rate": 0.00013057119763125504, "loss": 0.8783, "step": 26865 }, { "epoch": 0.6898435456355989, "grad_norm": 0.76953125, "learning_rate": 0.0001305669471270475, "loss": 0.7901, "step": 26866 }, { "epoch": 0.6898692228315206, "grad_norm": 0.9140625, "learning_rate": 0.00013056269656192283, "loss": 0.8687, "step": 26867 }, { "epoch": 0.6898949000274425, "grad_norm": 0.80859375, "learning_rate": 0.00013055844593588942, "loss": 0.9122, "step": 26868 }, { "epoch": 0.6899205772233643, "grad_norm": 0.76171875, "learning_rate": 0.00013055419524895582, "loss": 0.731, "step": 26869 }, { "epoch": 0.6899462544192861, "grad_norm": 0.76171875, "learning_rate": 0.00013054994450113048, "loss": 0.8333, "step": 26870 }, { "epoch": 0.689971931615208, "grad_norm": 0.78515625, "learning_rate": 0.00013054569369242181, "loss": 0.9154, "step": 26871 }, { "epoch": 0.6899976088111298, "grad_norm": 0.76953125, "learning_rate": 0.00013054144282283837, "loss": 0.8735, "step": 26872 }, { "epoch": 0.6900232860070517, "grad_norm": 0.76953125, "learning_rate": 0.0001305371918923886, "loss": 0.847, "step": 26873 }, { "epoch": 0.6900489632029734, "grad_norm": 0.765625, "learning_rate": 0.00013053294090108094, "loss": 0.8093, "step": 26874 }, { "epoch": 0.6900746403988952, "grad_norm": 0.7578125, "learning_rate": 0.0001305286898489239, "loss": 0.8805, "step": 26875 }, { "epoch": 0.690100317594817, "grad_norm": 0.79296875, "learning_rate": 0.00013052443873592593, "loss": 0.7654, "step": 26876 }, { "epoch": 0.6901259947907389, "grad_norm": 0.703125, "learning_rate": 0.0001305201875620955, "loss": 0.7588, "step": 26877 }, { "epoch": 0.6901516719866607, "grad_norm": 0.78515625, "learning_rate": 0.0001305159363274411, "loss": 0.8215, "step": 26878 }, { "epoch": 0.6901773491825826, "grad_norm": 0.76171875, "learning_rate": 0.0001305116850319712, "loss": 0.8391, "step": 26879 }, { "epoch": 0.6902030263785043, "grad_norm": 0.90234375, "learning_rate": 0.00013050743367569432, "loss": 0.8484, "step": 26880 }, { "epoch": 0.6902287035744261, "grad_norm": 0.78125, "learning_rate": 0.0001305031822586188, "loss": 0.9136, "step": 26881 }, { "epoch": 0.690254380770348, "grad_norm": 0.70703125, "learning_rate": 0.00013049893078075325, "loss": 0.8485, "step": 26882 }, { "epoch": 0.6902800579662698, "grad_norm": 0.703125, "learning_rate": 0.00013049467924210604, "loss": 0.7208, "step": 26883 }, { "epoch": 0.6903057351621916, "grad_norm": 0.88671875, "learning_rate": 0.0001304904276426857, "loss": 0.8664, "step": 26884 }, { "epoch": 0.6903314123581135, "grad_norm": 0.8359375, "learning_rate": 0.00013048617598250073, "loss": 0.8053, "step": 26885 }, { "epoch": 0.6903570895540353, "grad_norm": 0.83984375, "learning_rate": 0.00013048192426155954, "loss": 0.8918, "step": 26886 }, { "epoch": 0.690382766749957, "grad_norm": 0.765625, "learning_rate": 0.00013047767247987064, "loss": 0.9044, "step": 26887 }, { "epoch": 0.6904084439458789, "grad_norm": 0.76171875, "learning_rate": 0.00013047342063744245, "loss": 0.8008, "step": 26888 }, { "epoch": 0.6904341211418007, "grad_norm": 0.7578125, "learning_rate": 0.00013046916873428352, "loss": 0.723, "step": 26889 }, { "epoch": 0.6904597983377225, "grad_norm": 0.77734375, "learning_rate": 0.0001304649167704023, "loss": 0.8452, "step": 26890 }, { "epoch": 0.6904854755336444, "grad_norm": 0.7578125, "learning_rate": 0.00013046066474580722, "loss": 0.7076, "step": 26891 }, { "epoch": 0.6905111527295662, "grad_norm": 0.796875, "learning_rate": 0.0001304564126605068, "loss": 0.6931, "step": 26892 }, { "epoch": 0.690536829925488, "grad_norm": 0.8203125, "learning_rate": 0.0001304521605145095, "loss": 1.0514, "step": 26893 }, { "epoch": 0.6905625071214098, "grad_norm": 0.87890625, "learning_rate": 0.00013044790830782382, "loss": 0.9045, "step": 26894 }, { "epoch": 0.6905881843173316, "grad_norm": 0.75, "learning_rate": 0.00013044365604045818, "loss": 0.9138, "step": 26895 }, { "epoch": 0.6906138615132534, "grad_norm": 0.73046875, "learning_rate": 0.00013043940371242107, "loss": 0.8212, "step": 26896 }, { "epoch": 0.6906395387091753, "grad_norm": 0.7890625, "learning_rate": 0.000130435151323721, "loss": 0.891, "step": 26897 }, { "epoch": 0.6906652159050971, "grad_norm": 0.73828125, "learning_rate": 0.00013043089887436642, "loss": 0.7875, "step": 26898 }, { "epoch": 0.6906908931010189, "grad_norm": 0.75390625, "learning_rate": 0.0001304266463643658, "loss": 0.7602, "step": 26899 }, { "epoch": 0.6907165702969407, "grad_norm": 0.84375, "learning_rate": 0.00013042239379372765, "loss": 0.9375, "step": 26900 }, { "epoch": 0.6907422474928625, "grad_norm": 0.7421875, "learning_rate": 0.0001304181411624604, "loss": 0.8082, "step": 26901 }, { "epoch": 0.6907679246887843, "grad_norm": 0.796875, "learning_rate": 0.00013041388847057254, "loss": 0.9276, "step": 26902 }, { "epoch": 0.6907936018847062, "grad_norm": 0.7578125, "learning_rate": 0.00013040963571807253, "loss": 0.9034, "step": 26903 }, { "epoch": 0.690819279080628, "grad_norm": 0.7421875, "learning_rate": 0.00013040538290496888, "loss": 0.9547, "step": 26904 }, { "epoch": 0.6908449562765498, "grad_norm": 1.0390625, "learning_rate": 0.00013040113003127005, "loss": 0.9169, "step": 26905 }, { "epoch": 0.6908706334724717, "grad_norm": 0.83984375, "learning_rate": 0.00013039687709698452, "loss": 0.9069, "step": 26906 }, { "epoch": 0.6908963106683934, "grad_norm": 0.81640625, "learning_rate": 0.00013039262410212077, "loss": 0.9278, "step": 26907 }, { "epoch": 0.6909219878643152, "grad_norm": 0.88671875, "learning_rate": 0.00013038837104668722, "loss": 0.7863, "step": 26908 }, { "epoch": 0.6909476650602371, "grad_norm": 0.8359375, "learning_rate": 0.00013038411793069244, "loss": 0.747, "step": 26909 }, { "epoch": 0.6909733422561589, "grad_norm": 0.76953125, "learning_rate": 0.00013037986475414483, "loss": 0.8906, "step": 26910 }, { "epoch": 0.6909990194520808, "grad_norm": 0.78125, "learning_rate": 0.00013037561151705288, "loss": 0.9618, "step": 26911 }, { "epoch": 0.6910246966480026, "grad_norm": 0.67578125, "learning_rate": 0.0001303713582194251, "loss": 0.8012, "step": 26912 }, { "epoch": 0.6910503738439244, "grad_norm": 0.8359375, "learning_rate": 0.00013036710486126994, "loss": 0.8235, "step": 26913 }, { "epoch": 0.6910760510398462, "grad_norm": 0.7421875, "learning_rate": 0.0001303628514425959, "loss": 0.8683, "step": 26914 }, { "epoch": 0.691101728235768, "grad_norm": 0.7890625, "learning_rate": 0.00013035859796341147, "loss": 0.9761, "step": 26915 }, { "epoch": 0.6911274054316898, "grad_norm": 0.83203125, "learning_rate": 0.000130354344423725, "loss": 0.9681, "step": 26916 }, { "epoch": 0.6911530826276117, "grad_norm": 0.7578125, "learning_rate": 0.00013035009082354513, "loss": 0.7844, "step": 26917 }, { "epoch": 0.6911787598235335, "grad_norm": 0.74609375, "learning_rate": 0.00013034583716288028, "loss": 0.8832, "step": 26918 }, { "epoch": 0.6912044370194553, "grad_norm": 0.828125, "learning_rate": 0.00013034158344173887, "loss": 0.746, "step": 26919 }, { "epoch": 0.6912301142153771, "grad_norm": 0.6953125, "learning_rate": 0.00013033732966012947, "loss": 0.9254, "step": 26920 }, { "epoch": 0.6912557914112989, "grad_norm": 0.796875, "learning_rate": 0.0001303330758180605, "loss": 0.7922, "step": 26921 }, { "epoch": 0.6912814686072207, "grad_norm": 0.8046875, "learning_rate": 0.00013032882191554043, "loss": 0.8753, "step": 26922 }, { "epoch": 0.6913071458031426, "grad_norm": 0.859375, "learning_rate": 0.00013032456795257778, "loss": 0.7433, "step": 26923 }, { "epoch": 0.6913328229990644, "grad_norm": 0.76953125, "learning_rate": 0.000130320313929181, "loss": 0.6986, "step": 26924 }, { "epoch": 0.6913585001949862, "grad_norm": 0.73828125, "learning_rate": 0.00013031605984535857, "loss": 0.8164, "step": 26925 }, { "epoch": 0.6913841773909081, "grad_norm": 0.76953125, "learning_rate": 0.00013031180570111897, "loss": 0.8312, "step": 26926 }, { "epoch": 0.6914098545868298, "grad_norm": 0.8203125, "learning_rate": 0.0001303075514964707, "loss": 0.7112, "step": 26927 }, { "epoch": 0.6914355317827516, "grad_norm": 0.79296875, "learning_rate": 0.00013030329723142222, "loss": 0.8098, "step": 26928 }, { "epoch": 0.6914612089786735, "grad_norm": 0.796875, "learning_rate": 0.00013029904290598194, "loss": 0.9593, "step": 26929 }, { "epoch": 0.6914868861745953, "grad_norm": 0.7578125, "learning_rate": 0.00013029478852015847, "loss": 0.8465, "step": 26930 }, { "epoch": 0.6915125633705171, "grad_norm": 0.7421875, "learning_rate": 0.0001302905340739602, "loss": 0.8389, "step": 26931 }, { "epoch": 0.691538240566439, "grad_norm": 0.79296875, "learning_rate": 0.00013028627956739562, "loss": 0.8486, "step": 26932 }, { "epoch": 0.6915639177623608, "grad_norm": 0.83984375, "learning_rate": 0.00013028202500047327, "loss": 0.8586, "step": 26933 }, { "epoch": 0.6915895949582825, "grad_norm": 0.71484375, "learning_rate": 0.00013027777037320152, "loss": 0.7866, "step": 26934 }, { "epoch": 0.6916152721542044, "grad_norm": 0.7109375, "learning_rate": 0.0001302735156855889, "loss": 0.8147, "step": 26935 }, { "epoch": 0.6916409493501262, "grad_norm": 0.75390625, "learning_rate": 0.00013026926093764397, "loss": 0.7997, "step": 26936 }, { "epoch": 0.691666626546048, "grad_norm": 0.77734375, "learning_rate": 0.0001302650061293751, "loss": 0.8511, "step": 26937 }, { "epoch": 0.6916923037419699, "grad_norm": 0.78125, "learning_rate": 0.0001302607512607908, "loss": 0.8288, "step": 26938 }, { "epoch": 0.6917179809378917, "grad_norm": 0.7421875, "learning_rate": 0.00013025649633189955, "loss": 0.7588, "step": 26939 }, { "epoch": 0.6917436581338134, "grad_norm": 0.76953125, "learning_rate": 0.00013025224134270987, "loss": 0.8416, "step": 26940 }, { "epoch": 0.6917693353297353, "grad_norm": 0.83203125, "learning_rate": 0.00013024798629323017, "loss": 0.9662, "step": 26941 }, { "epoch": 0.6917950125256571, "grad_norm": 0.77734375, "learning_rate": 0.00013024373118346896, "loss": 0.8684, "step": 26942 }, { "epoch": 0.691820689721579, "grad_norm": 0.99609375, "learning_rate": 0.00013023947601343474, "loss": 0.858, "step": 26943 }, { "epoch": 0.6918463669175008, "grad_norm": 0.828125, "learning_rate": 0.000130235220783136, "loss": 1.0202, "step": 26944 }, { "epoch": 0.6918720441134226, "grad_norm": 0.75390625, "learning_rate": 0.00013023096549258115, "loss": 0.7869, "step": 26945 }, { "epoch": 0.6918977213093445, "grad_norm": 0.70703125, "learning_rate": 0.00013022671014177877, "loss": 0.6975, "step": 26946 }, { "epoch": 0.6919233985052662, "grad_norm": 0.7578125, "learning_rate": 0.00013022245473073726, "loss": 0.8294, "step": 26947 }, { "epoch": 0.691949075701188, "grad_norm": 0.76171875, "learning_rate": 0.0001302181992594651, "loss": 0.7974, "step": 26948 }, { "epoch": 0.6919747528971099, "grad_norm": 0.8515625, "learning_rate": 0.00013021394372797083, "loss": 0.922, "step": 26949 }, { "epoch": 0.6920004300930317, "grad_norm": 0.77734375, "learning_rate": 0.0001302096881362629, "loss": 0.8165, "step": 26950 }, { "epoch": 0.6920261072889535, "grad_norm": 0.81640625, "learning_rate": 0.00013020543248434978, "loss": 0.87, "step": 26951 }, { "epoch": 0.6920517844848754, "grad_norm": 0.8984375, "learning_rate": 0.00013020117677223998, "loss": 0.8382, "step": 26952 }, { "epoch": 0.6920774616807972, "grad_norm": 0.81640625, "learning_rate": 0.00013019692099994193, "loss": 0.8478, "step": 26953 }, { "epoch": 0.6921031388767189, "grad_norm": 0.74609375, "learning_rate": 0.0001301926651674642, "loss": 0.812, "step": 26954 }, { "epoch": 0.6921288160726408, "grad_norm": 0.7421875, "learning_rate": 0.00013018840927481518, "loss": 0.7743, "step": 26955 }, { "epoch": 0.6921544932685626, "grad_norm": 0.796875, "learning_rate": 0.00013018415332200335, "loss": 0.9425, "step": 26956 }, { "epoch": 0.6921801704644844, "grad_norm": 0.8125, "learning_rate": 0.00013017989730903726, "loss": 0.8236, "step": 26957 }, { "epoch": 0.6922058476604063, "grad_norm": 0.73046875, "learning_rate": 0.0001301756412359254, "loss": 0.775, "step": 26958 }, { "epoch": 0.6922315248563281, "grad_norm": 0.8125, "learning_rate": 0.00013017138510267615, "loss": 0.8714, "step": 26959 }, { "epoch": 0.6922572020522498, "grad_norm": 0.76953125, "learning_rate": 0.0001301671289092981, "loss": 0.9375, "step": 26960 }, { "epoch": 0.6922828792481717, "grad_norm": 0.78515625, "learning_rate": 0.00013016287265579964, "loss": 0.7737, "step": 26961 }, { "epoch": 0.6923085564440935, "grad_norm": 0.79296875, "learning_rate": 0.00013015861634218935, "loss": 0.777, "step": 26962 }, { "epoch": 0.6923342336400153, "grad_norm": 0.8359375, "learning_rate": 0.00013015435996847566, "loss": 0.8358, "step": 26963 }, { "epoch": 0.6923599108359372, "grad_norm": 0.796875, "learning_rate": 0.00013015010353466702, "loss": 0.8756, "step": 26964 }, { "epoch": 0.692385588031859, "grad_norm": 0.8046875, "learning_rate": 0.00013014584704077196, "loss": 1.0232, "step": 26965 }, { "epoch": 0.6924112652277808, "grad_norm": 0.78125, "learning_rate": 0.00013014159048679896, "loss": 0.8421, "step": 26966 }, { "epoch": 0.6924369424237026, "grad_norm": 0.72265625, "learning_rate": 0.00013013733387275649, "loss": 0.7253, "step": 26967 }, { "epoch": 0.6924626196196244, "grad_norm": 0.86328125, "learning_rate": 0.000130133077198653, "loss": 0.9746, "step": 26968 }, { "epoch": 0.6924882968155462, "grad_norm": 0.7109375, "learning_rate": 0.00013012882046449704, "loss": 0.826, "step": 26969 }, { "epoch": 0.6925139740114681, "grad_norm": 0.828125, "learning_rate": 0.00013012456367029709, "loss": 0.7927, "step": 26970 }, { "epoch": 0.6925396512073899, "grad_norm": 0.73046875, "learning_rate": 0.00013012030681606158, "loss": 0.6965, "step": 26971 }, { "epoch": 0.6925653284033118, "grad_norm": 0.83984375, "learning_rate": 0.00013011604990179903, "loss": 0.9028, "step": 26972 }, { "epoch": 0.6925910055992336, "grad_norm": 0.83984375, "learning_rate": 0.0001301117929275179, "loss": 0.7899, "step": 26973 }, { "epoch": 0.6926166827951553, "grad_norm": 0.7421875, "learning_rate": 0.00013010753589322668, "loss": 0.787, "step": 26974 }, { "epoch": 0.6926423599910772, "grad_norm": 0.78125, "learning_rate": 0.00013010327879893385, "loss": 0.8875, "step": 26975 }, { "epoch": 0.692668037186999, "grad_norm": 0.79296875, "learning_rate": 0.00013009902164464794, "loss": 0.8262, "step": 26976 }, { "epoch": 0.6926937143829208, "grad_norm": 0.6796875, "learning_rate": 0.00013009476443037735, "loss": 0.8111, "step": 26977 }, { "epoch": 0.6927193915788427, "grad_norm": 0.9375, "learning_rate": 0.00013009050715613065, "loss": 0.8512, "step": 26978 }, { "epoch": 0.6927450687747645, "grad_norm": 0.76953125, "learning_rate": 0.00013008624982191632, "loss": 0.7515, "step": 26979 }, { "epoch": 0.6927707459706862, "grad_norm": 0.7890625, "learning_rate": 0.00013008199242774276, "loss": 0.8526, "step": 26980 }, { "epoch": 0.692796423166608, "grad_norm": 0.79296875, "learning_rate": 0.00013007773497361851, "loss": 0.7401, "step": 26981 }, { "epoch": 0.6928221003625299, "grad_norm": 0.73828125, "learning_rate": 0.00013007347745955207, "loss": 0.8983, "step": 26982 }, { "epoch": 0.6928477775584517, "grad_norm": 0.83203125, "learning_rate": 0.00013006921988555187, "loss": 0.7823, "step": 26983 }, { "epoch": 0.6928734547543736, "grad_norm": 0.75390625, "learning_rate": 0.00013006496225162646, "loss": 0.8548, "step": 26984 }, { "epoch": 0.6928991319502954, "grad_norm": 0.85546875, "learning_rate": 0.0001300607045577843, "loss": 0.7555, "step": 26985 }, { "epoch": 0.6929248091462172, "grad_norm": 0.79296875, "learning_rate": 0.00013005644680403388, "loss": 0.9061, "step": 26986 }, { "epoch": 0.692950486342139, "grad_norm": 0.81640625, "learning_rate": 0.00013005218899038367, "loss": 0.7394, "step": 26987 }, { "epoch": 0.6929761635380608, "grad_norm": 0.72265625, "learning_rate": 0.00013004793111684213, "loss": 0.8244, "step": 26988 }, { "epoch": 0.6930018407339826, "grad_norm": 0.859375, "learning_rate": 0.00013004367318341782, "loss": 0.8804, "step": 26989 }, { "epoch": 0.6930275179299045, "grad_norm": 0.828125, "learning_rate": 0.00013003941519011916, "loss": 0.8565, "step": 26990 }, { "epoch": 0.6930531951258263, "grad_norm": 0.80078125, "learning_rate": 0.00013003515713695465, "loss": 0.7662, "step": 26991 }, { "epoch": 0.6930788723217481, "grad_norm": 0.87890625, "learning_rate": 0.00013003089902393283, "loss": 0.8637, "step": 26992 }, { "epoch": 0.69310454951767, "grad_norm": 0.7890625, "learning_rate": 0.0001300266408510621, "loss": 0.8273, "step": 26993 }, { "epoch": 0.6931302267135917, "grad_norm": 0.78515625, "learning_rate": 0.00013002238261835097, "loss": 0.7686, "step": 26994 }, { "epoch": 0.6931559039095135, "grad_norm": 0.68359375, "learning_rate": 0.00013001812432580798, "loss": 0.6451, "step": 26995 }, { "epoch": 0.6931815811054354, "grad_norm": 0.72265625, "learning_rate": 0.00013001386597344156, "loss": 0.853, "step": 26996 }, { "epoch": 0.6932072583013572, "grad_norm": 0.76953125, "learning_rate": 0.00013000960756126025, "loss": 0.8902, "step": 26997 }, { "epoch": 0.693232935497279, "grad_norm": 0.93359375, "learning_rate": 0.00013000534908927248, "loss": 0.82, "step": 26998 }, { "epoch": 0.6932586126932009, "grad_norm": 0.85546875, "learning_rate": 0.00013000109055748675, "loss": 1.0165, "step": 26999 }, { "epoch": 0.6932842898891226, "grad_norm": 0.84765625, "learning_rate": 0.00012999683196591156, "loss": 0.9192, "step": 27000 }, { "epoch": 0.6932842898891226, "eval_loss": 0.8348429799079895, "eval_runtime": 388.4811, "eval_samples_per_second": 25.741, "eval_steps_per_second": 0.806, "step": 27000 }, { "epoch": 0.6933099670850444, "grad_norm": 0.76953125, "learning_rate": 0.0001299925733145554, "loss": 0.8545, "step": 27001 }, { "epoch": 0.6933356442809663, "grad_norm": 0.7421875, "learning_rate": 0.00012998831460342674, "loss": 0.8844, "step": 27002 }, { "epoch": 0.6933613214768881, "grad_norm": 0.796875, "learning_rate": 0.00012998405583253408, "loss": 0.8758, "step": 27003 }, { "epoch": 0.69338699867281, "grad_norm": 0.87109375, "learning_rate": 0.00012997979700188589, "loss": 0.9657, "step": 27004 }, { "epoch": 0.6934126758687318, "grad_norm": 0.86328125, "learning_rate": 0.00012997553811149075, "loss": 0.9708, "step": 27005 }, { "epoch": 0.6934383530646536, "grad_norm": 0.77734375, "learning_rate": 0.00012997127916135697, "loss": 0.7353, "step": 27006 }, { "epoch": 0.6934640302605753, "grad_norm": 0.8046875, "learning_rate": 0.0001299670201514932, "loss": 0.8765, "step": 27007 }, { "epoch": 0.6934897074564972, "grad_norm": 0.7734375, "learning_rate": 0.00012996276108190785, "loss": 0.7747, "step": 27008 }, { "epoch": 0.693515384652419, "grad_norm": 0.77734375, "learning_rate": 0.00012995850195260942, "loss": 0.9435, "step": 27009 }, { "epoch": 0.6935410618483409, "grad_norm": 0.7421875, "learning_rate": 0.0001299542427636064, "loss": 0.9104, "step": 27010 }, { "epoch": 0.6935667390442627, "grad_norm": 0.96875, "learning_rate": 0.0001299499835149073, "loss": 0.82, "step": 27011 }, { "epoch": 0.6935924162401845, "grad_norm": 0.734375, "learning_rate": 0.00012994572420652056, "loss": 0.7521, "step": 27012 }, { "epoch": 0.6936180934361064, "grad_norm": 0.7890625, "learning_rate": 0.0001299414648384547, "loss": 0.8337, "step": 27013 }, { "epoch": 0.6936437706320281, "grad_norm": 0.78515625, "learning_rate": 0.00012993720541071822, "loss": 0.8509, "step": 27014 }, { "epoch": 0.6936694478279499, "grad_norm": 0.77734375, "learning_rate": 0.00012993294592331957, "loss": 0.806, "step": 27015 }, { "epoch": 0.6936951250238718, "grad_norm": 0.6875, "learning_rate": 0.00012992868637626729, "loss": 0.8181, "step": 27016 }, { "epoch": 0.6937208022197936, "grad_norm": 0.71484375, "learning_rate": 0.0001299244267695698, "loss": 0.7755, "step": 27017 }, { "epoch": 0.6937464794157154, "grad_norm": 0.96484375, "learning_rate": 0.00012992016710323568, "loss": 0.8631, "step": 27018 }, { "epoch": 0.6937721566116373, "grad_norm": 0.76171875, "learning_rate": 0.0001299159073772734, "loss": 0.6687, "step": 27019 }, { "epoch": 0.693797833807559, "grad_norm": 0.796875, "learning_rate": 0.00012991164759169134, "loss": 0.9351, "step": 27020 }, { "epoch": 0.6938235110034808, "grad_norm": 0.82421875, "learning_rate": 0.00012990738774649812, "loss": 0.8974, "step": 27021 }, { "epoch": 0.6938491881994027, "grad_norm": 0.76171875, "learning_rate": 0.00012990312784170213, "loss": 0.7767, "step": 27022 }, { "epoch": 0.6938748653953245, "grad_norm": 0.6953125, "learning_rate": 0.00012989886787731194, "loss": 0.7514, "step": 27023 }, { "epoch": 0.6939005425912463, "grad_norm": 0.71875, "learning_rate": 0.000129894607853336, "loss": 0.8103, "step": 27024 }, { "epoch": 0.6939262197871682, "grad_norm": 0.89453125, "learning_rate": 0.00012989034776978282, "loss": 1.0924, "step": 27025 }, { "epoch": 0.69395189698309, "grad_norm": 0.72265625, "learning_rate": 0.00012988608762666085, "loss": 0.7654, "step": 27026 }, { "epoch": 0.6939775741790117, "grad_norm": 0.77734375, "learning_rate": 0.00012988182742397863, "loss": 0.8888, "step": 27027 }, { "epoch": 0.6940032513749336, "grad_norm": 0.84375, "learning_rate": 0.00012987756716174463, "loss": 0.788, "step": 27028 }, { "epoch": 0.6940289285708554, "grad_norm": 0.78125, "learning_rate": 0.0001298733068399673, "loss": 0.869, "step": 27029 }, { "epoch": 0.6940546057667772, "grad_norm": 0.78125, "learning_rate": 0.00012986904645865521, "loss": 0.741, "step": 27030 }, { "epoch": 0.6940802829626991, "grad_norm": 0.81640625, "learning_rate": 0.00012986478601781677, "loss": 0.7718, "step": 27031 }, { "epoch": 0.6941059601586209, "grad_norm": 0.7265625, "learning_rate": 0.00012986052551746058, "loss": 0.8663, "step": 27032 }, { "epoch": 0.6941316373545428, "grad_norm": 0.6953125, "learning_rate": 0.00012985626495759498, "loss": 0.8549, "step": 27033 }, { "epoch": 0.6941573145504645, "grad_norm": 0.8203125, "learning_rate": 0.00012985200433822858, "loss": 0.8789, "step": 27034 }, { "epoch": 0.6941829917463863, "grad_norm": 0.875, "learning_rate": 0.00012984774365936983, "loss": 0.8344, "step": 27035 }, { "epoch": 0.6942086689423081, "grad_norm": 0.8359375, "learning_rate": 0.0001298434829210272, "loss": 0.8636, "step": 27036 }, { "epoch": 0.69423434613823, "grad_norm": 0.81640625, "learning_rate": 0.00012983922212320923, "loss": 0.9247, "step": 27037 }, { "epoch": 0.6942600233341518, "grad_norm": 0.82421875, "learning_rate": 0.0001298349612659244, "loss": 0.9421, "step": 27038 }, { "epoch": 0.6942857005300737, "grad_norm": 0.78125, "learning_rate": 0.00012983070034918115, "loss": 0.7907, "step": 27039 }, { "epoch": 0.6943113777259954, "grad_norm": 0.78515625, "learning_rate": 0.000129826439372988, "loss": 0.9031, "step": 27040 }, { "epoch": 0.6943370549219172, "grad_norm": 0.90625, "learning_rate": 0.0001298221783373535, "loss": 0.9106, "step": 27041 }, { "epoch": 0.694362732117839, "grad_norm": 0.734375, "learning_rate": 0.00012981791724228604, "loss": 0.8701, "step": 27042 }, { "epoch": 0.6943884093137609, "grad_norm": 0.7734375, "learning_rate": 0.0001298136560877942, "loss": 0.7764, "step": 27043 }, { "epoch": 0.6944140865096827, "grad_norm": 0.7734375, "learning_rate": 0.00012980939487388643, "loss": 0.8586, "step": 27044 }, { "epoch": 0.6944397637056046, "grad_norm": 0.7890625, "learning_rate": 0.00012980513360057123, "loss": 0.9678, "step": 27045 }, { "epoch": 0.6944654409015264, "grad_norm": 0.8671875, "learning_rate": 0.00012980087226785708, "loss": 0.8284, "step": 27046 }, { "epoch": 0.6944911180974481, "grad_norm": 0.80859375, "learning_rate": 0.0001297966108757525, "loss": 0.8503, "step": 27047 }, { "epoch": 0.69451679529337, "grad_norm": 0.8046875, "learning_rate": 0.00012979234942426594, "loss": 0.7764, "step": 27048 }, { "epoch": 0.6945424724892918, "grad_norm": 0.859375, "learning_rate": 0.00012978808791340592, "loss": 0.9258, "step": 27049 }, { "epoch": 0.6945681496852136, "grad_norm": 0.75390625, "learning_rate": 0.00012978382634318096, "loss": 0.7947, "step": 27050 }, { "epoch": 0.6945938268811355, "grad_norm": 0.76953125, "learning_rate": 0.00012977956471359954, "loss": 0.8257, "step": 27051 }, { "epoch": 0.6946195040770573, "grad_norm": 0.765625, "learning_rate": 0.0001297753030246701, "loss": 0.8572, "step": 27052 }, { "epoch": 0.6946451812729791, "grad_norm": 0.73828125, "learning_rate": 0.00012977104127640118, "loss": 0.7765, "step": 27053 }, { "epoch": 0.6946708584689009, "grad_norm": 0.75, "learning_rate": 0.00012976677946880124, "loss": 0.7698, "step": 27054 }, { "epoch": 0.6946965356648227, "grad_norm": 0.77734375, "learning_rate": 0.00012976251760187882, "loss": 1.0086, "step": 27055 }, { "epoch": 0.6947222128607445, "grad_norm": 0.734375, "learning_rate": 0.0001297582556756424, "loss": 0.7012, "step": 27056 }, { "epoch": 0.6947478900566664, "grad_norm": 0.73828125, "learning_rate": 0.00012975399369010045, "loss": 0.7293, "step": 27057 }, { "epoch": 0.6947735672525882, "grad_norm": 0.75390625, "learning_rate": 0.0001297497316452615, "loss": 0.7826, "step": 27058 }, { "epoch": 0.69479924444851, "grad_norm": 0.7265625, "learning_rate": 0.000129745469541134, "loss": 0.8508, "step": 27059 }, { "epoch": 0.6948249216444318, "grad_norm": 0.80078125, "learning_rate": 0.00012974120737772649, "loss": 0.7806, "step": 27060 }, { "epoch": 0.6948505988403536, "grad_norm": 0.78125, "learning_rate": 0.0001297369451550474, "loss": 0.8171, "step": 27061 }, { "epoch": 0.6948762760362754, "grad_norm": 0.8046875, "learning_rate": 0.00012973268287310528, "loss": 0.7946, "step": 27062 }, { "epoch": 0.6949019532321973, "grad_norm": 0.72265625, "learning_rate": 0.0001297284205319086, "loss": 0.7521, "step": 27063 }, { "epoch": 0.6949276304281191, "grad_norm": 0.77734375, "learning_rate": 0.0001297241581314659, "loss": 0.8402, "step": 27064 }, { "epoch": 0.694953307624041, "grad_norm": 0.74609375, "learning_rate": 0.0001297198956717856, "loss": 0.8223, "step": 27065 }, { "epoch": 0.6949789848199628, "grad_norm": 0.78515625, "learning_rate": 0.0001297156331528763, "loss": 0.7447, "step": 27066 }, { "epoch": 0.6950046620158845, "grad_norm": 0.83203125, "learning_rate": 0.00012971137057474636, "loss": 0.8058, "step": 27067 }, { "epoch": 0.6950303392118063, "grad_norm": 0.73046875, "learning_rate": 0.00012970710793740436, "loss": 0.79, "step": 27068 }, { "epoch": 0.6950560164077282, "grad_norm": 0.88671875, "learning_rate": 0.00012970284524085876, "loss": 0.9817, "step": 27069 }, { "epoch": 0.69508169360365, "grad_norm": 0.8125, "learning_rate": 0.0001296985824851181, "loss": 0.9347, "step": 27070 }, { "epoch": 0.6951073707995719, "grad_norm": 0.80078125, "learning_rate": 0.00012969431967019084, "loss": 0.8792, "step": 27071 }, { "epoch": 0.6951330479954937, "grad_norm": 0.8359375, "learning_rate": 0.00012969005679608548, "loss": 0.9413, "step": 27072 }, { "epoch": 0.6951587251914154, "grad_norm": 0.80078125, "learning_rate": 0.0001296857938628105, "loss": 0.7548, "step": 27073 }, { "epoch": 0.6951844023873373, "grad_norm": 0.7421875, "learning_rate": 0.00012968153087037444, "loss": 0.8963, "step": 27074 }, { "epoch": 0.6952100795832591, "grad_norm": 0.80859375, "learning_rate": 0.00012967726781878578, "loss": 0.8185, "step": 27075 }, { "epoch": 0.6952357567791809, "grad_norm": 0.72265625, "learning_rate": 0.00012967300470805297, "loss": 0.7172, "step": 27076 }, { "epoch": 0.6952614339751028, "grad_norm": 0.73046875, "learning_rate": 0.00012966874153818455, "loss": 0.8589, "step": 27077 }, { "epoch": 0.6952871111710246, "grad_norm": 0.8046875, "learning_rate": 0.000129664478309189, "loss": 0.9193, "step": 27078 }, { "epoch": 0.6953127883669464, "grad_norm": 0.7734375, "learning_rate": 0.0001296602150210749, "loss": 0.8078, "step": 27079 }, { "epoch": 0.6953384655628682, "grad_norm": 0.76171875, "learning_rate": 0.0001296559516738506, "loss": 0.8478, "step": 27080 }, { "epoch": 0.69536414275879, "grad_norm": 0.8125, "learning_rate": 0.00012965168826752467, "loss": 0.8114, "step": 27081 }, { "epoch": 0.6953898199547118, "grad_norm": 0.8203125, "learning_rate": 0.00012964742480210562, "loss": 0.7694, "step": 27082 }, { "epoch": 0.6954154971506337, "grad_norm": 0.8125, "learning_rate": 0.00012964316127760193, "loss": 0.818, "step": 27083 }, { "epoch": 0.6954411743465555, "grad_norm": 0.74609375, "learning_rate": 0.0001296388976940221, "loss": 0.782, "step": 27084 }, { "epoch": 0.6954668515424773, "grad_norm": 0.80859375, "learning_rate": 0.0001296346340513746, "loss": 0.9571, "step": 27085 }, { "epoch": 0.6954925287383992, "grad_norm": 0.71875, "learning_rate": 0.00012963037034966797, "loss": 0.8164, "step": 27086 }, { "epoch": 0.6955182059343209, "grad_norm": 0.75390625, "learning_rate": 0.00012962610658891067, "loss": 0.8194, "step": 27087 }, { "epoch": 0.6955438831302427, "grad_norm": 0.85546875, "learning_rate": 0.00012962184276911124, "loss": 0.8997, "step": 27088 }, { "epoch": 0.6955695603261646, "grad_norm": 0.71875, "learning_rate": 0.00012961757889027814, "loss": 0.7293, "step": 27089 }, { "epoch": 0.6955952375220864, "grad_norm": 0.75390625, "learning_rate": 0.00012961331495241988, "loss": 0.8559, "step": 27090 }, { "epoch": 0.6956209147180082, "grad_norm": 0.8671875, "learning_rate": 0.000129609050955545, "loss": 0.8741, "step": 27091 }, { "epoch": 0.6956465919139301, "grad_norm": 0.78515625, "learning_rate": 0.0001296047868996619, "loss": 0.9075, "step": 27092 }, { "epoch": 0.6956722691098518, "grad_norm": 0.7421875, "learning_rate": 0.00012960052278477915, "loss": 0.7209, "step": 27093 }, { "epoch": 0.6956979463057736, "grad_norm": 0.734375, "learning_rate": 0.00012959625861090523, "loss": 0.7788, "step": 27094 }, { "epoch": 0.6957236235016955, "grad_norm": 0.79296875, "learning_rate": 0.00012959199437804865, "loss": 0.8057, "step": 27095 }, { "epoch": 0.6957493006976173, "grad_norm": 0.7578125, "learning_rate": 0.0001295877300862179, "loss": 0.7896, "step": 27096 }, { "epoch": 0.6957749778935391, "grad_norm": 0.83203125, "learning_rate": 0.00012958346573542148, "loss": 0.8345, "step": 27097 }, { "epoch": 0.695800655089461, "grad_norm": 0.75390625, "learning_rate": 0.0001295792013256679, "loss": 0.8428, "step": 27098 }, { "epoch": 0.6958263322853828, "grad_norm": 0.72265625, "learning_rate": 0.0001295749368569656, "loss": 0.7341, "step": 27099 }, { "epoch": 0.6958520094813045, "grad_norm": 0.765625, "learning_rate": 0.00012957067232932312, "loss": 0.8406, "step": 27100 }, { "epoch": 0.6958776866772264, "grad_norm": 0.82421875, "learning_rate": 0.00012956640774274897, "loss": 0.873, "step": 27101 }, { "epoch": 0.6959033638731482, "grad_norm": 0.7109375, "learning_rate": 0.00012956214309725166, "loss": 0.7333, "step": 27102 }, { "epoch": 0.69592904106907, "grad_norm": 0.828125, "learning_rate": 0.00012955787839283967, "loss": 0.7694, "step": 27103 }, { "epoch": 0.6959547182649919, "grad_norm": 0.76953125, "learning_rate": 0.0001295536136295215, "loss": 0.926, "step": 27104 }, { "epoch": 0.6959803954609137, "grad_norm": 1.0390625, "learning_rate": 0.00012954934880730562, "loss": 0.803, "step": 27105 }, { "epoch": 0.6960060726568356, "grad_norm": 0.703125, "learning_rate": 0.0001295450839262006, "loss": 0.769, "step": 27106 }, { "epoch": 0.6960317498527573, "grad_norm": 0.734375, "learning_rate": 0.00012954081898621487, "loss": 0.8019, "step": 27107 }, { "epoch": 0.6960574270486791, "grad_norm": 0.703125, "learning_rate": 0.00012953655398735694, "loss": 0.6727, "step": 27108 }, { "epoch": 0.696083104244601, "grad_norm": 0.80078125, "learning_rate": 0.00012953228892963534, "loss": 0.891, "step": 27109 }, { "epoch": 0.6961087814405228, "grad_norm": 0.98828125, "learning_rate": 0.00012952802381305855, "loss": 1.0149, "step": 27110 }, { "epoch": 0.6961344586364446, "grad_norm": 0.84765625, "learning_rate": 0.0001295237586376351, "loss": 0.9459, "step": 27111 }, { "epoch": 0.6961601358323665, "grad_norm": 0.76953125, "learning_rate": 0.00012951949340337345, "loss": 0.9327, "step": 27112 }, { "epoch": 0.6961858130282882, "grad_norm": 0.7734375, "learning_rate": 0.00012951522811028212, "loss": 0.7506, "step": 27113 }, { "epoch": 0.69621149022421, "grad_norm": 0.83984375, "learning_rate": 0.0001295109627583696, "loss": 0.8449, "step": 27114 }, { "epoch": 0.6962371674201319, "grad_norm": 0.7734375, "learning_rate": 0.0001295066973476444, "loss": 0.7301, "step": 27115 }, { "epoch": 0.6962628446160537, "grad_norm": 0.84765625, "learning_rate": 0.000129502431878115, "loss": 0.8828, "step": 27116 }, { "epoch": 0.6962885218119755, "grad_norm": 0.80078125, "learning_rate": 0.00012949816634978998, "loss": 0.8546, "step": 27117 }, { "epoch": 0.6963141990078974, "grad_norm": 0.87890625, "learning_rate": 0.00012949390076267772, "loss": 0.9524, "step": 27118 }, { "epoch": 0.6963398762038192, "grad_norm": 0.9296875, "learning_rate": 0.00012948963511678677, "loss": 0.8795, "step": 27119 }, { "epoch": 0.6963655533997409, "grad_norm": 0.80078125, "learning_rate": 0.00012948536941212568, "loss": 0.7989, "step": 27120 }, { "epoch": 0.6963912305956628, "grad_norm": 0.7421875, "learning_rate": 0.00012948110364870288, "loss": 0.8227, "step": 27121 }, { "epoch": 0.6964169077915846, "grad_norm": 0.80859375, "learning_rate": 0.00012947683782652694, "loss": 0.9172, "step": 27122 }, { "epoch": 0.6964425849875064, "grad_norm": 0.85546875, "learning_rate": 0.0001294725719456063, "loss": 0.8579, "step": 27123 }, { "epoch": 0.6964682621834283, "grad_norm": 0.8515625, "learning_rate": 0.0001294683060059495, "loss": 0.8172, "step": 27124 }, { "epoch": 0.6964939393793501, "grad_norm": 0.79296875, "learning_rate": 0.00012946404000756503, "loss": 0.8463, "step": 27125 }, { "epoch": 0.696519616575272, "grad_norm": 0.90625, "learning_rate": 0.00012945977395046137, "loss": 0.804, "step": 27126 }, { "epoch": 0.6965452937711937, "grad_norm": 0.78515625, "learning_rate": 0.00012945550783464706, "loss": 0.6487, "step": 27127 }, { "epoch": 0.6965709709671155, "grad_norm": 0.76171875, "learning_rate": 0.00012945124166013056, "loss": 0.775, "step": 27128 }, { "epoch": 0.6965966481630373, "grad_norm": 0.796875, "learning_rate": 0.00012944697542692038, "loss": 0.8164, "step": 27129 }, { "epoch": 0.6966223253589592, "grad_norm": 0.76171875, "learning_rate": 0.00012944270913502513, "loss": 0.8864, "step": 27130 }, { "epoch": 0.696648002554881, "grad_norm": 0.7109375, "learning_rate": 0.00012943844278445315, "loss": 0.7699, "step": 27131 }, { "epoch": 0.6966736797508029, "grad_norm": 0.7734375, "learning_rate": 0.000129434176375213, "loss": 0.8068, "step": 27132 }, { "epoch": 0.6966993569467246, "grad_norm": 0.73828125, "learning_rate": 0.00012942990990731323, "loss": 0.923, "step": 27133 }, { "epoch": 0.6967250341426464, "grad_norm": 0.76953125, "learning_rate": 0.00012942564338076228, "loss": 0.8791, "step": 27134 }, { "epoch": 0.6967507113385683, "grad_norm": 0.71875, "learning_rate": 0.00012942137679556868, "loss": 0.8497, "step": 27135 }, { "epoch": 0.6967763885344901, "grad_norm": 0.8125, "learning_rate": 0.00012941711015174097, "loss": 0.802, "step": 27136 }, { "epoch": 0.6968020657304119, "grad_norm": 0.84375, "learning_rate": 0.0001294128434492876, "loss": 0.839, "step": 27137 }, { "epoch": 0.6968277429263338, "grad_norm": 0.8359375, "learning_rate": 0.0001294085766882171, "loss": 0.8539, "step": 27138 }, { "epoch": 0.6968534201222556, "grad_norm": 0.828125, "learning_rate": 0.00012940430986853793, "loss": 0.8493, "step": 27139 }, { "epoch": 0.6968790973181773, "grad_norm": 0.7890625, "learning_rate": 0.00012940004299025865, "loss": 0.7705, "step": 27140 }, { "epoch": 0.6969047745140992, "grad_norm": 0.78515625, "learning_rate": 0.00012939577605338776, "loss": 0.8685, "step": 27141 }, { "epoch": 0.696930451710021, "grad_norm": 0.84375, "learning_rate": 0.0001293915090579337, "loss": 0.8756, "step": 27142 }, { "epoch": 0.6969561289059428, "grad_norm": 0.76171875, "learning_rate": 0.00012938724200390507, "loss": 0.8203, "step": 27143 }, { "epoch": 0.6969818061018647, "grad_norm": 0.82421875, "learning_rate": 0.00012938297489131027, "loss": 0.8961, "step": 27144 }, { "epoch": 0.6970074832977865, "grad_norm": 0.69140625, "learning_rate": 0.00012937870772015785, "loss": 0.7882, "step": 27145 }, { "epoch": 0.6970331604937083, "grad_norm": 0.77734375, "learning_rate": 0.00012937444049045634, "loss": 0.8874, "step": 27146 }, { "epoch": 0.6970588376896301, "grad_norm": 0.7421875, "learning_rate": 0.00012937017320221425, "loss": 0.866, "step": 27147 }, { "epoch": 0.6970845148855519, "grad_norm": 0.76171875, "learning_rate": 0.00012936590585544002, "loss": 0.8365, "step": 27148 }, { "epoch": 0.6971101920814737, "grad_norm": 0.81640625, "learning_rate": 0.00012936163845014218, "loss": 1.0474, "step": 27149 }, { "epoch": 0.6971358692773956, "grad_norm": 0.80859375, "learning_rate": 0.0001293573709863293, "loss": 0.8689, "step": 27150 }, { "epoch": 0.6971615464733174, "grad_norm": 0.80078125, "learning_rate": 0.0001293531034640098, "loss": 0.7873, "step": 27151 }, { "epoch": 0.6971872236692392, "grad_norm": 0.7734375, "learning_rate": 0.00012934883588319225, "loss": 0.9756, "step": 27152 }, { "epoch": 0.697212900865161, "grad_norm": 0.73828125, "learning_rate": 0.00012934456824388507, "loss": 0.7695, "step": 27153 }, { "epoch": 0.6972385780610828, "grad_norm": 0.78515625, "learning_rate": 0.00012934030054609683, "loss": 0.7668, "step": 27154 }, { "epoch": 0.6972642552570046, "grad_norm": 0.74609375, "learning_rate": 0.00012933603278983603, "loss": 0.8134, "step": 27155 }, { "epoch": 0.6972899324529265, "grad_norm": 0.7578125, "learning_rate": 0.00012933176497511117, "loss": 0.7391, "step": 27156 }, { "epoch": 0.6973156096488483, "grad_norm": 0.78125, "learning_rate": 0.00012932749710193076, "loss": 0.79, "step": 27157 }, { "epoch": 0.6973412868447701, "grad_norm": 0.82421875, "learning_rate": 0.0001293232291703033, "loss": 0.9059, "step": 27158 }, { "epoch": 0.697366964040692, "grad_norm": 0.72265625, "learning_rate": 0.00012931896118023725, "loss": 0.7583, "step": 27159 }, { "epoch": 0.6973926412366137, "grad_norm": 0.74609375, "learning_rate": 0.00012931469313174121, "loss": 0.8677, "step": 27160 }, { "epoch": 0.6974183184325355, "grad_norm": 0.82421875, "learning_rate": 0.00012931042502482357, "loss": 0.9937, "step": 27161 }, { "epoch": 0.6974439956284574, "grad_norm": 0.796875, "learning_rate": 0.00012930615685949296, "loss": 0.8672, "step": 27162 }, { "epoch": 0.6974696728243792, "grad_norm": 0.76953125, "learning_rate": 0.0001293018886357578, "loss": 0.868, "step": 27163 }, { "epoch": 0.697495350020301, "grad_norm": 0.78125, "learning_rate": 0.00012929762035362666, "loss": 0.7687, "step": 27164 }, { "epoch": 0.6975210272162229, "grad_norm": 0.765625, "learning_rate": 0.00012929335201310798, "loss": 0.8666, "step": 27165 }, { "epoch": 0.6975467044121447, "grad_norm": 0.77734375, "learning_rate": 0.00012928908361421027, "loss": 0.8152, "step": 27166 }, { "epoch": 0.6975723816080664, "grad_norm": 0.765625, "learning_rate": 0.0001292848151569421, "loss": 0.7166, "step": 27167 }, { "epoch": 0.6975980588039883, "grad_norm": 0.76171875, "learning_rate": 0.00012928054664131192, "loss": 0.8754, "step": 27168 }, { "epoch": 0.6976237359999101, "grad_norm": 0.84375, "learning_rate": 0.00012927627806732828, "loss": 0.8971, "step": 27169 }, { "epoch": 0.697649413195832, "grad_norm": 0.75390625, "learning_rate": 0.00012927200943499965, "loss": 0.8266, "step": 27170 }, { "epoch": 0.6976750903917538, "grad_norm": 0.75390625, "learning_rate": 0.00012926774074433452, "loss": 0.8928, "step": 27171 }, { "epoch": 0.6977007675876756, "grad_norm": 0.71875, "learning_rate": 0.00012926347199534145, "loss": 0.7127, "step": 27172 }, { "epoch": 0.6977264447835974, "grad_norm": 0.76171875, "learning_rate": 0.00012925920318802896, "loss": 0.802, "step": 27173 }, { "epoch": 0.6977521219795192, "grad_norm": 0.76953125, "learning_rate": 0.00012925493432240545, "loss": 0.7147, "step": 27174 }, { "epoch": 0.697777799175441, "grad_norm": 0.74609375, "learning_rate": 0.00012925066539847953, "loss": 0.9097, "step": 27175 }, { "epoch": 0.6978034763713629, "grad_norm": 0.89453125, "learning_rate": 0.0001292463964162597, "loss": 0.9255, "step": 27176 }, { "epoch": 0.6978291535672847, "grad_norm": 0.6640625, "learning_rate": 0.00012924212737575443, "loss": 0.7324, "step": 27177 }, { "epoch": 0.6978548307632065, "grad_norm": 0.84375, "learning_rate": 0.00012923785827697224, "loss": 0.9719, "step": 27178 }, { "epoch": 0.6978805079591284, "grad_norm": 0.9140625, "learning_rate": 0.0001292335891199216, "loss": 0.8078, "step": 27179 }, { "epoch": 0.6979061851550501, "grad_norm": 0.76953125, "learning_rate": 0.00012922931990461112, "loss": 0.7359, "step": 27180 }, { "epoch": 0.6979318623509719, "grad_norm": 0.81640625, "learning_rate": 0.00012922505063104922, "loss": 0.9068, "step": 27181 }, { "epoch": 0.6979575395468938, "grad_norm": 0.86328125, "learning_rate": 0.0001292207812992444, "loss": 1.0258, "step": 27182 }, { "epoch": 0.6979832167428156, "grad_norm": 0.765625, "learning_rate": 0.00012921651190920524, "loss": 0.8386, "step": 27183 }, { "epoch": 0.6980088939387374, "grad_norm": 0.828125, "learning_rate": 0.00012921224246094024, "loss": 0.7955, "step": 27184 }, { "epoch": 0.6980345711346593, "grad_norm": 0.8125, "learning_rate": 0.00012920797295445783, "loss": 0.8423, "step": 27185 }, { "epoch": 0.6980602483305811, "grad_norm": 0.78515625, "learning_rate": 0.00012920370338976656, "loss": 0.8348, "step": 27186 }, { "epoch": 0.6980859255265028, "grad_norm": 0.71484375, "learning_rate": 0.000129199433766875, "loss": 0.7872, "step": 27187 }, { "epoch": 0.6981116027224247, "grad_norm": 0.7578125, "learning_rate": 0.00012919516408579156, "loss": 0.7446, "step": 27188 }, { "epoch": 0.6981372799183465, "grad_norm": 0.78515625, "learning_rate": 0.00012919089434652482, "loss": 0.8663, "step": 27189 }, { "epoch": 0.6981629571142683, "grad_norm": 0.76953125, "learning_rate": 0.00012918662454908323, "loss": 0.7469, "step": 27190 }, { "epoch": 0.6981886343101902, "grad_norm": 0.80078125, "learning_rate": 0.0001291823546934754, "loss": 0.8609, "step": 27191 }, { "epoch": 0.698214311506112, "grad_norm": 0.83203125, "learning_rate": 0.00012917808477970973, "loss": 0.8737, "step": 27192 }, { "epoch": 0.6982399887020337, "grad_norm": 0.78515625, "learning_rate": 0.00012917381480779473, "loss": 1.0503, "step": 27193 }, { "epoch": 0.6982656658979556, "grad_norm": 0.734375, "learning_rate": 0.000129169544777739, "loss": 0.7871, "step": 27194 }, { "epoch": 0.6982913430938774, "grad_norm": 0.76953125, "learning_rate": 0.000129165274689551, "loss": 0.7906, "step": 27195 }, { "epoch": 0.6983170202897993, "grad_norm": 0.78125, "learning_rate": 0.00012916100454323927, "loss": 0.7662, "step": 27196 }, { "epoch": 0.6983426974857211, "grad_norm": 0.7734375, "learning_rate": 0.00012915673433881226, "loss": 0.94, "step": 27197 }, { "epoch": 0.6983683746816429, "grad_norm": 0.69921875, "learning_rate": 0.0001291524640762785, "loss": 0.823, "step": 27198 }, { "epoch": 0.6983940518775648, "grad_norm": 0.7578125, "learning_rate": 0.00012914819375564655, "loss": 0.7989, "step": 27199 }, { "epoch": 0.6984197290734865, "grad_norm": 0.734375, "learning_rate": 0.00012914392337692486, "loss": 0.8225, "step": 27200 }, { "epoch": 0.6984454062694083, "grad_norm": 0.76171875, "learning_rate": 0.00012913965294012193, "loss": 0.7991, "step": 27201 }, { "epoch": 0.6984710834653302, "grad_norm": 0.734375, "learning_rate": 0.00012913538244524637, "loss": 0.7978, "step": 27202 }, { "epoch": 0.698496760661252, "grad_norm": 0.80859375, "learning_rate": 0.00012913111189230658, "loss": 0.8922, "step": 27203 }, { "epoch": 0.6985224378571738, "grad_norm": 0.73046875, "learning_rate": 0.00012912684128131113, "loss": 0.7513, "step": 27204 }, { "epoch": 0.6985481150530957, "grad_norm": 0.6875, "learning_rate": 0.00012912257061226852, "loss": 0.7092, "step": 27205 }, { "epoch": 0.6985737922490175, "grad_norm": 0.85546875, "learning_rate": 0.00012911829988518725, "loss": 0.8263, "step": 27206 }, { "epoch": 0.6985994694449392, "grad_norm": 0.83203125, "learning_rate": 0.00012911402910007583, "loss": 0.794, "step": 27207 }, { "epoch": 0.6986251466408611, "grad_norm": 0.8984375, "learning_rate": 0.0001291097582569428, "loss": 0.9705, "step": 27208 }, { "epoch": 0.6986508238367829, "grad_norm": 0.73046875, "learning_rate": 0.00012910548735579663, "loss": 0.8997, "step": 27209 }, { "epoch": 0.6986765010327047, "grad_norm": 0.80078125, "learning_rate": 0.0001291012163966459, "loss": 0.8592, "step": 27210 }, { "epoch": 0.6987021782286266, "grad_norm": 0.81640625, "learning_rate": 0.000129096945379499, "loss": 0.8957, "step": 27211 }, { "epoch": 0.6987278554245484, "grad_norm": 0.7734375, "learning_rate": 0.00012909267430436455, "loss": 0.9969, "step": 27212 }, { "epoch": 0.6987535326204701, "grad_norm": 0.71875, "learning_rate": 0.00012908840317125107, "loss": 0.798, "step": 27213 }, { "epoch": 0.698779209816392, "grad_norm": 0.71875, "learning_rate": 0.00012908413198016696, "loss": 0.7422, "step": 27214 }, { "epoch": 0.6988048870123138, "grad_norm": 0.734375, "learning_rate": 0.00012907986073112083, "loss": 0.8007, "step": 27215 }, { "epoch": 0.6988305642082356, "grad_norm": 0.80078125, "learning_rate": 0.0001290755894241212, "loss": 0.727, "step": 27216 }, { "epoch": 0.6988562414041575, "grad_norm": 0.75390625, "learning_rate": 0.0001290713180591765, "loss": 0.7798, "step": 27217 }, { "epoch": 0.6988819186000793, "grad_norm": 0.8125, "learning_rate": 0.00012906704663629532, "loss": 0.7836, "step": 27218 }, { "epoch": 0.6989075957960011, "grad_norm": 0.859375, "learning_rate": 0.0001290627751554861, "loss": 0.9023, "step": 27219 }, { "epoch": 0.6989332729919229, "grad_norm": 0.6875, "learning_rate": 0.00012905850361675744, "loss": 0.8027, "step": 27220 }, { "epoch": 0.6989589501878447, "grad_norm": 0.72265625, "learning_rate": 0.0001290542320201178, "loss": 0.825, "step": 27221 }, { "epoch": 0.6989846273837665, "grad_norm": 0.796875, "learning_rate": 0.0001290499603655757, "loss": 0.7519, "step": 27222 }, { "epoch": 0.6990103045796884, "grad_norm": 0.73828125, "learning_rate": 0.0001290456886531396, "loss": 0.7995, "step": 27223 }, { "epoch": 0.6990359817756102, "grad_norm": 0.83203125, "learning_rate": 0.00012904141688281816, "loss": 0.8109, "step": 27224 }, { "epoch": 0.699061658971532, "grad_norm": 0.76953125, "learning_rate": 0.00012903714505461971, "loss": 0.8666, "step": 27225 }, { "epoch": 0.6990873361674539, "grad_norm": 0.7890625, "learning_rate": 0.00012903287316855288, "loss": 0.9417, "step": 27226 }, { "epoch": 0.6991130133633756, "grad_norm": 0.8046875, "learning_rate": 0.00012902860122462615, "loss": 0.7482, "step": 27227 }, { "epoch": 0.6991386905592974, "grad_norm": 0.8359375, "learning_rate": 0.00012902432922284808, "loss": 0.781, "step": 27228 }, { "epoch": 0.6991643677552193, "grad_norm": 0.8046875, "learning_rate": 0.00012902005716322715, "loss": 0.8272, "step": 27229 }, { "epoch": 0.6991900449511411, "grad_norm": 0.76171875, "learning_rate": 0.0001290157850457718, "loss": 0.9114, "step": 27230 }, { "epoch": 0.699215722147063, "grad_norm": 0.8359375, "learning_rate": 0.00012901151287049068, "loss": 0.7871, "step": 27231 }, { "epoch": 0.6992413993429848, "grad_norm": 0.72265625, "learning_rate": 0.0001290072406373922, "loss": 0.8095, "step": 27232 }, { "epoch": 0.6992670765389065, "grad_norm": 0.8046875, "learning_rate": 0.0001290029683464849, "loss": 0.7888, "step": 27233 }, { "epoch": 0.6992927537348284, "grad_norm": 0.78515625, "learning_rate": 0.00012899869599777733, "loss": 0.8676, "step": 27234 }, { "epoch": 0.6993184309307502, "grad_norm": 0.73046875, "learning_rate": 0.00012899442359127794, "loss": 0.7082, "step": 27235 }, { "epoch": 0.699344108126672, "grad_norm": 0.8828125, "learning_rate": 0.00012899015112699533, "loss": 0.7966, "step": 27236 }, { "epoch": 0.6993697853225939, "grad_norm": 0.796875, "learning_rate": 0.00012898587860493796, "loss": 0.8929, "step": 27237 }, { "epoch": 0.6993954625185157, "grad_norm": 0.73046875, "learning_rate": 0.0001289816060251143, "loss": 0.9214, "step": 27238 }, { "epoch": 0.6994211397144375, "grad_norm": 0.84765625, "learning_rate": 0.00012897733338753298, "loss": 0.804, "step": 27239 }, { "epoch": 0.6994468169103593, "grad_norm": 0.81640625, "learning_rate": 0.00012897306069220243, "loss": 0.8823, "step": 27240 }, { "epoch": 0.6994724941062811, "grad_norm": 0.8125, "learning_rate": 0.00012896878793913115, "loss": 0.9599, "step": 27241 }, { "epoch": 0.6994981713022029, "grad_norm": 0.90234375, "learning_rate": 0.00012896451512832776, "loss": 0.9454, "step": 27242 }, { "epoch": 0.6995238484981248, "grad_norm": 0.75390625, "learning_rate": 0.00012896024225980067, "loss": 0.8452, "step": 27243 }, { "epoch": 0.6995495256940466, "grad_norm": 0.77734375, "learning_rate": 0.00012895596933355842, "loss": 0.8056, "step": 27244 }, { "epoch": 0.6995752028899684, "grad_norm": 0.75, "learning_rate": 0.00012895169634960957, "loss": 0.7975, "step": 27245 }, { "epoch": 0.6996008800858903, "grad_norm": 0.71484375, "learning_rate": 0.00012894742330796257, "loss": 0.7161, "step": 27246 }, { "epoch": 0.699626557281812, "grad_norm": 0.765625, "learning_rate": 0.00012894315020862602, "loss": 0.8238, "step": 27247 }, { "epoch": 0.6996522344777338, "grad_norm": 0.9921875, "learning_rate": 0.00012893887705160833, "loss": 0.8939, "step": 27248 }, { "epoch": 0.6996779116736557, "grad_norm": 0.74609375, "learning_rate": 0.00012893460383691814, "loss": 0.7716, "step": 27249 }, { "epoch": 0.6997035888695775, "grad_norm": 0.83984375, "learning_rate": 0.00012893033056456384, "loss": 0.8194, "step": 27250 }, { "epoch": 0.6997292660654993, "grad_norm": 0.765625, "learning_rate": 0.000128926057234554, "loss": 0.855, "step": 27251 }, { "epoch": 0.6997549432614212, "grad_norm": 0.859375, "learning_rate": 0.00012892178384689716, "loss": 0.7114, "step": 27252 }, { "epoch": 0.6997806204573429, "grad_norm": 0.734375, "learning_rate": 0.00012891751040160183, "loss": 0.7814, "step": 27253 }, { "epoch": 0.6998062976532647, "grad_norm": 0.7421875, "learning_rate": 0.0001289132368986765, "loss": 0.8026, "step": 27254 }, { "epoch": 0.6998319748491866, "grad_norm": 0.75, "learning_rate": 0.00012890896333812973, "loss": 0.811, "step": 27255 }, { "epoch": 0.6998576520451084, "grad_norm": 0.7734375, "learning_rate": 0.00012890468971996996, "loss": 0.8879, "step": 27256 }, { "epoch": 0.6998833292410302, "grad_norm": 0.8046875, "learning_rate": 0.00012890041604420578, "loss": 0.782, "step": 27257 }, { "epoch": 0.6999090064369521, "grad_norm": 0.72265625, "learning_rate": 0.00012889614231084568, "loss": 0.8644, "step": 27258 }, { "epoch": 0.6999346836328739, "grad_norm": 0.74609375, "learning_rate": 0.00012889186851989815, "loss": 0.7974, "step": 27259 }, { "epoch": 0.6999603608287956, "grad_norm": 0.76953125, "learning_rate": 0.00012888759467137177, "loss": 0.7931, "step": 27260 }, { "epoch": 0.6999860380247175, "grad_norm": 0.7265625, "learning_rate": 0.00012888332076527504, "loss": 0.7452, "step": 27261 }, { "epoch": 0.7000117152206393, "grad_norm": 0.73046875, "learning_rate": 0.0001288790468016164, "loss": 0.6989, "step": 27262 }, { "epoch": 0.7000373924165612, "grad_norm": 0.78515625, "learning_rate": 0.0001288747727804045, "loss": 0.8554, "step": 27263 }, { "epoch": 0.700063069612483, "grad_norm": 0.72265625, "learning_rate": 0.00012887049870164774, "loss": 0.9377, "step": 27264 }, { "epoch": 0.7000887468084048, "grad_norm": 0.828125, "learning_rate": 0.0001288662245653547, "loss": 0.9093, "step": 27265 }, { "epoch": 0.7001144240043266, "grad_norm": 0.7421875, "learning_rate": 0.00012886195037153388, "loss": 0.8294, "step": 27266 }, { "epoch": 0.7001401012002484, "grad_norm": 0.8359375, "learning_rate": 0.00012885767612019378, "loss": 0.9004, "step": 27267 }, { "epoch": 0.7001657783961702, "grad_norm": 0.6953125, "learning_rate": 0.00012885340181134297, "loss": 0.8684, "step": 27268 }, { "epoch": 0.7001914555920921, "grad_norm": 0.86328125, "learning_rate": 0.00012884912744498994, "loss": 0.8517, "step": 27269 }, { "epoch": 0.7002171327880139, "grad_norm": 0.89453125, "learning_rate": 0.00012884485302114314, "loss": 0.8353, "step": 27270 }, { "epoch": 0.7002428099839357, "grad_norm": 0.90625, "learning_rate": 0.0001288405785398112, "loss": 0.9446, "step": 27271 }, { "epoch": 0.7002684871798576, "grad_norm": 0.875, "learning_rate": 0.0001288363040010026, "loss": 0.851, "step": 27272 }, { "epoch": 0.7002941643757793, "grad_norm": 0.796875, "learning_rate": 0.00012883202940472586, "loss": 0.7917, "step": 27273 }, { "epoch": 0.7003198415717011, "grad_norm": 0.6796875, "learning_rate": 0.00012882775475098947, "loss": 0.7029, "step": 27274 }, { "epoch": 0.700345518767623, "grad_norm": 0.81640625, "learning_rate": 0.00012882348003980196, "loss": 0.8629, "step": 27275 }, { "epoch": 0.7003711959635448, "grad_norm": 0.75390625, "learning_rate": 0.00012881920527117187, "loss": 0.8171, "step": 27276 }, { "epoch": 0.7003968731594666, "grad_norm": 0.80859375, "learning_rate": 0.00012881493044510772, "loss": 1.0428, "step": 27277 }, { "epoch": 0.7004225503553885, "grad_norm": 0.73046875, "learning_rate": 0.000128810655561618, "loss": 0.8013, "step": 27278 }, { "epoch": 0.7004482275513103, "grad_norm": 0.68359375, "learning_rate": 0.00012880638062071125, "loss": 0.7218, "step": 27279 }, { "epoch": 0.700473904747232, "grad_norm": 0.76953125, "learning_rate": 0.000128802105622396, "loss": 0.8777, "step": 27280 }, { "epoch": 0.7004995819431539, "grad_norm": 0.78515625, "learning_rate": 0.00012879783056668072, "loss": 0.8899, "step": 27281 }, { "epoch": 0.7005252591390757, "grad_norm": 0.76171875, "learning_rate": 0.000128793555453574, "loss": 0.758, "step": 27282 }, { "epoch": 0.7005509363349975, "grad_norm": 0.80078125, "learning_rate": 0.0001287892802830843, "loss": 0.8472, "step": 27283 }, { "epoch": 0.7005766135309194, "grad_norm": 0.70703125, "learning_rate": 0.00012878500505522018, "loss": 0.822, "step": 27284 }, { "epoch": 0.7006022907268412, "grad_norm": 0.77734375, "learning_rate": 0.00012878072976999012, "loss": 0.7062, "step": 27285 }, { "epoch": 0.7006279679227629, "grad_norm": 0.80078125, "learning_rate": 0.00012877645442740268, "loss": 0.9807, "step": 27286 }, { "epoch": 0.7006536451186848, "grad_norm": 0.75, "learning_rate": 0.00012877217902746636, "loss": 0.8333, "step": 27287 }, { "epoch": 0.7006793223146066, "grad_norm": 0.85546875, "learning_rate": 0.0001287679035701897, "loss": 1.1637, "step": 27288 }, { "epoch": 0.7007049995105284, "grad_norm": 0.69140625, "learning_rate": 0.00012876362805558123, "loss": 0.8045, "step": 27289 }, { "epoch": 0.7007306767064503, "grad_norm": 0.734375, "learning_rate": 0.0001287593524836494, "loss": 0.8546, "step": 27290 }, { "epoch": 0.7007563539023721, "grad_norm": 0.72265625, "learning_rate": 0.0001287550768544028, "loss": 0.8057, "step": 27291 }, { "epoch": 0.700782031098294, "grad_norm": 0.8203125, "learning_rate": 0.00012875080116784996, "loss": 0.865, "step": 27292 }, { "epoch": 0.7008077082942157, "grad_norm": 0.7734375, "learning_rate": 0.00012874652542399932, "loss": 0.9225, "step": 27293 }, { "epoch": 0.7008333854901375, "grad_norm": 0.93359375, "learning_rate": 0.00012874224962285946, "loss": 0.8728, "step": 27294 }, { "epoch": 0.7008590626860594, "grad_norm": 0.7578125, "learning_rate": 0.00012873797376443893, "loss": 0.9588, "step": 27295 }, { "epoch": 0.7008847398819812, "grad_norm": 0.82421875, "learning_rate": 0.00012873369784874617, "loss": 0.9032, "step": 27296 }, { "epoch": 0.700910417077903, "grad_norm": 0.80859375, "learning_rate": 0.00012872942187578976, "loss": 1.0169, "step": 27297 }, { "epoch": 0.7009360942738249, "grad_norm": 0.765625, "learning_rate": 0.0001287251458455782, "loss": 0.7879, "step": 27298 }, { "epoch": 0.7009617714697467, "grad_norm": 0.8203125, "learning_rate": 0.00012872086975812002, "loss": 0.8679, "step": 27299 }, { "epoch": 0.7009874486656684, "grad_norm": 0.71875, "learning_rate": 0.00012871659361342376, "loss": 0.9028, "step": 27300 }, { "epoch": 0.7010131258615903, "grad_norm": 0.78125, "learning_rate": 0.00012871231741149792, "loss": 0.831, "step": 27301 }, { "epoch": 0.7010388030575121, "grad_norm": 0.78125, "learning_rate": 0.000128708041152351, "loss": 0.7064, "step": 27302 }, { "epoch": 0.7010644802534339, "grad_norm": 0.734375, "learning_rate": 0.00012870376483599158, "loss": 0.7862, "step": 27303 }, { "epoch": 0.7010901574493558, "grad_norm": 0.76171875, "learning_rate": 0.0001286994884624281, "loss": 0.8035, "step": 27304 }, { "epoch": 0.7011158346452776, "grad_norm": 0.76171875, "learning_rate": 0.0001286952120316692, "loss": 0.88, "step": 27305 }, { "epoch": 0.7011415118411993, "grad_norm": 0.828125, "learning_rate": 0.0001286909355437233, "loss": 0.7665, "step": 27306 }, { "epoch": 0.7011671890371212, "grad_norm": 0.734375, "learning_rate": 0.00012868665899859894, "loss": 0.7904, "step": 27307 }, { "epoch": 0.701192866233043, "grad_norm": 0.796875, "learning_rate": 0.0001286823823963047, "loss": 0.8987, "step": 27308 }, { "epoch": 0.7012185434289648, "grad_norm": 0.7421875, "learning_rate": 0.00012867810573684904, "loss": 0.9696, "step": 27309 }, { "epoch": 0.7012442206248867, "grad_norm": 0.80859375, "learning_rate": 0.0001286738290202405, "loss": 0.8171, "step": 27310 }, { "epoch": 0.7012698978208085, "grad_norm": 0.77734375, "learning_rate": 0.00012866955224648762, "loss": 0.7681, "step": 27311 }, { "epoch": 0.7012955750167303, "grad_norm": 0.7578125, "learning_rate": 0.00012866527541559889, "loss": 0.6988, "step": 27312 }, { "epoch": 0.7013212522126521, "grad_norm": 0.80078125, "learning_rate": 0.00012866099852758286, "loss": 0.8421, "step": 27313 }, { "epoch": 0.7013469294085739, "grad_norm": 0.828125, "learning_rate": 0.0001286567215824481, "loss": 0.9205, "step": 27314 }, { "epoch": 0.7013726066044957, "grad_norm": 0.6953125, "learning_rate": 0.00012865244458020302, "loss": 0.6702, "step": 27315 }, { "epoch": 0.7013982838004176, "grad_norm": 0.71484375, "learning_rate": 0.00012864816752085624, "loss": 0.7489, "step": 27316 }, { "epoch": 0.7014239609963394, "grad_norm": 0.74609375, "learning_rate": 0.00012864389040441625, "loss": 0.8246, "step": 27317 }, { "epoch": 0.7014496381922612, "grad_norm": 0.74609375, "learning_rate": 0.00012863961323089152, "loss": 0.7355, "step": 27318 }, { "epoch": 0.7014753153881831, "grad_norm": 0.8125, "learning_rate": 0.0001286353360002907, "loss": 0.86, "step": 27319 }, { "epoch": 0.7015009925841048, "grad_norm": 0.80859375, "learning_rate": 0.00012863105871262223, "loss": 0.747, "step": 27320 }, { "epoch": 0.7015266697800266, "grad_norm": 0.765625, "learning_rate": 0.0001286267813678946, "loss": 0.9022, "step": 27321 }, { "epoch": 0.7015523469759485, "grad_norm": 0.76953125, "learning_rate": 0.00012862250396611643, "loss": 0.9501, "step": 27322 }, { "epoch": 0.7015780241718703, "grad_norm": 0.76171875, "learning_rate": 0.00012861822650729615, "loss": 0.8026, "step": 27323 }, { "epoch": 0.7016037013677922, "grad_norm": 0.76171875, "learning_rate": 0.00012861394899144236, "loss": 0.8358, "step": 27324 }, { "epoch": 0.701629378563714, "grad_norm": 0.7421875, "learning_rate": 0.00012860967141856358, "loss": 0.8595, "step": 27325 }, { "epoch": 0.7016550557596357, "grad_norm": 0.75390625, "learning_rate": 0.00012860539378866824, "loss": 0.8057, "step": 27326 }, { "epoch": 0.7016807329555576, "grad_norm": 0.76171875, "learning_rate": 0.00012860111610176497, "loss": 0.7464, "step": 27327 }, { "epoch": 0.7017064101514794, "grad_norm": 0.83203125, "learning_rate": 0.00012859683835786227, "loss": 0.982, "step": 27328 }, { "epoch": 0.7017320873474012, "grad_norm": 0.74609375, "learning_rate": 0.00012859256055696867, "loss": 0.8366, "step": 27329 }, { "epoch": 0.7017577645433231, "grad_norm": 0.7109375, "learning_rate": 0.00012858828269909263, "loss": 0.7883, "step": 27330 }, { "epoch": 0.7017834417392449, "grad_norm": 0.70703125, "learning_rate": 0.00012858400478424275, "loss": 0.7571, "step": 27331 }, { "epoch": 0.7018091189351667, "grad_norm": 0.7578125, "learning_rate": 0.00012857972681242755, "loss": 0.8326, "step": 27332 }, { "epoch": 0.7018347961310885, "grad_norm": 0.74609375, "learning_rate": 0.0001285754487836555, "loss": 0.7762, "step": 27333 }, { "epoch": 0.7018604733270103, "grad_norm": 0.76953125, "learning_rate": 0.0001285711706979352, "loss": 0.7908, "step": 27334 }, { "epoch": 0.7018861505229321, "grad_norm": 0.765625, "learning_rate": 0.00012856689255527512, "loss": 0.8036, "step": 27335 }, { "epoch": 0.701911827718854, "grad_norm": 0.78515625, "learning_rate": 0.00012856261435568376, "loss": 0.8585, "step": 27336 }, { "epoch": 0.7019375049147758, "grad_norm": 0.796875, "learning_rate": 0.00012855833609916976, "loss": 0.7269, "step": 27337 }, { "epoch": 0.7019631821106976, "grad_norm": 0.75, "learning_rate": 0.00012855405778574154, "loss": 0.7251, "step": 27338 }, { "epoch": 0.7019888593066195, "grad_norm": 0.7890625, "learning_rate": 0.00012854977941540767, "loss": 0.7665, "step": 27339 }, { "epoch": 0.7020145365025412, "grad_norm": 0.85546875, "learning_rate": 0.00012854550098817665, "loss": 0.8889, "step": 27340 }, { "epoch": 0.702040213698463, "grad_norm": 0.77734375, "learning_rate": 0.00012854122250405706, "loss": 0.8934, "step": 27341 }, { "epoch": 0.7020658908943849, "grad_norm": 0.8515625, "learning_rate": 0.00012853694396305738, "loss": 0.9413, "step": 27342 }, { "epoch": 0.7020915680903067, "grad_norm": 0.828125, "learning_rate": 0.00012853266536518612, "loss": 0.8464, "step": 27343 }, { "epoch": 0.7021172452862285, "grad_norm": 0.8359375, "learning_rate": 0.00012852838671045187, "loss": 0.8577, "step": 27344 }, { "epoch": 0.7021429224821504, "grad_norm": 0.7890625, "learning_rate": 0.00012852410799886309, "loss": 0.693, "step": 27345 }, { "epoch": 0.7021685996780721, "grad_norm": 0.76171875, "learning_rate": 0.0001285198292304284, "loss": 0.7301, "step": 27346 }, { "epoch": 0.7021942768739939, "grad_norm": 0.80859375, "learning_rate": 0.00012851555040515618, "loss": 1.0019, "step": 27347 }, { "epoch": 0.7022199540699158, "grad_norm": 0.73046875, "learning_rate": 0.00012851127152305511, "loss": 0.7471, "step": 27348 }, { "epoch": 0.7022456312658376, "grad_norm": 0.79296875, "learning_rate": 0.00012850699258413364, "loss": 0.798, "step": 27349 }, { "epoch": 0.7022713084617594, "grad_norm": 0.8359375, "learning_rate": 0.00012850271358840027, "loss": 0.9157, "step": 27350 }, { "epoch": 0.7022969856576813, "grad_norm": 0.79296875, "learning_rate": 0.00012849843453586362, "loss": 0.8495, "step": 27351 }, { "epoch": 0.7023226628536031, "grad_norm": 0.796875, "learning_rate": 0.00012849415542653213, "loss": 0.8797, "step": 27352 }, { "epoch": 0.7023483400495248, "grad_norm": 0.8359375, "learning_rate": 0.00012848987626041435, "loss": 0.8244, "step": 27353 }, { "epoch": 0.7023740172454467, "grad_norm": 0.74609375, "learning_rate": 0.00012848559703751888, "loss": 0.8418, "step": 27354 }, { "epoch": 0.7023996944413685, "grad_norm": 0.796875, "learning_rate": 0.00012848131775785413, "loss": 0.777, "step": 27355 }, { "epoch": 0.7024253716372904, "grad_norm": 0.9609375, "learning_rate": 0.00012847703842142871, "loss": 0.8472, "step": 27356 }, { "epoch": 0.7024510488332122, "grad_norm": 0.76171875, "learning_rate": 0.00012847275902825114, "loss": 0.9153, "step": 27357 }, { "epoch": 0.702476726029134, "grad_norm": 0.81640625, "learning_rate": 0.0001284684795783299, "loss": 0.876, "step": 27358 }, { "epoch": 0.7025024032250559, "grad_norm": 0.86328125, "learning_rate": 0.00012846420007167354, "loss": 0.862, "step": 27359 }, { "epoch": 0.7025280804209776, "grad_norm": 0.7578125, "learning_rate": 0.00012845992050829064, "loss": 0.8266, "step": 27360 }, { "epoch": 0.7025537576168994, "grad_norm": 0.7890625, "learning_rate": 0.0001284556408881897, "loss": 0.7683, "step": 27361 }, { "epoch": 0.7025794348128213, "grad_norm": 0.78515625, "learning_rate": 0.0001284513612113792, "loss": 0.8933, "step": 27362 }, { "epoch": 0.7026051120087431, "grad_norm": 0.83203125, "learning_rate": 0.0001284470814778677, "loss": 0.8799, "step": 27363 }, { "epoch": 0.7026307892046649, "grad_norm": 0.796875, "learning_rate": 0.00012844280168766375, "loss": 0.8689, "step": 27364 }, { "epoch": 0.7026564664005868, "grad_norm": 0.75, "learning_rate": 0.0001284385218407759, "loss": 0.9278, "step": 27365 }, { "epoch": 0.7026821435965085, "grad_norm": 0.83984375, "learning_rate": 0.0001284342419372126, "loss": 0.7155, "step": 27366 }, { "epoch": 0.7027078207924303, "grad_norm": 0.7109375, "learning_rate": 0.00012842996197698247, "loss": 0.8312, "step": 27367 }, { "epoch": 0.7027334979883522, "grad_norm": 0.82421875, "learning_rate": 0.00012842568196009394, "loss": 0.9166, "step": 27368 }, { "epoch": 0.702759175184274, "grad_norm": 0.78515625, "learning_rate": 0.00012842140188655562, "loss": 0.797, "step": 27369 }, { "epoch": 0.7027848523801958, "grad_norm": 0.84765625, "learning_rate": 0.000128417121756376, "loss": 0.7981, "step": 27370 }, { "epoch": 0.7028105295761177, "grad_norm": 0.77734375, "learning_rate": 0.00012841284156956363, "loss": 0.7923, "step": 27371 }, { "epoch": 0.7028362067720395, "grad_norm": 0.81640625, "learning_rate": 0.00012840856132612704, "loss": 0.8084, "step": 27372 }, { "epoch": 0.7028618839679612, "grad_norm": 0.76953125, "learning_rate": 0.0001284042810260748, "loss": 0.7433, "step": 27373 }, { "epoch": 0.7028875611638831, "grad_norm": 0.828125, "learning_rate": 0.00012840000066941532, "loss": 0.888, "step": 27374 }, { "epoch": 0.7029132383598049, "grad_norm": 0.73828125, "learning_rate": 0.0001283957202561572, "loss": 0.7657, "step": 27375 }, { "epoch": 0.7029389155557267, "grad_norm": 0.828125, "learning_rate": 0.00012839143978630904, "loss": 0.725, "step": 27376 }, { "epoch": 0.7029645927516486, "grad_norm": 0.83984375, "learning_rate": 0.00012838715925987925, "loss": 0.9112, "step": 27377 }, { "epoch": 0.7029902699475704, "grad_norm": 0.75, "learning_rate": 0.00012838287867687645, "loss": 0.8555, "step": 27378 }, { "epoch": 0.7030159471434922, "grad_norm": 0.7578125, "learning_rate": 0.00012837859803730908, "loss": 0.6447, "step": 27379 }, { "epoch": 0.703041624339414, "grad_norm": 0.796875, "learning_rate": 0.0001283743173411858, "loss": 0.7558, "step": 27380 }, { "epoch": 0.7030673015353358, "grad_norm": 0.7578125, "learning_rate": 0.00012837003658851503, "loss": 0.8854, "step": 27381 }, { "epoch": 0.7030929787312576, "grad_norm": 0.73828125, "learning_rate": 0.00012836575577930537, "loss": 0.791, "step": 27382 }, { "epoch": 0.7031186559271795, "grad_norm": 0.80078125, "learning_rate": 0.0001283614749135653, "loss": 0.7961, "step": 27383 }, { "epoch": 0.7031443331231013, "grad_norm": 0.83203125, "learning_rate": 0.00012835719399130336, "loss": 0.879, "step": 27384 }, { "epoch": 0.7031700103190232, "grad_norm": 0.7109375, "learning_rate": 0.0001283529130125281, "loss": 0.7689, "step": 27385 }, { "epoch": 0.7031956875149449, "grad_norm": 0.84765625, "learning_rate": 0.00012834863197724804, "loss": 0.8158, "step": 27386 }, { "epoch": 0.7032213647108667, "grad_norm": 0.73046875, "learning_rate": 0.00012834435088547174, "loss": 0.7861, "step": 27387 }, { "epoch": 0.7032470419067886, "grad_norm": 0.734375, "learning_rate": 0.0001283400697372077, "loss": 0.7973, "step": 27388 }, { "epoch": 0.7032727191027104, "grad_norm": 0.8671875, "learning_rate": 0.00012833578853246443, "loss": 0.8056, "step": 27389 }, { "epoch": 0.7032983962986322, "grad_norm": 0.75, "learning_rate": 0.00012833150727125053, "loss": 0.9349, "step": 27390 }, { "epoch": 0.7033240734945541, "grad_norm": 0.80859375, "learning_rate": 0.0001283272259535745, "loss": 0.7807, "step": 27391 }, { "epoch": 0.7033497506904759, "grad_norm": 0.87890625, "learning_rate": 0.00012832294457944482, "loss": 0.8716, "step": 27392 }, { "epoch": 0.7033754278863976, "grad_norm": 0.76953125, "learning_rate": 0.00012831866314887014, "loss": 0.7879, "step": 27393 }, { "epoch": 0.7034011050823195, "grad_norm": 0.88671875, "learning_rate": 0.0001283143816618589, "loss": 0.825, "step": 27394 }, { "epoch": 0.7034267822782413, "grad_norm": 0.75, "learning_rate": 0.00012831010011841963, "loss": 0.802, "step": 27395 }, { "epoch": 0.7034524594741631, "grad_norm": 0.8203125, "learning_rate": 0.0001283058185185609, "loss": 0.7747, "step": 27396 }, { "epoch": 0.703478136670085, "grad_norm": 0.81640625, "learning_rate": 0.00012830153686229124, "loss": 0.9454, "step": 27397 }, { "epoch": 0.7035038138660068, "grad_norm": 0.76171875, "learning_rate": 0.00012829725514961913, "loss": 0.8095, "step": 27398 }, { "epoch": 0.7035294910619286, "grad_norm": 0.7265625, "learning_rate": 0.00012829297338055317, "loss": 0.7196, "step": 27399 }, { "epoch": 0.7035551682578504, "grad_norm": 0.78125, "learning_rate": 0.0001282886915551019, "loss": 0.9866, "step": 27400 }, { "epoch": 0.7035808454537722, "grad_norm": 0.7734375, "learning_rate": 0.0001282844096732738, "loss": 0.7999, "step": 27401 }, { "epoch": 0.703606522649694, "grad_norm": 0.77734375, "learning_rate": 0.00012828012773507744, "loss": 0.9051, "step": 27402 }, { "epoch": 0.7036321998456159, "grad_norm": 0.87109375, "learning_rate": 0.0001282758457405213, "loss": 0.9046, "step": 27403 }, { "epoch": 0.7036578770415377, "grad_norm": 0.79296875, "learning_rate": 0.00012827156368961398, "loss": 0.8828, "step": 27404 }, { "epoch": 0.7036835542374595, "grad_norm": 0.84765625, "learning_rate": 0.000128267281582364, "loss": 0.791, "step": 27405 }, { "epoch": 0.7037092314333813, "grad_norm": 0.78125, "learning_rate": 0.00012826299941877983, "loss": 0.8858, "step": 27406 }, { "epoch": 0.7037349086293031, "grad_norm": 0.7734375, "learning_rate": 0.00012825871719887016, "loss": 0.8229, "step": 27407 }, { "epoch": 0.7037605858252249, "grad_norm": 0.8046875, "learning_rate": 0.00012825443492264332, "loss": 0.7842, "step": 27408 }, { "epoch": 0.7037862630211468, "grad_norm": 0.8125, "learning_rate": 0.00012825015259010798, "loss": 0.8479, "step": 27409 }, { "epoch": 0.7038119402170686, "grad_norm": 0.77734375, "learning_rate": 0.00012824587020127264, "loss": 0.8499, "step": 27410 }, { "epoch": 0.7038376174129904, "grad_norm": 0.6640625, "learning_rate": 0.0001282415877561458, "loss": 0.7515, "step": 27411 }, { "epoch": 0.7038632946089123, "grad_norm": 0.78125, "learning_rate": 0.00012823730525473608, "loss": 0.9866, "step": 27412 }, { "epoch": 0.703888971804834, "grad_norm": 0.734375, "learning_rate": 0.00012823302269705192, "loss": 0.8632, "step": 27413 }, { "epoch": 0.7039146490007558, "grad_norm": 0.734375, "learning_rate": 0.00012822874008310193, "loss": 0.9019, "step": 27414 }, { "epoch": 0.7039403261966777, "grad_norm": 0.76953125, "learning_rate": 0.0001282244574128946, "loss": 0.8532, "step": 27415 }, { "epoch": 0.7039660033925995, "grad_norm": 0.82421875, "learning_rate": 0.00012822017468643843, "loss": 0.8807, "step": 27416 }, { "epoch": 0.7039916805885214, "grad_norm": 0.73828125, "learning_rate": 0.00012821589190374207, "loss": 0.9231, "step": 27417 }, { "epoch": 0.7040173577844432, "grad_norm": 0.75, "learning_rate": 0.00012821160906481395, "loss": 0.9148, "step": 27418 }, { "epoch": 0.704043034980365, "grad_norm": 0.78515625, "learning_rate": 0.00012820732616966262, "loss": 0.8379, "step": 27419 }, { "epoch": 0.7040687121762867, "grad_norm": 0.734375, "learning_rate": 0.0001282030432182967, "loss": 0.8003, "step": 27420 }, { "epoch": 0.7040943893722086, "grad_norm": 0.73046875, "learning_rate": 0.0001281987602107246, "loss": 0.7873, "step": 27421 }, { "epoch": 0.7041200665681304, "grad_norm": 0.734375, "learning_rate": 0.00012819447714695496, "loss": 0.8964, "step": 27422 }, { "epoch": 0.7041457437640523, "grad_norm": 0.75, "learning_rate": 0.00012819019402699627, "loss": 0.9465, "step": 27423 }, { "epoch": 0.7041714209599741, "grad_norm": 0.765625, "learning_rate": 0.00012818591085085704, "loss": 0.7966, "step": 27424 }, { "epoch": 0.7041970981558959, "grad_norm": 0.81640625, "learning_rate": 0.00012818162761854584, "loss": 0.8315, "step": 27425 }, { "epoch": 0.7042227753518177, "grad_norm": 0.74609375, "learning_rate": 0.00012817734433007123, "loss": 0.8689, "step": 27426 }, { "epoch": 0.7042484525477395, "grad_norm": 0.75390625, "learning_rate": 0.0001281730609854417, "loss": 0.9425, "step": 27427 }, { "epoch": 0.7042741297436613, "grad_norm": 0.90625, "learning_rate": 0.0001281687775846658, "loss": 0.9166, "step": 27428 }, { "epoch": 0.7042998069395832, "grad_norm": 0.984375, "learning_rate": 0.0001281644941277521, "loss": 0.7772, "step": 27429 }, { "epoch": 0.704325484135505, "grad_norm": 0.765625, "learning_rate": 0.00012816021061470907, "loss": 0.7137, "step": 27430 }, { "epoch": 0.7043511613314268, "grad_norm": 0.75, "learning_rate": 0.0001281559270455453, "loss": 0.8446, "step": 27431 }, { "epoch": 0.7043768385273487, "grad_norm": 0.83203125, "learning_rate": 0.00012815164342026928, "loss": 0.8508, "step": 27432 }, { "epoch": 0.7044025157232704, "grad_norm": 0.796875, "learning_rate": 0.00012814735973888964, "loss": 0.7535, "step": 27433 }, { "epoch": 0.7044281929191922, "grad_norm": 0.75, "learning_rate": 0.00012814307600141485, "loss": 0.7368, "step": 27434 }, { "epoch": 0.7044538701151141, "grad_norm": 0.75390625, "learning_rate": 0.0001281387922078534, "loss": 0.7736, "step": 27435 }, { "epoch": 0.7044795473110359, "grad_norm": 0.75390625, "learning_rate": 0.0001281345083582139, "loss": 0.968, "step": 27436 }, { "epoch": 0.7045052245069577, "grad_norm": 0.80859375, "learning_rate": 0.00012813022445250485, "loss": 0.8564, "step": 27437 }, { "epoch": 0.7045309017028796, "grad_norm": 0.8203125, "learning_rate": 0.00012812594049073484, "loss": 0.8142, "step": 27438 }, { "epoch": 0.7045565788988014, "grad_norm": 0.84375, "learning_rate": 0.00012812165647291236, "loss": 0.7227, "step": 27439 }, { "epoch": 0.7045822560947231, "grad_norm": 0.74609375, "learning_rate": 0.00012811737239904595, "loss": 0.7702, "step": 27440 }, { "epoch": 0.704607933290645, "grad_norm": 0.7734375, "learning_rate": 0.00012811308826914418, "loss": 0.7502, "step": 27441 }, { "epoch": 0.7046336104865668, "grad_norm": 0.76171875, "learning_rate": 0.00012810880408321553, "loss": 0.9132, "step": 27442 }, { "epoch": 0.7046592876824886, "grad_norm": 0.77734375, "learning_rate": 0.00012810451984126856, "loss": 0.8284, "step": 27443 }, { "epoch": 0.7046849648784105, "grad_norm": 0.83984375, "learning_rate": 0.00012810023554331185, "loss": 0.7877, "step": 27444 }, { "epoch": 0.7047106420743323, "grad_norm": 0.765625, "learning_rate": 0.00012809595118935392, "loss": 0.8397, "step": 27445 }, { "epoch": 0.704736319270254, "grad_norm": 0.76171875, "learning_rate": 0.0001280916667794033, "loss": 0.8123, "step": 27446 }, { "epoch": 0.7047619964661759, "grad_norm": 0.796875, "learning_rate": 0.00012808738231346852, "loss": 0.7637, "step": 27447 }, { "epoch": 0.7047876736620977, "grad_norm": 0.859375, "learning_rate": 0.00012808309779155808, "loss": 0.7733, "step": 27448 }, { "epoch": 0.7048133508580195, "grad_norm": 0.75390625, "learning_rate": 0.00012807881321368062, "loss": 0.7576, "step": 27449 }, { "epoch": 0.7048390280539414, "grad_norm": 0.76953125, "learning_rate": 0.0001280745285798446, "loss": 0.7701, "step": 27450 }, { "epoch": 0.7048647052498632, "grad_norm": 0.8203125, "learning_rate": 0.00012807024389005855, "loss": 0.8217, "step": 27451 }, { "epoch": 0.7048903824457851, "grad_norm": 0.7578125, "learning_rate": 0.00012806595914433108, "loss": 0.8844, "step": 27452 }, { "epoch": 0.7049160596417068, "grad_norm": 0.796875, "learning_rate": 0.00012806167434267066, "loss": 1.0064, "step": 27453 }, { "epoch": 0.7049417368376286, "grad_norm": 0.8359375, "learning_rate": 0.00012805738948508586, "loss": 0.7964, "step": 27454 }, { "epoch": 0.7049674140335505, "grad_norm": 0.703125, "learning_rate": 0.00012805310457158527, "loss": 0.739, "step": 27455 }, { "epoch": 0.7049930912294723, "grad_norm": 0.78125, "learning_rate": 0.00012804881960217732, "loss": 0.9082, "step": 27456 }, { "epoch": 0.7050187684253941, "grad_norm": 0.70703125, "learning_rate": 0.00012804453457687063, "loss": 0.8059, "step": 27457 }, { "epoch": 0.705044445621316, "grad_norm": 0.8046875, "learning_rate": 0.00012804024949567371, "loss": 0.7195, "step": 27458 }, { "epoch": 0.7050701228172378, "grad_norm": 0.74609375, "learning_rate": 0.0001280359643585951, "loss": 0.8669, "step": 27459 }, { "epoch": 0.7050958000131595, "grad_norm": 0.82421875, "learning_rate": 0.00012803167916564337, "loss": 1.0048, "step": 27460 }, { "epoch": 0.7051214772090814, "grad_norm": 0.734375, "learning_rate": 0.00012802739391682699, "loss": 0.9292, "step": 27461 }, { "epoch": 0.7051471544050032, "grad_norm": 0.83203125, "learning_rate": 0.0001280231086121546, "loss": 0.9358, "step": 27462 }, { "epoch": 0.705172831600925, "grad_norm": 0.765625, "learning_rate": 0.00012801882325163465, "loss": 0.7792, "step": 27463 }, { "epoch": 0.7051985087968469, "grad_norm": 0.734375, "learning_rate": 0.00012801453783527572, "loss": 0.7641, "step": 27464 }, { "epoch": 0.7052241859927687, "grad_norm": 0.81640625, "learning_rate": 0.00012801025236308634, "loss": 0.852, "step": 27465 }, { "epoch": 0.7052498631886904, "grad_norm": 0.78125, "learning_rate": 0.00012800596683507507, "loss": 0.854, "step": 27466 }, { "epoch": 0.7052755403846123, "grad_norm": 0.77734375, "learning_rate": 0.00012800168125125044, "loss": 0.8684, "step": 27467 }, { "epoch": 0.7053012175805341, "grad_norm": 0.73046875, "learning_rate": 0.000127997395611621, "loss": 0.7614, "step": 27468 }, { "epoch": 0.7053268947764559, "grad_norm": 0.83984375, "learning_rate": 0.00012799310991619525, "loss": 0.9294, "step": 27469 }, { "epoch": 0.7053525719723778, "grad_norm": 0.8046875, "learning_rate": 0.00012798882416498178, "loss": 0.9476, "step": 27470 }, { "epoch": 0.7053782491682996, "grad_norm": 0.7734375, "learning_rate": 0.0001279845383579891, "loss": 0.8185, "step": 27471 }, { "epoch": 0.7054039263642214, "grad_norm": 0.76953125, "learning_rate": 0.00012798025249522577, "loss": 0.787, "step": 27472 }, { "epoch": 0.7054296035601432, "grad_norm": 0.75390625, "learning_rate": 0.00012797596657670033, "loss": 0.8307, "step": 27473 }, { "epoch": 0.705455280756065, "grad_norm": 0.78125, "learning_rate": 0.00012797168060242132, "loss": 0.8755, "step": 27474 }, { "epoch": 0.7054809579519868, "grad_norm": 0.7578125, "learning_rate": 0.00012796739457239723, "loss": 0.7762, "step": 27475 }, { "epoch": 0.7055066351479087, "grad_norm": 0.7265625, "learning_rate": 0.0001279631084866367, "loss": 0.8045, "step": 27476 }, { "epoch": 0.7055323123438305, "grad_norm": 0.82421875, "learning_rate": 0.00012795882234514818, "loss": 0.8099, "step": 27477 }, { "epoch": 0.7055579895397524, "grad_norm": 0.7265625, "learning_rate": 0.00012795453614794028, "loss": 0.814, "step": 27478 }, { "epoch": 0.7055836667356741, "grad_norm": 0.83203125, "learning_rate": 0.00012795024989502155, "loss": 0.9553, "step": 27479 }, { "epoch": 0.7056093439315959, "grad_norm": 0.80078125, "learning_rate": 0.00012794596358640042, "loss": 0.8861, "step": 27480 }, { "epoch": 0.7056350211275177, "grad_norm": 0.73828125, "learning_rate": 0.00012794167722208555, "loss": 0.8515, "step": 27481 }, { "epoch": 0.7056606983234396, "grad_norm": 0.78515625, "learning_rate": 0.00012793739080208547, "loss": 0.7613, "step": 27482 }, { "epoch": 0.7056863755193614, "grad_norm": 0.78515625, "learning_rate": 0.00012793310432640865, "loss": 0.8724, "step": 27483 }, { "epoch": 0.7057120527152833, "grad_norm": 0.7109375, "learning_rate": 0.00012792881779506368, "loss": 0.7622, "step": 27484 }, { "epoch": 0.7057377299112051, "grad_norm": 0.8203125, "learning_rate": 0.0001279245312080591, "loss": 0.8662, "step": 27485 }, { "epoch": 0.7057634071071268, "grad_norm": 1.015625, "learning_rate": 0.00012792024456540347, "loss": 0.8431, "step": 27486 }, { "epoch": 0.7057890843030487, "grad_norm": 0.78125, "learning_rate": 0.00012791595786710533, "loss": 0.7418, "step": 27487 }, { "epoch": 0.7058147614989705, "grad_norm": 0.74609375, "learning_rate": 0.00012791167111317317, "loss": 0.8417, "step": 27488 }, { "epoch": 0.7058404386948923, "grad_norm": 0.73828125, "learning_rate": 0.0001279073843036156, "loss": 0.8355, "step": 27489 }, { "epoch": 0.7058661158908142, "grad_norm": 0.81640625, "learning_rate": 0.00012790309743844112, "loss": 0.9422, "step": 27490 }, { "epoch": 0.705891793086736, "grad_norm": 0.70703125, "learning_rate": 0.00012789881051765827, "loss": 0.6774, "step": 27491 }, { "epoch": 0.7059174702826578, "grad_norm": 0.765625, "learning_rate": 0.00012789452354127563, "loss": 0.8725, "step": 27492 }, { "epoch": 0.7059431474785796, "grad_norm": 0.75, "learning_rate": 0.00012789023650930172, "loss": 0.7472, "step": 27493 }, { "epoch": 0.7059688246745014, "grad_norm": 0.78125, "learning_rate": 0.00012788594942174508, "loss": 0.8071, "step": 27494 }, { "epoch": 0.7059945018704232, "grad_norm": 0.8125, "learning_rate": 0.00012788166227861426, "loss": 0.8263, "step": 27495 }, { "epoch": 0.7060201790663451, "grad_norm": 0.76953125, "learning_rate": 0.00012787737507991782, "loss": 0.8502, "step": 27496 }, { "epoch": 0.7060458562622669, "grad_norm": 0.87109375, "learning_rate": 0.00012787308782566428, "loss": 0.9177, "step": 27497 }, { "epoch": 0.7060715334581887, "grad_norm": 0.83984375, "learning_rate": 0.0001278688005158622, "loss": 0.9769, "step": 27498 }, { "epoch": 0.7060972106541105, "grad_norm": 0.71484375, "learning_rate": 0.00012786451315052012, "loss": 0.8166, "step": 27499 }, { "epoch": 0.7061228878500323, "grad_norm": 0.765625, "learning_rate": 0.00012786022572964658, "loss": 0.7703, "step": 27500 }, { "epoch": 0.7061485650459541, "grad_norm": 0.71484375, "learning_rate": 0.00012785593825325012, "loss": 0.9251, "step": 27501 }, { "epoch": 0.706174242241876, "grad_norm": 0.83203125, "learning_rate": 0.00012785165072133929, "loss": 0.7908, "step": 27502 }, { "epoch": 0.7061999194377978, "grad_norm": 0.7734375, "learning_rate": 0.00012784736313392264, "loss": 0.9393, "step": 27503 }, { "epoch": 0.7062255966337196, "grad_norm": 0.78515625, "learning_rate": 0.00012784307549100872, "loss": 0.7962, "step": 27504 }, { "epoch": 0.7062512738296415, "grad_norm": 0.79296875, "learning_rate": 0.00012783878779260607, "loss": 0.8227, "step": 27505 }, { "epoch": 0.7062769510255632, "grad_norm": 0.80859375, "learning_rate": 0.00012783450003872322, "loss": 0.8073, "step": 27506 }, { "epoch": 0.706302628221485, "grad_norm": 0.82421875, "learning_rate": 0.00012783021222936874, "loss": 0.8682, "step": 27507 }, { "epoch": 0.7063283054174069, "grad_norm": 0.76953125, "learning_rate": 0.0001278259243645511, "loss": 0.852, "step": 27508 }, { "epoch": 0.7063539826133287, "grad_norm": 0.8515625, "learning_rate": 0.00012782163644427895, "loss": 0.8016, "step": 27509 }, { "epoch": 0.7063796598092505, "grad_norm": 0.73046875, "learning_rate": 0.00012781734846856082, "loss": 0.8636, "step": 27510 }, { "epoch": 0.7064053370051724, "grad_norm": 0.7734375, "learning_rate": 0.0001278130604374052, "loss": 0.9689, "step": 27511 }, { "epoch": 0.7064310142010942, "grad_norm": 0.7734375, "learning_rate": 0.00012780877235082068, "loss": 0.785, "step": 27512 }, { "epoch": 0.706456691397016, "grad_norm": 0.83984375, "learning_rate": 0.00012780448420881575, "loss": 0.7869, "step": 27513 }, { "epoch": 0.7064823685929378, "grad_norm": 0.83984375, "learning_rate": 0.00012780019601139902, "loss": 0.8029, "step": 27514 }, { "epoch": 0.7065080457888596, "grad_norm": 0.75, "learning_rate": 0.000127795907758579, "loss": 0.8572, "step": 27515 }, { "epoch": 0.7065337229847815, "grad_norm": 0.7421875, "learning_rate": 0.00012779161945036428, "loss": 0.9805, "step": 27516 }, { "epoch": 0.7065594001807033, "grad_norm": 0.76953125, "learning_rate": 0.0001277873310867633, "loss": 0.8402, "step": 27517 }, { "epoch": 0.7065850773766251, "grad_norm": 0.73828125, "learning_rate": 0.00012778304266778477, "loss": 0.8891, "step": 27518 }, { "epoch": 0.7066107545725469, "grad_norm": 0.81640625, "learning_rate": 0.0001277787541934371, "loss": 0.8814, "step": 27519 }, { "epoch": 0.7066364317684687, "grad_norm": 0.80078125, "learning_rate": 0.0001277744656637289, "loss": 0.7922, "step": 27520 }, { "epoch": 0.7066621089643905, "grad_norm": 0.80859375, "learning_rate": 0.00012777017707866865, "loss": 0.8268, "step": 27521 }, { "epoch": 0.7066877861603124, "grad_norm": 0.84765625, "learning_rate": 0.000127765888438265, "loss": 0.727, "step": 27522 }, { "epoch": 0.7067134633562342, "grad_norm": 0.80078125, "learning_rate": 0.0001277615997425264, "loss": 0.7806, "step": 27523 }, { "epoch": 0.706739140552156, "grad_norm": 0.78515625, "learning_rate": 0.00012775731099146147, "loss": 0.8765, "step": 27524 }, { "epoch": 0.7067648177480779, "grad_norm": 0.7578125, "learning_rate": 0.0001277530221850787, "loss": 0.7771, "step": 27525 }, { "epoch": 0.7067904949439996, "grad_norm": 0.76171875, "learning_rate": 0.0001277487333233867, "loss": 0.7786, "step": 27526 }, { "epoch": 0.7068161721399214, "grad_norm": 0.796875, "learning_rate": 0.00012774444440639398, "loss": 0.8963, "step": 27527 }, { "epoch": 0.7068418493358433, "grad_norm": 0.73046875, "learning_rate": 0.00012774015543410904, "loss": 0.8822, "step": 27528 }, { "epoch": 0.7068675265317651, "grad_norm": 0.8515625, "learning_rate": 0.0001277358664065405, "loss": 0.8719, "step": 27529 }, { "epoch": 0.7068932037276869, "grad_norm": 0.78125, "learning_rate": 0.0001277315773236969, "loss": 0.7711, "step": 27530 }, { "epoch": 0.7069188809236088, "grad_norm": 0.73828125, "learning_rate": 0.00012772728818558678, "loss": 0.7691, "step": 27531 }, { "epoch": 0.7069445581195306, "grad_norm": 0.88671875, "learning_rate": 0.00012772299899221865, "loss": 0.8581, "step": 27532 }, { "epoch": 0.7069702353154523, "grad_norm": 0.76953125, "learning_rate": 0.0001277187097436011, "loss": 0.7911, "step": 27533 }, { "epoch": 0.7069959125113742, "grad_norm": 0.75390625, "learning_rate": 0.00012771442043974264, "loss": 0.7785, "step": 27534 }, { "epoch": 0.707021589707296, "grad_norm": 0.78125, "learning_rate": 0.00012771013108065188, "loss": 0.8266, "step": 27535 }, { "epoch": 0.7070472669032178, "grad_norm": 0.83984375, "learning_rate": 0.0001277058416663373, "loss": 0.8395, "step": 27536 }, { "epoch": 0.7070729440991397, "grad_norm": 0.796875, "learning_rate": 0.00012770155219680752, "loss": 0.8121, "step": 27537 }, { "epoch": 0.7070986212950615, "grad_norm": 0.84765625, "learning_rate": 0.00012769726267207102, "loss": 0.7187, "step": 27538 }, { "epoch": 0.7071242984909832, "grad_norm": 0.81640625, "learning_rate": 0.00012769297309213638, "loss": 0.8048, "step": 27539 }, { "epoch": 0.7071499756869051, "grad_norm": 0.76953125, "learning_rate": 0.00012768868345701216, "loss": 0.9022, "step": 27540 }, { "epoch": 0.7071756528828269, "grad_norm": 0.73828125, "learning_rate": 0.00012768439376670686, "loss": 0.8983, "step": 27541 }, { "epoch": 0.7072013300787487, "grad_norm": 0.7109375, "learning_rate": 0.0001276801040212291, "loss": 0.8131, "step": 27542 }, { "epoch": 0.7072270072746706, "grad_norm": 0.81640625, "learning_rate": 0.00012767581422058738, "loss": 0.8159, "step": 27543 }, { "epoch": 0.7072526844705924, "grad_norm": 0.7578125, "learning_rate": 0.00012767152436479027, "loss": 0.897, "step": 27544 }, { "epoch": 0.7072783616665143, "grad_norm": 0.7578125, "learning_rate": 0.00012766723445384632, "loss": 0.8261, "step": 27545 }, { "epoch": 0.707304038862436, "grad_norm": 0.73828125, "learning_rate": 0.00012766294448776403, "loss": 0.87, "step": 27546 }, { "epoch": 0.7073297160583578, "grad_norm": 0.85546875, "learning_rate": 0.00012765865446655203, "loss": 0.9679, "step": 27547 }, { "epoch": 0.7073553932542797, "grad_norm": 0.79296875, "learning_rate": 0.00012765436439021884, "loss": 0.9759, "step": 27548 }, { "epoch": 0.7073810704502015, "grad_norm": 0.703125, "learning_rate": 0.00012765007425877294, "loss": 0.6806, "step": 27549 }, { "epoch": 0.7074067476461233, "grad_norm": 0.75390625, "learning_rate": 0.000127645784072223, "loss": 0.7772, "step": 27550 }, { "epoch": 0.7074324248420452, "grad_norm": 0.7265625, "learning_rate": 0.00012764149383057748, "loss": 0.8224, "step": 27551 }, { "epoch": 0.707458102037967, "grad_norm": 0.70703125, "learning_rate": 0.00012763720353384497, "loss": 0.7678, "step": 27552 }, { "epoch": 0.7074837792338887, "grad_norm": 0.75390625, "learning_rate": 0.000127632913182034, "loss": 0.7965, "step": 27553 }, { "epoch": 0.7075094564298106, "grad_norm": 0.828125, "learning_rate": 0.00012762862277515313, "loss": 0.9505, "step": 27554 }, { "epoch": 0.7075351336257324, "grad_norm": 0.86328125, "learning_rate": 0.00012762433231321092, "loss": 0.8418, "step": 27555 }, { "epoch": 0.7075608108216542, "grad_norm": 0.8359375, "learning_rate": 0.0001276200417962159, "loss": 0.7854, "step": 27556 }, { "epoch": 0.7075864880175761, "grad_norm": 0.7890625, "learning_rate": 0.00012761575122417665, "loss": 1.0013, "step": 27557 }, { "epoch": 0.7076121652134979, "grad_norm": 0.74609375, "learning_rate": 0.0001276114605971017, "loss": 0.7969, "step": 27558 }, { "epoch": 0.7076378424094196, "grad_norm": 0.80859375, "learning_rate": 0.0001276071699149996, "loss": 0.7566, "step": 27559 }, { "epoch": 0.7076635196053415, "grad_norm": 0.81640625, "learning_rate": 0.00012760287917787887, "loss": 0.8809, "step": 27560 }, { "epoch": 0.7076891968012633, "grad_norm": 0.796875, "learning_rate": 0.00012759858838574814, "loss": 0.8778, "step": 27561 }, { "epoch": 0.7077148739971851, "grad_norm": 0.76953125, "learning_rate": 0.00012759429753861587, "loss": 0.8022, "step": 27562 }, { "epoch": 0.707740551193107, "grad_norm": 0.8359375, "learning_rate": 0.0001275900066364907, "loss": 0.8393, "step": 27563 }, { "epoch": 0.7077662283890288, "grad_norm": 0.88671875, "learning_rate": 0.00012758571567938115, "loss": 0.9885, "step": 27564 }, { "epoch": 0.7077919055849506, "grad_norm": 0.7890625, "learning_rate": 0.00012758142466729572, "loss": 0.8553, "step": 27565 }, { "epoch": 0.7078175827808724, "grad_norm": 0.75, "learning_rate": 0.00012757713360024304, "loss": 0.736, "step": 27566 }, { "epoch": 0.7078432599767942, "grad_norm": 0.7109375, "learning_rate": 0.0001275728424782316, "loss": 0.771, "step": 27567 }, { "epoch": 0.707868937172716, "grad_norm": 0.75, "learning_rate": 0.00012756855130126996, "loss": 0.7653, "step": 27568 }, { "epoch": 0.7078946143686379, "grad_norm": 0.79296875, "learning_rate": 0.0001275642600693667, "loss": 0.8526, "step": 27569 }, { "epoch": 0.7079202915645597, "grad_norm": 0.89453125, "learning_rate": 0.00012755996878253037, "loss": 0.915, "step": 27570 }, { "epoch": 0.7079459687604815, "grad_norm": 0.72265625, "learning_rate": 0.00012755567744076953, "loss": 0.8592, "step": 27571 }, { "epoch": 0.7079716459564034, "grad_norm": 0.81640625, "learning_rate": 0.0001275513860440927, "loss": 0.8111, "step": 27572 }, { "epoch": 0.7079973231523251, "grad_norm": 0.79296875, "learning_rate": 0.0001275470945925084, "loss": 0.7868, "step": 27573 }, { "epoch": 0.708023000348247, "grad_norm": 0.82421875, "learning_rate": 0.0001275428030860253, "loss": 0.8195, "step": 27574 }, { "epoch": 0.7080486775441688, "grad_norm": 0.7890625, "learning_rate": 0.00012753851152465183, "loss": 0.8459, "step": 27575 }, { "epoch": 0.7080743547400906, "grad_norm": 0.73828125, "learning_rate": 0.0001275342199083966, "loss": 0.8034, "step": 27576 }, { "epoch": 0.7081000319360125, "grad_norm": 0.82421875, "learning_rate": 0.00012752992823726817, "loss": 0.8003, "step": 27577 }, { "epoch": 0.7081257091319343, "grad_norm": 0.84765625, "learning_rate": 0.00012752563651127508, "loss": 0.8026, "step": 27578 }, { "epoch": 0.708151386327856, "grad_norm": 0.734375, "learning_rate": 0.0001275213447304259, "loss": 0.6254, "step": 27579 }, { "epoch": 0.7081770635237778, "grad_norm": 0.7109375, "learning_rate": 0.00012751705289472916, "loss": 0.7673, "step": 27580 }, { "epoch": 0.7082027407196997, "grad_norm": 0.79296875, "learning_rate": 0.00012751276100419337, "loss": 0.8615, "step": 27581 }, { "epoch": 0.7082284179156215, "grad_norm": 0.79296875, "learning_rate": 0.00012750846905882715, "loss": 0.809, "step": 27582 }, { "epoch": 0.7082540951115434, "grad_norm": 0.8046875, "learning_rate": 0.0001275041770586391, "loss": 0.7856, "step": 27583 }, { "epoch": 0.7082797723074652, "grad_norm": 0.7890625, "learning_rate": 0.00012749988500363764, "loss": 0.7984, "step": 27584 }, { "epoch": 0.708305449503387, "grad_norm": 0.7578125, "learning_rate": 0.00012749559289383143, "loss": 0.8364, "step": 27585 }, { "epoch": 0.7083311266993088, "grad_norm": 0.80859375, "learning_rate": 0.00012749130072922896, "loss": 0.6848, "step": 27586 }, { "epoch": 0.7083568038952306, "grad_norm": 0.7890625, "learning_rate": 0.00012748700850983882, "loss": 0.8544, "step": 27587 }, { "epoch": 0.7083824810911524, "grad_norm": 0.76171875, "learning_rate": 0.0001274827162356696, "loss": 0.9597, "step": 27588 }, { "epoch": 0.7084081582870743, "grad_norm": 0.80859375, "learning_rate": 0.00012747842390672972, "loss": 0.7614, "step": 27589 }, { "epoch": 0.7084338354829961, "grad_norm": 0.84765625, "learning_rate": 0.0001274741315230279, "loss": 0.9058, "step": 27590 }, { "epoch": 0.7084595126789179, "grad_norm": 0.7265625, "learning_rate": 0.00012746983908457258, "loss": 0.877, "step": 27591 }, { "epoch": 0.7084851898748398, "grad_norm": 0.8515625, "learning_rate": 0.00012746554659137237, "loss": 0.8316, "step": 27592 }, { "epoch": 0.7085108670707615, "grad_norm": 0.76171875, "learning_rate": 0.0001274612540434358, "loss": 0.8177, "step": 27593 }, { "epoch": 0.7085365442666833, "grad_norm": 0.77734375, "learning_rate": 0.00012745696144077142, "loss": 0.8284, "step": 27594 }, { "epoch": 0.7085622214626052, "grad_norm": 0.91796875, "learning_rate": 0.0001274526687833878, "loss": 0.9127, "step": 27595 }, { "epoch": 0.708587898658527, "grad_norm": 0.75, "learning_rate": 0.00012744837607129352, "loss": 0.7009, "step": 27596 }, { "epoch": 0.7086135758544488, "grad_norm": 0.7890625, "learning_rate": 0.00012744408330449708, "loss": 0.9618, "step": 27597 }, { "epoch": 0.7086392530503707, "grad_norm": 0.79296875, "learning_rate": 0.00012743979048300707, "loss": 0.7447, "step": 27598 }, { "epoch": 0.7086649302462924, "grad_norm": 0.75390625, "learning_rate": 0.00012743549760683203, "loss": 0.7246, "step": 27599 }, { "epoch": 0.7086906074422142, "grad_norm": 0.8671875, "learning_rate": 0.00012743120467598052, "loss": 0.9863, "step": 27600 }, { "epoch": 0.7087162846381361, "grad_norm": 0.75, "learning_rate": 0.0001274269116904611, "loss": 0.8134, "step": 27601 }, { "epoch": 0.7087419618340579, "grad_norm": 0.76171875, "learning_rate": 0.00012742261865028232, "loss": 0.7738, "step": 27602 }, { "epoch": 0.7087676390299797, "grad_norm": 0.76171875, "learning_rate": 0.00012741832555545273, "loss": 0.8125, "step": 27603 }, { "epoch": 0.7087933162259016, "grad_norm": 0.80859375, "learning_rate": 0.00012741403240598093, "loss": 0.8844, "step": 27604 }, { "epoch": 0.7088189934218234, "grad_norm": 0.76171875, "learning_rate": 0.00012740973920187537, "loss": 0.9122, "step": 27605 }, { "epoch": 0.7088446706177451, "grad_norm": 0.78125, "learning_rate": 0.00012740544594314475, "loss": 0.7789, "step": 27606 }, { "epoch": 0.708870347813667, "grad_norm": 0.77734375, "learning_rate": 0.00012740115262979752, "loss": 0.7547, "step": 27607 }, { "epoch": 0.7088960250095888, "grad_norm": 0.83984375, "learning_rate": 0.00012739685926184223, "loss": 0.8416, "step": 27608 }, { "epoch": 0.7089217022055107, "grad_norm": 0.7265625, "learning_rate": 0.00012739256583928752, "loss": 0.8204, "step": 27609 }, { "epoch": 0.7089473794014325, "grad_norm": 0.78125, "learning_rate": 0.00012738827236214193, "loss": 0.8267, "step": 27610 }, { "epoch": 0.7089730565973543, "grad_norm": 0.73046875, "learning_rate": 0.00012738397883041392, "loss": 0.7241, "step": 27611 }, { "epoch": 0.7089987337932762, "grad_norm": 0.76953125, "learning_rate": 0.00012737968524411216, "loss": 0.8131, "step": 27612 }, { "epoch": 0.7090244109891979, "grad_norm": 0.83984375, "learning_rate": 0.00012737539160324513, "loss": 0.9167, "step": 27613 }, { "epoch": 0.7090500881851197, "grad_norm": 0.8359375, "learning_rate": 0.00012737109790782146, "loss": 0.8137, "step": 27614 }, { "epoch": 0.7090757653810416, "grad_norm": 0.76171875, "learning_rate": 0.00012736680415784962, "loss": 0.7348, "step": 27615 }, { "epoch": 0.7091014425769634, "grad_norm": 0.72265625, "learning_rate": 0.0001273625103533382, "loss": 0.7768, "step": 27616 }, { "epoch": 0.7091271197728852, "grad_norm": 0.8203125, "learning_rate": 0.00012735821649429582, "loss": 0.8839, "step": 27617 }, { "epoch": 0.7091527969688071, "grad_norm": 0.94921875, "learning_rate": 0.00012735392258073097, "loss": 0.8879, "step": 27618 }, { "epoch": 0.7091784741647288, "grad_norm": 0.828125, "learning_rate": 0.0001273496286126522, "loss": 0.8452, "step": 27619 }, { "epoch": 0.7092041513606506, "grad_norm": 0.8125, "learning_rate": 0.00012734533459006812, "loss": 0.6916, "step": 27620 }, { "epoch": 0.7092298285565725, "grad_norm": 0.7578125, "learning_rate": 0.00012734104051298722, "loss": 0.8197, "step": 27621 }, { "epoch": 0.7092555057524943, "grad_norm": 0.9140625, "learning_rate": 0.00012733674638141813, "loss": 0.7827, "step": 27622 }, { "epoch": 0.7092811829484161, "grad_norm": 0.96484375, "learning_rate": 0.00012733245219536939, "loss": 0.9173, "step": 27623 }, { "epoch": 0.709306860144338, "grad_norm": 0.79296875, "learning_rate": 0.0001273281579548495, "loss": 0.8022, "step": 27624 }, { "epoch": 0.7093325373402598, "grad_norm": 0.79296875, "learning_rate": 0.00012732386365986708, "loss": 0.8089, "step": 27625 }, { "epoch": 0.7093582145361815, "grad_norm": 0.7734375, "learning_rate": 0.00012731956931043064, "loss": 0.7762, "step": 27626 }, { "epoch": 0.7093838917321034, "grad_norm": 0.7265625, "learning_rate": 0.0001273152749065488, "loss": 0.7254, "step": 27627 }, { "epoch": 0.7094095689280252, "grad_norm": 0.79296875, "learning_rate": 0.0001273109804482301, "loss": 0.8523, "step": 27628 }, { "epoch": 0.709435246123947, "grad_norm": 0.83984375, "learning_rate": 0.00012730668593548302, "loss": 0.823, "step": 27629 }, { "epoch": 0.7094609233198689, "grad_norm": 0.765625, "learning_rate": 0.00012730239136831624, "loss": 0.7734, "step": 27630 }, { "epoch": 0.7094866005157907, "grad_norm": 0.765625, "learning_rate": 0.00012729809674673822, "loss": 0.7644, "step": 27631 }, { "epoch": 0.7095122777117125, "grad_norm": 1.0625, "learning_rate": 0.00012729380207075758, "loss": 0.9395, "step": 27632 }, { "epoch": 0.7095379549076343, "grad_norm": 0.7421875, "learning_rate": 0.0001272895073403829, "loss": 0.7792, "step": 27633 }, { "epoch": 0.7095636321035561, "grad_norm": 0.82421875, "learning_rate": 0.0001272852125556226, "loss": 0.8839, "step": 27634 }, { "epoch": 0.7095893092994779, "grad_norm": 0.8046875, "learning_rate": 0.0001272809177164854, "loss": 0.8078, "step": 27635 }, { "epoch": 0.7096149864953998, "grad_norm": 0.80078125, "learning_rate": 0.00012727662282297977, "loss": 0.9905, "step": 27636 }, { "epoch": 0.7096406636913216, "grad_norm": 0.79296875, "learning_rate": 0.00012727232787511432, "loss": 0.8326, "step": 27637 }, { "epoch": 0.7096663408872435, "grad_norm": 0.8125, "learning_rate": 0.0001272680328728976, "loss": 0.966, "step": 27638 }, { "epoch": 0.7096920180831652, "grad_norm": 0.8046875, "learning_rate": 0.0001272637378163381, "loss": 0.7764, "step": 27639 }, { "epoch": 0.709717695279087, "grad_norm": 0.8359375, "learning_rate": 0.00012725944270544444, "loss": 0.8511, "step": 27640 }, { "epoch": 0.7097433724750088, "grad_norm": 0.8359375, "learning_rate": 0.0001272551475402252, "loss": 0.8657, "step": 27641 }, { "epoch": 0.7097690496709307, "grad_norm": 0.7734375, "learning_rate": 0.00012725085232068888, "loss": 0.7589, "step": 27642 }, { "epoch": 0.7097947268668525, "grad_norm": 0.75, "learning_rate": 0.00012724655704684414, "loss": 0.7533, "step": 27643 }, { "epoch": 0.7098204040627744, "grad_norm": 0.7578125, "learning_rate": 0.00012724226171869943, "loss": 0.8245, "step": 27644 }, { "epoch": 0.7098460812586962, "grad_norm": 0.796875, "learning_rate": 0.00012723796633626336, "loss": 0.7941, "step": 27645 }, { "epoch": 0.7098717584546179, "grad_norm": 0.7109375, "learning_rate": 0.00012723367089954448, "loss": 0.7686, "step": 27646 }, { "epoch": 0.7098974356505398, "grad_norm": 0.828125, "learning_rate": 0.00012722937540855136, "loss": 0.8846, "step": 27647 }, { "epoch": 0.7099231128464616, "grad_norm": 0.73046875, "learning_rate": 0.00012722507986329254, "loss": 0.8758, "step": 27648 }, { "epoch": 0.7099487900423834, "grad_norm": 0.78515625, "learning_rate": 0.00012722078426377658, "loss": 0.9541, "step": 27649 }, { "epoch": 0.7099744672383053, "grad_norm": 0.7734375, "learning_rate": 0.0001272164886100121, "loss": 0.6981, "step": 27650 }, { "epoch": 0.7100001444342271, "grad_norm": 0.6953125, "learning_rate": 0.0001272121929020076, "loss": 0.6826, "step": 27651 }, { "epoch": 0.7100258216301489, "grad_norm": 0.80859375, "learning_rate": 0.00012720789713977163, "loss": 0.9983, "step": 27652 }, { "epoch": 0.7100514988260707, "grad_norm": 0.75, "learning_rate": 0.00012720360132331278, "loss": 0.8366, "step": 27653 }, { "epoch": 0.7100771760219925, "grad_norm": 0.78515625, "learning_rate": 0.00012719930545263965, "loss": 0.9011, "step": 27654 }, { "epoch": 0.7101028532179143, "grad_norm": 0.76953125, "learning_rate": 0.00012719500952776075, "loss": 0.9565, "step": 27655 }, { "epoch": 0.7101285304138362, "grad_norm": 0.71484375, "learning_rate": 0.00012719071354868464, "loss": 0.7432, "step": 27656 }, { "epoch": 0.710154207609758, "grad_norm": 0.84375, "learning_rate": 0.0001271864175154199, "loss": 0.878, "step": 27657 }, { "epoch": 0.7101798848056798, "grad_norm": 0.78125, "learning_rate": 0.00012718212142797506, "loss": 1.033, "step": 27658 }, { "epoch": 0.7102055620016016, "grad_norm": 0.75390625, "learning_rate": 0.00012717782528635872, "loss": 0.8244, "step": 27659 }, { "epoch": 0.7102312391975234, "grad_norm": 0.8203125, "learning_rate": 0.00012717352909057946, "loss": 0.8755, "step": 27660 }, { "epoch": 0.7102569163934452, "grad_norm": 0.80078125, "learning_rate": 0.0001271692328406458, "loss": 0.8416, "step": 27661 }, { "epoch": 0.7102825935893671, "grad_norm": 0.8671875, "learning_rate": 0.0001271649365365663, "loss": 0.9164, "step": 27662 }, { "epoch": 0.7103082707852889, "grad_norm": 0.78515625, "learning_rate": 0.0001271606401783495, "loss": 0.7898, "step": 27663 }, { "epoch": 0.7103339479812107, "grad_norm": 0.73046875, "learning_rate": 0.00012715634376600407, "loss": 0.8516, "step": 27664 }, { "epoch": 0.7103596251771326, "grad_norm": 0.8203125, "learning_rate": 0.00012715204729953848, "loss": 0.9209, "step": 27665 }, { "epoch": 0.7103853023730543, "grad_norm": 0.71875, "learning_rate": 0.00012714775077896125, "loss": 0.7688, "step": 27666 }, { "epoch": 0.7104109795689761, "grad_norm": 0.7890625, "learning_rate": 0.0001271434542042811, "loss": 0.9712, "step": 27667 }, { "epoch": 0.710436656764898, "grad_norm": 0.796875, "learning_rate": 0.00012713915757550645, "loss": 0.878, "step": 27668 }, { "epoch": 0.7104623339608198, "grad_norm": 0.765625, "learning_rate": 0.00012713486089264587, "loss": 0.8712, "step": 27669 }, { "epoch": 0.7104880111567417, "grad_norm": 0.77734375, "learning_rate": 0.00012713056415570803, "loss": 0.8606, "step": 27670 }, { "epoch": 0.7105136883526635, "grad_norm": 0.74609375, "learning_rate": 0.0001271262673647014, "loss": 0.8288, "step": 27671 }, { "epoch": 0.7105393655485853, "grad_norm": 0.77734375, "learning_rate": 0.00012712197051963458, "loss": 0.9312, "step": 27672 }, { "epoch": 0.710565042744507, "grad_norm": 0.6796875, "learning_rate": 0.0001271176736205161, "loss": 0.743, "step": 27673 }, { "epoch": 0.7105907199404289, "grad_norm": 0.81640625, "learning_rate": 0.00012711337666735455, "loss": 1.0312, "step": 27674 }, { "epoch": 0.7106163971363507, "grad_norm": 0.77734375, "learning_rate": 0.0001271090796601585, "loss": 0.7654, "step": 27675 }, { "epoch": 0.7106420743322726, "grad_norm": 0.89453125, "learning_rate": 0.0001271047825989365, "loss": 0.742, "step": 27676 }, { "epoch": 0.7106677515281944, "grad_norm": 0.73828125, "learning_rate": 0.00012710048548369713, "loss": 0.8489, "step": 27677 }, { "epoch": 0.7106934287241162, "grad_norm": 0.70703125, "learning_rate": 0.00012709618831444893, "loss": 0.7765, "step": 27678 }, { "epoch": 0.710719105920038, "grad_norm": 0.79296875, "learning_rate": 0.00012709189109120044, "loss": 1.006, "step": 27679 }, { "epoch": 0.7107447831159598, "grad_norm": 1.4140625, "learning_rate": 0.0001270875938139603, "loss": 0.8726, "step": 27680 }, { "epoch": 0.7107704603118816, "grad_norm": 0.79296875, "learning_rate": 0.00012708329648273705, "loss": 0.7837, "step": 27681 }, { "epoch": 0.7107961375078035, "grad_norm": 0.8359375, "learning_rate": 0.0001270789990975392, "loss": 0.7822, "step": 27682 }, { "epoch": 0.7108218147037253, "grad_norm": 0.84765625, "learning_rate": 0.0001270747016583754, "loss": 0.7022, "step": 27683 }, { "epoch": 0.7108474918996471, "grad_norm": 0.73046875, "learning_rate": 0.00012707040416525412, "loss": 0.7603, "step": 27684 }, { "epoch": 0.710873169095569, "grad_norm": 0.93359375, "learning_rate": 0.00012706610661818396, "loss": 0.8589, "step": 27685 }, { "epoch": 0.7108988462914907, "grad_norm": 0.7265625, "learning_rate": 0.0001270618090171735, "loss": 0.8469, "step": 27686 }, { "epoch": 0.7109245234874125, "grad_norm": 0.7578125, "learning_rate": 0.00012705751136223129, "loss": 0.7194, "step": 27687 }, { "epoch": 0.7109502006833344, "grad_norm": 0.8671875, "learning_rate": 0.00012705321365336593, "loss": 0.8006, "step": 27688 }, { "epoch": 0.7109758778792562, "grad_norm": 0.7890625, "learning_rate": 0.000127048915890586, "loss": 0.7512, "step": 27689 }, { "epoch": 0.711001555075178, "grad_norm": 0.79296875, "learning_rate": 0.00012704461807389996, "loss": 0.8143, "step": 27690 }, { "epoch": 0.7110272322710999, "grad_norm": 0.83203125, "learning_rate": 0.00012704032020331645, "loss": 1.0095, "step": 27691 }, { "epoch": 0.7110529094670216, "grad_norm": 0.7421875, "learning_rate": 0.00012703602227884405, "loss": 1.0343, "step": 27692 }, { "epoch": 0.7110785866629434, "grad_norm": 0.79296875, "learning_rate": 0.00012703172430049124, "loss": 0.8144, "step": 27693 }, { "epoch": 0.7111042638588653, "grad_norm": 0.71484375, "learning_rate": 0.00012702742626826669, "loss": 0.8402, "step": 27694 }, { "epoch": 0.7111299410547871, "grad_norm": 0.76171875, "learning_rate": 0.0001270231281821789, "loss": 0.7118, "step": 27695 }, { "epoch": 0.7111556182507089, "grad_norm": 0.7734375, "learning_rate": 0.0001270188300422365, "loss": 0.8158, "step": 27696 }, { "epoch": 0.7111812954466308, "grad_norm": 0.80078125, "learning_rate": 0.00012701453184844801, "loss": 0.8032, "step": 27697 }, { "epoch": 0.7112069726425526, "grad_norm": 0.76953125, "learning_rate": 0.00012701023360082195, "loss": 0.8701, "step": 27698 }, { "epoch": 0.7112326498384743, "grad_norm": 0.7265625, "learning_rate": 0.00012700593529936695, "loss": 0.8289, "step": 27699 }, { "epoch": 0.7112583270343962, "grad_norm": 0.83203125, "learning_rate": 0.00012700163694409159, "loss": 0.9478, "step": 27700 }, { "epoch": 0.711284004230318, "grad_norm": 0.79296875, "learning_rate": 0.00012699733853500434, "loss": 0.9622, "step": 27701 }, { "epoch": 0.7113096814262398, "grad_norm": 0.734375, "learning_rate": 0.0001269930400721139, "loss": 0.81, "step": 27702 }, { "epoch": 0.7113353586221617, "grad_norm": 0.74609375, "learning_rate": 0.00012698874155542874, "loss": 0.8943, "step": 27703 }, { "epoch": 0.7113610358180835, "grad_norm": 0.89453125, "learning_rate": 0.00012698444298495746, "loss": 0.8409, "step": 27704 }, { "epoch": 0.7113867130140054, "grad_norm": 0.84375, "learning_rate": 0.00012698014436070864, "loss": 0.7887, "step": 27705 }, { "epoch": 0.7114123902099271, "grad_norm": 0.828125, "learning_rate": 0.0001269758456826908, "loss": 0.8764, "step": 27706 }, { "epoch": 0.7114380674058489, "grad_norm": 0.80078125, "learning_rate": 0.00012697154695091257, "loss": 0.8343, "step": 27707 }, { "epoch": 0.7114637446017708, "grad_norm": 0.875, "learning_rate": 0.00012696724816538247, "loss": 0.8419, "step": 27708 }, { "epoch": 0.7114894217976926, "grad_norm": 0.734375, "learning_rate": 0.00012696294932610906, "loss": 0.7524, "step": 27709 }, { "epoch": 0.7115150989936144, "grad_norm": 0.79296875, "learning_rate": 0.00012695865043310095, "loss": 0.8757, "step": 27710 }, { "epoch": 0.7115407761895363, "grad_norm": 0.84765625, "learning_rate": 0.00012695435148636666, "loss": 0.8607, "step": 27711 }, { "epoch": 0.711566453385458, "grad_norm": 0.71875, "learning_rate": 0.0001269500524859148, "loss": 0.7702, "step": 27712 }, { "epoch": 0.7115921305813798, "grad_norm": 0.77734375, "learning_rate": 0.00012694575343175391, "loss": 0.8466, "step": 27713 }, { "epoch": 0.7116178077773017, "grad_norm": 0.79296875, "learning_rate": 0.00012694145432389257, "loss": 0.8171, "step": 27714 }, { "epoch": 0.7116434849732235, "grad_norm": 0.734375, "learning_rate": 0.00012693715516233934, "loss": 0.7184, "step": 27715 }, { "epoch": 0.7116691621691453, "grad_norm": 0.71484375, "learning_rate": 0.0001269328559471028, "loss": 0.7196, "step": 27716 }, { "epoch": 0.7116948393650672, "grad_norm": 0.734375, "learning_rate": 0.00012692855667819156, "loss": 0.7896, "step": 27717 }, { "epoch": 0.711720516560989, "grad_norm": 0.796875, "learning_rate": 0.00012692425735561408, "loss": 0.9383, "step": 27718 }, { "epoch": 0.7117461937569107, "grad_norm": 0.98828125, "learning_rate": 0.00012691995797937898, "loss": 0.816, "step": 27719 }, { "epoch": 0.7117718709528326, "grad_norm": 0.7578125, "learning_rate": 0.00012691565854949487, "loss": 0.8402, "step": 27720 }, { "epoch": 0.7117975481487544, "grad_norm": 0.828125, "learning_rate": 0.00012691135906597027, "loss": 0.9498, "step": 27721 }, { "epoch": 0.7118232253446762, "grad_norm": 0.7421875, "learning_rate": 0.00012690705952881376, "loss": 0.847, "step": 27722 }, { "epoch": 0.7118489025405981, "grad_norm": 0.71484375, "learning_rate": 0.00012690275993803394, "loss": 0.8325, "step": 27723 }, { "epoch": 0.7118745797365199, "grad_norm": 0.7265625, "learning_rate": 0.0001268984602936393, "loss": 0.8015, "step": 27724 }, { "epoch": 0.7119002569324417, "grad_norm": 0.76171875, "learning_rate": 0.00012689416059563847, "loss": 0.7514, "step": 27725 }, { "epoch": 0.7119259341283635, "grad_norm": 0.7890625, "learning_rate": 0.00012688986084404005, "loss": 0.7829, "step": 27726 }, { "epoch": 0.7119516113242853, "grad_norm": 0.81640625, "learning_rate": 0.0001268855610388525, "loss": 0.6726, "step": 27727 }, { "epoch": 0.7119772885202071, "grad_norm": 0.71484375, "learning_rate": 0.0001268812611800845, "loss": 0.8424, "step": 27728 }, { "epoch": 0.712002965716129, "grad_norm": 0.7109375, "learning_rate": 0.0001268769612677446, "loss": 0.7067, "step": 27729 }, { "epoch": 0.7120286429120508, "grad_norm": 0.96484375, "learning_rate": 0.0001268726613018413, "loss": 0.8295, "step": 27730 }, { "epoch": 0.7120543201079726, "grad_norm": 0.7578125, "learning_rate": 0.00012686836128238322, "loss": 0.8597, "step": 27731 }, { "epoch": 0.7120799973038944, "grad_norm": 0.76171875, "learning_rate": 0.00012686406120937893, "loss": 0.8251, "step": 27732 }, { "epoch": 0.7121056744998162, "grad_norm": 0.84375, "learning_rate": 0.00012685976108283697, "loss": 0.8726, "step": 27733 }, { "epoch": 0.712131351695738, "grad_norm": 0.7578125, "learning_rate": 0.00012685546090276595, "loss": 0.9316, "step": 27734 }, { "epoch": 0.7121570288916599, "grad_norm": 0.76953125, "learning_rate": 0.00012685116066917443, "loss": 0.8041, "step": 27735 }, { "epoch": 0.7121827060875817, "grad_norm": 0.80078125, "learning_rate": 0.00012684686038207097, "loss": 0.8514, "step": 27736 }, { "epoch": 0.7122083832835036, "grad_norm": 0.8515625, "learning_rate": 0.00012684256004146413, "loss": 0.9287, "step": 27737 }, { "epoch": 0.7122340604794254, "grad_norm": 0.75, "learning_rate": 0.0001268382596473625, "loss": 0.8345, "step": 27738 }, { "epoch": 0.7122597376753471, "grad_norm": 0.75390625, "learning_rate": 0.00012683395919977465, "loss": 0.7524, "step": 27739 }, { "epoch": 0.712285414871269, "grad_norm": 0.77734375, "learning_rate": 0.00012682965869870912, "loss": 0.781, "step": 27740 }, { "epoch": 0.7123110920671908, "grad_norm": 0.83984375, "learning_rate": 0.0001268253581441745, "loss": 0.7735, "step": 27741 }, { "epoch": 0.7123367692631126, "grad_norm": 0.83203125, "learning_rate": 0.0001268210575361794, "loss": 0.8083, "step": 27742 }, { "epoch": 0.7123624464590345, "grad_norm": 0.75390625, "learning_rate": 0.00012681675687473235, "loss": 0.8561, "step": 27743 }, { "epoch": 0.7123881236549563, "grad_norm": 0.84765625, "learning_rate": 0.0001268124561598419, "loss": 0.8611, "step": 27744 }, { "epoch": 0.7124138008508781, "grad_norm": 0.765625, "learning_rate": 0.00012680815539151667, "loss": 0.7203, "step": 27745 }, { "epoch": 0.7124394780467999, "grad_norm": 0.90234375, "learning_rate": 0.0001268038545697652, "loss": 0.8054, "step": 27746 }, { "epoch": 0.7124651552427217, "grad_norm": 0.83203125, "learning_rate": 0.00012679955369459606, "loss": 0.8479, "step": 27747 }, { "epoch": 0.7124908324386435, "grad_norm": 0.76953125, "learning_rate": 0.00012679525276601786, "loss": 0.8711, "step": 27748 }, { "epoch": 0.7125165096345654, "grad_norm": 0.7734375, "learning_rate": 0.0001267909517840391, "loss": 0.8158, "step": 27749 }, { "epoch": 0.7125421868304872, "grad_norm": 0.77734375, "learning_rate": 0.00012678665074866845, "loss": 0.8653, "step": 27750 }, { "epoch": 0.712567864026409, "grad_norm": 0.76953125, "learning_rate": 0.00012678234965991437, "loss": 0.9317, "step": 27751 }, { "epoch": 0.7125935412223308, "grad_norm": 0.69921875, "learning_rate": 0.0001267780485177855, "loss": 0.7509, "step": 27752 }, { "epoch": 0.7126192184182526, "grad_norm": 0.765625, "learning_rate": 0.0001267737473222904, "loss": 0.9129, "step": 27753 }, { "epoch": 0.7126448956141744, "grad_norm": 0.71484375, "learning_rate": 0.00012676944607343765, "loss": 0.7227, "step": 27754 }, { "epoch": 0.7126705728100963, "grad_norm": 0.796875, "learning_rate": 0.00012676514477123584, "loss": 0.851, "step": 27755 }, { "epoch": 0.7126962500060181, "grad_norm": 0.72265625, "learning_rate": 0.00012676084341569342, "loss": 0.7137, "step": 27756 }, { "epoch": 0.7127219272019399, "grad_norm": 0.79296875, "learning_rate": 0.00012675654200681912, "loss": 0.8741, "step": 27757 }, { "epoch": 0.7127476043978618, "grad_norm": 0.78125, "learning_rate": 0.00012675224054462146, "loss": 0.8655, "step": 27758 }, { "epoch": 0.7127732815937835, "grad_norm": 0.79296875, "learning_rate": 0.00012674793902910897, "loss": 0.7653, "step": 27759 }, { "epoch": 0.7127989587897053, "grad_norm": 0.7734375, "learning_rate": 0.00012674363746029027, "loss": 0.7827, "step": 27760 }, { "epoch": 0.7128246359856272, "grad_norm": 0.7421875, "learning_rate": 0.00012673933583817393, "loss": 0.778, "step": 27761 }, { "epoch": 0.712850313181549, "grad_norm": 0.78515625, "learning_rate": 0.00012673503416276848, "loss": 0.9418, "step": 27762 }, { "epoch": 0.7128759903774708, "grad_norm": 0.81640625, "learning_rate": 0.00012673073243408254, "loss": 0.9155, "step": 27763 }, { "epoch": 0.7129016675733927, "grad_norm": 0.82421875, "learning_rate": 0.00012672643065212466, "loss": 0.7603, "step": 27764 }, { "epoch": 0.7129273447693145, "grad_norm": 0.74609375, "learning_rate": 0.00012672212881690342, "loss": 0.9146, "step": 27765 }, { "epoch": 0.7129530219652362, "grad_norm": 0.73828125, "learning_rate": 0.00012671782692842739, "loss": 0.8152, "step": 27766 }, { "epoch": 0.7129786991611581, "grad_norm": 0.75390625, "learning_rate": 0.00012671352498670513, "loss": 0.7405, "step": 27767 }, { "epoch": 0.7130043763570799, "grad_norm": 0.76171875, "learning_rate": 0.0001267092229917453, "loss": 0.7766, "step": 27768 }, { "epoch": 0.7130300535530018, "grad_norm": 0.83203125, "learning_rate": 0.00012670492094355633, "loss": 0.8866, "step": 27769 }, { "epoch": 0.7130557307489236, "grad_norm": 0.796875, "learning_rate": 0.00012670061884214687, "loss": 0.9536, "step": 27770 }, { "epoch": 0.7130814079448454, "grad_norm": 0.86328125, "learning_rate": 0.0001266963166875255, "loss": 0.7436, "step": 27771 }, { "epoch": 0.7131070851407671, "grad_norm": 0.77734375, "learning_rate": 0.00012669201447970075, "loss": 0.777, "step": 27772 }, { "epoch": 0.713132762336689, "grad_norm": 0.8203125, "learning_rate": 0.00012668771221868125, "loss": 0.8118, "step": 27773 }, { "epoch": 0.7131584395326108, "grad_norm": 0.75390625, "learning_rate": 0.0001266834099044756, "loss": 0.8684, "step": 27774 }, { "epoch": 0.7131841167285327, "grad_norm": 0.76171875, "learning_rate": 0.00012667910753709225, "loss": 0.7915, "step": 27775 }, { "epoch": 0.7132097939244545, "grad_norm": 0.7734375, "learning_rate": 0.00012667480511653988, "loss": 0.9003, "step": 27776 }, { "epoch": 0.7132354711203763, "grad_norm": 0.796875, "learning_rate": 0.00012667050264282705, "loss": 0.7529, "step": 27777 }, { "epoch": 0.7132611483162982, "grad_norm": 0.6953125, "learning_rate": 0.0001266662001159623, "loss": 0.8259, "step": 27778 }, { "epoch": 0.7132868255122199, "grad_norm": 0.85546875, "learning_rate": 0.0001266618975359542, "loss": 0.8324, "step": 27779 }, { "epoch": 0.7133125027081417, "grad_norm": 0.96484375, "learning_rate": 0.00012665759490281138, "loss": 0.7331, "step": 27780 }, { "epoch": 0.7133381799040636, "grad_norm": 0.7578125, "learning_rate": 0.00012665329221654237, "loss": 0.8479, "step": 27781 }, { "epoch": 0.7133638570999854, "grad_norm": 0.8046875, "learning_rate": 0.00012664898947715577, "loss": 0.8477, "step": 27782 }, { "epoch": 0.7133895342959072, "grad_norm": 0.8203125, "learning_rate": 0.0001266446866846601, "loss": 0.8443, "step": 27783 }, { "epoch": 0.7134152114918291, "grad_norm": 0.70703125, "learning_rate": 0.00012664038383906402, "loss": 0.7232, "step": 27784 }, { "epoch": 0.7134408886877509, "grad_norm": 0.80078125, "learning_rate": 0.00012663608094037602, "loss": 0.8184, "step": 27785 }, { "epoch": 0.7134665658836726, "grad_norm": 0.8515625, "learning_rate": 0.00012663177798860473, "loss": 0.8444, "step": 27786 }, { "epoch": 0.7134922430795945, "grad_norm": 0.72265625, "learning_rate": 0.00012662747498375874, "loss": 0.8404, "step": 27787 }, { "epoch": 0.7135179202755163, "grad_norm": 0.8125, "learning_rate": 0.00012662317192584655, "loss": 0.8876, "step": 27788 }, { "epoch": 0.7135435974714381, "grad_norm": 0.7734375, "learning_rate": 0.00012661886881487684, "loss": 0.7594, "step": 27789 }, { "epoch": 0.71356927466736, "grad_norm": 0.765625, "learning_rate": 0.00012661456565085812, "loss": 0.7512, "step": 27790 }, { "epoch": 0.7135949518632818, "grad_norm": 0.8203125, "learning_rate": 0.00012661026243379893, "loss": 0.7886, "step": 27791 }, { "epoch": 0.7136206290592035, "grad_norm": 0.95703125, "learning_rate": 0.00012660595916370792, "loss": 0.7379, "step": 27792 }, { "epoch": 0.7136463062551254, "grad_norm": 0.7890625, "learning_rate": 0.00012660165584059361, "loss": 0.7361, "step": 27793 }, { "epoch": 0.7136719834510472, "grad_norm": 0.8125, "learning_rate": 0.00012659735246446462, "loss": 0.7467, "step": 27794 }, { "epoch": 0.713697660646969, "grad_norm": 0.8203125, "learning_rate": 0.00012659304903532955, "loss": 0.921, "step": 27795 }, { "epoch": 0.7137233378428909, "grad_norm": 0.73828125, "learning_rate": 0.00012658874555319689, "loss": 0.8094, "step": 27796 }, { "epoch": 0.7137490150388127, "grad_norm": 0.78515625, "learning_rate": 0.00012658444201807528, "loss": 0.8026, "step": 27797 }, { "epoch": 0.7137746922347346, "grad_norm": 0.69140625, "learning_rate": 0.00012658013842997327, "loss": 0.7832, "step": 27798 }, { "epoch": 0.7138003694306563, "grad_norm": 0.72265625, "learning_rate": 0.00012657583478889944, "loss": 0.9466, "step": 27799 }, { "epoch": 0.7138260466265781, "grad_norm": 0.80078125, "learning_rate": 0.0001265715310948624, "loss": 0.8318, "step": 27800 }, { "epoch": 0.7138517238225, "grad_norm": 0.84765625, "learning_rate": 0.00012656722734787068, "loss": 0.9237, "step": 27801 }, { "epoch": 0.7138774010184218, "grad_norm": 0.7734375, "learning_rate": 0.00012656292354793286, "loss": 0.8635, "step": 27802 }, { "epoch": 0.7139030782143436, "grad_norm": 0.7578125, "learning_rate": 0.00012655861969505756, "loss": 0.9285, "step": 27803 }, { "epoch": 0.7139287554102655, "grad_norm": 0.72265625, "learning_rate": 0.0001265543157892533, "loss": 0.793, "step": 27804 }, { "epoch": 0.7139544326061873, "grad_norm": 0.7265625, "learning_rate": 0.0001265500118305287, "loss": 0.714, "step": 27805 }, { "epoch": 0.713980109802109, "grad_norm": 0.7265625, "learning_rate": 0.00012654570781889235, "loss": 0.7752, "step": 27806 }, { "epoch": 0.7140057869980309, "grad_norm": 0.75390625, "learning_rate": 0.00012654140375435274, "loss": 0.8225, "step": 27807 }, { "epoch": 0.7140314641939527, "grad_norm": 0.77734375, "learning_rate": 0.0001265370996369186, "loss": 0.8587, "step": 27808 }, { "epoch": 0.7140571413898745, "grad_norm": 0.8359375, "learning_rate": 0.00012653279546659835, "loss": 0.8018, "step": 27809 }, { "epoch": 0.7140828185857964, "grad_norm": 0.734375, "learning_rate": 0.00012652849124340066, "loss": 0.8899, "step": 27810 }, { "epoch": 0.7141084957817182, "grad_norm": 0.8046875, "learning_rate": 0.00012652418696733407, "loss": 0.899, "step": 27811 }, { "epoch": 0.7141341729776399, "grad_norm": 0.73046875, "learning_rate": 0.00012651988263840715, "loss": 0.7512, "step": 27812 }, { "epoch": 0.7141598501735618, "grad_norm": 0.80078125, "learning_rate": 0.0001265155782566285, "loss": 0.7852, "step": 27813 }, { "epoch": 0.7141855273694836, "grad_norm": 0.84765625, "learning_rate": 0.00012651127382200679, "loss": 0.8367, "step": 27814 }, { "epoch": 0.7142112045654054, "grad_norm": 0.8359375, "learning_rate": 0.00012650696933455042, "loss": 0.8794, "step": 27815 }, { "epoch": 0.7142368817613273, "grad_norm": 0.703125, "learning_rate": 0.00012650266479426807, "loss": 0.7524, "step": 27816 }, { "epoch": 0.7142625589572491, "grad_norm": 0.76953125, "learning_rate": 0.00012649836020116832, "loss": 0.8558, "step": 27817 }, { "epoch": 0.7142882361531709, "grad_norm": 0.88671875, "learning_rate": 0.00012649405555525972, "loss": 0.7952, "step": 27818 }, { "epoch": 0.7143139133490927, "grad_norm": 0.75, "learning_rate": 0.00012648975085655085, "loss": 0.7814, "step": 27819 }, { "epoch": 0.7143395905450145, "grad_norm": 0.7890625, "learning_rate": 0.0001264854461050503, "loss": 0.9092, "step": 27820 }, { "epoch": 0.7143652677409363, "grad_norm": 0.7890625, "learning_rate": 0.00012648114130076667, "loss": 0.8346, "step": 27821 }, { "epoch": 0.7143909449368582, "grad_norm": 0.7734375, "learning_rate": 0.00012647683644370853, "loss": 0.9136, "step": 27822 }, { "epoch": 0.71441662213278, "grad_norm": 0.74609375, "learning_rate": 0.00012647253153388438, "loss": 0.8321, "step": 27823 }, { "epoch": 0.7144422993287018, "grad_norm": 0.890625, "learning_rate": 0.0001264682265713029, "loss": 0.7993, "step": 27824 }, { "epoch": 0.7144679765246237, "grad_norm": 0.78515625, "learning_rate": 0.00012646392155597267, "loss": 0.9992, "step": 27825 }, { "epoch": 0.7144936537205454, "grad_norm": 0.78515625, "learning_rate": 0.0001264596164879022, "loss": 0.917, "step": 27826 }, { "epoch": 0.7145193309164672, "grad_norm": 0.83984375, "learning_rate": 0.00012645531136710013, "loss": 0.8211, "step": 27827 }, { "epoch": 0.7145450081123891, "grad_norm": 0.7578125, "learning_rate": 0.00012645100619357498, "loss": 0.7921, "step": 27828 }, { "epoch": 0.7145706853083109, "grad_norm": 0.7890625, "learning_rate": 0.00012644670096733537, "loss": 0.9801, "step": 27829 }, { "epoch": 0.7145963625042328, "grad_norm": 0.7734375, "learning_rate": 0.0001264423956883899, "loss": 0.8694, "step": 27830 }, { "epoch": 0.7146220397001546, "grad_norm": 0.80078125, "learning_rate": 0.0001264380903567471, "loss": 0.8199, "step": 27831 }, { "epoch": 0.7146477168960763, "grad_norm": 0.8671875, "learning_rate": 0.0001264337849724156, "loss": 0.8759, "step": 27832 }, { "epoch": 0.7146733940919981, "grad_norm": 0.796875, "learning_rate": 0.0001264294795354039, "loss": 0.7603, "step": 27833 }, { "epoch": 0.71469907128792, "grad_norm": 0.81640625, "learning_rate": 0.0001264251740457207, "loss": 0.8108, "step": 27834 }, { "epoch": 0.7147247484838418, "grad_norm": 0.76953125, "learning_rate": 0.0001264208685033745, "loss": 0.885, "step": 27835 }, { "epoch": 0.7147504256797637, "grad_norm": 0.73828125, "learning_rate": 0.00012641656290837385, "loss": 0.8375, "step": 27836 }, { "epoch": 0.7147761028756855, "grad_norm": 0.85546875, "learning_rate": 0.0001264122572607274, "loss": 0.797, "step": 27837 }, { "epoch": 0.7148017800716073, "grad_norm": 0.78125, "learning_rate": 0.00012640795156044373, "loss": 0.8029, "step": 27838 }, { "epoch": 0.714827457267529, "grad_norm": 0.828125, "learning_rate": 0.00012640364580753137, "loss": 0.9302, "step": 27839 }, { "epoch": 0.7148531344634509, "grad_norm": 0.76171875, "learning_rate": 0.00012639934000199893, "loss": 0.7825, "step": 27840 }, { "epoch": 0.7148788116593727, "grad_norm": 0.74609375, "learning_rate": 0.00012639503414385502, "loss": 0.7501, "step": 27841 }, { "epoch": 0.7149044888552946, "grad_norm": 0.7578125, "learning_rate": 0.0001263907282331082, "loss": 0.8071, "step": 27842 }, { "epoch": 0.7149301660512164, "grad_norm": 0.765625, "learning_rate": 0.000126386422269767, "loss": 0.7862, "step": 27843 }, { "epoch": 0.7149558432471382, "grad_norm": 0.75390625, "learning_rate": 0.00012638211625384005, "loss": 0.8565, "step": 27844 }, { "epoch": 0.7149815204430601, "grad_norm": 0.765625, "learning_rate": 0.00012637781018533595, "loss": 0.9533, "step": 27845 }, { "epoch": 0.7150071976389818, "grad_norm": 0.73046875, "learning_rate": 0.00012637350406426322, "loss": 0.7014, "step": 27846 }, { "epoch": 0.7150328748349036, "grad_norm": 0.7890625, "learning_rate": 0.00012636919789063051, "loss": 0.8355, "step": 27847 }, { "epoch": 0.7150585520308255, "grad_norm": 0.75390625, "learning_rate": 0.0001263648916644464, "loss": 0.8324, "step": 27848 }, { "epoch": 0.7150842292267473, "grad_norm": 0.7421875, "learning_rate": 0.00012636058538571937, "loss": 0.7854, "step": 27849 }, { "epoch": 0.7151099064226691, "grad_norm": 0.796875, "learning_rate": 0.0001263562790544581, "loss": 0.7332, "step": 27850 }, { "epoch": 0.715135583618591, "grad_norm": 0.87890625, "learning_rate": 0.00012635197267067116, "loss": 0.7146, "step": 27851 }, { "epoch": 0.7151612608145127, "grad_norm": 0.7421875, "learning_rate": 0.0001263476662343671, "loss": 0.7808, "step": 27852 }, { "epoch": 0.7151869380104345, "grad_norm": 0.8046875, "learning_rate": 0.00012634335974555453, "loss": 0.7902, "step": 27853 }, { "epoch": 0.7152126152063564, "grad_norm": 0.8359375, "learning_rate": 0.00012633905320424206, "loss": 0.8376, "step": 27854 }, { "epoch": 0.7152382924022782, "grad_norm": 0.83203125, "learning_rate": 0.0001263347466104382, "loss": 0.783, "step": 27855 }, { "epoch": 0.7152639695982, "grad_norm": 0.71484375, "learning_rate": 0.00012633043996415158, "loss": 0.7144, "step": 27856 }, { "epoch": 0.7152896467941219, "grad_norm": 0.7265625, "learning_rate": 0.00012632613326539076, "loss": 0.8526, "step": 27857 }, { "epoch": 0.7153153239900437, "grad_norm": 0.796875, "learning_rate": 0.0001263218265141643, "loss": 0.8917, "step": 27858 }, { "epoch": 0.7153410011859654, "grad_norm": 0.8125, "learning_rate": 0.0001263175197104809, "loss": 0.9077, "step": 27859 }, { "epoch": 0.7153666783818873, "grad_norm": 0.8046875, "learning_rate": 0.000126313212854349, "loss": 0.9859, "step": 27860 }, { "epoch": 0.7153923555778091, "grad_norm": 0.859375, "learning_rate": 0.00012630890594577727, "loss": 0.9453, "step": 27861 }, { "epoch": 0.715418032773731, "grad_norm": 0.81640625, "learning_rate": 0.00012630459898477426, "loss": 0.8613, "step": 27862 }, { "epoch": 0.7154437099696528, "grad_norm": 0.75, "learning_rate": 0.00012630029197134853, "loss": 0.7365, "step": 27863 }, { "epoch": 0.7154693871655746, "grad_norm": 0.8046875, "learning_rate": 0.00012629598490550872, "loss": 0.8938, "step": 27864 }, { "epoch": 0.7154950643614965, "grad_norm": 0.75, "learning_rate": 0.0001262916777872634, "loss": 0.8452, "step": 27865 }, { "epoch": 0.7155207415574182, "grad_norm": 0.75, "learning_rate": 0.00012628737061662112, "loss": 0.702, "step": 27866 }, { "epoch": 0.71554641875334, "grad_norm": 0.86328125, "learning_rate": 0.00012628306339359052, "loss": 0.9339, "step": 27867 }, { "epoch": 0.7155720959492619, "grad_norm": 0.76953125, "learning_rate": 0.00012627875611818012, "loss": 0.866, "step": 27868 }, { "epoch": 0.7155977731451837, "grad_norm": 0.7734375, "learning_rate": 0.00012627444879039854, "loss": 0.8363, "step": 27869 }, { "epoch": 0.7156234503411055, "grad_norm": 0.78125, "learning_rate": 0.00012627014141025438, "loss": 0.9078, "step": 27870 }, { "epoch": 0.7156491275370274, "grad_norm": 0.8359375, "learning_rate": 0.00012626583397775613, "loss": 0.8938, "step": 27871 }, { "epoch": 0.7156748047329491, "grad_norm": 0.7890625, "learning_rate": 0.0001262615264929125, "loss": 1.0054, "step": 27872 }, { "epoch": 0.7157004819288709, "grad_norm": 0.80859375, "learning_rate": 0.00012625721895573203, "loss": 0.7519, "step": 27873 }, { "epoch": 0.7157261591247928, "grad_norm": 0.74609375, "learning_rate": 0.00012625291136622328, "loss": 0.8797, "step": 27874 }, { "epoch": 0.7157518363207146, "grad_norm": 0.78125, "learning_rate": 0.00012624860372439483, "loss": 0.7826, "step": 27875 }, { "epoch": 0.7157775135166364, "grad_norm": 0.77734375, "learning_rate": 0.0001262442960302553, "loss": 0.8402, "step": 27876 }, { "epoch": 0.7158031907125583, "grad_norm": 0.828125, "learning_rate": 0.00012623998828381326, "loss": 0.8082, "step": 27877 }, { "epoch": 0.7158288679084801, "grad_norm": 0.7734375, "learning_rate": 0.0001262356804850773, "loss": 0.8686, "step": 27878 }, { "epoch": 0.7158545451044018, "grad_norm": 0.76953125, "learning_rate": 0.00012623137263405597, "loss": 0.7348, "step": 27879 }, { "epoch": 0.7158802223003237, "grad_norm": 0.7265625, "learning_rate": 0.0001262270647307579, "loss": 0.7724, "step": 27880 }, { "epoch": 0.7159058994962455, "grad_norm": 0.84375, "learning_rate": 0.00012622275677519166, "loss": 0.8942, "step": 27881 }, { "epoch": 0.7159315766921673, "grad_norm": 0.8828125, "learning_rate": 0.00012621844876736584, "loss": 0.9164, "step": 27882 }, { "epoch": 0.7159572538880892, "grad_norm": 0.81640625, "learning_rate": 0.00012621414070728904, "loss": 0.8655, "step": 27883 }, { "epoch": 0.715982931084011, "grad_norm": 0.8203125, "learning_rate": 0.00012620983259496976, "loss": 0.7907, "step": 27884 }, { "epoch": 0.7160086082799327, "grad_norm": 0.8828125, "learning_rate": 0.00012620552443041668, "loss": 0.9725, "step": 27885 }, { "epoch": 0.7160342854758546, "grad_norm": 0.828125, "learning_rate": 0.00012620121621363838, "loss": 0.9504, "step": 27886 }, { "epoch": 0.7160599626717764, "grad_norm": 0.7421875, "learning_rate": 0.00012619690794464343, "loss": 0.6951, "step": 27887 }, { "epoch": 0.7160856398676982, "grad_norm": 0.77734375, "learning_rate": 0.0001261925996234404, "loss": 0.8042, "step": 27888 }, { "epoch": 0.7161113170636201, "grad_norm": 0.83203125, "learning_rate": 0.00012618829125003787, "loss": 0.8827, "step": 27889 }, { "epoch": 0.7161369942595419, "grad_norm": 0.8125, "learning_rate": 0.00012618398282444442, "loss": 0.7389, "step": 27890 }, { "epoch": 0.7161626714554638, "grad_norm": 0.91015625, "learning_rate": 0.0001261796743466687, "loss": 0.8271, "step": 27891 }, { "epoch": 0.7161883486513855, "grad_norm": 0.95703125, "learning_rate": 0.00012617536581671922, "loss": 0.8201, "step": 27892 }, { "epoch": 0.7162140258473073, "grad_norm": 0.8203125, "learning_rate": 0.00012617105723460465, "loss": 0.8837, "step": 27893 }, { "epoch": 0.7162397030432291, "grad_norm": 0.7578125, "learning_rate": 0.0001261667486003335, "loss": 0.9062, "step": 27894 }, { "epoch": 0.716265380239151, "grad_norm": 0.75, "learning_rate": 0.00012616243991391435, "loss": 0.8557, "step": 27895 }, { "epoch": 0.7162910574350728, "grad_norm": 0.73046875, "learning_rate": 0.00012615813117535586, "loss": 0.7544, "step": 27896 }, { "epoch": 0.7163167346309947, "grad_norm": 0.71875, "learning_rate": 0.00012615382238466657, "loss": 0.7582, "step": 27897 }, { "epoch": 0.7163424118269165, "grad_norm": 0.765625, "learning_rate": 0.00012614951354185508, "loss": 0.816, "step": 27898 }, { "epoch": 0.7163680890228382, "grad_norm": 0.8515625, "learning_rate": 0.00012614520464692997, "loss": 0.9047, "step": 27899 }, { "epoch": 0.71639376621876, "grad_norm": 0.8125, "learning_rate": 0.0001261408956998998, "loss": 0.8184, "step": 27900 }, { "epoch": 0.7164194434146819, "grad_norm": 0.83203125, "learning_rate": 0.00012613658670077322, "loss": 0.8586, "step": 27901 }, { "epoch": 0.7164451206106037, "grad_norm": 0.7734375, "learning_rate": 0.00012613227764955877, "loss": 0.8724, "step": 27902 }, { "epoch": 0.7164707978065256, "grad_norm": 0.84375, "learning_rate": 0.00012612796854626505, "loss": 0.9057, "step": 27903 }, { "epoch": 0.7164964750024474, "grad_norm": 0.74609375, "learning_rate": 0.00012612365939090064, "loss": 0.7916, "step": 27904 }, { "epoch": 0.7165221521983691, "grad_norm": 0.7421875, "learning_rate": 0.00012611935018347415, "loss": 0.7664, "step": 27905 }, { "epoch": 0.716547829394291, "grad_norm": 0.7734375, "learning_rate": 0.00012611504092399417, "loss": 0.8948, "step": 27906 }, { "epoch": 0.7165735065902128, "grad_norm": 0.7421875, "learning_rate": 0.00012611073161246928, "loss": 0.7305, "step": 27907 }, { "epoch": 0.7165991837861346, "grad_norm": 0.90234375, "learning_rate": 0.000126106422248908, "loss": 0.9373, "step": 27908 }, { "epoch": 0.7166248609820565, "grad_norm": 0.68359375, "learning_rate": 0.00012610211283331903, "loss": 0.7131, "step": 27909 }, { "epoch": 0.7166505381779783, "grad_norm": 0.76953125, "learning_rate": 0.00012609780336571088, "loss": 0.9358, "step": 27910 }, { "epoch": 0.7166762153739001, "grad_norm": 0.76171875, "learning_rate": 0.00012609349384609217, "loss": 0.9136, "step": 27911 }, { "epoch": 0.7167018925698219, "grad_norm": 0.7890625, "learning_rate": 0.0001260891842744715, "loss": 0.8598, "step": 27912 }, { "epoch": 0.7167275697657437, "grad_norm": 0.84375, "learning_rate": 0.00012608487465085743, "loss": 0.828, "step": 27913 }, { "epoch": 0.7167532469616655, "grad_norm": 0.89453125, "learning_rate": 0.00012608056497525857, "loss": 0.9336, "step": 27914 }, { "epoch": 0.7167789241575874, "grad_norm": 0.78515625, "learning_rate": 0.0001260762552476835, "loss": 0.772, "step": 27915 }, { "epoch": 0.7168046013535092, "grad_norm": 0.734375, "learning_rate": 0.0001260719454681408, "loss": 0.8281, "step": 27916 }, { "epoch": 0.716830278549431, "grad_norm": 0.79296875, "learning_rate": 0.00012606763563663905, "loss": 0.7381, "step": 27917 }, { "epoch": 0.7168559557453529, "grad_norm": 0.78515625, "learning_rate": 0.0001260633257531869, "loss": 0.9643, "step": 27918 }, { "epoch": 0.7168816329412746, "grad_norm": 0.78515625, "learning_rate": 0.00012605901581779286, "loss": 0.7925, "step": 27919 }, { "epoch": 0.7169073101371964, "grad_norm": 0.79296875, "learning_rate": 0.00012605470583046556, "loss": 0.9332, "step": 27920 }, { "epoch": 0.7169329873331183, "grad_norm": 0.71875, "learning_rate": 0.0001260503957912136, "loss": 0.693, "step": 27921 }, { "epoch": 0.7169586645290401, "grad_norm": 0.6953125, "learning_rate": 0.00012604608570004555, "loss": 0.7935, "step": 27922 }, { "epoch": 0.716984341724962, "grad_norm": 0.7734375, "learning_rate": 0.00012604177555697, "loss": 0.7769, "step": 27923 }, { "epoch": 0.7170100189208838, "grad_norm": 0.828125, "learning_rate": 0.00012603746536199553, "loss": 0.8582, "step": 27924 }, { "epoch": 0.7170356961168055, "grad_norm": 0.78515625, "learning_rate": 0.00012603315511513076, "loss": 0.7335, "step": 27925 }, { "epoch": 0.7170613733127273, "grad_norm": 0.73046875, "learning_rate": 0.00012602884481638428, "loss": 0.8448, "step": 27926 }, { "epoch": 0.7170870505086492, "grad_norm": 0.7421875, "learning_rate": 0.00012602453446576467, "loss": 0.808, "step": 27927 }, { "epoch": 0.717112727704571, "grad_norm": 0.76171875, "learning_rate": 0.00012602022406328048, "loss": 0.9395, "step": 27928 }, { "epoch": 0.7171384049004929, "grad_norm": 0.79296875, "learning_rate": 0.0001260159136089403, "loss": 0.9141, "step": 27929 }, { "epoch": 0.7171640820964147, "grad_norm": 0.76171875, "learning_rate": 0.0001260116031027528, "loss": 0.8113, "step": 27930 }, { "epoch": 0.7171897592923365, "grad_norm": 0.79296875, "learning_rate": 0.00012600729254472654, "loss": 0.8408, "step": 27931 }, { "epoch": 0.7172154364882583, "grad_norm": 0.859375, "learning_rate": 0.00012600298193487006, "loss": 0.7723, "step": 27932 }, { "epoch": 0.7172411136841801, "grad_norm": 0.796875, "learning_rate": 0.00012599867127319203, "loss": 0.7449, "step": 27933 }, { "epoch": 0.7172667908801019, "grad_norm": 0.765625, "learning_rate": 0.00012599436055970098, "loss": 0.7869, "step": 27934 }, { "epoch": 0.7172924680760238, "grad_norm": 0.69140625, "learning_rate": 0.0001259900497944055, "loss": 0.7085, "step": 27935 }, { "epoch": 0.7173181452719456, "grad_norm": 0.7421875, "learning_rate": 0.00012598573897731422, "loss": 0.7622, "step": 27936 }, { "epoch": 0.7173438224678674, "grad_norm": 0.79296875, "learning_rate": 0.00012598142810843566, "loss": 0.9057, "step": 27937 }, { "epoch": 0.7173694996637893, "grad_norm": 0.7265625, "learning_rate": 0.0001259771171877785, "loss": 0.9156, "step": 27938 }, { "epoch": 0.717395176859711, "grad_norm": 0.76953125, "learning_rate": 0.0001259728062153513, "loss": 0.8381, "step": 27939 }, { "epoch": 0.7174208540556328, "grad_norm": 0.75390625, "learning_rate": 0.00012596849519116264, "loss": 0.7489, "step": 27940 }, { "epoch": 0.7174465312515547, "grad_norm": 0.73046875, "learning_rate": 0.0001259641841152211, "loss": 0.7446, "step": 27941 }, { "epoch": 0.7174722084474765, "grad_norm": 0.79296875, "learning_rate": 0.0001259598729875353, "loss": 0.8169, "step": 27942 }, { "epoch": 0.7174978856433983, "grad_norm": 0.7890625, "learning_rate": 0.00012595556180811378, "loss": 0.8598, "step": 27943 }, { "epoch": 0.7175235628393202, "grad_norm": 0.87109375, "learning_rate": 0.0001259512505769652, "loss": 0.977, "step": 27944 }, { "epoch": 0.7175492400352419, "grad_norm": 0.7578125, "learning_rate": 0.00012594693929409814, "loss": 0.775, "step": 27945 }, { "epoch": 0.7175749172311637, "grad_norm": 0.77734375, "learning_rate": 0.00012594262795952114, "loss": 0.797, "step": 27946 }, { "epoch": 0.7176005944270856, "grad_norm": 0.8515625, "learning_rate": 0.00012593831657324285, "loss": 0.8285, "step": 27947 }, { "epoch": 0.7176262716230074, "grad_norm": 0.75, "learning_rate": 0.00012593400513527184, "loss": 0.7386, "step": 27948 }, { "epoch": 0.7176519488189292, "grad_norm": 0.7578125, "learning_rate": 0.0001259296936456167, "loss": 0.8943, "step": 27949 }, { "epoch": 0.7176776260148511, "grad_norm": 0.82421875, "learning_rate": 0.000125925382104286, "loss": 0.7946, "step": 27950 }, { "epoch": 0.7177033032107729, "grad_norm": 0.7890625, "learning_rate": 0.00012592107051128835, "loss": 0.9236, "step": 27951 }, { "epoch": 0.7177289804066946, "grad_norm": 0.72265625, "learning_rate": 0.00012591675886663238, "loss": 0.8386, "step": 27952 }, { "epoch": 0.7177546576026165, "grad_norm": 0.75, "learning_rate": 0.00012591244717032662, "loss": 0.8654, "step": 27953 }, { "epoch": 0.7177803347985383, "grad_norm": 0.80078125, "learning_rate": 0.0001259081354223797, "loss": 0.9845, "step": 27954 }, { "epoch": 0.7178060119944601, "grad_norm": 0.71484375, "learning_rate": 0.00012590382362280024, "loss": 0.8802, "step": 27955 }, { "epoch": 0.717831689190382, "grad_norm": 0.8671875, "learning_rate": 0.00012589951177159676, "loss": 0.9093, "step": 27956 }, { "epoch": 0.7178573663863038, "grad_norm": 0.7109375, "learning_rate": 0.00012589519986877791, "loss": 0.8878, "step": 27957 }, { "epoch": 0.7178830435822257, "grad_norm": 0.70703125, "learning_rate": 0.0001258908879143523, "loss": 0.8788, "step": 27958 }, { "epoch": 0.7179087207781474, "grad_norm": 0.92578125, "learning_rate": 0.00012588657590832845, "loss": 0.8138, "step": 27959 }, { "epoch": 0.7179343979740692, "grad_norm": 0.7421875, "learning_rate": 0.00012588226385071497, "loss": 0.8927, "step": 27960 }, { "epoch": 0.717960075169991, "grad_norm": 0.78515625, "learning_rate": 0.00012587795174152052, "loss": 0.7984, "step": 27961 }, { "epoch": 0.7179857523659129, "grad_norm": 0.7734375, "learning_rate": 0.0001258736395807536, "loss": 0.7515, "step": 27962 }, { "epoch": 0.7180114295618347, "grad_norm": 0.82421875, "learning_rate": 0.0001258693273684229, "loss": 0.8414, "step": 27963 }, { "epoch": 0.7180371067577566, "grad_norm": 0.765625, "learning_rate": 0.00012586501510453694, "loss": 0.8023, "step": 27964 }, { "epoch": 0.7180627839536783, "grad_norm": 0.78515625, "learning_rate": 0.00012586070278910436, "loss": 0.8077, "step": 27965 }, { "epoch": 0.7180884611496001, "grad_norm": 0.84765625, "learning_rate": 0.00012585639042213373, "loss": 0.8445, "step": 27966 }, { "epoch": 0.718114138345522, "grad_norm": 0.78515625, "learning_rate": 0.00012585207800363366, "loss": 0.85, "step": 27967 }, { "epoch": 0.7181398155414438, "grad_norm": 0.90625, "learning_rate": 0.00012584776553361271, "loss": 0.8903, "step": 27968 }, { "epoch": 0.7181654927373656, "grad_norm": 0.72265625, "learning_rate": 0.0001258434530120795, "loss": 0.8251, "step": 27969 }, { "epoch": 0.7181911699332875, "grad_norm": 0.78125, "learning_rate": 0.00012583914043904262, "loss": 0.8305, "step": 27970 }, { "epoch": 0.7182168471292093, "grad_norm": 0.71484375, "learning_rate": 0.0001258348278145107, "loss": 0.7236, "step": 27971 }, { "epoch": 0.718242524325131, "grad_norm": 0.8046875, "learning_rate": 0.00012583051513849228, "loss": 0.899, "step": 27972 }, { "epoch": 0.7182682015210529, "grad_norm": 0.78515625, "learning_rate": 0.00012582620241099598, "loss": 0.8911, "step": 27973 }, { "epoch": 0.7182938787169747, "grad_norm": 0.71484375, "learning_rate": 0.0001258218896320304, "loss": 0.9855, "step": 27974 }, { "epoch": 0.7183195559128965, "grad_norm": 0.79296875, "learning_rate": 0.0001258175768016041, "loss": 0.9305, "step": 27975 }, { "epoch": 0.7183452331088184, "grad_norm": 0.78125, "learning_rate": 0.0001258132639197257, "loss": 0.8422, "step": 27976 }, { "epoch": 0.7183709103047402, "grad_norm": 0.7734375, "learning_rate": 0.00012580895098640381, "loss": 0.8357, "step": 27977 }, { "epoch": 0.718396587500662, "grad_norm": 0.7890625, "learning_rate": 0.00012580463800164704, "loss": 0.8347, "step": 27978 }, { "epoch": 0.7184222646965838, "grad_norm": 0.76953125, "learning_rate": 0.00012580032496546396, "loss": 0.7738, "step": 27979 }, { "epoch": 0.7184479418925056, "grad_norm": 0.8046875, "learning_rate": 0.00012579601187786313, "loss": 0.8614, "step": 27980 }, { "epoch": 0.7184736190884274, "grad_norm": 0.8203125, "learning_rate": 0.00012579169873885317, "loss": 0.8869, "step": 27981 }, { "epoch": 0.7184992962843493, "grad_norm": 0.72265625, "learning_rate": 0.0001257873855484427, "loss": 0.7907, "step": 27982 }, { "epoch": 0.7185249734802711, "grad_norm": 0.80859375, "learning_rate": 0.0001257830723066403, "loss": 0.9146, "step": 27983 }, { "epoch": 0.718550650676193, "grad_norm": 0.82421875, "learning_rate": 0.00012577875901345457, "loss": 0.8621, "step": 27984 }, { "epoch": 0.7185763278721147, "grad_norm": 0.78125, "learning_rate": 0.0001257744456688941, "loss": 0.7861, "step": 27985 }, { "epoch": 0.7186020050680365, "grad_norm": 0.7421875, "learning_rate": 0.00012577013227296746, "loss": 0.9259, "step": 27986 }, { "epoch": 0.7186276822639583, "grad_norm": 0.8125, "learning_rate": 0.00012576581882568332, "loss": 0.8149, "step": 27987 }, { "epoch": 0.7186533594598802, "grad_norm": 0.82421875, "learning_rate": 0.0001257615053270502, "loss": 0.7818, "step": 27988 }, { "epoch": 0.718679036655802, "grad_norm": 0.7734375, "learning_rate": 0.00012575719177707674, "loss": 0.9653, "step": 27989 }, { "epoch": 0.7187047138517239, "grad_norm": 0.88671875, "learning_rate": 0.00012575287817577152, "loss": 0.9845, "step": 27990 }, { "epoch": 0.7187303910476457, "grad_norm": 0.7890625, "learning_rate": 0.00012574856452314312, "loss": 0.8394, "step": 27991 }, { "epoch": 0.7187560682435674, "grad_norm": 0.83984375, "learning_rate": 0.00012574425081920022, "loss": 0.8208, "step": 27992 }, { "epoch": 0.7187817454394893, "grad_norm": 0.77734375, "learning_rate": 0.0001257399370639513, "loss": 0.9521, "step": 27993 }, { "epoch": 0.7188074226354111, "grad_norm": 0.85546875, "learning_rate": 0.000125735623257405, "loss": 0.8719, "step": 27994 }, { "epoch": 0.7188330998313329, "grad_norm": 0.80078125, "learning_rate": 0.00012573130939956998, "loss": 0.8455, "step": 27995 }, { "epoch": 0.7188587770272548, "grad_norm": 0.7578125, "learning_rate": 0.0001257269954904547, "loss": 0.8838, "step": 27996 }, { "epoch": 0.7188844542231766, "grad_norm": 0.85546875, "learning_rate": 0.0001257226815300679, "loss": 0.7844, "step": 27997 }, { "epoch": 0.7189101314190984, "grad_norm": 0.73046875, "learning_rate": 0.00012571836751841816, "loss": 0.8294, "step": 27998 }, { "epoch": 0.7189358086150202, "grad_norm": 0.765625, "learning_rate": 0.00012571405345551396, "loss": 0.8136, "step": 27999 }, { "epoch": 0.718961485810942, "grad_norm": 0.7890625, "learning_rate": 0.00012570973934136404, "loss": 0.7894, "step": 28000 }, { "epoch": 0.718961485810942, "eval_loss": 0.8289976716041565, "eval_runtime": 388.3635, "eval_samples_per_second": 25.749, "eval_steps_per_second": 0.806, "step": 28000 }, { "epoch": 0.7189871630068638, "grad_norm": 0.796875, "learning_rate": 0.00012570542517597685, "loss": 0.8733, "step": 28001 }, { "epoch": 0.7190128402027857, "grad_norm": 0.75, "learning_rate": 0.00012570111095936115, "loss": 0.8158, "step": 28002 }, { "epoch": 0.7190385173987075, "grad_norm": 0.87890625, "learning_rate": 0.0001256967966915254, "loss": 0.8502, "step": 28003 }, { "epoch": 0.7190641945946293, "grad_norm": 0.671875, "learning_rate": 0.00012569248237247828, "loss": 0.638, "step": 28004 }, { "epoch": 0.7190898717905511, "grad_norm": 0.74609375, "learning_rate": 0.0001256881680022284, "loss": 0.7704, "step": 28005 }, { "epoch": 0.7191155489864729, "grad_norm": 0.7109375, "learning_rate": 0.00012568385358078426, "loss": 0.7349, "step": 28006 }, { "epoch": 0.7191412261823947, "grad_norm": 0.75, "learning_rate": 0.00012567953910815454, "loss": 0.8097, "step": 28007 }, { "epoch": 0.7191669033783166, "grad_norm": 0.75390625, "learning_rate": 0.00012567522458434781, "loss": 0.8769, "step": 28008 }, { "epoch": 0.7191925805742384, "grad_norm": 0.79296875, "learning_rate": 0.0001256709100093727, "loss": 0.8821, "step": 28009 }, { "epoch": 0.7192182577701602, "grad_norm": 0.90625, "learning_rate": 0.00012566659538323777, "loss": 0.9129, "step": 28010 }, { "epoch": 0.7192439349660821, "grad_norm": 0.7578125, "learning_rate": 0.00012566228070595164, "loss": 0.846, "step": 28011 }, { "epoch": 0.7192696121620038, "grad_norm": 0.76953125, "learning_rate": 0.0001256579659775229, "loss": 0.8552, "step": 28012 }, { "epoch": 0.7192952893579256, "grad_norm": 0.82421875, "learning_rate": 0.00012565365119796015, "loss": 0.8976, "step": 28013 }, { "epoch": 0.7193209665538475, "grad_norm": 0.828125, "learning_rate": 0.00012564933636727198, "loss": 0.9187, "step": 28014 }, { "epoch": 0.7193466437497693, "grad_norm": 0.84375, "learning_rate": 0.00012564502148546702, "loss": 0.7776, "step": 28015 }, { "epoch": 0.7193723209456911, "grad_norm": 0.7734375, "learning_rate": 0.00012564070655255385, "loss": 0.7882, "step": 28016 }, { "epoch": 0.719397998141613, "grad_norm": 0.78515625, "learning_rate": 0.00012563639156854103, "loss": 0.8869, "step": 28017 }, { "epoch": 0.7194236753375348, "grad_norm": 0.875, "learning_rate": 0.00012563207653343728, "loss": 0.7569, "step": 28018 }, { "epoch": 0.7194493525334565, "grad_norm": 0.71484375, "learning_rate": 0.00012562776144725105, "loss": 0.7495, "step": 28019 }, { "epoch": 0.7194750297293784, "grad_norm": 0.78125, "learning_rate": 0.000125623446309991, "loss": 0.856, "step": 28020 }, { "epoch": 0.7195007069253002, "grad_norm": 0.83203125, "learning_rate": 0.00012561913112166576, "loss": 1.0173, "step": 28021 }, { "epoch": 0.719526384121222, "grad_norm": 0.7265625, "learning_rate": 0.0001256148158822839, "loss": 0.9168, "step": 28022 }, { "epoch": 0.7195520613171439, "grad_norm": 0.73046875, "learning_rate": 0.00012561050059185403, "loss": 0.7953, "step": 28023 }, { "epoch": 0.7195777385130657, "grad_norm": 0.703125, "learning_rate": 0.00012560618525038475, "loss": 0.7969, "step": 28024 }, { "epoch": 0.7196034157089874, "grad_norm": 0.85546875, "learning_rate": 0.00012560186985788465, "loss": 1.0084, "step": 28025 }, { "epoch": 0.7196290929049093, "grad_norm": 0.70703125, "learning_rate": 0.00012559755441436234, "loss": 0.8254, "step": 28026 }, { "epoch": 0.7196547701008311, "grad_norm": 0.76953125, "learning_rate": 0.0001255932389198264, "loss": 0.8942, "step": 28027 }, { "epoch": 0.719680447296753, "grad_norm": 0.80078125, "learning_rate": 0.00012558892337428544, "loss": 0.8738, "step": 28028 }, { "epoch": 0.7197061244926748, "grad_norm": 0.8984375, "learning_rate": 0.0001255846077777481, "loss": 0.8627, "step": 28029 }, { "epoch": 0.7197318016885966, "grad_norm": 0.82421875, "learning_rate": 0.00012558029213022293, "loss": 0.9021, "step": 28030 }, { "epoch": 0.7197574788845185, "grad_norm": 0.71875, "learning_rate": 0.00012557597643171855, "loss": 0.7512, "step": 28031 }, { "epoch": 0.7197831560804402, "grad_norm": 0.81640625, "learning_rate": 0.00012557166068224358, "loss": 0.8728, "step": 28032 }, { "epoch": 0.719808833276362, "grad_norm": 0.7578125, "learning_rate": 0.00012556734488180656, "loss": 0.8481, "step": 28033 }, { "epoch": 0.7198345104722839, "grad_norm": 0.765625, "learning_rate": 0.00012556302903041615, "loss": 0.888, "step": 28034 }, { "epoch": 0.7198601876682057, "grad_norm": 0.765625, "learning_rate": 0.00012555871312808095, "loss": 0.7781, "step": 28035 }, { "epoch": 0.7198858648641275, "grad_norm": 0.83984375, "learning_rate": 0.0001255543971748095, "loss": 0.7048, "step": 28036 }, { "epoch": 0.7199115420600494, "grad_norm": 0.7421875, "learning_rate": 0.00012555008117061048, "loss": 0.8595, "step": 28037 }, { "epoch": 0.7199372192559712, "grad_norm": 0.69140625, "learning_rate": 0.00012554576511549247, "loss": 0.8011, "step": 28038 }, { "epoch": 0.7199628964518929, "grad_norm": 0.8046875, "learning_rate": 0.000125541449009464, "loss": 0.8055, "step": 28039 }, { "epoch": 0.7199885736478148, "grad_norm": 0.91015625, "learning_rate": 0.0001255371328525338, "loss": 0.9242, "step": 28040 }, { "epoch": 0.7200142508437366, "grad_norm": 0.73828125, "learning_rate": 0.00012553281664471033, "loss": 0.8012, "step": 28041 }, { "epoch": 0.7200399280396584, "grad_norm": 0.8125, "learning_rate": 0.0001255285003860023, "loss": 0.8612, "step": 28042 }, { "epoch": 0.7200656052355803, "grad_norm": 0.76171875, "learning_rate": 0.0001255241840764183, "loss": 0.8089, "step": 28043 }, { "epoch": 0.7200912824315021, "grad_norm": 0.78125, "learning_rate": 0.00012551986771596686, "loss": 0.9439, "step": 28044 }, { "epoch": 0.7201169596274238, "grad_norm": 0.8515625, "learning_rate": 0.00012551555130465666, "loss": 0.8543, "step": 28045 }, { "epoch": 0.7201426368233457, "grad_norm": 0.70703125, "learning_rate": 0.00012551123484249625, "loss": 0.7881, "step": 28046 }, { "epoch": 0.7201683140192675, "grad_norm": 0.7265625, "learning_rate": 0.00012550691832949426, "loss": 0.8298, "step": 28047 }, { "epoch": 0.7201939912151893, "grad_norm": 0.765625, "learning_rate": 0.0001255026017656593, "loss": 0.8877, "step": 28048 }, { "epoch": 0.7202196684111112, "grad_norm": 0.7578125, "learning_rate": 0.00012549828515099994, "loss": 0.8812, "step": 28049 }, { "epoch": 0.720245345607033, "grad_norm": 0.78515625, "learning_rate": 0.00012549396848552478, "loss": 0.8438, "step": 28050 }, { "epoch": 0.7202710228029549, "grad_norm": 0.75, "learning_rate": 0.0001254896517692425, "loss": 0.7826, "step": 28051 }, { "epoch": 0.7202966999988766, "grad_norm": 0.75390625, "learning_rate": 0.0001254853350021616, "loss": 0.8552, "step": 28052 }, { "epoch": 0.7203223771947984, "grad_norm": 0.80078125, "learning_rate": 0.00012548101818429076, "loss": 0.8737, "step": 28053 }, { "epoch": 0.7203480543907202, "grad_norm": 0.703125, "learning_rate": 0.00012547670131563853, "loss": 0.7996, "step": 28054 }, { "epoch": 0.7203737315866421, "grad_norm": 0.81640625, "learning_rate": 0.00012547238439621357, "loss": 0.8374, "step": 28055 }, { "epoch": 0.7203994087825639, "grad_norm": 0.8828125, "learning_rate": 0.00012546806742602443, "loss": 0.7705, "step": 28056 }, { "epoch": 0.7204250859784858, "grad_norm": 0.734375, "learning_rate": 0.00012546375040507968, "loss": 0.7851, "step": 28057 }, { "epoch": 0.7204507631744076, "grad_norm": 0.734375, "learning_rate": 0.00012545943333338805, "loss": 0.66, "step": 28058 }, { "epoch": 0.7204764403703293, "grad_norm": 0.71875, "learning_rate": 0.00012545511621095808, "loss": 0.8041, "step": 28059 }, { "epoch": 0.7205021175662512, "grad_norm": 0.8125, "learning_rate": 0.0001254507990377983, "loss": 0.8347, "step": 28060 }, { "epoch": 0.720527794762173, "grad_norm": 0.77734375, "learning_rate": 0.00012544648181391738, "loss": 0.7751, "step": 28061 }, { "epoch": 0.7205534719580948, "grad_norm": 0.7421875, "learning_rate": 0.00012544216453932395, "loss": 0.8316, "step": 28062 }, { "epoch": 0.7205791491540167, "grad_norm": 0.75, "learning_rate": 0.00012543784721402655, "loss": 0.7539, "step": 28063 }, { "epoch": 0.7206048263499385, "grad_norm": 0.87109375, "learning_rate": 0.0001254335298380339, "loss": 0.8018, "step": 28064 }, { "epoch": 0.7206305035458602, "grad_norm": 1.09375, "learning_rate": 0.00012542921241135444, "loss": 0.7839, "step": 28065 }, { "epoch": 0.7206561807417821, "grad_norm": 0.75390625, "learning_rate": 0.00012542489493399687, "loss": 0.8527, "step": 28066 }, { "epoch": 0.7206818579377039, "grad_norm": 0.796875, "learning_rate": 0.0001254205774059698, "loss": 0.7249, "step": 28067 }, { "epoch": 0.7207075351336257, "grad_norm": 0.75390625, "learning_rate": 0.0001254162598272818, "loss": 0.9159, "step": 28068 }, { "epoch": 0.7207332123295476, "grad_norm": 0.73828125, "learning_rate": 0.0001254119421979415, "loss": 0.7119, "step": 28069 }, { "epoch": 0.7207588895254694, "grad_norm": 0.8359375, "learning_rate": 0.0001254076245179575, "loss": 0.8238, "step": 28070 }, { "epoch": 0.7207845667213912, "grad_norm": 0.765625, "learning_rate": 0.0001254033067873384, "loss": 0.869, "step": 28071 }, { "epoch": 0.720810243917313, "grad_norm": 0.72265625, "learning_rate": 0.0001253989890060928, "loss": 0.7722, "step": 28072 }, { "epoch": 0.7208359211132348, "grad_norm": 0.796875, "learning_rate": 0.00012539467117422928, "loss": 0.7978, "step": 28073 }, { "epoch": 0.7208615983091566, "grad_norm": 0.71875, "learning_rate": 0.00012539035329175652, "loss": 0.6615, "step": 28074 }, { "epoch": 0.7208872755050785, "grad_norm": 0.75390625, "learning_rate": 0.00012538603535868306, "loss": 0.775, "step": 28075 }, { "epoch": 0.7209129527010003, "grad_norm": 0.8203125, "learning_rate": 0.00012538171737501753, "loss": 0.9177, "step": 28076 }, { "epoch": 0.7209386298969221, "grad_norm": 0.80078125, "learning_rate": 0.00012537739934076852, "loss": 0.8486, "step": 28077 }, { "epoch": 0.720964307092844, "grad_norm": 0.828125, "learning_rate": 0.00012537308125594465, "loss": 0.8007, "step": 28078 }, { "epoch": 0.7209899842887657, "grad_norm": 0.80078125, "learning_rate": 0.0001253687631205545, "loss": 0.8882, "step": 28079 }, { "epoch": 0.7210156614846875, "grad_norm": 0.74609375, "learning_rate": 0.00012536444493460675, "loss": 0.8187, "step": 28080 }, { "epoch": 0.7210413386806094, "grad_norm": 0.69921875, "learning_rate": 0.0001253601266981099, "loss": 0.7951, "step": 28081 }, { "epoch": 0.7210670158765312, "grad_norm": 0.7890625, "learning_rate": 0.00012535580841107263, "loss": 0.8665, "step": 28082 }, { "epoch": 0.721092693072453, "grad_norm": 0.78515625, "learning_rate": 0.00012535149007350353, "loss": 0.7967, "step": 28083 }, { "epoch": 0.7211183702683749, "grad_norm": 0.796875, "learning_rate": 0.00012534717168541119, "loss": 1.0446, "step": 28084 }, { "epoch": 0.7211440474642966, "grad_norm": 0.7265625, "learning_rate": 0.00012534285324680423, "loss": 0.8271, "step": 28085 }, { "epoch": 0.7211697246602184, "grad_norm": 0.8203125, "learning_rate": 0.00012533853475769124, "loss": 0.8782, "step": 28086 }, { "epoch": 0.7211954018561403, "grad_norm": 0.7890625, "learning_rate": 0.00012533421621808085, "loss": 0.7966, "step": 28087 }, { "epoch": 0.7212210790520621, "grad_norm": 0.78125, "learning_rate": 0.00012532989762798166, "loss": 0.784, "step": 28088 }, { "epoch": 0.721246756247984, "grad_norm": 0.80078125, "learning_rate": 0.00012532557898740226, "loss": 0.8507, "step": 28089 }, { "epoch": 0.7212724334439058, "grad_norm": 0.7578125, "learning_rate": 0.0001253212602963513, "loss": 0.7766, "step": 28090 }, { "epoch": 0.7212981106398276, "grad_norm": 0.9375, "learning_rate": 0.00012531694155483734, "loss": 0.7118, "step": 28091 }, { "epoch": 0.7213237878357494, "grad_norm": 0.796875, "learning_rate": 0.00012531262276286898, "loss": 0.7967, "step": 28092 }, { "epoch": 0.7213494650316712, "grad_norm": 0.796875, "learning_rate": 0.00012530830392045488, "loss": 0.9531, "step": 28093 }, { "epoch": 0.721375142227593, "grad_norm": 0.7890625, "learning_rate": 0.00012530398502760357, "loss": 0.952, "step": 28094 }, { "epoch": 0.7214008194235149, "grad_norm": 0.80078125, "learning_rate": 0.00012529966608432376, "loss": 0.7714, "step": 28095 }, { "epoch": 0.7214264966194367, "grad_norm": 0.75390625, "learning_rate": 0.00012529534709062397, "loss": 0.9484, "step": 28096 }, { "epoch": 0.7214521738153585, "grad_norm": 0.765625, "learning_rate": 0.00012529102804651284, "loss": 0.7779, "step": 28097 }, { "epoch": 0.7214778510112803, "grad_norm": 0.8203125, "learning_rate": 0.000125286708951999, "loss": 0.7864, "step": 28098 }, { "epoch": 0.7215035282072021, "grad_norm": 0.83984375, "learning_rate": 0.00012528238980709098, "loss": 0.8166, "step": 28099 }, { "epoch": 0.7215292054031239, "grad_norm": 0.83984375, "learning_rate": 0.00012527807061179749, "loss": 0.8496, "step": 28100 }, { "epoch": 0.7215548825990458, "grad_norm": 0.78515625, "learning_rate": 0.00012527375136612708, "loss": 0.7118, "step": 28101 }, { "epoch": 0.7215805597949676, "grad_norm": 0.82421875, "learning_rate": 0.00012526943207008832, "loss": 0.8097, "step": 28102 }, { "epoch": 0.7216062369908894, "grad_norm": 0.75, "learning_rate": 0.00012526511272368992, "loss": 0.8196, "step": 28103 }, { "epoch": 0.7216319141868113, "grad_norm": 0.7421875, "learning_rate": 0.00012526079332694044, "loss": 0.7466, "step": 28104 }, { "epoch": 0.721657591382733, "grad_norm": 0.78125, "learning_rate": 0.00012525647387984844, "loss": 0.7374, "step": 28105 }, { "epoch": 0.7216832685786548, "grad_norm": 0.7890625, "learning_rate": 0.0001252521543824226, "loss": 0.7971, "step": 28106 }, { "epoch": 0.7217089457745767, "grad_norm": 0.7890625, "learning_rate": 0.00012524783483467146, "loss": 0.7636, "step": 28107 }, { "epoch": 0.7217346229704985, "grad_norm": 0.74609375, "learning_rate": 0.0001252435152366037, "loss": 0.7835, "step": 28108 }, { "epoch": 0.7217603001664203, "grad_norm": 0.76953125, "learning_rate": 0.00012523919558822788, "loss": 0.7454, "step": 28109 }, { "epoch": 0.7217859773623422, "grad_norm": 0.88671875, "learning_rate": 0.00012523487588955262, "loss": 0.8619, "step": 28110 }, { "epoch": 0.721811654558264, "grad_norm": 0.77734375, "learning_rate": 0.00012523055614058654, "loss": 0.7362, "step": 28111 }, { "epoch": 0.7218373317541857, "grad_norm": 0.8125, "learning_rate": 0.00012522623634133825, "loss": 0.9257, "step": 28112 }, { "epoch": 0.7218630089501076, "grad_norm": 0.7734375, "learning_rate": 0.00012522191649181632, "loss": 0.8725, "step": 28113 }, { "epoch": 0.7218886861460294, "grad_norm": 0.84375, "learning_rate": 0.00012521759659202942, "loss": 0.8157, "step": 28114 }, { "epoch": 0.7219143633419512, "grad_norm": 0.80859375, "learning_rate": 0.0001252132766419861, "loss": 0.9176, "step": 28115 }, { "epoch": 0.7219400405378731, "grad_norm": 0.76953125, "learning_rate": 0.000125208956641695, "loss": 0.793, "step": 28116 }, { "epoch": 0.7219657177337949, "grad_norm": 0.79296875, "learning_rate": 0.00012520463659116478, "loss": 0.7368, "step": 28117 }, { "epoch": 0.7219913949297166, "grad_norm": 0.74609375, "learning_rate": 0.0001252003164904039, "loss": 0.8392, "step": 28118 }, { "epoch": 0.7220170721256385, "grad_norm": 0.80078125, "learning_rate": 0.00012519599633942113, "loss": 0.7761, "step": 28119 }, { "epoch": 0.7220427493215603, "grad_norm": 0.80078125, "learning_rate": 0.00012519167613822503, "loss": 0.7085, "step": 28120 }, { "epoch": 0.7220684265174822, "grad_norm": 0.75, "learning_rate": 0.00012518735588682415, "loss": 0.8804, "step": 28121 }, { "epoch": 0.722094103713404, "grad_norm": 0.77734375, "learning_rate": 0.00012518303558522716, "loss": 0.7573, "step": 28122 }, { "epoch": 0.7221197809093258, "grad_norm": 0.7890625, "learning_rate": 0.00012517871523344266, "loss": 0.8122, "step": 28123 }, { "epoch": 0.7221454581052477, "grad_norm": 0.77734375, "learning_rate": 0.00012517439483147928, "loss": 0.8898, "step": 28124 }, { "epoch": 0.7221711353011694, "grad_norm": 0.8203125, "learning_rate": 0.0001251700743793456, "loss": 0.7932, "step": 28125 }, { "epoch": 0.7221968124970912, "grad_norm": 0.78125, "learning_rate": 0.0001251657538770502, "loss": 0.8126, "step": 28126 }, { "epoch": 0.7222224896930131, "grad_norm": 0.7265625, "learning_rate": 0.00012516143332460172, "loss": 0.8014, "step": 28127 }, { "epoch": 0.7222481668889349, "grad_norm": 0.78125, "learning_rate": 0.00012515711272200882, "loss": 0.7806, "step": 28128 }, { "epoch": 0.7222738440848567, "grad_norm": 0.81640625, "learning_rate": 0.00012515279206928002, "loss": 0.932, "step": 28129 }, { "epoch": 0.7222995212807786, "grad_norm": 0.75, "learning_rate": 0.00012514847136642403, "loss": 0.8343, "step": 28130 }, { "epoch": 0.7223251984767004, "grad_norm": 0.796875, "learning_rate": 0.00012514415061344936, "loss": 0.915, "step": 28131 }, { "epoch": 0.7223508756726221, "grad_norm": 0.78125, "learning_rate": 0.00012513982981036468, "loss": 0.7741, "step": 28132 }, { "epoch": 0.722376552868544, "grad_norm": 0.73828125, "learning_rate": 0.0001251355089571786, "loss": 0.7882, "step": 28133 }, { "epoch": 0.7224022300644658, "grad_norm": 0.79296875, "learning_rate": 0.00012513118805389971, "loss": 0.828, "step": 28134 }, { "epoch": 0.7224279072603876, "grad_norm": 0.70703125, "learning_rate": 0.00012512686710053664, "loss": 0.7474, "step": 28135 }, { "epoch": 0.7224535844563095, "grad_norm": 0.78125, "learning_rate": 0.00012512254609709802, "loss": 0.8882, "step": 28136 }, { "epoch": 0.7224792616522313, "grad_norm": 0.7265625, "learning_rate": 0.0001251182250435924, "loss": 0.8204, "step": 28137 }, { "epoch": 0.722504938848153, "grad_norm": 0.71875, "learning_rate": 0.00012511390394002845, "loss": 0.9029, "step": 28138 }, { "epoch": 0.7225306160440749, "grad_norm": 0.76171875, "learning_rate": 0.0001251095827864147, "loss": 0.7308, "step": 28139 }, { "epoch": 0.7225562932399967, "grad_norm": 0.78515625, "learning_rate": 0.00012510526158275988, "loss": 0.9126, "step": 28140 }, { "epoch": 0.7225819704359185, "grad_norm": 0.73046875, "learning_rate": 0.00012510094032907253, "loss": 0.8427, "step": 28141 }, { "epoch": 0.7226076476318404, "grad_norm": 0.76171875, "learning_rate": 0.00012509661902536124, "loss": 0.7899, "step": 28142 }, { "epoch": 0.7226333248277622, "grad_norm": 0.76953125, "learning_rate": 0.00012509229767163472, "loss": 0.885, "step": 28143 }, { "epoch": 0.722659002023684, "grad_norm": 0.6953125, "learning_rate": 0.0001250879762679015, "loss": 0.8667, "step": 28144 }, { "epoch": 0.7226846792196058, "grad_norm": 0.75390625, "learning_rate": 0.00012508365481417018, "loss": 0.7803, "step": 28145 }, { "epoch": 0.7227103564155276, "grad_norm": 0.78515625, "learning_rate": 0.00012507933331044942, "loss": 0.7612, "step": 28146 }, { "epoch": 0.7227360336114494, "grad_norm": 0.83984375, "learning_rate": 0.00012507501175674777, "loss": 0.9542, "step": 28147 }, { "epoch": 0.7227617108073713, "grad_norm": 0.703125, "learning_rate": 0.00012507069015307395, "loss": 0.79, "step": 28148 }, { "epoch": 0.7227873880032931, "grad_norm": 0.80078125, "learning_rate": 0.00012506636849943646, "loss": 0.8401, "step": 28149 }, { "epoch": 0.722813065199215, "grad_norm": 0.7734375, "learning_rate": 0.000125062046795844, "loss": 0.763, "step": 28150 }, { "epoch": 0.7228387423951368, "grad_norm": 0.8203125, "learning_rate": 0.00012505772504230514, "loss": 0.9193, "step": 28151 }, { "epoch": 0.7228644195910585, "grad_norm": 0.73046875, "learning_rate": 0.0001250534032388285, "loss": 0.8085, "step": 28152 }, { "epoch": 0.7228900967869804, "grad_norm": 0.85546875, "learning_rate": 0.00012504908138542264, "loss": 0.8186, "step": 28153 }, { "epoch": 0.7229157739829022, "grad_norm": 0.7890625, "learning_rate": 0.0001250447594820963, "loss": 0.871, "step": 28154 }, { "epoch": 0.722941451178824, "grad_norm": 0.796875, "learning_rate": 0.00012504043752885796, "loss": 0.8373, "step": 28155 }, { "epoch": 0.7229671283747459, "grad_norm": 0.86328125, "learning_rate": 0.0001250361155257163, "loss": 0.8286, "step": 28156 }, { "epoch": 0.7229928055706677, "grad_norm": 0.8046875, "learning_rate": 0.00012503179347267995, "loss": 0.7325, "step": 28157 }, { "epoch": 0.7230184827665894, "grad_norm": 0.75390625, "learning_rate": 0.00012502747136975746, "loss": 0.8305, "step": 28158 }, { "epoch": 0.7230441599625113, "grad_norm": 0.78125, "learning_rate": 0.00012502314921695751, "loss": 0.8319, "step": 28159 }, { "epoch": 0.7230698371584331, "grad_norm": 0.7421875, "learning_rate": 0.00012501882701428866, "loss": 0.8722, "step": 28160 }, { "epoch": 0.7230955143543549, "grad_norm": 0.79296875, "learning_rate": 0.00012501450476175955, "loss": 0.7816, "step": 28161 }, { "epoch": 0.7231211915502768, "grad_norm": 0.94921875, "learning_rate": 0.0001250101824593788, "loss": 0.8898, "step": 28162 }, { "epoch": 0.7231468687461986, "grad_norm": 0.73828125, "learning_rate": 0.000125005860107155, "loss": 0.8359, "step": 28163 }, { "epoch": 0.7231725459421204, "grad_norm": 0.7890625, "learning_rate": 0.00012500153770509683, "loss": 0.8115, "step": 28164 }, { "epoch": 0.7231982231380422, "grad_norm": 0.69921875, "learning_rate": 0.0001249972152532128, "loss": 0.8437, "step": 28165 }, { "epoch": 0.723223900333964, "grad_norm": 0.81640625, "learning_rate": 0.0001249928927515116, "loss": 0.8008, "step": 28166 }, { "epoch": 0.7232495775298858, "grad_norm": 0.921875, "learning_rate": 0.00012498857020000183, "loss": 0.959, "step": 28167 }, { "epoch": 0.7232752547258077, "grad_norm": 0.78125, "learning_rate": 0.00012498424759869206, "loss": 0.8654, "step": 28168 }, { "epoch": 0.7233009319217295, "grad_norm": 0.80078125, "learning_rate": 0.00012497992494759098, "loss": 0.7671, "step": 28169 }, { "epoch": 0.7233266091176513, "grad_norm": 0.734375, "learning_rate": 0.00012497560224670716, "loss": 0.7232, "step": 28170 }, { "epoch": 0.7233522863135732, "grad_norm": 0.8046875, "learning_rate": 0.00012497127949604922, "loss": 0.8646, "step": 28171 }, { "epoch": 0.7233779635094949, "grad_norm": 0.75390625, "learning_rate": 0.00012496695669562578, "loss": 0.7727, "step": 28172 }, { "epoch": 0.7234036407054167, "grad_norm": 0.78125, "learning_rate": 0.00012496263384544545, "loss": 0.823, "step": 28173 }, { "epoch": 0.7234293179013386, "grad_norm": 0.7578125, "learning_rate": 0.0001249583109455168, "loss": 0.8189, "step": 28174 }, { "epoch": 0.7234549950972604, "grad_norm": 0.73828125, "learning_rate": 0.00012495398799584854, "loss": 0.7668, "step": 28175 }, { "epoch": 0.7234806722931822, "grad_norm": 0.78515625, "learning_rate": 0.00012494966499644922, "loss": 0.7194, "step": 28176 }, { "epoch": 0.7235063494891041, "grad_norm": 0.8203125, "learning_rate": 0.00012494534194732747, "loss": 0.803, "step": 28177 }, { "epoch": 0.7235320266850258, "grad_norm": 0.76953125, "learning_rate": 0.0001249410188484919, "loss": 0.9249, "step": 28178 }, { "epoch": 0.7235577038809476, "grad_norm": 0.87109375, "learning_rate": 0.00012493669569995115, "loss": 0.8826, "step": 28179 }, { "epoch": 0.7235833810768695, "grad_norm": 0.8046875, "learning_rate": 0.00012493237250171382, "loss": 0.7843, "step": 28180 }, { "epoch": 0.7236090582727913, "grad_norm": 0.71875, "learning_rate": 0.00012492804925378853, "loss": 0.8569, "step": 28181 }, { "epoch": 0.7236347354687132, "grad_norm": 0.734375, "learning_rate": 0.00012492372595618385, "loss": 0.9736, "step": 28182 }, { "epoch": 0.723660412664635, "grad_norm": 0.73828125, "learning_rate": 0.00012491940260890848, "loss": 0.8662, "step": 28183 }, { "epoch": 0.7236860898605568, "grad_norm": 0.78125, "learning_rate": 0.00012491507921197099, "loss": 0.7595, "step": 28184 }, { "epoch": 0.7237117670564785, "grad_norm": 0.7421875, "learning_rate": 0.00012491075576537996, "loss": 0.8982, "step": 28185 }, { "epoch": 0.7237374442524004, "grad_norm": 0.92578125, "learning_rate": 0.00012490643226914408, "loss": 0.8309, "step": 28186 }, { "epoch": 0.7237631214483222, "grad_norm": 0.73828125, "learning_rate": 0.00012490210872327187, "loss": 0.8972, "step": 28187 }, { "epoch": 0.7237887986442441, "grad_norm": 0.69921875, "learning_rate": 0.00012489778512777205, "loss": 0.7511, "step": 28188 }, { "epoch": 0.7238144758401659, "grad_norm": 0.7578125, "learning_rate": 0.00012489346148265325, "loss": 0.842, "step": 28189 }, { "epoch": 0.7238401530360877, "grad_norm": 0.80078125, "learning_rate": 0.00012488913778792394, "loss": 0.8501, "step": 28190 }, { "epoch": 0.7238658302320096, "grad_norm": 0.69140625, "learning_rate": 0.00012488481404359286, "loss": 0.8579, "step": 28191 }, { "epoch": 0.7238915074279313, "grad_norm": 0.7734375, "learning_rate": 0.00012488049024966862, "loss": 0.9152, "step": 28192 }, { "epoch": 0.7239171846238531, "grad_norm": 0.91015625, "learning_rate": 0.00012487616640615977, "loss": 0.8593, "step": 28193 }, { "epoch": 0.723942861819775, "grad_norm": 0.77734375, "learning_rate": 0.000124871842513075, "loss": 0.8707, "step": 28194 }, { "epoch": 0.7239685390156968, "grad_norm": 0.78125, "learning_rate": 0.00012486751857042285, "loss": 0.8043, "step": 28195 }, { "epoch": 0.7239942162116186, "grad_norm": 0.8515625, "learning_rate": 0.000124863194578212, "loss": 0.8251, "step": 28196 }, { "epoch": 0.7240198934075405, "grad_norm": 0.83203125, "learning_rate": 0.00012485887053645107, "loss": 0.8776, "step": 28197 }, { "epoch": 0.7240455706034622, "grad_norm": 0.8359375, "learning_rate": 0.00012485454644514863, "loss": 0.8242, "step": 28198 }, { "epoch": 0.724071247799384, "grad_norm": 0.7421875, "learning_rate": 0.00012485022230431333, "loss": 0.7787, "step": 28199 }, { "epoch": 0.7240969249953059, "grad_norm": 0.7265625, "learning_rate": 0.0001248458981139538, "loss": 0.7264, "step": 28200 }, { "epoch": 0.7241226021912277, "grad_norm": 0.83203125, "learning_rate": 0.00012484157387407862, "loss": 0.9453, "step": 28201 }, { "epoch": 0.7241482793871495, "grad_norm": 0.95703125, "learning_rate": 0.00012483724958469648, "loss": 0.812, "step": 28202 }, { "epoch": 0.7241739565830714, "grad_norm": 0.8046875, "learning_rate": 0.00012483292524581588, "loss": 0.9243, "step": 28203 }, { "epoch": 0.7241996337789932, "grad_norm": 0.84375, "learning_rate": 0.0001248286008574455, "loss": 0.7922, "step": 28204 }, { "epoch": 0.7242253109749149, "grad_norm": 0.92578125, "learning_rate": 0.000124824276419594, "loss": 0.8714, "step": 28205 }, { "epoch": 0.7242509881708368, "grad_norm": 0.828125, "learning_rate": 0.00012481995193226993, "loss": 0.8426, "step": 28206 }, { "epoch": 0.7242766653667586, "grad_norm": 0.74609375, "learning_rate": 0.00012481562739548197, "loss": 0.7925, "step": 28207 }, { "epoch": 0.7243023425626804, "grad_norm": 0.79296875, "learning_rate": 0.00012481130280923868, "loss": 0.9107, "step": 28208 }, { "epoch": 0.7243280197586023, "grad_norm": 0.7890625, "learning_rate": 0.0001248069781735487, "loss": 0.8587, "step": 28209 }, { "epoch": 0.7243536969545241, "grad_norm": 0.765625, "learning_rate": 0.0001248026534884207, "loss": 0.9149, "step": 28210 }, { "epoch": 0.724379374150446, "grad_norm": 0.765625, "learning_rate": 0.00012479832875386318, "loss": 0.8921, "step": 28211 }, { "epoch": 0.7244050513463677, "grad_norm": 0.8203125, "learning_rate": 0.0001247940039698849, "loss": 0.7404, "step": 28212 }, { "epoch": 0.7244307285422895, "grad_norm": 0.75, "learning_rate": 0.00012478967913649437, "loss": 0.759, "step": 28213 }, { "epoch": 0.7244564057382114, "grad_norm": 0.80859375, "learning_rate": 0.00012478535425370025, "loss": 0.8525, "step": 28214 }, { "epoch": 0.7244820829341332, "grad_norm": 0.79296875, "learning_rate": 0.0001247810293215112, "loss": 0.9254, "step": 28215 }, { "epoch": 0.724507760130055, "grad_norm": 0.7890625, "learning_rate": 0.00012477670433993574, "loss": 0.8296, "step": 28216 }, { "epoch": 0.7245334373259769, "grad_norm": 0.77734375, "learning_rate": 0.00012477237930898257, "loss": 0.936, "step": 28217 }, { "epoch": 0.7245591145218986, "grad_norm": 0.78515625, "learning_rate": 0.00012476805422866028, "loss": 0.7901, "step": 28218 }, { "epoch": 0.7245847917178204, "grad_norm": 0.77734375, "learning_rate": 0.0001247637290989775, "loss": 0.8487, "step": 28219 }, { "epoch": 0.7246104689137423, "grad_norm": 0.78125, "learning_rate": 0.00012475940391994282, "loss": 0.8036, "step": 28220 }, { "epoch": 0.7246361461096641, "grad_norm": 0.75390625, "learning_rate": 0.00012475507869156492, "loss": 0.8209, "step": 28221 }, { "epoch": 0.7246618233055859, "grad_norm": 0.7578125, "learning_rate": 0.00012475075341385238, "loss": 0.8051, "step": 28222 }, { "epoch": 0.7246875005015078, "grad_norm": 0.765625, "learning_rate": 0.00012474642808681381, "loss": 0.9628, "step": 28223 }, { "epoch": 0.7247131776974296, "grad_norm": 0.76953125, "learning_rate": 0.00012474210271045785, "loss": 0.7571, "step": 28224 }, { "epoch": 0.7247388548933513, "grad_norm": 0.8125, "learning_rate": 0.00012473777728479312, "loss": 0.9767, "step": 28225 }, { "epoch": 0.7247645320892732, "grad_norm": 0.796875, "learning_rate": 0.00012473345180982824, "loss": 0.9211, "step": 28226 }, { "epoch": 0.724790209285195, "grad_norm": 0.80859375, "learning_rate": 0.0001247291262855718, "loss": 0.811, "step": 28227 }, { "epoch": 0.7248158864811168, "grad_norm": 0.80078125, "learning_rate": 0.00012472480071203248, "loss": 0.7424, "step": 28228 }, { "epoch": 0.7248415636770387, "grad_norm": 0.79296875, "learning_rate": 0.00012472047508921887, "loss": 0.7979, "step": 28229 }, { "epoch": 0.7248672408729605, "grad_norm": 0.8203125, "learning_rate": 0.00012471614941713953, "loss": 0.8837, "step": 28230 }, { "epoch": 0.7248929180688823, "grad_norm": 0.7421875, "learning_rate": 0.0001247118236958032, "loss": 0.8392, "step": 28231 }, { "epoch": 0.7249185952648041, "grad_norm": 0.8359375, "learning_rate": 0.0001247074979252184, "loss": 0.9365, "step": 28232 }, { "epoch": 0.7249442724607259, "grad_norm": 0.74609375, "learning_rate": 0.00012470317210539378, "loss": 0.8306, "step": 28233 }, { "epoch": 0.7249699496566477, "grad_norm": 0.828125, "learning_rate": 0.000124698846236338, "loss": 0.8131, "step": 28234 }, { "epoch": 0.7249956268525696, "grad_norm": 0.8046875, "learning_rate": 0.00012469452031805962, "loss": 0.8184, "step": 28235 }, { "epoch": 0.7250213040484914, "grad_norm": 0.80859375, "learning_rate": 0.0001246901943505673, "loss": 0.8637, "step": 28236 }, { "epoch": 0.7250469812444132, "grad_norm": 0.79296875, "learning_rate": 0.00012468586833386968, "loss": 0.8725, "step": 28237 }, { "epoch": 0.725072658440335, "grad_norm": 0.72265625, "learning_rate": 0.0001246815422679753, "loss": 0.7312, "step": 28238 }, { "epoch": 0.7250983356362568, "grad_norm": 0.8046875, "learning_rate": 0.0001246772161528929, "loss": 0.7211, "step": 28239 }, { "epoch": 0.7251240128321786, "grad_norm": 1.0078125, "learning_rate": 0.000124672889988631, "loss": 0.8001, "step": 28240 }, { "epoch": 0.7251496900281005, "grad_norm": 0.70703125, "learning_rate": 0.00012466856377519824, "loss": 0.7656, "step": 28241 }, { "epoch": 0.7251753672240223, "grad_norm": 0.94140625, "learning_rate": 0.00012466423751260333, "loss": 0.7813, "step": 28242 }, { "epoch": 0.7252010444199442, "grad_norm": 0.83203125, "learning_rate": 0.00012465991120085473, "loss": 0.9227, "step": 28243 }, { "epoch": 0.725226721615866, "grad_norm": 0.78125, "learning_rate": 0.00012465558483996123, "loss": 0.8256, "step": 28244 }, { "epoch": 0.7252523988117877, "grad_norm": 0.87890625, "learning_rate": 0.00012465125842993135, "loss": 0.898, "step": 28245 }, { "epoch": 0.7252780760077095, "grad_norm": 0.78125, "learning_rate": 0.00012464693197077373, "loss": 0.9104, "step": 28246 }, { "epoch": 0.7253037532036314, "grad_norm": 0.73828125, "learning_rate": 0.000124642605462497, "loss": 0.8095, "step": 28247 }, { "epoch": 0.7253294303995532, "grad_norm": 0.80078125, "learning_rate": 0.0001246382789051098, "loss": 0.8749, "step": 28248 }, { "epoch": 0.7253551075954751, "grad_norm": 0.80078125, "learning_rate": 0.00012463395229862074, "loss": 0.9571, "step": 28249 }, { "epoch": 0.7253807847913969, "grad_norm": 0.80078125, "learning_rate": 0.00012462962564303845, "loss": 0.8301, "step": 28250 }, { "epoch": 0.7254064619873187, "grad_norm": 0.79296875, "learning_rate": 0.00012462529893837146, "loss": 0.8472, "step": 28251 }, { "epoch": 0.7254321391832405, "grad_norm": 0.80859375, "learning_rate": 0.00012462097218462857, "loss": 0.907, "step": 28252 }, { "epoch": 0.7254578163791623, "grad_norm": 0.7734375, "learning_rate": 0.00012461664538181825, "loss": 0.9452, "step": 28253 }, { "epoch": 0.7254834935750841, "grad_norm": 0.87109375, "learning_rate": 0.00012461231852994918, "loss": 0.8228, "step": 28254 }, { "epoch": 0.725509170771006, "grad_norm": 0.78125, "learning_rate": 0.00012460799162903002, "loss": 0.7552, "step": 28255 }, { "epoch": 0.7255348479669278, "grad_norm": 0.7734375, "learning_rate": 0.0001246036646790693, "loss": 0.7595, "step": 28256 }, { "epoch": 0.7255605251628496, "grad_norm": 0.78515625, "learning_rate": 0.00012459933768007577, "loss": 0.7719, "step": 28257 }, { "epoch": 0.7255862023587714, "grad_norm": 0.77734375, "learning_rate": 0.00012459501063205797, "loss": 0.8704, "step": 28258 }, { "epoch": 0.7256118795546932, "grad_norm": 0.75, "learning_rate": 0.00012459068353502448, "loss": 0.8066, "step": 28259 }, { "epoch": 0.725637556750615, "grad_norm": 0.76171875, "learning_rate": 0.00012458635638898401, "loss": 0.7938, "step": 28260 }, { "epoch": 0.7256632339465369, "grad_norm": 0.7890625, "learning_rate": 0.00012458202919394515, "loss": 0.7634, "step": 28261 }, { "epoch": 0.7256889111424587, "grad_norm": 0.77734375, "learning_rate": 0.00012457770194991652, "loss": 0.8307, "step": 28262 }, { "epoch": 0.7257145883383805, "grad_norm": 0.69921875, "learning_rate": 0.0001245733746569068, "loss": 0.7907, "step": 28263 }, { "epoch": 0.7257402655343024, "grad_norm": 0.828125, "learning_rate": 0.00012456904731492449, "loss": 0.8117, "step": 28264 }, { "epoch": 0.7257659427302241, "grad_norm": 0.76953125, "learning_rate": 0.00012456471992397834, "loss": 0.7977, "step": 28265 }, { "epoch": 0.7257916199261459, "grad_norm": 0.79296875, "learning_rate": 0.00012456039248407688, "loss": 0.8511, "step": 28266 }, { "epoch": 0.7258172971220678, "grad_norm": 0.79296875, "learning_rate": 0.0001245560649952288, "loss": 0.841, "step": 28267 }, { "epoch": 0.7258429743179896, "grad_norm": 0.79296875, "learning_rate": 0.00012455173745744272, "loss": 0.8319, "step": 28268 }, { "epoch": 0.7258686515139114, "grad_norm": 0.8046875, "learning_rate": 0.00012454740987072723, "loss": 0.7482, "step": 28269 }, { "epoch": 0.7258943287098333, "grad_norm": 0.828125, "learning_rate": 0.00012454308223509096, "loss": 0.7017, "step": 28270 }, { "epoch": 0.7259200059057551, "grad_norm": 0.78125, "learning_rate": 0.00012453875455054256, "loss": 0.9517, "step": 28271 }, { "epoch": 0.7259456831016768, "grad_norm": 0.6796875, "learning_rate": 0.0001245344268170906, "loss": 0.6883, "step": 28272 }, { "epoch": 0.7259713602975987, "grad_norm": 0.828125, "learning_rate": 0.0001245300990347438, "loss": 0.884, "step": 28273 }, { "epoch": 0.7259970374935205, "grad_norm": 0.8125, "learning_rate": 0.00012452577120351067, "loss": 0.9185, "step": 28274 }, { "epoch": 0.7260227146894424, "grad_norm": 0.8125, "learning_rate": 0.00012452144332339993, "loss": 0.8111, "step": 28275 }, { "epoch": 0.7260483918853642, "grad_norm": 0.765625, "learning_rate": 0.00012451711539442016, "loss": 0.8322, "step": 28276 }, { "epoch": 0.726074069081286, "grad_norm": 0.83984375, "learning_rate": 0.00012451278741658, "loss": 0.8328, "step": 28277 }, { "epoch": 0.7260997462772077, "grad_norm": 1.6640625, "learning_rate": 0.00012450845938988806, "loss": 0.8907, "step": 28278 }, { "epoch": 0.7261254234731296, "grad_norm": 0.71875, "learning_rate": 0.000124504131314353, "loss": 0.8309, "step": 28279 }, { "epoch": 0.7261511006690514, "grad_norm": 0.703125, "learning_rate": 0.00012449980318998338, "loss": 0.7669, "step": 28280 }, { "epoch": 0.7261767778649733, "grad_norm": 0.71875, "learning_rate": 0.0001244954750167879, "loss": 0.8231, "step": 28281 }, { "epoch": 0.7262024550608951, "grad_norm": 0.8203125, "learning_rate": 0.00012449114679477513, "loss": 0.8726, "step": 28282 }, { "epoch": 0.7262281322568169, "grad_norm": 0.80859375, "learning_rate": 0.0001244868185239537, "loss": 0.8623, "step": 28283 }, { "epoch": 0.7262538094527388, "grad_norm": 1.0, "learning_rate": 0.0001244824902043323, "loss": 0.753, "step": 28284 }, { "epoch": 0.7262794866486605, "grad_norm": 0.7421875, "learning_rate": 0.00012447816183591947, "loss": 0.8453, "step": 28285 }, { "epoch": 0.7263051638445823, "grad_norm": 0.76171875, "learning_rate": 0.00012447383341872387, "loss": 0.8218, "step": 28286 }, { "epoch": 0.7263308410405042, "grad_norm": 0.70703125, "learning_rate": 0.00012446950495275416, "loss": 0.7893, "step": 28287 }, { "epoch": 0.726356518236426, "grad_norm": 0.8046875, "learning_rate": 0.0001244651764380189, "loss": 0.8497, "step": 28288 }, { "epoch": 0.7263821954323478, "grad_norm": 0.828125, "learning_rate": 0.0001244608478745268, "loss": 0.882, "step": 28289 }, { "epoch": 0.7264078726282697, "grad_norm": 0.7734375, "learning_rate": 0.00012445651926228644, "loss": 0.8982, "step": 28290 }, { "epoch": 0.7264335498241915, "grad_norm": 0.76171875, "learning_rate": 0.0001244521906013064, "loss": 0.7937, "step": 28291 }, { "epoch": 0.7264592270201132, "grad_norm": 0.81640625, "learning_rate": 0.00012444786189159536, "loss": 0.8195, "step": 28292 }, { "epoch": 0.7264849042160351, "grad_norm": 0.80859375, "learning_rate": 0.00012444353313316198, "loss": 0.8569, "step": 28293 }, { "epoch": 0.7265105814119569, "grad_norm": 0.8046875, "learning_rate": 0.00012443920432601483, "loss": 0.8654, "step": 28294 }, { "epoch": 0.7265362586078787, "grad_norm": 0.76953125, "learning_rate": 0.00012443487547016255, "loss": 0.934, "step": 28295 }, { "epoch": 0.7265619358038006, "grad_norm": 0.69921875, "learning_rate": 0.00012443054656561376, "loss": 0.7637, "step": 28296 }, { "epoch": 0.7265876129997224, "grad_norm": 0.7890625, "learning_rate": 0.00012442621761237708, "loss": 0.94, "step": 28297 }, { "epoch": 0.7266132901956441, "grad_norm": 0.76171875, "learning_rate": 0.0001244218886104612, "loss": 0.7847, "step": 28298 }, { "epoch": 0.726638967391566, "grad_norm": 0.8671875, "learning_rate": 0.00012441755955987466, "loss": 0.9342, "step": 28299 }, { "epoch": 0.7266646445874878, "grad_norm": 0.74609375, "learning_rate": 0.00012441323046062617, "loss": 0.8146, "step": 28300 }, { "epoch": 0.7266903217834096, "grad_norm": 0.8984375, "learning_rate": 0.0001244089013127243, "loss": 0.8427, "step": 28301 }, { "epoch": 0.7267159989793315, "grad_norm": 0.80078125, "learning_rate": 0.0001244045721161777, "loss": 0.8068, "step": 28302 }, { "epoch": 0.7267416761752533, "grad_norm": 0.84375, "learning_rate": 0.00012440024287099498, "loss": 0.8938, "step": 28303 }, { "epoch": 0.7267673533711752, "grad_norm": 0.73046875, "learning_rate": 0.00012439591357718476, "loss": 0.7363, "step": 28304 }, { "epoch": 0.7267930305670969, "grad_norm": 0.82421875, "learning_rate": 0.00012439158423475572, "loss": 0.9281, "step": 28305 }, { "epoch": 0.7268187077630187, "grad_norm": 0.70703125, "learning_rate": 0.00012438725484371644, "loss": 0.8973, "step": 28306 }, { "epoch": 0.7268443849589405, "grad_norm": 0.74609375, "learning_rate": 0.00012438292540407558, "loss": 0.717, "step": 28307 }, { "epoch": 0.7268700621548624, "grad_norm": 0.7421875, "learning_rate": 0.00012437859591584176, "loss": 0.8416, "step": 28308 }, { "epoch": 0.7268957393507842, "grad_norm": 0.78125, "learning_rate": 0.0001243742663790236, "loss": 0.9053, "step": 28309 }, { "epoch": 0.7269214165467061, "grad_norm": 0.85546875, "learning_rate": 0.00012436993679362966, "loss": 0.9302, "step": 28310 }, { "epoch": 0.7269470937426278, "grad_norm": 0.8203125, "learning_rate": 0.00012436560715966869, "loss": 0.8548, "step": 28311 }, { "epoch": 0.7269727709385496, "grad_norm": 0.82421875, "learning_rate": 0.00012436127747714927, "loss": 0.7594, "step": 28312 }, { "epoch": 0.7269984481344715, "grad_norm": 0.73828125, "learning_rate": 0.00012435694774608002, "loss": 0.897, "step": 28313 }, { "epoch": 0.7270241253303933, "grad_norm": 0.828125, "learning_rate": 0.0001243526179664696, "loss": 0.9095, "step": 28314 }, { "epoch": 0.7270498025263151, "grad_norm": 0.76953125, "learning_rate": 0.00012434828813832658, "loss": 0.7812, "step": 28315 }, { "epoch": 0.727075479722237, "grad_norm": 0.71484375, "learning_rate": 0.00012434395826165962, "loss": 0.7631, "step": 28316 }, { "epoch": 0.7271011569181588, "grad_norm": 0.7890625, "learning_rate": 0.00012433962833647736, "loss": 0.8439, "step": 28317 }, { "epoch": 0.7271268341140805, "grad_norm": 0.8046875, "learning_rate": 0.0001243352983627884, "loss": 0.7533, "step": 28318 }, { "epoch": 0.7271525113100024, "grad_norm": 0.88671875, "learning_rate": 0.0001243309683406014, "loss": 1.0389, "step": 28319 }, { "epoch": 0.7271781885059242, "grad_norm": 0.703125, "learning_rate": 0.00012432663826992502, "loss": 0.7097, "step": 28320 }, { "epoch": 0.727203865701846, "grad_norm": 0.76171875, "learning_rate": 0.0001243223081507678, "loss": 0.8844, "step": 28321 }, { "epoch": 0.7272295428977679, "grad_norm": 0.7109375, "learning_rate": 0.0001243179779831384, "loss": 0.6785, "step": 28322 }, { "epoch": 0.7272552200936897, "grad_norm": 0.79296875, "learning_rate": 0.00012431364776704552, "loss": 0.8554, "step": 28323 }, { "epoch": 0.7272808972896115, "grad_norm": 0.73828125, "learning_rate": 0.0001243093175024977, "loss": 0.7745, "step": 28324 }, { "epoch": 0.7273065744855333, "grad_norm": 0.83984375, "learning_rate": 0.00012430498718950364, "loss": 1.0062, "step": 28325 }, { "epoch": 0.7273322516814551, "grad_norm": 0.7421875, "learning_rate": 0.00012430065682807187, "loss": 0.876, "step": 28326 }, { "epoch": 0.7273579288773769, "grad_norm": 0.80859375, "learning_rate": 0.00012429632641821115, "loss": 0.9707, "step": 28327 }, { "epoch": 0.7273836060732988, "grad_norm": 0.734375, "learning_rate": 0.00012429199595993, "loss": 0.6398, "step": 28328 }, { "epoch": 0.7274092832692206, "grad_norm": 0.73828125, "learning_rate": 0.00012428766545323714, "loss": 0.8749, "step": 28329 }, { "epoch": 0.7274349604651424, "grad_norm": 0.8203125, "learning_rate": 0.00012428333489814114, "loss": 0.8828, "step": 28330 }, { "epoch": 0.7274606376610642, "grad_norm": 0.80078125, "learning_rate": 0.00012427900429465063, "loss": 0.8694, "step": 28331 }, { "epoch": 0.727486314856986, "grad_norm": 0.734375, "learning_rate": 0.00012427467364277428, "loss": 0.9016, "step": 28332 }, { "epoch": 0.7275119920529078, "grad_norm": 0.82421875, "learning_rate": 0.00012427034294252068, "loss": 0.892, "step": 28333 }, { "epoch": 0.7275376692488297, "grad_norm": 0.7421875, "learning_rate": 0.0001242660121938985, "loss": 0.8073, "step": 28334 }, { "epoch": 0.7275633464447515, "grad_norm": 0.81640625, "learning_rate": 0.00012426168139691637, "loss": 0.8791, "step": 28335 }, { "epoch": 0.7275890236406733, "grad_norm": 0.8046875, "learning_rate": 0.00012425735055158284, "loss": 0.8722, "step": 28336 }, { "epoch": 0.7276147008365952, "grad_norm": 0.78125, "learning_rate": 0.00012425301965790663, "loss": 0.8855, "step": 28337 }, { "epoch": 0.7276403780325169, "grad_norm": 0.7734375, "learning_rate": 0.00012424868871589633, "loss": 0.8537, "step": 28338 }, { "epoch": 0.7276660552284387, "grad_norm": 0.87890625, "learning_rate": 0.0001242443577255606, "loss": 0.7779, "step": 28339 }, { "epoch": 0.7276917324243606, "grad_norm": 0.79296875, "learning_rate": 0.00012424002668690807, "loss": 0.8314, "step": 28340 }, { "epoch": 0.7277174096202824, "grad_norm": 0.83203125, "learning_rate": 0.00012423569559994732, "loss": 0.9875, "step": 28341 }, { "epoch": 0.7277430868162043, "grad_norm": 0.796875, "learning_rate": 0.00012423136446468704, "loss": 0.6615, "step": 28342 }, { "epoch": 0.7277687640121261, "grad_norm": 0.77734375, "learning_rate": 0.00012422703328113585, "loss": 0.921, "step": 28343 }, { "epoch": 0.7277944412080479, "grad_norm": 0.8203125, "learning_rate": 0.00012422270204930232, "loss": 0.8421, "step": 28344 }, { "epoch": 0.7278201184039697, "grad_norm": 0.74609375, "learning_rate": 0.0001242183707691952, "loss": 0.7407, "step": 28345 }, { "epoch": 0.7278457955998915, "grad_norm": 1.109375, "learning_rate": 0.00012421403944082302, "loss": 0.8747, "step": 28346 }, { "epoch": 0.7278714727958133, "grad_norm": 0.81640625, "learning_rate": 0.00012420970806419445, "loss": 0.7731, "step": 28347 }, { "epoch": 0.7278971499917352, "grad_norm": 0.83984375, "learning_rate": 0.00012420537663931813, "loss": 0.7787, "step": 28348 }, { "epoch": 0.727922827187657, "grad_norm": 0.8125, "learning_rate": 0.00012420104516620265, "loss": 0.7477, "step": 28349 }, { "epoch": 0.7279485043835788, "grad_norm": 0.73828125, "learning_rate": 0.0001241967136448567, "loss": 0.7431, "step": 28350 }, { "epoch": 0.7279741815795006, "grad_norm": 0.7734375, "learning_rate": 0.00012419238207528888, "loss": 0.9111, "step": 28351 }, { "epoch": 0.7279998587754224, "grad_norm": 0.796875, "learning_rate": 0.0001241880504575078, "loss": 0.8442, "step": 28352 }, { "epoch": 0.7280255359713442, "grad_norm": 0.7734375, "learning_rate": 0.00012418371879152217, "loss": 0.8502, "step": 28353 }, { "epoch": 0.7280512131672661, "grad_norm": 0.8046875, "learning_rate": 0.0001241793870773406, "loss": 0.8958, "step": 28354 }, { "epoch": 0.7280768903631879, "grad_norm": 0.75, "learning_rate": 0.0001241750553149716, "loss": 0.7733, "step": 28355 }, { "epoch": 0.7281025675591097, "grad_norm": 0.81640625, "learning_rate": 0.00012417072350442396, "loss": 0.7028, "step": 28356 }, { "epoch": 0.7281282447550316, "grad_norm": 0.73828125, "learning_rate": 0.00012416639164570623, "loss": 0.8718, "step": 28357 }, { "epoch": 0.7281539219509533, "grad_norm": 0.75, "learning_rate": 0.00012416205973882707, "loss": 0.7889, "step": 28358 }, { "epoch": 0.7281795991468751, "grad_norm": 0.75, "learning_rate": 0.00012415772778379507, "loss": 0.8298, "step": 28359 }, { "epoch": 0.728205276342797, "grad_norm": 0.79296875, "learning_rate": 0.00012415339578061897, "loss": 0.7242, "step": 28360 }, { "epoch": 0.7282309535387188, "grad_norm": 0.80078125, "learning_rate": 0.0001241490637293073, "loss": 0.8758, "step": 28361 }, { "epoch": 0.7282566307346406, "grad_norm": 0.71484375, "learning_rate": 0.00012414473162986872, "loss": 0.7849, "step": 28362 }, { "epoch": 0.7282823079305625, "grad_norm": 0.73828125, "learning_rate": 0.00012414039948231188, "loss": 0.7565, "step": 28363 }, { "epoch": 0.7283079851264843, "grad_norm": 0.81640625, "learning_rate": 0.00012413606728664542, "loss": 0.7806, "step": 28364 }, { "epoch": 0.728333662322406, "grad_norm": 0.84375, "learning_rate": 0.00012413173504287794, "loss": 0.9482, "step": 28365 }, { "epoch": 0.7283593395183279, "grad_norm": 0.7421875, "learning_rate": 0.00012412740275101808, "loss": 0.8259, "step": 28366 }, { "epoch": 0.7283850167142497, "grad_norm": 0.89453125, "learning_rate": 0.0001241230704110745, "loss": 0.8324, "step": 28367 }, { "epoch": 0.7284106939101715, "grad_norm": 0.78125, "learning_rate": 0.00012411873802305583, "loss": 0.6892, "step": 28368 }, { "epoch": 0.7284363711060934, "grad_norm": 0.80078125, "learning_rate": 0.0001241144055869707, "loss": 0.8631, "step": 28369 }, { "epoch": 0.7284620483020152, "grad_norm": 0.66796875, "learning_rate": 0.00012411007310282772, "loss": 0.7222, "step": 28370 }, { "epoch": 0.7284877254979369, "grad_norm": 0.84375, "learning_rate": 0.0001241057405706355, "loss": 0.7974, "step": 28371 }, { "epoch": 0.7285134026938588, "grad_norm": 0.78125, "learning_rate": 0.00012410140799040277, "loss": 0.8855, "step": 28372 }, { "epoch": 0.7285390798897806, "grad_norm": 0.79296875, "learning_rate": 0.00012409707536213812, "loss": 0.8127, "step": 28373 }, { "epoch": 0.7285647570857025, "grad_norm": 0.8125, "learning_rate": 0.00012409274268585017, "loss": 0.8058, "step": 28374 }, { "epoch": 0.7285904342816243, "grad_norm": 0.796875, "learning_rate": 0.00012408840996154755, "loss": 0.7845, "step": 28375 }, { "epoch": 0.7286161114775461, "grad_norm": 0.859375, "learning_rate": 0.00012408407718923886, "loss": 0.7875, "step": 28376 }, { "epoch": 0.728641788673468, "grad_norm": 0.84375, "learning_rate": 0.00012407974436893284, "loss": 0.9444, "step": 28377 }, { "epoch": 0.7286674658693897, "grad_norm": 0.77734375, "learning_rate": 0.00012407541150063806, "loss": 0.7765, "step": 28378 }, { "epoch": 0.7286931430653115, "grad_norm": 0.83984375, "learning_rate": 0.00012407107858436312, "loss": 0.8063, "step": 28379 }, { "epoch": 0.7287188202612334, "grad_norm": 0.8203125, "learning_rate": 0.00012406674562011674, "loss": 0.6881, "step": 28380 }, { "epoch": 0.7287444974571552, "grad_norm": 0.765625, "learning_rate": 0.00012406241260790748, "loss": 0.7864, "step": 28381 }, { "epoch": 0.728770174653077, "grad_norm": 0.80078125, "learning_rate": 0.000124058079547744, "loss": 0.852, "step": 28382 }, { "epoch": 0.7287958518489989, "grad_norm": 0.72265625, "learning_rate": 0.00012405374643963498, "loss": 0.7822, "step": 28383 }, { "epoch": 0.7288215290449207, "grad_norm": 0.77734375, "learning_rate": 0.00012404941328358898, "loss": 0.9152, "step": 28384 }, { "epoch": 0.7288472062408424, "grad_norm": 0.65234375, "learning_rate": 0.0001240450800796147, "loss": 0.7494, "step": 28385 }, { "epoch": 0.7288728834367643, "grad_norm": 0.75, "learning_rate": 0.00012404074682772074, "loss": 0.7598, "step": 28386 }, { "epoch": 0.7288985606326861, "grad_norm": 0.7890625, "learning_rate": 0.00012403641352791572, "loss": 0.9656, "step": 28387 }, { "epoch": 0.7289242378286079, "grad_norm": 0.72265625, "learning_rate": 0.0001240320801802083, "loss": 0.8113, "step": 28388 }, { "epoch": 0.7289499150245298, "grad_norm": 0.8203125, "learning_rate": 0.00012402774678460713, "loss": 0.9991, "step": 28389 }, { "epoch": 0.7289755922204516, "grad_norm": 0.7890625, "learning_rate": 0.00012402341334112084, "loss": 0.884, "step": 28390 }, { "epoch": 0.7290012694163733, "grad_norm": 0.71875, "learning_rate": 0.00012401907984975805, "loss": 0.8484, "step": 28391 }, { "epoch": 0.7290269466122952, "grad_norm": 0.76171875, "learning_rate": 0.00012401474631052738, "loss": 0.8523, "step": 28392 }, { "epoch": 0.729052623808217, "grad_norm": 0.78515625, "learning_rate": 0.00012401041272343754, "loss": 0.7695, "step": 28393 }, { "epoch": 0.7290783010041388, "grad_norm": 0.8359375, "learning_rate": 0.00012400607908849708, "loss": 0.8507, "step": 28394 }, { "epoch": 0.7291039782000607, "grad_norm": 0.80859375, "learning_rate": 0.00012400174540571469, "loss": 0.8486, "step": 28395 }, { "epoch": 0.7291296553959825, "grad_norm": 0.7265625, "learning_rate": 0.00012399741167509897, "loss": 0.8622, "step": 28396 }, { "epoch": 0.7291553325919043, "grad_norm": 0.81640625, "learning_rate": 0.00012399307789665858, "loss": 0.8783, "step": 28397 }, { "epoch": 0.7291810097878261, "grad_norm": 0.765625, "learning_rate": 0.00012398874407040214, "loss": 0.8379, "step": 28398 }, { "epoch": 0.7292066869837479, "grad_norm": 0.7890625, "learning_rate": 0.00012398441019633832, "loss": 0.8341, "step": 28399 }, { "epoch": 0.7292323641796697, "grad_norm": 0.7265625, "learning_rate": 0.00012398007627447574, "loss": 0.6617, "step": 28400 }, { "epoch": 0.7292580413755916, "grad_norm": 0.78515625, "learning_rate": 0.00012397574230482304, "loss": 0.739, "step": 28401 }, { "epoch": 0.7292837185715134, "grad_norm": 0.75390625, "learning_rate": 0.00012397140828738885, "loss": 0.9061, "step": 28402 }, { "epoch": 0.7293093957674353, "grad_norm": 0.75390625, "learning_rate": 0.00012396707422218176, "loss": 0.818, "step": 28403 }, { "epoch": 0.7293350729633571, "grad_norm": 0.83203125, "learning_rate": 0.00012396274010921052, "loss": 1.0368, "step": 28404 }, { "epoch": 0.7293607501592788, "grad_norm": 0.77734375, "learning_rate": 0.0001239584059484837, "loss": 0.936, "step": 28405 }, { "epoch": 0.7293864273552007, "grad_norm": 0.79296875, "learning_rate": 0.00012395407174000991, "loss": 0.8972, "step": 28406 }, { "epoch": 0.7294121045511225, "grad_norm": 0.84375, "learning_rate": 0.00012394973748379784, "loss": 0.8207, "step": 28407 }, { "epoch": 0.7294377817470443, "grad_norm": 0.78125, "learning_rate": 0.00012394540317985604, "loss": 0.7664, "step": 28408 }, { "epoch": 0.7294634589429662, "grad_norm": 0.734375, "learning_rate": 0.0001239410688281933, "loss": 0.7299, "step": 28409 }, { "epoch": 0.729489136138888, "grad_norm": 0.6796875, "learning_rate": 0.00012393673442881814, "loss": 0.7253, "step": 28410 }, { "epoch": 0.7295148133348097, "grad_norm": 0.8359375, "learning_rate": 0.0001239323999817392, "loss": 0.8685, "step": 28411 }, { "epoch": 0.7295404905307316, "grad_norm": 0.73046875, "learning_rate": 0.0001239280654869652, "loss": 0.8531, "step": 28412 }, { "epoch": 0.7295661677266534, "grad_norm": 0.8125, "learning_rate": 0.00012392373094450472, "loss": 0.8698, "step": 28413 }, { "epoch": 0.7295918449225752, "grad_norm": 0.765625, "learning_rate": 0.0001239193963543664, "loss": 0.8699, "step": 28414 }, { "epoch": 0.7296175221184971, "grad_norm": 0.7734375, "learning_rate": 0.0001239150617165589, "loss": 0.8394, "step": 28415 }, { "epoch": 0.7296431993144189, "grad_norm": 0.82421875, "learning_rate": 0.0001239107270310908, "loss": 0.8524, "step": 28416 }, { "epoch": 0.7296688765103407, "grad_norm": 0.77734375, "learning_rate": 0.0001239063922979708, "loss": 0.8111, "step": 28417 }, { "epoch": 0.7296945537062625, "grad_norm": 0.74609375, "learning_rate": 0.0001239020575172075, "loss": 0.7569, "step": 28418 }, { "epoch": 0.7297202309021843, "grad_norm": 0.77734375, "learning_rate": 0.0001238977226888096, "loss": 0.7463, "step": 28419 }, { "epoch": 0.7297459080981061, "grad_norm": 0.76953125, "learning_rate": 0.00012389338781278568, "loss": 0.7413, "step": 28420 }, { "epoch": 0.729771585294028, "grad_norm": 0.859375, "learning_rate": 0.00012388905288914436, "loss": 0.9043, "step": 28421 }, { "epoch": 0.7297972624899498, "grad_norm": 0.8515625, "learning_rate": 0.00012388471791789434, "loss": 0.8986, "step": 28422 }, { "epoch": 0.7298229396858716, "grad_norm": 0.8046875, "learning_rate": 0.00012388038289904427, "loss": 0.9179, "step": 28423 }, { "epoch": 0.7298486168817935, "grad_norm": 0.75390625, "learning_rate": 0.00012387604783260268, "loss": 0.752, "step": 28424 }, { "epoch": 0.7298742940777152, "grad_norm": 0.80078125, "learning_rate": 0.00012387171271857836, "loss": 0.7543, "step": 28425 }, { "epoch": 0.729899971273637, "grad_norm": 0.84765625, "learning_rate": 0.00012386737755697985, "loss": 0.8401, "step": 28426 }, { "epoch": 0.7299256484695589, "grad_norm": 0.7890625, "learning_rate": 0.0001238630423478158, "loss": 0.9404, "step": 28427 }, { "epoch": 0.7299513256654807, "grad_norm": 0.7890625, "learning_rate": 0.0001238587070910949, "loss": 0.871, "step": 28428 }, { "epoch": 0.7299770028614025, "grad_norm": 0.8046875, "learning_rate": 0.00012385437178682567, "loss": 0.6871, "step": 28429 }, { "epoch": 0.7300026800573244, "grad_norm": 0.859375, "learning_rate": 0.0001238500364350169, "loss": 0.8295, "step": 28430 }, { "epoch": 0.7300283572532461, "grad_norm": 0.75390625, "learning_rate": 0.00012384570103567713, "loss": 0.8367, "step": 28431 }, { "epoch": 0.7300540344491679, "grad_norm": 0.84375, "learning_rate": 0.00012384136558881505, "loss": 0.9891, "step": 28432 }, { "epoch": 0.7300797116450898, "grad_norm": 0.77734375, "learning_rate": 0.00012383703009443927, "loss": 0.8062, "step": 28433 }, { "epoch": 0.7301053888410116, "grad_norm": 0.8125, "learning_rate": 0.00012383269455255844, "loss": 0.987, "step": 28434 }, { "epoch": 0.7301310660369335, "grad_norm": 0.78125, "learning_rate": 0.00012382835896318124, "loss": 0.8537, "step": 28435 }, { "epoch": 0.7301567432328553, "grad_norm": 0.796875, "learning_rate": 0.00012382402332631625, "loss": 0.7896, "step": 28436 }, { "epoch": 0.7301824204287771, "grad_norm": 0.7890625, "learning_rate": 0.0001238196876419721, "loss": 0.9747, "step": 28437 }, { "epoch": 0.7302080976246988, "grad_norm": 1.0234375, "learning_rate": 0.0001238153519101575, "loss": 0.8199, "step": 28438 }, { "epoch": 0.7302337748206207, "grad_norm": 0.78515625, "learning_rate": 0.00012381101613088107, "loss": 0.7653, "step": 28439 }, { "epoch": 0.7302594520165425, "grad_norm": 0.7734375, "learning_rate": 0.00012380668030415138, "loss": 0.7676, "step": 28440 }, { "epoch": 0.7302851292124644, "grad_norm": 0.7109375, "learning_rate": 0.0001238023444299772, "loss": 0.7253, "step": 28441 }, { "epoch": 0.7303108064083862, "grad_norm": 0.77734375, "learning_rate": 0.00012379800850836702, "loss": 0.7433, "step": 28442 }, { "epoch": 0.730336483604308, "grad_norm": 0.76171875, "learning_rate": 0.0001237936725393296, "loss": 0.9457, "step": 28443 }, { "epoch": 0.7303621608002299, "grad_norm": 0.74609375, "learning_rate": 0.00012378933652287352, "loss": 0.9042, "step": 28444 }, { "epoch": 0.7303878379961516, "grad_norm": 0.81640625, "learning_rate": 0.00012378500045900746, "loss": 0.8266, "step": 28445 }, { "epoch": 0.7304135151920734, "grad_norm": 0.7109375, "learning_rate": 0.00012378066434774005, "loss": 0.8123, "step": 28446 }, { "epoch": 0.7304391923879953, "grad_norm": 0.765625, "learning_rate": 0.00012377632818907992, "loss": 0.8343, "step": 28447 }, { "epoch": 0.7304648695839171, "grad_norm": 0.80078125, "learning_rate": 0.00012377199198303568, "loss": 0.9525, "step": 28448 }, { "epoch": 0.7304905467798389, "grad_norm": 0.7890625, "learning_rate": 0.00012376765572961603, "loss": 0.756, "step": 28449 }, { "epoch": 0.7305162239757608, "grad_norm": 0.7578125, "learning_rate": 0.00012376331942882962, "loss": 0.8546, "step": 28450 }, { "epoch": 0.7305419011716825, "grad_norm": 0.78125, "learning_rate": 0.00012375898308068502, "loss": 0.6763, "step": 28451 }, { "epoch": 0.7305675783676043, "grad_norm": 0.765625, "learning_rate": 0.00012375464668519095, "loss": 0.8218, "step": 28452 }, { "epoch": 0.7305932555635262, "grad_norm": 0.78125, "learning_rate": 0.00012375031024235597, "loss": 0.8279, "step": 28453 }, { "epoch": 0.730618932759448, "grad_norm": 0.81640625, "learning_rate": 0.0001237459737521888, "loss": 0.7873, "step": 28454 }, { "epoch": 0.7306446099553698, "grad_norm": 0.81640625, "learning_rate": 0.00012374163721469805, "loss": 0.9447, "step": 28455 }, { "epoch": 0.7306702871512917, "grad_norm": 0.765625, "learning_rate": 0.00012373730062989233, "loss": 0.8102, "step": 28456 }, { "epoch": 0.7306959643472135, "grad_norm": 0.87109375, "learning_rate": 0.00012373296399778035, "loss": 0.884, "step": 28457 }, { "epoch": 0.7307216415431352, "grad_norm": 0.9453125, "learning_rate": 0.0001237286273183707, "loss": 0.7545, "step": 28458 }, { "epoch": 0.7307473187390571, "grad_norm": 0.78515625, "learning_rate": 0.00012372429059167206, "loss": 0.7647, "step": 28459 }, { "epoch": 0.7307729959349789, "grad_norm": 0.921875, "learning_rate": 0.000123719953817693, "loss": 0.7913, "step": 28460 }, { "epoch": 0.7307986731309007, "grad_norm": 0.72265625, "learning_rate": 0.00012371561699644226, "loss": 0.7834, "step": 28461 }, { "epoch": 0.7308243503268226, "grad_norm": 0.81640625, "learning_rate": 0.0001237112801279284, "loss": 0.8531, "step": 28462 }, { "epoch": 0.7308500275227444, "grad_norm": 0.83984375, "learning_rate": 0.00012370694321216013, "loss": 0.8001, "step": 28463 }, { "epoch": 0.7308757047186663, "grad_norm": 0.78515625, "learning_rate": 0.00012370260624914607, "loss": 0.8492, "step": 28464 }, { "epoch": 0.730901381914588, "grad_norm": 0.76953125, "learning_rate": 0.00012369826923889487, "loss": 0.7945, "step": 28465 }, { "epoch": 0.7309270591105098, "grad_norm": 0.7734375, "learning_rate": 0.00012369393218141512, "loss": 0.9625, "step": 28466 }, { "epoch": 0.7309527363064316, "grad_norm": 0.78125, "learning_rate": 0.0001236895950767155, "loss": 0.8312, "step": 28467 }, { "epoch": 0.7309784135023535, "grad_norm": 0.77734375, "learning_rate": 0.0001236852579248047, "loss": 0.7804, "step": 28468 }, { "epoch": 0.7310040906982753, "grad_norm": 0.84765625, "learning_rate": 0.00012368092072569128, "loss": 0.8481, "step": 28469 }, { "epoch": 0.7310297678941972, "grad_norm": 0.73046875, "learning_rate": 0.00012367658347938392, "loss": 0.8203, "step": 28470 }, { "epoch": 0.7310554450901189, "grad_norm": 0.80859375, "learning_rate": 0.00012367224618589133, "loss": 0.9948, "step": 28471 }, { "epoch": 0.7310811222860407, "grad_norm": 0.75390625, "learning_rate": 0.00012366790884522203, "loss": 0.8492, "step": 28472 }, { "epoch": 0.7311067994819626, "grad_norm": 0.74609375, "learning_rate": 0.00012366357145738476, "loss": 0.7136, "step": 28473 }, { "epoch": 0.7311324766778844, "grad_norm": 0.69921875, "learning_rate": 0.00012365923402238811, "loss": 0.7693, "step": 28474 }, { "epoch": 0.7311581538738062, "grad_norm": 0.7421875, "learning_rate": 0.00012365489654024073, "loss": 0.7814, "step": 28475 }, { "epoch": 0.7311838310697281, "grad_norm": 0.76171875, "learning_rate": 0.0001236505590109513, "loss": 0.7765, "step": 28476 }, { "epoch": 0.7312095082656499, "grad_norm": 0.7578125, "learning_rate": 0.00012364622143452844, "loss": 0.8425, "step": 28477 }, { "epoch": 0.7312351854615716, "grad_norm": 0.984375, "learning_rate": 0.00012364188381098083, "loss": 0.8416, "step": 28478 }, { "epoch": 0.7312608626574935, "grad_norm": 0.7421875, "learning_rate": 0.00012363754614031703, "loss": 0.7385, "step": 28479 }, { "epoch": 0.7312865398534153, "grad_norm": 0.859375, "learning_rate": 0.00012363320842254574, "loss": 0.959, "step": 28480 }, { "epoch": 0.7313122170493371, "grad_norm": 0.7421875, "learning_rate": 0.00012362887065767562, "loss": 0.8415, "step": 28481 }, { "epoch": 0.731337894245259, "grad_norm": 0.8203125, "learning_rate": 0.0001236245328457153, "loss": 0.8039, "step": 28482 }, { "epoch": 0.7313635714411808, "grad_norm": 0.71875, "learning_rate": 0.00012362019498667339, "loss": 0.7228, "step": 28483 }, { "epoch": 0.7313892486371026, "grad_norm": 0.734375, "learning_rate": 0.00012361585708055859, "loss": 0.7261, "step": 28484 }, { "epoch": 0.7314149258330244, "grad_norm": 0.76953125, "learning_rate": 0.00012361151912737952, "loss": 0.8922, "step": 28485 }, { "epoch": 0.7314406030289462, "grad_norm": 0.80859375, "learning_rate": 0.00012360718112714483, "loss": 0.9269, "step": 28486 }, { "epoch": 0.731466280224868, "grad_norm": 0.75, "learning_rate": 0.00012360284307986314, "loss": 0.7491, "step": 28487 }, { "epoch": 0.7314919574207899, "grad_norm": 0.8203125, "learning_rate": 0.00012359850498554312, "loss": 0.8542, "step": 28488 }, { "epoch": 0.7315176346167117, "grad_norm": 0.7734375, "learning_rate": 0.0001235941668441934, "loss": 0.879, "step": 28489 }, { "epoch": 0.7315433118126335, "grad_norm": 0.7734375, "learning_rate": 0.00012358982865582268, "loss": 0.9328, "step": 28490 }, { "epoch": 0.7315689890085553, "grad_norm": 0.75, "learning_rate": 0.0001235854904204395, "loss": 0.7637, "step": 28491 }, { "epoch": 0.7315946662044771, "grad_norm": 0.8203125, "learning_rate": 0.00012358115213805264, "loss": 0.8517, "step": 28492 }, { "epoch": 0.7316203434003989, "grad_norm": 0.91796875, "learning_rate": 0.00012357681380867062, "loss": 0.7679, "step": 28493 }, { "epoch": 0.7316460205963208, "grad_norm": 0.8203125, "learning_rate": 0.00012357247543230216, "loss": 0.8119, "step": 28494 }, { "epoch": 0.7316716977922426, "grad_norm": 0.76171875, "learning_rate": 0.0001235681370089559, "loss": 0.7247, "step": 28495 }, { "epoch": 0.7316973749881645, "grad_norm": 1.015625, "learning_rate": 0.0001235637985386404, "loss": 0.8265, "step": 28496 }, { "epoch": 0.7317230521840863, "grad_norm": 0.90625, "learning_rate": 0.00012355946002136445, "loss": 0.9099, "step": 28497 }, { "epoch": 0.731748729380008, "grad_norm": 0.796875, "learning_rate": 0.0001235551214571366, "loss": 0.7643, "step": 28498 }, { "epoch": 0.7317744065759298, "grad_norm": 0.84765625, "learning_rate": 0.00012355078284596556, "loss": 0.9523, "step": 28499 }, { "epoch": 0.7318000837718517, "grad_norm": 0.7578125, "learning_rate": 0.0001235464441878599, "loss": 0.7859, "step": 28500 }, { "epoch": 0.7318257609677735, "grad_norm": 0.80859375, "learning_rate": 0.0001235421054828283, "loss": 0.8776, "step": 28501 }, { "epoch": 0.7318514381636954, "grad_norm": 0.76953125, "learning_rate": 0.00012353776673087942, "loss": 0.7367, "step": 28502 }, { "epoch": 0.7318771153596172, "grad_norm": 0.875, "learning_rate": 0.0001235334279320219, "loss": 0.8008, "step": 28503 }, { "epoch": 0.7319027925555389, "grad_norm": 0.83203125, "learning_rate": 0.00012352908908626434, "loss": 0.8111, "step": 28504 }, { "epoch": 0.7319284697514608, "grad_norm": 0.84765625, "learning_rate": 0.0001235247501936155, "loss": 0.8057, "step": 28505 }, { "epoch": 0.7319541469473826, "grad_norm": 0.7890625, "learning_rate": 0.00012352041125408394, "loss": 0.7882, "step": 28506 }, { "epoch": 0.7319798241433044, "grad_norm": 0.80078125, "learning_rate": 0.0001235160722676783, "loss": 1.0049, "step": 28507 }, { "epoch": 0.7320055013392263, "grad_norm": 0.95703125, "learning_rate": 0.00012351173323440728, "loss": 0.821, "step": 28508 }, { "epoch": 0.7320311785351481, "grad_norm": 0.84765625, "learning_rate": 0.00012350739415427944, "loss": 0.9259, "step": 28509 }, { "epoch": 0.7320568557310699, "grad_norm": 0.7265625, "learning_rate": 0.00012350305502730356, "loss": 0.7239, "step": 28510 }, { "epoch": 0.7320825329269917, "grad_norm": 0.80859375, "learning_rate": 0.00012349871585348818, "loss": 0.9068, "step": 28511 }, { "epoch": 0.7321082101229135, "grad_norm": 0.79296875, "learning_rate": 0.000123494376632842, "loss": 0.7294, "step": 28512 }, { "epoch": 0.7321338873188353, "grad_norm": 0.75390625, "learning_rate": 0.00012349003736537362, "loss": 0.8333, "step": 28513 }, { "epoch": 0.7321595645147572, "grad_norm": 0.78125, "learning_rate": 0.00012348569805109173, "loss": 0.9304, "step": 28514 }, { "epoch": 0.732185241710679, "grad_norm": 0.796875, "learning_rate": 0.000123481358690005, "loss": 0.87, "step": 28515 }, { "epoch": 0.7322109189066008, "grad_norm": 0.7265625, "learning_rate": 0.000123477019282122, "loss": 0.7854, "step": 28516 }, { "epoch": 0.7322365961025227, "grad_norm": 0.83984375, "learning_rate": 0.0001234726798274514, "loss": 0.894, "step": 28517 }, { "epoch": 0.7322622732984444, "grad_norm": 0.7109375, "learning_rate": 0.00012346834032600195, "loss": 0.9063, "step": 28518 }, { "epoch": 0.7322879504943662, "grad_norm": 0.734375, "learning_rate": 0.00012346400077778219, "loss": 0.6577, "step": 28519 }, { "epoch": 0.7323136276902881, "grad_norm": 0.75390625, "learning_rate": 0.00012345966118280076, "loss": 0.7862, "step": 28520 }, { "epoch": 0.7323393048862099, "grad_norm": 0.765625, "learning_rate": 0.00012345532154106636, "loss": 0.7889, "step": 28521 }, { "epoch": 0.7323649820821317, "grad_norm": 0.71875, "learning_rate": 0.00012345098185258762, "loss": 0.7877, "step": 28522 }, { "epoch": 0.7323906592780536, "grad_norm": 0.71875, "learning_rate": 0.00012344664211737317, "loss": 0.7939, "step": 28523 }, { "epoch": 0.7324163364739753, "grad_norm": 0.921875, "learning_rate": 0.00012344230233543173, "loss": 0.9082, "step": 28524 }, { "epoch": 0.7324420136698971, "grad_norm": 0.84765625, "learning_rate": 0.00012343796250677185, "loss": 0.8081, "step": 28525 }, { "epoch": 0.732467690865819, "grad_norm": 0.7890625, "learning_rate": 0.00012343362263140227, "loss": 0.7833, "step": 28526 }, { "epoch": 0.7324933680617408, "grad_norm": 0.8671875, "learning_rate": 0.00012342928270933157, "loss": 0.776, "step": 28527 }, { "epoch": 0.7325190452576626, "grad_norm": 0.7734375, "learning_rate": 0.00012342494274056843, "loss": 0.7934, "step": 28528 }, { "epoch": 0.7325447224535845, "grad_norm": 0.796875, "learning_rate": 0.00012342060272512153, "loss": 0.8061, "step": 28529 }, { "epoch": 0.7325703996495063, "grad_norm": 0.82421875, "learning_rate": 0.00012341626266299945, "loss": 0.9275, "step": 28530 }, { "epoch": 0.732596076845428, "grad_norm": 0.7734375, "learning_rate": 0.00012341192255421087, "loss": 0.9095, "step": 28531 }, { "epoch": 0.7326217540413499, "grad_norm": 0.765625, "learning_rate": 0.00012340758239876446, "loss": 0.9038, "step": 28532 }, { "epoch": 0.7326474312372717, "grad_norm": 0.79296875, "learning_rate": 0.00012340324219666885, "loss": 0.7918, "step": 28533 }, { "epoch": 0.7326731084331936, "grad_norm": 0.75, "learning_rate": 0.0001233989019479327, "loss": 0.8538, "step": 28534 }, { "epoch": 0.7326987856291154, "grad_norm": 0.75, "learning_rate": 0.00012339456165256463, "loss": 0.7182, "step": 28535 }, { "epoch": 0.7327244628250372, "grad_norm": 0.796875, "learning_rate": 0.0001233902213105733, "loss": 0.7603, "step": 28536 }, { "epoch": 0.7327501400209591, "grad_norm": 0.79296875, "learning_rate": 0.0001233858809219674, "loss": 0.8438, "step": 28537 }, { "epoch": 0.7327758172168808, "grad_norm": 0.7578125, "learning_rate": 0.00012338154048675555, "loss": 0.7693, "step": 28538 }, { "epoch": 0.7328014944128026, "grad_norm": 0.71875, "learning_rate": 0.0001233772000049464, "loss": 0.7628, "step": 28539 }, { "epoch": 0.7328271716087245, "grad_norm": 0.77734375, "learning_rate": 0.0001233728594765486, "loss": 0.9498, "step": 28540 }, { "epoch": 0.7328528488046463, "grad_norm": 0.76171875, "learning_rate": 0.00012336851890157078, "loss": 0.8498, "step": 28541 }, { "epoch": 0.7328785260005681, "grad_norm": 0.77734375, "learning_rate": 0.00012336417828002165, "loss": 0.7815, "step": 28542 }, { "epoch": 0.73290420319649, "grad_norm": 0.8203125, "learning_rate": 0.0001233598376119098, "loss": 0.8858, "step": 28543 }, { "epoch": 0.7329298803924117, "grad_norm": 0.77734375, "learning_rate": 0.0001233554968972439, "loss": 0.779, "step": 28544 }, { "epoch": 0.7329555575883335, "grad_norm": 0.75, "learning_rate": 0.00012335115613603263, "loss": 0.9383, "step": 28545 }, { "epoch": 0.7329812347842554, "grad_norm": 0.78125, "learning_rate": 0.00012334681532828458, "loss": 0.816, "step": 28546 }, { "epoch": 0.7330069119801772, "grad_norm": 0.75390625, "learning_rate": 0.00012334247447400848, "loss": 0.7874, "step": 28547 }, { "epoch": 0.733032589176099, "grad_norm": 0.72265625, "learning_rate": 0.00012333813357321292, "loss": 0.8522, "step": 28548 }, { "epoch": 0.7330582663720209, "grad_norm": 0.7265625, "learning_rate": 0.00012333379262590653, "loss": 0.7346, "step": 28549 }, { "epoch": 0.7330839435679427, "grad_norm": 0.79296875, "learning_rate": 0.00012332945163209802, "loss": 0.8804, "step": 28550 }, { "epoch": 0.7331096207638644, "grad_norm": 0.75, "learning_rate": 0.00012332511059179606, "loss": 0.7892, "step": 28551 }, { "epoch": 0.7331352979597863, "grad_norm": 0.77734375, "learning_rate": 0.00012332076950500922, "loss": 0.7664, "step": 28552 }, { "epoch": 0.7331609751557081, "grad_norm": 0.796875, "learning_rate": 0.00012331642837174622, "loss": 0.7524, "step": 28553 }, { "epoch": 0.7331866523516299, "grad_norm": 0.76171875, "learning_rate": 0.00012331208719201567, "loss": 0.7421, "step": 28554 }, { "epoch": 0.7332123295475518, "grad_norm": 0.765625, "learning_rate": 0.0001233077459658262, "loss": 0.8682, "step": 28555 }, { "epoch": 0.7332380067434736, "grad_norm": 0.79296875, "learning_rate": 0.00012330340469318655, "loss": 0.9411, "step": 28556 }, { "epoch": 0.7332636839393954, "grad_norm": 0.8046875, "learning_rate": 0.0001232990633741053, "loss": 0.772, "step": 28557 }, { "epoch": 0.7332893611353172, "grad_norm": 0.73828125, "learning_rate": 0.00012329472200859113, "loss": 0.7596, "step": 28558 }, { "epoch": 0.733315038331239, "grad_norm": 0.70703125, "learning_rate": 0.00012329038059665268, "loss": 0.6362, "step": 28559 }, { "epoch": 0.7333407155271608, "grad_norm": 0.74609375, "learning_rate": 0.0001232860391382986, "loss": 0.8234, "step": 28560 }, { "epoch": 0.7333663927230827, "grad_norm": 0.78515625, "learning_rate": 0.00012328169763353754, "loss": 0.8547, "step": 28561 }, { "epoch": 0.7333920699190045, "grad_norm": 0.75390625, "learning_rate": 0.00012327735608237816, "loss": 0.8415, "step": 28562 }, { "epoch": 0.7334177471149264, "grad_norm": 0.82421875, "learning_rate": 0.00012327301448482913, "loss": 0.8706, "step": 28563 }, { "epoch": 0.7334434243108481, "grad_norm": 0.765625, "learning_rate": 0.0001232686728408991, "loss": 0.7195, "step": 28564 }, { "epoch": 0.7334691015067699, "grad_norm": 0.7890625, "learning_rate": 0.00012326433115059665, "loss": 0.762, "step": 28565 }, { "epoch": 0.7334947787026918, "grad_norm": 0.75390625, "learning_rate": 0.00012325998941393053, "loss": 1.0202, "step": 28566 }, { "epoch": 0.7335204558986136, "grad_norm": 0.87109375, "learning_rate": 0.00012325564763090936, "loss": 0.9126, "step": 28567 }, { "epoch": 0.7335461330945354, "grad_norm": 0.80078125, "learning_rate": 0.00012325130580154173, "loss": 0.8428, "step": 28568 }, { "epoch": 0.7335718102904573, "grad_norm": 0.78125, "learning_rate": 0.0001232469639258364, "loss": 0.7775, "step": 28569 }, { "epoch": 0.7335974874863791, "grad_norm": 0.7890625, "learning_rate": 0.00012324262200380198, "loss": 0.6664, "step": 28570 }, { "epoch": 0.7336231646823008, "grad_norm": 0.8359375, "learning_rate": 0.00012323828003544708, "loss": 0.9924, "step": 28571 }, { "epoch": 0.7336488418782227, "grad_norm": 0.77734375, "learning_rate": 0.0001232339380207804, "loss": 0.8399, "step": 28572 }, { "epoch": 0.7336745190741445, "grad_norm": 0.77734375, "learning_rate": 0.00012322959595981056, "loss": 0.874, "step": 28573 }, { "epoch": 0.7337001962700663, "grad_norm": 0.83984375, "learning_rate": 0.00012322525385254627, "loss": 0.7455, "step": 28574 }, { "epoch": 0.7337258734659882, "grad_norm": 0.734375, "learning_rate": 0.0001232209116989961, "loss": 0.7452, "step": 28575 }, { "epoch": 0.73375155066191, "grad_norm": 0.81640625, "learning_rate": 0.0001232165694991688, "loss": 0.8813, "step": 28576 }, { "epoch": 0.7337772278578318, "grad_norm": 0.78125, "learning_rate": 0.00012321222725307298, "loss": 0.7973, "step": 28577 }, { "epoch": 0.7338029050537536, "grad_norm": 0.9296875, "learning_rate": 0.00012320788496071722, "loss": 0.9181, "step": 28578 }, { "epoch": 0.7338285822496754, "grad_norm": 0.765625, "learning_rate": 0.0001232035426221103, "loss": 0.6999, "step": 28579 }, { "epoch": 0.7338542594455972, "grad_norm": 0.8046875, "learning_rate": 0.00012319920023726082, "loss": 0.8613, "step": 28580 }, { "epoch": 0.7338799366415191, "grad_norm": 0.80859375, "learning_rate": 0.00012319485780617735, "loss": 0.7483, "step": 28581 }, { "epoch": 0.7339056138374409, "grad_norm": 0.765625, "learning_rate": 0.0001231905153288687, "loss": 0.7973, "step": 28582 }, { "epoch": 0.7339312910333627, "grad_norm": 0.73828125, "learning_rate": 0.00012318617280534345, "loss": 0.7677, "step": 28583 }, { "epoch": 0.7339569682292845, "grad_norm": 0.7734375, "learning_rate": 0.00012318183023561023, "loss": 0.8471, "step": 28584 }, { "epoch": 0.7339826454252063, "grad_norm": 0.703125, "learning_rate": 0.00012317748761967774, "loss": 0.8733, "step": 28585 }, { "epoch": 0.7340083226211281, "grad_norm": 0.84765625, "learning_rate": 0.00012317314495755455, "loss": 0.8597, "step": 28586 }, { "epoch": 0.73403399981705, "grad_norm": 0.74609375, "learning_rate": 0.00012316880224924942, "loss": 0.7913, "step": 28587 }, { "epoch": 0.7340596770129718, "grad_norm": 0.80078125, "learning_rate": 0.00012316445949477097, "loss": 0.8784, "step": 28588 }, { "epoch": 0.7340853542088936, "grad_norm": 0.80078125, "learning_rate": 0.0001231601166941278, "loss": 0.9384, "step": 28589 }, { "epoch": 0.7341110314048155, "grad_norm": 0.78515625, "learning_rate": 0.00012315577384732865, "loss": 0.8394, "step": 28590 }, { "epoch": 0.7341367086007372, "grad_norm": 0.80078125, "learning_rate": 0.00012315143095438213, "loss": 0.7851, "step": 28591 }, { "epoch": 0.734162385796659, "grad_norm": 0.69921875, "learning_rate": 0.0001231470880152969, "loss": 0.8798, "step": 28592 }, { "epoch": 0.7341880629925809, "grad_norm": 0.828125, "learning_rate": 0.0001231427450300816, "loss": 0.8658, "step": 28593 }, { "epoch": 0.7342137401885027, "grad_norm": 0.734375, "learning_rate": 0.00012313840199874488, "loss": 0.742, "step": 28594 }, { "epoch": 0.7342394173844246, "grad_norm": 0.77734375, "learning_rate": 0.00012313405892129545, "loss": 0.7428, "step": 28595 }, { "epoch": 0.7342650945803464, "grad_norm": 0.78515625, "learning_rate": 0.00012312971579774194, "loss": 0.8189, "step": 28596 }, { "epoch": 0.7342907717762682, "grad_norm": 0.81640625, "learning_rate": 0.00012312537262809298, "loss": 0.8323, "step": 28597 }, { "epoch": 0.73431644897219, "grad_norm": 0.7578125, "learning_rate": 0.00012312102941235722, "loss": 0.8311, "step": 28598 }, { "epoch": 0.7343421261681118, "grad_norm": 0.7421875, "learning_rate": 0.00012311668615054336, "loss": 0.8343, "step": 28599 }, { "epoch": 0.7343678033640336, "grad_norm": 0.85546875, "learning_rate": 0.00012311234284266003, "loss": 0.8837, "step": 28600 }, { "epoch": 0.7343934805599555, "grad_norm": 0.76953125, "learning_rate": 0.00012310799948871587, "loss": 0.7279, "step": 28601 }, { "epoch": 0.7344191577558773, "grad_norm": 0.7421875, "learning_rate": 0.00012310365608871957, "loss": 0.629, "step": 28602 }, { "epoch": 0.7344448349517991, "grad_norm": 0.765625, "learning_rate": 0.0001230993126426798, "loss": 0.81, "step": 28603 }, { "epoch": 0.7344705121477209, "grad_norm": 0.82421875, "learning_rate": 0.00012309496915060514, "loss": 0.8832, "step": 28604 }, { "epoch": 0.7344961893436427, "grad_norm": 0.77734375, "learning_rate": 0.0001230906256125043, "loss": 0.7754, "step": 28605 }, { "epoch": 0.7345218665395645, "grad_norm": 0.9140625, "learning_rate": 0.00012308628202838595, "loss": 0.8616, "step": 28606 }, { "epoch": 0.7345475437354864, "grad_norm": 0.88671875, "learning_rate": 0.0001230819383982587, "loss": 0.899, "step": 28607 }, { "epoch": 0.7345732209314082, "grad_norm": 0.6953125, "learning_rate": 0.00012307759472213124, "loss": 0.7731, "step": 28608 }, { "epoch": 0.73459889812733, "grad_norm": 0.921875, "learning_rate": 0.00012307325100001224, "loss": 0.8377, "step": 28609 }, { "epoch": 0.7346245753232519, "grad_norm": 0.7890625, "learning_rate": 0.0001230689072319103, "loss": 0.8515, "step": 28610 }, { "epoch": 0.7346502525191736, "grad_norm": 0.7578125, "learning_rate": 0.00012306456341783416, "loss": 0.8862, "step": 28611 }, { "epoch": 0.7346759297150954, "grad_norm": 0.74609375, "learning_rate": 0.0001230602195577924, "loss": 0.8165, "step": 28612 }, { "epoch": 0.7347016069110173, "grad_norm": 0.75390625, "learning_rate": 0.00012305587565179368, "loss": 0.8524, "step": 28613 }, { "epoch": 0.7347272841069391, "grad_norm": 0.81640625, "learning_rate": 0.00012305153169984672, "loss": 0.935, "step": 28614 }, { "epoch": 0.7347529613028609, "grad_norm": 0.73046875, "learning_rate": 0.00012304718770196014, "loss": 0.8556, "step": 28615 }, { "epoch": 0.7347786384987828, "grad_norm": 0.80078125, "learning_rate": 0.00012304284365814255, "loss": 0.876, "step": 28616 }, { "epoch": 0.7348043156947046, "grad_norm": 0.73828125, "learning_rate": 0.00012303849956840271, "loss": 0.8092, "step": 28617 }, { "epoch": 0.7348299928906263, "grad_norm": 0.8046875, "learning_rate": 0.0001230341554327492, "loss": 0.8881, "step": 28618 }, { "epoch": 0.7348556700865482, "grad_norm": 0.80078125, "learning_rate": 0.00012302981125119069, "loss": 0.7499, "step": 28619 }, { "epoch": 0.73488134728247, "grad_norm": 0.8125, "learning_rate": 0.00012302546702373584, "loss": 0.7853, "step": 28620 }, { "epoch": 0.7349070244783918, "grad_norm": 0.796875, "learning_rate": 0.00012302112275039333, "loss": 0.9886, "step": 28621 }, { "epoch": 0.7349327016743137, "grad_norm": 0.76171875, "learning_rate": 0.0001230167784311718, "loss": 0.8245, "step": 28622 }, { "epoch": 0.7349583788702355, "grad_norm": 0.69140625, "learning_rate": 0.0001230124340660799, "loss": 0.853, "step": 28623 }, { "epoch": 0.7349840560661572, "grad_norm": 0.75390625, "learning_rate": 0.00012300808965512634, "loss": 0.7133, "step": 28624 }, { "epoch": 0.7350097332620791, "grad_norm": 0.75, "learning_rate": 0.0001230037451983197, "loss": 0.8906, "step": 28625 }, { "epoch": 0.7350354104580009, "grad_norm": 0.6953125, "learning_rate": 0.00012299940069566867, "loss": 0.6775, "step": 28626 }, { "epoch": 0.7350610876539228, "grad_norm": 0.73046875, "learning_rate": 0.0001229950561471819, "loss": 0.8446, "step": 28627 }, { "epoch": 0.7350867648498446, "grad_norm": 0.9140625, "learning_rate": 0.0001229907115528681, "loss": 0.8912, "step": 28628 }, { "epoch": 0.7351124420457664, "grad_norm": 0.765625, "learning_rate": 0.00012298636691273585, "loss": 0.8331, "step": 28629 }, { "epoch": 0.7351381192416883, "grad_norm": 0.765625, "learning_rate": 0.0001229820222267939, "loss": 0.8824, "step": 28630 }, { "epoch": 0.73516379643761, "grad_norm": 0.765625, "learning_rate": 0.0001229776774950508, "loss": 0.8227, "step": 28631 }, { "epoch": 0.7351894736335318, "grad_norm": 0.7578125, "learning_rate": 0.00012297333271751529, "loss": 0.7821, "step": 28632 }, { "epoch": 0.7352151508294537, "grad_norm": 0.84375, "learning_rate": 0.000122968987894196, "loss": 0.889, "step": 28633 }, { "epoch": 0.7352408280253755, "grad_norm": 0.703125, "learning_rate": 0.00012296464302510155, "loss": 0.8284, "step": 28634 }, { "epoch": 0.7352665052212973, "grad_norm": 0.8203125, "learning_rate": 0.0001229602981102407, "loss": 0.805, "step": 28635 }, { "epoch": 0.7352921824172192, "grad_norm": 0.71484375, "learning_rate": 0.00012295595314962203, "loss": 0.7779, "step": 28636 }, { "epoch": 0.735317859613141, "grad_norm": 0.86328125, "learning_rate": 0.00012295160814325423, "loss": 0.8211, "step": 28637 }, { "epoch": 0.7353435368090627, "grad_norm": 0.73046875, "learning_rate": 0.00012294726309114594, "loss": 0.8558, "step": 28638 }, { "epoch": 0.7353692140049846, "grad_norm": 0.82421875, "learning_rate": 0.00012294291799330582, "loss": 0.8403, "step": 28639 }, { "epoch": 0.7353948912009064, "grad_norm": 0.68359375, "learning_rate": 0.00012293857284974253, "loss": 0.6835, "step": 28640 }, { "epoch": 0.7354205683968282, "grad_norm": 0.8046875, "learning_rate": 0.00012293422766046475, "loss": 0.885, "step": 28641 }, { "epoch": 0.7354462455927501, "grad_norm": 0.78515625, "learning_rate": 0.0001229298824254811, "loss": 0.8212, "step": 28642 }, { "epoch": 0.7354719227886719, "grad_norm": 0.72265625, "learning_rate": 0.00012292553714480034, "loss": 0.9199, "step": 28643 }, { "epoch": 0.7354975999845936, "grad_norm": 0.7421875, "learning_rate": 0.000122921191818431, "loss": 0.9445, "step": 28644 }, { "epoch": 0.7355232771805155, "grad_norm": 0.8203125, "learning_rate": 0.00012291684644638178, "loss": 0.7915, "step": 28645 }, { "epoch": 0.7355489543764373, "grad_norm": 0.7109375, "learning_rate": 0.00012291250102866138, "loss": 0.726, "step": 28646 }, { "epoch": 0.7355746315723591, "grad_norm": 0.82421875, "learning_rate": 0.00012290815556527843, "loss": 1.0016, "step": 28647 }, { "epoch": 0.735600308768281, "grad_norm": 0.890625, "learning_rate": 0.0001229038100562416, "loss": 0.8886, "step": 28648 }, { "epoch": 0.7356259859642028, "grad_norm": 0.78515625, "learning_rate": 0.00012289946450155955, "loss": 0.7883, "step": 28649 }, { "epoch": 0.7356516631601246, "grad_norm": 0.8046875, "learning_rate": 0.0001228951189012409, "loss": 0.8418, "step": 28650 }, { "epoch": 0.7356773403560464, "grad_norm": 0.83984375, "learning_rate": 0.0001228907732552944, "loss": 0.9194, "step": 28651 }, { "epoch": 0.7357030175519682, "grad_norm": 0.80078125, "learning_rate": 0.00012288642756372862, "loss": 0.8334, "step": 28652 }, { "epoch": 0.73572869474789, "grad_norm": 0.8046875, "learning_rate": 0.00012288208182655225, "loss": 0.8124, "step": 28653 }, { "epoch": 0.7357543719438119, "grad_norm": 0.76953125, "learning_rate": 0.00012287773604377397, "loss": 0.8188, "step": 28654 }, { "epoch": 0.7357800491397337, "grad_norm": 0.74609375, "learning_rate": 0.00012287339021540246, "loss": 0.8521, "step": 28655 }, { "epoch": 0.7358057263356556, "grad_norm": 0.72265625, "learning_rate": 0.00012286904434144632, "loss": 0.8719, "step": 28656 }, { "epoch": 0.7358314035315774, "grad_norm": 0.7421875, "learning_rate": 0.00012286469842191425, "loss": 0.9548, "step": 28657 }, { "epoch": 0.7358570807274991, "grad_norm": 0.84375, "learning_rate": 0.00012286035245681487, "loss": 0.9268, "step": 28658 }, { "epoch": 0.735882757923421, "grad_norm": 0.8125, "learning_rate": 0.0001228560064461569, "loss": 0.7973, "step": 28659 }, { "epoch": 0.7359084351193428, "grad_norm": 0.76171875, "learning_rate": 0.00012285166038994896, "loss": 0.779, "step": 28660 }, { "epoch": 0.7359341123152646, "grad_norm": 0.76953125, "learning_rate": 0.00012284731428819972, "loss": 0.8067, "step": 28661 }, { "epoch": 0.7359597895111865, "grad_norm": 0.703125, "learning_rate": 0.0001228429681409179, "loss": 0.7257, "step": 28662 }, { "epoch": 0.7359854667071083, "grad_norm": 0.8046875, "learning_rate": 0.00012283862194811208, "loss": 0.8866, "step": 28663 }, { "epoch": 0.73601114390303, "grad_norm": 0.73046875, "learning_rate": 0.0001228342757097909, "loss": 0.8119, "step": 28664 }, { "epoch": 0.7360368210989519, "grad_norm": 0.765625, "learning_rate": 0.00012282992942596314, "loss": 0.7728, "step": 28665 }, { "epoch": 0.7360624982948737, "grad_norm": 0.76171875, "learning_rate": 0.00012282558309663734, "loss": 0.709, "step": 28666 }, { "epoch": 0.7360881754907955, "grad_norm": 0.76171875, "learning_rate": 0.00012282123672182225, "loss": 0.8424, "step": 28667 }, { "epoch": 0.7361138526867174, "grad_norm": 0.7109375, "learning_rate": 0.00012281689030152647, "loss": 0.8919, "step": 28668 }, { "epoch": 0.7361395298826392, "grad_norm": 0.8359375, "learning_rate": 0.00012281254383575874, "loss": 0.8582, "step": 28669 }, { "epoch": 0.736165207078561, "grad_norm": 0.78515625, "learning_rate": 0.00012280819732452762, "loss": 0.8167, "step": 28670 }, { "epoch": 0.7361908842744828, "grad_norm": 0.75390625, "learning_rate": 0.00012280385076784183, "loss": 0.803, "step": 28671 }, { "epoch": 0.7362165614704046, "grad_norm": 0.73046875, "learning_rate": 0.00012279950416571, "loss": 0.7945, "step": 28672 }, { "epoch": 0.7362422386663264, "grad_norm": 0.8203125, "learning_rate": 0.00012279515751814087, "loss": 0.909, "step": 28673 }, { "epoch": 0.7362679158622483, "grad_norm": 0.78125, "learning_rate": 0.00012279081082514304, "loss": 0.6981, "step": 28674 }, { "epoch": 0.7362935930581701, "grad_norm": 0.73046875, "learning_rate": 0.00012278646408672516, "loss": 0.7686, "step": 28675 }, { "epoch": 0.7363192702540919, "grad_norm": 0.79296875, "learning_rate": 0.00012278211730289593, "loss": 0.8907, "step": 28676 }, { "epoch": 0.7363449474500138, "grad_norm": 0.7578125, "learning_rate": 0.000122777770473664, "loss": 0.7875, "step": 28677 }, { "epoch": 0.7363706246459355, "grad_norm": 0.7578125, "learning_rate": 0.00012277342359903805, "loss": 0.8017, "step": 28678 }, { "epoch": 0.7363963018418573, "grad_norm": 0.734375, "learning_rate": 0.00012276907667902668, "loss": 0.8161, "step": 28679 }, { "epoch": 0.7364219790377792, "grad_norm": 0.78515625, "learning_rate": 0.00012276472971363861, "loss": 0.8499, "step": 28680 }, { "epoch": 0.736447656233701, "grad_norm": 0.78125, "learning_rate": 0.0001227603827028825, "loss": 0.8481, "step": 28681 }, { "epoch": 0.7364733334296228, "grad_norm": 0.72265625, "learning_rate": 0.00012275603564676702, "loss": 0.8739, "step": 28682 }, { "epoch": 0.7364990106255447, "grad_norm": 0.71875, "learning_rate": 0.0001227516885453008, "loss": 0.8566, "step": 28683 }, { "epoch": 0.7365246878214664, "grad_norm": 0.70703125, "learning_rate": 0.0001227473413984925, "loss": 0.6273, "step": 28684 }, { "epoch": 0.7365503650173882, "grad_norm": 0.734375, "learning_rate": 0.00012274299420635083, "loss": 0.7224, "step": 28685 }, { "epoch": 0.7365760422133101, "grad_norm": 0.76953125, "learning_rate": 0.00012273864696888442, "loss": 0.9003, "step": 28686 }, { "epoch": 0.7366017194092319, "grad_norm": 0.7734375, "learning_rate": 0.00012273429968610194, "loss": 0.7783, "step": 28687 }, { "epoch": 0.7366273966051538, "grad_norm": 0.81640625, "learning_rate": 0.00012272995235801203, "loss": 0.846, "step": 28688 }, { "epoch": 0.7366530738010756, "grad_norm": 0.74609375, "learning_rate": 0.00012272560498462345, "loss": 0.8052, "step": 28689 }, { "epoch": 0.7366787509969974, "grad_norm": 0.76171875, "learning_rate": 0.0001227212575659447, "loss": 0.7892, "step": 28690 }, { "epoch": 0.7367044281929191, "grad_norm": 0.8046875, "learning_rate": 0.0001227169101019846, "loss": 0.7345, "step": 28691 }, { "epoch": 0.736730105388841, "grad_norm": 0.77734375, "learning_rate": 0.00012271256259275174, "loss": 0.8335, "step": 28692 }, { "epoch": 0.7367557825847628, "grad_norm": 0.80078125, "learning_rate": 0.00012270821503825477, "loss": 1.0742, "step": 28693 }, { "epoch": 0.7367814597806847, "grad_norm": 0.84765625, "learning_rate": 0.00012270386743850237, "loss": 0.8119, "step": 28694 }, { "epoch": 0.7368071369766065, "grad_norm": 0.79296875, "learning_rate": 0.00012269951979350326, "loss": 0.8761, "step": 28695 }, { "epoch": 0.7368328141725283, "grad_norm": 0.77734375, "learning_rate": 0.00012269517210326602, "loss": 0.8426, "step": 28696 }, { "epoch": 0.7368584913684502, "grad_norm": 0.8203125, "learning_rate": 0.00012269082436779937, "loss": 0.8702, "step": 28697 }, { "epoch": 0.7368841685643719, "grad_norm": 0.79296875, "learning_rate": 0.00012268647658711195, "loss": 0.7217, "step": 28698 }, { "epoch": 0.7369098457602937, "grad_norm": 0.76171875, "learning_rate": 0.00012268212876121243, "loss": 0.9575, "step": 28699 }, { "epoch": 0.7369355229562156, "grad_norm": 0.75, "learning_rate": 0.00012267778089010948, "loss": 0.7862, "step": 28700 }, { "epoch": 0.7369612001521374, "grad_norm": 0.8046875, "learning_rate": 0.00012267343297381176, "loss": 0.8532, "step": 28701 }, { "epoch": 0.7369868773480592, "grad_norm": 0.7421875, "learning_rate": 0.00012266908501232793, "loss": 0.8453, "step": 28702 }, { "epoch": 0.7370125545439811, "grad_norm": 0.75390625, "learning_rate": 0.00012266473700566668, "loss": 0.7468, "step": 28703 }, { "epoch": 0.7370382317399028, "grad_norm": 0.73828125, "learning_rate": 0.00012266038895383664, "loss": 0.848, "step": 28704 }, { "epoch": 0.7370639089358246, "grad_norm": 0.75390625, "learning_rate": 0.0001226560408568465, "loss": 0.8349, "step": 28705 }, { "epoch": 0.7370895861317465, "grad_norm": 0.75, "learning_rate": 0.0001226516927147049, "loss": 0.7726, "step": 28706 }, { "epoch": 0.7371152633276683, "grad_norm": 0.86328125, "learning_rate": 0.00012264734452742054, "loss": 0.7621, "step": 28707 }, { "epoch": 0.7371409405235901, "grad_norm": 0.85546875, "learning_rate": 0.00012264299629500207, "loss": 0.8226, "step": 28708 }, { "epoch": 0.737166617719512, "grad_norm": 0.734375, "learning_rate": 0.00012263864801745813, "loss": 0.8664, "step": 28709 }, { "epoch": 0.7371922949154338, "grad_norm": 0.84375, "learning_rate": 0.00012263429969479745, "loss": 0.8619, "step": 28710 }, { "epoch": 0.7372179721113555, "grad_norm": 0.79296875, "learning_rate": 0.00012262995132702862, "loss": 0.8417, "step": 28711 }, { "epoch": 0.7372436493072774, "grad_norm": 0.73828125, "learning_rate": 0.00012262560291416037, "loss": 0.6849, "step": 28712 }, { "epoch": 0.7372693265031992, "grad_norm": 0.78125, "learning_rate": 0.0001226212544562013, "loss": 0.8563, "step": 28713 }, { "epoch": 0.737295003699121, "grad_norm": 0.70703125, "learning_rate": 0.00012261690595316013, "loss": 0.8822, "step": 28714 }, { "epoch": 0.7373206808950429, "grad_norm": 0.71875, "learning_rate": 0.00012261255740504554, "loss": 0.7, "step": 28715 }, { "epoch": 0.7373463580909647, "grad_norm": 0.84765625, "learning_rate": 0.00012260820881186612, "loss": 0.9837, "step": 28716 }, { "epoch": 0.7373720352868864, "grad_norm": 0.77734375, "learning_rate": 0.00012260386017363058, "loss": 0.8377, "step": 28717 }, { "epoch": 0.7373977124828083, "grad_norm": 0.80078125, "learning_rate": 0.00012259951149034762, "loss": 0.8742, "step": 28718 }, { "epoch": 0.7374233896787301, "grad_norm": 0.80859375, "learning_rate": 0.00012259516276202586, "loss": 0.7644, "step": 28719 }, { "epoch": 0.737449066874652, "grad_norm": 0.81640625, "learning_rate": 0.000122590813988674, "loss": 0.7443, "step": 28720 }, { "epoch": 0.7374747440705738, "grad_norm": 0.78515625, "learning_rate": 0.00012258646517030068, "loss": 0.7887, "step": 28721 }, { "epoch": 0.7375004212664956, "grad_norm": 0.77734375, "learning_rate": 0.00012258211630691455, "loss": 0.9203, "step": 28722 }, { "epoch": 0.7375260984624175, "grad_norm": 0.73046875, "learning_rate": 0.00012257776739852433, "loss": 0.817, "step": 28723 }, { "epoch": 0.7375517756583392, "grad_norm": 0.7890625, "learning_rate": 0.00012257341844513863, "loss": 0.7913, "step": 28724 }, { "epoch": 0.737577452854261, "grad_norm": 0.76953125, "learning_rate": 0.00012256906944676618, "loss": 0.858, "step": 28725 }, { "epoch": 0.7376031300501829, "grad_norm": 0.94140625, "learning_rate": 0.00012256472040341563, "loss": 0.8486, "step": 28726 }, { "epoch": 0.7376288072461047, "grad_norm": 0.84765625, "learning_rate": 0.00012256037131509558, "loss": 0.8906, "step": 28727 }, { "epoch": 0.7376544844420265, "grad_norm": 0.77734375, "learning_rate": 0.00012255602218181477, "loss": 0.7984, "step": 28728 }, { "epoch": 0.7376801616379484, "grad_norm": 0.83203125, "learning_rate": 0.00012255167300358186, "loss": 0.7542, "step": 28729 }, { "epoch": 0.7377058388338702, "grad_norm": 0.859375, "learning_rate": 0.00012254732378040549, "loss": 0.9218, "step": 28730 }, { "epoch": 0.7377315160297919, "grad_norm": 0.7578125, "learning_rate": 0.0001225429745122943, "loss": 0.6525, "step": 28731 }, { "epoch": 0.7377571932257138, "grad_norm": 0.8125, "learning_rate": 0.00012253862519925705, "loss": 0.9186, "step": 28732 }, { "epoch": 0.7377828704216356, "grad_norm": 0.78515625, "learning_rate": 0.00012253427584130232, "loss": 0.8494, "step": 28733 }, { "epoch": 0.7378085476175574, "grad_norm": 0.73828125, "learning_rate": 0.00012252992643843884, "loss": 0.837, "step": 28734 }, { "epoch": 0.7378342248134793, "grad_norm": 0.83984375, "learning_rate": 0.00012252557699067527, "loss": 0.7736, "step": 28735 }, { "epoch": 0.7378599020094011, "grad_norm": 0.80078125, "learning_rate": 0.0001225212274980202, "loss": 0.8226, "step": 28736 }, { "epoch": 0.7378855792053228, "grad_norm": 0.6796875, "learning_rate": 0.00012251687796048242, "loss": 0.6984, "step": 28737 }, { "epoch": 0.7379112564012447, "grad_norm": 0.69140625, "learning_rate": 0.0001225125283780705, "loss": 0.7244, "step": 28738 }, { "epoch": 0.7379369335971665, "grad_norm": 0.7421875, "learning_rate": 0.00012250817875079318, "loss": 0.8455, "step": 28739 }, { "epoch": 0.7379626107930883, "grad_norm": 0.7421875, "learning_rate": 0.00012250382907865904, "loss": 0.668, "step": 28740 }, { "epoch": 0.7379882879890102, "grad_norm": 0.77734375, "learning_rate": 0.00012249947936167683, "loss": 0.952, "step": 28741 }, { "epoch": 0.738013965184932, "grad_norm": 0.7734375, "learning_rate": 0.0001224951295998552, "loss": 0.7549, "step": 28742 }, { "epoch": 0.7380396423808538, "grad_norm": 0.78125, "learning_rate": 0.00012249077979320278, "loss": 0.7716, "step": 28743 }, { "epoch": 0.7380653195767756, "grad_norm": 0.7734375, "learning_rate": 0.0001224864299417283, "loss": 0.7063, "step": 28744 }, { "epoch": 0.7380909967726974, "grad_norm": 0.8203125, "learning_rate": 0.0001224820800454404, "loss": 0.9054, "step": 28745 }, { "epoch": 0.7381166739686192, "grad_norm": 0.828125, "learning_rate": 0.00012247773010434768, "loss": 0.8301, "step": 28746 }, { "epoch": 0.7381423511645411, "grad_norm": 0.7890625, "learning_rate": 0.00012247338011845896, "loss": 0.8025, "step": 28747 }, { "epoch": 0.7381680283604629, "grad_norm": 0.74609375, "learning_rate": 0.00012246903008778277, "loss": 0.7626, "step": 28748 }, { "epoch": 0.7381937055563847, "grad_norm": 0.7734375, "learning_rate": 0.00012246468001232786, "loss": 0.8813, "step": 28749 }, { "epoch": 0.7382193827523066, "grad_norm": 0.78515625, "learning_rate": 0.00012246032989210286, "loss": 0.77, "step": 28750 }, { "epoch": 0.7382450599482283, "grad_norm": 0.796875, "learning_rate": 0.00012245597972711643, "loss": 0.7953, "step": 28751 }, { "epoch": 0.7382707371441501, "grad_norm": 0.76171875, "learning_rate": 0.00012245162951737728, "loss": 0.8236, "step": 28752 }, { "epoch": 0.738296414340072, "grad_norm": 0.7578125, "learning_rate": 0.0001224472792628941, "loss": 0.8208, "step": 28753 }, { "epoch": 0.7383220915359938, "grad_norm": 0.78515625, "learning_rate": 0.00012244292896367544, "loss": 0.7966, "step": 28754 }, { "epoch": 0.7383477687319157, "grad_norm": 0.734375, "learning_rate": 0.00012243857861973011, "loss": 0.865, "step": 28755 }, { "epoch": 0.7383734459278375, "grad_norm": 0.7734375, "learning_rate": 0.00012243422823106668, "loss": 0.854, "step": 28756 }, { "epoch": 0.7383991231237592, "grad_norm": 0.76171875, "learning_rate": 0.00012242987779769387, "loss": 0.8954, "step": 28757 }, { "epoch": 0.738424800319681, "grad_norm": 0.7421875, "learning_rate": 0.00012242552731962038, "loss": 0.8258, "step": 28758 }, { "epoch": 0.7384504775156029, "grad_norm": 0.77734375, "learning_rate": 0.0001224211767968548, "loss": 0.7394, "step": 28759 }, { "epoch": 0.7384761547115247, "grad_norm": 0.80078125, "learning_rate": 0.00012241682622940586, "loss": 0.7682, "step": 28760 }, { "epoch": 0.7385018319074466, "grad_norm": 0.74609375, "learning_rate": 0.0001224124756172822, "loss": 0.6467, "step": 28761 }, { "epoch": 0.7385275091033684, "grad_norm": 0.7109375, "learning_rate": 0.00012240812496049249, "loss": 0.7645, "step": 28762 }, { "epoch": 0.7385531862992902, "grad_norm": 0.81640625, "learning_rate": 0.00012240377425904543, "loss": 0.8281, "step": 28763 }, { "epoch": 0.738578863495212, "grad_norm": 0.796875, "learning_rate": 0.00012239942351294965, "loss": 0.8085, "step": 28764 }, { "epoch": 0.7386045406911338, "grad_norm": 0.75, "learning_rate": 0.00012239507272221385, "loss": 0.8461, "step": 28765 }, { "epoch": 0.7386302178870556, "grad_norm": 0.765625, "learning_rate": 0.0001223907218868467, "loss": 0.8951, "step": 28766 }, { "epoch": 0.7386558950829775, "grad_norm": 0.76953125, "learning_rate": 0.00012238637100685687, "loss": 0.8135, "step": 28767 }, { "epoch": 0.7386815722788993, "grad_norm": 0.76171875, "learning_rate": 0.00012238202008225303, "loss": 0.7616, "step": 28768 }, { "epoch": 0.7387072494748211, "grad_norm": 0.765625, "learning_rate": 0.00012237766911304382, "loss": 0.8703, "step": 28769 }, { "epoch": 0.738732926670743, "grad_norm": 0.82421875, "learning_rate": 0.00012237331809923795, "loss": 0.8327, "step": 28770 }, { "epoch": 0.7387586038666647, "grad_norm": 0.73828125, "learning_rate": 0.00012236896704084408, "loss": 0.691, "step": 28771 }, { "epoch": 0.7387842810625865, "grad_norm": 0.71484375, "learning_rate": 0.00012236461593787085, "loss": 0.7707, "step": 28772 }, { "epoch": 0.7388099582585084, "grad_norm": 0.8046875, "learning_rate": 0.000122360264790327, "loss": 0.8298, "step": 28773 }, { "epoch": 0.7388356354544302, "grad_norm": 0.77734375, "learning_rate": 0.00012235591359822118, "loss": 0.9096, "step": 28774 }, { "epoch": 0.738861312650352, "grad_norm": 1.0078125, "learning_rate": 0.000122351562361562, "loss": 0.8051, "step": 28775 }, { "epoch": 0.7388869898462739, "grad_norm": 0.76171875, "learning_rate": 0.0001223472110803582, "loss": 0.8421, "step": 28776 }, { "epoch": 0.7389126670421956, "grad_norm": 0.78515625, "learning_rate": 0.0001223428597546184, "loss": 0.8118, "step": 28777 }, { "epoch": 0.7389383442381174, "grad_norm": 0.77734375, "learning_rate": 0.0001223385083843513, "loss": 0.8607, "step": 28778 }, { "epoch": 0.7389640214340393, "grad_norm": 0.8046875, "learning_rate": 0.0001223341569695656, "loss": 0.8671, "step": 28779 }, { "epoch": 0.7389896986299611, "grad_norm": 0.7578125, "learning_rate": 0.00012232980551026994, "loss": 0.879, "step": 28780 }, { "epoch": 0.739015375825883, "grad_norm": 0.765625, "learning_rate": 0.00012232545400647297, "loss": 0.7722, "step": 28781 }, { "epoch": 0.7390410530218048, "grad_norm": 0.8203125, "learning_rate": 0.0001223211024581834, "loss": 0.7967, "step": 28782 }, { "epoch": 0.7390667302177266, "grad_norm": 0.79296875, "learning_rate": 0.0001223167508654099, "loss": 0.7549, "step": 28783 }, { "epoch": 0.7390924074136483, "grad_norm": 0.703125, "learning_rate": 0.0001223123992281611, "loss": 0.7814, "step": 28784 }, { "epoch": 0.7391180846095702, "grad_norm": 0.78515625, "learning_rate": 0.00012230804754644574, "loss": 0.81, "step": 28785 }, { "epoch": 0.739143761805492, "grad_norm": 0.7265625, "learning_rate": 0.00012230369582027242, "loss": 0.9171, "step": 28786 }, { "epoch": 0.7391694390014139, "grad_norm": 0.703125, "learning_rate": 0.00012229934404964986, "loss": 0.7822, "step": 28787 }, { "epoch": 0.7391951161973357, "grad_norm": 0.77734375, "learning_rate": 0.00012229499223458674, "loss": 0.6845, "step": 28788 }, { "epoch": 0.7392207933932575, "grad_norm": 0.76171875, "learning_rate": 0.0001222906403750917, "loss": 0.756, "step": 28789 }, { "epoch": 0.7392464705891794, "grad_norm": 0.93359375, "learning_rate": 0.00012228628847117345, "loss": 0.7746, "step": 28790 }, { "epoch": 0.7392721477851011, "grad_norm": 0.76953125, "learning_rate": 0.0001222819365228406, "loss": 0.7845, "step": 28791 }, { "epoch": 0.7392978249810229, "grad_norm": 0.7890625, "learning_rate": 0.0001222775845301019, "loss": 0.8828, "step": 28792 }, { "epoch": 0.7393235021769448, "grad_norm": 0.74609375, "learning_rate": 0.00012227323249296596, "loss": 0.805, "step": 28793 }, { "epoch": 0.7393491793728666, "grad_norm": 0.76953125, "learning_rate": 0.0001222688804114415, "loss": 0.9301, "step": 28794 }, { "epoch": 0.7393748565687884, "grad_norm": 0.828125, "learning_rate": 0.00012226452828553716, "loss": 0.7108, "step": 28795 }, { "epoch": 0.7394005337647103, "grad_norm": 0.7734375, "learning_rate": 0.00012226017611526163, "loss": 0.7634, "step": 28796 }, { "epoch": 0.739426210960632, "grad_norm": 0.75, "learning_rate": 0.00012225582390062358, "loss": 0.8014, "step": 28797 }, { "epoch": 0.7394518881565538, "grad_norm": 0.76171875, "learning_rate": 0.00012225147164163168, "loss": 0.8239, "step": 28798 }, { "epoch": 0.7394775653524757, "grad_norm": 0.86328125, "learning_rate": 0.00012224711933829461, "loss": 0.8077, "step": 28799 }, { "epoch": 0.7395032425483975, "grad_norm": 0.765625, "learning_rate": 0.00012224276699062103, "loss": 0.7615, "step": 28800 }, { "epoch": 0.7395289197443193, "grad_norm": 0.76953125, "learning_rate": 0.00012223841459861964, "loss": 0.8484, "step": 28801 }, { "epoch": 0.7395545969402412, "grad_norm": 0.76953125, "learning_rate": 0.0001222340621622991, "loss": 0.74, "step": 28802 }, { "epoch": 0.739580274136163, "grad_norm": 0.9140625, "learning_rate": 0.0001222297096816681, "loss": 0.8486, "step": 28803 }, { "epoch": 0.7396059513320847, "grad_norm": 0.77734375, "learning_rate": 0.00012222535715673523, "loss": 0.7482, "step": 28804 }, { "epoch": 0.7396316285280066, "grad_norm": 0.7265625, "learning_rate": 0.0001222210045875093, "loss": 0.8597, "step": 28805 }, { "epoch": 0.7396573057239284, "grad_norm": 0.8125, "learning_rate": 0.00012221665197399887, "loss": 0.8808, "step": 28806 }, { "epoch": 0.7396829829198502, "grad_norm": 0.7421875, "learning_rate": 0.0001222122993162127, "loss": 0.8302, "step": 28807 }, { "epoch": 0.7397086601157721, "grad_norm": 0.72265625, "learning_rate": 0.0001222079466141594, "loss": 0.859, "step": 28808 }, { "epoch": 0.7397343373116939, "grad_norm": 0.7890625, "learning_rate": 0.00012220359386784765, "loss": 0.7504, "step": 28809 }, { "epoch": 0.7397600145076157, "grad_norm": 0.80078125, "learning_rate": 0.00012219924107728617, "loss": 0.7597, "step": 28810 }, { "epoch": 0.7397856917035375, "grad_norm": 0.79296875, "learning_rate": 0.0001221948882424836, "loss": 0.8605, "step": 28811 }, { "epoch": 0.7398113688994593, "grad_norm": 0.8203125, "learning_rate": 0.00012219053536344863, "loss": 0.7982, "step": 28812 }, { "epoch": 0.7398370460953811, "grad_norm": 0.84375, "learning_rate": 0.00012218618244018992, "loss": 0.9032, "step": 28813 }, { "epoch": 0.739862723291303, "grad_norm": 0.8125, "learning_rate": 0.00012218182947271617, "loss": 0.8841, "step": 28814 }, { "epoch": 0.7398884004872248, "grad_norm": 0.79296875, "learning_rate": 0.00012217747646103602, "loss": 0.8266, "step": 28815 }, { "epoch": 0.7399140776831467, "grad_norm": 0.81640625, "learning_rate": 0.00012217312340515816, "loss": 0.9114, "step": 28816 }, { "epoch": 0.7399397548790684, "grad_norm": 0.7890625, "learning_rate": 0.00012216877030509128, "loss": 0.8877, "step": 28817 }, { "epoch": 0.7399654320749902, "grad_norm": 0.83203125, "learning_rate": 0.00012216441716084403, "loss": 0.6915, "step": 28818 }, { "epoch": 0.739991109270912, "grad_norm": 0.7734375, "learning_rate": 0.0001221600639724251, "loss": 0.8115, "step": 28819 }, { "epoch": 0.7400167864668339, "grad_norm": 0.91015625, "learning_rate": 0.00012215571073984319, "loss": 0.8417, "step": 28820 }, { "epoch": 0.7400424636627557, "grad_norm": 1.1328125, "learning_rate": 0.00012215135746310692, "loss": 0.91, "step": 28821 }, { "epoch": 0.7400681408586776, "grad_norm": 0.83203125, "learning_rate": 0.000122147004142225, "loss": 0.7721, "step": 28822 }, { "epoch": 0.7400938180545994, "grad_norm": 0.8359375, "learning_rate": 0.00012214265077720607, "loss": 0.8709, "step": 28823 }, { "epoch": 0.7401194952505211, "grad_norm": 0.82421875, "learning_rate": 0.00012213829736805888, "loss": 0.7371, "step": 28824 }, { "epoch": 0.740145172446443, "grad_norm": 0.76171875, "learning_rate": 0.00012213394391479208, "loss": 0.8729, "step": 28825 }, { "epoch": 0.7401708496423648, "grad_norm": 0.77734375, "learning_rate": 0.00012212959041741427, "loss": 0.8311, "step": 28826 }, { "epoch": 0.7401965268382866, "grad_norm": 0.7578125, "learning_rate": 0.00012212523687593423, "loss": 0.7867, "step": 28827 }, { "epoch": 0.7402222040342085, "grad_norm": 0.8671875, "learning_rate": 0.0001221208832903606, "loss": 0.7449, "step": 28828 }, { "epoch": 0.7402478812301303, "grad_norm": 0.76953125, "learning_rate": 0.000122116529660702, "loss": 0.8068, "step": 28829 }, { "epoch": 0.7402735584260521, "grad_norm": 0.77734375, "learning_rate": 0.0001221121759869672, "loss": 0.8209, "step": 28830 }, { "epoch": 0.7402992356219739, "grad_norm": 0.74609375, "learning_rate": 0.00012210782226916483, "loss": 0.8849, "step": 28831 }, { "epoch": 0.7403249128178957, "grad_norm": 0.76953125, "learning_rate": 0.00012210346850730354, "loss": 0.8721, "step": 28832 }, { "epoch": 0.7403505900138175, "grad_norm": 0.85546875, "learning_rate": 0.00012209911470139205, "loss": 0.8282, "step": 28833 }, { "epoch": 0.7403762672097394, "grad_norm": 0.79296875, "learning_rate": 0.000122094760851439, "loss": 0.8121, "step": 28834 }, { "epoch": 0.7404019444056612, "grad_norm": 0.73828125, "learning_rate": 0.00012209040695745313, "loss": 0.7939, "step": 28835 }, { "epoch": 0.740427621601583, "grad_norm": 0.75, "learning_rate": 0.000122086053019443, "loss": 0.6591, "step": 28836 }, { "epoch": 0.7404532987975048, "grad_norm": 0.75390625, "learning_rate": 0.00012208169903741743, "loss": 0.7758, "step": 28837 }, { "epoch": 0.7404789759934266, "grad_norm": 0.83984375, "learning_rate": 0.00012207734501138502, "loss": 0.9941, "step": 28838 }, { "epoch": 0.7405046531893484, "grad_norm": 0.74609375, "learning_rate": 0.00012207299094135442, "loss": 0.7745, "step": 28839 }, { "epoch": 0.7405303303852703, "grad_norm": 0.76953125, "learning_rate": 0.00012206863682733441, "loss": 0.7716, "step": 28840 }, { "epoch": 0.7405560075811921, "grad_norm": 0.78125, "learning_rate": 0.00012206428266933351, "loss": 0.7951, "step": 28841 }, { "epoch": 0.740581684777114, "grad_norm": 0.703125, "learning_rate": 0.00012205992846736056, "loss": 0.7342, "step": 28842 }, { "epoch": 0.7406073619730358, "grad_norm": 0.75390625, "learning_rate": 0.00012205557422142414, "loss": 0.8724, "step": 28843 }, { "epoch": 0.7406330391689575, "grad_norm": 0.79296875, "learning_rate": 0.00012205121993153292, "loss": 0.8098, "step": 28844 }, { "epoch": 0.7406587163648793, "grad_norm": 0.796875, "learning_rate": 0.00012204686559769564, "loss": 0.7644, "step": 28845 }, { "epoch": 0.7406843935608012, "grad_norm": 0.765625, "learning_rate": 0.00012204251121992096, "loss": 0.7534, "step": 28846 }, { "epoch": 0.740710070756723, "grad_norm": 0.765625, "learning_rate": 0.00012203815679821756, "loss": 0.7451, "step": 28847 }, { "epoch": 0.7407357479526449, "grad_norm": 0.7890625, "learning_rate": 0.00012203380233259407, "loss": 0.9046, "step": 28848 }, { "epoch": 0.7407614251485667, "grad_norm": 0.7734375, "learning_rate": 0.0001220294478230592, "loss": 0.7567, "step": 28849 }, { "epoch": 0.7407871023444885, "grad_norm": 0.82421875, "learning_rate": 0.00012202509326962167, "loss": 0.7301, "step": 28850 }, { "epoch": 0.7408127795404102, "grad_norm": 0.85546875, "learning_rate": 0.00012202073867229007, "loss": 0.9079, "step": 28851 }, { "epoch": 0.7408384567363321, "grad_norm": 0.81640625, "learning_rate": 0.00012201638403107314, "loss": 0.9045, "step": 28852 }, { "epoch": 0.7408641339322539, "grad_norm": 0.78125, "learning_rate": 0.00012201202934597957, "loss": 0.7545, "step": 28853 }, { "epoch": 0.7408898111281758, "grad_norm": 0.8125, "learning_rate": 0.00012200767461701801, "loss": 0.917, "step": 28854 }, { "epoch": 0.7409154883240976, "grad_norm": 0.72265625, "learning_rate": 0.00012200331984419712, "loss": 0.8657, "step": 28855 }, { "epoch": 0.7409411655200194, "grad_norm": 0.7578125, "learning_rate": 0.00012199896502752562, "loss": 0.7239, "step": 28856 }, { "epoch": 0.7409668427159412, "grad_norm": 0.734375, "learning_rate": 0.00012199461016701217, "loss": 0.8282, "step": 28857 }, { "epoch": 0.740992519911863, "grad_norm": 0.74609375, "learning_rate": 0.0001219902552626654, "loss": 0.7255, "step": 28858 }, { "epoch": 0.7410181971077848, "grad_norm": 0.76171875, "learning_rate": 0.0001219859003144941, "loss": 0.749, "step": 28859 }, { "epoch": 0.7410438743037067, "grad_norm": 0.8671875, "learning_rate": 0.00012198154532250687, "loss": 0.7901, "step": 28860 }, { "epoch": 0.7410695514996285, "grad_norm": 0.7734375, "learning_rate": 0.0001219771902867124, "loss": 0.8642, "step": 28861 }, { "epoch": 0.7410952286955503, "grad_norm": 0.83203125, "learning_rate": 0.00012197283520711937, "loss": 0.8806, "step": 28862 }, { "epoch": 0.7411209058914722, "grad_norm": 0.85546875, "learning_rate": 0.00012196848008373647, "loss": 0.8335, "step": 28863 }, { "epoch": 0.7411465830873939, "grad_norm": 0.83984375, "learning_rate": 0.00012196412491657237, "loss": 0.9122, "step": 28864 }, { "epoch": 0.7411722602833157, "grad_norm": 0.76171875, "learning_rate": 0.00012195976970563576, "loss": 0.8454, "step": 28865 }, { "epoch": 0.7411979374792376, "grad_norm": 0.75, "learning_rate": 0.00012195541445093533, "loss": 0.8456, "step": 28866 }, { "epoch": 0.7412236146751594, "grad_norm": 0.859375, "learning_rate": 0.0001219510591524797, "loss": 0.8906, "step": 28867 }, { "epoch": 0.7412492918710812, "grad_norm": 0.76171875, "learning_rate": 0.00012194670381027762, "loss": 0.7876, "step": 28868 }, { "epoch": 0.7412749690670031, "grad_norm": 0.81640625, "learning_rate": 0.00012194234842433774, "loss": 0.8552, "step": 28869 }, { "epoch": 0.7413006462629249, "grad_norm": 0.77734375, "learning_rate": 0.00012193799299466873, "loss": 0.8836, "step": 28870 }, { "epoch": 0.7413263234588466, "grad_norm": 0.8046875, "learning_rate": 0.00012193363752127927, "loss": 0.8229, "step": 28871 }, { "epoch": 0.7413520006547685, "grad_norm": 0.80859375, "learning_rate": 0.00012192928200417808, "loss": 0.8054, "step": 28872 }, { "epoch": 0.7413776778506903, "grad_norm": 0.8359375, "learning_rate": 0.0001219249264433738, "loss": 0.9297, "step": 28873 }, { "epoch": 0.7414033550466121, "grad_norm": 0.859375, "learning_rate": 0.00012192057083887514, "loss": 0.7937, "step": 28874 }, { "epoch": 0.741429032242534, "grad_norm": 0.7890625, "learning_rate": 0.00012191621519069074, "loss": 0.8101, "step": 28875 }, { "epoch": 0.7414547094384558, "grad_norm": 0.8515625, "learning_rate": 0.00012191185949882928, "loss": 0.7995, "step": 28876 }, { "epoch": 0.7414803866343775, "grad_norm": 0.75390625, "learning_rate": 0.00012190750376329952, "loss": 0.8189, "step": 28877 }, { "epoch": 0.7415060638302994, "grad_norm": 0.8125, "learning_rate": 0.00012190314798411005, "loss": 0.839, "step": 28878 }, { "epoch": 0.7415317410262212, "grad_norm": 0.8515625, "learning_rate": 0.00012189879216126958, "loss": 0.8803, "step": 28879 }, { "epoch": 0.741557418222143, "grad_norm": 0.73828125, "learning_rate": 0.00012189443629478683, "loss": 0.7128, "step": 28880 }, { "epoch": 0.7415830954180649, "grad_norm": 0.8359375, "learning_rate": 0.0001218900803846704, "loss": 0.8901, "step": 28881 }, { "epoch": 0.7416087726139867, "grad_norm": 0.87109375, "learning_rate": 0.00012188572443092905, "loss": 0.7187, "step": 28882 }, { "epoch": 0.7416344498099086, "grad_norm": 0.75390625, "learning_rate": 0.0001218813684335714, "loss": 0.9312, "step": 28883 }, { "epoch": 0.7416601270058303, "grad_norm": 0.88671875, "learning_rate": 0.00012187701239260619, "loss": 0.8245, "step": 28884 }, { "epoch": 0.7416858042017521, "grad_norm": 0.75, "learning_rate": 0.00012187265630804205, "loss": 0.8149, "step": 28885 }, { "epoch": 0.741711481397674, "grad_norm": 0.765625, "learning_rate": 0.00012186830017988768, "loss": 0.7798, "step": 28886 }, { "epoch": 0.7417371585935958, "grad_norm": 0.75390625, "learning_rate": 0.0001218639440081518, "loss": 0.7549, "step": 28887 }, { "epoch": 0.7417628357895176, "grad_norm": 0.76171875, "learning_rate": 0.00012185958779284302, "loss": 0.9289, "step": 28888 }, { "epoch": 0.7417885129854395, "grad_norm": 0.82421875, "learning_rate": 0.00012185523153397005, "loss": 0.7243, "step": 28889 }, { "epoch": 0.7418141901813613, "grad_norm": 0.73046875, "learning_rate": 0.0001218508752315416, "loss": 0.7948, "step": 28890 }, { "epoch": 0.741839867377283, "grad_norm": 0.7734375, "learning_rate": 0.00012184651888556631, "loss": 0.8921, "step": 28891 }, { "epoch": 0.7418655445732049, "grad_norm": 0.74609375, "learning_rate": 0.00012184216249605287, "loss": 0.9306, "step": 28892 }, { "epoch": 0.7418912217691267, "grad_norm": 0.80078125, "learning_rate": 0.00012183780606301003, "loss": 0.8514, "step": 28893 }, { "epoch": 0.7419168989650485, "grad_norm": 0.796875, "learning_rate": 0.00012183344958644638, "loss": 0.8277, "step": 28894 }, { "epoch": 0.7419425761609704, "grad_norm": 0.734375, "learning_rate": 0.00012182909306637062, "loss": 0.7663, "step": 28895 }, { "epoch": 0.7419682533568922, "grad_norm": 0.78125, "learning_rate": 0.00012182473650279147, "loss": 0.7551, "step": 28896 }, { "epoch": 0.7419939305528139, "grad_norm": 0.7421875, "learning_rate": 0.00012182037989571757, "loss": 0.7307, "step": 28897 }, { "epoch": 0.7420196077487358, "grad_norm": 0.75, "learning_rate": 0.00012181602324515764, "loss": 0.8813, "step": 28898 }, { "epoch": 0.7420452849446576, "grad_norm": 0.765625, "learning_rate": 0.00012181166655112037, "loss": 0.8435, "step": 28899 }, { "epoch": 0.7420709621405794, "grad_norm": 0.74609375, "learning_rate": 0.00012180730981361439, "loss": 0.8167, "step": 28900 }, { "epoch": 0.7420966393365013, "grad_norm": 0.79296875, "learning_rate": 0.00012180295303264841, "loss": 0.9262, "step": 28901 }, { "epoch": 0.7421223165324231, "grad_norm": 0.7578125, "learning_rate": 0.00012179859620823113, "loss": 0.8366, "step": 28902 }, { "epoch": 0.742147993728345, "grad_norm": 0.796875, "learning_rate": 0.00012179423934037119, "loss": 0.9848, "step": 28903 }, { "epoch": 0.7421736709242667, "grad_norm": 0.83984375, "learning_rate": 0.00012178988242907734, "loss": 1.0096, "step": 28904 }, { "epoch": 0.7421993481201885, "grad_norm": 0.85546875, "learning_rate": 0.0001217855254743582, "loss": 0.8606, "step": 28905 }, { "epoch": 0.7422250253161103, "grad_norm": 0.7265625, "learning_rate": 0.00012178116847622248, "loss": 0.8788, "step": 28906 }, { "epoch": 0.7422507025120322, "grad_norm": 0.7890625, "learning_rate": 0.00012177681143467886, "loss": 0.9463, "step": 28907 }, { "epoch": 0.742276379707954, "grad_norm": 0.84375, "learning_rate": 0.00012177245434973602, "loss": 0.8221, "step": 28908 }, { "epoch": 0.7423020569038759, "grad_norm": 0.71875, "learning_rate": 0.00012176809722140263, "loss": 0.7966, "step": 28909 }, { "epoch": 0.7423277340997977, "grad_norm": 0.84765625, "learning_rate": 0.0001217637400496874, "loss": 0.8208, "step": 28910 }, { "epoch": 0.7423534112957194, "grad_norm": 0.7734375, "learning_rate": 0.00012175938283459898, "loss": 0.8643, "step": 28911 }, { "epoch": 0.7423790884916412, "grad_norm": 1.03125, "learning_rate": 0.0001217550255761461, "loss": 0.794, "step": 28912 }, { "epoch": 0.7424047656875631, "grad_norm": 0.7734375, "learning_rate": 0.00012175066827433743, "loss": 0.73, "step": 28913 }, { "epoch": 0.7424304428834849, "grad_norm": 0.76171875, "learning_rate": 0.00012174631092918162, "loss": 0.7922, "step": 28914 }, { "epoch": 0.7424561200794068, "grad_norm": 0.78515625, "learning_rate": 0.00012174195354068741, "loss": 0.9719, "step": 28915 }, { "epoch": 0.7424817972753286, "grad_norm": 0.83984375, "learning_rate": 0.00012173759610886342, "loss": 0.8597, "step": 28916 }, { "epoch": 0.7425074744712503, "grad_norm": 0.734375, "learning_rate": 0.00012173323863371837, "loss": 0.8978, "step": 28917 }, { "epoch": 0.7425331516671722, "grad_norm": 0.73828125, "learning_rate": 0.00012172888111526095, "loss": 0.9154, "step": 28918 }, { "epoch": 0.742558828863094, "grad_norm": 0.83203125, "learning_rate": 0.00012172452355349982, "loss": 0.8941, "step": 28919 }, { "epoch": 0.7425845060590158, "grad_norm": 0.85546875, "learning_rate": 0.00012172016594844369, "loss": 0.9617, "step": 28920 }, { "epoch": 0.7426101832549377, "grad_norm": 0.75390625, "learning_rate": 0.00012171580830010119, "loss": 0.7203, "step": 28921 }, { "epoch": 0.7426358604508595, "grad_norm": 0.70703125, "learning_rate": 0.00012171145060848108, "loss": 0.8168, "step": 28922 }, { "epoch": 0.7426615376467813, "grad_norm": 0.7265625, "learning_rate": 0.00012170709287359203, "loss": 0.6673, "step": 28923 }, { "epoch": 0.7426872148427031, "grad_norm": 0.74609375, "learning_rate": 0.00012170273509544265, "loss": 0.7401, "step": 28924 }, { "epoch": 0.7427128920386249, "grad_norm": 0.7578125, "learning_rate": 0.00012169837727404172, "loss": 0.8454, "step": 28925 }, { "epoch": 0.7427385692345467, "grad_norm": 0.7265625, "learning_rate": 0.00012169401940939788, "loss": 0.7929, "step": 28926 }, { "epoch": 0.7427642464304686, "grad_norm": 0.79296875, "learning_rate": 0.00012168966150151981, "loss": 0.7781, "step": 28927 }, { "epoch": 0.7427899236263904, "grad_norm": 0.75, "learning_rate": 0.00012168530355041623, "loss": 0.8472, "step": 28928 }, { "epoch": 0.7428156008223122, "grad_norm": 0.83984375, "learning_rate": 0.00012168094555609574, "loss": 0.7237, "step": 28929 }, { "epoch": 0.742841278018234, "grad_norm": 0.84375, "learning_rate": 0.00012167658751856714, "loss": 0.8164, "step": 28930 }, { "epoch": 0.7428669552141558, "grad_norm": 0.7109375, "learning_rate": 0.00012167222943783904, "loss": 0.8717, "step": 28931 }, { "epoch": 0.7428926324100776, "grad_norm": 0.7421875, "learning_rate": 0.00012166787131392014, "loss": 0.8029, "step": 28932 }, { "epoch": 0.7429183096059995, "grad_norm": 0.7421875, "learning_rate": 0.00012166351314681916, "loss": 0.7552, "step": 28933 }, { "epoch": 0.7429439868019213, "grad_norm": 0.78125, "learning_rate": 0.0001216591549365447, "loss": 0.8081, "step": 28934 }, { "epoch": 0.7429696639978431, "grad_norm": 0.75390625, "learning_rate": 0.00012165479668310554, "loss": 0.8648, "step": 28935 }, { "epoch": 0.742995341193765, "grad_norm": 0.79296875, "learning_rate": 0.00012165043838651031, "loss": 0.8288, "step": 28936 }, { "epoch": 0.7430210183896867, "grad_norm": 0.74609375, "learning_rate": 0.00012164608004676772, "loss": 0.7406, "step": 28937 }, { "epoch": 0.7430466955856085, "grad_norm": 0.76953125, "learning_rate": 0.00012164172166388643, "loss": 0.7695, "step": 28938 }, { "epoch": 0.7430723727815304, "grad_norm": 0.76953125, "learning_rate": 0.0001216373632378752, "loss": 0.7296, "step": 28939 }, { "epoch": 0.7430980499774522, "grad_norm": 0.7578125, "learning_rate": 0.0001216330047687426, "loss": 0.8797, "step": 28940 }, { "epoch": 0.743123727173374, "grad_norm": 0.78125, "learning_rate": 0.0001216286462564974, "loss": 0.7964, "step": 28941 }, { "epoch": 0.7431494043692959, "grad_norm": 0.75390625, "learning_rate": 0.00012162428770114826, "loss": 0.755, "step": 28942 }, { "epoch": 0.7431750815652177, "grad_norm": 0.7890625, "learning_rate": 0.00012161992910270386, "loss": 0.9727, "step": 28943 }, { "epoch": 0.7432007587611394, "grad_norm": 0.8203125, "learning_rate": 0.0001216155704611729, "loss": 0.8801, "step": 28944 }, { "epoch": 0.7432264359570613, "grad_norm": 0.7109375, "learning_rate": 0.00012161121177656404, "loss": 0.7293, "step": 28945 }, { "epoch": 0.7432521131529831, "grad_norm": 0.7109375, "learning_rate": 0.00012160685304888602, "loss": 0.7957, "step": 28946 }, { "epoch": 0.743277790348905, "grad_norm": 0.75390625, "learning_rate": 0.0001216024942781475, "loss": 0.7432, "step": 28947 }, { "epoch": 0.7433034675448268, "grad_norm": 0.7734375, "learning_rate": 0.00012159813546435712, "loss": 0.7085, "step": 28948 }, { "epoch": 0.7433291447407486, "grad_norm": 1.03125, "learning_rate": 0.00012159377660752362, "loss": 0.8484, "step": 28949 }, { "epoch": 0.7433548219366704, "grad_norm": 0.80859375, "learning_rate": 0.00012158941770765569, "loss": 0.8019, "step": 28950 }, { "epoch": 0.7433804991325922, "grad_norm": 0.78125, "learning_rate": 0.00012158505876476199, "loss": 0.8656, "step": 28951 }, { "epoch": 0.743406176328514, "grad_norm": 0.78125, "learning_rate": 0.00012158069977885122, "loss": 0.7746, "step": 28952 }, { "epoch": 0.7434318535244359, "grad_norm": 0.76171875, "learning_rate": 0.00012157634074993204, "loss": 0.7171, "step": 28953 }, { "epoch": 0.7434575307203577, "grad_norm": 0.7734375, "learning_rate": 0.0001215719816780132, "loss": 0.7883, "step": 28954 }, { "epoch": 0.7434832079162795, "grad_norm": 0.73046875, "learning_rate": 0.00012156762256310335, "loss": 0.7895, "step": 28955 }, { "epoch": 0.7435088851122014, "grad_norm": 0.8203125, "learning_rate": 0.00012156326340521114, "loss": 0.8493, "step": 28956 }, { "epoch": 0.7435345623081231, "grad_norm": 0.84765625, "learning_rate": 0.00012155890420434533, "loss": 0.8337, "step": 28957 }, { "epoch": 0.7435602395040449, "grad_norm": 0.80859375, "learning_rate": 0.00012155454496051455, "loss": 0.8412, "step": 28958 }, { "epoch": 0.7435859166999668, "grad_norm": 0.76953125, "learning_rate": 0.00012155018567372752, "loss": 0.7875, "step": 28959 }, { "epoch": 0.7436115938958886, "grad_norm": 0.82421875, "learning_rate": 0.00012154582634399292, "loss": 0.8274, "step": 28960 }, { "epoch": 0.7436372710918104, "grad_norm": 0.76953125, "learning_rate": 0.00012154146697131941, "loss": 0.9705, "step": 28961 }, { "epoch": 0.7436629482877323, "grad_norm": 0.7578125, "learning_rate": 0.00012153710755571573, "loss": 0.6963, "step": 28962 }, { "epoch": 0.7436886254836541, "grad_norm": 0.83984375, "learning_rate": 0.00012153274809719053, "loss": 0.9789, "step": 28963 }, { "epoch": 0.7437143026795758, "grad_norm": 0.8125, "learning_rate": 0.00012152838859575248, "loss": 0.8561, "step": 28964 }, { "epoch": 0.7437399798754977, "grad_norm": 0.828125, "learning_rate": 0.00012152402905141036, "loss": 0.8496, "step": 28965 }, { "epoch": 0.7437656570714195, "grad_norm": 0.80078125, "learning_rate": 0.00012151966946417272, "loss": 0.8086, "step": 28966 }, { "epoch": 0.7437913342673413, "grad_norm": 0.77734375, "learning_rate": 0.00012151530983404837, "loss": 0.757, "step": 28967 }, { "epoch": 0.7438170114632632, "grad_norm": 0.703125, "learning_rate": 0.00012151095016104595, "loss": 0.8284, "step": 28968 }, { "epoch": 0.743842688659185, "grad_norm": 0.68359375, "learning_rate": 0.00012150659044517412, "loss": 0.7779, "step": 28969 }, { "epoch": 0.7438683658551067, "grad_norm": 0.8203125, "learning_rate": 0.00012150223068644162, "loss": 0.8112, "step": 28970 }, { "epoch": 0.7438940430510286, "grad_norm": 0.75390625, "learning_rate": 0.00012149787088485713, "loss": 0.8104, "step": 28971 }, { "epoch": 0.7439197202469504, "grad_norm": 0.8203125, "learning_rate": 0.00012149351104042932, "loss": 0.9034, "step": 28972 }, { "epoch": 0.7439453974428722, "grad_norm": 0.8203125, "learning_rate": 0.00012148915115316688, "loss": 0.83, "step": 28973 }, { "epoch": 0.7439710746387941, "grad_norm": 0.86328125, "learning_rate": 0.00012148479122307847, "loss": 0.8773, "step": 28974 }, { "epoch": 0.7439967518347159, "grad_norm": 0.8046875, "learning_rate": 0.00012148043125017283, "loss": 0.8636, "step": 28975 }, { "epoch": 0.7440224290306378, "grad_norm": 0.80078125, "learning_rate": 0.00012147607123445865, "loss": 0.9144, "step": 28976 }, { "epoch": 0.7440481062265595, "grad_norm": 0.76953125, "learning_rate": 0.00012147171117594458, "loss": 0.8029, "step": 28977 }, { "epoch": 0.7440737834224813, "grad_norm": 0.69921875, "learning_rate": 0.00012146735107463935, "loss": 0.7964, "step": 28978 }, { "epoch": 0.7440994606184032, "grad_norm": 0.71875, "learning_rate": 0.00012146299093055163, "loss": 0.7782, "step": 28979 }, { "epoch": 0.744125137814325, "grad_norm": 0.703125, "learning_rate": 0.00012145863074369008, "loss": 0.8349, "step": 28980 }, { "epoch": 0.7441508150102468, "grad_norm": 0.90234375, "learning_rate": 0.00012145427051406344, "loss": 0.7359, "step": 28981 }, { "epoch": 0.7441764922061687, "grad_norm": 0.765625, "learning_rate": 0.00012144991024168038, "loss": 0.9569, "step": 28982 }, { "epoch": 0.7442021694020905, "grad_norm": 0.75, "learning_rate": 0.00012144554992654956, "loss": 0.8219, "step": 28983 }, { "epoch": 0.7442278465980122, "grad_norm": 0.765625, "learning_rate": 0.00012144118956867971, "loss": 0.8179, "step": 28984 }, { "epoch": 0.7442535237939341, "grad_norm": 0.7890625, "learning_rate": 0.0001214368291680795, "loss": 0.8038, "step": 28985 }, { "epoch": 0.7442792009898559, "grad_norm": 0.73828125, "learning_rate": 0.00012143246872475764, "loss": 0.7191, "step": 28986 }, { "epoch": 0.7443048781857777, "grad_norm": 0.796875, "learning_rate": 0.00012142810823872281, "loss": 0.8161, "step": 28987 }, { "epoch": 0.7443305553816996, "grad_norm": 0.75390625, "learning_rate": 0.00012142374770998366, "loss": 0.6849, "step": 28988 }, { "epoch": 0.7443562325776214, "grad_norm": 0.80078125, "learning_rate": 0.00012141938713854894, "loss": 0.9828, "step": 28989 }, { "epoch": 0.7443819097735431, "grad_norm": 0.84765625, "learning_rate": 0.00012141502652442734, "loss": 0.7973, "step": 28990 }, { "epoch": 0.744407586969465, "grad_norm": 0.80078125, "learning_rate": 0.00012141066586762751, "loss": 0.802, "step": 28991 }, { "epoch": 0.7444332641653868, "grad_norm": 0.7265625, "learning_rate": 0.00012140630516815814, "loss": 0.7882, "step": 28992 }, { "epoch": 0.7444589413613086, "grad_norm": 0.79296875, "learning_rate": 0.00012140194442602794, "loss": 0.798, "step": 28993 }, { "epoch": 0.7444846185572305, "grad_norm": 0.7578125, "learning_rate": 0.0001213975836412456, "loss": 0.7491, "step": 28994 }, { "epoch": 0.7445102957531523, "grad_norm": 0.82421875, "learning_rate": 0.00012139322281381981, "loss": 0.7667, "step": 28995 }, { "epoch": 0.7445359729490741, "grad_norm": 0.75390625, "learning_rate": 0.00012138886194375927, "loss": 0.899, "step": 28996 }, { "epoch": 0.7445616501449959, "grad_norm": 0.796875, "learning_rate": 0.00012138450103107266, "loss": 0.7297, "step": 28997 }, { "epoch": 0.7445873273409177, "grad_norm": 0.75, "learning_rate": 0.00012138014007576866, "loss": 0.7889, "step": 28998 }, { "epoch": 0.7446130045368395, "grad_norm": 0.78125, "learning_rate": 0.00012137577907785598, "loss": 0.9599, "step": 28999 }, { "epoch": 0.7446386817327614, "grad_norm": 0.75, "learning_rate": 0.00012137141803734331, "loss": 0.7178, "step": 29000 }, { "epoch": 0.7446386817327614, "eval_loss": 0.8222226500511169, "eval_runtime": 395.4366, "eval_samples_per_second": 25.289, "eval_steps_per_second": 0.792, "step": 29000 }, { "epoch": 0.7446643589286832, "grad_norm": 0.72265625, "learning_rate": 0.00012136705695423932, "loss": 0.7878, "step": 29001 }, { "epoch": 0.744690036124605, "grad_norm": 0.8828125, "learning_rate": 0.0001213626958285527, "loss": 0.845, "step": 29002 }, { "epoch": 0.7447157133205269, "grad_norm": 0.7734375, "learning_rate": 0.00012135833466029218, "loss": 0.81, "step": 29003 }, { "epoch": 0.7447413905164486, "grad_norm": 0.7265625, "learning_rate": 0.00012135397344946642, "loss": 0.8514, "step": 29004 }, { "epoch": 0.7447670677123704, "grad_norm": 0.76171875, "learning_rate": 0.00012134961219608416, "loss": 0.89, "step": 29005 }, { "epoch": 0.7447927449082923, "grad_norm": 0.81640625, "learning_rate": 0.00012134525090015399, "loss": 0.767, "step": 29006 }, { "epoch": 0.7448184221042141, "grad_norm": 0.796875, "learning_rate": 0.0001213408895616847, "loss": 0.8595, "step": 29007 }, { "epoch": 0.744844099300136, "grad_norm": 0.9296875, "learning_rate": 0.00012133652818068494, "loss": 0.8183, "step": 29008 }, { "epoch": 0.7448697764960578, "grad_norm": 0.82421875, "learning_rate": 0.00012133216675716339, "loss": 0.8639, "step": 29009 }, { "epoch": 0.7448954536919795, "grad_norm": 0.75390625, "learning_rate": 0.00012132780529112877, "loss": 0.8525, "step": 29010 }, { "epoch": 0.7449211308879014, "grad_norm": 0.87109375, "learning_rate": 0.00012132344378258977, "loss": 0.8043, "step": 29011 }, { "epoch": 0.7449468080838232, "grad_norm": 0.78515625, "learning_rate": 0.00012131908223155506, "loss": 0.7321, "step": 29012 }, { "epoch": 0.744972485279745, "grad_norm": 0.88671875, "learning_rate": 0.00012131472063803335, "loss": 0.815, "step": 29013 }, { "epoch": 0.7449981624756669, "grad_norm": 0.84765625, "learning_rate": 0.00012131035900203332, "loss": 0.8416, "step": 29014 }, { "epoch": 0.7450238396715887, "grad_norm": 0.74609375, "learning_rate": 0.00012130599732356366, "loss": 0.9004, "step": 29015 }, { "epoch": 0.7450495168675105, "grad_norm": 0.7109375, "learning_rate": 0.0001213016356026331, "loss": 0.76, "step": 29016 }, { "epoch": 0.7450751940634323, "grad_norm": 0.796875, "learning_rate": 0.00012129727383925027, "loss": 0.793, "step": 29017 }, { "epoch": 0.7451008712593541, "grad_norm": 0.7890625, "learning_rate": 0.00012129291203342394, "loss": 0.8701, "step": 29018 }, { "epoch": 0.7451265484552759, "grad_norm": 0.85546875, "learning_rate": 0.00012128855018516274, "loss": 0.9796, "step": 29019 }, { "epoch": 0.7451522256511978, "grad_norm": 0.8359375, "learning_rate": 0.00012128418829447539, "loss": 0.9429, "step": 29020 }, { "epoch": 0.7451779028471196, "grad_norm": 0.80078125, "learning_rate": 0.00012127982636137057, "loss": 0.7862, "step": 29021 }, { "epoch": 0.7452035800430414, "grad_norm": 0.73828125, "learning_rate": 0.00012127546438585695, "loss": 0.8347, "step": 29022 }, { "epoch": 0.7452292572389633, "grad_norm": 0.75, "learning_rate": 0.00012127110236794328, "loss": 0.7715, "step": 29023 }, { "epoch": 0.745254934434885, "grad_norm": 0.765625, "learning_rate": 0.00012126674030763823, "loss": 0.7888, "step": 29024 }, { "epoch": 0.7452806116308068, "grad_norm": 0.78125, "learning_rate": 0.00012126237820495048, "loss": 0.7698, "step": 29025 }, { "epoch": 0.7453062888267287, "grad_norm": 0.88671875, "learning_rate": 0.00012125801605988871, "loss": 0.8403, "step": 29026 }, { "epoch": 0.7453319660226505, "grad_norm": 0.75, "learning_rate": 0.00012125365387246165, "loss": 0.8533, "step": 29027 }, { "epoch": 0.7453576432185723, "grad_norm": 0.8046875, "learning_rate": 0.00012124929164267797, "loss": 0.8405, "step": 29028 }, { "epoch": 0.7453833204144942, "grad_norm": 0.8515625, "learning_rate": 0.00012124492937054639, "loss": 0.9236, "step": 29029 }, { "epoch": 0.7454089976104159, "grad_norm": 0.75, "learning_rate": 0.00012124056705607558, "loss": 0.7794, "step": 29030 }, { "epoch": 0.7454346748063377, "grad_norm": 0.75, "learning_rate": 0.00012123620469927423, "loss": 0.8591, "step": 29031 }, { "epoch": 0.7454603520022596, "grad_norm": 1.03125, "learning_rate": 0.00012123184230015106, "loss": 0.8384, "step": 29032 }, { "epoch": 0.7454860291981814, "grad_norm": 0.8203125, "learning_rate": 0.00012122747985871472, "loss": 0.8409, "step": 29033 }, { "epoch": 0.7455117063941032, "grad_norm": 0.7265625, "learning_rate": 0.00012122311737497396, "loss": 0.723, "step": 29034 }, { "epoch": 0.7455373835900251, "grad_norm": 0.734375, "learning_rate": 0.00012121875484893742, "loss": 0.8291, "step": 29035 }, { "epoch": 0.7455630607859469, "grad_norm": 0.765625, "learning_rate": 0.0001212143922806138, "loss": 0.7325, "step": 29036 }, { "epoch": 0.7455887379818686, "grad_norm": 0.80859375, "learning_rate": 0.00012121002967001188, "loss": 0.8461, "step": 29037 }, { "epoch": 0.7456144151777905, "grad_norm": 0.76171875, "learning_rate": 0.00012120566701714023, "loss": 0.8645, "step": 29038 }, { "epoch": 0.7456400923737123, "grad_norm": 0.87890625, "learning_rate": 0.0001212013043220076, "loss": 0.8381, "step": 29039 }, { "epoch": 0.7456657695696342, "grad_norm": 0.70703125, "learning_rate": 0.00012119694158462271, "loss": 0.6952, "step": 29040 }, { "epoch": 0.745691446765556, "grad_norm": 0.80078125, "learning_rate": 0.00012119257880499422, "loss": 0.7362, "step": 29041 }, { "epoch": 0.7457171239614778, "grad_norm": 0.81640625, "learning_rate": 0.00012118821598313084, "loss": 0.8542, "step": 29042 }, { "epoch": 0.7457428011573997, "grad_norm": 0.8671875, "learning_rate": 0.00012118385311904125, "loss": 0.8121, "step": 29043 }, { "epoch": 0.7457684783533214, "grad_norm": 0.765625, "learning_rate": 0.00012117949021273416, "loss": 0.8062, "step": 29044 }, { "epoch": 0.7457941555492432, "grad_norm": 0.76171875, "learning_rate": 0.00012117512726421827, "loss": 0.9317, "step": 29045 }, { "epoch": 0.7458198327451651, "grad_norm": 0.75, "learning_rate": 0.00012117076427350225, "loss": 0.8727, "step": 29046 }, { "epoch": 0.7458455099410869, "grad_norm": 0.82421875, "learning_rate": 0.0001211664012405948, "loss": 0.7826, "step": 29047 }, { "epoch": 0.7458711871370087, "grad_norm": 0.78515625, "learning_rate": 0.00012116203816550464, "loss": 0.7142, "step": 29048 }, { "epoch": 0.7458968643329306, "grad_norm": 0.77734375, "learning_rate": 0.00012115767504824043, "loss": 0.8468, "step": 29049 }, { "epoch": 0.7459225415288523, "grad_norm": 0.85546875, "learning_rate": 0.00012115331188881092, "loss": 0.8231, "step": 29050 }, { "epoch": 0.7459482187247741, "grad_norm": 0.796875, "learning_rate": 0.00012114894868722475, "loss": 0.7126, "step": 29051 }, { "epoch": 0.745973895920696, "grad_norm": 0.734375, "learning_rate": 0.00012114458544349063, "loss": 0.8648, "step": 29052 }, { "epoch": 0.7459995731166178, "grad_norm": 0.78125, "learning_rate": 0.00012114022215761726, "loss": 0.8073, "step": 29053 }, { "epoch": 0.7460252503125396, "grad_norm": 0.76953125, "learning_rate": 0.00012113585882961334, "loss": 0.7626, "step": 29054 }, { "epoch": 0.7460509275084615, "grad_norm": 0.79296875, "learning_rate": 0.00012113149545948758, "loss": 0.8101, "step": 29055 }, { "epoch": 0.7460766047043833, "grad_norm": 0.80078125, "learning_rate": 0.00012112713204724863, "loss": 0.7461, "step": 29056 }, { "epoch": 0.746102281900305, "grad_norm": 0.76953125, "learning_rate": 0.00012112276859290526, "loss": 0.7522, "step": 29057 }, { "epoch": 0.7461279590962269, "grad_norm": 0.828125, "learning_rate": 0.00012111840509646608, "loss": 0.8358, "step": 29058 }, { "epoch": 0.7461536362921487, "grad_norm": 0.7734375, "learning_rate": 0.00012111404155793982, "loss": 0.8463, "step": 29059 }, { "epoch": 0.7461793134880705, "grad_norm": 0.74609375, "learning_rate": 0.00012110967797733521, "loss": 0.8368, "step": 29060 }, { "epoch": 0.7462049906839924, "grad_norm": 0.8828125, "learning_rate": 0.0001211053143546609, "loss": 0.9075, "step": 29061 }, { "epoch": 0.7462306678799142, "grad_norm": 0.8046875, "learning_rate": 0.00012110095068992562, "loss": 0.8826, "step": 29062 }, { "epoch": 0.746256345075836, "grad_norm": 0.76171875, "learning_rate": 0.00012109658698313803, "loss": 0.8764, "step": 29063 }, { "epoch": 0.7462820222717578, "grad_norm": 0.7890625, "learning_rate": 0.00012109222323430689, "loss": 0.8301, "step": 29064 }, { "epoch": 0.7463076994676796, "grad_norm": 0.76953125, "learning_rate": 0.00012108785944344081, "loss": 0.9575, "step": 29065 }, { "epoch": 0.7463333766636014, "grad_norm": 1.7734375, "learning_rate": 0.00012108349561054855, "loss": 0.8462, "step": 29066 }, { "epoch": 0.7463590538595233, "grad_norm": 0.765625, "learning_rate": 0.00012107913173563878, "loss": 0.7985, "step": 29067 }, { "epoch": 0.7463847310554451, "grad_norm": 0.8125, "learning_rate": 0.0001210747678187202, "loss": 0.8056, "step": 29068 }, { "epoch": 0.746410408251367, "grad_norm": 0.80859375, "learning_rate": 0.00012107040385980154, "loss": 0.8361, "step": 29069 }, { "epoch": 0.7464360854472887, "grad_norm": 0.79296875, "learning_rate": 0.00012106603985889144, "loss": 0.822, "step": 29070 }, { "epoch": 0.7464617626432105, "grad_norm": 0.75, "learning_rate": 0.00012106167581599863, "loss": 0.7792, "step": 29071 }, { "epoch": 0.7464874398391323, "grad_norm": 0.76953125, "learning_rate": 0.00012105731173113182, "loss": 0.8084, "step": 29072 }, { "epoch": 0.7465131170350542, "grad_norm": 0.7734375, "learning_rate": 0.00012105294760429965, "loss": 0.8239, "step": 29073 }, { "epoch": 0.746538794230976, "grad_norm": 0.73046875, "learning_rate": 0.00012104858343551089, "loss": 0.6579, "step": 29074 }, { "epoch": 0.7465644714268979, "grad_norm": 0.72265625, "learning_rate": 0.0001210442192247742, "loss": 0.713, "step": 29075 }, { "epoch": 0.7465901486228197, "grad_norm": 0.74609375, "learning_rate": 0.00012103985497209826, "loss": 0.7369, "step": 29076 }, { "epoch": 0.7466158258187414, "grad_norm": 0.7578125, "learning_rate": 0.00012103549067749185, "loss": 0.69, "step": 29077 }, { "epoch": 0.7466415030146633, "grad_norm": 0.79296875, "learning_rate": 0.00012103112634096354, "loss": 0.9809, "step": 29078 }, { "epoch": 0.7466671802105851, "grad_norm": 0.796875, "learning_rate": 0.00012102676196252213, "loss": 0.8245, "step": 29079 }, { "epoch": 0.7466928574065069, "grad_norm": 0.70703125, "learning_rate": 0.00012102239754217628, "loss": 0.7205, "step": 29080 }, { "epoch": 0.7467185346024288, "grad_norm": 0.828125, "learning_rate": 0.00012101803307993466, "loss": 0.8925, "step": 29081 }, { "epoch": 0.7467442117983506, "grad_norm": 0.84375, "learning_rate": 0.00012101366857580605, "loss": 0.9112, "step": 29082 }, { "epoch": 0.7467698889942724, "grad_norm": 0.75390625, "learning_rate": 0.00012100930402979906, "loss": 0.8562, "step": 29083 }, { "epoch": 0.7467955661901942, "grad_norm": 0.78125, "learning_rate": 0.00012100493944192246, "loss": 0.8213, "step": 29084 }, { "epoch": 0.746821243386116, "grad_norm": 0.7734375, "learning_rate": 0.00012100057481218487, "loss": 0.8185, "step": 29085 }, { "epoch": 0.7468469205820378, "grad_norm": 0.7734375, "learning_rate": 0.00012099621014059503, "loss": 0.8432, "step": 29086 }, { "epoch": 0.7468725977779597, "grad_norm": 0.75, "learning_rate": 0.00012099184542716169, "loss": 0.744, "step": 29087 }, { "epoch": 0.7468982749738815, "grad_norm": 0.73046875, "learning_rate": 0.00012098748067189345, "loss": 0.8802, "step": 29088 }, { "epoch": 0.7469239521698033, "grad_norm": 0.77734375, "learning_rate": 0.00012098311587479907, "loss": 0.7565, "step": 29089 }, { "epoch": 0.7469496293657251, "grad_norm": 0.68359375, "learning_rate": 0.00012097875103588727, "loss": 0.8472, "step": 29090 }, { "epoch": 0.7469753065616469, "grad_norm": 0.79296875, "learning_rate": 0.00012097438615516665, "loss": 0.8682, "step": 29091 }, { "epoch": 0.7470009837575687, "grad_norm": 0.765625, "learning_rate": 0.00012097002123264603, "loss": 0.7984, "step": 29092 }, { "epoch": 0.7470266609534906, "grad_norm": 0.7890625, "learning_rate": 0.00012096565626833403, "loss": 0.8506, "step": 29093 }, { "epoch": 0.7470523381494124, "grad_norm": 0.8125, "learning_rate": 0.00012096129126223937, "loss": 0.8541, "step": 29094 }, { "epoch": 0.7470780153453342, "grad_norm": 0.7421875, "learning_rate": 0.00012095692621437074, "loss": 0.6858, "step": 29095 }, { "epoch": 0.7471036925412561, "grad_norm": 0.76171875, "learning_rate": 0.00012095256112473685, "loss": 0.8003, "step": 29096 }, { "epoch": 0.7471293697371778, "grad_norm": 0.828125, "learning_rate": 0.00012094819599334641, "loss": 0.8833, "step": 29097 }, { "epoch": 0.7471550469330996, "grad_norm": 0.80078125, "learning_rate": 0.00012094383082020812, "loss": 0.9121, "step": 29098 }, { "epoch": 0.7471807241290215, "grad_norm": 0.75, "learning_rate": 0.00012093946560533065, "loss": 0.8491, "step": 29099 }, { "epoch": 0.7472064013249433, "grad_norm": 0.6953125, "learning_rate": 0.0001209351003487227, "loss": 0.7705, "step": 29100 }, { "epoch": 0.7472320785208652, "grad_norm": 0.734375, "learning_rate": 0.00012093073505039303, "loss": 0.7934, "step": 29101 }, { "epoch": 0.747257755716787, "grad_norm": 0.74609375, "learning_rate": 0.00012092636971035025, "loss": 0.869, "step": 29102 }, { "epoch": 0.7472834329127088, "grad_norm": 0.796875, "learning_rate": 0.00012092200432860314, "loss": 0.7442, "step": 29103 }, { "epoch": 0.7473091101086305, "grad_norm": 0.78125, "learning_rate": 0.00012091763890516037, "loss": 0.9381, "step": 29104 }, { "epoch": 0.7473347873045524, "grad_norm": 0.82421875, "learning_rate": 0.00012091327344003059, "loss": 0.8262, "step": 29105 }, { "epoch": 0.7473604645004742, "grad_norm": 0.7421875, "learning_rate": 0.00012090890793322258, "loss": 0.7763, "step": 29106 }, { "epoch": 0.7473861416963961, "grad_norm": 0.76171875, "learning_rate": 0.00012090454238474498, "loss": 0.8319, "step": 29107 }, { "epoch": 0.7474118188923179, "grad_norm": 0.7734375, "learning_rate": 0.00012090017679460653, "loss": 0.9221, "step": 29108 }, { "epoch": 0.7474374960882397, "grad_norm": 0.8203125, "learning_rate": 0.00012089581116281591, "loss": 0.9828, "step": 29109 }, { "epoch": 0.7474631732841615, "grad_norm": 0.7265625, "learning_rate": 0.00012089144548938185, "loss": 0.7177, "step": 29110 }, { "epoch": 0.7474888504800833, "grad_norm": 0.796875, "learning_rate": 0.00012088707977431302, "loss": 0.928, "step": 29111 }, { "epoch": 0.7475145276760051, "grad_norm": 0.86328125, "learning_rate": 0.00012088271401761813, "loss": 0.8537, "step": 29112 }, { "epoch": 0.747540204871927, "grad_norm": 0.78125, "learning_rate": 0.00012087834821930584, "loss": 0.8757, "step": 29113 }, { "epoch": 0.7475658820678488, "grad_norm": 0.86328125, "learning_rate": 0.00012087398237938492, "loss": 0.8144, "step": 29114 }, { "epoch": 0.7475915592637706, "grad_norm": 0.86328125, "learning_rate": 0.00012086961649786404, "loss": 0.913, "step": 29115 }, { "epoch": 0.7476172364596925, "grad_norm": 0.8046875, "learning_rate": 0.00012086525057475188, "loss": 1.0232, "step": 29116 }, { "epoch": 0.7476429136556142, "grad_norm": 0.7421875, "learning_rate": 0.00012086088461005718, "loss": 0.8063, "step": 29117 }, { "epoch": 0.747668590851536, "grad_norm": 0.72265625, "learning_rate": 0.0001208565186037886, "loss": 0.8103, "step": 29118 }, { "epoch": 0.7476942680474579, "grad_norm": 0.80859375, "learning_rate": 0.00012085215255595486, "loss": 0.8914, "step": 29119 }, { "epoch": 0.7477199452433797, "grad_norm": 0.76953125, "learning_rate": 0.0001208477864665647, "loss": 0.7988, "step": 29120 }, { "epoch": 0.7477456224393015, "grad_norm": 0.7265625, "learning_rate": 0.00012084342033562672, "loss": 0.9089, "step": 29121 }, { "epoch": 0.7477712996352234, "grad_norm": 1.03125, "learning_rate": 0.00012083905416314974, "loss": 0.8647, "step": 29122 }, { "epoch": 0.7477969768311451, "grad_norm": 0.76171875, "learning_rate": 0.00012083468794914241, "loss": 0.8203, "step": 29123 }, { "epoch": 0.7478226540270669, "grad_norm": 0.87109375, "learning_rate": 0.00012083032169361342, "loss": 0.8392, "step": 29124 }, { "epoch": 0.7478483312229888, "grad_norm": 0.75390625, "learning_rate": 0.00012082595539657146, "loss": 0.8454, "step": 29125 }, { "epoch": 0.7478740084189106, "grad_norm": 0.74609375, "learning_rate": 0.00012082158905802526, "loss": 0.8073, "step": 29126 }, { "epoch": 0.7478996856148324, "grad_norm": 0.71875, "learning_rate": 0.00012081722267798352, "loss": 0.7803, "step": 29127 }, { "epoch": 0.7479253628107543, "grad_norm": 0.86328125, "learning_rate": 0.00012081285625645495, "loss": 0.8851, "step": 29128 }, { "epoch": 0.7479510400066761, "grad_norm": 0.8828125, "learning_rate": 0.0001208084897934482, "loss": 0.8979, "step": 29129 }, { "epoch": 0.7479767172025978, "grad_norm": 0.75, "learning_rate": 0.00012080412328897205, "loss": 0.7674, "step": 29130 }, { "epoch": 0.7480023943985197, "grad_norm": 0.77734375, "learning_rate": 0.00012079975674303514, "loss": 0.8659, "step": 29131 }, { "epoch": 0.7480280715944415, "grad_norm": 0.76953125, "learning_rate": 0.0001207953901556462, "loss": 0.8999, "step": 29132 }, { "epoch": 0.7480537487903633, "grad_norm": 0.78515625, "learning_rate": 0.00012079102352681392, "loss": 0.7333, "step": 29133 }, { "epoch": 0.7480794259862852, "grad_norm": 0.69140625, "learning_rate": 0.000120786656856547, "loss": 0.7341, "step": 29134 }, { "epoch": 0.748105103182207, "grad_norm": 0.75, "learning_rate": 0.00012078229014485418, "loss": 0.8, "step": 29135 }, { "epoch": 0.7481307803781289, "grad_norm": 0.78515625, "learning_rate": 0.00012077792339174412, "loss": 0.8454, "step": 29136 }, { "epoch": 0.7481564575740506, "grad_norm": 0.7734375, "learning_rate": 0.00012077355659722553, "loss": 0.755, "step": 29137 }, { "epoch": 0.7481821347699724, "grad_norm": 0.76171875, "learning_rate": 0.00012076918976130713, "loss": 0.8459, "step": 29138 }, { "epoch": 0.7482078119658943, "grad_norm": 0.8671875, "learning_rate": 0.00012076482288399758, "loss": 0.8307, "step": 29139 }, { "epoch": 0.7482334891618161, "grad_norm": 0.89453125, "learning_rate": 0.00012076045596530564, "loss": 0.9164, "step": 29140 }, { "epoch": 0.7482591663577379, "grad_norm": 0.77734375, "learning_rate": 0.00012075608900523998, "loss": 0.8372, "step": 29141 }, { "epoch": 0.7482848435536598, "grad_norm": 0.8828125, "learning_rate": 0.00012075172200380929, "loss": 0.8917, "step": 29142 }, { "epoch": 0.7483105207495815, "grad_norm": 0.75, "learning_rate": 0.00012074735496102234, "loss": 0.7565, "step": 29143 }, { "epoch": 0.7483361979455033, "grad_norm": 0.76953125, "learning_rate": 0.00012074298787688776, "loss": 0.9182, "step": 29144 }, { "epoch": 0.7483618751414252, "grad_norm": 0.75390625, "learning_rate": 0.00012073862075141427, "loss": 0.7273, "step": 29145 }, { "epoch": 0.748387552337347, "grad_norm": 0.7265625, "learning_rate": 0.00012073425358461059, "loss": 0.7853, "step": 29146 }, { "epoch": 0.7484132295332688, "grad_norm": 0.71484375, "learning_rate": 0.00012072988637648541, "loss": 0.8137, "step": 29147 }, { "epoch": 0.7484389067291907, "grad_norm": 0.78515625, "learning_rate": 0.00012072551912704744, "loss": 0.6577, "step": 29148 }, { "epoch": 0.7484645839251125, "grad_norm": 0.73046875, "learning_rate": 0.00012072115183630541, "loss": 0.8115, "step": 29149 }, { "epoch": 0.7484902611210342, "grad_norm": 0.80859375, "learning_rate": 0.00012071678450426796, "loss": 0.763, "step": 29150 }, { "epoch": 0.7485159383169561, "grad_norm": 0.84765625, "learning_rate": 0.00012071241713094386, "loss": 0.7527, "step": 29151 }, { "epoch": 0.7485416155128779, "grad_norm": 0.75390625, "learning_rate": 0.00012070804971634176, "loss": 0.7193, "step": 29152 }, { "epoch": 0.7485672927087997, "grad_norm": 0.7734375, "learning_rate": 0.00012070368226047037, "loss": 0.8556, "step": 29153 }, { "epoch": 0.7485929699047216, "grad_norm": 0.84765625, "learning_rate": 0.00012069931476333844, "loss": 1.0043, "step": 29154 }, { "epoch": 0.7486186471006434, "grad_norm": 0.75390625, "learning_rate": 0.00012069494722495465, "loss": 0.7657, "step": 29155 }, { "epoch": 0.7486443242965652, "grad_norm": 0.75390625, "learning_rate": 0.00012069057964532768, "loss": 0.7769, "step": 29156 }, { "epoch": 0.748670001492487, "grad_norm": 0.8046875, "learning_rate": 0.00012068621202446628, "loss": 0.8746, "step": 29157 }, { "epoch": 0.7486956786884088, "grad_norm": 0.82421875, "learning_rate": 0.00012068184436237911, "loss": 0.8141, "step": 29158 }, { "epoch": 0.7487213558843306, "grad_norm": 0.765625, "learning_rate": 0.00012067747665907489, "loss": 0.6913, "step": 29159 }, { "epoch": 0.7487470330802525, "grad_norm": 0.80078125, "learning_rate": 0.00012067310891456233, "loss": 0.79, "step": 29160 }, { "epoch": 0.7487727102761743, "grad_norm": 0.82421875, "learning_rate": 0.00012066874112885012, "loss": 0.9181, "step": 29161 }, { "epoch": 0.7487983874720961, "grad_norm": 0.7890625, "learning_rate": 0.00012066437330194702, "loss": 0.8998, "step": 29162 }, { "epoch": 0.7488240646680179, "grad_norm": 0.8046875, "learning_rate": 0.00012066000543386163, "loss": 0.8666, "step": 29163 }, { "epoch": 0.7488497418639397, "grad_norm": 0.83203125, "learning_rate": 0.00012065563752460276, "loss": 0.9453, "step": 29164 }, { "epoch": 0.7488754190598615, "grad_norm": 0.80078125, "learning_rate": 0.00012065126957417905, "loss": 0.7695, "step": 29165 }, { "epoch": 0.7489010962557834, "grad_norm": 0.76171875, "learning_rate": 0.00012064690158259921, "loss": 0.8083, "step": 29166 }, { "epoch": 0.7489267734517052, "grad_norm": 0.80078125, "learning_rate": 0.00012064253354987198, "loss": 0.7965, "step": 29167 }, { "epoch": 0.748952450647627, "grad_norm": 0.76953125, "learning_rate": 0.00012063816547600606, "loss": 0.7606, "step": 29168 }, { "epoch": 0.7489781278435489, "grad_norm": 0.828125, "learning_rate": 0.00012063379736101013, "loss": 0.9127, "step": 29169 }, { "epoch": 0.7490038050394706, "grad_norm": 0.78515625, "learning_rate": 0.00012062942920489291, "loss": 0.868, "step": 29170 }, { "epoch": 0.7490294822353925, "grad_norm": 0.78125, "learning_rate": 0.00012062506100766306, "loss": 0.8451, "step": 29171 }, { "epoch": 0.7490551594313143, "grad_norm": 0.74609375, "learning_rate": 0.00012062069276932936, "loss": 0.8197, "step": 29172 }, { "epoch": 0.7490808366272361, "grad_norm": 0.796875, "learning_rate": 0.0001206163244899005, "loss": 0.9657, "step": 29173 }, { "epoch": 0.749106513823158, "grad_norm": 0.7734375, "learning_rate": 0.00012061195616938513, "loss": 0.9224, "step": 29174 }, { "epoch": 0.7491321910190798, "grad_norm": 0.7734375, "learning_rate": 0.000120607587807792, "loss": 0.8121, "step": 29175 }, { "epoch": 0.7491578682150016, "grad_norm": 0.8046875, "learning_rate": 0.00012060321940512984, "loss": 0.8234, "step": 29176 }, { "epoch": 0.7491835454109234, "grad_norm": 0.7265625, "learning_rate": 0.00012059885096140731, "loss": 0.8342, "step": 29177 }, { "epoch": 0.7492092226068452, "grad_norm": 0.7734375, "learning_rate": 0.00012059448247663311, "loss": 0.9134, "step": 29178 }, { "epoch": 0.749234899802767, "grad_norm": 0.7578125, "learning_rate": 0.00012059011395081598, "loss": 1.0274, "step": 29179 }, { "epoch": 0.7492605769986889, "grad_norm": 0.81640625, "learning_rate": 0.00012058574538396463, "loss": 0.7888, "step": 29180 }, { "epoch": 0.7492862541946107, "grad_norm": 0.94140625, "learning_rate": 0.00012058137677608772, "loss": 0.862, "step": 29181 }, { "epoch": 0.7493119313905325, "grad_norm": 0.75, "learning_rate": 0.00012057700812719401, "loss": 0.6983, "step": 29182 }, { "epoch": 0.7493376085864543, "grad_norm": 0.76171875, "learning_rate": 0.00012057263943729218, "loss": 0.8728, "step": 29183 }, { "epoch": 0.7493632857823761, "grad_norm": 0.765625, "learning_rate": 0.00012056827070639091, "loss": 0.8994, "step": 29184 }, { "epoch": 0.7493889629782979, "grad_norm": 0.7421875, "learning_rate": 0.00012056390193449896, "loss": 0.8543, "step": 29185 }, { "epoch": 0.7494146401742198, "grad_norm": 0.79296875, "learning_rate": 0.00012055953312162501, "loss": 0.9872, "step": 29186 }, { "epoch": 0.7494403173701416, "grad_norm": 0.875, "learning_rate": 0.00012055516426777775, "loss": 0.7844, "step": 29187 }, { "epoch": 0.7494659945660634, "grad_norm": 0.80859375, "learning_rate": 0.00012055079537296592, "loss": 0.8821, "step": 29188 }, { "epoch": 0.7494916717619853, "grad_norm": 0.8515625, "learning_rate": 0.00012054642643719822, "loss": 0.8975, "step": 29189 }, { "epoch": 0.749517348957907, "grad_norm": 0.7265625, "learning_rate": 0.00012054205746048332, "loss": 0.8307, "step": 29190 }, { "epoch": 0.7495430261538288, "grad_norm": 0.79296875, "learning_rate": 0.00012053768844282998, "loss": 0.7736, "step": 29191 }, { "epoch": 0.7495687033497507, "grad_norm": 0.78515625, "learning_rate": 0.00012053331938424688, "loss": 0.7424, "step": 29192 }, { "epoch": 0.7495943805456725, "grad_norm": 0.75390625, "learning_rate": 0.0001205289502847427, "loss": 0.7836, "step": 29193 }, { "epoch": 0.7496200577415943, "grad_norm": 0.7578125, "learning_rate": 0.00012052458114432621, "loss": 0.7344, "step": 29194 }, { "epoch": 0.7496457349375162, "grad_norm": 0.72265625, "learning_rate": 0.00012052021196300607, "loss": 0.8195, "step": 29195 }, { "epoch": 0.749671412133438, "grad_norm": 0.71484375, "learning_rate": 0.00012051584274079102, "loss": 0.7834, "step": 29196 }, { "epoch": 0.7496970893293597, "grad_norm": 0.69140625, "learning_rate": 0.00012051147347768971, "loss": 0.7353, "step": 29197 }, { "epoch": 0.7497227665252816, "grad_norm": 0.80078125, "learning_rate": 0.00012050710417371092, "loss": 0.7775, "step": 29198 }, { "epoch": 0.7497484437212034, "grad_norm": 0.76171875, "learning_rate": 0.00012050273482886329, "loss": 0.9002, "step": 29199 }, { "epoch": 0.7497741209171253, "grad_norm": 0.7578125, "learning_rate": 0.0001204983654431556, "loss": 0.7365, "step": 29200 }, { "epoch": 0.7497997981130471, "grad_norm": 0.8046875, "learning_rate": 0.0001204939960165965, "loss": 0.972, "step": 29201 }, { "epoch": 0.7498254753089689, "grad_norm": 0.7578125, "learning_rate": 0.00012048962654919473, "loss": 0.6976, "step": 29202 }, { "epoch": 0.7498511525048907, "grad_norm": 0.87890625, "learning_rate": 0.00012048525704095896, "loss": 0.8622, "step": 29203 }, { "epoch": 0.7498768297008125, "grad_norm": 0.796875, "learning_rate": 0.00012048088749189794, "loss": 0.8046, "step": 29204 }, { "epoch": 0.7499025068967343, "grad_norm": 0.75, "learning_rate": 0.00012047651790202037, "loss": 0.8626, "step": 29205 }, { "epoch": 0.7499281840926562, "grad_norm": 0.7578125, "learning_rate": 0.00012047214827133493, "loss": 0.9059, "step": 29206 }, { "epoch": 0.749953861288578, "grad_norm": 0.7734375, "learning_rate": 0.00012046777859985035, "loss": 0.8239, "step": 29207 }, { "epoch": 0.7499795384844998, "grad_norm": 0.828125, "learning_rate": 0.00012046340888757534, "loss": 0.7699, "step": 29208 }, { "epoch": 0.7500052156804217, "grad_norm": 0.80859375, "learning_rate": 0.00012045903913451862, "loss": 1.0981, "step": 29209 }, { "epoch": 0.7500308928763434, "grad_norm": 0.78125, "learning_rate": 0.00012045466934068888, "loss": 0.8401, "step": 29210 }, { "epoch": 0.7500565700722652, "grad_norm": 0.90234375, "learning_rate": 0.00012045029950609481, "loss": 0.8724, "step": 29211 }, { "epoch": 0.7500822472681871, "grad_norm": 0.80078125, "learning_rate": 0.00012044592963074515, "loss": 0.809, "step": 29212 }, { "epoch": 0.7501079244641089, "grad_norm": 0.73046875, "learning_rate": 0.0001204415597146486, "loss": 0.7883, "step": 29213 }, { "epoch": 0.7501336016600307, "grad_norm": 0.77734375, "learning_rate": 0.00012043718975781386, "loss": 0.9797, "step": 29214 }, { "epoch": 0.7501592788559526, "grad_norm": 0.78515625, "learning_rate": 0.00012043281976024967, "loss": 0.8734, "step": 29215 }, { "epoch": 0.7501849560518744, "grad_norm": 0.765625, "learning_rate": 0.0001204284497219647, "loss": 0.7778, "step": 29216 }, { "epoch": 0.7502106332477961, "grad_norm": 0.73828125, "learning_rate": 0.00012042407964296767, "loss": 0.7502, "step": 29217 }, { "epoch": 0.750236310443718, "grad_norm": 0.82421875, "learning_rate": 0.00012041970952326732, "loss": 0.8578, "step": 29218 }, { "epoch": 0.7502619876396398, "grad_norm": 0.72265625, "learning_rate": 0.00012041533936287228, "loss": 0.8104, "step": 29219 }, { "epoch": 0.7502876648355616, "grad_norm": 0.734375, "learning_rate": 0.00012041096916179135, "loss": 0.7381, "step": 29220 }, { "epoch": 0.7503133420314835, "grad_norm": 0.7265625, "learning_rate": 0.00012040659892003321, "loss": 0.6719, "step": 29221 }, { "epoch": 0.7503390192274053, "grad_norm": 0.77734375, "learning_rate": 0.00012040222863760655, "loss": 0.9064, "step": 29222 }, { "epoch": 0.750364696423327, "grad_norm": 0.75, "learning_rate": 0.0001203978583145201, "loss": 0.9081, "step": 29223 }, { "epoch": 0.7503903736192489, "grad_norm": 0.828125, "learning_rate": 0.00012039348795078253, "loss": 0.8567, "step": 29224 }, { "epoch": 0.7504160508151707, "grad_norm": 0.81640625, "learning_rate": 0.00012038911754640261, "loss": 0.7224, "step": 29225 }, { "epoch": 0.7504417280110925, "grad_norm": 0.73046875, "learning_rate": 0.00012038474710138903, "loss": 0.8211, "step": 29226 }, { "epoch": 0.7504674052070144, "grad_norm": 0.75, "learning_rate": 0.00012038037661575045, "loss": 0.7743, "step": 29227 }, { "epoch": 0.7504930824029362, "grad_norm": 0.78515625, "learning_rate": 0.00012037600608949569, "loss": 0.7765, "step": 29228 }, { "epoch": 0.750518759598858, "grad_norm": 0.7578125, "learning_rate": 0.00012037163552263333, "loss": 0.8254, "step": 29229 }, { "epoch": 0.7505444367947798, "grad_norm": 0.80859375, "learning_rate": 0.00012036726491517214, "loss": 0.8486, "step": 29230 }, { "epoch": 0.7505701139907016, "grad_norm": 0.79296875, "learning_rate": 0.00012036289426712087, "loss": 0.8489, "step": 29231 }, { "epoch": 0.7505957911866235, "grad_norm": 0.72265625, "learning_rate": 0.00012035852357848813, "loss": 0.7996, "step": 29232 }, { "epoch": 0.7506214683825453, "grad_norm": 0.77734375, "learning_rate": 0.00012035415284928273, "loss": 0.7691, "step": 29233 }, { "epoch": 0.7506471455784671, "grad_norm": 0.69921875, "learning_rate": 0.00012034978207951336, "loss": 0.7647, "step": 29234 }, { "epoch": 0.750672822774389, "grad_norm": 0.7578125, "learning_rate": 0.0001203454112691887, "loss": 0.9242, "step": 29235 }, { "epoch": 0.7506984999703108, "grad_norm": 0.76953125, "learning_rate": 0.00012034104041831746, "loss": 0.7522, "step": 29236 }, { "epoch": 0.7507241771662325, "grad_norm": 0.7421875, "learning_rate": 0.00012033666952690838, "loss": 0.8771, "step": 29237 }, { "epoch": 0.7507498543621544, "grad_norm": 0.73046875, "learning_rate": 0.00012033229859497013, "loss": 0.7689, "step": 29238 }, { "epoch": 0.7507755315580762, "grad_norm": 0.703125, "learning_rate": 0.00012032792762251147, "loss": 0.8056, "step": 29239 }, { "epoch": 0.750801208753998, "grad_norm": 0.8046875, "learning_rate": 0.00012032355660954108, "loss": 0.8324, "step": 29240 }, { "epoch": 0.7508268859499199, "grad_norm": 0.71484375, "learning_rate": 0.00012031918555606769, "loss": 0.7297, "step": 29241 }, { "epoch": 0.7508525631458417, "grad_norm": 0.85546875, "learning_rate": 0.00012031481446209998, "loss": 0.752, "step": 29242 }, { "epoch": 0.7508782403417634, "grad_norm": 0.76953125, "learning_rate": 0.00012031044332764667, "loss": 0.8061, "step": 29243 }, { "epoch": 0.7509039175376853, "grad_norm": 0.79296875, "learning_rate": 0.0001203060721527165, "loss": 0.706, "step": 29244 }, { "epoch": 0.7509295947336071, "grad_norm": 0.84375, "learning_rate": 0.00012030170093731816, "loss": 1.0384, "step": 29245 }, { "epoch": 0.7509552719295289, "grad_norm": 0.8125, "learning_rate": 0.00012029732968146034, "loss": 0.7782, "step": 29246 }, { "epoch": 0.7509809491254508, "grad_norm": 0.75, "learning_rate": 0.0001202929583851518, "loss": 0.8059, "step": 29247 }, { "epoch": 0.7510066263213726, "grad_norm": 0.8046875, "learning_rate": 0.00012028858704840122, "loss": 0.8944, "step": 29248 }, { "epoch": 0.7510323035172944, "grad_norm": 0.8125, "learning_rate": 0.00012028421567121732, "loss": 0.9224, "step": 29249 }, { "epoch": 0.7510579807132162, "grad_norm": 0.7734375, "learning_rate": 0.00012027984425360883, "loss": 0.7353, "step": 29250 }, { "epoch": 0.751083657909138, "grad_norm": 0.80078125, "learning_rate": 0.00012027547279558442, "loss": 0.8851, "step": 29251 }, { "epoch": 0.7511093351050598, "grad_norm": 0.77734375, "learning_rate": 0.00012027110129715282, "loss": 0.8471, "step": 29252 }, { "epoch": 0.7511350123009817, "grad_norm": 0.77734375, "learning_rate": 0.00012026672975832276, "loss": 0.7939, "step": 29253 }, { "epoch": 0.7511606894969035, "grad_norm": 0.7890625, "learning_rate": 0.00012026235817910294, "loss": 0.7313, "step": 29254 }, { "epoch": 0.7511863666928253, "grad_norm": 0.7578125, "learning_rate": 0.00012025798655950207, "loss": 0.8671, "step": 29255 }, { "epoch": 0.7512120438887472, "grad_norm": 0.8046875, "learning_rate": 0.00012025361489952884, "loss": 0.8958, "step": 29256 }, { "epoch": 0.7512377210846689, "grad_norm": 0.8203125, "learning_rate": 0.00012024924319919199, "loss": 0.7595, "step": 29257 }, { "epoch": 0.7512633982805907, "grad_norm": 0.73828125, "learning_rate": 0.00012024487145850025, "loss": 0.893, "step": 29258 }, { "epoch": 0.7512890754765126, "grad_norm": 0.7578125, "learning_rate": 0.00012024049967746229, "loss": 0.8157, "step": 29259 }, { "epoch": 0.7513147526724344, "grad_norm": 0.78125, "learning_rate": 0.00012023612785608685, "loss": 0.8649, "step": 29260 }, { "epoch": 0.7513404298683563, "grad_norm": 0.76171875, "learning_rate": 0.00012023175599438265, "loss": 0.7129, "step": 29261 }, { "epoch": 0.7513661070642781, "grad_norm": 0.75, "learning_rate": 0.00012022738409235837, "loss": 0.7924, "step": 29262 }, { "epoch": 0.7513917842601998, "grad_norm": 0.82421875, "learning_rate": 0.00012022301215002276, "loss": 0.798, "step": 29263 }, { "epoch": 0.7514174614561216, "grad_norm": 0.75, "learning_rate": 0.00012021864016738447, "loss": 0.8759, "step": 29264 }, { "epoch": 0.7514431386520435, "grad_norm": 0.75390625, "learning_rate": 0.00012021426814445229, "loss": 0.715, "step": 29265 }, { "epoch": 0.7514688158479653, "grad_norm": 0.75, "learning_rate": 0.00012020989608123489, "loss": 0.7702, "step": 29266 }, { "epoch": 0.7514944930438872, "grad_norm": 0.7421875, "learning_rate": 0.00012020552397774099, "loss": 0.7203, "step": 29267 }, { "epoch": 0.751520170239809, "grad_norm": 0.7734375, "learning_rate": 0.00012020115183397936, "loss": 0.8805, "step": 29268 }, { "epoch": 0.7515458474357308, "grad_norm": 0.71484375, "learning_rate": 0.00012019677964995862, "loss": 0.7699, "step": 29269 }, { "epoch": 0.7515715246316526, "grad_norm": 0.7265625, "learning_rate": 0.00012019240742568747, "loss": 0.754, "step": 29270 }, { "epoch": 0.7515972018275744, "grad_norm": 0.7265625, "learning_rate": 0.00012018803516117473, "loss": 0.8499, "step": 29271 }, { "epoch": 0.7516228790234962, "grad_norm": 0.74609375, "learning_rate": 0.00012018366285642903, "loss": 0.9202, "step": 29272 }, { "epoch": 0.7516485562194181, "grad_norm": 0.75, "learning_rate": 0.00012017929051145916, "loss": 0.804, "step": 29273 }, { "epoch": 0.7516742334153399, "grad_norm": 0.75, "learning_rate": 0.00012017491812627377, "loss": 0.7202, "step": 29274 }, { "epoch": 0.7516999106112617, "grad_norm": 0.75390625, "learning_rate": 0.00012017054570088157, "loss": 0.7804, "step": 29275 }, { "epoch": 0.7517255878071836, "grad_norm": 0.79296875, "learning_rate": 0.0001201661732352913, "loss": 0.8128, "step": 29276 }, { "epoch": 0.7517512650031053, "grad_norm": 0.72265625, "learning_rate": 0.00012016180072951168, "loss": 0.7847, "step": 29277 }, { "epoch": 0.7517769421990271, "grad_norm": 0.80859375, "learning_rate": 0.0001201574281835514, "loss": 0.8137, "step": 29278 }, { "epoch": 0.751802619394949, "grad_norm": 0.8203125, "learning_rate": 0.00012015305559741918, "loss": 0.9218, "step": 29279 }, { "epoch": 0.7518282965908708, "grad_norm": 0.7265625, "learning_rate": 0.00012014868297112376, "loss": 0.8287, "step": 29280 }, { "epoch": 0.7518539737867926, "grad_norm": 0.78515625, "learning_rate": 0.00012014431030467384, "loss": 0.7358, "step": 29281 }, { "epoch": 0.7518796509827145, "grad_norm": 0.79296875, "learning_rate": 0.00012013993759807811, "loss": 0.8587, "step": 29282 }, { "epoch": 0.7519053281786362, "grad_norm": 0.80078125, "learning_rate": 0.00012013556485134529, "loss": 0.8703, "step": 29283 }, { "epoch": 0.751931005374558, "grad_norm": 0.7890625, "learning_rate": 0.00012013119206448413, "loss": 0.8381, "step": 29284 }, { "epoch": 0.7519566825704799, "grad_norm": 0.7578125, "learning_rate": 0.00012012681923750332, "loss": 0.8084, "step": 29285 }, { "epoch": 0.7519823597664017, "grad_norm": 0.79296875, "learning_rate": 0.0001201224463704116, "loss": 0.772, "step": 29286 }, { "epoch": 0.7520080369623235, "grad_norm": 0.69921875, "learning_rate": 0.00012011807346321764, "loss": 0.8449, "step": 29287 }, { "epoch": 0.7520337141582454, "grad_norm": 0.828125, "learning_rate": 0.00012011370051593016, "loss": 0.8468, "step": 29288 }, { "epoch": 0.7520593913541672, "grad_norm": 0.7578125, "learning_rate": 0.00012010932752855793, "loss": 0.7869, "step": 29289 }, { "epoch": 0.7520850685500889, "grad_norm": 0.71875, "learning_rate": 0.00012010495450110961, "loss": 0.8776, "step": 29290 }, { "epoch": 0.7521107457460108, "grad_norm": 0.78515625, "learning_rate": 0.0001201005814335939, "loss": 0.8172, "step": 29291 }, { "epoch": 0.7521364229419326, "grad_norm": 0.75390625, "learning_rate": 0.00012009620832601957, "loss": 0.7716, "step": 29292 }, { "epoch": 0.7521621001378545, "grad_norm": 0.80078125, "learning_rate": 0.00012009183517839533, "loss": 0.8182, "step": 29293 }, { "epoch": 0.7521877773337763, "grad_norm": 0.7265625, "learning_rate": 0.00012008746199072987, "loss": 0.8316, "step": 29294 }, { "epoch": 0.7522134545296981, "grad_norm": 0.94140625, "learning_rate": 0.00012008308876303191, "loss": 0.8252, "step": 29295 }, { "epoch": 0.75223913172562, "grad_norm": 0.87890625, "learning_rate": 0.00012007871549531016, "loss": 0.8732, "step": 29296 }, { "epoch": 0.7522648089215417, "grad_norm": 0.8203125, "learning_rate": 0.00012007434218757334, "loss": 0.8923, "step": 29297 }, { "epoch": 0.7522904861174635, "grad_norm": 0.80859375, "learning_rate": 0.0001200699688398302, "loss": 0.8436, "step": 29298 }, { "epoch": 0.7523161633133854, "grad_norm": 0.81640625, "learning_rate": 0.00012006559545208938, "loss": 0.8325, "step": 29299 }, { "epoch": 0.7523418405093072, "grad_norm": 0.796875, "learning_rate": 0.00012006122202435966, "loss": 0.8981, "step": 29300 }, { "epoch": 0.752367517705229, "grad_norm": 0.8125, "learning_rate": 0.00012005684855664974, "loss": 0.8558, "step": 29301 }, { "epoch": 0.7523931949011509, "grad_norm": 0.7890625, "learning_rate": 0.00012005247504896835, "loss": 0.9046, "step": 29302 }, { "epoch": 0.7524188720970726, "grad_norm": 0.828125, "learning_rate": 0.00012004810150132418, "loss": 0.8777, "step": 29303 }, { "epoch": 0.7524445492929944, "grad_norm": 0.8046875, "learning_rate": 0.00012004372791372591, "loss": 0.8223, "step": 29304 }, { "epoch": 0.7524702264889163, "grad_norm": 0.8046875, "learning_rate": 0.00012003935428618233, "loss": 0.8542, "step": 29305 }, { "epoch": 0.7524959036848381, "grad_norm": 0.8984375, "learning_rate": 0.00012003498061870215, "loss": 0.7422, "step": 29306 }, { "epoch": 0.7525215808807599, "grad_norm": 0.78125, "learning_rate": 0.00012003060691129406, "loss": 0.7094, "step": 29307 }, { "epoch": 0.7525472580766818, "grad_norm": 0.69140625, "learning_rate": 0.00012002623316396678, "loss": 0.7492, "step": 29308 }, { "epoch": 0.7525729352726036, "grad_norm": 0.79296875, "learning_rate": 0.00012002185937672898, "loss": 0.833, "step": 29309 }, { "epoch": 0.7525986124685253, "grad_norm": 0.84375, "learning_rate": 0.00012001748554958947, "loss": 0.8604, "step": 29310 }, { "epoch": 0.7526242896644472, "grad_norm": 0.75390625, "learning_rate": 0.00012001311168255691, "loss": 0.8222, "step": 29311 }, { "epoch": 0.752649966860369, "grad_norm": 0.8046875, "learning_rate": 0.00012000873777563999, "loss": 0.7986, "step": 29312 }, { "epoch": 0.7526756440562908, "grad_norm": 0.82421875, "learning_rate": 0.0001200043638288475, "loss": 0.8001, "step": 29313 }, { "epoch": 0.7527013212522127, "grad_norm": 0.80859375, "learning_rate": 0.00011999998984218814, "loss": 0.8068, "step": 29314 }, { "epoch": 0.7527269984481345, "grad_norm": 0.80859375, "learning_rate": 0.00011999561581567058, "loss": 0.7341, "step": 29315 }, { "epoch": 0.7527526756440563, "grad_norm": 0.75, "learning_rate": 0.00011999124174930356, "loss": 0.7443, "step": 29316 }, { "epoch": 0.7527783528399781, "grad_norm": 0.75390625, "learning_rate": 0.00011998686764309581, "loss": 0.7865, "step": 29317 }, { "epoch": 0.7528040300358999, "grad_norm": 0.84765625, "learning_rate": 0.00011998249349705601, "loss": 0.8333, "step": 29318 }, { "epoch": 0.7528297072318217, "grad_norm": 0.76953125, "learning_rate": 0.00011997811931119293, "loss": 0.7594, "step": 29319 }, { "epoch": 0.7528553844277436, "grad_norm": 0.7890625, "learning_rate": 0.00011997374508551528, "loss": 0.8347, "step": 29320 }, { "epoch": 0.7528810616236654, "grad_norm": 0.73828125, "learning_rate": 0.00011996937082003175, "loss": 0.7649, "step": 29321 }, { "epoch": 0.7529067388195873, "grad_norm": 0.6796875, "learning_rate": 0.00011996499651475107, "loss": 0.8573, "step": 29322 }, { "epoch": 0.752932416015509, "grad_norm": 0.86328125, "learning_rate": 0.00011996062216968194, "loss": 0.7392, "step": 29323 }, { "epoch": 0.7529580932114308, "grad_norm": 0.77734375, "learning_rate": 0.0001199562477848331, "loss": 0.8485, "step": 29324 }, { "epoch": 0.7529837704073526, "grad_norm": 0.79296875, "learning_rate": 0.00011995187336021325, "loss": 0.7929, "step": 29325 }, { "epoch": 0.7530094476032745, "grad_norm": 0.77734375, "learning_rate": 0.00011994749889583112, "loss": 0.7877, "step": 29326 }, { "epoch": 0.7530351247991963, "grad_norm": 0.75390625, "learning_rate": 0.00011994312439169546, "loss": 0.7733, "step": 29327 }, { "epoch": 0.7530608019951182, "grad_norm": 0.8125, "learning_rate": 0.0001199387498478149, "loss": 0.7616, "step": 29328 }, { "epoch": 0.75308647919104, "grad_norm": 0.87109375, "learning_rate": 0.00011993437526419826, "loss": 0.8676, "step": 29329 }, { "epoch": 0.7531121563869617, "grad_norm": 0.7421875, "learning_rate": 0.0001199300006408542, "loss": 0.8643, "step": 29330 }, { "epoch": 0.7531378335828836, "grad_norm": 0.73046875, "learning_rate": 0.00011992562597779143, "loss": 0.7532, "step": 29331 }, { "epoch": 0.7531635107788054, "grad_norm": 0.8515625, "learning_rate": 0.0001199212512750187, "loss": 0.8099, "step": 29332 }, { "epoch": 0.7531891879747272, "grad_norm": 0.76953125, "learning_rate": 0.0001199168765325447, "loss": 0.8487, "step": 29333 }, { "epoch": 0.7532148651706491, "grad_norm": 0.91015625, "learning_rate": 0.0001199125017503782, "loss": 0.8071, "step": 29334 }, { "epoch": 0.7532405423665709, "grad_norm": 0.8359375, "learning_rate": 0.00011990812692852786, "loss": 0.9166, "step": 29335 }, { "epoch": 0.7532662195624926, "grad_norm": 0.765625, "learning_rate": 0.00011990375206700242, "loss": 0.9159, "step": 29336 }, { "epoch": 0.7532918967584145, "grad_norm": 0.80859375, "learning_rate": 0.00011989937716581061, "loss": 0.836, "step": 29337 }, { "epoch": 0.7533175739543363, "grad_norm": 0.78515625, "learning_rate": 0.00011989500222496113, "loss": 0.7281, "step": 29338 }, { "epoch": 0.7533432511502581, "grad_norm": 0.765625, "learning_rate": 0.00011989062724446269, "loss": 0.76, "step": 29339 }, { "epoch": 0.75336892834618, "grad_norm": 0.75, "learning_rate": 0.00011988625222432408, "loss": 0.8437, "step": 29340 }, { "epoch": 0.7533946055421018, "grad_norm": 0.75, "learning_rate": 0.0001198818771645539, "loss": 0.7505, "step": 29341 }, { "epoch": 0.7534202827380236, "grad_norm": 0.796875, "learning_rate": 0.00011987750206516097, "loss": 0.7699, "step": 29342 }, { "epoch": 0.7534459599339454, "grad_norm": 0.7578125, "learning_rate": 0.00011987312692615398, "loss": 0.8462, "step": 29343 }, { "epoch": 0.7534716371298672, "grad_norm": 0.78125, "learning_rate": 0.00011986875174754162, "loss": 0.7153, "step": 29344 }, { "epoch": 0.753497314325789, "grad_norm": 0.77734375, "learning_rate": 0.00011986437652933265, "loss": 0.8903, "step": 29345 }, { "epoch": 0.7535229915217109, "grad_norm": 0.796875, "learning_rate": 0.00011986000127153578, "loss": 0.85, "step": 29346 }, { "epoch": 0.7535486687176327, "grad_norm": 0.76171875, "learning_rate": 0.00011985562597415971, "loss": 0.8546, "step": 29347 }, { "epoch": 0.7535743459135545, "grad_norm": 0.85546875, "learning_rate": 0.00011985125063721319, "loss": 0.9883, "step": 29348 }, { "epoch": 0.7536000231094764, "grad_norm": 0.68359375, "learning_rate": 0.00011984687526070486, "loss": 0.8065, "step": 29349 }, { "epoch": 0.7536257003053981, "grad_norm": 0.75, "learning_rate": 0.00011984249984464355, "loss": 0.6921, "step": 29350 }, { "epoch": 0.7536513775013199, "grad_norm": 0.7734375, "learning_rate": 0.00011983812438903794, "loss": 0.8479, "step": 29351 }, { "epoch": 0.7536770546972418, "grad_norm": 0.703125, "learning_rate": 0.00011983374889389669, "loss": 0.7565, "step": 29352 }, { "epoch": 0.7537027318931636, "grad_norm": 0.80859375, "learning_rate": 0.00011982937335922863, "loss": 0.7751, "step": 29353 }, { "epoch": 0.7537284090890854, "grad_norm": 0.80859375, "learning_rate": 0.0001198249977850424, "loss": 0.8438, "step": 29354 }, { "epoch": 0.7537540862850073, "grad_norm": 0.73046875, "learning_rate": 0.00011982062217134671, "loss": 0.7179, "step": 29355 }, { "epoch": 0.753779763480929, "grad_norm": 0.82421875, "learning_rate": 0.00011981624651815035, "loss": 0.7588, "step": 29356 }, { "epoch": 0.7538054406768508, "grad_norm": 0.86328125, "learning_rate": 0.00011981187082546197, "loss": 0.8889, "step": 29357 }, { "epoch": 0.7538311178727727, "grad_norm": 0.875, "learning_rate": 0.00011980749509329033, "loss": 0.8068, "step": 29358 }, { "epoch": 0.7538567950686945, "grad_norm": 0.78515625, "learning_rate": 0.00011980311932164414, "loss": 0.7722, "step": 29359 }, { "epoch": 0.7538824722646164, "grad_norm": 0.7578125, "learning_rate": 0.00011979874351053215, "loss": 0.7726, "step": 29360 }, { "epoch": 0.7539081494605382, "grad_norm": 0.8671875, "learning_rate": 0.00011979436765996303, "loss": 0.8589, "step": 29361 }, { "epoch": 0.75393382665646, "grad_norm": 0.8203125, "learning_rate": 0.00011978999176994552, "loss": 0.8869, "step": 29362 }, { "epoch": 0.7539595038523818, "grad_norm": 0.72265625, "learning_rate": 0.00011978561584048833, "loss": 0.7433, "step": 29363 }, { "epoch": 0.7539851810483036, "grad_norm": 0.6796875, "learning_rate": 0.00011978123987160021, "loss": 0.736, "step": 29364 }, { "epoch": 0.7540108582442254, "grad_norm": 0.765625, "learning_rate": 0.00011977686386328987, "loss": 0.7286, "step": 29365 }, { "epoch": 0.7540365354401473, "grad_norm": 0.79296875, "learning_rate": 0.00011977248781556605, "loss": 0.8793, "step": 29366 }, { "epoch": 0.7540622126360691, "grad_norm": 0.6875, "learning_rate": 0.00011976811172843742, "loss": 0.7918, "step": 29367 }, { "epoch": 0.7540878898319909, "grad_norm": 0.75390625, "learning_rate": 0.00011976373560191271, "loss": 0.6669, "step": 29368 }, { "epoch": 0.7541135670279128, "grad_norm": 0.76171875, "learning_rate": 0.0001197593594360007, "loss": 0.9133, "step": 29369 }, { "epoch": 0.7541392442238345, "grad_norm": 0.73046875, "learning_rate": 0.00011975498323071006, "loss": 0.8632, "step": 29370 }, { "epoch": 0.7541649214197563, "grad_norm": 0.765625, "learning_rate": 0.0001197506069860495, "loss": 0.7965, "step": 29371 }, { "epoch": 0.7541905986156782, "grad_norm": 0.984375, "learning_rate": 0.00011974623070202777, "loss": 0.8062, "step": 29372 }, { "epoch": 0.7542162758116, "grad_norm": 0.82421875, "learning_rate": 0.00011974185437865362, "loss": 0.9052, "step": 29373 }, { "epoch": 0.7542419530075218, "grad_norm": 0.7265625, "learning_rate": 0.00011973747801593571, "loss": 0.7117, "step": 29374 }, { "epoch": 0.7542676302034437, "grad_norm": 0.76953125, "learning_rate": 0.00011973310161388281, "loss": 0.7487, "step": 29375 }, { "epoch": 0.7542933073993654, "grad_norm": 0.7578125, "learning_rate": 0.00011972872517250358, "loss": 0.8818, "step": 29376 }, { "epoch": 0.7543189845952872, "grad_norm": 0.70703125, "learning_rate": 0.0001197243486918068, "loss": 0.7754, "step": 29377 }, { "epoch": 0.7543446617912091, "grad_norm": 0.83984375, "learning_rate": 0.0001197199721718012, "loss": 0.819, "step": 29378 }, { "epoch": 0.7543703389871309, "grad_norm": 0.7734375, "learning_rate": 0.00011971559561249546, "loss": 0.7982, "step": 29379 }, { "epoch": 0.7543960161830527, "grad_norm": 0.73828125, "learning_rate": 0.00011971121901389832, "loss": 0.8345, "step": 29380 }, { "epoch": 0.7544216933789746, "grad_norm": 0.9140625, "learning_rate": 0.00011970684237601848, "loss": 0.8704, "step": 29381 }, { "epoch": 0.7544473705748964, "grad_norm": 0.77734375, "learning_rate": 0.0001197024656988647, "loss": 0.7144, "step": 29382 }, { "epoch": 0.7544730477708181, "grad_norm": 0.8125, "learning_rate": 0.0001196980889824457, "loss": 0.7299, "step": 29383 }, { "epoch": 0.75449872496674, "grad_norm": 0.82421875, "learning_rate": 0.00011969371222677015, "loss": 0.8374, "step": 29384 }, { "epoch": 0.7545244021626618, "grad_norm": 0.80859375, "learning_rate": 0.00011968933543184683, "loss": 0.813, "step": 29385 }, { "epoch": 0.7545500793585836, "grad_norm": 0.8125, "learning_rate": 0.00011968495859768447, "loss": 0.8571, "step": 29386 }, { "epoch": 0.7545757565545055, "grad_norm": 0.83984375, "learning_rate": 0.00011968058172429175, "loss": 0.7733, "step": 29387 }, { "epoch": 0.7546014337504273, "grad_norm": 0.76171875, "learning_rate": 0.00011967620481167741, "loss": 0.7465, "step": 29388 }, { "epoch": 0.7546271109463492, "grad_norm": 0.8515625, "learning_rate": 0.00011967182785985014, "loss": 0.7819, "step": 29389 }, { "epoch": 0.7546527881422709, "grad_norm": 0.74609375, "learning_rate": 0.00011966745086881873, "loss": 0.993, "step": 29390 }, { "epoch": 0.7546784653381927, "grad_norm": 0.75390625, "learning_rate": 0.00011966307383859185, "loss": 0.9212, "step": 29391 }, { "epoch": 0.7547041425341146, "grad_norm": 0.8515625, "learning_rate": 0.00011965869676917823, "loss": 0.8168, "step": 29392 }, { "epoch": 0.7547298197300364, "grad_norm": 0.78515625, "learning_rate": 0.00011965431966058665, "loss": 0.7165, "step": 29393 }, { "epoch": 0.7547554969259582, "grad_norm": 0.9375, "learning_rate": 0.00011964994251282573, "loss": 0.6951, "step": 29394 }, { "epoch": 0.7547811741218801, "grad_norm": 0.7265625, "learning_rate": 0.00011964556532590428, "loss": 0.8789, "step": 29395 }, { "epoch": 0.7548068513178018, "grad_norm": 0.69921875, "learning_rate": 0.000119641188099831, "loss": 0.8058, "step": 29396 }, { "epoch": 0.7548325285137236, "grad_norm": 0.78515625, "learning_rate": 0.00011963681083461457, "loss": 0.7645, "step": 29397 }, { "epoch": 0.7548582057096455, "grad_norm": 0.7890625, "learning_rate": 0.0001196324335302638, "loss": 0.8052, "step": 29398 }, { "epoch": 0.7548838829055673, "grad_norm": 0.859375, "learning_rate": 0.00011962805618678735, "loss": 1.025, "step": 29399 }, { "epoch": 0.7549095601014891, "grad_norm": 0.74609375, "learning_rate": 0.00011962367880419391, "loss": 0.8706, "step": 29400 }, { "epoch": 0.754935237297411, "grad_norm": 0.75, "learning_rate": 0.00011961930138249231, "loss": 0.7582, "step": 29401 }, { "epoch": 0.7549609144933328, "grad_norm": 0.84375, "learning_rate": 0.00011961492392169119, "loss": 0.8809, "step": 29402 }, { "epoch": 0.7549865916892545, "grad_norm": 0.7734375, "learning_rate": 0.00011961054642179928, "loss": 0.7397, "step": 29403 }, { "epoch": 0.7550122688851764, "grad_norm": 0.79296875, "learning_rate": 0.00011960616888282535, "loss": 0.8694, "step": 29404 }, { "epoch": 0.7550379460810982, "grad_norm": 0.83203125, "learning_rate": 0.00011960179130477809, "loss": 0.885, "step": 29405 }, { "epoch": 0.75506362327702, "grad_norm": 0.80859375, "learning_rate": 0.00011959741368766621, "loss": 1.0298, "step": 29406 }, { "epoch": 0.7550893004729419, "grad_norm": 0.7890625, "learning_rate": 0.00011959303603149847, "loss": 0.7637, "step": 29407 }, { "epoch": 0.7551149776688637, "grad_norm": 0.78125, "learning_rate": 0.00011958865833628357, "loss": 0.7529, "step": 29408 }, { "epoch": 0.7551406548647855, "grad_norm": 0.80859375, "learning_rate": 0.00011958428060203026, "loss": 0.831, "step": 29409 }, { "epoch": 0.7551663320607073, "grad_norm": 0.87109375, "learning_rate": 0.00011957990282874724, "loss": 0.8588, "step": 29410 }, { "epoch": 0.7551920092566291, "grad_norm": 0.75, "learning_rate": 0.00011957552501644325, "loss": 0.8233, "step": 29411 }, { "epoch": 0.7552176864525509, "grad_norm": 0.7421875, "learning_rate": 0.000119571147165127, "loss": 0.81, "step": 29412 }, { "epoch": 0.7552433636484728, "grad_norm": 0.71875, "learning_rate": 0.00011956676927480721, "loss": 0.8356, "step": 29413 }, { "epoch": 0.7552690408443946, "grad_norm": 0.82421875, "learning_rate": 0.00011956239134549262, "loss": 0.7696, "step": 29414 }, { "epoch": 0.7552947180403164, "grad_norm": 0.796875, "learning_rate": 0.000119558013377192, "loss": 0.7707, "step": 29415 }, { "epoch": 0.7553203952362382, "grad_norm": 0.72265625, "learning_rate": 0.00011955363536991395, "loss": 0.7359, "step": 29416 }, { "epoch": 0.75534607243216, "grad_norm": 0.73046875, "learning_rate": 0.0001195492573236673, "loss": 0.7758, "step": 29417 }, { "epoch": 0.7553717496280818, "grad_norm": 0.82421875, "learning_rate": 0.00011954487923846076, "loss": 0.7995, "step": 29418 }, { "epoch": 0.7553974268240037, "grad_norm": 0.77734375, "learning_rate": 0.00011954050111430306, "loss": 0.7854, "step": 29419 }, { "epoch": 0.7554231040199255, "grad_norm": 0.8515625, "learning_rate": 0.00011953612295120286, "loss": 0.7997, "step": 29420 }, { "epoch": 0.7554487812158474, "grad_norm": 0.75390625, "learning_rate": 0.00011953174474916896, "loss": 0.7945, "step": 29421 }, { "epoch": 0.7554744584117692, "grad_norm": 0.7890625, "learning_rate": 0.00011952736650821003, "loss": 0.7378, "step": 29422 }, { "epoch": 0.7555001356076909, "grad_norm": 0.80078125, "learning_rate": 0.00011952298822833486, "loss": 0.8366, "step": 29423 }, { "epoch": 0.7555258128036128, "grad_norm": 0.8671875, "learning_rate": 0.0001195186099095521, "loss": 0.8896, "step": 29424 }, { "epoch": 0.7555514899995346, "grad_norm": 0.78125, "learning_rate": 0.00011951423155187055, "loss": 0.8053, "step": 29425 }, { "epoch": 0.7555771671954564, "grad_norm": 0.82421875, "learning_rate": 0.00011950985315529887, "loss": 0.9484, "step": 29426 }, { "epoch": 0.7556028443913783, "grad_norm": 0.75390625, "learning_rate": 0.00011950547471984583, "loss": 0.8017, "step": 29427 }, { "epoch": 0.7556285215873001, "grad_norm": 0.7265625, "learning_rate": 0.00011950109624552015, "loss": 0.793, "step": 29428 }, { "epoch": 0.7556541987832219, "grad_norm": 0.87109375, "learning_rate": 0.00011949671773233051, "loss": 0.7964, "step": 29429 }, { "epoch": 0.7556798759791437, "grad_norm": 0.92578125, "learning_rate": 0.0001194923391802857, "loss": 0.8843, "step": 29430 }, { "epoch": 0.7557055531750655, "grad_norm": 0.859375, "learning_rate": 0.00011948796058939442, "loss": 0.8751, "step": 29431 }, { "epoch": 0.7557312303709873, "grad_norm": 0.828125, "learning_rate": 0.0001194835819596654, "loss": 0.9262, "step": 29432 }, { "epoch": 0.7557569075669092, "grad_norm": 0.734375, "learning_rate": 0.00011947920329110735, "loss": 0.7524, "step": 29433 }, { "epoch": 0.755782584762831, "grad_norm": 0.94140625, "learning_rate": 0.000119474824583729, "loss": 0.9523, "step": 29434 }, { "epoch": 0.7558082619587528, "grad_norm": 0.7734375, "learning_rate": 0.00011947044583753911, "loss": 0.83, "step": 29435 }, { "epoch": 0.7558339391546746, "grad_norm": 0.796875, "learning_rate": 0.00011946606705254635, "loss": 0.9, "step": 29436 }, { "epoch": 0.7558596163505964, "grad_norm": 0.734375, "learning_rate": 0.00011946168822875948, "loss": 0.6987, "step": 29437 }, { "epoch": 0.7558852935465182, "grad_norm": 0.74609375, "learning_rate": 0.00011945730936618724, "loss": 0.8154, "step": 29438 }, { "epoch": 0.7559109707424401, "grad_norm": 0.76171875, "learning_rate": 0.00011945293046483836, "loss": 0.9294, "step": 29439 }, { "epoch": 0.7559366479383619, "grad_norm": 0.73046875, "learning_rate": 0.0001194485515247215, "loss": 0.6925, "step": 29440 }, { "epoch": 0.7559623251342837, "grad_norm": 0.78125, "learning_rate": 0.00011944417254584545, "loss": 0.773, "step": 29441 }, { "epoch": 0.7559880023302056, "grad_norm": 0.78125, "learning_rate": 0.00011943979352821894, "loss": 0.9402, "step": 29442 }, { "epoch": 0.7560136795261273, "grad_norm": 0.8046875, "learning_rate": 0.00011943541447185065, "loss": 0.7383, "step": 29443 }, { "epoch": 0.7560393567220491, "grad_norm": 0.77734375, "learning_rate": 0.00011943103537674933, "loss": 0.7609, "step": 29444 }, { "epoch": 0.756065033917971, "grad_norm": 0.89453125, "learning_rate": 0.00011942665624292375, "loss": 0.9369, "step": 29445 }, { "epoch": 0.7560907111138928, "grad_norm": 0.75390625, "learning_rate": 0.00011942227707038258, "loss": 0.7812, "step": 29446 }, { "epoch": 0.7561163883098146, "grad_norm": 0.76171875, "learning_rate": 0.00011941789785913457, "loss": 0.7937, "step": 29447 }, { "epoch": 0.7561420655057365, "grad_norm": 0.7109375, "learning_rate": 0.0001194135186091884, "loss": 0.829, "step": 29448 }, { "epoch": 0.7561677427016583, "grad_norm": 0.83203125, "learning_rate": 0.00011940913932055289, "loss": 0.8894, "step": 29449 }, { "epoch": 0.75619341989758, "grad_norm": 0.83984375, "learning_rate": 0.00011940475999323673, "loss": 0.7825, "step": 29450 }, { "epoch": 0.7562190970935019, "grad_norm": 0.73046875, "learning_rate": 0.0001194003806272486, "loss": 0.6487, "step": 29451 }, { "epoch": 0.7562447742894237, "grad_norm": 0.87109375, "learning_rate": 0.0001193960012225973, "loss": 0.8759, "step": 29452 }, { "epoch": 0.7562704514853456, "grad_norm": 0.67578125, "learning_rate": 0.00011939162177929146, "loss": 0.652, "step": 29453 }, { "epoch": 0.7562961286812674, "grad_norm": 0.6953125, "learning_rate": 0.00011938724229733993, "loss": 0.7781, "step": 29454 }, { "epoch": 0.7563218058771892, "grad_norm": 0.74609375, "learning_rate": 0.00011938286277675135, "loss": 0.7247, "step": 29455 }, { "epoch": 0.756347483073111, "grad_norm": 0.80859375, "learning_rate": 0.00011937848321753446, "loss": 0.9183, "step": 29456 }, { "epoch": 0.7563731602690328, "grad_norm": 0.765625, "learning_rate": 0.00011937410361969803, "loss": 0.777, "step": 29457 }, { "epoch": 0.7563988374649546, "grad_norm": 0.90234375, "learning_rate": 0.00011936972398325075, "loss": 0.8722, "step": 29458 }, { "epoch": 0.7564245146608765, "grad_norm": 0.859375, "learning_rate": 0.00011936534430820137, "loss": 0.7669, "step": 29459 }, { "epoch": 0.7564501918567983, "grad_norm": 0.8046875, "learning_rate": 0.00011936096459455862, "loss": 0.8477, "step": 29460 }, { "epoch": 0.7564758690527201, "grad_norm": 0.765625, "learning_rate": 0.00011935658484233117, "loss": 0.7809, "step": 29461 }, { "epoch": 0.756501546248642, "grad_norm": 0.8125, "learning_rate": 0.00011935220505152785, "loss": 0.8764, "step": 29462 }, { "epoch": 0.7565272234445637, "grad_norm": 0.80078125, "learning_rate": 0.00011934782522215729, "loss": 0.9189, "step": 29463 }, { "epoch": 0.7565529006404855, "grad_norm": 0.8046875, "learning_rate": 0.00011934344535422828, "loss": 0.9295, "step": 29464 }, { "epoch": 0.7565785778364074, "grad_norm": 0.71875, "learning_rate": 0.00011933906544774953, "loss": 0.799, "step": 29465 }, { "epoch": 0.7566042550323292, "grad_norm": 0.80859375, "learning_rate": 0.00011933468550272977, "loss": 0.7479, "step": 29466 }, { "epoch": 0.756629932228251, "grad_norm": 0.8671875, "learning_rate": 0.00011933030551917772, "loss": 0.8158, "step": 29467 }, { "epoch": 0.7566556094241729, "grad_norm": 0.828125, "learning_rate": 0.00011932592549710213, "loss": 0.8688, "step": 29468 }, { "epoch": 0.7566812866200947, "grad_norm": 0.76953125, "learning_rate": 0.0001193215454365117, "loss": 0.8332, "step": 29469 }, { "epoch": 0.7567069638160164, "grad_norm": 0.828125, "learning_rate": 0.0001193171653374152, "loss": 0.9022, "step": 29470 }, { "epoch": 0.7567326410119383, "grad_norm": 0.73046875, "learning_rate": 0.00011931278519982133, "loss": 0.8279, "step": 29471 }, { "epoch": 0.7567583182078601, "grad_norm": 0.73828125, "learning_rate": 0.00011930840502373881, "loss": 0.7372, "step": 29472 }, { "epoch": 0.7567839954037819, "grad_norm": 0.7890625, "learning_rate": 0.0001193040248091764, "loss": 0.8417, "step": 29473 }, { "epoch": 0.7568096725997038, "grad_norm": 0.7421875, "learning_rate": 0.00011929964455614278, "loss": 0.7956, "step": 29474 }, { "epoch": 0.7568353497956256, "grad_norm": 0.7578125, "learning_rate": 0.00011929526426464672, "loss": 0.8227, "step": 29475 }, { "epoch": 0.7568610269915473, "grad_norm": 0.80859375, "learning_rate": 0.00011929088393469697, "loss": 0.8972, "step": 29476 }, { "epoch": 0.7568867041874692, "grad_norm": 0.703125, "learning_rate": 0.0001192865035663022, "loss": 0.6895, "step": 29477 }, { "epoch": 0.756912381383391, "grad_norm": 0.765625, "learning_rate": 0.0001192821231594712, "loss": 0.7879, "step": 29478 }, { "epoch": 0.7569380585793128, "grad_norm": 0.80078125, "learning_rate": 0.00011927774271421266, "loss": 0.8228, "step": 29479 }, { "epoch": 0.7569637357752347, "grad_norm": 0.8125, "learning_rate": 0.00011927336223053529, "loss": 0.7467, "step": 29480 }, { "epoch": 0.7569894129711565, "grad_norm": 0.8046875, "learning_rate": 0.00011926898170844789, "loss": 0.881, "step": 29481 }, { "epoch": 0.7570150901670784, "grad_norm": 0.75, "learning_rate": 0.00011926460114795912, "loss": 0.9, "step": 29482 }, { "epoch": 0.7570407673630001, "grad_norm": 0.75390625, "learning_rate": 0.00011926022054907775, "loss": 0.8251, "step": 29483 }, { "epoch": 0.7570664445589219, "grad_norm": 0.76953125, "learning_rate": 0.0001192558399118125, "loss": 0.9437, "step": 29484 }, { "epoch": 0.7570921217548437, "grad_norm": 0.77734375, "learning_rate": 0.00011925145923617211, "loss": 0.8841, "step": 29485 }, { "epoch": 0.7571177989507656, "grad_norm": 0.7890625, "learning_rate": 0.00011924707852216532, "loss": 0.7778, "step": 29486 }, { "epoch": 0.7571434761466874, "grad_norm": 0.765625, "learning_rate": 0.00011924269776980081, "loss": 0.912, "step": 29487 }, { "epoch": 0.7571691533426093, "grad_norm": 0.7890625, "learning_rate": 0.00011923831697908733, "loss": 0.8586, "step": 29488 }, { "epoch": 0.7571948305385311, "grad_norm": 0.6875, "learning_rate": 0.00011923393615003365, "loss": 0.587, "step": 29489 }, { "epoch": 0.7572205077344528, "grad_norm": 0.6953125, "learning_rate": 0.00011922955528264847, "loss": 0.6675, "step": 29490 }, { "epoch": 0.7572461849303747, "grad_norm": 0.83203125, "learning_rate": 0.00011922517437694052, "loss": 0.7779, "step": 29491 }, { "epoch": 0.7572718621262965, "grad_norm": 0.7265625, "learning_rate": 0.00011922079343291854, "loss": 0.801, "step": 29492 }, { "epoch": 0.7572975393222183, "grad_norm": 0.7578125, "learning_rate": 0.00011921641245059124, "loss": 0.8574, "step": 29493 }, { "epoch": 0.7573232165181402, "grad_norm": 0.82421875, "learning_rate": 0.00011921203142996735, "loss": 0.8232, "step": 29494 }, { "epoch": 0.757348893714062, "grad_norm": 0.69921875, "learning_rate": 0.00011920765037105566, "loss": 0.6746, "step": 29495 }, { "epoch": 0.7573745709099837, "grad_norm": 0.76953125, "learning_rate": 0.0001192032692738648, "loss": 0.9019, "step": 29496 }, { "epoch": 0.7574002481059056, "grad_norm": 0.875, "learning_rate": 0.0001191988881384036, "loss": 0.8902, "step": 29497 }, { "epoch": 0.7574259253018274, "grad_norm": 0.76171875, "learning_rate": 0.00011919450696468075, "loss": 0.908, "step": 29498 }, { "epoch": 0.7574516024977492, "grad_norm": 0.76953125, "learning_rate": 0.00011919012575270499, "loss": 0.8704, "step": 29499 }, { "epoch": 0.7574772796936711, "grad_norm": 0.6796875, "learning_rate": 0.00011918574450248503, "loss": 0.7494, "step": 29500 }, { "epoch": 0.7575029568895929, "grad_norm": 0.75390625, "learning_rate": 0.0001191813632140296, "loss": 0.8292, "step": 29501 }, { "epoch": 0.7575286340855147, "grad_norm": 0.8203125, "learning_rate": 0.00011917698188734747, "loss": 0.8402, "step": 29502 }, { "epoch": 0.7575543112814365, "grad_norm": 0.796875, "learning_rate": 0.00011917260052244734, "loss": 0.8443, "step": 29503 }, { "epoch": 0.7575799884773583, "grad_norm": 0.828125, "learning_rate": 0.00011916821911933796, "loss": 0.8771, "step": 29504 }, { "epoch": 0.7576056656732801, "grad_norm": 0.77734375, "learning_rate": 0.00011916383767802803, "loss": 0.9069, "step": 29505 }, { "epoch": 0.757631342869202, "grad_norm": 0.75390625, "learning_rate": 0.00011915945619852629, "loss": 0.8096, "step": 29506 }, { "epoch": 0.7576570200651238, "grad_norm": 0.77734375, "learning_rate": 0.0001191550746808415, "loss": 0.7735, "step": 29507 }, { "epoch": 0.7576826972610456, "grad_norm": 0.78515625, "learning_rate": 0.0001191506931249824, "loss": 0.8795, "step": 29508 }, { "epoch": 0.7577083744569675, "grad_norm": 0.828125, "learning_rate": 0.00011914631153095766, "loss": 0.8357, "step": 29509 }, { "epoch": 0.7577340516528892, "grad_norm": 0.796875, "learning_rate": 0.00011914192989877607, "loss": 0.8827, "step": 29510 }, { "epoch": 0.757759728848811, "grad_norm": 0.7421875, "learning_rate": 0.00011913754822844634, "loss": 0.8492, "step": 29511 }, { "epoch": 0.7577854060447329, "grad_norm": 0.8046875, "learning_rate": 0.00011913316651997721, "loss": 0.8865, "step": 29512 }, { "epoch": 0.7578110832406547, "grad_norm": 0.75390625, "learning_rate": 0.00011912878477337741, "loss": 0.7504, "step": 29513 }, { "epoch": 0.7578367604365766, "grad_norm": 0.82421875, "learning_rate": 0.00011912440298865566, "loss": 0.7609, "step": 29514 }, { "epoch": 0.7578624376324984, "grad_norm": 0.765625, "learning_rate": 0.00011912002116582071, "loss": 0.8853, "step": 29515 }, { "epoch": 0.7578881148284201, "grad_norm": 0.8828125, "learning_rate": 0.00011911563930488128, "loss": 0.8292, "step": 29516 }, { "epoch": 0.757913792024342, "grad_norm": 0.8125, "learning_rate": 0.00011911125740584607, "loss": 0.7824, "step": 29517 }, { "epoch": 0.7579394692202638, "grad_norm": 0.79296875, "learning_rate": 0.00011910687546872394, "loss": 0.8226, "step": 29518 }, { "epoch": 0.7579651464161856, "grad_norm": 0.78125, "learning_rate": 0.00011910249349352347, "loss": 0.7966, "step": 29519 }, { "epoch": 0.7579908236121075, "grad_norm": 0.828125, "learning_rate": 0.00011909811148025344, "loss": 0.805, "step": 29520 }, { "epoch": 0.7580165008080293, "grad_norm": 0.8046875, "learning_rate": 0.00011909372942892265, "loss": 0.9018, "step": 29521 }, { "epoch": 0.7580421780039511, "grad_norm": 0.77734375, "learning_rate": 0.00011908934733953974, "loss": 0.6967, "step": 29522 }, { "epoch": 0.7580678551998729, "grad_norm": 0.71484375, "learning_rate": 0.00011908496521211352, "loss": 0.7911, "step": 29523 }, { "epoch": 0.7580935323957947, "grad_norm": 0.8125, "learning_rate": 0.00011908058304665268, "loss": 0.8461, "step": 29524 }, { "epoch": 0.7581192095917165, "grad_norm": 0.77734375, "learning_rate": 0.00011907620084316595, "loss": 0.731, "step": 29525 }, { "epoch": 0.7581448867876384, "grad_norm": 0.8125, "learning_rate": 0.00011907181860166208, "loss": 0.7601, "step": 29526 }, { "epoch": 0.7581705639835602, "grad_norm": 0.8125, "learning_rate": 0.0001190674363221498, "loss": 0.848, "step": 29527 }, { "epoch": 0.758196241179482, "grad_norm": 0.75390625, "learning_rate": 0.00011906305400463782, "loss": 0.8155, "step": 29528 }, { "epoch": 0.7582219183754039, "grad_norm": 0.79296875, "learning_rate": 0.00011905867164913493, "loss": 0.8462, "step": 29529 }, { "epoch": 0.7582475955713256, "grad_norm": 0.734375, "learning_rate": 0.0001190542892556498, "loss": 0.7822, "step": 29530 }, { "epoch": 0.7582732727672474, "grad_norm": 0.7421875, "learning_rate": 0.00011904990682419121, "loss": 0.7813, "step": 29531 }, { "epoch": 0.7582989499631693, "grad_norm": 0.80859375, "learning_rate": 0.0001190455243547679, "loss": 0.727, "step": 29532 }, { "epoch": 0.7583246271590911, "grad_norm": 0.765625, "learning_rate": 0.00011904114184738853, "loss": 0.9176, "step": 29533 }, { "epoch": 0.7583503043550129, "grad_norm": 0.75390625, "learning_rate": 0.0001190367593020619, "loss": 0.7779, "step": 29534 }, { "epoch": 0.7583759815509348, "grad_norm": 0.76171875, "learning_rate": 0.00011903237671879675, "loss": 0.8279, "step": 29535 }, { "epoch": 0.7584016587468565, "grad_norm": 0.828125, "learning_rate": 0.00011902799409760175, "loss": 0.8934, "step": 29536 }, { "epoch": 0.7584273359427783, "grad_norm": 0.80078125, "learning_rate": 0.00011902361143848573, "loss": 0.7396, "step": 29537 }, { "epoch": 0.7584530131387002, "grad_norm": 0.859375, "learning_rate": 0.00011901922874145734, "loss": 0.8849, "step": 29538 }, { "epoch": 0.758478690334622, "grad_norm": 0.73046875, "learning_rate": 0.00011901484600652536, "loss": 0.737, "step": 29539 }, { "epoch": 0.7585043675305438, "grad_norm": 0.7734375, "learning_rate": 0.00011901046323369848, "loss": 0.8118, "step": 29540 }, { "epoch": 0.7585300447264657, "grad_norm": 0.81640625, "learning_rate": 0.00011900608042298548, "loss": 0.7493, "step": 29541 }, { "epoch": 0.7585557219223875, "grad_norm": 0.8046875, "learning_rate": 0.00011900169757439509, "loss": 0.931, "step": 29542 }, { "epoch": 0.7585813991183092, "grad_norm": 0.71484375, "learning_rate": 0.00011899731468793603, "loss": 0.7743, "step": 29543 }, { "epoch": 0.7586070763142311, "grad_norm": 0.80859375, "learning_rate": 0.00011899293176361703, "loss": 0.9863, "step": 29544 }, { "epoch": 0.7586327535101529, "grad_norm": 0.84375, "learning_rate": 0.00011898854880144685, "loss": 0.845, "step": 29545 }, { "epoch": 0.7586584307060747, "grad_norm": 0.78125, "learning_rate": 0.00011898416580143417, "loss": 0.817, "step": 29546 }, { "epoch": 0.7586841079019966, "grad_norm": 0.76171875, "learning_rate": 0.0001189797827635878, "loss": 0.8208, "step": 29547 }, { "epoch": 0.7587097850979184, "grad_norm": 0.765625, "learning_rate": 0.00011897539968791642, "loss": 0.7291, "step": 29548 }, { "epoch": 0.7587354622938401, "grad_norm": 0.796875, "learning_rate": 0.00011897101657442877, "loss": 0.7581, "step": 29549 }, { "epoch": 0.758761139489762, "grad_norm": 0.97265625, "learning_rate": 0.00011896663342313363, "loss": 0.9013, "step": 29550 }, { "epoch": 0.7587868166856838, "grad_norm": 0.91796875, "learning_rate": 0.00011896225023403968, "loss": 0.7185, "step": 29551 }, { "epoch": 0.7588124938816057, "grad_norm": 0.70703125, "learning_rate": 0.0001189578670071557, "loss": 0.7157, "step": 29552 }, { "epoch": 0.7588381710775275, "grad_norm": 0.8125, "learning_rate": 0.00011895348374249038, "loss": 0.7929, "step": 29553 }, { "epoch": 0.7588638482734493, "grad_norm": 0.734375, "learning_rate": 0.00011894910044005248, "loss": 0.8629, "step": 29554 }, { "epoch": 0.7588895254693712, "grad_norm": 0.80859375, "learning_rate": 0.00011894471709985074, "loss": 0.8156, "step": 29555 }, { "epoch": 0.7589152026652929, "grad_norm": 0.84375, "learning_rate": 0.00011894033372189388, "loss": 0.7817, "step": 29556 }, { "epoch": 0.7589408798612147, "grad_norm": 0.83984375, "learning_rate": 0.00011893595030619067, "loss": 0.902, "step": 29557 }, { "epoch": 0.7589665570571366, "grad_norm": 0.76953125, "learning_rate": 0.00011893156685274982, "loss": 0.814, "step": 29558 }, { "epoch": 0.7589922342530584, "grad_norm": 0.8046875, "learning_rate": 0.00011892718336158004, "loss": 0.9633, "step": 29559 }, { "epoch": 0.7590179114489802, "grad_norm": 0.875, "learning_rate": 0.00011892279983269011, "loss": 1.0013, "step": 29560 }, { "epoch": 0.7590435886449021, "grad_norm": 0.7578125, "learning_rate": 0.00011891841626608875, "loss": 0.8655, "step": 29561 }, { "epoch": 0.7590692658408239, "grad_norm": 0.79296875, "learning_rate": 0.00011891403266178467, "loss": 0.732, "step": 29562 }, { "epoch": 0.7590949430367456, "grad_norm": 0.84765625, "learning_rate": 0.00011890964901978667, "loss": 0.8697, "step": 29563 }, { "epoch": 0.7591206202326675, "grad_norm": 0.796875, "learning_rate": 0.00011890526534010345, "loss": 0.8703, "step": 29564 }, { "epoch": 0.7591462974285893, "grad_norm": 0.70703125, "learning_rate": 0.00011890088162274371, "loss": 0.827, "step": 29565 }, { "epoch": 0.7591719746245111, "grad_norm": 0.7578125, "learning_rate": 0.00011889649786771623, "loss": 0.8202, "step": 29566 }, { "epoch": 0.759197651820433, "grad_norm": 0.76171875, "learning_rate": 0.00011889211407502976, "loss": 0.8049, "step": 29567 }, { "epoch": 0.7592233290163548, "grad_norm": 0.91796875, "learning_rate": 0.00011888773024469298, "loss": 0.7869, "step": 29568 }, { "epoch": 0.7592490062122765, "grad_norm": 0.78515625, "learning_rate": 0.00011888334637671467, "loss": 0.8113, "step": 29569 }, { "epoch": 0.7592746834081984, "grad_norm": 0.734375, "learning_rate": 0.00011887896247110357, "loss": 0.7951, "step": 29570 }, { "epoch": 0.7593003606041202, "grad_norm": 0.86328125, "learning_rate": 0.00011887457852786841, "loss": 0.7108, "step": 29571 }, { "epoch": 0.759326037800042, "grad_norm": 0.80078125, "learning_rate": 0.0001188701945470179, "loss": 0.8825, "step": 29572 }, { "epoch": 0.7593517149959639, "grad_norm": 1.1015625, "learning_rate": 0.00011886581052856081, "loss": 0.8004, "step": 29573 }, { "epoch": 0.7593773921918857, "grad_norm": 0.8203125, "learning_rate": 0.00011886142647250584, "loss": 0.8126, "step": 29574 }, { "epoch": 0.7594030693878075, "grad_norm": 0.8359375, "learning_rate": 0.00011885704237886178, "loss": 0.7738, "step": 29575 }, { "epoch": 0.7594287465837293, "grad_norm": 0.828125, "learning_rate": 0.00011885265824763731, "loss": 0.8103, "step": 29576 }, { "epoch": 0.7594544237796511, "grad_norm": 0.7578125, "learning_rate": 0.00011884827407884123, "loss": 0.8488, "step": 29577 }, { "epoch": 0.759480100975573, "grad_norm": 0.87109375, "learning_rate": 0.00011884388987248221, "loss": 0.7994, "step": 29578 }, { "epoch": 0.7595057781714948, "grad_norm": 0.69921875, "learning_rate": 0.00011883950562856902, "loss": 0.7797, "step": 29579 }, { "epoch": 0.7595314553674166, "grad_norm": 0.796875, "learning_rate": 0.00011883512134711043, "loss": 0.8351, "step": 29580 }, { "epoch": 0.7595571325633385, "grad_norm": 0.77734375, "learning_rate": 0.00011883073702811512, "loss": 0.9027, "step": 29581 }, { "epoch": 0.7595828097592603, "grad_norm": 0.8203125, "learning_rate": 0.00011882635267159186, "loss": 0.7516, "step": 29582 }, { "epoch": 0.759608486955182, "grad_norm": 0.75390625, "learning_rate": 0.00011882196827754936, "loss": 0.7091, "step": 29583 }, { "epoch": 0.7596341641511039, "grad_norm": 0.86328125, "learning_rate": 0.0001188175838459964, "loss": 0.9281, "step": 29584 }, { "epoch": 0.7596598413470257, "grad_norm": 0.82421875, "learning_rate": 0.00011881319937694169, "loss": 0.9404, "step": 29585 }, { "epoch": 0.7596855185429475, "grad_norm": 0.796875, "learning_rate": 0.00011880881487039394, "loss": 0.82, "step": 29586 }, { "epoch": 0.7597111957388694, "grad_norm": 0.84765625, "learning_rate": 0.00011880443032636197, "loss": 0.8717, "step": 29587 }, { "epoch": 0.7597368729347912, "grad_norm": 0.86328125, "learning_rate": 0.00011880004574485447, "loss": 0.9435, "step": 29588 }, { "epoch": 0.7597625501307129, "grad_norm": 0.76171875, "learning_rate": 0.00011879566112588012, "loss": 0.7501, "step": 29589 }, { "epoch": 0.7597882273266348, "grad_norm": 0.75, "learning_rate": 0.0001187912764694478, "loss": 0.8388, "step": 29590 }, { "epoch": 0.7598139045225566, "grad_norm": 0.78125, "learning_rate": 0.00011878689177556608, "loss": 0.8916, "step": 29591 }, { "epoch": 0.7598395817184784, "grad_norm": 0.82421875, "learning_rate": 0.00011878250704424383, "loss": 0.9661, "step": 29592 }, { "epoch": 0.7598652589144003, "grad_norm": 0.86328125, "learning_rate": 0.00011877812227548975, "loss": 0.8623, "step": 29593 }, { "epoch": 0.7598909361103221, "grad_norm": 0.7890625, "learning_rate": 0.0001187737374693125, "loss": 0.8944, "step": 29594 }, { "epoch": 0.7599166133062439, "grad_norm": 0.83203125, "learning_rate": 0.00011876935262572095, "loss": 0.7985, "step": 29595 }, { "epoch": 0.7599422905021657, "grad_norm": 1.078125, "learning_rate": 0.00011876496774472377, "loss": 0.7994, "step": 29596 }, { "epoch": 0.7599679676980875, "grad_norm": 0.91015625, "learning_rate": 0.00011876058282632969, "loss": 0.7715, "step": 29597 }, { "epoch": 0.7599936448940093, "grad_norm": 0.80859375, "learning_rate": 0.00011875619787054747, "loss": 0.7804, "step": 29598 }, { "epoch": 0.7600193220899312, "grad_norm": 0.76953125, "learning_rate": 0.00011875181287738583, "loss": 0.8041, "step": 29599 }, { "epoch": 0.760044999285853, "grad_norm": 0.73046875, "learning_rate": 0.00011874742784685353, "loss": 0.7557, "step": 29600 }, { "epoch": 0.7600706764817748, "grad_norm": 0.859375, "learning_rate": 0.00011874304277895931, "loss": 0.7508, "step": 29601 }, { "epoch": 0.7600963536776967, "grad_norm": 0.765625, "learning_rate": 0.00011873865767371186, "loss": 0.9117, "step": 29602 }, { "epoch": 0.7601220308736184, "grad_norm": 0.83984375, "learning_rate": 0.00011873427253112002, "loss": 0.8854, "step": 29603 }, { "epoch": 0.7601477080695402, "grad_norm": 0.80859375, "learning_rate": 0.00011872988735119244, "loss": 0.7361, "step": 29604 }, { "epoch": 0.7601733852654621, "grad_norm": 0.765625, "learning_rate": 0.00011872550213393788, "loss": 0.7512, "step": 29605 }, { "epoch": 0.7601990624613839, "grad_norm": 0.75, "learning_rate": 0.00011872111687936506, "loss": 0.8061, "step": 29606 }, { "epoch": 0.7602247396573057, "grad_norm": 0.7421875, "learning_rate": 0.00011871673158748275, "loss": 0.8257, "step": 29607 }, { "epoch": 0.7602504168532276, "grad_norm": 0.75390625, "learning_rate": 0.00011871234625829973, "loss": 0.8425, "step": 29608 }, { "epoch": 0.7602760940491493, "grad_norm": 0.84375, "learning_rate": 0.00011870796089182467, "loss": 0.876, "step": 29609 }, { "epoch": 0.7603017712450711, "grad_norm": 0.83203125, "learning_rate": 0.00011870357548806635, "loss": 0.918, "step": 29610 }, { "epoch": 0.760327448440993, "grad_norm": 0.81640625, "learning_rate": 0.00011869919004703347, "loss": 0.9158, "step": 29611 }, { "epoch": 0.7603531256369148, "grad_norm": 0.8125, "learning_rate": 0.00011869480456873481, "loss": 0.8913, "step": 29612 }, { "epoch": 0.7603788028328367, "grad_norm": 0.765625, "learning_rate": 0.00011869041905317906, "loss": 0.8553, "step": 29613 }, { "epoch": 0.7604044800287585, "grad_norm": 0.76953125, "learning_rate": 0.00011868603350037502, "loss": 1.0044, "step": 29614 }, { "epoch": 0.7604301572246803, "grad_norm": 0.77734375, "learning_rate": 0.00011868164791033141, "loss": 0.7123, "step": 29615 }, { "epoch": 0.760455834420602, "grad_norm": 0.73046875, "learning_rate": 0.00011867726228305696, "loss": 0.7668, "step": 29616 }, { "epoch": 0.7604815116165239, "grad_norm": 0.72265625, "learning_rate": 0.0001186728766185604, "loss": 0.8511, "step": 29617 }, { "epoch": 0.7605071888124457, "grad_norm": 0.8125, "learning_rate": 0.00011866849091685046, "loss": 0.8067, "step": 29618 }, { "epoch": 0.7605328660083676, "grad_norm": 0.80078125, "learning_rate": 0.00011866410517793594, "loss": 0.9186, "step": 29619 }, { "epoch": 0.7605585432042894, "grad_norm": 0.76171875, "learning_rate": 0.00011865971940182554, "loss": 0.8156, "step": 29620 }, { "epoch": 0.7605842204002112, "grad_norm": 0.76171875, "learning_rate": 0.00011865533358852801, "loss": 0.7884, "step": 29621 }, { "epoch": 0.7606098975961331, "grad_norm": 0.8046875, "learning_rate": 0.00011865094773805207, "loss": 0.7809, "step": 29622 }, { "epoch": 0.7606355747920548, "grad_norm": 0.796875, "learning_rate": 0.00011864656185040648, "loss": 0.8493, "step": 29623 }, { "epoch": 0.7606612519879766, "grad_norm": 0.8359375, "learning_rate": 0.00011864217592559998, "loss": 0.9608, "step": 29624 }, { "epoch": 0.7606869291838985, "grad_norm": 0.7578125, "learning_rate": 0.00011863778996364132, "loss": 0.873, "step": 29625 }, { "epoch": 0.7607126063798203, "grad_norm": 0.75, "learning_rate": 0.00011863340396453919, "loss": 0.8369, "step": 29626 }, { "epoch": 0.7607382835757421, "grad_norm": 0.83984375, "learning_rate": 0.00011862901792830242, "loss": 0.917, "step": 29627 }, { "epoch": 0.760763960771664, "grad_norm": 0.78515625, "learning_rate": 0.00011862463185493966, "loss": 0.9107, "step": 29628 }, { "epoch": 0.7607896379675857, "grad_norm": 0.82421875, "learning_rate": 0.0001186202457444597, "loss": 0.81, "step": 29629 }, { "epoch": 0.7608153151635075, "grad_norm": 0.8515625, "learning_rate": 0.00011861585959687129, "loss": 0.9391, "step": 29630 }, { "epoch": 0.7608409923594294, "grad_norm": 0.8984375, "learning_rate": 0.00011861147341218313, "loss": 0.9255, "step": 29631 }, { "epoch": 0.7608666695553512, "grad_norm": 0.76953125, "learning_rate": 0.00011860708719040401, "loss": 0.7677, "step": 29632 }, { "epoch": 0.760892346751273, "grad_norm": 0.8046875, "learning_rate": 0.00011860270093154264, "loss": 0.9313, "step": 29633 }, { "epoch": 0.7609180239471949, "grad_norm": 1.0234375, "learning_rate": 0.00011859831463560773, "loss": 0.7943, "step": 29634 }, { "epoch": 0.7609437011431167, "grad_norm": 0.7734375, "learning_rate": 0.00011859392830260809, "loss": 0.8114, "step": 29635 }, { "epoch": 0.7609693783390384, "grad_norm": 0.8515625, "learning_rate": 0.00011858954193255245, "loss": 0.7928, "step": 29636 }, { "epoch": 0.7609950555349603, "grad_norm": 0.734375, "learning_rate": 0.00011858515552544951, "loss": 0.8945, "step": 29637 }, { "epoch": 0.7610207327308821, "grad_norm": 0.75, "learning_rate": 0.00011858076908130804, "loss": 0.8845, "step": 29638 }, { "epoch": 0.761046409926804, "grad_norm": 0.7421875, "learning_rate": 0.00011857638260013675, "loss": 0.7678, "step": 29639 }, { "epoch": 0.7610720871227258, "grad_norm": 0.7265625, "learning_rate": 0.00011857199608194443, "loss": 0.7665, "step": 29640 }, { "epoch": 0.7610977643186476, "grad_norm": 0.9140625, "learning_rate": 0.0001185676095267398, "loss": 0.7421, "step": 29641 }, { "epoch": 0.7611234415145695, "grad_norm": 0.90625, "learning_rate": 0.00011856322293453162, "loss": 0.8348, "step": 29642 }, { "epoch": 0.7611491187104912, "grad_norm": 0.84375, "learning_rate": 0.00011855883630532858, "loss": 0.8219, "step": 29643 }, { "epoch": 0.761174795906413, "grad_norm": 0.99609375, "learning_rate": 0.00011855444963913944, "loss": 0.8781, "step": 29644 }, { "epoch": 0.7612004731023349, "grad_norm": 0.765625, "learning_rate": 0.000118550062935973, "loss": 0.6755, "step": 29645 }, { "epoch": 0.7612261502982567, "grad_norm": 0.77734375, "learning_rate": 0.00011854567619583797, "loss": 0.8092, "step": 29646 }, { "epoch": 0.7612518274941785, "grad_norm": 0.78515625, "learning_rate": 0.00011854128941874303, "loss": 0.8459, "step": 29647 }, { "epoch": 0.7612775046901004, "grad_norm": 0.91015625, "learning_rate": 0.000118536902604697, "loss": 0.8118, "step": 29648 }, { "epoch": 0.7613031818860221, "grad_norm": 0.828125, "learning_rate": 0.00011853251575370863, "loss": 0.9656, "step": 29649 }, { "epoch": 0.7613288590819439, "grad_norm": 0.75, "learning_rate": 0.00011852812886578659, "loss": 0.8209, "step": 29650 }, { "epoch": 0.7613545362778658, "grad_norm": 0.921875, "learning_rate": 0.00011852374194093967, "loss": 0.9116, "step": 29651 }, { "epoch": 0.7613802134737876, "grad_norm": 0.73828125, "learning_rate": 0.00011851935497917661, "loss": 0.7842, "step": 29652 }, { "epoch": 0.7614058906697094, "grad_norm": 0.73828125, "learning_rate": 0.00011851496798050615, "loss": 0.7387, "step": 29653 }, { "epoch": 0.7614315678656313, "grad_norm": 0.77734375, "learning_rate": 0.00011851058094493703, "loss": 0.8917, "step": 29654 }, { "epoch": 0.7614572450615531, "grad_norm": 0.83203125, "learning_rate": 0.00011850619387247796, "loss": 0.9528, "step": 29655 }, { "epoch": 0.7614829222574748, "grad_norm": 0.765625, "learning_rate": 0.00011850180676313778, "loss": 0.7974, "step": 29656 }, { "epoch": 0.7615085994533967, "grad_norm": 0.79296875, "learning_rate": 0.00011849741961692515, "loss": 0.821, "step": 29657 }, { "epoch": 0.7615342766493185, "grad_norm": 1.109375, "learning_rate": 0.00011849303243384881, "loss": 0.9151, "step": 29658 }, { "epoch": 0.7615599538452403, "grad_norm": 0.81640625, "learning_rate": 0.00011848864521391754, "loss": 0.942, "step": 29659 }, { "epoch": 0.7615856310411622, "grad_norm": 0.83203125, "learning_rate": 0.00011848425795714009, "loss": 0.8926, "step": 29660 }, { "epoch": 0.761611308237084, "grad_norm": 0.7578125, "learning_rate": 0.00011847987066352515, "loss": 0.877, "step": 29661 }, { "epoch": 0.7616369854330058, "grad_norm": 0.75390625, "learning_rate": 0.00011847548333308153, "loss": 0.8396, "step": 29662 }, { "epoch": 0.7616626626289276, "grad_norm": 0.7421875, "learning_rate": 0.00011847109596581792, "loss": 0.8642, "step": 29663 }, { "epoch": 0.7616883398248494, "grad_norm": 0.75390625, "learning_rate": 0.00011846670856174307, "loss": 0.7017, "step": 29664 }, { "epoch": 0.7617140170207712, "grad_norm": 0.85546875, "learning_rate": 0.00011846232112086577, "loss": 0.966, "step": 29665 }, { "epoch": 0.7617396942166931, "grad_norm": 0.8046875, "learning_rate": 0.00011845793364319468, "loss": 0.8883, "step": 29666 }, { "epoch": 0.7617653714126149, "grad_norm": 0.77734375, "learning_rate": 0.00011845354612873864, "loss": 0.844, "step": 29667 }, { "epoch": 0.7617910486085367, "grad_norm": 0.875, "learning_rate": 0.00011844915857750635, "loss": 0.8149, "step": 29668 }, { "epoch": 0.7618167258044585, "grad_norm": 0.78125, "learning_rate": 0.00011844477098950655, "loss": 0.8961, "step": 29669 }, { "epoch": 0.7618424030003803, "grad_norm": 0.796875, "learning_rate": 0.00011844038336474798, "loss": 0.8397, "step": 29670 }, { "epoch": 0.7618680801963021, "grad_norm": 0.82421875, "learning_rate": 0.0001184359957032394, "loss": 0.8147, "step": 29671 }, { "epoch": 0.761893757392224, "grad_norm": 0.828125, "learning_rate": 0.00011843160800498954, "loss": 0.9409, "step": 29672 }, { "epoch": 0.7619194345881458, "grad_norm": 0.796875, "learning_rate": 0.00011842722027000715, "loss": 0.9758, "step": 29673 }, { "epoch": 0.7619451117840677, "grad_norm": 0.86328125, "learning_rate": 0.00011842283249830095, "loss": 0.744, "step": 29674 }, { "epoch": 0.7619707889799895, "grad_norm": 0.8671875, "learning_rate": 0.00011841844468987977, "loss": 0.7875, "step": 29675 }, { "epoch": 0.7619964661759112, "grad_norm": 0.77734375, "learning_rate": 0.00011841405684475225, "loss": 0.8106, "step": 29676 }, { "epoch": 0.762022143371833, "grad_norm": 0.74609375, "learning_rate": 0.00011840966896292718, "loss": 0.8785, "step": 29677 }, { "epoch": 0.7620478205677549, "grad_norm": 0.765625, "learning_rate": 0.0001184052810444133, "loss": 0.9061, "step": 29678 }, { "epoch": 0.7620734977636767, "grad_norm": 0.796875, "learning_rate": 0.00011840089308921935, "loss": 0.8578, "step": 29679 }, { "epoch": 0.7620991749595986, "grad_norm": 0.7109375, "learning_rate": 0.00011839650509735412, "loss": 0.7046, "step": 29680 }, { "epoch": 0.7621248521555204, "grad_norm": 0.83984375, "learning_rate": 0.00011839211706882629, "loss": 0.7492, "step": 29681 }, { "epoch": 0.7621505293514422, "grad_norm": 0.73828125, "learning_rate": 0.00011838772900364463, "loss": 0.7824, "step": 29682 }, { "epoch": 0.762176206547364, "grad_norm": 0.9140625, "learning_rate": 0.0001183833409018179, "loss": 1.0306, "step": 29683 }, { "epoch": 0.7622018837432858, "grad_norm": 0.765625, "learning_rate": 0.00011837895276335482, "loss": 0.86, "step": 29684 }, { "epoch": 0.7622275609392076, "grad_norm": 0.84375, "learning_rate": 0.00011837456458826416, "loss": 0.8747, "step": 29685 }, { "epoch": 0.7622532381351295, "grad_norm": 0.82421875, "learning_rate": 0.00011837017637655466, "loss": 0.7489, "step": 29686 }, { "epoch": 0.7622789153310513, "grad_norm": 0.76953125, "learning_rate": 0.00011836578812823502, "loss": 0.8051, "step": 29687 }, { "epoch": 0.7623045925269731, "grad_norm": 0.80859375, "learning_rate": 0.00011836139984331404, "loss": 0.8127, "step": 29688 }, { "epoch": 0.7623302697228949, "grad_norm": 0.78125, "learning_rate": 0.0001183570115218005, "loss": 0.7403, "step": 29689 }, { "epoch": 0.7623559469188167, "grad_norm": 0.75, "learning_rate": 0.00011835262316370301, "loss": 0.7934, "step": 29690 }, { "epoch": 0.7623816241147385, "grad_norm": 0.80859375, "learning_rate": 0.00011834823476903045, "loss": 0.7784, "step": 29691 }, { "epoch": 0.7624073013106604, "grad_norm": 0.8515625, "learning_rate": 0.00011834384633779152, "loss": 0.8097, "step": 29692 }, { "epoch": 0.7624329785065822, "grad_norm": 0.75390625, "learning_rate": 0.00011833945786999492, "loss": 0.8301, "step": 29693 }, { "epoch": 0.762458655702504, "grad_norm": 0.80078125, "learning_rate": 0.00011833506936564947, "loss": 0.8233, "step": 29694 }, { "epoch": 0.7624843328984259, "grad_norm": 0.84765625, "learning_rate": 0.00011833068082476389, "loss": 0.8077, "step": 29695 }, { "epoch": 0.7625100100943476, "grad_norm": 0.88671875, "learning_rate": 0.0001183262922473469, "loss": 0.8467, "step": 29696 }, { "epoch": 0.7625356872902694, "grad_norm": 0.77734375, "learning_rate": 0.00011832190363340727, "loss": 0.7386, "step": 29697 }, { "epoch": 0.7625613644861913, "grad_norm": 0.83984375, "learning_rate": 0.00011831751498295374, "loss": 0.9225, "step": 29698 }, { "epoch": 0.7625870416821131, "grad_norm": 0.8046875, "learning_rate": 0.00011831312629599504, "loss": 0.7504, "step": 29699 }, { "epoch": 0.762612718878035, "grad_norm": 0.7890625, "learning_rate": 0.00011830873757253997, "loss": 0.7221, "step": 29700 }, { "epoch": 0.7626383960739568, "grad_norm": 0.83203125, "learning_rate": 0.0001183043488125972, "loss": 0.7909, "step": 29701 }, { "epoch": 0.7626640732698786, "grad_norm": 0.8671875, "learning_rate": 0.00011829996001617558, "loss": 0.8516, "step": 29702 }, { "epoch": 0.7626897504658003, "grad_norm": 0.7890625, "learning_rate": 0.00011829557118328374, "loss": 0.7875, "step": 29703 }, { "epoch": 0.7627154276617222, "grad_norm": 0.78125, "learning_rate": 0.00011829118231393048, "loss": 0.7869, "step": 29704 }, { "epoch": 0.762741104857644, "grad_norm": 0.75390625, "learning_rate": 0.00011828679340812456, "loss": 0.7547, "step": 29705 }, { "epoch": 0.7627667820535659, "grad_norm": 0.74609375, "learning_rate": 0.00011828240446587468, "loss": 0.8278, "step": 29706 }, { "epoch": 0.7627924592494877, "grad_norm": 0.71875, "learning_rate": 0.00011827801548718966, "loss": 0.7534, "step": 29707 }, { "epoch": 0.7628181364454095, "grad_norm": 0.765625, "learning_rate": 0.0001182736264720782, "loss": 0.7955, "step": 29708 }, { "epoch": 0.7628438136413312, "grad_norm": 0.76171875, "learning_rate": 0.00011826923742054906, "loss": 0.8003, "step": 29709 }, { "epoch": 0.7628694908372531, "grad_norm": 0.8046875, "learning_rate": 0.00011826484833261098, "loss": 0.8574, "step": 29710 }, { "epoch": 0.7628951680331749, "grad_norm": 0.7890625, "learning_rate": 0.00011826045920827267, "loss": 0.8038, "step": 29711 }, { "epoch": 0.7629208452290968, "grad_norm": 0.83203125, "learning_rate": 0.00011825607004754293, "loss": 0.775, "step": 29712 }, { "epoch": 0.7629465224250186, "grad_norm": 0.69140625, "learning_rate": 0.00011825168085043051, "loss": 0.7952, "step": 29713 }, { "epoch": 0.7629721996209404, "grad_norm": 0.76953125, "learning_rate": 0.00011824729161694413, "loss": 0.864, "step": 29714 }, { "epoch": 0.7629978768168623, "grad_norm": 0.74609375, "learning_rate": 0.00011824290234709256, "loss": 0.7627, "step": 29715 }, { "epoch": 0.763023554012784, "grad_norm": 0.765625, "learning_rate": 0.00011823851304088449, "loss": 0.8037, "step": 29716 }, { "epoch": 0.7630492312087058, "grad_norm": 0.7890625, "learning_rate": 0.00011823412369832875, "loss": 0.7993, "step": 29717 }, { "epoch": 0.7630749084046277, "grad_norm": 0.7578125, "learning_rate": 0.00011822973431943404, "loss": 0.727, "step": 29718 }, { "epoch": 0.7631005856005495, "grad_norm": 1.0078125, "learning_rate": 0.00011822534490420909, "loss": 0.7968, "step": 29719 }, { "epoch": 0.7631262627964713, "grad_norm": 0.8046875, "learning_rate": 0.00011822095545266272, "loss": 0.8399, "step": 29720 }, { "epoch": 0.7631519399923932, "grad_norm": 0.79296875, "learning_rate": 0.00011821656596480362, "loss": 0.8687, "step": 29721 }, { "epoch": 0.763177617188315, "grad_norm": 0.7578125, "learning_rate": 0.00011821217644064053, "loss": 0.8599, "step": 29722 }, { "epoch": 0.7632032943842367, "grad_norm": 0.83203125, "learning_rate": 0.00011820778688018223, "loss": 0.7894, "step": 29723 }, { "epoch": 0.7632289715801586, "grad_norm": 0.69921875, "learning_rate": 0.00011820339728343744, "loss": 0.7024, "step": 29724 }, { "epoch": 0.7632546487760804, "grad_norm": 0.80078125, "learning_rate": 0.00011819900765041494, "loss": 0.7752, "step": 29725 }, { "epoch": 0.7632803259720022, "grad_norm": 0.77734375, "learning_rate": 0.00011819461798112345, "loss": 0.7545, "step": 29726 }, { "epoch": 0.7633060031679241, "grad_norm": 0.81640625, "learning_rate": 0.00011819022827557174, "loss": 0.7375, "step": 29727 }, { "epoch": 0.7633316803638459, "grad_norm": 0.8125, "learning_rate": 0.00011818583853376857, "loss": 0.8421, "step": 29728 }, { "epoch": 0.7633573575597676, "grad_norm": 0.87109375, "learning_rate": 0.00011818144875572263, "loss": 0.8915, "step": 29729 }, { "epoch": 0.7633830347556895, "grad_norm": 0.78515625, "learning_rate": 0.00011817705894144271, "loss": 0.9012, "step": 29730 }, { "epoch": 0.7634087119516113, "grad_norm": 0.83203125, "learning_rate": 0.00011817266909093756, "loss": 0.8712, "step": 29731 }, { "epoch": 0.7634343891475331, "grad_norm": 0.78125, "learning_rate": 0.00011816827920421591, "loss": 0.8886, "step": 29732 }, { "epoch": 0.763460066343455, "grad_norm": 0.76953125, "learning_rate": 0.00011816388928128655, "loss": 0.7582, "step": 29733 }, { "epoch": 0.7634857435393768, "grad_norm": 0.8125, "learning_rate": 0.00011815949932215817, "loss": 0.8265, "step": 29734 }, { "epoch": 0.7635114207352987, "grad_norm": 0.9140625, "learning_rate": 0.00011815510932683957, "loss": 0.9084, "step": 29735 }, { "epoch": 0.7635370979312204, "grad_norm": 0.7890625, "learning_rate": 0.00011815071929533948, "loss": 0.8273, "step": 29736 }, { "epoch": 0.7635627751271422, "grad_norm": 0.77734375, "learning_rate": 0.00011814632922766664, "loss": 0.6866, "step": 29737 }, { "epoch": 0.763588452323064, "grad_norm": 0.7890625, "learning_rate": 0.00011814193912382979, "loss": 0.7548, "step": 29738 }, { "epoch": 0.7636141295189859, "grad_norm": 0.77734375, "learning_rate": 0.00011813754898383769, "loss": 0.8678, "step": 29739 }, { "epoch": 0.7636398067149077, "grad_norm": 0.89453125, "learning_rate": 0.0001181331588076991, "loss": 0.883, "step": 29740 }, { "epoch": 0.7636654839108296, "grad_norm": 0.82421875, "learning_rate": 0.00011812876859542278, "loss": 0.8852, "step": 29741 }, { "epoch": 0.7636911611067513, "grad_norm": 0.7265625, "learning_rate": 0.00011812437834701746, "loss": 0.8013, "step": 29742 }, { "epoch": 0.7637168383026731, "grad_norm": 0.75390625, "learning_rate": 0.00011811998806249188, "loss": 0.7816, "step": 29743 }, { "epoch": 0.763742515498595, "grad_norm": 0.765625, "learning_rate": 0.00011811559774185478, "loss": 0.7703, "step": 29744 }, { "epoch": 0.7637681926945168, "grad_norm": 0.69921875, "learning_rate": 0.00011811120738511496, "loss": 0.7893, "step": 29745 }, { "epoch": 0.7637938698904386, "grad_norm": 1.59375, "learning_rate": 0.00011810681699228112, "loss": 0.7114, "step": 29746 }, { "epoch": 0.7638195470863605, "grad_norm": 0.82421875, "learning_rate": 0.00011810242656336204, "loss": 0.897, "step": 29747 }, { "epoch": 0.7638452242822823, "grad_norm": 0.76171875, "learning_rate": 0.00011809803609836648, "loss": 0.8497, "step": 29748 }, { "epoch": 0.763870901478204, "grad_norm": 0.83984375, "learning_rate": 0.00011809364559730314, "loss": 0.9254, "step": 29749 }, { "epoch": 0.7638965786741259, "grad_norm": 0.83984375, "learning_rate": 0.00011808925506018082, "loss": 0.9753, "step": 29750 }, { "epoch": 0.7639222558700477, "grad_norm": 0.765625, "learning_rate": 0.00011808486448700821, "loss": 0.7072, "step": 29751 }, { "epoch": 0.7639479330659695, "grad_norm": 0.765625, "learning_rate": 0.00011808047387779412, "loss": 0.8117, "step": 29752 }, { "epoch": 0.7639736102618914, "grad_norm": 0.78125, "learning_rate": 0.0001180760832325473, "loss": 0.7815, "step": 29753 }, { "epoch": 0.7639992874578132, "grad_norm": 0.76953125, "learning_rate": 0.00011807169255127646, "loss": 0.8677, "step": 29754 }, { "epoch": 0.764024964653735, "grad_norm": 0.796875, "learning_rate": 0.0001180673018339904, "loss": 0.8093, "step": 29755 }, { "epoch": 0.7640506418496568, "grad_norm": 0.83203125, "learning_rate": 0.00011806291108069778, "loss": 0.8882, "step": 29756 }, { "epoch": 0.7640763190455786, "grad_norm": 0.7421875, "learning_rate": 0.00011805852029140745, "loss": 0.7103, "step": 29757 }, { "epoch": 0.7641019962415004, "grad_norm": 0.796875, "learning_rate": 0.0001180541294661281, "loss": 0.8076, "step": 29758 }, { "epoch": 0.7641276734374223, "grad_norm": 0.8515625, "learning_rate": 0.0001180497386048685, "loss": 0.9949, "step": 29759 }, { "epoch": 0.7641533506333441, "grad_norm": 0.77734375, "learning_rate": 0.00011804534770763742, "loss": 0.903, "step": 29760 }, { "epoch": 0.7641790278292659, "grad_norm": 0.796875, "learning_rate": 0.0001180409567744436, "loss": 0.9097, "step": 29761 }, { "epoch": 0.7642047050251877, "grad_norm": 0.74609375, "learning_rate": 0.00011803656580529576, "loss": 0.8786, "step": 29762 }, { "epoch": 0.7642303822211095, "grad_norm": 0.75390625, "learning_rate": 0.0001180321748002027, "loss": 0.868, "step": 29763 }, { "epoch": 0.7642560594170313, "grad_norm": 0.71875, "learning_rate": 0.0001180277837591731, "loss": 0.8313, "step": 29764 }, { "epoch": 0.7642817366129532, "grad_norm": 0.79296875, "learning_rate": 0.00011802339268221578, "loss": 0.8081, "step": 29765 }, { "epoch": 0.764307413808875, "grad_norm": 0.7421875, "learning_rate": 0.00011801900156933948, "loss": 0.8713, "step": 29766 }, { "epoch": 0.7643330910047968, "grad_norm": 0.796875, "learning_rate": 0.00011801461042055293, "loss": 0.8308, "step": 29767 }, { "epoch": 0.7643587682007187, "grad_norm": 0.8125, "learning_rate": 0.00011801021923586489, "loss": 0.7833, "step": 29768 }, { "epoch": 0.7643844453966404, "grad_norm": 0.81640625, "learning_rate": 0.00011800582801528408, "loss": 0.946, "step": 29769 }, { "epoch": 0.7644101225925622, "grad_norm": 0.84765625, "learning_rate": 0.00011800143675881932, "loss": 0.804, "step": 29770 }, { "epoch": 0.7644357997884841, "grad_norm": 0.7421875, "learning_rate": 0.00011799704546647931, "loss": 0.8336, "step": 29771 }, { "epoch": 0.7644614769844059, "grad_norm": 0.875, "learning_rate": 0.00011799265413827281, "loss": 0.8776, "step": 29772 }, { "epoch": 0.7644871541803278, "grad_norm": 0.8203125, "learning_rate": 0.00011798826277420856, "loss": 0.904, "step": 29773 }, { "epoch": 0.7645128313762496, "grad_norm": 0.7734375, "learning_rate": 0.00011798387137429539, "loss": 0.7394, "step": 29774 }, { "epoch": 0.7645385085721714, "grad_norm": 0.85546875, "learning_rate": 0.00011797947993854193, "loss": 0.9334, "step": 29775 }, { "epoch": 0.7645641857680932, "grad_norm": 0.796875, "learning_rate": 0.000117975088466957, "loss": 0.901, "step": 29776 }, { "epoch": 0.764589862964015, "grad_norm": 0.828125, "learning_rate": 0.00011797069695954934, "loss": 0.8454, "step": 29777 }, { "epoch": 0.7646155401599368, "grad_norm": 0.77734375, "learning_rate": 0.00011796630541632769, "loss": 0.8556, "step": 29778 }, { "epoch": 0.7646412173558587, "grad_norm": 0.8515625, "learning_rate": 0.00011796191383730084, "loss": 0.7542, "step": 29779 }, { "epoch": 0.7646668945517805, "grad_norm": 0.6953125, "learning_rate": 0.00011795752222247751, "loss": 0.7629, "step": 29780 }, { "epoch": 0.7646925717477023, "grad_norm": 0.73828125, "learning_rate": 0.0001179531305718665, "loss": 0.7739, "step": 29781 }, { "epoch": 0.7647182489436241, "grad_norm": 0.71875, "learning_rate": 0.00011794873888547649, "loss": 0.692, "step": 29782 }, { "epoch": 0.7647439261395459, "grad_norm": 0.79296875, "learning_rate": 0.00011794434716331624, "loss": 0.8061, "step": 29783 }, { "epoch": 0.7647696033354677, "grad_norm": 1.1328125, "learning_rate": 0.00011793995540539455, "loss": 0.8068, "step": 29784 }, { "epoch": 0.7647952805313896, "grad_norm": 0.89453125, "learning_rate": 0.00011793556361172016, "loss": 0.8687, "step": 29785 }, { "epoch": 0.7648209577273114, "grad_norm": 0.83984375, "learning_rate": 0.00011793117178230177, "loss": 0.8094, "step": 29786 }, { "epoch": 0.7648466349232332, "grad_norm": 0.70703125, "learning_rate": 0.00011792677991714825, "loss": 0.8433, "step": 29787 }, { "epoch": 0.7648723121191551, "grad_norm": 0.76953125, "learning_rate": 0.0001179223880162682, "loss": 0.8792, "step": 29788 }, { "epoch": 0.7648979893150768, "grad_norm": 0.81640625, "learning_rate": 0.00011791799607967049, "loss": 0.7337, "step": 29789 }, { "epoch": 0.7649236665109986, "grad_norm": 0.8203125, "learning_rate": 0.00011791360410736383, "loss": 0.8427, "step": 29790 }, { "epoch": 0.7649493437069205, "grad_norm": 0.79296875, "learning_rate": 0.00011790921209935697, "loss": 0.8732, "step": 29791 }, { "epoch": 0.7649750209028423, "grad_norm": 0.84375, "learning_rate": 0.00011790482005565866, "loss": 0.9006, "step": 29792 }, { "epoch": 0.7650006980987641, "grad_norm": 0.71484375, "learning_rate": 0.00011790042797627767, "loss": 0.7013, "step": 29793 }, { "epoch": 0.765026375294686, "grad_norm": 0.88671875, "learning_rate": 0.00011789603586122275, "loss": 0.8537, "step": 29794 }, { "epoch": 0.7650520524906078, "grad_norm": 0.83984375, "learning_rate": 0.00011789164371050265, "loss": 0.9967, "step": 29795 }, { "epoch": 0.7650777296865295, "grad_norm": 0.8359375, "learning_rate": 0.0001178872515241261, "loss": 0.7766, "step": 29796 }, { "epoch": 0.7651034068824514, "grad_norm": 0.91796875, "learning_rate": 0.00011788285930210188, "loss": 0.8354, "step": 29797 }, { "epoch": 0.7651290840783732, "grad_norm": 1.1484375, "learning_rate": 0.00011787846704443874, "loss": 0.7935, "step": 29798 }, { "epoch": 0.765154761274295, "grad_norm": 0.875, "learning_rate": 0.00011787407475114541, "loss": 0.8664, "step": 29799 }, { "epoch": 0.7651804384702169, "grad_norm": 0.81640625, "learning_rate": 0.00011786968242223071, "loss": 1.0147, "step": 29800 }, { "epoch": 0.7652061156661387, "grad_norm": 0.734375, "learning_rate": 0.0001178652900577033, "loss": 0.8274, "step": 29801 }, { "epoch": 0.7652317928620604, "grad_norm": 0.77734375, "learning_rate": 0.00011786089765757203, "loss": 0.7683, "step": 29802 }, { "epoch": 0.7652574700579823, "grad_norm": 0.77734375, "learning_rate": 0.00011785650522184556, "loss": 0.7457, "step": 29803 }, { "epoch": 0.7652831472539041, "grad_norm": 0.83984375, "learning_rate": 0.0001178521127505327, "loss": 0.8832, "step": 29804 }, { "epoch": 0.765308824449826, "grad_norm": 0.828125, "learning_rate": 0.00011784772024364219, "loss": 1.0101, "step": 29805 }, { "epoch": 0.7653345016457478, "grad_norm": 0.8046875, "learning_rate": 0.0001178433277011828, "loss": 0.7465, "step": 29806 }, { "epoch": 0.7653601788416696, "grad_norm": 0.7890625, "learning_rate": 0.00011783893512316326, "loss": 0.8728, "step": 29807 }, { "epoch": 0.7653858560375915, "grad_norm": 0.76171875, "learning_rate": 0.00011783454250959234, "loss": 0.8414, "step": 29808 }, { "epoch": 0.7654115332335132, "grad_norm": 0.71484375, "learning_rate": 0.00011783014986047875, "loss": 0.7319, "step": 29809 }, { "epoch": 0.765437210429435, "grad_norm": 0.7890625, "learning_rate": 0.00011782575717583133, "loss": 0.8425, "step": 29810 }, { "epoch": 0.7654628876253569, "grad_norm": 0.765625, "learning_rate": 0.00011782136445565879, "loss": 0.8021, "step": 29811 }, { "epoch": 0.7654885648212787, "grad_norm": 0.8515625, "learning_rate": 0.00011781697169996983, "loss": 0.761, "step": 29812 }, { "epoch": 0.7655142420172005, "grad_norm": 0.83203125, "learning_rate": 0.00011781257890877331, "loss": 0.9952, "step": 29813 }, { "epoch": 0.7655399192131224, "grad_norm": 0.765625, "learning_rate": 0.00011780818608207791, "loss": 0.7227, "step": 29814 }, { "epoch": 0.7655655964090442, "grad_norm": 0.83984375, "learning_rate": 0.0001178037932198924, "loss": 0.7478, "step": 29815 }, { "epoch": 0.7655912736049659, "grad_norm": 0.73828125, "learning_rate": 0.00011779940032222554, "loss": 0.7793, "step": 29816 }, { "epoch": 0.7656169508008878, "grad_norm": 0.76953125, "learning_rate": 0.00011779500738908607, "loss": 0.7468, "step": 29817 }, { "epoch": 0.7656426279968096, "grad_norm": 0.7265625, "learning_rate": 0.00011779061442048276, "loss": 0.764, "step": 29818 }, { "epoch": 0.7656683051927314, "grad_norm": 0.87890625, "learning_rate": 0.00011778622141642437, "loss": 0.8214, "step": 29819 }, { "epoch": 0.7656939823886533, "grad_norm": 0.7578125, "learning_rate": 0.00011778182837691965, "loss": 0.8275, "step": 29820 }, { "epoch": 0.7657196595845751, "grad_norm": 0.7265625, "learning_rate": 0.00011777743530197736, "loss": 0.8865, "step": 29821 }, { "epoch": 0.7657453367804968, "grad_norm": 0.77734375, "learning_rate": 0.00011777304219160623, "loss": 0.7702, "step": 29822 }, { "epoch": 0.7657710139764187, "grad_norm": 0.88671875, "learning_rate": 0.00011776864904581503, "loss": 0.8616, "step": 29823 }, { "epoch": 0.7657966911723405, "grad_norm": 0.73828125, "learning_rate": 0.00011776425586461253, "loss": 0.7016, "step": 29824 }, { "epoch": 0.7658223683682623, "grad_norm": 0.8203125, "learning_rate": 0.00011775986264800747, "loss": 0.9037, "step": 29825 }, { "epoch": 0.7658480455641842, "grad_norm": 0.7734375, "learning_rate": 0.0001177554693960086, "loss": 0.8383, "step": 29826 }, { "epoch": 0.765873722760106, "grad_norm": 0.7578125, "learning_rate": 0.00011775107610862472, "loss": 0.8033, "step": 29827 }, { "epoch": 0.7658993999560278, "grad_norm": 0.765625, "learning_rate": 0.00011774668278586449, "loss": 0.8058, "step": 29828 }, { "epoch": 0.7659250771519496, "grad_norm": 0.8125, "learning_rate": 0.00011774228942773678, "loss": 0.8354, "step": 29829 }, { "epoch": 0.7659507543478714, "grad_norm": 0.74609375, "learning_rate": 0.00011773789603425024, "loss": 0.8557, "step": 29830 }, { "epoch": 0.7659764315437932, "grad_norm": 0.74609375, "learning_rate": 0.00011773350260541369, "loss": 0.8867, "step": 29831 }, { "epoch": 0.7660021087397151, "grad_norm": 0.703125, "learning_rate": 0.0001177291091412359, "loss": 0.7868, "step": 29832 }, { "epoch": 0.7660277859356369, "grad_norm": 0.7734375, "learning_rate": 0.00011772471564172558, "loss": 0.9708, "step": 29833 }, { "epoch": 0.7660534631315588, "grad_norm": 0.79296875, "learning_rate": 0.0001177203221068915, "loss": 0.8303, "step": 29834 }, { "epoch": 0.7660791403274806, "grad_norm": 0.796875, "learning_rate": 0.00011771592853674243, "loss": 0.8612, "step": 29835 }, { "epoch": 0.7661048175234023, "grad_norm": 0.76171875, "learning_rate": 0.00011771153493128708, "loss": 0.7896, "step": 29836 }, { "epoch": 0.7661304947193242, "grad_norm": 0.96484375, "learning_rate": 0.00011770714129053426, "loss": 0.8219, "step": 29837 }, { "epoch": 0.766156171915246, "grad_norm": 0.7109375, "learning_rate": 0.00011770274761449274, "loss": 0.846, "step": 29838 }, { "epoch": 0.7661818491111678, "grad_norm": 0.671875, "learning_rate": 0.00011769835390317118, "loss": 0.8026, "step": 29839 }, { "epoch": 0.7662075263070897, "grad_norm": 0.73828125, "learning_rate": 0.00011769396015657846, "loss": 0.7751, "step": 29840 }, { "epoch": 0.7662332035030115, "grad_norm": 0.78125, "learning_rate": 0.00011768956637472323, "loss": 0.9117, "step": 29841 }, { "epoch": 0.7662588806989332, "grad_norm": 0.75, "learning_rate": 0.00011768517255761434, "loss": 0.7826, "step": 29842 }, { "epoch": 0.7662845578948551, "grad_norm": 0.8203125, "learning_rate": 0.00011768077870526047, "loss": 0.8264, "step": 29843 }, { "epoch": 0.7663102350907769, "grad_norm": 0.7265625, "learning_rate": 0.00011767638481767038, "loss": 0.8288, "step": 29844 }, { "epoch": 0.7663359122866987, "grad_norm": 0.796875, "learning_rate": 0.00011767199089485288, "loss": 0.9057, "step": 29845 }, { "epoch": 0.7663615894826206, "grad_norm": 0.8125, "learning_rate": 0.0001176675969368167, "loss": 0.7868, "step": 29846 }, { "epoch": 0.7663872666785424, "grad_norm": 0.8671875, "learning_rate": 0.00011766320294357059, "loss": 0.7317, "step": 29847 }, { "epoch": 0.7664129438744642, "grad_norm": 0.78125, "learning_rate": 0.00011765880891512331, "loss": 0.8497, "step": 29848 }, { "epoch": 0.766438621070386, "grad_norm": 0.73046875, "learning_rate": 0.0001176544148514836, "loss": 0.7359, "step": 29849 }, { "epoch": 0.7664642982663078, "grad_norm": 0.80078125, "learning_rate": 0.00011765002075266028, "loss": 1.012, "step": 29850 }, { "epoch": 0.7664899754622296, "grad_norm": 0.78515625, "learning_rate": 0.00011764562661866203, "loss": 0.8249, "step": 29851 }, { "epoch": 0.7665156526581515, "grad_norm": 0.81640625, "learning_rate": 0.00011764123244949765, "loss": 0.8494, "step": 29852 }, { "epoch": 0.7665413298540733, "grad_norm": 0.74609375, "learning_rate": 0.0001176368382451759, "loss": 0.8111, "step": 29853 }, { "epoch": 0.7665670070499951, "grad_norm": 0.78515625, "learning_rate": 0.00011763244400570552, "loss": 0.7741, "step": 29854 }, { "epoch": 0.766592684245917, "grad_norm": 0.81640625, "learning_rate": 0.00011762804973109525, "loss": 0.8204, "step": 29855 }, { "epoch": 0.7666183614418387, "grad_norm": 0.796875, "learning_rate": 0.00011762365542135389, "loss": 0.9695, "step": 29856 }, { "epoch": 0.7666440386377605, "grad_norm": 0.8125, "learning_rate": 0.00011761926107649015, "loss": 0.7786, "step": 29857 }, { "epoch": 0.7666697158336824, "grad_norm": 0.87109375, "learning_rate": 0.00011761486669651285, "loss": 0.7588, "step": 29858 }, { "epoch": 0.7666953930296042, "grad_norm": 0.77734375, "learning_rate": 0.00011761047228143071, "loss": 0.7975, "step": 29859 }, { "epoch": 0.766721070225526, "grad_norm": 0.76171875, "learning_rate": 0.00011760607783125245, "loss": 0.7813, "step": 29860 }, { "epoch": 0.7667467474214479, "grad_norm": 0.76171875, "learning_rate": 0.00011760168334598691, "loss": 0.6868, "step": 29861 }, { "epoch": 0.7667724246173696, "grad_norm": 0.84765625, "learning_rate": 0.00011759728882564278, "loss": 0.8009, "step": 29862 }, { "epoch": 0.7667981018132914, "grad_norm": 0.828125, "learning_rate": 0.00011759289427022884, "loss": 0.6955, "step": 29863 }, { "epoch": 0.7668237790092133, "grad_norm": 0.828125, "learning_rate": 0.00011758849967975387, "loss": 0.8985, "step": 29864 }, { "epoch": 0.7668494562051351, "grad_norm": 0.8671875, "learning_rate": 0.00011758410505422661, "loss": 0.8673, "step": 29865 }, { "epoch": 0.766875133401057, "grad_norm": 0.796875, "learning_rate": 0.0001175797103936558, "loss": 0.7348, "step": 29866 }, { "epoch": 0.7669008105969788, "grad_norm": 0.76171875, "learning_rate": 0.00011757531569805022, "loss": 0.8595, "step": 29867 }, { "epoch": 0.7669264877929006, "grad_norm": 0.7734375, "learning_rate": 0.00011757092096741862, "loss": 0.7547, "step": 29868 }, { "epoch": 0.7669521649888223, "grad_norm": 0.8359375, "learning_rate": 0.00011756652620176977, "loss": 0.7622, "step": 29869 }, { "epoch": 0.7669778421847442, "grad_norm": 0.86328125, "learning_rate": 0.00011756213140111243, "loss": 0.8418, "step": 29870 }, { "epoch": 0.767003519380666, "grad_norm": 0.8125, "learning_rate": 0.00011755773656545533, "loss": 0.8047, "step": 29871 }, { "epoch": 0.7670291965765879, "grad_norm": 0.828125, "learning_rate": 0.00011755334169480725, "loss": 0.85, "step": 29872 }, { "epoch": 0.7670548737725097, "grad_norm": 0.7890625, "learning_rate": 0.00011754894678917696, "loss": 0.776, "step": 29873 }, { "epoch": 0.7670805509684315, "grad_norm": 0.83203125, "learning_rate": 0.00011754455184857318, "loss": 0.7944, "step": 29874 }, { "epoch": 0.7671062281643534, "grad_norm": 0.765625, "learning_rate": 0.00011754015687300473, "loss": 0.849, "step": 29875 }, { "epoch": 0.7671319053602751, "grad_norm": 0.83984375, "learning_rate": 0.00011753576186248028, "loss": 0.8878, "step": 29876 }, { "epoch": 0.7671575825561969, "grad_norm": 0.81640625, "learning_rate": 0.00011753136681700868, "loss": 0.875, "step": 29877 }, { "epoch": 0.7671832597521188, "grad_norm": 0.72265625, "learning_rate": 0.00011752697173659863, "loss": 0.7373, "step": 29878 }, { "epoch": 0.7672089369480406, "grad_norm": 0.86328125, "learning_rate": 0.00011752257662125892, "loss": 0.8621, "step": 29879 }, { "epoch": 0.7672346141439624, "grad_norm": 0.84375, "learning_rate": 0.00011751818147099831, "loss": 0.7845, "step": 29880 }, { "epoch": 0.7672602913398843, "grad_norm": 0.8671875, "learning_rate": 0.0001175137862858255, "loss": 0.896, "step": 29881 }, { "epoch": 0.767285968535806, "grad_norm": 0.73046875, "learning_rate": 0.00011750939106574934, "loss": 0.8485, "step": 29882 }, { "epoch": 0.7673116457317278, "grad_norm": 0.8046875, "learning_rate": 0.00011750499581077854, "loss": 0.7653, "step": 29883 }, { "epoch": 0.7673373229276497, "grad_norm": 1.171875, "learning_rate": 0.00011750060052092185, "loss": 0.7525, "step": 29884 }, { "epoch": 0.7673630001235715, "grad_norm": 0.87109375, "learning_rate": 0.00011749620519618805, "loss": 0.7859, "step": 29885 }, { "epoch": 0.7673886773194933, "grad_norm": 0.75, "learning_rate": 0.0001174918098365859, "loss": 0.9643, "step": 29886 }, { "epoch": 0.7674143545154152, "grad_norm": 0.76171875, "learning_rate": 0.00011748741444212414, "loss": 0.8422, "step": 29887 }, { "epoch": 0.767440031711337, "grad_norm": 0.765625, "learning_rate": 0.00011748301901281156, "loss": 0.8923, "step": 29888 }, { "epoch": 0.7674657089072587, "grad_norm": 0.6953125, "learning_rate": 0.00011747862354865686, "loss": 0.7742, "step": 29889 }, { "epoch": 0.7674913861031806, "grad_norm": 0.71875, "learning_rate": 0.0001174742280496689, "loss": 0.8824, "step": 29890 }, { "epoch": 0.7675170632991024, "grad_norm": 0.87109375, "learning_rate": 0.00011746983251585635, "loss": 0.8841, "step": 29891 }, { "epoch": 0.7675427404950242, "grad_norm": 0.7578125, "learning_rate": 0.00011746543694722802, "loss": 0.903, "step": 29892 }, { "epoch": 0.7675684176909461, "grad_norm": 0.78515625, "learning_rate": 0.00011746104134379263, "loss": 0.9706, "step": 29893 }, { "epoch": 0.7675940948868679, "grad_norm": 0.7421875, "learning_rate": 0.00011745664570555896, "loss": 0.7651, "step": 29894 }, { "epoch": 0.7676197720827898, "grad_norm": 0.984375, "learning_rate": 0.0001174522500325358, "loss": 0.9971, "step": 29895 }, { "epoch": 0.7676454492787115, "grad_norm": 0.84765625, "learning_rate": 0.00011744785432473186, "loss": 0.9636, "step": 29896 }, { "epoch": 0.7676711264746333, "grad_norm": 0.8359375, "learning_rate": 0.00011744345858215591, "loss": 0.8182, "step": 29897 }, { "epoch": 0.7676968036705552, "grad_norm": 0.7734375, "learning_rate": 0.00011743906280481675, "loss": 0.8278, "step": 29898 }, { "epoch": 0.767722480866477, "grad_norm": 0.79296875, "learning_rate": 0.00011743466699272314, "loss": 0.9247, "step": 29899 }, { "epoch": 0.7677481580623988, "grad_norm": 0.875, "learning_rate": 0.00011743027114588375, "loss": 0.9051, "step": 29900 }, { "epoch": 0.7677738352583207, "grad_norm": 0.79296875, "learning_rate": 0.00011742587526430744, "loss": 0.7857, "step": 29901 }, { "epoch": 0.7677995124542424, "grad_norm": 0.76953125, "learning_rate": 0.00011742147934800294, "loss": 0.8059, "step": 29902 }, { "epoch": 0.7678251896501642, "grad_norm": 0.81640625, "learning_rate": 0.00011741708339697897, "loss": 0.7969, "step": 29903 }, { "epoch": 0.767850866846086, "grad_norm": 0.76171875, "learning_rate": 0.00011741268741124433, "loss": 0.7716, "step": 29904 }, { "epoch": 0.7678765440420079, "grad_norm": 0.82421875, "learning_rate": 0.0001174082913908078, "loss": 0.8525, "step": 29905 }, { "epoch": 0.7679022212379297, "grad_norm": 0.78515625, "learning_rate": 0.00011740389533567813, "loss": 0.8181, "step": 29906 }, { "epoch": 0.7679278984338516, "grad_norm": 0.83203125, "learning_rate": 0.00011739949924586407, "loss": 0.7995, "step": 29907 }, { "epoch": 0.7679535756297734, "grad_norm": 0.86328125, "learning_rate": 0.00011739510312137434, "loss": 0.9623, "step": 29908 }, { "epoch": 0.7679792528256951, "grad_norm": 0.734375, "learning_rate": 0.00011739070696221777, "loss": 0.7263, "step": 29909 }, { "epoch": 0.768004930021617, "grad_norm": 0.7421875, "learning_rate": 0.00011738631076840307, "loss": 0.7362, "step": 29910 }, { "epoch": 0.7680306072175388, "grad_norm": 0.796875, "learning_rate": 0.00011738191453993902, "loss": 0.9888, "step": 29911 }, { "epoch": 0.7680562844134606, "grad_norm": 0.85546875, "learning_rate": 0.00011737751827683442, "loss": 0.9098, "step": 29912 }, { "epoch": 0.7680819616093825, "grad_norm": 0.80078125, "learning_rate": 0.00011737312197909798, "loss": 0.9247, "step": 29913 }, { "epoch": 0.7681076388053043, "grad_norm": 0.8046875, "learning_rate": 0.00011736872564673846, "loss": 0.7093, "step": 29914 }, { "epoch": 0.7681333160012261, "grad_norm": 0.78125, "learning_rate": 0.00011736432927976467, "loss": 0.7888, "step": 29915 }, { "epoch": 0.7681589931971479, "grad_norm": 0.7734375, "learning_rate": 0.0001173599328781853, "loss": 0.8651, "step": 29916 }, { "epoch": 0.7681846703930697, "grad_norm": 0.8046875, "learning_rate": 0.00011735553644200918, "loss": 0.9165, "step": 29917 }, { "epoch": 0.7682103475889915, "grad_norm": 0.765625, "learning_rate": 0.00011735113997124505, "loss": 0.6885, "step": 29918 }, { "epoch": 0.7682360247849134, "grad_norm": 0.69921875, "learning_rate": 0.00011734674346590165, "loss": 0.8629, "step": 29919 }, { "epoch": 0.7682617019808352, "grad_norm": 0.8203125, "learning_rate": 0.00011734234692598778, "loss": 0.9594, "step": 29920 }, { "epoch": 0.768287379176757, "grad_norm": 0.74609375, "learning_rate": 0.00011733795035151214, "loss": 0.7808, "step": 29921 }, { "epoch": 0.7683130563726788, "grad_norm": 0.7734375, "learning_rate": 0.00011733355374248353, "loss": 0.908, "step": 29922 }, { "epoch": 0.7683387335686006, "grad_norm": 0.80078125, "learning_rate": 0.00011732915709891074, "loss": 0.8409, "step": 29923 }, { "epoch": 0.7683644107645224, "grad_norm": 0.68359375, "learning_rate": 0.00011732476042080249, "loss": 0.7307, "step": 29924 }, { "epoch": 0.7683900879604443, "grad_norm": 0.90625, "learning_rate": 0.0001173203637081676, "loss": 0.7148, "step": 29925 }, { "epoch": 0.7684157651563661, "grad_norm": 1.0078125, "learning_rate": 0.00011731596696101474, "loss": 0.8598, "step": 29926 }, { "epoch": 0.768441442352288, "grad_norm": 0.7421875, "learning_rate": 0.00011731157017935275, "loss": 0.7623, "step": 29927 }, { "epoch": 0.7684671195482098, "grad_norm": 0.82421875, "learning_rate": 0.00011730717336319035, "loss": 0.8853, "step": 29928 }, { "epoch": 0.7684927967441315, "grad_norm": 0.81640625, "learning_rate": 0.00011730277651253629, "loss": 0.7509, "step": 29929 }, { "epoch": 0.7685184739400533, "grad_norm": 0.7890625, "learning_rate": 0.0001172983796273994, "loss": 0.7535, "step": 29930 }, { "epoch": 0.7685441511359752, "grad_norm": 0.80859375, "learning_rate": 0.0001172939827077884, "loss": 0.8938, "step": 29931 }, { "epoch": 0.768569828331897, "grad_norm": 0.7578125, "learning_rate": 0.00011728958575371205, "loss": 0.6213, "step": 29932 }, { "epoch": 0.7685955055278189, "grad_norm": 0.765625, "learning_rate": 0.00011728518876517913, "loss": 0.8239, "step": 29933 }, { "epoch": 0.7686211827237407, "grad_norm": 0.78515625, "learning_rate": 0.00011728079174219835, "loss": 0.8265, "step": 29934 }, { "epoch": 0.7686468599196625, "grad_norm": 0.75390625, "learning_rate": 0.00011727639468477853, "loss": 0.7555, "step": 29935 }, { "epoch": 0.7686725371155843, "grad_norm": 0.72265625, "learning_rate": 0.00011727199759292844, "loss": 0.7815, "step": 29936 }, { "epoch": 0.7686982143115061, "grad_norm": 0.79296875, "learning_rate": 0.00011726760046665677, "loss": 0.8343, "step": 29937 }, { "epoch": 0.7687238915074279, "grad_norm": 0.82421875, "learning_rate": 0.0001172632033059724, "loss": 0.8178, "step": 29938 }, { "epoch": 0.7687495687033498, "grad_norm": 0.75390625, "learning_rate": 0.000117258806110884, "loss": 0.8368, "step": 29939 }, { "epoch": 0.7687752458992716, "grad_norm": 0.86328125, "learning_rate": 0.00011725440888140031, "loss": 0.8907, "step": 29940 }, { "epoch": 0.7688009230951934, "grad_norm": 0.71484375, "learning_rate": 0.0001172500116175302, "loss": 0.7789, "step": 29941 }, { "epoch": 0.7688266002911152, "grad_norm": 0.97265625, "learning_rate": 0.00011724561431928234, "loss": 0.7526, "step": 29942 }, { "epoch": 0.768852277487037, "grad_norm": 0.76171875, "learning_rate": 0.00011724121698666554, "loss": 0.8159, "step": 29943 }, { "epoch": 0.7688779546829588, "grad_norm": 0.80859375, "learning_rate": 0.00011723681961968855, "loss": 0.8207, "step": 29944 }, { "epoch": 0.7689036318788807, "grad_norm": 0.8046875, "learning_rate": 0.00011723242221836015, "loss": 0.8013, "step": 29945 }, { "epoch": 0.7689293090748025, "grad_norm": 0.7734375, "learning_rate": 0.00011722802478268908, "loss": 0.7546, "step": 29946 }, { "epoch": 0.7689549862707243, "grad_norm": 0.77734375, "learning_rate": 0.00011722362731268412, "loss": 0.8275, "step": 29947 }, { "epoch": 0.7689806634666462, "grad_norm": 0.76953125, "learning_rate": 0.000117219229808354, "loss": 0.7515, "step": 29948 }, { "epoch": 0.7690063406625679, "grad_norm": 0.87890625, "learning_rate": 0.00011721483226970753, "loss": 0.7878, "step": 29949 }, { "epoch": 0.7690320178584897, "grad_norm": 0.80078125, "learning_rate": 0.00011721043469675346, "loss": 0.7058, "step": 29950 }, { "epoch": 0.7690576950544116, "grad_norm": 0.83203125, "learning_rate": 0.00011720603708950053, "loss": 0.9879, "step": 29951 }, { "epoch": 0.7690833722503334, "grad_norm": 0.8203125, "learning_rate": 0.00011720163944795755, "loss": 0.895, "step": 29952 }, { "epoch": 0.7691090494462552, "grad_norm": 0.86328125, "learning_rate": 0.00011719724177213322, "loss": 0.8563, "step": 29953 }, { "epoch": 0.7691347266421771, "grad_norm": 0.7890625, "learning_rate": 0.00011719284406203637, "loss": 0.7738, "step": 29954 }, { "epoch": 0.7691604038380988, "grad_norm": 0.86328125, "learning_rate": 0.00011718844631767573, "loss": 0.8405, "step": 29955 }, { "epoch": 0.7691860810340206, "grad_norm": 0.8046875, "learning_rate": 0.00011718404853906004, "loss": 0.9064, "step": 29956 }, { "epoch": 0.7692117582299425, "grad_norm": 0.7578125, "learning_rate": 0.00011717965072619812, "loss": 0.8494, "step": 29957 }, { "epoch": 0.7692374354258643, "grad_norm": 0.74609375, "learning_rate": 0.00011717525287909871, "loss": 0.8395, "step": 29958 }, { "epoch": 0.7692631126217861, "grad_norm": 0.80078125, "learning_rate": 0.00011717085499777059, "loss": 0.8778, "step": 29959 }, { "epoch": 0.769288789817708, "grad_norm": 0.78515625, "learning_rate": 0.00011716645708222249, "loss": 0.8094, "step": 29960 }, { "epoch": 0.7693144670136298, "grad_norm": 0.765625, "learning_rate": 0.00011716205913246317, "loss": 0.7337, "step": 29961 }, { "epoch": 0.7693401442095515, "grad_norm": 0.78515625, "learning_rate": 0.00011715766114850141, "loss": 0.8226, "step": 29962 }, { "epoch": 0.7693658214054734, "grad_norm": 0.85546875, "learning_rate": 0.00011715326313034602, "loss": 0.7435, "step": 29963 }, { "epoch": 0.7693914986013952, "grad_norm": 0.7734375, "learning_rate": 0.00011714886507800571, "loss": 0.7505, "step": 29964 }, { "epoch": 0.769417175797317, "grad_norm": 0.875, "learning_rate": 0.00011714446699148926, "loss": 0.7975, "step": 29965 }, { "epoch": 0.7694428529932389, "grad_norm": 0.828125, "learning_rate": 0.00011714006887080544, "loss": 0.8411, "step": 29966 }, { "epoch": 0.7694685301891607, "grad_norm": 0.81640625, "learning_rate": 0.000117135670715963, "loss": 0.8858, "step": 29967 }, { "epoch": 0.7694942073850826, "grad_norm": 0.77734375, "learning_rate": 0.00011713127252697073, "loss": 1.0038, "step": 29968 }, { "epoch": 0.7695198845810043, "grad_norm": 0.8203125, "learning_rate": 0.00011712687430383738, "loss": 0.7833, "step": 29969 }, { "epoch": 0.7695455617769261, "grad_norm": 0.76953125, "learning_rate": 0.00011712247604657169, "loss": 0.8204, "step": 29970 }, { "epoch": 0.769571238972848, "grad_norm": 0.80078125, "learning_rate": 0.00011711807775518249, "loss": 0.7453, "step": 29971 }, { "epoch": 0.7695969161687698, "grad_norm": 0.6875, "learning_rate": 0.00011711367942967851, "loss": 0.7202, "step": 29972 }, { "epoch": 0.7696225933646916, "grad_norm": 0.85546875, "learning_rate": 0.00011710928107006848, "loss": 0.8466, "step": 29973 }, { "epoch": 0.7696482705606135, "grad_norm": 0.8046875, "learning_rate": 0.0001171048826763612, "loss": 0.7398, "step": 29974 }, { "epoch": 0.7696739477565352, "grad_norm": 0.77734375, "learning_rate": 0.00011710048424856545, "loss": 0.791, "step": 29975 }, { "epoch": 0.769699624952457, "grad_norm": 0.81640625, "learning_rate": 0.00011709608578668999, "loss": 0.8683, "step": 29976 }, { "epoch": 0.7697253021483789, "grad_norm": 0.84375, "learning_rate": 0.00011709168729074353, "loss": 0.899, "step": 29977 }, { "epoch": 0.7697509793443007, "grad_norm": 0.71875, "learning_rate": 0.00011708728876073494, "loss": 0.7678, "step": 29978 }, { "epoch": 0.7697766565402225, "grad_norm": 0.703125, "learning_rate": 0.00011708289019667291, "loss": 0.67, "step": 29979 }, { "epoch": 0.7698023337361444, "grad_norm": 0.76953125, "learning_rate": 0.00011707849159856618, "loss": 0.7722, "step": 29980 }, { "epoch": 0.7698280109320662, "grad_norm": 0.81640625, "learning_rate": 0.00011707409296642362, "loss": 0.8183, "step": 29981 }, { "epoch": 0.7698536881279879, "grad_norm": 0.77734375, "learning_rate": 0.00011706969430025387, "loss": 0.8844, "step": 29982 }, { "epoch": 0.7698793653239098, "grad_norm": 0.796875, "learning_rate": 0.0001170652956000658, "loss": 0.9964, "step": 29983 }, { "epoch": 0.7699050425198316, "grad_norm": 0.6875, "learning_rate": 0.00011706089686586816, "loss": 0.7419, "step": 29984 }, { "epoch": 0.7699307197157534, "grad_norm": 0.77734375, "learning_rate": 0.00011705649809766965, "loss": 0.7426, "step": 29985 }, { "epoch": 0.7699563969116753, "grad_norm": 0.7890625, "learning_rate": 0.00011705209929547911, "loss": 0.8123, "step": 29986 }, { "epoch": 0.7699820741075971, "grad_norm": 0.80859375, "learning_rate": 0.00011704770045930527, "loss": 0.874, "step": 29987 }, { "epoch": 0.770007751303519, "grad_norm": 0.890625, "learning_rate": 0.00011704330158915688, "loss": 0.9085, "step": 29988 }, { "epoch": 0.7700334284994407, "grad_norm": 0.7890625, "learning_rate": 0.00011703890268504276, "loss": 0.8623, "step": 29989 }, { "epoch": 0.7700591056953625, "grad_norm": 0.74609375, "learning_rate": 0.00011703450374697164, "loss": 0.8031, "step": 29990 }, { "epoch": 0.7700847828912843, "grad_norm": 0.8359375, "learning_rate": 0.00011703010477495229, "loss": 0.8134, "step": 29991 }, { "epoch": 0.7701104600872062, "grad_norm": 0.98046875, "learning_rate": 0.00011702570576899347, "loss": 0.8462, "step": 29992 }, { "epoch": 0.770136137283128, "grad_norm": 0.80859375, "learning_rate": 0.00011702130672910394, "loss": 0.8216, "step": 29993 }, { "epoch": 0.7701618144790499, "grad_norm": 0.765625, "learning_rate": 0.00011701690765529251, "loss": 0.8113, "step": 29994 }, { "epoch": 0.7701874916749716, "grad_norm": 0.765625, "learning_rate": 0.00011701250854756793, "loss": 0.753, "step": 29995 }, { "epoch": 0.7702131688708934, "grad_norm": 0.80078125, "learning_rate": 0.00011700810940593891, "loss": 0.8585, "step": 29996 }, { "epoch": 0.7702388460668153, "grad_norm": 0.76171875, "learning_rate": 0.00011700371023041432, "loss": 0.7659, "step": 29997 }, { "epoch": 0.7702645232627371, "grad_norm": 0.73828125, "learning_rate": 0.00011699931102100285, "loss": 0.7572, "step": 29998 }, { "epoch": 0.7702902004586589, "grad_norm": 0.796875, "learning_rate": 0.00011699491177771329, "loss": 0.96, "step": 29999 }, { "epoch": 0.7703158776545808, "grad_norm": 0.7890625, "learning_rate": 0.00011699051250055442, "loss": 0.8763, "step": 30000 }, { "epoch": 0.7703158776545808, "eval_loss": 0.8166946172714233, "eval_runtime": 387.1891, "eval_samples_per_second": 25.827, "eval_steps_per_second": 0.808, "step": 30000 }, { "epoch": 0.7703415548505026, "grad_norm": 0.7890625, "learning_rate": 0.00011698611318953494, "loss": 0.9043, "step": 30001 }, { "epoch": 0.7703672320464243, "grad_norm": 0.83984375, "learning_rate": 0.00011698171384466372, "loss": 0.8628, "step": 30002 }, { "epoch": 0.7703929092423462, "grad_norm": 0.7890625, "learning_rate": 0.00011697731446594946, "loss": 0.7379, "step": 30003 }, { "epoch": 0.770418586438268, "grad_norm": 0.80859375, "learning_rate": 0.00011697291505340095, "loss": 0.8892, "step": 30004 }, { "epoch": 0.7704442636341898, "grad_norm": 0.7734375, "learning_rate": 0.00011696851560702697, "loss": 0.7717, "step": 30005 }, { "epoch": 0.7704699408301117, "grad_norm": 0.78125, "learning_rate": 0.00011696411612683624, "loss": 0.9251, "step": 30006 }, { "epoch": 0.7704956180260335, "grad_norm": 0.75, "learning_rate": 0.00011695971661283758, "loss": 0.8244, "step": 30007 }, { "epoch": 0.7705212952219553, "grad_norm": 0.77734375, "learning_rate": 0.00011695531706503973, "loss": 0.7736, "step": 30008 }, { "epoch": 0.7705469724178771, "grad_norm": 0.703125, "learning_rate": 0.00011695091748345146, "loss": 0.7899, "step": 30009 }, { "epoch": 0.7705726496137989, "grad_norm": 0.8515625, "learning_rate": 0.00011694651786808155, "loss": 0.9701, "step": 30010 }, { "epoch": 0.7705983268097207, "grad_norm": 0.76171875, "learning_rate": 0.00011694211821893877, "loss": 0.8843, "step": 30011 }, { "epoch": 0.7706240040056426, "grad_norm": 0.71484375, "learning_rate": 0.00011693771853603186, "loss": 0.7525, "step": 30012 }, { "epoch": 0.7706496812015644, "grad_norm": 0.7578125, "learning_rate": 0.00011693331881936961, "loss": 0.7702, "step": 30013 }, { "epoch": 0.7706753583974862, "grad_norm": 0.8046875, "learning_rate": 0.00011692891906896078, "loss": 0.8504, "step": 30014 }, { "epoch": 0.770701035593408, "grad_norm": 0.78515625, "learning_rate": 0.00011692451928481416, "loss": 0.7562, "step": 30015 }, { "epoch": 0.7707267127893298, "grad_norm": 0.78515625, "learning_rate": 0.00011692011946693851, "loss": 0.8657, "step": 30016 }, { "epoch": 0.7707523899852516, "grad_norm": 0.66015625, "learning_rate": 0.00011691571961534258, "loss": 0.7423, "step": 30017 }, { "epoch": 0.7707780671811735, "grad_norm": 0.75390625, "learning_rate": 0.00011691131973003515, "loss": 0.7602, "step": 30018 }, { "epoch": 0.7708037443770953, "grad_norm": 0.78125, "learning_rate": 0.00011690691981102495, "loss": 0.6729, "step": 30019 }, { "epoch": 0.7708294215730171, "grad_norm": 0.70703125, "learning_rate": 0.00011690251985832083, "loss": 0.833, "step": 30020 }, { "epoch": 0.770855098768939, "grad_norm": 0.8125, "learning_rate": 0.0001168981198719315, "loss": 0.7808, "step": 30021 }, { "epoch": 0.7708807759648607, "grad_norm": 0.76953125, "learning_rate": 0.00011689371985186572, "loss": 0.8767, "step": 30022 }, { "epoch": 0.7709064531607825, "grad_norm": 0.84765625, "learning_rate": 0.00011688931979813234, "loss": 0.8849, "step": 30023 }, { "epoch": 0.7709321303567044, "grad_norm": 0.7890625, "learning_rate": 0.00011688491971074005, "loss": 0.8478, "step": 30024 }, { "epoch": 0.7709578075526262, "grad_norm": 0.82421875, "learning_rate": 0.00011688051958969761, "loss": 0.955, "step": 30025 }, { "epoch": 0.770983484748548, "grad_norm": 0.69921875, "learning_rate": 0.00011687611943501384, "loss": 0.7435, "step": 30026 }, { "epoch": 0.7710091619444699, "grad_norm": 0.7734375, "learning_rate": 0.0001168717192466975, "loss": 0.8514, "step": 30027 }, { "epoch": 0.7710348391403917, "grad_norm": 0.90625, "learning_rate": 0.00011686731902475731, "loss": 0.8603, "step": 30028 }, { "epoch": 0.7710605163363135, "grad_norm": 0.73046875, "learning_rate": 0.00011686291876920212, "loss": 0.8189, "step": 30029 }, { "epoch": 0.7710861935322353, "grad_norm": 0.84375, "learning_rate": 0.00011685851848004064, "loss": 0.8393, "step": 30030 }, { "epoch": 0.7711118707281571, "grad_norm": 0.80859375, "learning_rate": 0.00011685411815728166, "loss": 0.9101, "step": 30031 }, { "epoch": 0.771137547924079, "grad_norm": 0.796875, "learning_rate": 0.00011684971780093395, "loss": 0.7456, "step": 30032 }, { "epoch": 0.7711632251200008, "grad_norm": 0.7734375, "learning_rate": 0.00011684531741100623, "loss": 0.7718, "step": 30033 }, { "epoch": 0.7711889023159226, "grad_norm": 0.78125, "learning_rate": 0.00011684091698750737, "loss": 0.8574, "step": 30034 }, { "epoch": 0.7712145795118444, "grad_norm": 0.7734375, "learning_rate": 0.00011683651653044605, "loss": 0.746, "step": 30035 }, { "epoch": 0.7712402567077662, "grad_norm": 0.7890625, "learning_rate": 0.00011683211603983106, "loss": 0.8103, "step": 30036 }, { "epoch": 0.771265933903688, "grad_norm": 0.7265625, "learning_rate": 0.00011682771551567125, "loss": 0.747, "step": 30037 }, { "epoch": 0.7712916110996099, "grad_norm": 0.75390625, "learning_rate": 0.00011682331495797524, "loss": 0.8967, "step": 30038 }, { "epoch": 0.7713172882955317, "grad_norm": 0.6796875, "learning_rate": 0.00011681891436675194, "loss": 0.7668, "step": 30039 }, { "epoch": 0.7713429654914535, "grad_norm": 0.78125, "learning_rate": 0.00011681451374201004, "loss": 0.8492, "step": 30040 }, { "epoch": 0.7713686426873754, "grad_norm": 0.7890625, "learning_rate": 0.00011681011308375833, "loss": 0.7893, "step": 30041 }, { "epoch": 0.7713943198832971, "grad_norm": 0.8046875, "learning_rate": 0.00011680571239200559, "loss": 0.7726, "step": 30042 }, { "epoch": 0.7714199970792189, "grad_norm": 0.7890625, "learning_rate": 0.0001168013116667606, "loss": 0.8355, "step": 30043 }, { "epoch": 0.7714456742751408, "grad_norm": 0.78125, "learning_rate": 0.0001167969109080321, "loss": 0.8336, "step": 30044 }, { "epoch": 0.7714713514710626, "grad_norm": 1.25, "learning_rate": 0.00011679251011582888, "loss": 0.7411, "step": 30045 }, { "epoch": 0.7714970286669844, "grad_norm": 0.765625, "learning_rate": 0.00011678810929015968, "loss": 0.94, "step": 30046 }, { "epoch": 0.7715227058629063, "grad_norm": 0.734375, "learning_rate": 0.00011678370843103333, "loss": 0.7919, "step": 30047 }, { "epoch": 0.7715483830588281, "grad_norm": 0.75390625, "learning_rate": 0.00011677930753845854, "loss": 0.9011, "step": 30048 }, { "epoch": 0.7715740602547498, "grad_norm": 0.89453125, "learning_rate": 0.0001167749066124441, "loss": 0.8682, "step": 30049 }, { "epoch": 0.7715997374506717, "grad_norm": 0.8671875, "learning_rate": 0.00011677050565299881, "loss": 0.7754, "step": 30050 }, { "epoch": 0.7716254146465935, "grad_norm": 0.77734375, "learning_rate": 0.0001167661046601314, "loss": 0.8891, "step": 30051 }, { "epoch": 0.7716510918425153, "grad_norm": 0.79296875, "learning_rate": 0.00011676170363385066, "loss": 0.8523, "step": 30052 }, { "epoch": 0.7716767690384372, "grad_norm": 0.77734375, "learning_rate": 0.00011675730257416537, "loss": 0.7726, "step": 30053 }, { "epoch": 0.771702446234359, "grad_norm": 0.8359375, "learning_rate": 0.00011675290148108426, "loss": 0.8504, "step": 30054 }, { "epoch": 0.7717281234302807, "grad_norm": 0.75, "learning_rate": 0.00011674850035461618, "loss": 0.8265, "step": 30055 }, { "epoch": 0.7717538006262026, "grad_norm": 0.79296875, "learning_rate": 0.00011674409919476981, "loss": 0.8213, "step": 30056 }, { "epoch": 0.7717794778221244, "grad_norm": 0.85546875, "learning_rate": 0.00011673969800155397, "loss": 0.894, "step": 30057 }, { "epoch": 0.7718051550180463, "grad_norm": 0.7109375, "learning_rate": 0.00011673529677497745, "loss": 0.8669, "step": 30058 }, { "epoch": 0.7718308322139681, "grad_norm": 0.8046875, "learning_rate": 0.00011673089551504897, "loss": 0.8284, "step": 30059 }, { "epoch": 0.7718565094098899, "grad_norm": 0.85546875, "learning_rate": 0.00011672649422177733, "loss": 0.9102, "step": 30060 }, { "epoch": 0.7718821866058118, "grad_norm": 0.76953125, "learning_rate": 0.0001167220928951713, "loss": 0.8371, "step": 30061 }, { "epoch": 0.7719078638017335, "grad_norm": 0.79296875, "learning_rate": 0.00011671769153523964, "loss": 0.8783, "step": 30062 }, { "epoch": 0.7719335409976553, "grad_norm": 0.734375, "learning_rate": 0.00011671329014199117, "loss": 0.6811, "step": 30063 }, { "epoch": 0.7719592181935772, "grad_norm": 0.73828125, "learning_rate": 0.0001167088887154346, "loss": 0.7786, "step": 30064 }, { "epoch": 0.771984895389499, "grad_norm": 0.83984375, "learning_rate": 0.0001167044872555787, "loss": 0.9021, "step": 30065 }, { "epoch": 0.7720105725854208, "grad_norm": 0.7265625, "learning_rate": 0.00011670008576243228, "loss": 0.8812, "step": 30066 }, { "epoch": 0.7720362497813427, "grad_norm": 0.74609375, "learning_rate": 0.00011669568423600407, "loss": 0.7498, "step": 30067 }, { "epoch": 0.7720619269772645, "grad_norm": 0.7421875, "learning_rate": 0.00011669128267630291, "loss": 0.7948, "step": 30068 }, { "epoch": 0.7720876041731862, "grad_norm": 0.83984375, "learning_rate": 0.00011668688108333754, "loss": 0.8072, "step": 30069 }, { "epoch": 0.7721132813691081, "grad_norm": 0.75390625, "learning_rate": 0.0001166824794571167, "loss": 0.7135, "step": 30070 }, { "epoch": 0.7721389585650299, "grad_norm": 0.6953125, "learning_rate": 0.0001166780777976492, "loss": 0.6453, "step": 30071 }, { "epoch": 0.7721646357609517, "grad_norm": 0.7578125, "learning_rate": 0.0001166736761049438, "loss": 0.9271, "step": 30072 }, { "epoch": 0.7721903129568736, "grad_norm": 0.77734375, "learning_rate": 0.00011666927437900924, "loss": 0.764, "step": 30073 }, { "epoch": 0.7722159901527954, "grad_norm": 0.83203125, "learning_rate": 0.00011666487261985435, "loss": 0.7596, "step": 30074 }, { "epoch": 0.7722416673487171, "grad_norm": 0.796875, "learning_rate": 0.00011666047082748786, "loss": 0.8317, "step": 30075 }, { "epoch": 0.772267344544639, "grad_norm": 0.7578125, "learning_rate": 0.00011665606900191855, "loss": 0.9552, "step": 30076 }, { "epoch": 0.7722930217405608, "grad_norm": 0.69140625, "learning_rate": 0.00011665166714315523, "loss": 0.6694, "step": 30077 }, { "epoch": 0.7723186989364826, "grad_norm": 0.7578125, "learning_rate": 0.00011664726525120659, "loss": 0.7313, "step": 30078 }, { "epoch": 0.7723443761324045, "grad_norm": 0.87890625, "learning_rate": 0.0001166428633260815, "loss": 0.8446, "step": 30079 }, { "epoch": 0.7723700533283263, "grad_norm": 0.703125, "learning_rate": 0.00011663846136778866, "loss": 0.9042, "step": 30080 }, { "epoch": 0.7723957305242481, "grad_norm": 0.8203125, "learning_rate": 0.00011663405937633686, "loss": 0.7959, "step": 30081 }, { "epoch": 0.7724214077201699, "grad_norm": 0.85546875, "learning_rate": 0.00011662965735173492, "loss": 0.7625, "step": 30082 }, { "epoch": 0.7724470849160917, "grad_norm": 0.77734375, "learning_rate": 0.00011662525529399157, "loss": 0.8829, "step": 30083 }, { "epoch": 0.7724727621120135, "grad_norm": 0.8203125, "learning_rate": 0.00011662085320311557, "loss": 0.8594, "step": 30084 }, { "epoch": 0.7724984393079354, "grad_norm": 0.984375, "learning_rate": 0.00011661645107911575, "loss": 0.9346, "step": 30085 }, { "epoch": 0.7725241165038572, "grad_norm": 0.859375, "learning_rate": 0.00011661204892200077, "loss": 0.8562, "step": 30086 }, { "epoch": 0.772549793699779, "grad_norm": 0.77734375, "learning_rate": 0.00011660764673177953, "loss": 0.8079, "step": 30087 }, { "epoch": 0.7725754708957009, "grad_norm": 0.79296875, "learning_rate": 0.00011660324450846074, "loss": 0.9401, "step": 30088 }, { "epoch": 0.7726011480916226, "grad_norm": 0.8203125, "learning_rate": 0.00011659884225205317, "loss": 0.8932, "step": 30089 }, { "epoch": 0.7726268252875444, "grad_norm": 0.75, "learning_rate": 0.00011659443996256565, "loss": 0.7211, "step": 30090 }, { "epoch": 0.7726525024834663, "grad_norm": 0.80078125, "learning_rate": 0.00011659003764000684, "loss": 0.8701, "step": 30091 }, { "epoch": 0.7726781796793881, "grad_norm": 0.7578125, "learning_rate": 0.00011658563528438564, "loss": 0.7493, "step": 30092 }, { "epoch": 0.77270385687531, "grad_norm": 0.78515625, "learning_rate": 0.00011658123289571075, "loss": 0.8829, "step": 30093 }, { "epoch": 0.7727295340712318, "grad_norm": 0.84375, "learning_rate": 0.00011657683047399094, "loss": 0.8485, "step": 30094 }, { "epoch": 0.7727552112671535, "grad_norm": 0.79296875, "learning_rate": 0.00011657242801923503, "loss": 0.8413, "step": 30095 }, { "epoch": 0.7727808884630754, "grad_norm": 0.765625, "learning_rate": 0.00011656802553145176, "loss": 0.8186, "step": 30096 }, { "epoch": 0.7728065656589972, "grad_norm": 0.72265625, "learning_rate": 0.00011656362301064993, "loss": 0.7943, "step": 30097 }, { "epoch": 0.772832242854919, "grad_norm": 0.7578125, "learning_rate": 0.0001165592204568383, "loss": 0.737, "step": 30098 }, { "epoch": 0.7728579200508409, "grad_norm": 0.84375, "learning_rate": 0.00011655481787002558, "loss": 0.704, "step": 30099 }, { "epoch": 0.7728835972467627, "grad_norm": 0.78125, "learning_rate": 0.00011655041525022066, "loss": 0.8598, "step": 30100 }, { "epoch": 0.7729092744426845, "grad_norm": 0.84375, "learning_rate": 0.00011654601259743223, "loss": 0.8651, "step": 30101 }, { "epoch": 0.7729349516386063, "grad_norm": 0.72265625, "learning_rate": 0.0001165416099116691, "loss": 0.8216, "step": 30102 }, { "epoch": 0.7729606288345281, "grad_norm": 0.7578125, "learning_rate": 0.00011653720719294005, "loss": 0.8411, "step": 30103 }, { "epoch": 0.7729863060304499, "grad_norm": 0.7421875, "learning_rate": 0.00011653280444125383, "loss": 0.7035, "step": 30104 }, { "epoch": 0.7730119832263718, "grad_norm": 0.84765625, "learning_rate": 0.0001165284016566192, "loss": 0.8918, "step": 30105 }, { "epoch": 0.7730376604222936, "grad_norm": 0.953125, "learning_rate": 0.00011652399883904499, "loss": 0.8446, "step": 30106 }, { "epoch": 0.7730633376182154, "grad_norm": 0.80078125, "learning_rate": 0.00011651959598853992, "loss": 0.7674, "step": 30107 }, { "epoch": 0.7730890148141373, "grad_norm": 0.73046875, "learning_rate": 0.00011651519310511281, "loss": 0.8007, "step": 30108 }, { "epoch": 0.773114692010059, "grad_norm": 0.88671875, "learning_rate": 0.00011651079018877243, "loss": 1.0272, "step": 30109 }, { "epoch": 0.7731403692059808, "grad_norm": 0.8828125, "learning_rate": 0.00011650638723952747, "loss": 0.7283, "step": 30110 }, { "epoch": 0.7731660464019027, "grad_norm": 0.76171875, "learning_rate": 0.00011650198425738683, "loss": 0.7363, "step": 30111 }, { "epoch": 0.7731917235978245, "grad_norm": 0.7734375, "learning_rate": 0.00011649758124235921, "loss": 0.7997, "step": 30112 }, { "epoch": 0.7732174007937463, "grad_norm": 0.7421875, "learning_rate": 0.00011649317819445337, "loss": 0.9806, "step": 30113 }, { "epoch": 0.7732430779896682, "grad_norm": 0.8671875, "learning_rate": 0.00011648877511367814, "loss": 0.8341, "step": 30114 }, { "epoch": 0.7732687551855899, "grad_norm": 0.796875, "learning_rate": 0.00011648437200004229, "loss": 0.8114, "step": 30115 }, { "epoch": 0.7732944323815117, "grad_norm": 0.703125, "learning_rate": 0.00011647996885355456, "loss": 0.7204, "step": 30116 }, { "epoch": 0.7733201095774336, "grad_norm": 0.828125, "learning_rate": 0.00011647556567422376, "loss": 0.8806, "step": 30117 }, { "epoch": 0.7733457867733554, "grad_norm": 0.7421875, "learning_rate": 0.00011647116246205861, "loss": 0.6407, "step": 30118 }, { "epoch": 0.7733714639692773, "grad_norm": 0.76171875, "learning_rate": 0.00011646675921706794, "loss": 0.7663, "step": 30119 }, { "epoch": 0.7733971411651991, "grad_norm": 0.7734375, "learning_rate": 0.0001164623559392605, "loss": 0.8295, "step": 30120 }, { "epoch": 0.7734228183611209, "grad_norm": 0.71875, "learning_rate": 0.00011645795262864505, "loss": 0.7292, "step": 30121 }, { "epoch": 0.7734484955570426, "grad_norm": 0.8125, "learning_rate": 0.0001164535492852304, "loss": 0.7487, "step": 30122 }, { "epoch": 0.7734741727529645, "grad_norm": 0.765625, "learning_rate": 0.00011644914590902534, "loss": 0.8637, "step": 30123 }, { "epoch": 0.7734998499488863, "grad_norm": 0.78125, "learning_rate": 0.00011644474250003861, "loss": 0.8238, "step": 30124 }, { "epoch": 0.7735255271448082, "grad_norm": 0.7734375, "learning_rate": 0.00011644033905827897, "loss": 0.926, "step": 30125 }, { "epoch": 0.77355120434073, "grad_norm": 0.8359375, "learning_rate": 0.00011643593558375522, "loss": 0.8989, "step": 30126 }, { "epoch": 0.7735768815366518, "grad_norm": 0.76953125, "learning_rate": 0.00011643153207647616, "loss": 0.7405, "step": 30127 }, { "epoch": 0.7736025587325737, "grad_norm": 0.765625, "learning_rate": 0.00011642712853645054, "loss": 0.757, "step": 30128 }, { "epoch": 0.7736282359284954, "grad_norm": 0.71484375, "learning_rate": 0.00011642272496368714, "loss": 0.7673, "step": 30129 }, { "epoch": 0.7736539131244172, "grad_norm": 0.8125, "learning_rate": 0.0001164183213581947, "loss": 0.7787, "step": 30130 }, { "epoch": 0.7736795903203391, "grad_norm": 0.75390625, "learning_rate": 0.00011641391771998205, "loss": 0.831, "step": 30131 }, { "epoch": 0.7737052675162609, "grad_norm": 0.765625, "learning_rate": 0.00011640951404905793, "loss": 0.7893, "step": 30132 }, { "epoch": 0.7737309447121827, "grad_norm": 0.8515625, "learning_rate": 0.00011640511034543115, "loss": 0.8195, "step": 30133 }, { "epoch": 0.7737566219081046, "grad_norm": 0.796875, "learning_rate": 0.00011640070660911044, "loss": 0.794, "step": 30134 }, { "epoch": 0.7737822991040263, "grad_norm": 0.75390625, "learning_rate": 0.00011639630284010463, "loss": 0.7827, "step": 30135 }, { "epoch": 0.7738079762999481, "grad_norm": 0.78515625, "learning_rate": 0.00011639189903842247, "loss": 0.8281, "step": 30136 }, { "epoch": 0.77383365349587, "grad_norm": 0.8203125, "learning_rate": 0.00011638749520407274, "loss": 0.8896, "step": 30137 }, { "epoch": 0.7738593306917918, "grad_norm": 0.7578125, "learning_rate": 0.0001163830913370642, "loss": 0.8291, "step": 30138 }, { "epoch": 0.7738850078877136, "grad_norm": 0.828125, "learning_rate": 0.00011637868743740563, "loss": 0.765, "step": 30139 }, { "epoch": 0.7739106850836355, "grad_norm": 0.77734375, "learning_rate": 0.00011637428350510582, "loss": 0.8051, "step": 30140 }, { "epoch": 0.7739363622795573, "grad_norm": 0.78515625, "learning_rate": 0.00011636987954017356, "loss": 0.809, "step": 30141 }, { "epoch": 0.773962039475479, "grad_norm": 0.82421875, "learning_rate": 0.00011636547554261761, "loss": 0.692, "step": 30142 }, { "epoch": 0.7739877166714009, "grad_norm": 0.7890625, "learning_rate": 0.00011636107151244673, "loss": 0.889, "step": 30143 }, { "epoch": 0.7740133938673227, "grad_norm": 0.65625, "learning_rate": 0.00011635666744966969, "loss": 0.6886, "step": 30144 }, { "epoch": 0.7740390710632445, "grad_norm": 0.75, "learning_rate": 0.00011635226335429532, "loss": 0.7648, "step": 30145 }, { "epoch": 0.7740647482591664, "grad_norm": 0.84765625, "learning_rate": 0.00011634785922633238, "loss": 0.82, "step": 30146 }, { "epoch": 0.7740904254550882, "grad_norm": 0.76953125, "learning_rate": 0.00011634345506578958, "loss": 0.7763, "step": 30147 }, { "epoch": 0.77411610265101, "grad_norm": 0.80859375, "learning_rate": 0.00011633905087267578, "loss": 0.9132, "step": 30148 }, { "epoch": 0.7741417798469318, "grad_norm": 0.78125, "learning_rate": 0.00011633464664699978, "loss": 0.8173, "step": 30149 }, { "epoch": 0.7741674570428536, "grad_norm": 0.85546875, "learning_rate": 0.00011633024238877022, "loss": 0.8494, "step": 30150 }, { "epoch": 0.7741931342387754, "grad_norm": 0.796875, "learning_rate": 0.000116325838097996, "loss": 0.8775, "step": 30151 }, { "epoch": 0.7742188114346973, "grad_norm": 0.82421875, "learning_rate": 0.00011632143377468588, "loss": 0.8756, "step": 30152 }, { "epoch": 0.7742444886306191, "grad_norm": 0.8828125, "learning_rate": 0.00011631702941884856, "loss": 0.7634, "step": 30153 }, { "epoch": 0.774270165826541, "grad_norm": 0.796875, "learning_rate": 0.00011631262503049292, "loss": 0.9482, "step": 30154 }, { "epoch": 0.7742958430224627, "grad_norm": 0.8515625, "learning_rate": 0.00011630822060962769, "loss": 0.8704, "step": 30155 }, { "epoch": 0.7743215202183845, "grad_norm": 0.80859375, "learning_rate": 0.00011630381615626165, "loss": 0.8172, "step": 30156 }, { "epoch": 0.7743471974143064, "grad_norm": 0.73828125, "learning_rate": 0.00011629941167040357, "loss": 0.8139, "step": 30157 }, { "epoch": 0.7743728746102282, "grad_norm": 0.76171875, "learning_rate": 0.00011629500715206221, "loss": 0.7941, "step": 30158 }, { "epoch": 0.77439855180615, "grad_norm": 0.7578125, "learning_rate": 0.0001162906026012464, "loss": 0.8831, "step": 30159 }, { "epoch": 0.7744242290020719, "grad_norm": 0.8671875, "learning_rate": 0.00011628619801796488, "loss": 0.8252, "step": 30160 }, { "epoch": 0.7744499061979937, "grad_norm": 0.7578125, "learning_rate": 0.00011628179340222643, "loss": 0.7631, "step": 30161 }, { "epoch": 0.7744755833939154, "grad_norm": 0.8203125, "learning_rate": 0.00011627738875403986, "loss": 0.9224, "step": 30162 }, { "epoch": 0.7745012605898373, "grad_norm": 0.73828125, "learning_rate": 0.00011627298407341392, "loss": 0.8093, "step": 30163 }, { "epoch": 0.7745269377857591, "grad_norm": 0.76953125, "learning_rate": 0.00011626857936035738, "loss": 0.8781, "step": 30164 }, { "epoch": 0.7745526149816809, "grad_norm": 0.74609375, "learning_rate": 0.00011626417461487904, "loss": 0.7273, "step": 30165 }, { "epoch": 0.7745782921776028, "grad_norm": 0.7265625, "learning_rate": 0.00011625976983698765, "loss": 0.8793, "step": 30166 }, { "epoch": 0.7746039693735246, "grad_norm": 0.8125, "learning_rate": 0.00011625536502669204, "loss": 0.8249, "step": 30167 }, { "epoch": 0.7746296465694463, "grad_norm": 0.765625, "learning_rate": 0.00011625096018400095, "loss": 0.6469, "step": 30168 }, { "epoch": 0.7746553237653682, "grad_norm": 0.84765625, "learning_rate": 0.00011624655530892314, "loss": 0.8561, "step": 30169 }, { "epoch": 0.77468100096129, "grad_norm": 0.83203125, "learning_rate": 0.00011624215040146745, "loss": 0.8952, "step": 30170 }, { "epoch": 0.7747066781572118, "grad_norm": 1.453125, "learning_rate": 0.00011623774546164256, "loss": 0.7216, "step": 30171 }, { "epoch": 0.7747323553531337, "grad_norm": 0.82421875, "learning_rate": 0.00011623334048945736, "loss": 0.8855, "step": 30172 }, { "epoch": 0.7747580325490555, "grad_norm": 0.7734375, "learning_rate": 0.00011622893548492058, "loss": 0.7416, "step": 30173 }, { "epoch": 0.7747837097449773, "grad_norm": 0.77734375, "learning_rate": 0.00011622453044804096, "loss": 0.8571, "step": 30174 }, { "epoch": 0.7748093869408991, "grad_norm": 0.76171875, "learning_rate": 0.00011622012537882736, "loss": 0.7271, "step": 30175 }, { "epoch": 0.7748350641368209, "grad_norm": 0.8046875, "learning_rate": 0.00011621572027728848, "loss": 0.8602, "step": 30176 }, { "epoch": 0.7748607413327427, "grad_norm": 0.7109375, "learning_rate": 0.00011621131514343315, "loss": 0.8421, "step": 30177 }, { "epoch": 0.7748864185286646, "grad_norm": 0.921875, "learning_rate": 0.00011620690997727015, "loss": 0.8411, "step": 30178 }, { "epoch": 0.7749120957245864, "grad_norm": 0.76953125, "learning_rate": 0.00011620250477880821, "loss": 0.7404, "step": 30179 }, { "epoch": 0.7749377729205082, "grad_norm": 0.79296875, "learning_rate": 0.00011619809954805616, "loss": 0.7991, "step": 30180 }, { "epoch": 0.7749634501164301, "grad_norm": 0.7421875, "learning_rate": 0.00011619369428502276, "loss": 0.798, "step": 30181 }, { "epoch": 0.7749891273123518, "grad_norm": 0.75390625, "learning_rate": 0.0001161892889897168, "loss": 0.7325, "step": 30182 }, { "epoch": 0.7750148045082736, "grad_norm": 0.75, "learning_rate": 0.00011618488366214703, "loss": 0.692, "step": 30183 }, { "epoch": 0.7750404817041955, "grad_norm": 0.7578125, "learning_rate": 0.00011618047830232226, "loss": 0.7939, "step": 30184 }, { "epoch": 0.7750661589001173, "grad_norm": 0.79296875, "learning_rate": 0.00011617607291025125, "loss": 0.7997, "step": 30185 }, { "epoch": 0.7750918360960392, "grad_norm": 0.76953125, "learning_rate": 0.00011617166748594279, "loss": 0.7677, "step": 30186 }, { "epoch": 0.775117513291961, "grad_norm": 0.76953125, "learning_rate": 0.00011616726202940565, "loss": 0.8706, "step": 30187 }, { "epoch": 0.7751431904878827, "grad_norm": 0.70703125, "learning_rate": 0.00011616285654064865, "loss": 0.8507, "step": 30188 }, { "epoch": 0.7751688676838046, "grad_norm": 0.80859375, "learning_rate": 0.00011615845101968053, "loss": 0.81, "step": 30189 }, { "epoch": 0.7751945448797264, "grad_norm": 0.7734375, "learning_rate": 0.00011615404546651003, "loss": 0.8785, "step": 30190 }, { "epoch": 0.7752202220756482, "grad_norm": 0.78515625, "learning_rate": 0.00011614963988114602, "loss": 0.7318, "step": 30191 }, { "epoch": 0.7752458992715701, "grad_norm": 0.7421875, "learning_rate": 0.0001161452342635972, "loss": 0.7277, "step": 30192 }, { "epoch": 0.7752715764674919, "grad_norm": 0.8125, "learning_rate": 0.00011614082861387243, "loss": 0.8511, "step": 30193 }, { "epoch": 0.7752972536634137, "grad_norm": 0.7578125, "learning_rate": 0.00011613642293198044, "loss": 0.7619, "step": 30194 }, { "epoch": 0.7753229308593355, "grad_norm": 0.76953125, "learning_rate": 0.00011613201721793, "loss": 0.6576, "step": 30195 }, { "epoch": 0.7753486080552573, "grad_norm": 0.79296875, "learning_rate": 0.00011612761147172992, "loss": 0.9245, "step": 30196 }, { "epoch": 0.7753742852511791, "grad_norm": 0.8125, "learning_rate": 0.00011612320569338894, "loss": 0.8048, "step": 30197 }, { "epoch": 0.775399962447101, "grad_norm": 0.859375, "learning_rate": 0.0001161187998829159, "loss": 0.9123, "step": 30198 }, { "epoch": 0.7754256396430228, "grad_norm": 0.796875, "learning_rate": 0.00011611439404031952, "loss": 0.8425, "step": 30199 }, { "epoch": 0.7754513168389446, "grad_norm": 0.86328125, "learning_rate": 0.00011610998816560863, "loss": 0.8225, "step": 30200 }, { "epoch": 0.7754769940348665, "grad_norm": 0.8203125, "learning_rate": 0.00011610558225879198, "loss": 0.8934, "step": 30201 }, { "epoch": 0.7755026712307882, "grad_norm": 0.765625, "learning_rate": 0.00011610117631987835, "loss": 0.7919, "step": 30202 }, { "epoch": 0.77552834842671, "grad_norm": 0.9375, "learning_rate": 0.00011609677034887653, "loss": 0.8549, "step": 30203 }, { "epoch": 0.7755540256226319, "grad_norm": 0.83203125, "learning_rate": 0.00011609236434579534, "loss": 0.727, "step": 30204 }, { "epoch": 0.7755797028185537, "grad_norm": 0.859375, "learning_rate": 0.00011608795831064347, "loss": 1.0123, "step": 30205 }, { "epoch": 0.7756053800144755, "grad_norm": 0.8671875, "learning_rate": 0.00011608355224342977, "loss": 0.9394, "step": 30206 }, { "epoch": 0.7756310572103974, "grad_norm": 0.80078125, "learning_rate": 0.00011607914614416303, "loss": 0.9006, "step": 30207 }, { "epoch": 0.7756567344063191, "grad_norm": 0.80859375, "learning_rate": 0.00011607474001285197, "loss": 0.7703, "step": 30208 }, { "epoch": 0.7756824116022409, "grad_norm": 1.125, "learning_rate": 0.00011607033384950543, "loss": 0.9776, "step": 30209 }, { "epoch": 0.7757080887981628, "grad_norm": 0.828125, "learning_rate": 0.00011606592765413216, "loss": 0.7874, "step": 30210 }, { "epoch": 0.7757337659940846, "grad_norm": 0.7421875, "learning_rate": 0.00011606152142674092, "loss": 0.8948, "step": 30211 }, { "epoch": 0.7757594431900064, "grad_norm": 0.98046875, "learning_rate": 0.00011605711516734055, "loss": 0.8895, "step": 30212 }, { "epoch": 0.7757851203859283, "grad_norm": 0.8359375, "learning_rate": 0.00011605270887593978, "loss": 0.7804, "step": 30213 }, { "epoch": 0.7758107975818501, "grad_norm": 0.79296875, "learning_rate": 0.0001160483025525474, "loss": 0.9291, "step": 30214 }, { "epoch": 0.7758364747777718, "grad_norm": 0.77734375, "learning_rate": 0.00011604389619717226, "loss": 0.7666, "step": 30215 }, { "epoch": 0.7758621519736937, "grad_norm": 0.76953125, "learning_rate": 0.00011603948980982302, "loss": 0.7709, "step": 30216 }, { "epoch": 0.7758878291696155, "grad_norm": 0.765625, "learning_rate": 0.00011603508339050857, "loss": 0.796, "step": 30217 }, { "epoch": 0.7759135063655374, "grad_norm": 0.73046875, "learning_rate": 0.00011603067693923764, "loss": 0.7955, "step": 30218 }, { "epoch": 0.7759391835614592, "grad_norm": 0.796875, "learning_rate": 0.00011602627045601897, "loss": 0.9251, "step": 30219 }, { "epoch": 0.775964860757381, "grad_norm": 0.8046875, "learning_rate": 0.00011602186394086146, "loss": 0.8551, "step": 30220 }, { "epoch": 0.7759905379533029, "grad_norm": 0.79296875, "learning_rate": 0.00011601745739377379, "loss": 0.9045, "step": 30221 }, { "epoch": 0.7760162151492246, "grad_norm": 0.8125, "learning_rate": 0.00011601305081476477, "loss": 0.8173, "step": 30222 }, { "epoch": 0.7760418923451464, "grad_norm": 0.7578125, "learning_rate": 0.00011600864420384319, "loss": 0.8862, "step": 30223 }, { "epoch": 0.7760675695410683, "grad_norm": 0.7890625, "learning_rate": 0.00011600423756101781, "loss": 0.899, "step": 30224 }, { "epoch": 0.7760932467369901, "grad_norm": 0.76953125, "learning_rate": 0.00011599983088629747, "loss": 0.7974, "step": 30225 }, { "epoch": 0.7761189239329119, "grad_norm": 0.74609375, "learning_rate": 0.00011599542417969089, "loss": 0.8388, "step": 30226 }, { "epoch": 0.7761446011288338, "grad_norm": 0.78515625, "learning_rate": 0.00011599101744120687, "loss": 0.7187, "step": 30227 }, { "epoch": 0.7761702783247555, "grad_norm": 0.76953125, "learning_rate": 0.00011598661067085424, "loss": 0.8215, "step": 30228 }, { "epoch": 0.7761959555206773, "grad_norm": 0.79296875, "learning_rate": 0.00011598220386864167, "loss": 0.8495, "step": 30229 }, { "epoch": 0.7762216327165992, "grad_norm": 0.85546875, "learning_rate": 0.00011597779703457804, "loss": 0.9045, "step": 30230 }, { "epoch": 0.776247309912521, "grad_norm": 0.7890625, "learning_rate": 0.00011597339016867212, "loss": 0.778, "step": 30231 }, { "epoch": 0.7762729871084428, "grad_norm": 0.84375, "learning_rate": 0.00011596898327093266, "loss": 0.7782, "step": 30232 }, { "epoch": 0.7762986643043647, "grad_norm": 0.79296875, "learning_rate": 0.00011596457634136846, "loss": 0.7232, "step": 30233 }, { "epoch": 0.7763243415002865, "grad_norm": 0.71484375, "learning_rate": 0.00011596016937998833, "loss": 0.7594, "step": 30234 }, { "epoch": 0.7763500186962082, "grad_norm": 0.79296875, "learning_rate": 0.000115955762386801, "loss": 0.9056, "step": 30235 }, { "epoch": 0.7763756958921301, "grad_norm": 0.734375, "learning_rate": 0.00011595135536181527, "loss": 0.8811, "step": 30236 }, { "epoch": 0.7764013730880519, "grad_norm": 0.82421875, "learning_rate": 0.00011594694830503994, "loss": 0.8109, "step": 30237 }, { "epoch": 0.7764270502839737, "grad_norm": 0.79296875, "learning_rate": 0.00011594254121648376, "loss": 0.8244, "step": 30238 }, { "epoch": 0.7764527274798956, "grad_norm": 0.7421875, "learning_rate": 0.00011593813409615555, "loss": 0.7894, "step": 30239 }, { "epoch": 0.7764784046758174, "grad_norm": 0.81640625, "learning_rate": 0.00011593372694406408, "loss": 0.7458, "step": 30240 }, { "epoch": 0.7765040818717392, "grad_norm": 0.7734375, "learning_rate": 0.00011592931976021815, "loss": 0.8203, "step": 30241 }, { "epoch": 0.776529759067661, "grad_norm": 0.8359375, "learning_rate": 0.00011592491254462652, "loss": 0.9035, "step": 30242 }, { "epoch": 0.7765554362635828, "grad_norm": 0.7734375, "learning_rate": 0.00011592050529729794, "loss": 0.94, "step": 30243 }, { "epoch": 0.7765811134595046, "grad_norm": 0.81640625, "learning_rate": 0.00011591609801824126, "loss": 0.7837, "step": 30244 }, { "epoch": 0.7766067906554265, "grad_norm": 0.8046875, "learning_rate": 0.00011591169070746523, "loss": 0.803, "step": 30245 }, { "epoch": 0.7766324678513483, "grad_norm": 0.77734375, "learning_rate": 0.00011590728336497863, "loss": 0.8117, "step": 30246 }, { "epoch": 0.7766581450472702, "grad_norm": 0.7734375, "learning_rate": 0.00011590287599079028, "loss": 0.8138, "step": 30247 }, { "epoch": 0.7766838222431919, "grad_norm": 0.84375, "learning_rate": 0.00011589846858490889, "loss": 0.7781, "step": 30248 }, { "epoch": 0.7767094994391137, "grad_norm": 0.859375, "learning_rate": 0.00011589406114734333, "loss": 0.8779, "step": 30249 }, { "epoch": 0.7767351766350356, "grad_norm": 0.73828125, "learning_rate": 0.00011588965367810234, "loss": 0.8025, "step": 30250 }, { "epoch": 0.7767608538309574, "grad_norm": 0.796875, "learning_rate": 0.00011588524617719466, "loss": 0.8937, "step": 30251 }, { "epoch": 0.7767865310268792, "grad_norm": 0.81640625, "learning_rate": 0.00011588083864462915, "loss": 0.783, "step": 30252 }, { "epoch": 0.7768122082228011, "grad_norm": 0.71875, "learning_rate": 0.00011587643108041457, "loss": 0.6687, "step": 30253 }, { "epoch": 0.7768378854187229, "grad_norm": 0.72265625, "learning_rate": 0.0001158720234845597, "loss": 0.7829, "step": 30254 }, { "epoch": 0.7768635626146446, "grad_norm": 0.828125, "learning_rate": 0.00011586761585707331, "loss": 0.858, "step": 30255 }, { "epoch": 0.7768892398105665, "grad_norm": 0.82421875, "learning_rate": 0.00011586320819796418, "loss": 0.9224, "step": 30256 }, { "epoch": 0.7769149170064883, "grad_norm": 0.76171875, "learning_rate": 0.00011585880050724114, "loss": 0.73, "step": 30257 }, { "epoch": 0.7769405942024101, "grad_norm": 0.78125, "learning_rate": 0.00011585439278491292, "loss": 0.8432, "step": 30258 }, { "epoch": 0.776966271398332, "grad_norm": 0.74609375, "learning_rate": 0.00011584998503098832, "loss": 0.8113, "step": 30259 }, { "epoch": 0.7769919485942538, "grad_norm": 0.8203125, "learning_rate": 0.00011584557724547615, "loss": 1.0128, "step": 30260 }, { "epoch": 0.7770176257901756, "grad_norm": 0.76953125, "learning_rate": 0.00011584116942838519, "loss": 0.8139, "step": 30261 }, { "epoch": 0.7770433029860974, "grad_norm": 0.83984375, "learning_rate": 0.00011583676157972419, "loss": 0.9412, "step": 30262 }, { "epoch": 0.7770689801820192, "grad_norm": 0.7734375, "learning_rate": 0.00011583235369950196, "loss": 0.7242, "step": 30263 }, { "epoch": 0.777094657377941, "grad_norm": 0.80078125, "learning_rate": 0.00011582794578772727, "loss": 0.7517, "step": 30264 }, { "epoch": 0.7771203345738629, "grad_norm": 0.78515625, "learning_rate": 0.00011582353784440891, "loss": 0.8317, "step": 30265 }, { "epoch": 0.7771460117697847, "grad_norm": 0.83203125, "learning_rate": 0.00011581912986955567, "loss": 0.8617, "step": 30266 }, { "epoch": 0.7771716889657065, "grad_norm": 0.73046875, "learning_rate": 0.00011581472186317637, "loss": 0.8203, "step": 30267 }, { "epoch": 0.7771973661616283, "grad_norm": 0.73828125, "learning_rate": 0.00011581031382527973, "loss": 0.7218, "step": 30268 }, { "epoch": 0.7772230433575501, "grad_norm": 0.7734375, "learning_rate": 0.00011580590575587454, "loss": 0.756, "step": 30269 }, { "epoch": 0.7772487205534719, "grad_norm": 0.76953125, "learning_rate": 0.00011580149765496964, "loss": 0.8754, "step": 30270 }, { "epoch": 0.7772743977493938, "grad_norm": 0.73828125, "learning_rate": 0.00011579708952257378, "loss": 0.9336, "step": 30271 }, { "epoch": 0.7773000749453156, "grad_norm": 0.80078125, "learning_rate": 0.00011579268135869572, "loss": 0.9453, "step": 30272 }, { "epoch": 0.7773257521412374, "grad_norm": 0.77734375, "learning_rate": 0.00011578827316334429, "loss": 0.9001, "step": 30273 }, { "epoch": 0.7773514293371593, "grad_norm": 0.81640625, "learning_rate": 0.00011578386493652828, "loss": 0.8717, "step": 30274 }, { "epoch": 0.777377106533081, "grad_norm": 0.9140625, "learning_rate": 0.00011577945667825643, "loss": 0.734, "step": 30275 }, { "epoch": 0.7774027837290028, "grad_norm": 0.85546875, "learning_rate": 0.00011577504838853754, "loss": 0.8833, "step": 30276 }, { "epoch": 0.7774284609249247, "grad_norm": 0.7578125, "learning_rate": 0.00011577064006738043, "loss": 0.7233, "step": 30277 }, { "epoch": 0.7774541381208465, "grad_norm": 1.03125, "learning_rate": 0.00011576623171479384, "loss": 0.7796, "step": 30278 }, { "epoch": 0.7774798153167684, "grad_norm": 0.80859375, "learning_rate": 0.00011576182333078658, "loss": 0.7565, "step": 30279 }, { "epoch": 0.7775054925126902, "grad_norm": 0.77734375, "learning_rate": 0.00011575741491536743, "loss": 0.9595, "step": 30280 }, { "epoch": 0.777531169708612, "grad_norm": 0.90234375, "learning_rate": 0.00011575300646854517, "loss": 0.8866, "step": 30281 }, { "epoch": 0.7775568469045337, "grad_norm": 0.7421875, "learning_rate": 0.0001157485979903286, "loss": 0.7172, "step": 30282 }, { "epoch": 0.7775825241004556, "grad_norm": 0.88671875, "learning_rate": 0.00011574418948072647, "loss": 0.9082, "step": 30283 }, { "epoch": 0.7776082012963774, "grad_norm": 0.75, "learning_rate": 0.00011573978093974763, "loss": 0.7525, "step": 30284 }, { "epoch": 0.7776338784922993, "grad_norm": 0.82421875, "learning_rate": 0.00011573537236740082, "loss": 0.8912, "step": 30285 }, { "epoch": 0.7776595556882211, "grad_norm": 0.796875, "learning_rate": 0.00011573096376369479, "loss": 0.9076, "step": 30286 }, { "epoch": 0.7776852328841429, "grad_norm": 0.8125, "learning_rate": 0.00011572655512863843, "loss": 0.8963, "step": 30287 }, { "epoch": 0.7777109100800647, "grad_norm": 0.765625, "learning_rate": 0.00011572214646224045, "loss": 0.7281, "step": 30288 }, { "epoch": 0.7777365872759865, "grad_norm": 0.83203125, "learning_rate": 0.00011571773776450964, "loss": 0.836, "step": 30289 }, { "epoch": 0.7777622644719083, "grad_norm": 0.79296875, "learning_rate": 0.0001157133290354548, "loss": 0.982, "step": 30290 }, { "epoch": 0.7777879416678302, "grad_norm": 0.796875, "learning_rate": 0.0001157089202750847, "loss": 0.8624, "step": 30291 }, { "epoch": 0.777813618863752, "grad_norm": 0.82421875, "learning_rate": 0.00011570451148340817, "loss": 0.8051, "step": 30292 }, { "epoch": 0.7778392960596738, "grad_norm": 0.734375, "learning_rate": 0.00011570010266043397, "loss": 0.9161, "step": 30293 }, { "epoch": 0.7778649732555957, "grad_norm": 0.8125, "learning_rate": 0.00011569569380617086, "loss": 0.8128, "step": 30294 }, { "epoch": 0.7778906504515174, "grad_norm": 0.81640625, "learning_rate": 0.00011569128492062766, "loss": 0.724, "step": 30295 }, { "epoch": 0.7779163276474392, "grad_norm": 0.7890625, "learning_rate": 0.00011568687600381314, "loss": 0.8316, "step": 30296 }, { "epoch": 0.7779420048433611, "grad_norm": 0.7578125, "learning_rate": 0.00011568246705573611, "loss": 0.7565, "step": 30297 }, { "epoch": 0.7779676820392829, "grad_norm": 0.73046875, "learning_rate": 0.00011567805807640532, "loss": 0.8245, "step": 30298 }, { "epoch": 0.7779933592352047, "grad_norm": 0.7421875, "learning_rate": 0.00011567364906582958, "loss": 0.8129, "step": 30299 }, { "epoch": 0.7780190364311266, "grad_norm": 1.1796875, "learning_rate": 0.00011566924002401772, "loss": 0.8866, "step": 30300 }, { "epoch": 0.7780447136270484, "grad_norm": 0.7734375, "learning_rate": 0.0001156648309509784, "loss": 0.7898, "step": 30301 }, { "epoch": 0.7780703908229701, "grad_norm": 0.85546875, "learning_rate": 0.00011566042184672052, "loss": 0.9796, "step": 30302 }, { "epoch": 0.778096068018892, "grad_norm": 0.76953125, "learning_rate": 0.00011565601271125286, "loss": 0.6649, "step": 30303 }, { "epoch": 0.7781217452148138, "grad_norm": 0.83203125, "learning_rate": 0.00011565160354458414, "loss": 0.8086, "step": 30304 }, { "epoch": 0.7781474224107356, "grad_norm": 0.76953125, "learning_rate": 0.00011564719434672322, "loss": 0.7994, "step": 30305 }, { "epoch": 0.7781730996066575, "grad_norm": 0.81640625, "learning_rate": 0.00011564278511767883, "loss": 0.7988, "step": 30306 }, { "epoch": 0.7781987768025793, "grad_norm": 0.87890625, "learning_rate": 0.0001156383758574598, "loss": 0.8648, "step": 30307 }, { "epoch": 0.778224453998501, "grad_norm": 0.796875, "learning_rate": 0.00011563396656607491, "loss": 0.7779, "step": 30308 }, { "epoch": 0.7782501311944229, "grad_norm": 0.7890625, "learning_rate": 0.0001156295572435329, "loss": 0.7931, "step": 30309 }, { "epoch": 0.7782758083903447, "grad_norm": 0.7421875, "learning_rate": 0.00011562514788984262, "loss": 0.9285, "step": 30310 }, { "epoch": 0.7783014855862666, "grad_norm": 0.80078125, "learning_rate": 0.00011562073850501285, "loss": 0.8389, "step": 30311 }, { "epoch": 0.7783271627821884, "grad_norm": 0.7734375, "learning_rate": 0.00011561632908905231, "loss": 0.7894, "step": 30312 }, { "epoch": 0.7783528399781102, "grad_norm": 0.76953125, "learning_rate": 0.00011561191964196989, "loss": 0.8957, "step": 30313 }, { "epoch": 0.7783785171740321, "grad_norm": 0.77734375, "learning_rate": 0.00011560751016377431, "loss": 0.8795, "step": 30314 }, { "epoch": 0.7784041943699538, "grad_norm": 0.8125, "learning_rate": 0.00011560310065447432, "loss": 0.8164, "step": 30315 }, { "epoch": 0.7784298715658756, "grad_norm": 0.77734375, "learning_rate": 0.00011559869111407882, "loss": 0.7459, "step": 30316 }, { "epoch": 0.7784555487617975, "grad_norm": 0.7890625, "learning_rate": 0.0001155942815425965, "loss": 0.7938, "step": 30317 }, { "epoch": 0.7784812259577193, "grad_norm": 0.77734375, "learning_rate": 0.00011558987194003621, "loss": 0.9072, "step": 30318 }, { "epoch": 0.7785069031536411, "grad_norm": 0.8046875, "learning_rate": 0.00011558546230640672, "loss": 0.8049, "step": 30319 }, { "epoch": 0.778532580349563, "grad_norm": 0.7578125, "learning_rate": 0.0001155810526417168, "loss": 0.8281, "step": 30320 }, { "epoch": 0.7785582575454848, "grad_norm": 0.79296875, "learning_rate": 0.00011557664294597526, "loss": 0.8073, "step": 30321 }, { "epoch": 0.7785839347414065, "grad_norm": 0.9140625, "learning_rate": 0.00011557223321919086, "loss": 0.834, "step": 30322 }, { "epoch": 0.7786096119373284, "grad_norm": 0.796875, "learning_rate": 0.00011556782346137239, "loss": 0.8481, "step": 30323 }, { "epoch": 0.7786352891332502, "grad_norm": 0.796875, "learning_rate": 0.00011556341367252868, "loss": 0.789, "step": 30324 }, { "epoch": 0.778660966329172, "grad_norm": 0.71484375, "learning_rate": 0.00011555900385266849, "loss": 0.775, "step": 30325 }, { "epoch": 0.7786866435250939, "grad_norm": 0.76953125, "learning_rate": 0.00011555459400180063, "loss": 0.7845, "step": 30326 }, { "epoch": 0.7787123207210157, "grad_norm": 0.84375, "learning_rate": 0.00011555018411993383, "loss": 0.8683, "step": 30327 }, { "epoch": 0.7787379979169374, "grad_norm": 0.7578125, "learning_rate": 0.00011554577420707693, "loss": 0.8753, "step": 30328 }, { "epoch": 0.7787636751128593, "grad_norm": 0.80859375, "learning_rate": 0.00011554136426323871, "loss": 0.8318, "step": 30329 }, { "epoch": 0.7787893523087811, "grad_norm": 0.84765625, "learning_rate": 0.00011553695428842797, "loss": 0.7895, "step": 30330 }, { "epoch": 0.7788150295047029, "grad_norm": 0.75, "learning_rate": 0.00011553254428265346, "loss": 0.8025, "step": 30331 }, { "epoch": 0.7788407067006248, "grad_norm": 0.82421875, "learning_rate": 0.000115528134245924, "loss": 0.8618, "step": 30332 }, { "epoch": 0.7788663838965466, "grad_norm": 0.80078125, "learning_rate": 0.00011552372417824837, "loss": 0.8296, "step": 30333 }, { "epoch": 0.7788920610924684, "grad_norm": 1.46875, "learning_rate": 0.00011551931407963536, "loss": 0.7677, "step": 30334 }, { "epoch": 0.7789177382883902, "grad_norm": 0.7890625, "learning_rate": 0.00011551490395009377, "loss": 0.8486, "step": 30335 }, { "epoch": 0.778943415484312, "grad_norm": 0.75390625, "learning_rate": 0.00011551049378963236, "loss": 0.6766, "step": 30336 }, { "epoch": 0.7789690926802338, "grad_norm": 0.72265625, "learning_rate": 0.00011550608359825994, "loss": 0.7376, "step": 30337 }, { "epoch": 0.7789947698761557, "grad_norm": 0.76953125, "learning_rate": 0.00011550167337598532, "loss": 0.7295, "step": 30338 }, { "epoch": 0.7790204470720775, "grad_norm": 0.7890625, "learning_rate": 0.00011549726312281723, "loss": 0.7983, "step": 30339 }, { "epoch": 0.7790461242679994, "grad_norm": 0.76171875, "learning_rate": 0.00011549285283876455, "loss": 0.7295, "step": 30340 }, { "epoch": 0.7790718014639212, "grad_norm": 0.7265625, "learning_rate": 0.00011548844252383595, "loss": 0.7151, "step": 30341 }, { "epoch": 0.7790974786598429, "grad_norm": 0.9375, "learning_rate": 0.00011548403217804031, "loss": 0.8789, "step": 30342 }, { "epoch": 0.7791231558557647, "grad_norm": 0.703125, "learning_rate": 0.0001154796218013864, "loss": 0.6107, "step": 30343 }, { "epoch": 0.7791488330516866, "grad_norm": 0.80859375, "learning_rate": 0.00011547521139388298, "loss": 0.8328, "step": 30344 }, { "epoch": 0.7791745102476084, "grad_norm": 0.7734375, "learning_rate": 0.00011547080095553888, "loss": 0.7869, "step": 30345 }, { "epoch": 0.7792001874435303, "grad_norm": 0.77734375, "learning_rate": 0.00011546639048636287, "loss": 0.8008, "step": 30346 }, { "epoch": 0.7792258646394521, "grad_norm": 0.7421875, "learning_rate": 0.00011546197998636374, "loss": 0.7453, "step": 30347 }, { "epoch": 0.7792515418353738, "grad_norm": 0.734375, "learning_rate": 0.00011545756945555029, "loss": 0.7856, "step": 30348 }, { "epoch": 0.7792772190312957, "grad_norm": 0.75390625, "learning_rate": 0.00011545315889393126, "loss": 0.7527, "step": 30349 }, { "epoch": 0.7793028962272175, "grad_norm": 0.875, "learning_rate": 0.0001154487483015155, "loss": 0.8979, "step": 30350 }, { "epoch": 0.7793285734231393, "grad_norm": 0.92578125, "learning_rate": 0.00011544433767831181, "loss": 0.8014, "step": 30351 }, { "epoch": 0.7793542506190612, "grad_norm": 0.82421875, "learning_rate": 0.00011543992702432891, "loss": 0.8, "step": 30352 }, { "epoch": 0.779379927814983, "grad_norm": 0.80859375, "learning_rate": 0.00011543551633957568, "loss": 0.9254, "step": 30353 }, { "epoch": 0.7794056050109048, "grad_norm": 0.765625, "learning_rate": 0.00011543110562406081, "loss": 0.7802, "step": 30354 }, { "epoch": 0.7794312822068266, "grad_norm": 0.8203125, "learning_rate": 0.00011542669487779317, "loss": 0.7551, "step": 30355 }, { "epoch": 0.7794569594027484, "grad_norm": 0.8359375, "learning_rate": 0.00011542228410078151, "loss": 0.9451, "step": 30356 }, { "epoch": 0.7794826365986702, "grad_norm": 0.78515625, "learning_rate": 0.00011541787329303462, "loss": 0.8038, "step": 30357 }, { "epoch": 0.7795083137945921, "grad_norm": 1.40625, "learning_rate": 0.0001154134624545613, "loss": 0.8844, "step": 30358 }, { "epoch": 0.7795339909905139, "grad_norm": 0.6953125, "learning_rate": 0.00011540905158537038, "loss": 0.7721, "step": 30359 }, { "epoch": 0.7795596681864357, "grad_norm": 0.73828125, "learning_rate": 0.00011540464068547059, "loss": 0.8045, "step": 30360 }, { "epoch": 0.7795853453823575, "grad_norm": 0.98046875, "learning_rate": 0.00011540022975487073, "loss": 0.8157, "step": 30361 }, { "epoch": 0.7796110225782793, "grad_norm": 0.80078125, "learning_rate": 0.00011539581879357961, "loss": 0.7327, "step": 30362 }, { "epoch": 0.7796366997742011, "grad_norm": 0.69140625, "learning_rate": 0.00011539140780160602, "loss": 0.6964, "step": 30363 }, { "epoch": 0.779662376970123, "grad_norm": 0.77734375, "learning_rate": 0.00011538699677895874, "loss": 0.7514, "step": 30364 }, { "epoch": 0.7796880541660448, "grad_norm": 0.71875, "learning_rate": 0.00011538258572564657, "loss": 0.8225, "step": 30365 }, { "epoch": 0.7797137313619666, "grad_norm": 0.8203125, "learning_rate": 0.00011537817464167831, "loss": 0.8597, "step": 30366 }, { "epoch": 0.7797394085578885, "grad_norm": 0.8046875, "learning_rate": 0.00011537376352706271, "loss": 0.9234, "step": 30367 }, { "epoch": 0.7797650857538102, "grad_norm": 0.73828125, "learning_rate": 0.00011536935238180857, "loss": 0.8826, "step": 30368 }, { "epoch": 0.779790762949732, "grad_norm": 0.8046875, "learning_rate": 0.00011536494120592474, "loss": 0.7424, "step": 30369 }, { "epoch": 0.7798164401456539, "grad_norm": 0.7578125, "learning_rate": 0.00011536052999941996, "loss": 0.7193, "step": 30370 }, { "epoch": 0.7798421173415757, "grad_norm": 0.82421875, "learning_rate": 0.00011535611876230301, "loss": 0.8454, "step": 30371 }, { "epoch": 0.7798677945374975, "grad_norm": 0.71875, "learning_rate": 0.00011535170749458275, "loss": 0.7794, "step": 30372 }, { "epoch": 0.7798934717334194, "grad_norm": 0.78125, "learning_rate": 0.00011534729619626788, "loss": 0.9343, "step": 30373 }, { "epoch": 0.7799191489293412, "grad_norm": 0.74609375, "learning_rate": 0.00011534288486736724, "loss": 0.7929, "step": 30374 }, { "epoch": 0.779944826125263, "grad_norm": 0.8203125, "learning_rate": 0.00011533847350788965, "loss": 0.8957, "step": 30375 }, { "epoch": 0.7799705033211848, "grad_norm": 0.81640625, "learning_rate": 0.0001153340621178438, "loss": 0.9556, "step": 30376 }, { "epoch": 0.7799961805171066, "grad_norm": 0.75390625, "learning_rate": 0.0001153296506972386, "loss": 0.7498, "step": 30377 }, { "epoch": 0.7800218577130285, "grad_norm": 0.8125, "learning_rate": 0.00011532523924608278, "loss": 0.8445, "step": 30378 }, { "epoch": 0.7800475349089503, "grad_norm": 0.828125, "learning_rate": 0.00011532082776438515, "loss": 0.8238, "step": 30379 }, { "epoch": 0.7800732121048721, "grad_norm": 0.75390625, "learning_rate": 0.00011531641625215451, "loss": 0.7957, "step": 30380 }, { "epoch": 0.7800988893007939, "grad_norm": 0.75390625, "learning_rate": 0.0001153120047093996, "loss": 0.7635, "step": 30381 }, { "epoch": 0.7801245664967157, "grad_norm": 0.79296875, "learning_rate": 0.00011530759313612927, "loss": 0.833, "step": 30382 }, { "epoch": 0.7801502436926375, "grad_norm": 0.75390625, "learning_rate": 0.00011530318153235229, "loss": 0.795, "step": 30383 }, { "epoch": 0.7801759208885594, "grad_norm": 0.70703125, "learning_rate": 0.00011529876989807741, "loss": 0.7694, "step": 30384 }, { "epoch": 0.7802015980844812, "grad_norm": 0.76953125, "learning_rate": 0.00011529435823331352, "loss": 0.896, "step": 30385 }, { "epoch": 0.780227275280403, "grad_norm": 0.734375, "learning_rate": 0.00011528994653806933, "loss": 0.797, "step": 30386 }, { "epoch": 0.7802529524763249, "grad_norm": 0.8515625, "learning_rate": 0.00011528553481235367, "loss": 0.7874, "step": 30387 }, { "epoch": 0.7802786296722466, "grad_norm": 0.72265625, "learning_rate": 0.00011528112305617531, "loss": 0.7932, "step": 30388 }, { "epoch": 0.7803043068681684, "grad_norm": 0.78125, "learning_rate": 0.00011527671126954303, "loss": 0.7348, "step": 30389 }, { "epoch": 0.7803299840640903, "grad_norm": 0.8046875, "learning_rate": 0.00011527229945246568, "loss": 0.7412, "step": 30390 }, { "epoch": 0.7803556612600121, "grad_norm": 0.73828125, "learning_rate": 0.000115267887604952, "loss": 0.8298, "step": 30391 }, { "epoch": 0.7803813384559339, "grad_norm": 0.74609375, "learning_rate": 0.00011526347572701081, "loss": 0.9006, "step": 30392 }, { "epoch": 0.7804070156518558, "grad_norm": 0.76953125, "learning_rate": 0.0001152590638186509, "loss": 0.826, "step": 30393 }, { "epoch": 0.7804326928477776, "grad_norm": 0.8046875, "learning_rate": 0.00011525465187988103, "loss": 0.9139, "step": 30394 }, { "epoch": 0.7804583700436993, "grad_norm": 0.80078125, "learning_rate": 0.00011525023991071003, "loss": 1.0151, "step": 30395 }, { "epoch": 0.7804840472396212, "grad_norm": 0.75, "learning_rate": 0.00011524582791114666, "loss": 0.7512, "step": 30396 }, { "epoch": 0.780509724435543, "grad_norm": 0.890625, "learning_rate": 0.00011524141588119975, "loss": 0.8582, "step": 30397 }, { "epoch": 0.7805354016314648, "grad_norm": 0.78125, "learning_rate": 0.00011523700382087806, "loss": 0.6795, "step": 30398 }, { "epoch": 0.7805610788273867, "grad_norm": 0.74609375, "learning_rate": 0.00011523259173019042, "loss": 0.7664, "step": 30399 }, { "epoch": 0.7805867560233085, "grad_norm": 0.75390625, "learning_rate": 0.00011522817960914558, "loss": 0.7978, "step": 30400 }, { "epoch": 0.7806124332192302, "grad_norm": 0.75390625, "learning_rate": 0.00011522376745775238, "loss": 0.7744, "step": 30401 }, { "epoch": 0.7806381104151521, "grad_norm": 0.828125, "learning_rate": 0.00011521935527601956, "loss": 0.8299, "step": 30402 }, { "epoch": 0.7806637876110739, "grad_norm": 0.75, "learning_rate": 0.00011521494306395595, "loss": 0.8956, "step": 30403 }, { "epoch": 0.7806894648069957, "grad_norm": 0.78515625, "learning_rate": 0.00011521053082157034, "loss": 0.9022, "step": 30404 }, { "epoch": 0.7807151420029176, "grad_norm": 0.78125, "learning_rate": 0.0001152061185488715, "loss": 0.7722, "step": 30405 }, { "epoch": 0.7807408191988394, "grad_norm": 1.1328125, "learning_rate": 0.00011520170624586826, "loss": 0.8163, "step": 30406 }, { "epoch": 0.7807664963947613, "grad_norm": 0.7265625, "learning_rate": 0.00011519729391256939, "loss": 0.7037, "step": 30407 }, { "epoch": 0.780792173590683, "grad_norm": 0.79296875, "learning_rate": 0.00011519288154898366, "loss": 0.8668, "step": 30408 }, { "epoch": 0.7808178507866048, "grad_norm": 0.8046875, "learning_rate": 0.00011518846915511991, "loss": 0.7014, "step": 30409 }, { "epoch": 0.7808435279825267, "grad_norm": 0.7890625, "learning_rate": 0.0001151840567309869, "loss": 0.8733, "step": 30410 }, { "epoch": 0.7808692051784485, "grad_norm": 0.734375, "learning_rate": 0.00011517964427659345, "loss": 0.8451, "step": 30411 }, { "epoch": 0.7808948823743703, "grad_norm": 0.75, "learning_rate": 0.00011517523179194835, "loss": 0.7753, "step": 30412 }, { "epoch": 0.7809205595702922, "grad_norm": 0.84375, "learning_rate": 0.00011517081927706037, "loss": 0.9811, "step": 30413 }, { "epoch": 0.780946236766214, "grad_norm": 0.796875, "learning_rate": 0.0001151664067319383, "loss": 0.7808, "step": 30414 }, { "epoch": 0.7809719139621357, "grad_norm": 0.7421875, "learning_rate": 0.000115161994156591, "loss": 0.8537, "step": 30415 }, { "epoch": 0.7809975911580576, "grad_norm": 0.6953125, "learning_rate": 0.00011515758155102716, "loss": 0.7508, "step": 30416 }, { "epoch": 0.7810232683539794, "grad_norm": 0.75390625, "learning_rate": 0.00011515316891525564, "loss": 0.8848, "step": 30417 }, { "epoch": 0.7810489455499012, "grad_norm": 0.82421875, "learning_rate": 0.00011514875624928526, "loss": 0.726, "step": 30418 }, { "epoch": 0.7810746227458231, "grad_norm": 0.80078125, "learning_rate": 0.00011514434355312476, "loss": 0.8104, "step": 30419 }, { "epoch": 0.7811002999417449, "grad_norm": 0.7578125, "learning_rate": 0.00011513993082678293, "loss": 0.8743, "step": 30420 }, { "epoch": 0.7811259771376666, "grad_norm": 0.77734375, "learning_rate": 0.00011513551807026858, "loss": 0.8408, "step": 30421 }, { "epoch": 0.7811516543335885, "grad_norm": 0.75, "learning_rate": 0.00011513110528359054, "loss": 0.7815, "step": 30422 }, { "epoch": 0.7811773315295103, "grad_norm": 0.78125, "learning_rate": 0.00011512669246675758, "loss": 0.7097, "step": 30423 }, { "epoch": 0.7812030087254321, "grad_norm": 0.75, "learning_rate": 0.00011512227961977844, "loss": 0.7316, "step": 30424 }, { "epoch": 0.781228685921354, "grad_norm": 0.7734375, "learning_rate": 0.00011511786674266203, "loss": 0.7666, "step": 30425 }, { "epoch": 0.7812543631172758, "grad_norm": 0.73828125, "learning_rate": 0.00011511345383541704, "loss": 0.8459, "step": 30426 }, { "epoch": 0.7812800403131976, "grad_norm": 0.8984375, "learning_rate": 0.0001151090408980523, "loss": 0.8564, "step": 30427 }, { "epoch": 0.7813057175091194, "grad_norm": 0.81640625, "learning_rate": 0.00011510462793057661, "loss": 0.8543, "step": 30428 }, { "epoch": 0.7813313947050412, "grad_norm": 0.80859375, "learning_rate": 0.00011510021493299875, "loss": 0.7189, "step": 30429 }, { "epoch": 0.781357071900963, "grad_norm": 0.71484375, "learning_rate": 0.00011509580190532754, "loss": 0.7347, "step": 30430 }, { "epoch": 0.7813827490968849, "grad_norm": 0.73828125, "learning_rate": 0.00011509138884757176, "loss": 0.8251, "step": 30431 }, { "epoch": 0.7814084262928067, "grad_norm": 0.76953125, "learning_rate": 0.00011508697575974021, "loss": 1.0226, "step": 30432 }, { "epoch": 0.7814341034887285, "grad_norm": 0.79296875, "learning_rate": 0.00011508256264184168, "loss": 0.7558, "step": 30433 }, { "epoch": 0.7814597806846504, "grad_norm": 0.98828125, "learning_rate": 0.00011507814949388493, "loss": 0.8211, "step": 30434 }, { "epoch": 0.7814854578805721, "grad_norm": 0.74609375, "learning_rate": 0.00011507373631587883, "loss": 0.8959, "step": 30435 }, { "epoch": 0.781511135076494, "grad_norm": 0.69921875, "learning_rate": 0.00011506932310783212, "loss": 0.7417, "step": 30436 }, { "epoch": 0.7815368122724158, "grad_norm": 0.7890625, "learning_rate": 0.0001150649098697536, "loss": 0.8102, "step": 30437 }, { "epoch": 0.7815624894683376, "grad_norm": 0.67578125, "learning_rate": 0.0001150604966016521, "loss": 0.785, "step": 30438 }, { "epoch": 0.7815881666642595, "grad_norm": 0.828125, "learning_rate": 0.00011505608330353638, "loss": 0.8212, "step": 30439 }, { "epoch": 0.7816138438601813, "grad_norm": 0.69140625, "learning_rate": 0.00011505166997541525, "loss": 0.7253, "step": 30440 }, { "epoch": 0.781639521056103, "grad_norm": 0.74609375, "learning_rate": 0.00011504725661729749, "loss": 0.8434, "step": 30441 }, { "epoch": 0.7816651982520249, "grad_norm": 0.78125, "learning_rate": 0.0001150428432291919, "loss": 0.8956, "step": 30442 }, { "epoch": 0.7816908754479467, "grad_norm": 0.86328125, "learning_rate": 0.00011503842981110731, "loss": 0.892, "step": 30443 }, { "epoch": 0.7817165526438685, "grad_norm": 0.8046875, "learning_rate": 0.00011503401636305247, "loss": 0.9032, "step": 30444 }, { "epoch": 0.7817422298397904, "grad_norm": 0.7421875, "learning_rate": 0.00011502960288503619, "loss": 0.716, "step": 30445 }, { "epoch": 0.7817679070357122, "grad_norm": 0.8046875, "learning_rate": 0.0001150251893770673, "loss": 0.8743, "step": 30446 }, { "epoch": 0.781793584231634, "grad_norm": 0.75, "learning_rate": 0.00011502077583915454, "loss": 0.7999, "step": 30447 }, { "epoch": 0.7818192614275558, "grad_norm": 0.83984375, "learning_rate": 0.0001150163622713067, "loss": 0.7325, "step": 30448 }, { "epoch": 0.7818449386234776, "grad_norm": 0.7734375, "learning_rate": 0.00011501194867353265, "loss": 0.814, "step": 30449 }, { "epoch": 0.7818706158193994, "grad_norm": 0.78125, "learning_rate": 0.00011500753504584115, "loss": 0.8604, "step": 30450 }, { "epoch": 0.7818962930153213, "grad_norm": 0.79296875, "learning_rate": 0.00011500312138824097, "loss": 0.9909, "step": 30451 }, { "epoch": 0.7819219702112431, "grad_norm": 0.81640625, "learning_rate": 0.00011499870770074093, "loss": 0.8629, "step": 30452 }, { "epoch": 0.7819476474071649, "grad_norm": 0.859375, "learning_rate": 0.00011499429398334978, "loss": 0.7347, "step": 30453 }, { "epoch": 0.7819733246030868, "grad_norm": 0.73828125, "learning_rate": 0.0001149898802360764, "loss": 0.8404, "step": 30454 }, { "epoch": 0.7819990017990085, "grad_norm": 0.7890625, "learning_rate": 0.00011498546645892953, "loss": 0.8232, "step": 30455 }, { "epoch": 0.7820246789949303, "grad_norm": 0.71875, "learning_rate": 0.00011498105265191797, "loss": 0.8056, "step": 30456 }, { "epoch": 0.7820503561908522, "grad_norm": 0.78125, "learning_rate": 0.00011497663881505055, "loss": 0.7956, "step": 30457 }, { "epoch": 0.782076033386774, "grad_norm": 0.73828125, "learning_rate": 0.00011497222494833601, "loss": 0.7951, "step": 30458 }, { "epoch": 0.7821017105826958, "grad_norm": 0.9140625, "learning_rate": 0.0001149678110517832, "loss": 0.8656, "step": 30459 }, { "epoch": 0.7821273877786177, "grad_norm": 0.86328125, "learning_rate": 0.0001149633971254009, "loss": 0.9576, "step": 30460 }, { "epoch": 0.7821530649745394, "grad_norm": 1.078125, "learning_rate": 0.00011495898316919789, "loss": 0.8476, "step": 30461 }, { "epoch": 0.7821787421704612, "grad_norm": 0.7578125, "learning_rate": 0.00011495456918318296, "loss": 0.9053, "step": 30462 }, { "epoch": 0.7822044193663831, "grad_norm": 0.74609375, "learning_rate": 0.00011495015516736496, "loss": 0.8045, "step": 30463 }, { "epoch": 0.7822300965623049, "grad_norm": 0.765625, "learning_rate": 0.00011494574112175262, "loss": 0.7231, "step": 30464 }, { "epoch": 0.7822557737582267, "grad_norm": 0.81640625, "learning_rate": 0.00011494132704635481, "loss": 0.882, "step": 30465 }, { "epoch": 0.7822814509541486, "grad_norm": 0.8359375, "learning_rate": 0.00011493691294118025, "loss": 0.8164, "step": 30466 }, { "epoch": 0.7823071281500704, "grad_norm": 0.8203125, "learning_rate": 0.00011493249880623777, "loss": 0.7927, "step": 30467 }, { "epoch": 0.7823328053459921, "grad_norm": 0.89453125, "learning_rate": 0.00011492808464153619, "loss": 0.884, "step": 30468 }, { "epoch": 0.782358482541914, "grad_norm": 0.8984375, "learning_rate": 0.00011492367044708425, "loss": 0.8557, "step": 30469 }, { "epoch": 0.7823841597378358, "grad_norm": 0.72265625, "learning_rate": 0.00011491925622289082, "loss": 0.7164, "step": 30470 }, { "epoch": 0.7824098369337577, "grad_norm": 0.859375, "learning_rate": 0.00011491484196896465, "loss": 0.8405, "step": 30471 }, { "epoch": 0.7824355141296795, "grad_norm": 0.8125, "learning_rate": 0.00011491042768531454, "loss": 0.8065, "step": 30472 }, { "epoch": 0.7824611913256013, "grad_norm": 0.83984375, "learning_rate": 0.0001149060133719493, "loss": 0.7972, "step": 30473 }, { "epoch": 0.7824868685215232, "grad_norm": 0.78515625, "learning_rate": 0.00011490159902887772, "loss": 0.9546, "step": 30474 }, { "epoch": 0.7825125457174449, "grad_norm": 0.875, "learning_rate": 0.00011489718465610861, "loss": 1.0135, "step": 30475 }, { "epoch": 0.7825382229133667, "grad_norm": 0.6953125, "learning_rate": 0.00011489277025365075, "loss": 0.7942, "step": 30476 }, { "epoch": 0.7825639001092886, "grad_norm": 0.8515625, "learning_rate": 0.00011488835582151292, "loss": 0.8796, "step": 30477 }, { "epoch": 0.7825895773052104, "grad_norm": 0.80859375, "learning_rate": 0.00011488394135970399, "loss": 0.8681, "step": 30478 }, { "epoch": 0.7826152545011322, "grad_norm": 0.75, "learning_rate": 0.00011487952686823267, "loss": 0.7494, "step": 30479 }, { "epoch": 0.7826409316970541, "grad_norm": 0.82421875, "learning_rate": 0.00011487511234710781, "loss": 0.8994, "step": 30480 }, { "epoch": 0.7826666088929758, "grad_norm": 0.78515625, "learning_rate": 0.00011487069779633818, "loss": 0.7736, "step": 30481 }, { "epoch": 0.7826922860888976, "grad_norm": 0.82421875, "learning_rate": 0.0001148662832159326, "loss": 0.7796, "step": 30482 }, { "epoch": 0.7827179632848195, "grad_norm": 0.7265625, "learning_rate": 0.00011486186860589986, "loss": 0.8311, "step": 30483 }, { "epoch": 0.7827436404807413, "grad_norm": 0.76953125, "learning_rate": 0.0001148574539662488, "loss": 0.8295, "step": 30484 }, { "epoch": 0.7827693176766631, "grad_norm": 0.796875, "learning_rate": 0.00011485303929698811, "loss": 0.7103, "step": 30485 }, { "epoch": 0.782794994872585, "grad_norm": 0.80859375, "learning_rate": 0.00011484862459812671, "loss": 0.8436, "step": 30486 }, { "epoch": 0.7828206720685068, "grad_norm": 0.8359375, "learning_rate": 0.00011484420986967333, "loss": 0.8898, "step": 30487 }, { "epoch": 0.7828463492644285, "grad_norm": 0.8359375, "learning_rate": 0.00011483979511163674, "loss": 0.9227, "step": 30488 }, { "epoch": 0.7828720264603504, "grad_norm": 0.6953125, "learning_rate": 0.0001148353803240258, "loss": 0.6912, "step": 30489 }, { "epoch": 0.7828977036562722, "grad_norm": 0.7890625, "learning_rate": 0.00011483096550684929, "loss": 0.9064, "step": 30490 }, { "epoch": 0.782923380852194, "grad_norm": 0.81640625, "learning_rate": 0.00011482655066011603, "loss": 0.8597, "step": 30491 }, { "epoch": 0.7829490580481159, "grad_norm": 0.8984375, "learning_rate": 0.00011482213578383478, "loss": 0.8583, "step": 30492 }, { "epoch": 0.7829747352440377, "grad_norm": 0.80859375, "learning_rate": 0.00011481772087801434, "loss": 0.788, "step": 30493 }, { "epoch": 0.7830004124399595, "grad_norm": 0.83984375, "learning_rate": 0.00011481330594266352, "loss": 0.8764, "step": 30494 }, { "epoch": 0.7830260896358813, "grad_norm": 0.83984375, "learning_rate": 0.00011480889097779113, "loss": 0.8488, "step": 30495 }, { "epoch": 0.7830517668318031, "grad_norm": 0.77734375, "learning_rate": 0.00011480447598340594, "loss": 0.7882, "step": 30496 }, { "epoch": 0.783077444027725, "grad_norm": 0.78125, "learning_rate": 0.00011480006095951681, "loss": 0.8151, "step": 30497 }, { "epoch": 0.7831031212236468, "grad_norm": 0.72265625, "learning_rate": 0.00011479564590613245, "loss": 0.7402, "step": 30498 }, { "epoch": 0.7831287984195686, "grad_norm": 0.7265625, "learning_rate": 0.00011479123082326173, "loss": 0.8974, "step": 30499 }, { "epoch": 0.7831544756154905, "grad_norm": 0.875, "learning_rate": 0.00011478681571091342, "loss": 0.8778, "step": 30500 }, { "epoch": 0.7831801528114122, "grad_norm": 0.79296875, "learning_rate": 0.00011478240056909628, "loss": 0.8035, "step": 30501 }, { "epoch": 0.783205830007334, "grad_norm": 0.6796875, "learning_rate": 0.00011477798539781921, "loss": 0.7161, "step": 30502 }, { "epoch": 0.7832315072032559, "grad_norm": 0.765625, "learning_rate": 0.00011477357019709091, "loss": 0.7886, "step": 30503 }, { "epoch": 0.7832571843991777, "grad_norm": 0.71484375, "learning_rate": 0.00011476915496692025, "loss": 0.6722, "step": 30504 }, { "epoch": 0.7832828615950995, "grad_norm": 0.86328125, "learning_rate": 0.00011476473970731598, "loss": 0.7586, "step": 30505 }, { "epoch": 0.7833085387910214, "grad_norm": 0.74609375, "learning_rate": 0.00011476032441828692, "loss": 0.7913, "step": 30506 }, { "epoch": 0.7833342159869432, "grad_norm": 0.80859375, "learning_rate": 0.00011475590909984187, "loss": 0.8676, "step": 30507 }, { "epoch": 0.7833598931828649, "grad_norm": 0.9453125, "learning_rate": 0.00011475149375198961, "loss": 0.8556, "step": 30508 }, { "epoch": 0.7833855703787868, "grad_norm": 0.74609375, "learning_rate": 0.00011474707837473897, "loss": 0.7801, "step": 30509 }, { "epoch": 0.7834112475747086, "grad_norm": 0.8125, "learning_rate": 0.00011474266296809872, "loss": 0.814, "step": 30510 }, { "epoch": 0.7834369247706304, "grad_norm": 0.76953125, "learning_rate": 0.00011473824753207772, "loss": 0.7334, "step": 30511 }, { "epoch": 0.7834626019665523, "grad_norm": 0.8203125, "learning_rate": 0.00011473383206668469, "loss": 0.7757, "step": 30512 }, { "epoch": 0.7834882791624741, "grad_norm": 0.78125, "learning_rate": 0.00011472941657192846, "loss": 0.9249, "step": 30513 }, { "epoch": 0.7835139563583959, "grad_norm": 0.859375, "learning_rate": 0.00011472500104781782, "loss": 0.7912, "step": 30514 }, { "epoch": 0.7835396335543177, "grad_norm": 0.82421875, "learning_rate": 0.00011472058549436162, "loss": 0.7697, "step": 30515 }, { "epoch": 0.7835653107502395, "grad_norm": 0.7421875, "learning_rate": 0.0001147161699115686, "loss": 0.8195, "step": 30516 }, { "epoch": 0.7835909879461613, "grad_norm": 0.79296875, "learning_rate": 0.0001147117542994476, "loss": 0.7987, "step": 30517 }, { "epoch": 0.7836166651420832, "grad_norm": 0.81640625, "learning_rate": 0.0001147073386580074, "loss": 0.8341, "step": 30518 }, { "epoch": 0.783642342338005, "grad_norm": 0.8046875, "learning_rate": 0.00011470292298725676, "loss": 0.8134, "step": 30519 }, { "epoch": 0.7836680195339268, "grad_norm": 0.8046875, "learning_rate": 0.00011469850728720457, "loss": 0.8111, "step": 30520 }, { "epoch": 0.7836936967298486, "grad_norm": 0.74609375, "learning_rate": 0.00011469409155785957, "loss": 0.7731, "step": 30521 }, { "epoch": 0.7837193739257704, "grad_norm": 0.78125, "learning_rate": 0.00011468967579923055, "loss": 0.8556, "step": 30522 }, { "epoch": 0.7837450511216922, "grad_norm": 0.83203125, "learning_rate": 0.00011468526001132636, "loss": 0.7488, "step": 30523 }, { "epoch": 0.7837707283176141, "grad_norm": 0.80859375, "learning_rate": 0.00011468084419415579, "loss": 0.8122, "step": 30524 }, { "epoch": 0.7837964055135359, "grad_norm": 0.73828125, "learning_rate": 0.00011467642834772758, "loss": 0.8032, "step": 30525 }, { "epoch": 0.7838220827094577, "grad_norm": 0.765625, "learning_rate": 0.00011467201247205062, "loss": 0.8714, "step": 30526 }, { "epoch": 0.7838477599053796, "grad_norm": 0.6875, "learning_rate": 0.00011466759656713364, "loss": 0.7266, "step": 30527 }, { "epoch": 0.7838734371013013, "grad_norm": 0.80859375, "learning_rate": 0.00011466318063298544, "loss": 0.8175, "step": 30528 }, { "epoch": 0.7838991142972231, "grad_norm": 0.81640625, "learning_rate": 0.00011465876466961487, "loss": 0.9972, "step": 30529 }, { "epoch": 0.783924791493145, "grad_norm": 0.76171875, "learning_rate": 0.0001146543486770307, "loss": 0.8509, "step": 30530 }, { "epoch": 0.7839504686890668, "grad_norm": 0.7265625, "learning_rate": 0.00011464993265524175, "loss": 0.7778, "step": 30531 }, { "epoch": 0.7839761458849887, "grad_norm": 0.76953125, "learning_rate": 0.00011464551660425682, "loss": 0.7437, "step": 30532 }, { "epoch": 0.7840018230809105, "grad_norm": 0.82421875, "learning_rate": 0.00011464110052408465, "loss": 0.8375, "step": 30533 }, { "epoch": 0.7840275002768323, "grad_norm": 0.890625, "learning_rate": 0.00011463668441473415, "loss": 0.8813, "step": 30534 }, { "epoch": 0.784053177472754, "grad_norm": 0.86328125, "learning_rate": 0.00011463226827621402, "loss": 0.846, "step": 30535 }, { "epoch": 0.7840788546686759, "grad_norm": 0.796875, "learning_rate": 0.00011462785210853308, "loss": 0.7738, "step": 30536 }, { "epoch": 0.7841045318645977, "grad_norm": 0.76953125, "learning_rate": 0.00011462343591170021, "loss": 0.7869, "step": 30537 }, { "epoch": 0.7841302090605196, "grad_norm": 0.78515625, "learning_rate": 0.0001146190196857241, "loss": 0.8012, "step": 30538 }, { "epoch": 0.7841558862564414, "grad_norm": 0.75, "learning_rate": 0.00011461460343061365, "loss": 0.9034, "step": 30539 }, { "epoch": 0.7841815634523632, "grad_norm": 0.78515625, "learning_rate": 0.0001146101871463776, "loss": 0.7308, "step": 30540 }, { "epoch": 0.784207240648285, "grad_norm": 0.76171875, "learning_rate": 0.00011460577083302475, "loss": 0.8392, "step": 30541 }, { "epoch": 0.7842329178442068, "grad_norm": 0.7890625, "learning_rate": 0.00011460135449056393, "loss": 0.8299, "step": 30542 }, { "epoch": 0.7842585950401286, "grad_norm": 0.77734375, "learning_rate": 0.00011459693811900392, "loss": 0.7521, "step": 30543 }, { "epoch": 0.7842842722360505, "grad_norm": 0.80078125, "learning_rate": 0.00011459252171835355, "loss": 0.82, "step": 30544 }, { "epoch": 0.7843099494319723, "grad_norm": 0.82421875, "learning_rate": 0.0001145881052886216, "loss": 0.6407, "step": 30545 }, { "epoch": 0.7843356266278941, "grad_norm": 0.796875, "learning_rate": 0.00011458368882981685, "loss": 0.7772, "step": 30546 }, { "epoch": 0.784361303823816, "grad_norm": 0.75390625, "learning_rate": 0.00011457927234194814, "loss": 0.8174, "step": 30547 }, { "epoch": 0.7843869810197377, "grad_norm": 0.78515625, "learning_rate": 0.00011457485582502427, "loss": 0.8461, "step": 30548 }, { "epoch": 0.7844126582156595, "grad_norm": 0.7734375, "learning_rate": 0.00011457043927905399, "loss": 0.7449, "step": 30549 }, { "epoch": 0.7844383354115814, "grad_norm": 0.8828125, "learning_rate": 0.00011456602270404619, "loss": 0.7863, "step": 30550 }, { "epoch": 0.7844640126075032, "grad_norm": 0.79296875, "learning_rate": 0.0001145616061000096, "loss": 0.8652, "step": 30551 }, { "epoch": 0.784489689803425, "grad_norm": 0.8515625, "learning_rate": 0.00011455718946695304, "loss": 0.8962, "step": 30552 }, { "epoch": 0.7845153669993469, "grad_norm": 0.82421875, "learning_rate": 0.00011455277280488532, "loss": 0.7601, "step": 30553 }, { "epoch": 0.7845410441952687, "grad_norm": 0.7890625, "learning_rate": 0.00011454835611381522, "loss": 0.7642, "step": 30554 }, { "epoch": 0.7845667213911904, "grad_norm": 0.69140625, "learning_rate": 0.00011454393939375159, "loss": 0.6921, "step": 30555 }, { "epoch": 0.7845923985871123, "grad_norm": 0.9140625, "learning_rate": 0.0001145395226447032, "loss": 0.8834, "step": 30556 }, { "epoch": 0.7846180757830341, "grad_norm": 0.7421875, "learning_rate": 0.00011453510586667885, "loss": 0.8863, "step": 30557 }, { "epoch": 0.7846437529789559, "grad_norm": 0.76171875, "learning_rate": 0.00011453068905968736, "loss": 0.9802, "step": 30558 }, { "epoch": 0.7846694301748778, "grad_norm": 0.81640625, "learning_rate": 0.00011452627222373748, "loss": 0.7092, "step": 30559 }, { "epoch": 0.7846951073707996, "grad_norm": 0.82421875, "learning_rate": 0.00011452185535883808, "loss": 0.8262, "step": 30560 }, { "epoch": 0.7847207845667213, "grad_norm": 0.765625, "learning_rate": 0.00011451743846499794, "loss": 0.6468, "step": 30561 }, { "epoch": 0.7847464617626432, "grad_norm": 0.80859375, "learning_rate": 0.00011451302154222583, "loss": 0.73, "step": 30562 }, { "epoch": 0.784772138958565, "grad_norm": 0.8125, "learning_rate": 0.00011450860459053062, "loss": 0.848, "step": 30563 }, { "epoch": 0.7847978161544868, "grad_norm": 0.8125, "learning_rate": 0.00011450418760992103, "loss": 0.869, "step": 30564 }, { "epoch": 0.7848234933504087, "grad_norm": 0.77734375, "learning_rate": 0.0001144997706004059, "loss": 0.8291, "step": 30565 }, { "epoch": 0.7848491705463305, "grad_norm": 0.875, "learning_rate": 0.00011449535356199407, "loss": 0.8372, "step": 30566 }, { "epoch": 0.7848748477422524, "grad_norm": 0.765625, "learning_rate": 0.00011449093649469429, "loss": 0.8539, "step": 30567 }, { "epoch": 0.7849005249381741, "grad_norm": 0.8046875, "learning_rate": 0.00011448651939851538, "loss": 0.8829, "step": 30568 }, { "epoch": 0.7849262021340959, "grad_norm": 0.7578125, "learning_rate": 0.00011448210227346616, "loss": 0.8713, "step": 30569 }, { "epoch": 0.7849518793300178, "grad_norm": 0.734375, "learning_rate": 0.00011447768511955543, "loss": 0.745, "step": 30570 }, { "epoch": 0.7849775565259396, "grad_norm": 0.796875, "learning_rate": 0.00011447326793679196, "loss": 0.724, "step": 30571 }, { "epoch": 0.7850032337218614, "grad_norm": 0.73046875, "learning_rate": 0.00011446885072518458, "loss": 0.8006, "step": 30572 }, { "epoch": 0.7850289109177833, "grad_norm": 0.78125, "learning_rate": 0.00011446443348474207, "loss": 0.7398, "step": 30573 }, { "epoch": 0.785054588113705, "grad_norm": 0.73046875, "learning_rate": 0.00011446001621547328, "loss": 0.7117, "step": 30574 }, { "epoch": 0.7850802653096268, "grad_norm": 0.83984375, "learning_rate": 0.00011445559891738697, "loss": 0.9415, "step": 30575 }, { "epoch": 0.7851059425055487, "grad_norm": 0.7109375, "learning_rate": 0.00011445118159049197, "loss": 0.7837, "step": 30576 }, { "epoch": 0.7851316197014705, "grad_norm": 0.8203125, "learning_rate": 0.00011444676423479705, "loss": 0.7735, "step": 30577 }, { "epoch": 0.7851572968973923, "grad_norm": 0.79296875, "learning_rate": 0.00011444234685031103, "loss": 0.8639, "step": 30578 }, { "epoch": 0.7851829740933142, "grad_norm": 0.734375, "learning_rate": 0.00011443792943704274, "loss": 0.8213, "step": 30579 }, { "epoch": 0.785208651289236, "grad_norm": 0.78125, "learning_rate": 0.00011443351199500097, "loss": 0.9146, "step": 30580 }, { "epoch": 0.7852343284851577, "grad_norm": 0.7421875, "learning_rate": 0.00011442909452419446, "loss": 0.8136, "step": 30581 }, { "epoch": 0.7852600056810796, "grad_norm": 0.71875, "learning_rate": 0.0001144246770246321, "loss": 0.8383, "step": 30582 }, { "epoch": 0.7852856828770014, "grad_norm": 0.8203125, "learning_rate": 0.00011442025949632267, "loss": 0.8776, "step": 30583 }, { "epoch": 0.7853113600729232, "grad_norm": 0.79296875, "learning_rate": 0.00011441584193927497, "loss": 0.8656, "step": 30584 }, { "epoch": 0.7853370372688451, "grad_norm": 0.796875, "learning_rate": 0.00011441142435349779, "loss": 0.876, "step": 30585 }, { "epoch": 0.7853627144647669, "grad_norm": 0.71484375, "learning_rate": 0.00011440700673899993, "loss": 0.6842, "step": 30586 }, { "epoch": 0.7853883916606887, "grad_norm": 0.73046875, "learning_rate": 0.00011440258909579022, "loss": 0.8741, "step": 30587 }, { "epoch": 0.7854140688566105, "grad_norm": 0.7890625, "learning_rate": 0.00011439817142387745, "loss": 0.7939, "step": 30588 }, { "epoch": 0.7854397460525323, "grad_norm": 1.0546875, "learning_rate": 0.00011439375372327043, "loss": 0.7722, "step": 30589 }, { "epoch": 0.7854654232484541, "grad_norm": 0.7890625, "learning_rate": 0.00011438933599397797, "loss": 0.8258, "step": 30590 }, { "epoch": 0.785491100444376, "grad_norm": 0.77734375, "learning_rate": 0.00011438491823600883, "loss": 0.8176, "step": 30591 }, { "epoch": 0.7855167776402978, "grad_norm": 0.75390625, "learning_rate": 0.00011438050044937187, "loss": 0.6891, "step": 30592 }, { "epoch": 0.7855424548362197, "grad_norm": 0.84375, "learning_rate": 0.00011437608263407587, "loss": 0.8792, "step": 30593 }, { "epoch": 0.7855681320321414, "grad_norm": 0.8359375, "learning_rate": 0.00011437166479012962, "loss": 0.8256, "step": 30594 }, { "epoch": 0.7855938092280632, "grad_norm": 0.82421875, "learning_rate": 0.00011436724691754195, "loss": 0.9401, "step": 30595 }, { "epoch": 0.785619486423985, "grad_norm": 0.8671875, "learning_rate": 0.00011436282901632164, "loss": 0.7258, "step": 30596 }, { "epoch": 0.7856451636199069, "grad_norm": 0.796875, "learning_rate": 0.00011435841108647755, "loss": 0.7893, "step": 30597 }, { "epoch": 0.7856708408158287, "grad_norm": 0.76171875, "learning_rate": 0.00011435399312801842, "loss": 0.9096, "step": 30598 }, { "epoch": 0.7856965180117506, "grad_norm": 0.8203125, "learning_rate": 0.00011434957514095306, "loss": 0.8173, "step": 30599 }, { "epoch": 0.7857221952076724, "grad_norm": 0.7890625, "learning_rate": 0.00011434515712529032, "loss": 0.762, "step": 30600 }, { "epoch": 0.7857478724035941, "grad_norm": 0.74609375, "learning_rate": 0.00011434073908103897, "loss": 0.7034, "step": 30601 }, { "epoch": 0.785773549599516, "grad_norm": 0.78125, "learning_rate": 0.00011433632100820782, "loss": 0.903, "step": 30602 }, { "epoch": 0.7857992267954378, "grad_norm": 0.74609375, "learning_rate": 0.00011433190290680569, "loss": 0.8489, "step": 30603 }, { "epoch": 0.7858249039913596, "grad_norm": 0.71875, "learning_rate": 0.00011432748477684134, "loss": 0.719, "step": 30604 }, { "epoch": 0.7858505811872815, "grad_norm": 0.8125, "learning_rate": 0.00011432306661832363, "loss": 0.7829, "step": 30605 }, { "epoch": 0.7858762583832033, "grad_norm": 0.74609375, "learning_rate": 0.00011431864843126135, "loss": 0.7262, "step": 30606 }, { "epoch": 0.7859019355791251, "grad_norm": 0.74609375, "learning_rate": 0.00011431423021566328, "loss": 0.7485, "step": 30607 }, { "epoch": 0.7859276127750469, "grad_norm": 0.78515625, "learning_rate": 0.00011430981197153824, "loss": 0.8099, "step": 30608 }, { "epoch": 0.7859532899709687, "grad_norm": 0.70703125, "learning_rate": 0.00011430539369889508, "loss": 0.7544, "step": 30609 }, { "epoch": 0.7859789671668905, "grad_norm": 0.6875, "learning_rate": 0.00011430097539774252, "loss": 0.7426, "step": 30610 }, { "epoch": 0.7860046443628124, "grad_norm": 0.796875, "learning_rate": 0.00011429655706808939, "loss": 0.8056, "step": 30611 }, { "epoch": 0.7860303215587342, "grad_norm": 0.76171875, "learning_rate": 0.00011429213870994455, "loss": 0.7955, "step": 30612 }, { "epoch": 0.786055998754656, "grad_norm": 0.87109375, "learning_rate": 0.00011428772032331677, "loss": 0.8276, "step": 30613 }, { "epoch": 0.7860816759505778, "grad_norm": 0.796875, "learning_rate": 0.00011428330190821484, "loss": 0.7607, "step": 30614 }, { "epoch": 0.7861073531464996, "grad_norm": 0.76953125, "learning_rate": 0.00011427888346464754, "loss": 0.8384, "step": 30615 }, { "epoch": 0.7861330303424214, "grad_norm": 0.78515625, "learning_rate": 0.0001142744649926238, "loss": 0.775, "step": 30616 }, { "epoch": 0.7861587075383433, "grad_norm": 0.734375, "learning_rate": 0.00011427004649215229, "loss": 0.8106, "step": 30617 }, { "epoch": 0.7861843847342651, "grad_norm": 0.7890625, "learning_rate": 0.00011426562796324188, "loss": 0.8585, "step": 30618 }, { "epoch": 0.7862100619301869, "grad_norm": 0.75, "learning_rate": 0.00011426120940590135, "loss": 0.8746, "step": 30619 }, { "epoch": 0.7862357391261088, "grad_norm": 0.6875, "learning_rate": 0.00011425679082013952, "loss": 0.766, "step": 30620 }, { "epoch": 0.7862614163220305, "grad_norm": 0.80078125, "learning_rate": 0.00011425237220596519, "loss": 0.9759, "step": 30621 }, { "epoch": 0.7862870935179523, "grad_norm": 0.7421875, "learning_rate": 0.0001142479535633872, "loss": 0.8661, "step": 30622 }, { "epoch": 0.7863127707138742, "grad_norm": 0.78515625, "learning_rate": 0.00011424353489241428, "loss": 0.8124, "step": 30623 }, { "epoch": 0.786338447909796, "grad_norm": 0.8046875, "learning_rate": 0.0001142391161930553, "loss": 0.8323, "step": 30624 }, { "epoch": 0.7863641251057178, "grad_norm": 0.859375, "learning_rate": 0.00011423469746531905, "loss": 0.7543, "step": 30625 }, { "epoch": 0.7863898023016397, "grad_norm": 0.765625, "learning_rate": 0.00011423027870921433, "loss": 0.7656, "step": 30626 }, { "epoch": 0.7864154794975615, "grad_norm": 0.8203125, "learning_rate": 0.00011422585992474996, "loss": 0.8155, "step": 30627 }, { "epoch": 0.7864411566934832, "grad_norm": 0.76953125, "learning_rate": 0.00011422144111193473, "loss": 0.9489, "step": 30628 }, { "epoch": 0.7864668338894051, "grad_norm": 0.7421875, "learning_rate": 0.00011421702227077746, "loss": 0.8063, "step": 30629 }, { "epoch": 0.7864925110853269, "grad_norm": 0.82421875, "learning_rate": 0.00011421260340128697, "loss": 0.9321, "step": 30630 }, { "epoch": 0.7865181882812488, "grad_norm": 0.765625, "learning_rate": 0.00011420818450347199, "loss": 0.8717, "step": 30631 }, { "epoch": 0.7865438654771706, "grad_norm": 0.81640625, "learning_rate": 0.00011420376557734141, "loss": 0.8555, "step": 30632 }, { "epoch": 0.7865695426730924, "grad_norm": 0.74609375, "learning_rate": 0.00011419934662290401, "loss": 0.797, "step": 30633 }, { "epoch": 0.7865952198690142, "grad_norm": 0.8359375, "learning_rate": 0.00011419492764016858, "loss": 0.934, "step": 30634 }, { "epoch": 0.786620897064936, "grad_norm": 0.8359375, "learning_rate": 0.00011419050862914397, "loss": 0.7662, "step": 30635 }, { "epoch": 0.7866465742608578, "grad_norm": 0.76171875, "learning_rate": 0.00011418608958983895, "loss": 0.7304, "step": 30636 }, { "epoch": 0.7866722514567797, "grad_norm": 0.76171875, "learning_rate": 0.00011418167052226232, "loss": 0.8735, "step": 30637 }, { "epoch": 0.7866979286527015, "grad_norm": 0.80859375, "learning_rate": 0.00011417725142642291, "loss": 0.8421, "step": 30638 }, { "epoch": 0.7867236058486233, "grad_norm": 0.85546875, "learning_rate": 0.00011417283230232949, "loss": 0.8847, "step": 30639 }, { "epoch": 0.7867492830445452, "grad_norm": 0.84375, "learning_rate": 0.00011416841314999094, "loss": 0.8643, "step": 30640 }, { "epoch": 0.7867749602404669, "grad_norm": 0.72265625, "learning_rate": 0.000114163993969416, "loss": 0.813, "step": 30641 }, { "epoch": 0.7868006374363887, "grad_norm": 0.765625, "learning_rate": 0.00011415957476061352, "loss": 0.8844, "step": 30642 }, { "epoch": 0.7868263146323106, "grad_norm": 0.75, "learning_rate": 0.00011415515552359226, "loss": 0.8507, "step": 30643 }, { "epoch": 0.7868519918282324, "grad_norm": 0.8359375, "learning_rate": 0.00011415073625836105, "loss": 0.7927, "step": 30644 }, { "epoch": 0.7868776690241542, "grad_norm": 0.79296875, "learning_rate": 0.00011414631696492872, "loss": 0.688, "step": 30645 }, { "epoch": 0.7869033462200761, "grad_norm": 0.84375, "learning_rate": 0.00011414189764330408, "loss": 0.7382, "step": 30646 }, { "epoch": 0.7869290234159979, "grad_norm": 0.83203125, "learning_rate": 0.00011413747829349586, "loss": 0.8817, "step": 30647 }, { "epoch": 0.7869547006119196, "grad_norm": 0.73828125, "learning_rate": 0.00011413305891551296, "loss": 0.7377, "step": 30648 }, { "epoch": 0.7869803778078415, "grad_norm": 0.80859375, "learning_rate": 0.00011412863950936417, "loss": 0.976, "step": 30649 }, { "epoch": 0.7870060550037633, "grad_norm": 1.2265625, "learning_rate": 0.00011412422007505823, "loss": 0.8877, "step": 30650 }, { "epoch": 0.7870317321996851, "grad_norm": 0.7734375, "learning_rate": 0.00011411980061260401, "loss": 0.7825, "step": 30651 }, { "epoch": 0.787057409395607, "grad_norm": 0.76171875, "learning_rate": 0.00011411538112201032, "loss": 0.7715, "step": 30652 }, { "epoch": 0.7870830865915288, "grad_norm": 0.80078125, "learning_rate": 0.00011411096160328592, "loss": 0.8298, "step": 30653 }, { "epoch": 0.7871087637874505, "grad_norm": 0.69140625, "learning_rate": 0.00011410654205643967, "loss": 0.7316, "step": 30654 }, { "epoch": 0.7871344409833724, "grad_norm": 0.7578125, "learning_rate": 0.00011410212248148036, "loss": 0.7649, "step": 30655 }, { "epoch": 0.7871601181792942, "grad_norm": 0.67578125, "learning_rate": 0.0001140977028784168, "loss": 0.69, "step": 30656 }, { "epoch": 0.787185795375216, "grad_norm": 0.78125, "learning_rate": 0.00011409328324725777, "loss": 0.7628, "step": 30657 }, { "epoch": 0.7872114725711379, "grad_norm": 0.8203125, "learning_rate": 0.00011408886358801209, "loss": 0.7696, "step": 30658 }, { "epoch": 0.7872371497670597, "grad_norm": 0.75, "learning_rate": 0.0001140844439006886, "loss": 0.7457, "step": 30659 }, { "epoch": 0.7872628269629816, "grad_norm": 0.80078125, "learning_rate": 0.00011408002418529609, "loss": 0.9355, "step": 30660 }, { "epoch": 0.7872885041589033, "grad_norm": 0.80859375, "learning_rate": 0.00011407560444184333, "loss": 0.8644, "step": 30661 }, { "epoch": 0.7873141813548251, "grad_norm": 0.76171875, "learning_rate": 0.00011407118467033921, "loss": 0.7435, "step": 30662 }, { "epoch": 0.787339858550747, "grad_norm": 0.859375, "learning_rate": 0.00011406676487079246, "loss": 0.8137, "step": 30663 }, { "epoch": 0.7873655357466688, "grad_norm": 0.7421875, "learning_rate": 0.00011406234504321193, "loss": 0.772, "step": 30664 }, { "epoch": 0.7873912129425906, "grad_norm": 0.79296875, "learning_rate": 0.00011405792518760641, "loss": 0.8802, "step": 30665 }, { "epoch": 0.7874168901385125, "grad_norm": 0.8125, "learning_rate": 0.0001140535053039847, "loss": 0.7791, "step": 30666 }, { "epoch": 0.7874425673344343, "grad_norm": 0.76171875, "learning_rate": 0.00011404908539235564, "loss": 0.7547, "step": 30667 }, { "epoch": 0.787468244530356, "grad_norm": 0.8046875, "learning_rate": 0.00011404466545272803, "loss": 0.8372, "step": 30668 }, { "epoch": 0.7874939217262779, "grad_norm": 0.76953125, "learning_rate": 0.00011404024548511066, "loss": 0.744, "step": 30669 }, { "epoch": 0.7875195989221997, "grad_norm": 0.79296875, "learning_rate": 0.00011403582548951237, "loss": 0.8175, "step": 30670 }, { "epoch": 0.7875452761181215, "grad_norm": 0.77734375, "learning_rate": 0.00011403140546594192, "loss": 0.7415, "step": 30671 }, { "epoch": 0.7875709533140434, "grad_norm": 0.75390625, "learning_rate": 0.00011402698541440812, "loss": 0.7657, "step": 30672 }, { "epoch": 0.7875966305099652, "grad_norm": 0.78125, "learning_rate": 0.00011402256533491986, "loss": 0.7393, "step": 30673 }, { "epoch": 0.7876223077058869, "grad_norm": 0.70703125, "learning_rate": 0.00011401814522748586, "loss": 0.9229, "step": 30674 }, { "epoch": 0.7876479849018088, "grad_norm": 0.80078125, "learning_rate": 0.000114013725092115, "loss": 0.7939, "step": 30675 }, { "epoch": 0.7876736620977306, "grad_norm": 0.8125, "learning_rate": 0.000114009304928816, "loss": 0.7207, "step": 30676 }, { "epoch": 0.7876993392936524, "grad_norm": 0.765625, "learning_rate": 0.00011400488473759776, "loss": 0.8106, "step": 30677 }, { "epoch": 0.7877250164895743, "grad_norm": 0.765625, "learning_rate": 0.00011400046451846904, "loss": 0.9373, "step": 30678 }, { "epoch": 0.7877506936854961, "grad_norm": 0.79296875, "learning_rate": 0.00011399604427143864, "loss": 0.7959, "step": 30679 }, { "epoch": 0.7877763708814179, "grad_norm": 0.84765625, "learning_rate": 0.00011399162399651542, "loss": 0.8679, "step": 30680 }, { "epoch": 0.7878020480773397, "grad_norm": 0.75390625, "learning_rate": 0.00011398720369370815, "loss": 0.8884, "step": 30681 }, { "epoch": 0.7878277252732615, "grad_norm": 0.7734375, "learning_rate": 0.00011398278336302563, "loss": 0.8736, "step": 30682 }, { "epoch": 0.7878534024691833, "grad_norm": 0.8125, "learning_rate": 0.00011397836300447671, "loss": 0.9408, "step": 30683 }, { "epoch": 0.7878790796651052, "grad_norm": 0.7265625, "learning_rate": 0.00011397394261807015, "loss": 0.7465, "step": 30684 }, { "epoch": 0.787904756861027, "grad_norm": 0.83203125, "learning_rate": 0.00011396952220381481, "loss": 0.9032, "step": 30685 }, { "epoch": 0.7879304340569488, "grad_norm": 0.78515625, "learning_rate": 0.00011396510176171944, "loss": 0.8332, "step": 30686 }, { "epoch": 0.7879561112528707, "grad_norm": 0.7734375, "learning_rate": 0.00011396068129179289, "loss": 0.7414, "step": 30687 }, { "epoch": 0.7879817884487924, "grad_norm": 1.0234375, "learning_rate": 0.000113956260794044, "loss": 0.8404, "step": 30688 }, { "epoch": 0.7880074656447142, "grad_norm": 0.7890625, "learning_rate": 0.00011395184026848154, "loss": 0.8199, "step": 30689 }, { "epoch": 0.7880331428406361, "grad_norm": 0.81640625, "learning_rate": 0.00011394741971511427, "loss": 0.6912, "step": 30690 }, { "epoch": 0.7880588200365579, "grad_norm": 0.796875, "learning_rate": 0.00011394299913395109, "loss": 0.808, "step": 30691 }, { "epoch": 0.7880844972324798, "grad_norm": 0.8359375, "learning_rate": 0.00011393857852500074, "loss": 0.9055, "step": 30692 }, { "epoch": 0.7881101744284016, "grad_norm": 0.81640625, "learning_rate": 0.00011393415788827212, "loss": 0.8823, "step": 30693 }, { "epoch": 0.7881358516243233, "grad_norm": 0.79296875, "learning_rate": 0.00011392973722377393, "loss": 0.8672, "step": 30694 }, { "epoch": 0.7881615288202451, "grad_norm": 0.80078125, "learning_rate": 0.00011392531653151506, "loss": 0.7878, "step": 30695 }, { "epoch": 0.788187206016167, "grad_norm": 0.74609375, "learning_rate": 0.00011392089581150429, "loss": 0.8511, "step": 30696 }, { "epoch": 0.7882128832120888, "grad_norm": 0.765625, "learning_rate": 0.00011391647506375042, "loss": 0.8211, "step": 30697 }, { "epoch": 0.7882385604080107, "grad_norm": 0.73828125, "learning_rate": 0.00011391205428826226, "loss": 0.803, "step": 30698 }, { "epoch": 0.7882642376039325, "grad_norm": 0.7734375, "learning_rate": 0.00011390763348504867, "loss": 0.7447, "step": 30699 }, { "epoch": 0.7882899147998543, "grad_norm": 0.7734375, "learning_rate": 0.0001139032126541184, "loss": 0.8143, "step": 30700 }, { "epoch": 0.788315591995776, "grad_norm": 0.76953125, "learning_rate": 0.00011389879179548031, "loss": 1.016, "step": 30701 }, { "epoch": 0.7883412691916979, "grad_norm": 0.80078125, "learning_rate": 0.00011389437090914314, "loss": 0.8477, "step": 30702 }, { "epoch": 0.7883669463876197, "grad_norm": 0.8203125, "learning_rate": 0.00011388994999511576, "loss": 0.85, "step": 30703 }, { "epoch": 0.7883926235835416, "grad_norm": 0.78515625, "learning_rate": 0.00011388552905340697, "loss": 0.7348, "step": 30704 }, { "epoch": 0.7884183007794634, "grad_norm": 0.734375, "learning_rate": 0.00011388110808402559, "loss": 0.7796, "step": 30705 }, { "epoch": 0.7884439779753852, "grad_norm": 0.75390625, "learning_rate": 0.00011387668708698036, "loss": 0.6896, "step": 30706 }, { "epoch": 0.7884696551713071, "grad_norm": 0.82421875, "learning_rate": 0.00011387226606228021, "loss": 0.8857, "step": 30707 }, { "epoch": 0.7884953323672288, "grad_norm": 0.7109375, "learning_rate": 0.00011386784500993386, "loss": 0.7492, "step": 30708 }, { "epoch": 0.7885210095631506, "grad_norm": 0.77734375, "learning_rate": 0.00011386342392995015, "loss": 0.857, "step": 30709 }, { "epoch": 0.7885466867590725, "grad_norm": 0.7578125, "learning_rate": 0.0001138590028223379, "loss": 0.9205, "step": 30710 }, { "epoch": 0.7885723639549943, "grad_norm": 0.7890625, "learning_rate": 0.00011385458168710588, "loss": 0.8628, "step": 30711 }, { "epoch": 0.7885980411509161, "grad_norm": 0.89453125, "learning_rate": 0.00011385016052426294, "loss": 0.935, "step": 30712 }, { "epoch": 0.788623718346838, "grad_norm": 0.78125, "learning_rate": 0.0001138457393338179, "loss": 0.7508, "step": 30713 }, { "epoch": 0.7886493955427597, "grad_norm": 0.74609375, "learning_rate": 0.00011384131811577954, "loss": 0.8118, "step": 30714 }, { "epoch": 0.7886750727386815, "grad_norm": 0.80859375, "learning_rate": 0.00011383689687015669, "loss": 0.6798, "step": 30715 }, { "epoch": 0.7887007499346034, "grad_norm": 0.76171875, "learning_rate": 0.00011383247559695813, "loss": 0.7223, "step": 30716 }, { "epoch": 0.7887264271305252, "grad_norm": 0.79296875, "learning_rate": 0.00011382805429619273, "loss": 0.8957, "step": 30717 }, { "epoch": 0.788752104326447, "grad_norm": 0.73046875, "learning_rate": 0.00011382363296786924, "loss": 0.7034, "step": 30718 }, { "epoch": 0.7887777815223689, "grad_norm": 0.81640625, "learning_rate": 0.0001138192116119965, "loss": 0.87, "step": 30719 }, { "epoch": 0.7888034587182907, "grad_norm": 0.7734375, "learning_rate": 0.00011381479022858333, "loss": 0.8194, "step": 30720 }, { "epoch": 0.7888291359142124, "grad_norm": 0.796875, "learning_rate": 0.00011381036881763854, "loss": 0.7853, "step": 30721 }, { "epoch": 0.7888548131101343, "grad_norm": 0.83203125, "learning_rate": 0.00011380594737917092, "loss": 0.7397, "step": 30722 }, { "epoch": 0.7888804903060561, "grad_norm": 0.78515625, "learning_rate": 0.00011380152591318929, "loss": 0.8172, "step": 30723 }, { "epoch": 0.788906167501978, "grad_norm": 0.75390625, "learning_rate": 0.00011379710441970247, "loss": 0.829, "step": 30724 }, { "epoch": 0.7889318446978998, "grad_norm": 0.796875, "learning_rate": 0.00011379268289871925, "loss": 0.7777, "step": 30725 }, { "epoch": 0.7889575218938216, "grad_norm": 0.796875, "learning_rate": 0.00011378826135024849, "loss": 0.8149, "step": 30726 }, { "epoch": 0.7889831990897435, "grad_norm": 0.78515625, "learning_rate": 0.00011378383977429893, "loss": 0.81, "step": 30727 }, { "epoch": 0.7890088762856652, "grad_norm": 0.8125, "learning_rate": 0.0001137794181708795, "loss": 0.8596, "step": 30728 }, { "epoch": 0.789034553481587, "grad_norm": 0.8203125, "learning_rate": 0.00011377499653999886, "loss": 0.9267, "step": 30729 }, { "epoch": 0.7890602306775089, "grad_norm": 0.71484375, "learning_rate": 0.00011377057488166592, "loss": 0.8288, "step": 30730 }, { "epoch": 0.7890859078734307, "grad_norm": 0.79296875, "learning_rate": 0.00011376615319588948, "loss": 0.871, "step": 30731 }, { "epoch": 0.7891115850693525, "grad_norm": 0.8046875, "learning_rate": 0.0001137617314826783, "loss": 0.9605, "step": 30732 }, { "epoch": 0.7891372622652744, "grad_norm": 0.7109375, "learning_rate": 0.00011375730974204126, "loss": 0.8379, "step": 30733 }, { "epoch": 0.7891629394611961, "grad_norm": 0.828125, "learning_rate": 0.00011375288797398718, "loss": 0.7914, "step": 30734 }, { "epoch": 0.7891886166571179, "grad_norm": 0.77734375, "learning_rate": 0.00011374846617852479, "loss": 0.8529, "step": 30735 }, { "epoch": 0.7892142938530398, "grad_norm": 0.73046875, "learning_rate": 0.00011374404435566297, "loss": 0.7116, "step": 30736 }, { "epoch": 0.7892399710489616, "grad_norm": 0.7890625, "learning_rate": 0.0001137396225054105, "loss": 0.845, "step": 30737 }, { "epoch": 0.7892656482448834, "grad_norm": 0.765625, "learning_rate": 0.00011373520062777619, "loss": 0.8244, "step": 30738 }, { "epoch": 0.7892913254408053, "grad_norm": 0.73046875, "learning_rate": 0.0001137307787227689, "loss": 0.7789, "step": 30739 }, { "epoch": 0.7893170026367271, "grad_norm": 0.828125, "learning_rate": 0.00011372635679039736, "loss": 0.9257, "step": 30740 }, { "epoch": 0.7893426798326488, "grad_norm": 0.86328125, "learning_rate": 0.00011372193483067048, "loss": 0.9608, "step": 30741 }, { "epoch": 0.7893683570285707, "grad_norm": 0.79296875, "learning_rate": 0.00011371751284359701, "loss": 0.885, "step": 30742 }, { "epoch": 0.7893940342244925, "grad_norm": 0.78515625, "learning_rate": 0.00011371309082918575, "loss": 0.8141, "step": 30743 }, { "epoch": 0.7894197114204143, "grad_norm": 0.76953125, "learning_rate": 0.00011370866878744557, "loss": 0.8615, "step": 30744 }, { "epoch": 0.7894453886163362, "grad_norm": 0.74609375, "learning_rate": 0.00011370424671838524, "loss": 0.872, "step": 30745 }, { "epoch": 0.789471065812258, "grad_norm": 0.7421875, "learning_rate": 0.00011369982462201356, "loss": 0.7872, "step": 30746 }, { "epoch": 0.7894967430081798, "grad_norm": 0.80078125, "learning_rate": 0.00011369540249833943, "loss": 0.7566, "step": 30747 }, { "epoch": 0.7895224202041016, "grad_norm": 0.80078125, "learning_rate": 0.00011369098034737154, "loss": 0.8207, "step": 30748 }, { "epoch": 0.7895480974000234, "grad_norm": 0.828125, "learning_rate": 0.00011368655816911879, "loss": 0.831, "step": 30749 }, { "epoch": 0.7895737745959452, "grad_norm": 0.84765625, "learning_rate": 0.00011368213596358997, "loss": 0.8124, "step": 30750 }, { "epoch": 0.7895994517918671, "grad_norm": 0.8359375, "learning_rate": 0.00011367771373079386, "loss": 0.8897, "step": 30751 }, { "epoch": 0.7896251289877889, "grad_norm": 0.7734375, "learning_rate": 0.00011367329147073933, "loss": 0.9315, "step": 30752 }, { "epoch": 0.7896508061837108, "grad_norm": 0.80859375, "learning_rate": 0.00011366886918343516, "loss": 0.7042, "step": 30753 }, { "epoch": 0.7896764833796325, "grad_norm": 0.71484375, "learning_rate": 0.00011366444686889015, "loss": 0.6921, "step": 30754 }, { "epoch": 0.7897021605755543, "grad_norm": 0.80859375, "learning_rate": 0.00011366002452711315, "loss": 0.7652, "step": 30755 }, { "epoch": 0.7897278377714761, "grad_norm": 0.7890625, "learning_rate": 0.00011365560215811295, "loss": 0.9018, "step": 30756 }, { "epoch": 0.789753514967398, "grad_norm": 0.7265625, "learning_rate": 0.00011365117976189836, "loss": 0.8685, "step": 30757 }, { "epoch": 0.7897791921633198, "grad_norm": 0.77734375, "learning_rate": 0.00011364675733847823, "loss": 0.7941, "step": 30758 }, { "epoch": 0.7898048693592417, "grad_norm": 0.7421875, "learning_rate": 0.00011364233488786129, "loss": 0.8021, "step": 30759 }, { "epoch": 0.7898305465551635, "grad_norm": 0.73046875, "learning_rate": 0.00011363791241005648, "loss": 0.72, "step": 30760 }, { "epoch": 0.7898562237510852, "grad_norm": 0.76171875, "learning_rate": 0.0001136334899050725, "loss": 0.7969, "step": 30761 }, { "epoch": 0.789881900947007, "grad_norm": 0.796875, "learning_rate": 0.0001136290673729182, "loss": 1.0157, "step": 30762 }, { "epoch": 0.7899075781429289, "grad_norm": 0.77734375, "learning_rate": 0.00011362464481360242, "loss": 0.676, "step": 30763 }, { "epoch": 0.7899332553388507, "grad_norm": 0.9296875, "learning_rate": 0.00011362022222713394, "loss": 0.8721, "step": 30764 }, { "epoch": 0.7899589325347726, "grad_norm": 0.74609375, "learning_rate": 0.0001136157996135216, "loss": 0.7072, "step": 30765 }, { "epoch": 0.7899846097306944, "grad_norm": 0.76953125, "learning_rate": 0.0001136113769727742, "loss": 0.7991, "step": 30766 }, { "epoch": 0.7900102869266162, "grad_norm": 0.81640625, "learning_rate": 0.00011360695430490056, "loss": 0.7842, "step": 30767 }, { "epoch": 0.790035964122538, "grad_norm": 0.71875, "learning_rate": 0.00011360253160990948, "loss": 0.8618, "step": 30768 }, { "epoch": 0.7900616413184598, "grad_norm": 0.80859375, "learning_rate": 0.00011359810888780977, "loss": 0.7963, "step": 30769 }, { "epoch": 0.7900873185143816, "grad_norm": 0.77734375, "learning_rate": 0.00011359368613861028, "loss": 0.7295, "step": 30770 }, { "epoch": 0.7901129957103035, "grad_norm": 0.7421875, "learning_rate": 0.00011358926336231979, "loss": 0.9164, "step": 30771 }, { "epoch": 0.7901386729062253, "grad_norm": 0.78515625, "learning_rate": 0.00011358484055894711, "loss": 0.7819, "step": 30772 }, { "epoch": 0.7901643501021471, "grad_norm": 0.7890625, "learning_rate": 0.0001135804177285011, "loss": 0.8444, "step": 30773 }, { "epoch": 0.7901900272980689, "grad_norm": 0.76953125, "learning_rate": 0.00011357599487099056, "loss": 0.755, "step": 30774 }, { "epoch": 0.7902157044939907, "grad_norm": 0.76171875, "learning_rate": 0.00011357157198642424, "loss": 0.8381, "step": 30775 }, { "epoch": 0.7902413816899125, "grad_norm": 0.75390625, "learning_rate": 0.00011356714907481103, "loss": 0.8207, "step": 30776 }, { "epoch": 0.7902670588858344, "grad_norm": 0.7734375, "learning_rate": 0.00011356272613615969, "loss": 0.8288, "step": 30777 }, { "epoch": 0.7902927360817562, "grad_norm": 0.8828125, "learning_rate": 0.00011355830317047909, "loss": 0.8836, "step": 30778 }, { "epoch": 0.790318413277678, "grad_norm": 0.84765625, "learning_rate": 0.00011355388017777802, "loss": 0.8315, "step": 30779 }, { "epoch": 0.7903440904735999, "grad_norm": 0.80859375, "learning_rate": 0.00011354945715806526, "loss": 0.7951, "step": 30780 }, { "epoch": 0.7903697676695216, "grad_norm": 0.82421875, "learning_rate": 0.00011354503411134969, "loss": 0.9045, "step": 30781 }, { "epoch": 0.7903954448654434, "grad_norm": 0.75, "learning_rate": 0.00011354061103764008, "loss": 0.9238, "step": 30782 }, { "epoch": 0.7904211220613653, "grad_norm": 0.78515625, "learning_rate": 0.00011353618793694525, "loss": 0.8588, "step": 30783 }, { "epoch": 0.7904467992572871, "grad_norm": 0.73828125, "learning_rate": 0.00011353176480927401, "loss": 0.7932, "step": 30784 }, { "epoch": 0.790472476453209, "grad_norm": 0.78515625, "learning_rate": 0.0001135273416546352, "loss": 0.7845, "step": 30785 }, { "epoch": 0.7904981536491308, "grad_norm": 0.7734375, "learning_rate": 0.00011352291847303762, "loss": 0.7196, "step": 30786 }, { "epoch": 0.7905238308450525, "grad_norm": 0.8984375, "learning_rate": 0.0001135184952644901, "loss": 0.8941, "step": 30787 }, { "epoch": 0.7905495080409743, "grad_norm": 0.78515625, "learning_rate": 0.00011351407202900141, "loss": 0.8181, "step": 30788 }, { "epoch": 0.7905751852368962, "grad_norm": 0.80078125, "learning_rate": 0.00011350964876658042, "loss": 0.9779, "step": 30789 }, { "epoch": 0.790600862432818, "grad_norm": 0.765625, "learning_rate": 0.0001135052254772359, "loss": 0.7217, "step": 30790 }, { "epoch": 0.7906265396287399, "grad_norm": 0.8359375, "learning_rate": 0.00011350080216097668, "loss": 0.915, "step": 30791 }, { "epoch": 0.7906522168246617, "grad_norm": 0.75390625, "learning_rate": 0.00011349637881781159, "loss": 0.7524, "step": 30792 }, { "epoch": 0.7906778940205835, "grad_norm": 0.80859375, "learning_rate": 0.00011349195544774944, "loss": 0.7629, "step": 30793 }, { "epoch": 0.7907035712165053, "grad_norm": 0.81640625, "learning_rate": 0.00011348753205079905, "loss": 0.8554, "step": 30794 }, { "epoch": 0.7907292484124271, "grad_norm": 0.80859375, "learning_rate": 0.00011348310862696922, "loss": 0.8311, "step": 30795 }, { "epoch": 0.7907549256083489, "grad_norm": 0.73828125, "learning_rate": 0.00011347868517626876, "loss": 0.6811, "step": 30796 }, { "epoch": 0.7907806028042708, "grad_norm": 0.7265625, "learning_rate": 0.00011347426169870652, "loss": 0.7953, "step": 30797 }, { "epoch": 0.7908062800001926, "grad_norm": 0.80078125, "learning_rate": 0.00011346983819429129, "loss": 0.6943, "step": 30798 }, { "epoch": 0.7908319571961144, "grad_norm": 0.765625, "learning_rate": 0.00011346541466303187, "loss": 0.7645, "step": 30799 }, { "epoch": 0.7908576343920363, "grad_norm": 0.78125, "learning_rate": 0.00011346099110493714, "loss": 0.7841, "step": 30800 }, { "epoch": 0.790883311587958, "grad_norm": 0.7421875, "learning_rate": 0.00011345656752001581, "loss": 0.7796, "step": 30801 }, { "epoch": 0.7909089887838798, "grad_norm": 0.78515625, "learning_rate": 0.00011345214390827679, "loss": 0.8259, "step": 30802 }, { "epoch": 0.7909346659798017, "grad_norm": 0.8203125, "learning_rate": 0.00011344772026972886, "loss": 0.8854, "step": 30803 }, { "epoch": 0.7909603431757235, "grad_norm": 0.77734375, "learning_rate": 0.00011344329660438082, "loss": 0.7493, "step": 30804 }, { "epoch": 0.7909860203716453, "grad_norm": 0.8671875, "learning_rate": 0.00011343887291224151, "loss": 0.8491, "step": 30805 }, { "epoch": 0.7910116975675672, "grad_norm": 0.8515625, "learning_rate": 0.00011343444919331976, "loss": 0.8651, "step": 30806 }, { "epoch": 0.7910373747634889, "grad_norm": 0.7734375, "learning_rate": 0.00011343002544762436, "loss": 0.8097, "step": 30807 }, { "epoch": 0.7910630519594107, "grad_norm": 0.88671875, "learning_rate": 0.00011342560167516414, "loss": 0.8246, "step": 30808 }, { "epoch": 0.7910887291553326, "grad_norm": 0.80078125, "learning_rate": 0.00011342117787594788, "loss": 0.8333, "step": 30809 }, { "epoch": 0.7911144063512544, "grad_norm": 0.7578125, "learning_rate": 0.00011341675404998444, "loss": 0.7551, "step": 30810 }, { "epoch": 0.7911400835471762, "grad_norm": 0.703125, "learning_rate": 0.00011341233019728263, "loss": 0.7732, "step": 30811 }, { "epoch": 0.7911657607430981, "grad_norm": 0.80078125, "learning_rate": 0.00011340790631785123, "loss": 0.8054, "step": 30812 }, { "epoch": 0.7911914379390199, "grad_norm": 0.76953125, "learning_rate": 0.00011340348241169914, "loss": 0.8531, "step": 30813 }, { "epoch": 0.7912171151349416, "grad_norm": 0.8359375, "learning_rate": 0.00011339905847883507, "loss": 0.865, "step": 30814 }, { "epoch": 0.7912427923308635, "grad_norm": 0.82421875, "learning_rate": 0.00011339463451926789, "loss": 0.8264, "step": 30815 }, { "epoch": 0.7912684695267853, "grad_norm": 0.8125, "learning_rate": 0.00011339021053300643, "loss": 1.0226, "step": 30816 }, { "epoch": 0.7912941467227071, "grad_norm": 0.8203125, "learning_rate": 0.00011338578652005946, "loss": 0.7977, "step": 30817 }, { "epoch": 0.791319823918629, "grad_norm": 0.8125, "learning_rate": 0.00011338136248043585, "loss": 0.8096, "step": 30818 }, { "epoch": 0.7913455011145508, "grad_norm": 0.8359375, "learning_rate": 0.00011337693841414441, "loss": 0.7039, "step": 30819 }, { "epoch": 0.7913711783104727, "grad_norm": 0.8359375, "learning_rate": 0.00011337251432119394, "loss": 0.7547, "step": 30820 }, { "epoch": 0.7913968555063944, "grad_norm": 0.765625, "learning_rate": 0.00011336809020159323, "loss": 0.8874, "step": 30821 }, { "epoch": 0.7914225327023162, "grad_norm": 0.71484375, "learning_rate": 0.00011336366605535114, "loss": 0.6995, "step": 30822 }, { "epoch": 0.791448209898238, "grad_norm": 0.72265625, "learning_rate": 0.00011335924188247645, "loss": 0.8522, "step": 30823 }, { "epoch": 0.7914738870941599, "grad_norm": 0.8203125, "learning_rate": 0.00011335481768297802, "loss": 0.8153, "step": 30824 }, { "epoch": 0.7914995642900817, "grad_norm": 0.73046875, "learning_rate": 0.00011335039345686464, "loss": 0.8536, "step": 30825 }, { "epoch": 0.7915252414860036, "grad_norm": 0.78515625, "learning_rate": 0.00011334596920414513, "loss": 0.7622, "step": 30826 }, { "epoch": 0.7915509186819253, "grad_norm": 0.78125, "learning_rate": 0.00011334154492482831, "loss": 0.8693, "step": 30827 }, { "epoch": 0.7915765958778471, "grad_norm": 0.7734375, "learning_rate": 0.00011333712061892299, "loss": 0.8417, "step": 30828 }, { "epoch": 0.791602273073769, "grad_norm": 0.76171875, "learning_rate": 0.00011333269628643799, "loss": 0.8273, "step": 30829 }, { "epoch": 0.7916279502696908, "grad_norm": 0.82421875, "learning_rate": 0.00011332827192738215, "loss": 0.9534, "step": 30830 }, { "epoch": 0.7916536274656126, "grad_norm": 0.83203125, "learning_rate": 0.00011332384754176424, "loss": 0.8581, "step": 30831 }, { "epoch": 0.7916793046615345, "grad_norm": 0.765625, "learning_rate": 0.00011331942312959312, "loss": 0.8654, "step": 30832 }, { "epoch": 0.7917049818574563, "grad_norm": 0.8203125, "learning_rate": 0.0001133149986908776, "loss": 0.7018, "step": 30833 }, { "epoch": 0.791730659053378, "grad_norm": 0.78515625, "learning_rate": 0.00011331057422562649, "loss": 0.9075, "step": 30834 }, { "epoch": 0.7917563362492999, "grad_norm": 0.7578125, "learning_rate": 0.00011330614973384861, "loss": 0.7739, "step": 30835 }, { "epoch": 0.7917820134452217, "grad_norm": 0.79296875, "learning_rate": 0.00011330172521555276, "loss": 0.864, "step": 30836 }, { "epoch": 0.7918076906411435, "grad_norm": 0.77734375, "learning_rate": 0.00011329730067074778, "loss": 0.8792, "step": 30837 }, { "epoch": 0.7918333678370654, "grad_norm": 0.81640625, "learning_rate": 0.00011329287609944248, "loss": 0.7787, "step": 30838 }, { "epoch": 0.7918590450329872, "grad_norm": 0.79296875, "learning_rate": 0.00011328845150164569, "loss": 0.7741, "step": 30839 }, { "epoch": 0.791884722228909, "grad_norm": 0.79296875, "learning_rate": 0.00011328402687736621, "loss": 0.8154, "step": 30840 }, { "epoch": 0.7919103994248308, "grad_norm": 0.76171875, "learning_rate": 0.00011327960222661284, "loss": 0.7114, "step": 30841 }, { "epoch": 0.7919360766207526, "grad_norm": 0.8125, "learning_rate": 0.00011327517754939445, "loss": 0.8435, "step": 30842 }, { "epoch": 0.7919617538166744, "grad_norm": 0.73828125, "learning_rate": 0.00011327075284571984, "loss": 0.732, "step": 30843 }, { "epoch": 0.7919874310125963, "grad_norm": 0.84765625, "learning_rate": 0.00011326632811559778, "loss": 0.856, "step": 30844 }, { "epoch": 0.7920131082085181, "grad_norm": 0.79296875, "learning_rate": 0.00011326190335903715, "loss": 0.7687, "step": 30845 }, { "epoch": 0.79203878540444, "grad_norm": 0.82421875, "learning_rate": 0.00011325747857604676, "loss": 0.7779, "step": 30846 }, { "epoch": 0.7920644626003617, "grad_norm": 0.78515625, "learning_rate": 0.0001132530537666354, "loss": 0.753, "step": 30847 }, { "epoch": 0.7920901397962835, "grad_norm": 0.73828125, "learning_rate": 0.0001132486289308119, "loss": 0.7507, "step": 30848 }, { "epoch": 0.7921158169922053, "grad_norm": 0.8203125, "learning_rate": 0.00011324420406858504, "loss": 0.8298, "step": 30849 }, { "epoch": 0.7921414941881272, "grad_norm": 0.85546875, "learning_rate": 0.00011323977917996373, "loss": 0.7341, "step": 30850 }, { "epoch": 0.792167171384049, "grad_norm": 0.75390625, "learning_rate": 0.00011323535426495672, "loss": 0.7068, "step": 30851 }, { "epoch": 0.7921928485799709, "grad_norm": 0.84375, "learning_rate": 0.00011323092932357285, "loss": 0.986, "step": 30852 }, { "epoch": 0.7922185257758927, "grad_norm": 0.75390625, "learning_rate": 0.00011322650435582095, "loss": 0.8436, "step": 30853 }, { "epoch": 0.7922442029718144, "grad_norm": 0.9375, "learning_rate": 0.00011322207936170977, "loss": 0.7983, "step": 30854 }, { "epoch": 0.7922698801677363, "grad_norm": 0.7890625, "learning_rate": 0.00011321765434124823, "loss": 0.8183, "step": 30855 }, { "epoch": 0.7922955573636581, "grad_norm": 0.7890625, "learning_rate": 0.00011321322929444507, "loss": 0.7747, "step": 30856 }, { "epoch": 0.7923212345595799, "grad_norm": 0.734375, "learning_rate": 0.00011320880422130913, "loss": 0.7398, "step": 30857 }, { "epoch": 0.7923469117555018, "grad_norm": 0.78515625, "learning_rate": 0.00011320437912184927, "loss": 0.7517, "step": 30858 }, { "epoch": 0.7923725889514236, "grad_norm": 0.7578125, "learning_rate": 0.00011319995399607429, "loss": 0.8369, "step": 30859 }, { "epoch": 0.7923982661473454, "grad_norm": 0.8515625, "learning_rate": 0.00011319552884399293, "loss": 0.7921, "step": 30860 }, { "epoch": 0.7924239433432672, "grad_norm": 0.6953125, "learning_rate": 0.00011319110366561412, "loss": 0.7472, "step": 30861 }, { "epoch": 0.792449620539189, "grad_norm": 0.74609375, "learning_rate": 0.00011318667846094661, "loss": 0.8121, "step": 30862 }, { "epoch": 0.7924752977351108, "grad_norm": 0.765625, "learning_rate": 0.00011318225322999923, "loss": 0.8067, "step": 30863 }, { "epoch": 0.7925009749310327, "grad_norm": 0.79296875, "learning_rate": 0.00011317782797278082, "loss": 0.8227, "step": 30864 }, { "epoch": 0.7925266521269545, "grad_norm": 0.859375, "learning_rate": 0.0001131734026893002, "loss": 0.8536, "step": 30865 }, { "epoch": 0.7925523293228763, "grad_norm": 0.7890625, "learning_rate": 0.0001131689773795662, "loss": 0.7945, "step": 30866 }, { "epoch": 0.7925780065187981, "grad_norm": 0.74609375, "learning_rate": 0.0001131645520435876, "loss": 0.801, "step": 30867 }, { "epoch": 0.7926036837147199, "grad_norm": 0.75, "learning_rate": 0.00011316012668137322, "loss": 0.8203, "step": 30868 }, { "epoch": 0.7926293609106417, "grad_norm": 0.73828125, "learning_rate": 0.0001131557012929319, "loss": 0.7758, "step": 30869 }, { "epoch": 0.7926550381065636, "grad_norm": 0.84375, "learning_rate": 0.00011315127587827246, "loss": 0.8876, "step": 30870 }, { "epoch": 0.7926807153024854, "grad_norm": 0.796875, "learning_rate": 0.00011314685043740369, "loss": 0.864, "step": 30871 }, { "epoch": 0.7927063924984072, "grad_norm": 0.81640625, "learning_rate": 0.0001131424249703345, "loss": 0.7572, "step": 30872 }, { "epoch": 0.7927320696943291, "grad_norm": 0.78125, "learning_rate": 0.0001131379994770736, "loss": 0.8422, "step": 30873 }, { "epoch": 0.7927577468902508, "grad_norm": 0.7265625, "learning_rate": 0.00011313357395762984, "loss": 0.7321, "step": 30874 }, { "epoch": 0.7927834240861726, "grad_norm": 0.76171875, "learning_rate": 0.0001131291484120121, "loss": 0.6491, "step": 30875 }, { "epoch": 0.7928091012820945, "grad_norm": 0.8125, "learning_rate": 0.0001131247228402291, "loss": 0.8276, "step": 30876 }, { "epoch": 0.7928347784780163, "grad_norm": 0.796875, "learning_rate": 0.00011312029724228974, "loss": 0.9032, "step": 30877 }, { "epoch": 0.7928604556739381, "grad_norm": 0.73046875, "learning_rate": 0.0001131158716182028, "loss": 0.79, "step": 30878 }, { "epoch": 0.79288613286986, "grad_norm": 0.74609375, "learning_rate": 0.00011311144596797714, "loss": 0.7147, "step": 30879 }, { "epoch": 0.7929118100657818, "grad_norm": 0.828125, "learning_rate": 0.00011310702029162155, "loss": 0.7986, "step": 30880 }, { "epoch": 0.7929374872617035, "grad_norm": 0.765625, "learning_rate": 0.00011310259458914482, "loss": 0.7748, "step": 30881 }, { "epoch": 0.7929631644576254, "grad_norm": 0.80859375, "learning_rate": 0.00011309816886055582, "loss": 0.7675, "step": 30882 }, { "epoch": 0.7929888416535472, "grad_norm": 0.859375, "learning_rate": 0.00011309374310586337, "loss": 0.8129, "step": 30883 }, { "epoch": 0.793014518849469, "grad_norm": 0.734375, "learning_rate": 0.00011308931732507624, "loss": 0.8096, "step": 30884 }, { "epoch": 0.7930401960453909, "grad_norm": 0.83203125, "learning_rate": 0.00011308489151820331, "loss": 0.8947, "step": 30885 }, { "epoch": 0.7930658732413127, "grad_norm": 0.86328125, "learning_rate": 0.00011308046568525336, "loss": 0.7757, "step": 30886 }, { "epoch": 0.7930915504372344, "grad_norm": 0.75, "learning_rate": 0.00011307603982623522, "loss": 0.7685, "step": 30887 }, { "epoch": 0.7931172276331563, "grad_norm": 0.67578125, "learning_rate": 0.00011307161394115773, "loss": 0.7531, "step": 30888 }, { "epoch": 0.7931429048290781, "grad_norm": 1.0078125, "learning_rate": 0.00011306718803002966, "loss": 0.8048, "step": 30889 }, { "epoch": 0.793168582025, "grad_norm": 0.7890625, "learning_rate": 0.00011306276209285991, "loss": 0.7968, "step": 30890 }, { "epoch": 0.7931942592209218, "grad_norm": 0.734375, "learning_rate": 0.00011305833612965724, "loss": 0.8244, "step": 30891 }, { "epoch": 0.7932199364168436, "grad_norm": 0.765625, "learning_rate": 0.00011305391014043047, "loss": 0.7334, "step": 30892 }, { "epoch": 0.7932456136127655, "grad_norm": 0.7109375, "learning_rate": 0.00011304948412518847, "loss": 0.7198, "step": 30893 }, { "epoch": 0.7932712908086872, "grad_norm": 0.765625, "learning_rate": 0.00011304505808393999, "loss": 0.8259, "step": 30894 }, { "epoch": 0.793296968004609, "grad_norm": 0.7578125, "learning_rate": 0.0001130406320166939, "loss": 0.9114, "step": 30895 }, { "epoch": 0.7933226452005309, "grad_norm": 0.76953125, "learning_rate": 0.00011303620592345901, "loss": 0.666, "step": 30896 }, { "epoch": 0.7933483223964527, "grad_norm": 0.765625, "learning_rate": 0.00011303177980424414, "loss": 0.7691, "step": 30897 }, { "epoch": 0.7933739995923745, "grad_norm": 0.84765625, "learning_rate": 0.0001130273536590581, "loss": 0.8242, "step": 30898 }, { "epoch": 0.7933996767882964, "grad_norm": 0.7265625, "learning_rate": 0.00011302292748790975, "loss": 0.7603, "step": 30899 }, { "epoch": 0.7934253539842182, "grad_norm": 0.74609375, "learning_rate": 0.00011301850129080785, "loss": 0.8356, "step": 30900 }, { "epoch": 0.7934510311801399, "grad_norm": 0.87890625, "learning_rate": 0.00011301407506776128, "loss": 0.9621, "step": 30901 }, { "epoch": 0.7934767083760618, "grad_norm": 0.8203125, "learning_rate": 0.00011300964881877878, "loss": 0.8764, "step": 30902 }, { "epoch": 0.7935023855719836, "grad_norm": 0.74609375, "learning_rate": 0.00011300522254386927, "loss": 0.7105, "step": 30903 }, { "epoch": 0.7935280627679054, "grad_norm": 0.72265625, "learning_rate": 0.00011300079624304152, "loss": 0.7946, "step": 30904 }, { "epoch": 0.7935537399638273, "grad_norm": 0.8515625, "learning_rate": 0.00011299636991630436, "loss": 0.8287, "step": 30905 }, { "epoch": 0.7935794171597491, "grad_norm": 0.72265625, "learning_rate": 0.00011299194356366663, "loss": 0.818, "step": 30906 }, { "epoch": 0.7936050943556708, "grad_norm": 0.78515625, "learning_rate": 0.00011298751718513709, "loss": 0.8384, "step": 30907 }, { "epoch": 0.7936307715515927, "grad_norm": 0.82421875, "learning_rate": 0.0001129830907807246, "loss": 0.8108, "step": 30908 }, { "epoch": 0.7936564487475145, "grad_norm": 0.765625, "learning_rate": 0.000112978664350438, "loss": 0.8183, "step": 30909 }, { "epoch": 0.7936821259434363, "grad_norm": 0.7734375, "learning_rate": 0.00011297423789428612, "loss": 0.8717, "step": 30910 }, { "epoch": 0.7937078031393582, "grad_norm": 0.8203125, "learning_rate": 0.0001129698114122777, "loss": 0.9536, "step": 30911 }, { "epoch": 0.79373348033528, "grad_norm": 0.78125, "learning_rate": 0.00011296538490442168, "loss": 0.8002, "step": 30912 }, { "epoch": 0.7937591575312019, "grad_norm": 0.8515625, "learning_rate": 0.00011296095837072676, "loss": 0.7869, "step": 30913 }, { "epoch": 0.7937848347271236, "grad_norm": 0.76953125, "learning_rate": 0.00011295653181120185, "loss": 0.888, "step": 30914 }, { "epoch": 0.7938105119230454, "grad_norm": 0.7421875, "learning_rate": 0.00011295210522585575, "loss": 0.7124, "step": 30915 }, { "epoch": 0.7938361891189673, "grad_norm": 0.72265625, "learning_rate": 0.00011294767861469723, "loss": 0.7962, "step": 30916 }, { "epoch": 0.7938618663148891, "grad_norm": 0.71875, "learning_rate": 0.0001129432519777352, "loss": 0.7579, "step": 30917 }, { "epoch": 0.7938875435108109, "grad_norm": 0.7265625, "learning_rate": 0.00011293882531497844, "loss": 0.6848, "step": 30918 }, { "epoch": 0.7939132207067328, "grad_norm": 0.69921875, "learning_rate": 0.00011293439862643577, "loss": 0.7578, "step": 30919 }, { "epoch": 0.7939388979026546, "grad_norm": 0.84765625, "learning_rate": 0.000112929971912116, "loss": 0.8039, "step": 30920 }, { "epoch": 0.7939645750985763, "grad_norm": 0.796875, "learning_rate": 0.00011292554517202796, "loss": 0.7739, "step": 30921 }, { "epoch": 0.7939902522944982, "grad_norm": 0.8125, "learning_rate": 0.00011292111840618048, "loss": 0.76, "step": 30922 }, { "epoch": 0.79401592949042, "grad_norm": 0.73828125, "learning_rate": 0.00011291669161458238, "loss": 0.7876, "step": 30923 }, { "epoch": 0.7940416066863418, "grad_norm": 0.765625, "learning_rate": 0.00011291226479724246, "loss": 0.7586, "step": 30924 }, { "epoch": 0.7940672838822637, "grad_norm": 0.78125, "learning_rate": 0.00011290783795416959, "loss": 0.8675, "step": 30925 }, { "epoch": 0.7940929610781855, "grad_norm": 0.7734375, "learning_rate": 0.00011290341108537254, "loss": 0.802, "step": 30926 }, { "epoch": 0.7941186382741072, "grad_norm": 0.82421875, "learning_rate": 0.00011289898419086019, "loss": 0.7226, "step": 30927 }, { "epoch": 0.7941443154700291, "grad_norm": 0.75390625, "learning_rate": 0.00011289455727064132, "loss": 0.7795, "step": 30928 }, { "epoch": 0.7941699926659509, "grad_norm": 0.83203125, "learning_rate": 0.00011289013032472472, "loss": 0.8987, "step": 30929 }, { "epoch": 0.7941956698618727, "grad_norm": 0.76953125, "learning_rate": 0.0001128857033531193, "loss": 0.9149, "step": 30930 }, { "epoch": 0.7942213470577946, "grad_norm": 0.8359375, "learning_rate": 0.00011288127635583383, "loss": 0.8947, "step": 30931 }, { "epoch": 0.7942470242537164, "grad_norm": 0.78515625, "learning_rate": 0.00011287684933287715, "loss": 0.8175, "step": 30932 }, { "epoch": 0.7942727014496382, "grad_norm": 0.75, "learning_rate": 0.00011287242228425804, "loss": 0.7157, "step": 30933 }, { "epoch": 0.79429837864556, "grad_norm": 0.81640625, "learning_rate": 0.00011286799520998536, "loss": 0.7839, "step": 30934 }, { "epoch": 0.7943240558414818, "grad_norm": 0.80078125, "learning_rate": 0.00011286356811006797, "loss": 1.0121, "step": 30935 }, { "epoch": 0.7943497330374036, "grad_norm": 0.76171875, "learning_rate": 0.00011285914098451462, "loss": 0.7743, "step": 30936 }, { "epoch": 0.7943754102333255, "grad_norm": 0.73046875, "learning_rate": 0.00011285471383333415, "loss": 0.8771, "step": 30937 }, { "epoch": 0.7944010874292473, "grad_norm": 0.8671875, "learning_rate": 0.00011285028665653543, "loss": 0.8089, "step": 30938 }, { "epoch": 0.7944267646251691, "grad_norm": 0.78125, "learning_rate": 0.00011284585945412725, "loss": 0.8696, "step": 30939 }, { "epoch": 0.794452441821091, "grad_norm": 0.7578125, "learning_rate": 0.00011284143222611838, "loss": 0.8459, "step": 30940 }, { "epoch": 0.7944781190170127, "grad_norm": 0.78515625, "learning_rate": 0.00011283700497251775, "loss": 0.8382, "step": 30941 }, { "epoch": 0.7945037962129345, "grad_norm": 0.77734375, "learning_rate": 0.00011283257769333408, "loss": 0.8328, "step": 30942 }, { "epoch": 0.7945294734088564, "grad_norm": 0.7421875, "learning_rate": 0.00011282815038857628, "loss": 0.8108, "step": 30943 }, { "epoch": 0.7945551506047782, "grad_norm": 0.8046875, "learning_rate": 0.00011282372305825312, "loss": 0.8564, "step": 30944 }, { "epoch": 0.7945808278007, "grad_norm": 0.7734375, "learning_rate": 0.00011281929570237347, "loss": 0.9079, "step": 30945 }, { "epoch": 0.7946065049966219, "grad_norm": 0.828125, "learning_rate": 0.0001128148683209461, "loss": 0.9022, "step": 30946 }, { "epoch": 0.7946321821925436, "grad_norm": 0.7578125, "learning_rate": 0.00011281044091397986, "loss": 0.8215, "step": 30947 }, { "epoch": 0.7946578593884654, "grad_norm": 0.8046875, "learning_rate": 0.00011280601348148354, "loss": 0.9121, "step": 30948 }, { "epoch": 0.7946835365843873, "grad_norm": 0.8203125, "learning_rate": 0.000112801586023466, "loss": 0.9243, "step": 30949 }, { "epoch": 0.7947092137803091, "grad_norm": 0.75, "learning_rate": 0.00011279715853993606, "loss": 0.7874, "step": 30950 }, { "epoch": 0.794734890976231, "grad_norm": 0.734375, "learning_rate": 0.00011279273103090257, "loss": 0.7456, "step": 30951 }, { "epoch": 0.7947605681721528, "grad_norm": 0.67578125, "learning_rate": 0.0001127883034963743, "loss": 0.6913, "step": 30952 }, { "epoch": 0.7947862453680746, "grad_norm": 0.79296875, "learning_rate": 0.00011278387593636009, "loss": 0.7981, "step": 30953 }, { "epoch": 0.7948119225639964, "grad_norm": 0.80859375, "learning_rate": 0.00011277944835086876, "loss": 0.8466, "step": 30954 }, { "epoch": 0.7948375997599182, "grad_norm": 0.84765625, "learning_rate": 0.00011277502073990917, "loss": 0.8828, "step": 30955 }, { "epoch": 0.79486327695584, "grad_norm": 0.734375, "learning_rate": 0.0001127705931034901, "loss": 0.741, "step": 30956 }, { "epoch": 0.7948889541517619, "grad_norm": 0.75390625, "learning_rate": 0.00011276616544162038, "loss": 0.8088, "step": 30957 }, { "epoch": 0.7949146313476837, "grad_norm": 0.765625, "learning_rate": 0.00011276173775430887, "loss": 0.7693, "step": 30958 }, { "epoch": 0.7949403085436055, "grad_norm": 0.77734375, "learning_rate": 0.00011275731004156438, "loss": 0.9144, "step": 30959 }, { "epoch": 0.7949659857395274, "grad_norm": 0.703125, "learning_rate": 0.00011275288230339569, "loss": 0.649, "step": 30960 }, { "epoch": 0.7949916629354491, "grad_norm": 0.74609375, "learning_rate": 0.00011274845453981167, "loss": 0.8274, "step": 30961 }, { "epoch": 0.7950173401313709, "grad_norm": 0.7890625, "learning_rate": 0.00011274402675082114, "loss": 0.8369, "step": 30962 }, { "epoch": 0.7950430173272928, "grad_norm": 0.890625, "learning_rate": 0.00011273959893643291, "loss": 0.9019, "step": 30963 }, { "epoch": 0.7950686945232146, "grad_norm": 0.859375, "learning_rate": 0.00011273517109665582, "loss": 0.7985, "step": 30964 }, { "epoch": 0.7950943717191364, "grad_norm": 0.78515625, "learning_rate": 0.00011273074323149868, "loss": 0.7809, "step": 30965 }, { "epoch": 0.7951200489150583, "grad_norm": 0.7421875, "learning_rate": 0.00011272631534097029, "loss": 0.7422, "step": 30966 }, { "epoch": 0.79514572611098, "grad_norm": 0.81640625, "learning_rate": 0.00011272188742507954, "loss": 0.9021, "step": 30967 }, { "epoch": 0.7951714033069018, "grad_norm": 0.74609375, "learning_rate": 0.0001127174594838352, "loss": 0.7594, "step": 30968 }, { "epoch": 0.7951970805028237, "grad_norm": 0.72265625, "learning_rate": 0.0001127130315172461, "loss": 0.6265, "step": 30969 }, { "epoch": 0.7952227576987455, "grad_norm": 0.69921875, "learning_rate": 0.00011270860352532109, "loss": 0.8132, "step": 30970 }, { "epoch": 0.7952484348946673, "grad_norm": 0.81640625, "learning_rate": 0.000112704175508069, "loss": 0.8058, "step": 30971 }, { "epoch": 0.7952741120905892, "grad_norm": 0.78125, "learning_rate": 0.00011269974746549863, "loss": 0.7837, "step": 30972 }, { "epoch": 0.795299789286511, "grad_norm": 0.8125, "learning_rate": 0.00011269531939761879, "loss": 0.9555, "step": 30973 }, { "epoch": 0.7953254664824327, "grad_norm": 0.80859375, "learning_rate": 0.00011269089130443833, "loss": 0.7276, "step": 30974 }, { "epoch": 0.7953511436783546, "grad_norm": 0.80078125, "learning_rate": 0.00011268646318596607, "loss": 0.7846, "step": 30975 }, { "epoch": 0.7953768208742764, "grad_norm": 0.80859375, "learning_rate": 0.00011268203504221085, "loss": 0.7668, "step": 30976 }, { "epoch": 0.7954024980701982, "grad_norm": 0.8046875, "learning_rate": 0.00011267760687318147, "loss": 0.8198, "step": 30977 }, { "epoch": 0.7954281752661201, "grad_norm": 0.77734375, "learning_rate": 0.00011267317867888679, "loss": 0.8105, "step": 30978 }, { "epoch": 0.7954538524620419, "grad_norm": 0.78515625, "learning_rate": 0.00011266875045933556, "loss": 0.8145, "step": 30979 }, { "epoch": 0.7954795296579638, "grad_norm": 0.88671875, "learning_rate": 0.00011266432221453667, "loss": 0.7344, "step": 30980 }, { "epoch": 0.7955052068538855, "grad_norm": 0.74609375, "learning_rate": 0.00011265989394449894, "loss": 0.7383, "step": 30981 }, { "epoch": 0.7955308840498073, "grad_norm": 0.83984375, "learning_rate": 0.00011265546564923117, "loss": 0.834, "step": 30982 }, { "epoch": 0.7955565612457292, "grad_norm": 0.8671875, "learning_rate": 0.00011265103732874223, "loss": 0.8106, "step": 30983 }, { "epoch": 0.795582238441651, "grad_norm": 0.8046875, "learning_rate": 0.00011264660898304091, "loss": 0.8898, "step": 30984 }, { "epoch": 0.7956079156375728, "grad_norm": 0.76953125, "learning_rate": 0.00011264218061213603, "loss": 0.8565, "step": 30985 }, { "epoch": 0.7956335928334947, "grad_norm": 0.703125, "learning_rate": 0.00011263775221603643, "loss": 0.7075, "step": 30986 }, { "epoch": 0.7956592700294164, "grad_norm": 0.8828125, "learning_rate": 0.00011263332379475092, "loss": 0.8038, "step": 30987 }, { "epoch": 0.7956849472253382, "grad_norm": 0.80078125, "learning_rate": 0.00011262889534828833, "loss": 0.8391, "step": 30988 }, { "epoch": 0.7957106244212601, "grad_norm": 0.8125, "learning_rate": 0.0001126244668766575, "loss": 0.8589, "step": 30989 }, { "epoch": 0.7957363016171819, "grad_norm": 0.73828125, "learning_rate": 0.00011262003837986725, "loss": 0.8061, "step": 30990 }, { "epoch": 0.7957619788131037, "grad_norm": 0.8515625, "learning_rate": 0.00011261560985792643, "loss": 0.8282, "step": 30991 }, { "epoch": 0.7957876560090256, "grad_norm": 0.86328125, "learning_rate": 0.00011261118131084383, "loss": 0.8691, "step": 30992 }, { "epoch": 0.7958133332049474, "grad_norm": 0.78515625, "learning_rate": 0.00011260675273862824, "loss": 0.7615, "step": 30993 }, { "epoch": 0.7958390104008691, "grad_norm": 0.7421875, "learning_rate": 0.00011260232414128858, "loss": 0.8982, "step": 30994 }, { "epoch": 0.795864687596791, "grad_norm": 0.81640625, "learning_rate": 0.0001125978955188336, "loss": 0.8538, "step": 30995 }, { "epoch": 0.7958903647927128, "grad_norm": 0.84765625, "learning_rate": 0.00011259346687127215, "loss": 0.7223, "step": 30996 }, { "epoch": 0.7959160419886346, "grad_norm": 0.8203125, "learning_rate": 0.00011258903819861308, "loss": 0.9238, "step": 30997 }, { "epoch": 0.7959417191845565, "grad_norm": 0.7578125, "learning_rate": 0.00011258460950086518, "loss": 0.7115, "step": 30998 }, { "epoch": 0.7959673963804783, "grad_norm": 0.8125, "learning_rate": 0.0001125801807780373, "loss": 0.878, "step": 30999 }, { "epoch": 0.7959930735764, "grad_norm": 0.7421875, "learning_rate": 0.00011257575203013826, "loss": 0.7818, "step": 31000 }, { "epoch": 0.7959930735764, "eval_loss": 0.8121166229248047, "eval_runtime": 386.01, "eval_samples_per_second": 25.906, "eval_steps_per_second": 0.811, "step": 31000 }, { "epoch": 0.7960187507723219, "grad_norm": 0.74609375, "learning_rate": 0.00011257132325717685, "loss": 0.8611, "step": 31001 }, { "epoch": 0.7960444279682437, "grad_norm": 0.8671875, "learning_rate": 0.00011256689445916195, "loss": 0.7293, "step": 31002 }, { "epoch": 0.7960701051641655, "grad_norm": 0.79296875, "learning_rate": 0.00011256246563610237, "loss": 0.7429, "step": 31003 }, { "epoch": 0.7960957823600874, "grad_norm": 0.78125, "learning_rate": 0.00011255803678800694, "loss": 0.707, "step": 31004 }, { "epoch": 0.7961214595560092, "grad_norm": 0.67578125, "learning_rate": 0.00011255360791488445, "loss": 0.73, "step": 31005 }, { "epoch": 0.796147136751931, "grad_norm": 0.78515625, "learning_rate": 0.00011254917901674375, "loss": 0.9402, "step": 31006 }, { "epoch": 0.7961728139478528, "grad_norm": 0.8203125, "learning_rate": 0.00011254475009359368, "loss": 0.9244, "step": 31007 }, { "epoch": 0.7961984911437746, "grad_norm": 0.77734375, "learning_rate": 0.00011254032114544309, "loss": 0.8564, "step": 31008 }, { "epoch": 0.7962241683396964, "grad_norm": 0.75, "learning_rate": 0.00011253589217230071, "loss": 0.6669, "step": 31009 }, { "epoch": 0.7962498455356183, "grad_norm": 0.75390625, "learning_rate": 0.00011253146317417548, "loss": 0.8773, "step": 31010 }, { "epoch": 0.7962755227315401, "grad_norm": 0.9609375, "learning_rate": 0.00011252703415107615, "loss": 0.8266, "step": 31011 }, { "epoch": 0.796301199927462, "grad_norm": 0.8359375, "learning_rate": 0.00011252260510301158, "loss": 0.8239, "step": 31012 }, { "epoch": 0.7963268771233838, "grad_norm": 0.8515625, "learning_rate": 0.00011251817602999061, "loss": 0.7589, "step": 31013 }, { "epoch": 0.7963525543193055, "grad_norm": 0.765625, "learning_rate": 0.00011251374693202201, "loss": 0.828, "step": 31014 }, { "epoch": 0.7963782315152274, "grad_norm": 0.734375, "learning_rate": 0.00011250931780911467, "loss": 0.8063, "step": 31015 }, { "epoch": 0.7964039087111492, "grad_norm": 0.8828125, "learning_rate": 0.00011250488866127737, "loss": 0.8652, "step": 31016 }, { "epoch": 0.796429585907071, "grad_norm": 0.8046875, "learning_rate": 0.00011250045948851897, "loss": 0.7832, "step": 31017 }, { "epoch": 0.7964552631029929, "grad_norm": 0.78125, "learning_rate": 0.0001124960302908483, "loss": 0.8997, "step": 31018 }, { "epoch": 0.7964809402989147, "grad_norm": 0.76171875, "learning_rate": 0.00011249160106827412, "loss": 0.7589, "step": 31019 }, { "epoch": 0.7965066174948364, "grad_norm": 0.83203125, "learning_rate": 0.00011248717182080535, "loss": 0.7797, "step": 31020 }, { "epoch": 0.7965322946907583, "grad_norm": 0.80859375, "learning_rate": 0.00011248274254845074, "loss": 0.7523, "step": 31021 }, { "epoch": 0.7965579718866801, "grad_norm": 0.88671875, "learning_rate": 0.00011247831325121918, "loss": 0.8527, "step": 31022 }, { "epoch": 0.7965836490826019, "grad_norm": 0.80859375, "learning_rate": 0.00011247388392911948, "loss": 0.7541, "step": 31023 }, { "epoch": 0.7966093262785238, "grad_norm": 0.8046875, "learning_rate": 0.00011246945458216044, "loss": 0.8994, "step": 31024 }, { "epoch": 0.7966350034744456, "grad_norm": 0.7890625, "learning_rate": 0.0001124650252103509, "loss": 0.7947, "step": 31025 }, { "epoch": 0.7966606806703674, "grad_norm": 0.80859375, "learning_rate": 0.00011246059581369968, "loss": 0.8625, "step": 31026 }, { "epoch": 0.7966863578662892, "grad_norm": 0.75390625, "learning_rate": 0.00011245616639221561, "loss": 0.8258, "step": 31027 }, { "epoch": 0.796712035062211, "grad_norm": 0.75390625, "learning_rate": 0.00011245173694590756, "loss": 0.7992, "step": 31028 }, { "epoch": 0.7967377122581328, "grad_norm": 0.8046875, "learning_rate": 0.00011244730747478431, "loss": 0.8627, "step": 31029 }, { "epoch": 0.7967633894540547, "grad_norm": 0.8359375, "learning_rate": 0.0001124428779788547, "loss": 0.7532, "step": 31030 }, { "epoch": 0.7967890666499765, "grad_norm": 0.80078125, "learning_rate": 0.00011243844845812756, "loss": 0.9, "step": 31031 }, { "epoch": 0.7968147438458983, "grad_norm": 0.8125, "learning_rate": 0.00011243401891261171, "loss": 0.8468, "step": 31032 }, { "epoch": 0.7968404210418202, "grad_norm": 0.81640625, "learning_rate": 0.00011242958934231597, "loss": 0.8713, "step": 31033 }, { "epoch": 0.7968660982377419, "grad_norm": 0.75, "learning_rate": 0.00011242515974724921, "loss": 0.7309, "step": 31034 }, { "epoch": 0.7968917754336637, "grad_norm": 0.82421875, "learning_rate": 0.00011242073012742023, "loss": 0.8453, "step": 31035 }, { "epoch": 0.7969174526295856, "grad_norm": 0.77734375, "learning_rate": 0.00011241630048283782, "loss": 0.7868, "step": 31036 }, { "epoch": 0.7969431298255074, "grad_norm": 0.81640625, "learning_rate": 0.0001124118708135109, "loss": 0.8314, "step": 31037 }, { "epoch": 0.7969688070214292, "grad_norm": 0.8515625, "learning_rate": 0.00011240744111944819, "loss": 0.9473, "step": 31038 }, { "epoch": 0.7969944842173511, "grad_norm": 0.7890625, "learning_rate": 0.00011240301140065859, "loss": 0.8876, "step": 31039 }, { "epoch": 0.7970201614132728, "grad_norm": 0.78125, "learning_rate": 0.00011239858165715091, "loss": 0.8108, "step": 31040 }, { "epoch": 0.7970458386091946, "grad_norm": 0.8203125, "learning_rate": 0.00011239415188893396, "loss": 0.6794, "step": 31041 }, { "epoch": 0.7970715158051165, "grad_norm": 0.83984375, "learning_rate": 0.00011238972209601662, "loss": 0.8224, "step": 31042 }, { "epoch": 0.7970971930010383, "grad_norm": 0.75, "learning_rate": 0.00011238529227840766, "loss": 0.7785, "step": 31043 }, { "epoch": 0.7971228701969602, "grad_norm": 0.8125, "learning_rate": 0.00011238086243611594, "loss": 1.0446, "step": 31044 }, { "epoch": 0.797148547392882, "grad_norm": 0.78125, "learning_rate": 0.0001123764325691503, "loss": 0.8809, "step": 31045 }, { "epoch": 0.7971742245888038, "grad_norm": 0.74609375, "learning_rate": 0.00011237200267751949, "loss": 0.8473, "step": 31046 }, { "epoch": 0.7971999017847256, "grad_norm": 0.83203125, "learning_rate": 0.00011236757276123245, "loss": 0.8369, "step": 31047 }, { "epoch": 0.7972255789806474, "grad_norm": 0.80078125, "learning_rate": 0.00011236314282029791, "loss": 0.8157, "step": 31048 }, { "epoch": 0.7972512561765692, "grad_norm": 0.81640625, "learning_rate": 0.00011235871285472476, "loss": 0.7902, "step": 31049 }, { "epoch": 0.7972769333724911, "grad_norm": 0.76953125, "learning_rate": 0.00011235428286452186, "loss": 0.8048, "step": 31050 }, { "epoch": 0.7973026105684129, "grad_norm": 0.78515625, "learning_rate": 0.00011234985284969792, "loss": 0.8113, "step": 31051 }, { "epoch": 0.7973282877643347, "grad_norm": 0.79296875, "learning_rate": 0.00011234542281026188, "loss": 0.7016, "step": 31052 }, { "epoch": 0.7973539649602566, "grad_norm": 0.859375, "learning_rate": 0.00011234099274622252, "loss": 0.7497, "step": 31053 }, { "epoch": 0.7973796421561783, "grad_norm": 0.7734375, "learning_rate": 0.00011233656265758866, "loss": 0.807, "step": 31054 }, { "epoch": 0.7974053193521001, "grad_norm": 0.78515625, "learning_rate": 0.00011233213254436914, "loss": 0.8559, "step": 31055 }, { "epoch": 0.797430996548022, "grad_norm": 0.67578125, "learning_rate": 0.00011232770240657282, "loss": 0.7079, "step": 31056 }, { "epoch": 0.7974566737439438, "grad_norm": 0.7734375, "learning_rate": 0.00011232327224420852, "loss": 0.962, "step": 31057 }, { "epoch": 0.7974823509398656, "grad_norm": 0.78515625, "learning_rate": 0.000112318842057285, "loss": 0.7749, "step": 31058 }, { "epoch": 0.7975080281357875, "grad_norm": 0.76171875, "learning_rate": 0.00011231441184581115, "loss": 0.7731, "step": 31059 }, { "epoch": 0.7975337053317092, "grad_norm": 0.76171875, "learning_rate": 0.00011230998160979582, "loss": 0.8015, "step": 31060 }, { "epoch": 0.797559382527631, "grad_norm": 0.79296875, "learning_rate": 0.00011230555134924778, "loss": 0.9038, "step": 31061 }, { "epoch": 0.7975850597235529, "grad_norm": 0.765625, "learning_rate": 0.00011230112106417589, "loss": 0.739, "step": 31062 }, { "epoch": 0.7976107369194747, "grad_norm": 0.8203125, "learning_rate": 0.00011229669075458902, "loss": 0.8179, "step": 31063 }, { "epoch": 0.7976364141153965, "grad_norm": 0.765625, "learning_rate": 0.00011229226042049589, "loss": 0.9568, "step": 31064 }, { "epoch": 0.7976620913113184, "grad_norm": 0.7578125, "learning_rate": 0.00011228783006190546, "loss": 0.7365, "step": 31065 }, { "epoch": 0.7976877685072402, "grad_norm": 0.796875, "learning_rate": 0.00011228339967882646, "loss": 0.8387, "step": 31066 }, { "epoch": 0.7977134457031619, "grad_norm": 0.78125, "learning_rate": 0.00011227896927126775, "loss": 0.8992, "step": 31067 }, { "epoch": 0.7977391228990838, "grad_norm": 0.7890625, "learning_rate": 0.00011227453883923818, "loss": 0.7254, "step": 31068 }, { "epoch": 0.7977648000950056, "grad_norm": 0.71875, "learning_rate": 0.00011227010838274654, "loss": 0.815, "step": 31069 }, { "epoch": 0.7977904772909274, "grad_norm": 0.796875, "learning_rate": 0.00011226567790180172, "loss": 0.7121, "step": 31070 }, { "epoch": 0.7978161544868493, "grad_norm": 0.83984375, "learning_rate": 0.00011226124739641248, "loss": 0.7386, "step": 31071 }, { "epoch": 0.7978418316827711, "grad_norm": 0.82421875, "learning_rate": 0.0001122568168665877, "loss": 0.8107, "step": 31072 }, { "epoch": 0.797867508878693, "grad_norm": 0.69921875, "learning_rate": 0.00011225238631233616, "loss": 0.7919, "step": 31073 }, { "epoch": 0.7978931860746147, "grad_norm": 0.796875, "learning_rate": 0.00011224795573366675, "loss": 0.8033, "step": 31074 }, { "epoch": 0.7979188632705365, "grad_norm": 0.69921875, "learning_rate": 0.00011224352513058827, "loss": 0.8154, "step": 31075 }, { "epoch": 0.7979445404664584, "grad_norm": 0.87890625, "learning_rate": 0.00011223909450310954, "loss": 0.8552, "step": 31076 }, { "epoch": 0.7979702176623802, "grad_norm": 0.77734375, "learning_rate": 0.00011223466385123942, "loss": 0.9105, "step": 31077 }, { "epoch": 0.797995894858302, "grad_norm": 0.77734375, "learning_rate": 0.00011223023317498667, "loss": 0.8027, "step": 31078 }, { "epoch": 0.7980215720542239, "grad_norm": 0.80859375, "learning_rate": 0.00011222580247436021, "loss": 0.8348, "step": 31079 }, { "epoch": 0.7980472492501456, "grad_norm": 0.82421875, "learning_rate": 0.00011222137174936882, "loss": 0.8909, "step": 31080 }, { "epoch": 0.7980729264460674, "grad_norm": 0.8671875, "learning_rate": 0.00011221694100002134, "loss": 0.828, "step": 31081 }, { "epoch": 0.7980986036419893, "grad_norm": 0.76953125, "learning_rate": 0.00011221251022632659, "loss": 0.7743, "step": 31082 }, { "epoch": 0.7981242808379111, "grad_norm": 0.78515625, "learning_rate": 0.00011220807942829343, "loss": 0.7755, "step": 31083 }, { "epoch": 0.7981499580338329, "grad_norm": 0.8359375, "learning_rate": 0.00011220364860593069, "loss": 0.9157, "step": 31084 }, { "epoch": 0.7981756352297548, "grad_norm": 0.79296875, "learning_rate": 0.00011219921775924716, "loss": 0.8558, "step": 31085 }, { "epoch": 0.7982013124256766, "grad_norm": 0.84375, "learning_rate": 0.00011219478688825166, "loss": 0.6581, "step": 31086 }, { "epoch": 0.7982269896215983, "grad_norm": 0.80859375, "learning_rate": 0.00011219035599295309, "loss": 0.9517, "step": 31087 }, { "epoch": 0.7982526668175202, "grad_norm": 0.71875, "learning_rate": 0.00011218592507336024, "loss": 0.8381, "step": 31088 }, { "epoch": 0.798278344013442, "grad_norm": 0.8359375, "learning_rate": 0.00011218149412948191, "loss": 0.757, "step": 31089 }, { "epoch": 0.7983040212093638, "grad_norm": 0.734375, "learning_rate": 0.00011217706316132699, "loss": 0.6842, "step": 31090 }, { "epoch": 0.7983296984052857, "grad_norm": 0.82421875, "learning_rate": 0.00011217263216890426, "loss": 0.8365, "step": 31091 }, { "epoch": 0.7983553756012075, "grad_norm": 0.77734375, "learning_rate": 0.00011216820115222259, "loss": 0.8519, "step": 31092 }, { "epoch": 0.7983810527971293, "grad_norm": 0.74609375, "learning_rate": 0.0001121637701112908, "loss": 0.8127, "step": 31093 }, { "epoch": 0.7984067299930511, "grad_norm": 0.83203125, "learning_rate": 0.0001121593390461177, "loss": 1.0024, "step": 31094 }, { "epoch": 0.7984324071889729, "grad_norm": 0.76953125, "learning_rate": 0.00011215490795671215, "loss": 0.8303, "step": 31095 }, { "epoch": 0.7984580843848947, "grad_norm": 0.765625, "learning_rate": 0.00011215047684308298, "loss": 0.733, "step": 31096 }, { "epoch": 0.7984837615808166, "grad_norm": 0.80078125, "learning_rate": 0.00011214604570523898, "loss": 0.8497, "step": 31097 }, { "epoch": 0.7985094387767384, "grad_norm": 0.765625, "learning_rate": 0.00011214161454318901, "loss": 0.7135, "step": 31098 }, { "epoch": 0.7985351159726602, "grad_norm": 0.75, "learning_rate": 0.0001121371833569419, "loss": 0.791, "step": 31099 }, { "epoch": 0.798560793168582, "grad_norm": 0.7734375, "learning_rate": 0.0001121327521465065, "loss": 0.8302, "step": 31100 }, { "epoch": 0.7985864703645038, "grad_norm": 0.88671875, "learning_rate": 0.0001121283209118916, "loss": 0.8822, "step": 31101 }, { "epoch": 0.7986121475604256, "grad_norm": 0.8203125, "learning_rate": 0.00011212388965310605, "loss": 0.8125, "step": 31102 }, { "epoch": 0.7986378247563475, "grad_norm": 0.8671875, "learning_rate": 0.00011211945837015871, "loss": 0.7778, "step": 31103 }, { "epoch": 0.7986635019522693, "grad_norm": 0.90625, "learning_rate": 0.00011211502706305834, "loss": 0.8869, "step": 31104 }, { "epoch": 0.7986891791481912, "grad_norm": 0.71484375, "learning_rate": 0.00011211059573181386, "loss": 0.7724, "step": 31105 }, { "epoch": 0.798714856344113, "grad_norm": 0.75, "learning_rate": 0.00011210616437643404, "loss": 0.7969, "step": 31106 }, { "epoch": 0.7987405335400347, "grad_norm": 0.76171875, "learning_rate": 0.0001121017329969277, "loss": 0.8261, "step": 31107 }, { "epoch": 0.7987662107359565, "grad_norm": 0.8359375, "learning_rate": 0.00011209730159330374, "loss": 0.918, "step": 31108 }, { "epoch": 0.7987918879318784, "grad_norm": 0.80859375, "learning_rate": 0.00011209287016557096, "loss": 0.6913, "step": 31109 }, { "epoch": 0.7988175651278002, "grad_norm": 0.765625, "learning_rate": 0.00011208843871373814, "loss": 0.7682, "step": 31110 }, { "epoch": 0.7988432423237221, "grad_norm": 0.80078125, "learning_rate": 0.00011208400723781415, "loss": 0.8254, "step": 31111 }, { "epoch": 0.7988689195196439, "grad_norm": 0.84375, "learning_rate": 0.00011207957573780787, "loss": 0.8294, "step": 31112 }, { "epoch": 0.7988945967155657, "grad_norm": 0.8046875, "learning_rate": 0.00011207514421372805, "loss": 0.8673, "step": 31113 }, { "epoch": 0.7989202739114875, "grad_norm": 0.93359375, "learning_rate": 0.00011207071266558357, "loss": 0.83, "step": 31114 }, { "epoch": 0.7989459511074093, "grad_norm": 0.71875, "learning_rate": 0.00011206628109338324, "loss": 0.8009, "step": 31115 }, { "epoch": 0.7989716283033311, "grad_norm": 0.84765625, "learning_rate": 0.00011206184949713593, "loss": 0.8058, "step": 31116 }, { "epoch": 0.798997305499253, "grad_norm": 0.79296875, "learning_rate": 0.00011205741787685045, "loss": 0.834, "step": 31117 }, { "epoch": 0.7990229826951748, "grad_norm": 0.75, "learning_rate": 0.00011205298623253557, "loss": 0.7256, "step": 31118 }, { "epoch": 0.7990486598910966, "grad_norm": 0.80078125, "learning_rate": 0.00011204855456420022, "loss": 0.8019, "step": 31119 }, { "epoch": 0.7990743370870184, "grad_norm": 0.79296875, "learning_rate": 0.00011204412287185317, "loss": 0.8656, "step": 31120 }, { "epoch": 0.7991000142829402, "grad_norm": 0.80078125, "learning_rate": 0.00011203969115550327, "loss": 0.7432, "step": 31121 }, { "epoch": 0.799125691478862, "grad_norm": 0.8125, "learning_rate": 0.0001120352594151594, "loss": 0.7652, "step": 31122 }, { "epoch": 0.7991513686747839, "grad_norm": 0.7890625, "learning_rate": 0.00011203082765083029, "loss": 0.7818, "step": 31123 }, { "epoch": 0.7991770458707057, "grad_norm": 0.76171875, "learning_rate": 0.00011202639586252484, "loss": 0.7825, "step": 31124 }, { "epoch": 0.7992027230666275, "grad_norm": 0.7734375, "learning_rate": 0.00011202196405025188, "loss": 0.8542, "step": 31125 }, { "epoch": 0.7992284002625494, "grad_norm": 0.78125, "learning_rate": 0.0001120175322140202, "loss": 0.7235, "step": 31126 }, { "epoch": 0.7992540774584711, "grad_norm": 0.8515625, "learning_rate": 0.0001120131003538387, "loss": 0.7653, "step": 31127 }, { "epoch": 0.7992797546543929, "grad_norm": 0.87890625, "learning_rate": 0.00011200866846971616, "loss": 0.9077, "step": 31128 }, { "epoch": 0.7993054318503148, "grad_norm": 0.77734375, "learning_rate": 0.00011200423656166146, "loss": 0.744, "step": 31129 }, { "epoch": 0.7993311090462366, "grad_norm": 0.7421875, "learning_rate": 0.00011199980462968335, "loss": 0.759, "step": 31130 }, { "epoch": 0.7993567862421584, "grad_norm": 0.7890625, "learning_rate": 0.00011199537267379074, "loss": 0.7641, "step": 31131 }, { "epoch": 0.7993824634380803, "grad_norm": 0.8046875, "learning_rate": 0.00011199094069399243, "loss": 0.8474, "step": 31132 }, { "epoch": 0.7994081406340021, "grad_norm": 0.78125, "learning_rate": 0.00011198650869029728, "loss": 0.8161, "step": 31133 }, { "epoch": 0.7994338178299238, "grad_norm": 0.79296875, "learning_rate": 0.00011198207666271406, "loss": 1.0011, "step": 31134 }, { "epoch": 0.7994594950258457, "grad_norm": 0.7265625, "learning_rate": 0.00011197764461125168, "loss": 0.7805, "step": 31135 }, { "epoch": 0.7994851722217675, "grad_norm": 0.83984375, "learning_rate": 0.00011197321253591892, "loss": 0.7634, "step": 31136 }, { "epoch": 0.7995108494176894, "grad_norm": 0.76953125, "learning_rate": 0.00011196878043672463, "loss": 0.7947, "step": 31137 }, { "epoch": 0.7995365266136112, "grad_norm": 0.7734375, "learning_rate": 0.00011196434831367765, "loss": 0.9936, "step": 31138 }, { "epoch": 0.799562203809533, "grad_norm": 0.80859375, "learning_rate": 0.00011195991616678678, "loss": 0.9479, "step": 31139 }, { "epoch": 0.7995878810054547, "grad_norm": 0.875, "learning_rate": 0.0001119554839960609, "loss": 0.8846, "step": 31140 }, { "epoch": 0.7996135582013766, "grad_norm": 0.875, "learning_rate": 0.00011195105180150883, "loss": 0.8503, "step": 31141 }, { "epoch": 0.7996392353972984, "grad_norm": 0.8359375, "learning_rate": 0.0001119466195831394, "loss": 0.9199, "step": 31142 }, { "epoch": 0.7996649125932203, "grad_norm": 0.796875, "learning_rate": 0.00011194218734096141, "loss": 0.8568, "step": 31143 }, { "epoch": 0.7996905897891421, "grad_norm": 0.78125, "learning_rate": 0.0001119377550749837, "loss": 0.9072, "step": 31144 }, { "epoch": 0.7997162669850639, "grad_norm": 0.75390625, "learning_rate": 0.00011193332278521516, "loss": 0.9273, "step": 31145 }, { "epoch": 0.7997419441809858, "grad_norm": 0.83984375, "learning_rate": 0.00011192889047166458, "loss": 0.904, "step": 31146 }, { "epoch": 0.7997676213769075, "grad_norm": 0.765625, "learning_rate": 0.00011192445813434076, "loss": 0.6912, "step": 31147 }, { "epoch": 0.7997932985728293, "grad_norm": 0.78515625, "learning_rate": 0.00011192002577325262, "loss": 0.846, "step": 31148 }, { "epoch": 0.7998189757687512, "grad_norm": 0.75, "learning_rate": 0.00011191559338840894, "loss": 0.8739, "step": 31149 }, { "epoch": 0.799844652964673, "grad_norm": 0.8671875, "learning_rate": 0.00011191116097981853, "loss": 0.9591, "step": 31150 }, { "epoch": 0.7998703301605948, "grad_norm": 0.75, "learning_rate": 0.00011190672854749028, "loss": 0.7577, "step": 31151 }, { "epoch": 0.7998960073565167, "grad_norm": 0.8125, "learning_rate": 0.00011190229609143296, "loss": 0.8407, "step": 31152 }, { "epoch": 0.7999216845524385, "grad_norm": 0.75390625, "learning_rate": 0.00011189786361165548, "loss": 0.8808, "step": 31153 }, { "epoch": 0.7999473617483602, "grad_norm": 0.89453125, "learning_rate": 0.00011189343110816662, "loss": 0.9339, "step": 31154 }, { "epoch": 0.7999730389442821, "grad_norm": 0.75, "learning_rate": 0.00011188899858097523, "loss": 0.789, "step": 31155 }, { "epoch": 0.7999987161402039, "grad_norm": 0.73046875, "learning_rate": 0.00011188456603009014, "loss": 0.7595, "step": 31156 }, { "epoch": 0.8000243933361257, "grad_norm": 0.75390625, "learning_rate": 0.00011188013345552017, "loss": 0.8743, "step": 31157 }, { "epoch": 0.8000500705320476, "grad_norm": 0.71484375, "learning_rate": 0.00011187570085727416, "loss": 0.6794, "step": 31158 }, { "epoch": 0.8000757477279694, "grad_norm": 0.7421875, "learning_rate": 0.00011187126823536098, "loss": 0.8968, "step": 31159 }, { "epoch": 0.8001014249238911, "grad_norm": 0.80078125, "learning_rate": 0.00011186683558978941, "loss": 0.8828, "step": 31160 }, { "epoch": 0.800127102119813, "grad_norm": 0.7265625, "learning_rate": 0.00011186240292056831, "loss": 0.7428, "step": 31161 }, { "epoch": 0.8001527793157348, "grad_norm": 0.77734375, "learning_rate": 0.00011185797022770655, "loss": 0.7329, "step": 31162 }, { "epoch": 0.8001784565116566, "grad_norm": 0.80078125, "learning_rate": 0.00011185353751121288, "loss": 0.7455, "step": 31163 }, { "epoch": 0.8002041337075785, "grad_norm": 0.76171875, "learning_rate": 0.0001118491047710962, "loss": 0.7923, "step": 31164 }, { "epoch": 0.8002298109035003, "grad_norm": 0.75390625, "learning_rate": 0.00011184467200736533, "loss": 0.7507, "step": 31165 }, { "epoch": 0.8002554880994222, "grad_norm": 0.79296875, "learning_rate": 0.00011184023922002907, "loss": 0.8057, "step": 31166 }, { "epoch": 0.8002811652953439, "grad_norm": 0.84375, "learning_rate": 0.0001118358064090963, "loss": 0.7379, "step": 31167 }, { "epoch": 0.8003068424912657, "grad_norm": 0.73828125, "learning_rate": 0.00011183137357457585, "loss": 0.8234, "step": 31168 }, { "epoch": 0.8003325196871875, "grad_norm": 0.765625, "learning_rate": 0.00011182694071647653, "loss": 0.7629, "step": 31169 }, { "epoch": 0.8003581968831094, "grad_norm": 0.87109375, "learning_rate": 0.0001118225078348072, "loss": 0.7261, "step": 31170 }, { "epoch": 0.8003838740790312, "grad_norm": 0.9609375, "learning_rate": 0.00011181807492957666, "loss": 0.7847, "step": 31171 }, { "epoch": 0.8004095512749531, "grad_norm": 0.78125, "learning_rate": 0.00011181364200079376, "loss": 0.8114, "step": 31172 }, { "epoch": 0.8004352284708749, "grad_norm": 0.78515625, "learning_rate": 0.00011180920904846734, "loss": 0.8056, "step": 31173 }, { "epoch": 0.8004609056667966, "grad_norm": 0.78515625, "learning_rate": 0.00011180477607260625, "loss": 0.831, "step": 31174 }, { "epoch": 0.8004865828627185, "grad_norm": 1.4375, "learning_rate": 0.00011180034307321932, "loss": 0.7177, "step": 31175 }, { "epoch": 0.8005122600586403, "grad_norm": 0.81640625, "learning_rate": 0.00011179591005031533, "loss": 0.8903, "step": 31176 }, { "epoch": 0.8005379372545621, "grad_norm": 0.734375, "learning_rate": 0.00011179147700390317, "loss": 0.7163, "step": 31177 }, { "epoch": 0.800563614450484, "grad_norm": 0.8203125, "learning_rate": 0.00011178704393399169, "loss": 0.876, "step": 31178 }, { "epoch": 0.8005892916464058, "grad_norm": 0.8046875, "learning_rate": 0.00011178261084058965, "loss": 0.902, "step": 31179 }, { "epoch": 0.8006149688423275, "grad_norm": 0.79296875, "learning_rate": 0.00011177817772370598, "loss": 0.7961, "step": 31180 }, { "epoch": 0.8006406460382494, "grad_norm": 0.76171875, "learning_rate": 0.00011177374458334943, "loss": 0.7817, "step": 31181 }, { "epoch": 0.8006663232341712, "grad_norm": 0.8203125, "learning_rate": 0.00011176931141952888, "loss": 0.8456, "step": 31182 }, { "epoch": 0.800692000430093, "grad_norm": 0.79296875, "learning_rate": 0.00011176487823225319, "loss": 0.8838, "step": 31183 }, { "epoch": 0.8007176776260149, "grad_norm": 0.75, "learning_rate": 0.00011176044502153111, "loss": 0.7149, "step": 31184 }, { "epoch": 0.8007433548219367, "grad_norm": 0.765625, "learning_rate": 0.00011175601178737154, "loss": 0.8563, "step": 31185 }, { "epoch": 0.8007690320178585, "grad_norm": 0.8046875, "learning_rate": 0.0001117515785297833, "loss": 0.7379, "step": 31186 }, { "epoch": 0.8007947092137803, "grad_norm": 0.8359375, "learning_rate": 0.00011174714524877523, "loss": 0.8819, "step": 31187 }, { "epoch": 0.8008203864097021, "grad_norm": 0.77734375, "learning_rate": 0.0001117427119443562, "loss": 0.7321, "step": 31188 }, { "epoch": 0.8008460636056239, "grad_norm": 0.8203125, "learning_rate": 0.00011173827861653495, "loss": 0.8901, "step": 31189 }, { "epoch": 0.8008717408015458, "grad_norm": 0.76953125, "learning_rate": 0.00011173384526532038, "loss": 0.7836, "step": 31190 }, { "epoch": 0.8008974179974676, "grad_norm": 0.83203125, "learning_rate": 0.00011172941189072132, "loss": 1.0131, "step": 31191 }, { "epoch": 0.8009230951933894, "grad_norm": 0.7578125, "learning_rate": 0.00011172497849274659, "loss": 0.8841, "step": 31192 }, { "epoch": 0.8009487723893112, "grad_norm": 0.71484375, "learning_rate": 0.00011172054507140505, "loss": 0.7767, "step": 31193 }, { "epoch": 0.800974449585233, "grad_norm": 0.80859375, "learning_rate": 0.00011171611162670557, "loss": 0.9495, "step": 31194 }, { "epoch": 0.8010001267811548, "grad_norm": 0.890625, "learning_rate": 0.00011171167815865689, "loss": 0.9129, "step": 31195 }, { "epoch": 0.8010258039770767, "grad_norm": 0.70703125, "learning_rate": 0.0001117072446672679, "loss": 0.7605, "step": 31196 }, { "epoch": 0.8010514811729985, "grad_norm": 0.79296875, "learning_rate": 0.00011170281115254742, "loss": 0.8397, "step": 31197 }, { "epoch": 0.8010771583689204, "grad_norm": 0.75390625, "learning_rate": 0.00011169837761450428, "loss": 0.7396, "step": 31198 }, { "epoch": 0.8011028355648422, "grad_norm": 0.765625, "learning_rate": 0.00011169394405314734, "loss": 0.7778, "step": 31199 }, { "epoch": 0.8011285127607639, "grad_norm": 0.76171875, "learning_rate": 0.00011168951046848546, "loss": 0.812, "step": 31200 }, { "epoch": 0.8011541899566857, "grad_norm": 0.76171875, "learning_rate": 0.00011168507686052742, "loss": 0.7891, "step": 31201 }, { "epoch": 0.8011798671526076, "grad_norm": 0.796875, "learning_rate": 0.0001116806432292821, "loss": 0.8013, "step": 31202 }, { "epoch": 0.8012055443485294, "grad_norm": 0.66015625, "learning_rate": 0.00011167620957475824, "loss": 0.6219, "step": 31203 }, { "epoch": 0.8012312215444513, "grad_norm": 0.8359375, "learning_rate": 0.00011167177589696481, "loss": 0.8364, "step": 31204 }, { "epoch": 0.8012568987403731, "grad_norm": 0.72265625, "learning_rate": 0.00011166734219591058, "loss": 0.7966, "step": 31205 }, { "epoch": 0.8012825759362949, "grad_norm": 0.8046875, "learning_rate": 0.00011166290847160437, "loss": 0.7794, "step": 31206 }, { "epoch": 0.8013082531322167, "grad_norm": 0.73828125, "learning_rate": 0.00011165847472405504, "loss": 0.8072, "step": 31207 }, { "epoch": 0.8013339303281385, "grad_norm": 0.82421875, "learning_rate": 0.00011165404095327145, "loss": 0.9305, "step": 31208 }, { "epoch": 0.8013596075240603, "grad_norm": 0.734375, "learning_rate": 0.0001116496071592624, "loss": 0.8029, "step": 31209 }, { "epoch": 0.8013852847199822, "grad_norm": 0.765625, "learning_rate": 0.00011164517334203672, "loss": 0.8203, "step": 31210 }, { "epoch": 0.801410961915904, "grad_norm": 0.76953125, "learning_rate": 0.00011164073950160326, "loss": 0.787, "step": 31211 }, { "epoch": 0.8014366391118258, "grad_norm": 0.80859375, "learning_rate": 0.00011163630563797088, "loss": 0.7167, "step": 31212 }, { "epoch": 0.8014623163077476, "grad_norm": 0.765625, "learning_rate": 0.00011163187175114838, "loss": 0.7565, "step": 31213 }, { "epoch": 0.8014879935036694, "grad_norm": 0.87890625, "learning_rate": 0.0001116274378411446, "loss": 0.8024, "step": 31214 }, { "epoch": 0.8015136706995912, "grad_norm": 0.8046875, "learning_rate": 0.0001116230039079684, "loss": 0.8583, "step": 31215 }, { "epoch": 0.8015393478955131, "grad_norm": 0.7890625, "learning_rate": 0.0001116185699516286, "loss": 0.8499, "step": 31216 }, { "epoch": 0.8015650250914349, "grad_norm": 0.80078125, "learning_rate": 0.00011161413597213404, "loss": 0.7731, "step": 31217 }, { "epoch": 0.8015907022873567, "grad_norm": 0.7890625, "learning_rate": 0.00011160970196949357, "loss": 0.7345, "step": 31218 }, { "epoch": 0.8016163794832786, "grad_norm": 0.83984375, "learning_rate": 0.00011160526794371597, "loss": 0.8216, "step": 31219 }, { "epoch": 0.8016420566792003, "grad_norm": 0.76953125, "learning_rate": 0.00011160083389481015, "loss": 0.7915, "step": 31220 }, { "epoch": 0.8016677338751221, "grad_norm": 0.6796875, "learning_rate": 0.00011159639982278491, "loss": 0.8327, "step": 31221 }, { "epoch": 0.801693411071044, "grad_norm": 0.85546875, "learning_rate": 0.00011159196572764912, "loss": 0.7842, "step": 31222 }, { "epoch": 0.8017190882669658, "grad_norm": 0.765625, "learning_rate": 0.00011158753160941158, "loss": 0.8674, "step": 31223 }, { "epoch": 0.8017447654628876, "grad_norm": 0.8359375, "learning_rate": 0.00011158309746808108, "loss": 0.8157, "step": 31224 }, { "epoch": 0.8017704426588095, "grad_norm": 0.7734375, "learning_rate": 0.00011157866330366657, "loss": 0.7017, "step": 31225 }, { "epoch": 0.8017961198547313, "grad_norm": 0.8984375, "learning_rate": 0.00011157422911617684, "loss": 0.7781, "step": 31226 }, { "epoch": 0.801821797050653, "grad_norm": 0.80078125, "learning_rate": 0.00011156979490562067, "loss": 0.8302, "step": 31227 }, { "epoch": 0.8018474742465749, "grad_norm": 0.78515625, "learning_rate": 0.00011156536067200698, "loss": 0.813, "step": 31228 }, { "epoch": 0.8018731514424967, "grad_norm": 0.73828125, "learning_rate": 0.00011156092641534455, "loss": 0.7662, "step": 31229 }, { "epoch": 0.8018988286384185, "grad_norm": 0.78515625, "learning_rate": 0.00011155649213564223, "loss": 0.8085, "step": 31230 }, { "epoch": 0.8019245058343404, "grad_norm": 0.80078125, "learning_rate": 0.00011155205783290889, "loss": 0.7703, "step": 31231 }, { "epoch": 0.8019501830302622, "grad_norm": 0.828125, "learning_rate": 0.0001115476235071533, "loss": 0.8923, "step": 31232 }, { "epoch": 0.801975860226184, "grad_norm": 0.77734375, "learning_rate": 0.0001115431891583844, "loss": 0.7379, "step": 31233 }, { "epoch": 0.8020015374221058, "grad_norm": 0.82421875, "learning_rate": 0.00011153875478661095, "loss": 0.8632, "step": 31234 }, { "epoch": 0.8020272146180276, "grad_norm": 0.76171875, "learning_rate": 0.0001115343203918418, "loss": 0.7402, "step": 31235 }, { "epoch": 0.8020528918139495, "grad_norm": 0.97265625, "learning_rate": 0.00011152988597408579, "loss": 0.7898, "step": 31236 }, { "epoch": 0.8020785690098713, "grad_norm": 0.859375, "learning_rate": 0.00011152545153335174, "loss": 0.882, "step": 31237 }, { "epoch": 0.8021042462057931, "grad_norm": 0.85546875, "learning_rate": 0.0001115210170696485, "loss": 0.8332, "step": 31238 }, { "epoch": 0.802129923401715, "grad_norm": 0.8125, "learning_rate": 0.00011151658258298494, "loss": 0.8668, "step": 31239 }, { "epoch": 0.8021556005976367, "grad_norm": 0.75390625, "learning_rate": 0.00011151214807336985, "loss": 0.7059, "step": 31240 }, { "epoch": 0.8021812777935585, "grad_norm": 0.859375, "learning_rate": 0.00011150771354081213, "loss": 0.8581, "step": 31241 }, { "epoch": 0.8022069549894804, "grad_norm": 0.7734375, "learning_rate": 0.00011150327898532056, "loss": 0.7899, "step": 31242 }, { "epoch": 0.8022326321854022, "grad_norm": 0.71875, "learning_rate": 0.00011149884440690395, "loss": 0.7824, "step": 31243 }, { "epoch": 0.802258309381324, "grad_norm": 0.8125, "learning_rate": 0.00011149440980557122, "loss": 0.8297, "step": 31244 }, { "epoch": 0.8022839865772459, "grad_norm": 0.7890625, "learning_rate": 0.00011148997518133117, "loss": 0.8739, "step": 31245 }, { "epoch": 0.8023096637731677, "grad_norm": 0.84765625, "learning_rate": 0.00011148554053419262, "loss": 0.7916, "step": 31246 }, { "epoch": 0.8023353409690894, "grad_norm": 0.84765625, "learning_rate": 0.00011148110586416445, "loss": 1.0033, "step": 31247 }, { "epoch": 0.8023610181650113, "grad_norm": 0.8046875, "learning_rate": 0.00011147667117125544, "loss": 0.7572, "step": 31248 }, { "epoch": 0.8023866953609331, "grad_norm": 0.8828125, "learning_rate": 0.00011147223645547448, "loss": 0.8845, "step": 31249 }, { "epoch": 0.8024123725568549, "grad_norm": 0.796875, "learning_rate": 0.00011146780171683042, "loss": 0.8943, "step": 31250 }, { "epoch": 0.8024380497527768, "grad_norm": 0.84375, "learning_rate": 0.00011146336695533202, "loss": 0.9117, "step": 31251 }, { "epoch": 0.8024637269486986, "grad_norm": 0.8203125, "learning_rate": 0.00011145893217098817, "loss": 0.8424, "step": 31252 }, { "epoch": 0.8024894041446203, "grad_norm": 0.75, "learning_rate": 0.00011145449736380773, "loss": 0.7928, "step": 31253 }, { "epoch": 0.8025150813405422, "grad_norm": 0.7890625, "learning_rate": 0.00011145006253379951, "loss": 0.8108, "step": 31254 }, { "epoch": 0.802540758536464, "grad_norm": 0.80859375, "learning_rate": 0.00011144562768097233, "loss": 0.9099, "step": 31255 }, { "epoch": 0.8025664357323858, "grad_norm": 0.76953125, "learning_rate": 0.00011144119280533503, "loss": 0.7443, "step": 31256 }, { "epoch": 0.8025921129283077, "grad_norm": 0.734375, "learning_rate": 0.0001114367579068965, "loss": 0.6807, "step": 31257 }, { "epoch": 0.8026177901242295, "grad_norm": 0.77734375, "learning_rate": 0.00011143232298566554, "loss": 0.7984, "step": 31258 }, { "epoch": 0.8026434673201513, "grad_norm": 0.77734375, "learning_rate": 0.00011142788804165096, "loss": 0.8226, "step": 31259 }, { "epoch": 0.8026691445160731, "grad_norm": 0.87109375, "learning_rate": 0.0001114234530748617, "loss": 0.8502, "step": 31260 }, { "epoch": 0.8026948217119949, "grad_norm": 0.8515625, "learning_rate": 0.00011141901808530648, "loss": 0.905, "step": 31261 }, { "epoch": 0.8027204989079167, "grad_norm": 0.8515625, "learning_rate": 0.0001114145830729942, "loss": 0.6318, "step": 31262 }, { "epoch": 0.8027461761038386, "grad_norm": 0.77734375, "learning_rate": 0.00011141014803793368, "loss": 0.8963, "step": 31263 }, { "epoch": 0.8027718532997604, "grad_norm": 0.6796875, "learning_rate": 0.00011140571298013376, "loss": 0.6852, "step": 31264 }, { "epoch": 0.8027975304956823, "grad_norm": 0.72265625, "learning_rate": 0.00011140127789960329, "loss": 0.8018, "step": 31265 }, { "epoch": 0.8028232076916041, "grad_norm": 0.8203125, "learning_rate": 0.00011139684279635112, "loss": 0.8235, "step": 31266 }, { "epoch": 0.8028488848875258, "grad_norm": 0.796875, "learning_rate": 0.00011139240767038606, "loss": 0.7624, "step": 31267 }, { "epoch": 0.8028745620834477, "grad_norm": 0.79296875, "learning_rate": 0.00011138797252171698, "loss": 0.822, "step": 31268 }, { "epoch": 0.8029002392793695, "grad_norm": 0.93359375, "learning_rate": 0.00011138353735035265, "loss": 0.8804, "step": 31269 }, { "epoch": 0.8029259164752913, "grad_norm": 0.765625, "learning_rate": 0.00011137910215630198, "loss": 0.7561, "step": 31270 }, { "epoch": 0.8029515936712132, "grad_norm": 0.74609375, "learning_rate": 0.00011137466693957381, "loss": 0.7841, "step": 31271 }, { "epoch": 0.802977270867135, "grad_norm": 0.75390625, "learning_rate": 0.00011137023170017693, "loss": 0.8388, "step": 31272 }, { "epoch": 0.8030029480630567, "grad_norm": 0.796875, "learning_rate": 0.00011136579643812024, "loss": 0.8293, "step": 31273 }, { "epoch": 0.8030286252589786, "grad_norm": 0.7578125, "learning_rate": 0.00011136136115341253, "loss": 0.7188, "step": 31274 }, { "epoch": 0.8030543024549004, "grad_norm": 0.79296875, "learning_rate": 0.00011135692584606263, "loss": 0.8149, "step": 31275 }, { "epoch": 0.8030799796508222, "grad_norm": 0.74609375, "learning_rate": 0.00011135249051607942, "loss": 0.7062, "step": 31276 }, { "epoch": 0.8031056568467441, "grad_norm": 0.79296875, "learning_rate": 0.00011134805516347168, "loss": 0.7815, "step": 31277 }, { "epoch": 0.8031313340426659, "grad_norm": 0.8671875, "learning_rate": 0.00011134361978824835, "loss": 0.7481, "step": 31278 }, { "epoch": 0.8031570112385877, "grad_norm": 0.859375, "learning_rate": 0.00011133918439041819, "loss": 1.0314, "step": 31279 }, { "epoch": 0.8031826884345095, "grad_norm": 0.83984375, "learning_rate": 0.00011133474896999007, "loss": 0.8458, "step": 31280 }, { "epoch": 0.8032083656304313, "grad_norm": 0.8046875, "learning_rate": 0.00011133031352697281, "loss": 0.8374, "step": 31281 }, { "epoch": 0.8032340428263531, "grad_norm": 0.703125, "learning_rate": 0.00011132587806137527, "loss": 0.7612, "step": 31282 }, { "epoch": 0.803259720022275, "grad_norm": 0.890625, "learning_rate": 0.00011132144257320625, "loss": 0.8692, "step": 31283 }, { "epoch": 0.8032853972181968, "grad_norm": 0.7890625, "learning_rate": 0.00011131700706247463, "loss": 0.7994, "step": 31284 }, { "epoch": 0.8033110744141186, "grad_norm": 0.77734375, "learning_rate": 0.00011131257152918926, "loss": 0.9498, "step": 31285 }, { "epoch": 0.8033367516100405, "grad_norm": 0.82421875, "learning_rate": 0.00011130813597335891, "loss": 0.834, "step": 31286 }, { "epoch": 0.8033624288059622, "grad_norm": 0.78515625, "learning_rate": 0.00011130370039499252, "loss": 0.8033, "step": 31287 }, { "epoch": 0.803388106001884, "grad_norm": 0.73828125, "learning_rate": 0.00011129926479409884, "loss": 0.8042, "step": 31288 }, { "epoch": 0.8034137831978059, "grad_norm": 0.67578125, "learning_rate": 0.00011129482917068676, "loss": 0.7272, "step": 31289 }, { "epoch": 0.8034394603937277, "grad_norm": 0.77734375, "learning_rate": 0.00011129039352476512, "loss": 0.7683, "step": 31290 }, { "epoch": 0.8034651375896495, "grad_norm": 0.8046875, "learning_rate": 0.00011128595785634271, "loss": 0.778, "step": 31291 }, { "epoch": 0.8034908147855714, "grad_norm": 0.796875, "learning_rate": 0.00011128152216542842, "loss": 0.8003, "step": 31292 }, { "epoch": 0.8035164919814931, "grad_norm": 0.78125, "learning_rate": 0.00011127708645203108, "loss": 0.8224, "step": 31293 }, { "epoch": 0.8035421691774149, "grad_norm": 0.8203125, "learning_rate": 0.00011127265071615953, "loss": 0.9056, "step": 31294 }, { "epoch": 0.8035678463733368, "grad_norm": 0.80078125, "learning_rate": 0.0001112682149578226, "loss": 0.7599, "step": 31295 }, { "epoch": 0.8035935235692586, "grad_norm": 0.74609375, "learning_rate": 0.00011126377917702913, "loss": 0.8761, "step": 31296 }, { "epoch": 0.8036192007651805, "grad_norm": 0.7578125, "learning_rate": 0.00011125934337378797, "loss": 0.8427, "step": 31297 }, { "epoch": 0.8036448779611023, "grad_norm": 0.734375, "learning_rate": 0.00011125490754810794, "loss": 0.7499, "step": 31298 }, { "epoch": 0.8036705551570241, "grad_norm": 0.87109375, "learning_rate": 0.00011125047169999792, "loss": 0.8219, "step": 31299 }, { "epoch": 0.8036962323529458, "grad_norm": 0.80859375, "learning_rate": 0.00011124603582946674, "loss": 0.8602, "step": 31300 }, { "epoch": 0.8037219095488677, "grad_norm": 0.78515625, "learning_rate": 0.00011124159993652319, "loss": 0.7473, "step": 31301 }, { "epoch": 0.8037475867447895, "grad_norm": 1.0078125, "learning_rate": 0.00011123716402117615, "loss": 0.8305, "step": 31302 }, { "epoch": 0.8037732639407114, "grad_norm": 0.79296875, "learning_rate": 0.00011123272808343448, "loss": 0.7608, "step": 31303 }, { "epoch": 0.8037989411366332, "grad_norm": 0.75, "learning_rate": 0.00011122829212330696, "loss": 0.9022, "step": 31304 }, { "epoch": 0.803824618332555, "grad_norm": 0.84375, "learning_rate": 0.0001112238561408025, "loss": 0.9232, "step": 31305 }, { "epoch": 0.8038502955284769, "grad_norm": 0.828125, "learning_rate": 0.00011121942013592992, "loss": 0.8816, "step": 31306 }, { "epoch": 0.8038759727243986, "grad_norm": 0.80859375, "learning_rate": 0.00011121498410869801, "loss": 0.8702, "step": 31307 }, { "epoch": 0.8039016499203204, "grad_norm": 0.83203125, "learning_rate": 0.00011121054805911568, "loss": 0.7947, "step": 31308 }, { "epoch": 0.8039273271162423, "grad_norm": 0.765625, "learning_rate": 0.00011120611198719173, "loss": 0.9225, "step": 31309 }, { "epoch": 0.8039530043121641, "grad_norm": 0.73828125, "learning_rate": 0.00011120167589293503, "loss": 0.6656, "step": 31310 }, { "epoch": 0.8039786815080859, "grad_norm": 0.75, "learning_rate": 0.00011119723977635437, "loss": 0.7239, "step": 31311 }, { "epoch": 0.8040043587040078, "grad_norm": 0.8359375, "learning_rate": 0.00011119280363745864, "loss": 0.8729, "step": 31312 }, { "epoch": 0.8040300358999295, "grad_norm": 0.703125, "learning_rate": 0.00011118836747625667, "loss": 0.7709, "step": 31313 }, { "epoch": 0.8040557130958513, "grad_norm": 0.71484375, "learning_rate": 0.00011118393129275726, "loss": 0.7436, "step": 31314 }, { "epoch": 0.8040813902917732, "grad_norm": 0.8125, "learning_rate": 0.00011117949508696932, "loss": 0.8732, "step": 31315 }, { "epoch": 0.804107067487695, "grad_norm": 0.79296875, "learning_rate": 0.00011117505885890164, "loss": 0.8441, "step": 31316 }, { "epoch": 0.8041327446836168, "grad_norm": 0.81640625, "learning_rate": 0.00011117062260856308, "loss": 0.8718, "step": 31317 }, { "epoch": 0.8041584218795387, "grad_norm": 0.7734375, "learning_rate": 0.0001111661863359625, "loss": 0.8045, "step": 31318 }, { "epoch": 0.8041840990754605, "grad_norm": 0.796875, "learning_rate": 0.00011116175004110872, "loss": 0.8479, "step": 31319 }, { "epoch": 0.8042097762713822, "grad_norm": 0.87890625, "learning_rate": 0.00011115731372401055, "loss": 0.7411, "step": 31320 }, { "epoch": 0.8042354534673041, "grad_norm": 0.7265625, "learning_rate": 0.00011115287738467686, "loss": 0.7932, "step": 31321 }, { "epoch": 0.8042611306632259, "grad_norm": 1.0703125, "learning_rate": 0.00011114844102311652, "loss": 0.8519, "step": 31322 }, { "epoch": 0.8042868078591477, "grad_norm": 0.7578125, "learning_rate": 0.00011114400463933832, "loss": 0.9141, "step": 31323 }, { "epoch": 0.8043124850550696, "grad_norm": 1.046875, "learning_rate": 0.00011113956823335113, "loss": 0.8598, "step": 31324 }, { "epoch": 0.8043381622509914, "grad_norm": 0.81640625, "learning_rate": 0.0001111351318051638, "loss": 0.7984, "step": 31325 }, { "epoch": 0.8043638394469133, "grad_norm": 0.78125, "learning_rate": 0.00011113069535478515, "loss": 0.8168, "step": 31326 }, { "epoch": 0.804389516642835, "grad_norm": 0.74609375, "learning_rate": 0.00011112625888222403, "loss": 0.9297, "step": 31327 }, { "epoch": 0.8044151938387568, "grad_norm": 0.78515625, "learning_rate": 0.00011112182238748927, "loss": 0.8622, "step": 31328 }, { "epoch": 0.8044408710346787, "grad_norm": 0.81640625, "learning_rate": 0.00011111738587058973, "loss": 0.7316, "step": 31329 }, { "epoch": 0.8044665482306005, "grad_norm": 0.75390625, "learning_rate": 0.00011111294933153427, "loss": 0.8149, "step": 31330 }, { "epoch": 0.8044922254265223, "grad_norm": 0.83203125, "learning_rate": 0.00011110851277033166, "loss": 0.9185, "step": 31331 }, { "epoch": 0.8045179026224442, "grad_norm": 0.6953125, "learning_rate": 0.00011110407618699081, "loss": 0.7046, "step": 31332 }, { "epoch": 0.8045435798183659, "grad_norm": 0.76171875, "learning_rate": 0.00011109963958152055, "loss": 0.8389, "step": 31333 }, { "epoch": 0.8045692570142877, "grad_norm": 0.73828125, "learning_rate": 0.0001110952029539297, "loss": 0.7398, "step": 31334 }, { "epoch": 0.8045949342102096, "grad_norm": 0.78515625, "learning_rate": 0.00011109076630422713, "loss": 0.809, "step": 31335 }, { "epoch": 0.8046206114061314, "grad_norm": 0.78515625, "learning_rate": 0.00011108632963242163, "loss": 0.8308, "step": 31336 }, { "epoch": 0.8046462886020532, "grad_norm": 0.7734375, "learning_rate": 0.00011108189293852209, "loss": 0.7306, "step": 31337 }, { "epoch": 0.8046719657979751, "grad_norm": 0.77734375, "learning_rate": 0.00011107745622253734, "loss": 0.9142, "step": 31338 }, { "epoch": 0.8046976429938969, "grad_norm": 0.77734375, "learning_rate": 0.00011107301948447623, "loss": 0.7894, "step": 31339 }, { "epoch": 0.8047233201898186, "grad_norm": 0.765625, "learning_rate": 0.00011106858272434759, "loss": 0.8178, "step": 31340 }, { "epoch": 0.8047489973857405, "grad_norm": 0.91015625, "learning_rate": 0.00011106414594216023, "loss": 0.9009, "step": 31341 }, { "epoch": 0.8047746745816623, "grad_norm": 0.73046875, "learning_rate": 0.00011105970913792307, "loss": 0.709, "step": 31342 }, { "epoch": 0.8048003517775841, "grad_norm": 1.046875, "learning_rate": 0.0001110552723116449, "loss": 0.9397, "step": 31343 }, { "epoch": 0.804826028973506, "grad_norm": 0.72265625, "learning_rate": 0.00011105083546333454, "loss": 0.9221, "step": 31344 }, { "epoch": 0.8048517061694278, "grad_norm": 0.8203125, "learning_rate": 0.0001110463985930009, "loss": 0.8746, "step": 31345 }, { "epoch": 0.8048773833653496, "grad_norm": 0.75, "learning_rate": 0.0001110419617006528, "loss": 0.7652, "step": 31346 }, { "epoch": 0.8049030605612714, "grad_norm": 0.6953125, "learning_rate": 0.00011103752478629903, "loss": 0.7956, "step": 31347 }, { "epoch": 0.8049287377571932, "grad_norm": 0.85546875, "learning_rate": 0.0001110330878499485, "loss": 0.8106, "step": 31348 }, { "epoch": 0.804954414953115, "grad_norm": 0.74609375, "learning_rate": 0.00011102865089160999, "loss": 0.8696, "step": 31349 }, { "epoch": 0.8049800921490369, "grad_norm": 0.75390625, "learning_rate": 0.00011102421391129238, "loss": 0.6691, "step": 31350 }, { "epoch": 0.8050057693449587, "grad_norm": 0.74609375, "learning_rate": 0.00011101977690900454, "loss": 0.6807, "step": 31351 }, { "epoch": 0.8050314465408805, "grad_norm": 0.734375, "learning_rate": 0.00011101533988475525, "loss": 0.6452, "step": 31352 }, { "epoch": 0.8050571237368023, "grad_norm": 0.8125, "learning_rate": 0.0001110109028385534, "loss": 0.7139, "step": 31353 }, { "epoch": 0.8050828009327241, "grad_norm": 0.69921875, "learning_rate": 0.00011100646577040778, "loss": 0.7015, "step": 31354 }, { "epoch": 0.8051084781286459, "grad_norm": 0.76953125, "learning_rate": 0.0001110020286803273, "loss": 0.8149, "step": 31355 }, { "epoch": 0.8051341553245678, "grad_norm": 0.91015625, "learning_rate": 0.00011099759156832077, "loss": 0.9466, "step": 31356 }, { "epoch": 0.8051598325204896, "grad_norm": 0.77734375, "learning_rate": 0.00011099315443439702, "loss": 0.8751, "step": 31357 }, { "epoch": 0.8051855097164115, "grad_norm": 0.859375, "learning_rate": 0.0001109887172785649, "loss": 0.8818, "step": 31358 }, { "epoch": 0.8052111869123333, "grad_norm": 0.9453125, "learning_rate": 0.00011098428010083331, "loss": 0.8399, "step": 31359 }, { "epoch": 0.805236864108255, "grad_norm": 0.84765625, "learning_rate": 0.000110979842901211, "loss": 0.7977, "step": 31360 }, { "epoch": 0.8052625413041768, "grad_norm": 0.82421875, "learning_rate": 0.00011097540567970686, "loss": 0.8941, "step": 31361 }, { "epoch": 0.8052882185000987, "grad_norm": 0.74609375, "learning_rate": 0.00011097096843632972, "loss": 0.7019, "step": 31362 }, { "epoch": 0.8053138956960205, "grad_norm": 0.75, "learning_rate": 0.00011096653117108842, "loss": 0.879, "step": 31363 }, { "epoch": 0.8053395728919424, "grad_norm": 0.83984375, "learning_rate": 0.00011096209388399183, "loss": 0.8582, "step": 31364 }, { "epoch": 0.8053652500878642, "grad_norm": 0.74609375, "learning_rate": 0.0001109576565750488, "loss": 0.7599, "step": 31365 }, { "epoch": 0.805390927283786, "grad_norm": 0.80078125, "learning_rate": 0.00011095321924426813, "loss": 0.8626, "step": 31366 }, { "epoch": 0.8054166044797078, "grad_norm": 0.83984375, "learning_rate": 0.00011094878189165869, "loss": 0.739, "step": 31367 }, { "epoch": 0.8054422816756296, "grad_norm": 0.7421875, "learning_rate": 0.0001109443445172293, "loss": 0.7978, "step": 31368 }, { "epoch": 0.8054679588715514, "grad_norm": 0.70703125, "learning_rate": 0.00011093990712098883, "loss": 0.8174, "step": 31369 }, { "epoch": 0.8054936360674733, "grad_norm": 1.1640625, "learning_rate": 0.0001109354697029461, "loss": 0.7884, "step": 31370 }, { "epoch": 0.8055193132633951, "grad_norm": 0.765625, "learning_rate": 0.00011093103226310997, "loss": 0.8312, "step": 31371 }, { "epoch": 0.8055449904593169, "grad_norm": 0.796875, "learning_rate": 0.00011092659480148933, "loss": 0.745, "step": 31372 }, { "epoch": 0.8055706676552387, "grad_norm": 0.875, "learning_rate": 0.0001109221573180929, "loss": 0.8429, "step": 31373 }, { "epoch": 0.8055963448511605, "grad_norm": 0.78515625, "learning_rate": 0.00011091771981292965, "loss": 0.7821, "step": 31374 }, { "epoch": 0.8056220220470823, "grad_norm": 0.734375, "learning_rate": 0.00011091328228600834, "loss": 0.8308, "step": 31375 }, { "epoch": 0.8056476992430042, "grad_norm": 0.7890625, "learning_rate": 0.00011090884473733785, "loss": 0.8827, "step": 31376 }, { "epoch": 0.805673376438926, "grad_norm": 0.765625, "learning_rate": 0.00011090440716692703, "loss": 0.6211, "step": 31377 }, { "epoch": 0.8056990536348478, "grad_norm": 0.75, "learning_rate": 0.0001108999695747847, "loss": 0.7863, "step": 31378 }, { "epoch": 0.8057247308307697, "grad_norm": 0.8046875, "learning_rate": 0.00011089553196091973, "loss": 0.8407, "step": 31379 }, { "epoch": 0.8057504080266914, "grad_norm": 0.859375, "learning_rate": 0.00011089109432534095, "loss": 0.9605, "step": 31380 }, { "epoch": 0.8057760852226132, "grad_norm": 0.765625, "learning_rate": 0.00011088665666805718, "loss": 0.67, "step": 31381 }, { "epoch": 0.8058017624185351, "grad_norm": 0.8984375, "learning_rate": 0.00011088221898907729, "loss": 0.8443, "step": 31382 }, { "epoch": 0.8058274396144569, "grad_norm": 0.8203125, "learning_rate": 0.00011087778128841014, "loss": 0.7968, "step": 31383 }, { "epoch": 0.8058531168103787, "grad_norm": 0.85546875, "learning_rate": 0.00011087334356606451, "loss": 0.9463, "step": 31384 }, { "epoch": 0.8058787940063006, "grad_norm": 0.84375, "learning_rate": 0.00011086890582204936, "loss": 0.8202, "step": 31385 }, { "epoch": 0.8059044712022224, "grad_norm": 0.7578125, "learning_rate": 0.0001108644680563734, "loss": 0.765, "step": 31386 }, { "epoch": 0.8059301483981441, "grad_norm": 0.7734375, "learning_rate": 0.00011086003026904556, "loss": 0.7907, "step": 31387 }, { "epoch": 0.805955825594066, "grad_norm": 0.79296875, "learning_rate": 0.00011085559246007469, "loss": 0.8128, "step": 31388 }, { "epoch": 0.8059815027899878, "grad_norm": 0.79296875, "learning_rate": 0.00011085115462946956, "loss": 0.812, "step": 31389 }, { "epoch": 0.8060071799859096, "grad_norm": 0.73046875, "learning_rate": 0.00011084671677723907, "loss": 0.8034, "step": 31390 }, { "epoch": 0.8060328571818315, "grad_norm": 0.77734375, "learning_rate": 0.00011084227890339207, "loss": 0.8924, "step": 31391 }, { "epoch": 0.8060585343777533, "grad_norm": 0.92578125, "learning_rate": 0.00011083784100793737, "loss": 0.8629, "step": 31392 }, { "epoch": 0.806084211573675, "grad_norm": 0.8046875, "learning_rate": 0.00011083340309088386, "loss": 0.9321, "step": 31393 }, { "epoch": 0.8061098887695969, "grad_norm": 0.7109375, "learning_rate": 0.0001108289651522403, "loss": 0.7368, "step": 31394 }, { "epoch": 0.8061355659655187, "grad_norm": 0.74609375, "learning_rate": 0.00011082452719201564, "loss": 0.8032, "step": 31395 }, { "epoch": 0.8061612431614406, "grad_norm": 0.8125, "learning_rate": 0.00011082008921021867, "loss": 0.9547, "step": 31396 }, { "epoch": 0.8061869203573624, "grad_norm": 0.80078125, "learning_rate": 0.0001108156512068582, "loss": 0.8236, "step": 31397 }, { "epoch": 0.8062125975532842, "grad_norm": 0.7578125, "learning_rate": 0.00011081121318194316, "loss": 0.8944, "step": 31398 }, { "epoch": 0.8062382747492061, "grad_norm": 0.76953125, "learning_rate": 0.00011080677513548234, "loss": 0.7723, "step": 31399 }, { "epoch": 0.8062639519451278, "grad_norm": 0.81640625, "learning_rate": 0.00011080233706748457, "loss": 0.8076, "step": 31400 }, { "epoch": 0.8062896291410496, "grad_norm": 0.76953125, "learning_rate": 0.00011079789897795874, "loss": 0.7016, "step": 31401 }, { "epoch": 0.8063153063369715, "grad_norm": 1.0078125, "learning_rate": 0.00011079346086691365, "loss": 0.8431, "step": 31402 }, { "epoch": 0.8063409835328933, "grad_norm": 0.77734375, "learning_rate": 0.0001107890227343582, "loss": 0.9705, "step": 31403 }, { "epoch": 0.8063666607288151, "grad_norm": 0.74609375, "learning_rate": 0.00011078458458030119, "loss": 0.8807, "step": 31404 }, { "epoch": 0.806392337924737, "grad_norm": 0.75, "learning_rate": 0.00011078014640475147, "loss": 0.7907, "step": 31405 }, { "epoch": 0.8064180151206587, "grad_norm": 0.84375, "learning_rate": 0.0001107757082077179, "loss": 0.8744, "step": 31406 }, { "epoch": 0.8064436923165805, "grad_norm": 0.81640625, "learning_rate": 0.00011077126998920932, "loss": 0.7846, "step": 31407 }, { "epoch": 0.8064693695125024, "grad_norm": 1.0078125, "learning_rate": 0.00011076683174923454, "loss": 0.7975, "step": 31408 }, { "epoch": 0.8064950467084242, "grad_norm": 0.78515625, "learning_rate": 0.00011076239348780246, "loss": 0.6583, "step": 31409 }, { "epoch": 0.806520723904346, "grad_norm": 0.7578125, "learning_rate": 0.00011075795520492191, "loss": 0.7916, "step": 31410 }, { "epoch": 0.8065464011002679, "grad_norm": 0.87109375, "learning_rate": 0.00011075351690060174, "loss": 0.7819, "step": 31411 }, { "epoch": 0.8065720782961897, "grad_norm": 0.76171875, "learning_rate": 0.00011074907857485075, "loss": 0.8713, "step": 31412 }, { "epoch": 0.8065977554921114, "grad_norm": 0.81640625, "learning_rate": 0.00011074464022767783, "loss": 0.7388, "step": 31413 }, { "epoch": 0.8066234326880333, "grad_norm": 0.75, "learning_rate": 0.00011074020185909182, "loss": 0.7469, "step": 31414 }, { "epoch": 0.8066491098839551, "grad_norm": 0.7890625, "learning_rate": 0.00011073576346910155, "loss": 0.8334, "step": 31415 }, { "epoch": 0.8066747870798769, "grad_norm": 0.80078125, "learning_rate": 0.00011073132505771585, "loss": 0.711, "step": 31416 }, { "epoch": 0.8067004642757988, "grad_norm": 0.8046875, "learning_rate": 0.00011072688662494361, "loss": 0.8387, "step": 31417 }, { "epoch": 0.8067261414717206, "grad_norm": 0.80078125, "learning_rate": 0.00011072244817079366, "loss": 0.8155, "step": 31418 }, { "epoch": 0.8067518186676425, "grad_norm": 0.796875, "learning_rate": 0.00011071800969527484, "loss": 0.833, "step": 31419 }, { "epoch": 0.8067774958635642, "grad_norm": 0.67578125, "learning_rate": 0.000110713571198396, "loss": 0.6818, "step": 31420 }, { "epoch": 0.806803173059486, "grad_norm": 0.7578125, "learning_rate": 0.00011070913268016597, "loss": 0.8318, "step": 31421 }, { "epoch": 0.8068288502554078, "grad_norm": 0.93359375, "learning_rate": 0.00011070469414059359, "loss": 0.7978, "step": 31422 }, { "epoch": 0.8068545274513297, "grad_norm": 0.83203125, "learning_rate": 0.00011070025557968775, "loss": 0.7421, "step": 31423 }, { "epoch": 0.8068802046472515, "grad_norm": 0.8359375, "learning_rate": 0.00011069581699745723, "loss": 0.7881, "step": 31424 }, { "epoch": 0.8069058818431734, "grad_norm": 0.734375, "learning_rate": 0.00011069137839391097, "loss": 0.8244, "step": 31425 }, { "epoch": 0.8069315590390951, "grad_norm": 0.83984375, "learning_rate": 0.0001106869397690577, "loss": 0.7626, "step": 31426 }, { "epoch": 0.8069572362350169, "grad_norm": 0.87109375, "learning_rate": 0.00011068250112290636, "loss": 0.7689, "step": 31427 }, { "epoch": 0.8069829134309388, "grad_norm": 0.9453125, "learning_rate": 0.00011067806245546575, "loss": 0.7877, "step": 31428 }, { "epoch": 0.8070085906268606, "grad_norm": 0.94921875, "learning_rate": 0.00011067362376674473, "loss": 0.6957, "step": 31429 }, { "epoch": 0.8070342678227824, "grad_norm": 0.7734375, "learning_rate": 0.00011066918505675213, "loss": 0.7735, "step": 31430 }, { "epoch": 0.8070599450187043, "grad_norm": 0.75, "learning_rate": 0.00011066474632549681, "loss": 0.6936, "step": 31431 }, { "epoch": 0.8070856222146261, "grad_norm": 0.7890625, "learning_rate": 0.00011066030757298764, "loss": 0.8661, "step": 31432 }, { "epoch": 0.8071112994105478, "grad_norm": 0.75390625, "learning_rate": 0.00011065586879923342, "loss": 0.8981, "step": 31433 }, { "epoch": 0.8071369766064697, "grad_norm": 0.7109375, "learning_rate": 0.00011065143000424301, "loss": 0.6864, "step": 31434 }, { "epoch": 0.8071626538023915, "grad_norm": 0.765625, "learning_rate": 0.00011064699118802526, "loss": 0.8917, "step": 31435 }, { "epoch": 0.8071883309983133, "grad_norm": 0.79296875, "learning_rate": 0.00011064255235058904, "loss": 0.9199, "step": 31436 }, { "epoch": 0.8072140081942352, "grad_norm": 0.76171875, "learning_rate": 0.00011063811349194313, "loss": 0.7242, "step": 31437 }, { "epoch": 0.807239685390157, "grad_norm": 0.8359375, "learning_rate": 0.00011063367461209649, "loss": 0.8392, "step": 31438 }, { "epoch": 0.8072653625860788, "grad_norm": 0.7734375, "learning_rate": 0.00011062923571105784, "loss": 0.8025, "step": 31439 }, { "epoch": 0.8072910397820006, "grad_norm": 0.8359375, "learning_rate": 0.0001106247967888361, "loss": 0.7499, "step": 31440 }, { "epoch": 0.8073167169779224, "grad_norm": 0.82421875, "learning_rate": 0.00011062035784544011, "loss": 0.816, "step": 31441 }, { "epoch": 0.8073423941738442, "grad_norm": 0.82421875, "learning_rate": 0.00011061591888087868, "loss": 0.8885, "step": 31442 }, { "epoch": 0.8073680713697661, "grad_norm": 0.8359375, "learning_rate": 0.00011061147989516072, "loss": 0.8326, "step": 31443 }, { "epoch": 0.8073937485656879, "grad_norm": 0.77734375, "learning_rate": 0.00011060704088829504, "loss": 0.865, "step": 31444 }, { "epoch": 0.8074194257616097, "grad_norm": 0.7890625, "learning_rate": 0.00011060260186029046, "loss": 0.8698, "step": 31445 }, { "epoch": 0.8074451029575315, "grad_norm": 0.8828125, "learning_rate": 0.00011059816281115585, "loss": 0.9424, "step": 31446 }, { "epoch": 0.8074707801534533, "grad_norm": 0.796875, "learning_rate": 0.00011059372374090007, "loss": 0.9428, "step": 31447 }, { "epoch": 0.8074964573493751, "grad_norm": 0.73828125, "learning_rate": 0.00011058928464953193, "loss": 0.7956, "step": 31448 }, { "epoch": 0.807522134545297, "grad_norm": 0.82421875, "learning_rate": 0.00011058484553706034, "loss": 0.8346, "step": 31449 }, { "epoch": 0.8075478117412188, "grad_norm": 0.734375, "learning_rate": 0.00011058040640349407, "loss": 0.7544, "step": 31450 }, { "epoch": 0.8075734889371406, "grad_norm": 1.0546875, "learning_rate": 0.00011057596724884205, "loss": 0.8232, "step": 31451 }, { "epoch": 0.8075991661330625, "grad_norm": 0.85546875, "learning_rate": 0.00011057152807311308, "loss": 0.8284, "step": 31452 }, { "epoch": 0.8076248433289842, "grad_norm": 0.75, "learning_rate": 0.00011056708887631598, "loss": 0.8684, "step": 31453 }, { "epoch": 0.807650520524906, "grad_norm": 0.8203125, "learning_rate": 0.00011056264965845964, "loss": 0.7999, "step": 31454 }, { "epoch": 0.8076761977208279, "grad_norm": 0.8203125, "learning_rate": 0.00011055821041955288, "loss": 0.8277, "step": 31455 }, { "epoch": 0.8077018749167497, "grad_norm": 0.76953125, "learning_rate": 0.00011055377115960457, "loss": 0.7168, "step": 31456 }, { "epoch": 0.8077275521126716, "grad_norm": 0.74609375, "learning_rate": 0.00011054933187862356, "loss": 0.7089, "step": 31457 }, { "epoch": 0.8077532293085934, "grad_norm": 0.76953125, "learning_rate": 0.00011054489257661866, "loss": 0.7964, "step": 31458 }, { "epoch": 0.8077789065045152, "grad_norm": 0.7734375, "learning_rate": 0.00011054045325359877, "loss": 0.7783, "step": 31459 }, { "epoch": 0.807804583700437, "grad_norm": 0.7578125, "learning_rate": 0.00011053601390957269, "loss": 0.8122, "step": 31460 }, { "epoch": 0.8078302608963588, "grad_norm": 0.75390625, "learning_rate": 0.00011053157454454928, "loss": 0.8252, "step": 31461 }, { "epoch": 0.8078559380922806, "grad_norm": 0.7421875, "learning_rate": 0.0001105271351585374, "loss": 0.8023, "step": 31462 }, { "epoch": 0.8078816152882025, "grad_norm": 0.77734375, "learning_rate": 0.0001105226957515459, "loss": 0.984, "step": 31463 }, { "epoch": 0.8079072924841243, "grad_norm": 0.80078125, "learning_rate": 0.00011051825632358362, "loss": 0.8822, "step": 31464 }, { "epoch": 0.8079329696800461, "grad_norm": 0.76171875, "learning_rate": 0.0001105138168746594, "loss": 0.7149, "step": 31465 }, { "epoch": 0.8079586468759679, "grad_norm": 0.765625, "learning_rate": 0.00011050937740478206, "loss": 0.8233, "step": 31466 }, { "epoch": 0.8079843240718897, "grad_norm": 0.71875, "learning_rate": 0.00011050493791396052, "loss": 0.7176, "step": 31467 }, { "epoch": 0.8080100012678115, "grad_norm": 0.7734375, "learning_rate": 0.00011050049840220358, "loss": 0.8695, "step": 31468 }, { "epoch": 0.8080356784637334, "grad_norm": 0.80859375, "learning_rate": 0.00011049605886952008, "loss": 0.7712, "step": 31469 }, { "epoch": 0.8080613556596552, "grad_norm": 0.859375, "learning_rate": 0.0001104916193159189, "loss": 0.8798, "step": 31470 }, { "epoch": 0.808087032855577, "grad_norm": 0.703125, "learning_rate": 0.00011048717974140888, "loss": 0.7617, "step": 31471 }, { "epoch": 0.8081127100514989, "grad_norm": 0.80078125, "learning_rate": 0.00011048274014599884, "loss": 0.7824, "step": 31472 }, { "epoch": 0.8081383872474206, "grad_norm": 0.765625, "learning_rate": 0.00011047830052969765, "loss": 0.8338, "step": 31473 }, { "epoch": 0.8081640644433424, "grad_norm": 0.7109375, "learning_rate": 0.00011047386089251415, "loss": 0.788, "step": 31474 }, { "epoch": 0.8081897416392643, "grad_norm": 0.74609375, "learning_rate": 0.00011046942123445718, "loss": 0.8774, "step": 31475 }, { "epoch": 0.8082154188351861, "grad_norm": 0.77734375, "learning_rate": 0.00011046498155553561, "loss": 0.7601, "step": 31476 }, { "epoch": 0.8082410960311079, "grad_norm": 0.7734375, "learning_rate": 0.0001104605418557583, "loss": 0.7286, "step": 31477 }, { "epoch": 0.8082667732270298, "grad_norm": 0.79296875, "learning_rate": 0.00011045610213513406, "loss": 0.8313, "step": 31478 }, { "epoch": 0.8082924504229516, "grad_norm": 0.73828125, "learning_rate": 0.00011045166239367172, "loss": 0.8219, "step": 31479 }, { "epoch": 0.8083181276188733, "grad_norm": 0.80078125, "learning_rate": 0.0001104472226313802, "loss": 0.7539, "step": 31480 }, { "epoch": 0.8083438048147952, "grad_norm": 0.78515625, "learning_rate": 0.00011044278284826832, "loss": 0.8272, "step": 31481 }, { "epoch": 0.808369482010717, "grad_norm": 0.84375, "learning_rate": 0.00011043834304434486, "loss": 0.8546, "step": 31482 }, { "epoch": 0.8083951592066388, "grad_norm": 0.78125, "learning_rate": 0.00011043390321961877, "loss": 0.7921, "step": 31483 }, { "epoch": 0.8084208364025607, "grad_norm": 0.85546875, "learning_rate": 0.00011042946337409888, "loss": 0.798, "step": 31484 }, { "epoch": 0.8084465135984825, "grad_norm": 0.82421875, "learning_rate": 0.00011042502350779395, "loss": 0.8221, "step": 31485 }, { "epoch": 0.8084721907944042, "grad_norm": 0.76953125, "learning_rate": 0.00011042058362071293, "loss": 0.7424, "step": 31486 }, { "epoch": 0.8084978679903261, "grad_norm": 0.765625, "learning_rate": 0.00011041614371286461, "loss": 0.8626, "step": 31487 }, { "epoch": 0.8085235451862479, "grad_norm": 0.84765625, "learning_rate": 0.00011041170378425785, "loss": 0.8361, "step": 31488 }, { "epoch": 0.8085492223821698, "grad_norm": 0.7578125, "learning_rate": 0.00011040726383490152, "loss": 0.6061, "step": 31489 }, { "epoch": 0.8085748995780916, "grad_norm": 0.7578125, "learning_rate": 0.00011040282386480445, "loss": 0.8542, "step": 31490 }, { "epoch": 0.8086005767740134, "grad_norm": 0.8515625, "learning_rate": 0.0001103983838739755, "loss": 0.8117, "step": 31491 }, { "epoch": 0.8086262539699353, "grad_norm": 0.8125, "learning_rate": 0.00011039394386242349, "loss": 0.7701, "step": 31492 }, { "epoch": 0.808651931165857, "grad_norm": 0.8203125, "learning_rate": 0.0001103895038301573, "loss": 0.9789, "step": 31493 }, { "epoch": 0.8086776083617788, "grad_norm": 0.80078125, "learning_rate": 0.00011038506377718576, "loss": 0.6676, "step": 31494 }, { "epoch": 0.8087032855577007, "grad_norm": 0.7265625, "learning_rate": 0.00011038062370351774, "loss": 0.7555, "step": 31495 }, { "epoch": 0.8087289627536225, "grad_norm": 0.8203125, "learning_rate": 0.00011037618360916204, "loss": 0.8823, "step": 31496 }, { "epoch": 0.8087546399495443, "grad_norm": 0.73046875, "learning_rate": 0.0001103717434941276, "loss": 0.8067, "step": 31497 }, { "epoch": 0.8087803171454662, "grad_norm": 0.78515625, "learning_rate": 0.00011036730335842316, "loss": 0.7675, "step": 31498 }, { "epoch": 0.808805994341388, "grad_norm": 0.78515625, "learning_rate": 0.00011036286320205767, "loss": 0.9288, "step": 31499 }, { "epoch": 0.8088316715373097, "grad_norm": 0.80859375, "learning_rate": 0.00011035842302503989, "loss": 0.8282, "step": 31500 }, { "epoch": 0.8088573487332316, "grad_norm": 0.7734375, "learning_rate": 0.0001103539828273787, "loss": 0.8096, "step": 31501 }, { "epoch": 0.8088830259291534, "grad_norm": 0.7265625, "learning_rate": 0.00011034954260908299, "loss": 0.7423, "step": 31502 }, { "epoch": 0.8089087031250752, "grad_norm": 0.77734375, "learning_rate": 0.00011034510237016158, "loss": 0.8499, "step": 31503 }, { "epoch": 0.8089343803209971, "grad_norm": 0.7421875, "learning_rate": 0.0001103406621106233, "loss": 0.8016, "step": 31504 }, { "epoch": 0.8089600575169189, "grad_norm": 0.79296875, "learning_rate": 0.00011033622183047702, "loss": 0.9324, "step": 31505 }, { "epoch": 0.8089857347128406, "grad_norm": 0.75, "learning_rate": 0.00011033178152973155, "loss": 0.7452, "step": 31506 }, { "epoch": 0.8090114119087625, "grad_norm": 0.86328125, "learning_rate": 0.00011032734120839581, "loss": 0.7736, "step": 31507 }, { "epoch": 0.8090370891046843, "grad_norm": 0.8203125, "learning_rate": 0.0001103229008664786, "loss": 0.7573, "step": 31508 }, { "epoch": 0.8090627663006061, "grad_norm": 0.828125, "learning_rate": 0.00011031846050398878, "loss": 0.8094, "step": 31509 }, { "epoch": 0.809088443496528, "grad_norm": 0.7734375, "learning_rate": 0.00011031402012093523, "loss": 0.7216, "step": 31510 }, { "epoch": 0.8091141206924498, "grad_norm": 0.765625, "learning_rate": 0.00011030957971732673, "loss": 0.762, "step": 31511 }, { "epoch": 0.8091397978883716, "grad_norm": 0.89453125, "learning_rate": 0.00011030513929317219, "loss": 0.8943, "step": 31512 }, { "epoch": 0.8091654750842934, "grad_norm": 0.8125, "learning_rate": 0.00011030069884848044, "loss": 0.8957, "step": 31513 }, { "epoch": 0.8091911522802152, "grad_norm": 0.7421875, "learning_rate": 0.00011029625838326029, "loss": 0.773, "step": 31514 }, { "epoch": 0.809216829476137, "grad_norm": 0.6875, "learning_rate": 0.00011029181789752066, "loss": 0.7971, "step": 31515 }, { "epoch": 0.8092425066720589, "grad_norm": 0.7890625, "learning_rate": 0.00011028737739127037, "loss": 0.7477, "step": 31516 }, { "epoch": 0.8092681838679807, "grad_norm": 0.76953125, "learning_rate": 0.00011028293686451827, "loss": 0.8419, "step": 31517 }, { "epoch": 0.8092938610639026, "grad_norm": 0.7578125, "learning_rate": 0.0001102784963172732, "loss": 0.7246, "step": 31518 }, { "epoch": 0.8093195382598244, "grad_norm": 0.703125, "learning_rate": 0.00011027405574954399, "loss": 0.7362, "step": 31519 }, { "epoch": 0.8093452154557461, "grad_norm": 0.76171875, "learning_rate": 0.00011026961516133954, "loss": 0.8066, "step": 31520 }, { "epoch": 0.809370892651668, "grad_norm": 0.77734375, "learning_rate": 0.00011026517455266867, "loss": 0.8439, "step": 31521 }, { "epoch": 0.8093965698475898, "grad_norm": 0.74609375, "learning_rate": 0.00011026073392354022, "loss": 0.6982, "step": 31522 }, { "epoch": 0.8094222470435116, "grad_norm": 0.76953125, "learning_rate": 0.0001102562932739631, "loss": 0.7759, "step": 31523 }, { "epoch": 0.8094479242394335, "grad_norm": 0.75, "learning_rate": 0.00011025185260394609, "loss": 0.8217, "step": 31524 }, { "epoch": 0.8094736014353553, "grad_norm": 0.9453125, "learning_rate": 0.00011024741191349804, "loss": 0.8722, "step": 31525 }, { "epoch": 0.809499278631277, "grad_norm": 0.78515625, "learning_rate": 0.00011024297120262783, "loss": 0.9497, "step": 31526 }, { "epoch": 0.8095249558271989, "grad_norm": 0.83203125, "learning_rate": 0.0001102385304713443, "loss": 0.8798, "step": 31527 }, { "epoch": 0.8095506330231207, "grad_norm": 0.73046875, "learning_rate": 0.00011023408971965632, "loss": 0.7925, "step": 31528 }, { "epoch": 0.8095763102190425, "grad_norm": 0.73046875, "learning_rate": 0.00011022964894757273, "loss": 0.7512, "step": 31529 }, { "epoch": 0.8096019874149644, "grad_norm": 0.7578125, "learning_rate": 0.00011022520815510235, "loss": 0.8125, "step": 31530 }, { "epoch": 0.8096276646108862, "grad_norm": 0.89453125, "learning_rate": 0.00011022076734225408, "loss": 0.7428, "step": 31531 }, { "epoch": 0.809653341806808, "grad_norm": 0.7890625, "learning_rate": 0.00011021632650903674, "loss": 0.9154, "step": 31532 }, { "epoch": 0.8096790190027298, "grad_norm": 0.67578125, "learning_rate": 0.00011021188565545917, "loss": 0.8474, "step": 31533 }, { "epoch": 0.8097046961986516, "grad_norm": 0.703125, "learning_rate": 0.00011020744478153025, "loss": 0.706, "step": 31534 }, { "epoch": 0.8097303733945734, "grad_norm": 0.8125, "learning_rate": 0.00011020300388725879, "loss": 0.8293, "step": 31535 }, { "epoch": 0.8097560505904953, "grad_norm": 0.8046875, "learning_rate": 0.0001101985629726537, "loss": 0.9983, "step": 31536 }, { "epoch": 0.8097817277864171, "grad_norm": 0.76953125, "learning_rate": 0.00011019412203772377, "loss": 0.8602, "step": 31537 }, { "epoch": 0.8098074049823389, "grad_norm": 0.703125, "learning_rate": 0.00011018968108247786, "loss": 0.6723, "step": 31538 }, { "epoch": 0.8098330821782608, "grad_norm": 0.8984375, "learning_rate": 0.00011018524010692486, "loss": 0.739, "step": 31539 }, { "epoch": 0.8098587593741825, "grad_norm": 0.8046875, "learning_rate": 0.0001101807991110736, "loss": 0.9388, "step": 31540 }, { "epoch": 0.8098844365701043, "grad_norm": 0.78125, "learning_rate": 0.0001101763580949329, "loss": 0.6947, "step": 31541 }, { "epoch": 0.8099101137660262, "grad_norm": 0.73046875, "learning_rate": 0.00011017191705851166, "loss": 0.9208, "step": 31542 }, { "epoch": 0.809935790961948, "grad_norm": 0.68359375, "learning_rate": 0.00011016747600181871, "loss": 0.7196, "step": 31543 }, { "epoch": 0.8099614681578698, "grad_norm": 0.7265625, "learning_rate": 0.00011016303492486291, "loss": 0.8306, "step": 31544 }, { "epoch": 0.8099871453537917, "grad_norm": 0.78125, "learning_rate": 0.00011015859382765308, "loss": 0.8707, "step": 31545 }, { "epoch": 0.8100128225497134, "grad_norm": 0.83203125, "learning_rate": 0.00011015415271019809, "loss": 0.82, "step": 31546 }, { "epoch": 0.8100384997456352, "grad_norm": 0.81640625, "learning_rate": 0.0001101497115725068, "loss": 0.9097, "step": 31547 }, { "epoch": 0.8100641769415571, "grad_norm": 0.8046875, "learning_rate": 0.00011014527041458804, "loss": 0.7968, "step": 31548 }, { "epoch": 0.8100898541374789, "grad_norm": 0.75390625, "learning_rate": 0.00011014082923645066, "loss": 0.7837, "step": 31549 }, { "epoch": 0.8101155313334008, "grad_norm": 0.9375, "learning_rate": 0.00011013638803810358, "loss": 0.8065, "step": 31550 }, { "epoch": 0.8101412085293226, "grad_norm": 0.75390625, "learning_rate": 0.00011013194681955552, "loss": 0.7722, "step": 31551 }, { "epoch": 0.8101668857252444, "grad_norm": 0.7734375, "learning_rate": 0.00011012750558081546, "loss": 0.8538, "step": 31552 }, { "epoch": 0.8101925629211661, "grad_norm": 0.8125, "learning_rate": 0.00011012306432189219, "loss": 0.7767, "step": 31553 }, { "epoch": 0.810218240117088, "grad_norm": 0.828125, "learning_rate": 0.00011011862304279453, "loss": 0.9028, "step": 31554 }, { "epoch": 0.8102439173130098, "grad_norm": 0.73046875, "learning_rate": 0.00011011418174353138, "loss": 0.7894, "step": 31555 }, { "epoch": 0.8102695945089317, "grad_norm": 0.82421875, "learning_rate": 0.00011010974042411162, "loss": 1.0, "step": 31556 }, { "epoch": 0.8102952717048535, "grad_norm": 0.72265625, "learning_rate": 0.00011010529908454403, "loss": 0.756, "step": 31557 }, { "epoch": 0.8103209489007753, "grad_norm": 0.82421875, "learning_rate": 0.00011010085772483749, "loss": 0.8854, "step": 31558 }, { "epoch": 0.8103466260966972, "grad_norm": 0.8125, "learning_rate": 0.00011009641634500086, "loss": 0.7589, "step": 31559 }, { "epoch": 0.8103723032926189, "grad_norm": 0.796875, "learning_rate": 0.00011009197494504298, "loss": 0.8991, "step": 31560 }, { "epoch": 0.8103979804885407, "grad_norm": 0.7109375, "learning_rate": 0.00011008753352497271, "loss": 0.766, "step": 31561 }, { "epoch": 0.8104236576844626, "grad_norm": 0.71875, "learning_rate": 0.00011008309208479888, "loss": 0.8683, "step": 31562 }, { "epoch": 0.8104493348803844, "grad_norm": 0.828125, "learning_rate": 0.0001100786506245304, "loss": 0.8657, "step": 31563 }, { "epoch": 0.8104750120763062, "grad_norm": 0.80078125, "learning_rate": 0.00011007420914417603, "loss": 0.7814, "step": 31564 }, { "epoch": 0.8105006892722281, "grad_norm": 0.73046875, "learning_rate": 0.0001100697676437447, "loss": 0.8169, "step": 31565 }, { "epoch": 0.8105263664681498, "grad_norm": 0.83984375, "learning_rate": 0.00011006532612324523, "loss": 0.7773, "step": 31566 }, { "epoch": 0.8105520436640716, "grad_norm": 0.87890625, "learning_rate": 0.00011006088458268646, "loss": 0.7355, "step": 31567 }, { "epoch": 0.8105777208599935, "grad_norm": 0.81640625, "learning_rate": 0.00011005644302207727, "loss": 0.9944, "step": 31568 }, { "epoch": 0.8106033980559153, "grad_norm": 0.8359375, "learning_rate": 0.00011005200144142651, "loss": 0.8594, "step": 31569 }, { "epoch": 0.8106290752518371, "grad_norm": 0.95703125, "learning_rate": 0.000110047559840743, "loss": 0.8363, "step": 31570 }, { "epoch": 0.810654752447759, "grad_norm": 1.0078125, "learning_rate": 0.00011004311822003561, "loss": 0.808, "step": 31571 }, { "epoch": 0.8106804296436808, "grad_norm": 0.7421875, "learning_rate": 0.0001100386765793132, "loss": 0.7425, "step": 31572 }, { "epoch": 0.8107061068396025, "grad_norm": 0.86328125, "learning_rate": 0.0001100342349185846, "loss": 0.8657, "step": 31573 }, { "epoch": 0.8107317840355244, "grad_norm": 0.7890625, "learning_rate": 0.00011002979323785869, "loss": 0.904, "step": 31574 }, { "epoch": 0.8107574612314462, "grad_norm": 0.8203125, "learning_rate": 0.00011002535153714429, "loss": 0.7872, "step": 31575 }, { "epoch": 0.810783138427368, "grad_norm": 0.76953125, "learning_rate": 0.00011002090981645031, "loss": 0.7817, "step": 31576 }, { "epoch": 0.8108088156232899, "grad_norm": 0.75390625, "learning_rate": 0.00011001646807578554, "loss": 0.7773, "step": 31577 }, { "epoch": 0.8108344928192117, "grad_norm": 0.8359375, "learning_rate": 0.00011001202631515884, "loss": 0.8516, "step": 31578 }, { "epoch": 0.8108601700151336, "grad_norm": 0.78515625, "learning_rate": 0.00011000758453457908, "loss": 0.8857, "step": 31579 }, { "epoch": 0.8108858472110553, "grad_norm": 0.859375, "learning_rate": 0.00011000314273405512, "loss": 0.8755, "step": 31580 }, { "epoch": 0.8109115244069771, "grad_norm": 0.765625, "learning_rate": 0.00010999870091359579, "loss": 0.8088, "step": 31581 }, { "epoch": 0.810937201602899, "grad_norm": 0.8671875, "learning_rate": 0.00010999425907320997, "loss": 0.9824, "step": 31582 }, { "epoch": 0.8109628787988208, "grad_norm": 0.7421875, "learning_rate": 0.00010998981721290647, "loss": 0.7925, "step": 31583 }, { "epoch": 0.8109885559947426, "grad_norm": 0.83203125, "learning_rate": 0.00010998537533269418, "loss": 0.8058, "step": 31584 }, { "epoch": 0.8110142331906645, "grad_norm": 0.796875, "learning_rate": 0.00010998093343258196, "loss": 0.9347, "step": 31585 }, { "epoch": 0.8110399103865862, "grad_norm": 0.76953125, "learning_rate": 0.00010997649151257858, "loss": 0.7656, "step": 31586 }, { "epoch": 0.811065587582508, "grad_norm": 0.6953125, "learning_rate": 0.000109972049572693, "loss": 0.734, "step": 31587 }, { "epoch": 0.8110912647784299, "grad_norm": 0.99609375, "learning_rate": 0.00010996760761293403, "loss": 0.8024, "step": 31588 }, { "epoch": 0.8111169419743517, "grad_norm": 0.76171875, "learning_rate": 0.00010996316563331051, "loss": 0.7125, "step": 31589 }, { "epoch": 0.8111426191702735, "grad_norm": 0.94140625, "learning_rate": 0.0001099587236338313, "loss": 0.7687, "step": 31590 }, { "epoch": 0.8111682963661954, "grad_norm": 0.76171875, "learning_rate": 0.00010995428161450523, "loss": 0.7722, "step": 31591 }, { "epoch": 0.8111939735621172, "grad_norm": 0.78125, "learning_rate": 0.0001099498395753412, "loss": 0.7934, "step": 31592 }, { "epoch": 0.8112196507580389, "grad_norm": 0.7578125, "learning_rate": 0.00010994539751634805, "loss": 0.8062, "step": 31593 }, { "epoch": 0.8112453279539608, "grad_norm": 0.79296875, "learning_rate": 0.00010994095543753457, "loss": 0.7416, "step": 31594 }, { "epoch": 0.8112710051498826, "grad_norm": 0.79296875, "learning_rate": 0.00010993651333890974, "loss": 0.8752, "step": 31595 }, { "epoch": 0.8112966823458044, "grad_norm": 0.8359375, "learning_rate": 0.00010993207122048227, "loss": 0.6447, "step": 31596 }, { "epoch": 0.8113223595417263, "grad_norm": 0.8359375, "learning_rate": 0.00010992762908226113, "loss": 0.847, "step": 31597 }, { "epoch": 0.8113480367376481, "grad_norm": 0.76171875, "learning_rate": 0.0001099231869242551, "loss": 0.7175, "step": 31598 }, { "epoch": 0.8113737139335699, "grad_norm": 0.75, "learning_rate": 0.00010991874474647303, "loss": 0.8463, "step": 31599 }, { "epoch": 0.8113993911294917, "grad_norm": 0.78515625, "learning_rate": 0.00010991430254892383, "loss": 0.7183, "step": 31600 }, { "epoch": 0.8114250683254135, "grad_norm": 0.80078125, "learning_rate": 0.0001099098603316163, "loss": 0.7065, "step": 31601 }, { "epoch": 0.8114507455213353, "grad_norm": 0.83203125, "learning_rate": 0.00010990541809455934, "loss": 0.9064, "step": 31602 }, { "epoch": 0.8114764227172572, "grad_norm": 0.76953125, "learning_rate": 0.00010990097583776175, "loss": 0.8623, "step": 31603 }, { "epoch": 0.811502099913179, "grad_norm": 0.7734375, "learning_rate": 0.00010989653356123242, "loss": 0.8666, "step": 31604 }, { "epoch": 0.8115277771091008, "grad_norm": 0.78125, "learning_rate": 0.00010989209126498019, "loss": 0.8401, "step": 31605 }, { "epoch": 0.8115534543050226, "grad_norm": 0.765625, "learning_rate": 0.00010988764894901394, "loss": 0.9263, "step": 31606 }, { "epoch": 0.8115791315009444, "grad_norm": 0.74609375, "learning_rate": 0.00010988320661334243, "loss": 0.8432, "step": 31607 }, { "epoch": 0.8116048086968662, "grad_norm": 0.8046875, "learning_rate": 0.00010987876425797465, "loss": 0.7418, "step": 31608 }, { "epoch": 0.8116304858927881, "grad_norm": 0.7421875, "learning_rate": 0.00010987432188291939, "loss": 0.8307, "step": 31609 }, { "epoch": 0.8116561630887099, "grad_norm": 0.78515625, "learning_rate": 0.00010986987948818546, "loss": 0.7799, "step": 31610 }, { "epoch": 0.8116818402846318, "grad_norm": 0.83984375, "learning_rate": 0.00010986543707378177, "loss": 0.718, "step": 31611 }, { "epoch": 0.8117075174805536, "grad_norm": 0.7421875, "learning_rate": 0.00010986099463971716, "loss": 0.7264, "step": 31612 }, { "epoch": 0.8117331946764753, "grad_norm": 0.74609375, "learning_rate": 0.00010985655218600044, "loss": 0.7342, "step": 31613 }, { "epoch": 0.8117588718723971, "grad_norm": 0.78125, "learning_rate": 0.00010985210971264055, "loss": 0.7947, "step": 31614 }, { "epoch": 0.811784549068319, "grad_norm": 0.75, "learning_rate": 0.00010984766721964628, "loss": 0.7655, "step": 31615 }, { "epoch": 0.8118102262642408, "grad_norm": 0.7421875, "learning_rate": 0.00010984322470702648, "loss": 0.8638, "step": 31616 }, { "epoch": 0.8118359034601627, "grad_norm": 0.79296875, "learning_rate": 0.00010983878217479004, "loss": 0.7816, "step": 31617 }, { "epoch": 0.8118615806560845, "grad_norm": 0.83203125, "learning_rate": 0.00010983433962294578, "loss": 0.7691, "step": 31618 }, { "epoch": 0.8118872578520062, "grad_norm": 0.76953125, "learning_rate": 0.0001098298970515026, "loss": 0.7223, "step": 31619 }, { "epoch": 0.811912935047928, "grad_norm": 0.75390625, "learning_rate": 0.0001098254544604693, "loss": 0.8464, "step": 31620 }, { "epoch": 0.8119386122438499, "grad_norm": 0.73046875, "learning_rate": 0.00010982101184985473, "loss": 0.772, "step": 31621 }, { "epoch": 0.8119642894397717, "grad_norm": 0.828125, "learning_rate": 0.00010981656921966785, "loss": 0.785, "step": 31622 }, { "epoch": 0.8119899666356936, "grad_norm": 0.77734375, "learning_rate": 0.00010981212656991735, "loss": 0.8542, "step": 31623 }, { "epoch": 0.8120156438316154, "grad_norm": 0.7890625, "learning_rate": 0.00010980768390061222, "loss": 0.7495, "step": 31624 }, { "epoch": 0.8120413210275372, "grad_norm": 0.80078125, "learning_rate": 0.00010980324121176124, "loss": 0.8153, "step": 31625 }, { "epoch": 0.812066998223459, "grad_norm": 0.87890625, "learning_rate": 0.0001097987985033733, "loss": 0.7769, "step": 31626 }, { "epoch": 0.8120926754193808, "grad_norm": 0.7734375, "learning_rate": 0.00010979435577545723, "loss": 0.8359, "step": 31627 }, { "epoch": 0.8121183526153026, "grad_norm": 0.765625, "learning_rate": 0.00010978991302802188, "loss": 0.9273, "step": 31628 }, { "epoch": 0.8121440298112245, "grad_norm": 0.765625, "learning_rate": 0.00010978547026107615, "loss": 0.774, "step": 31629 }, { "epoch": 0.8121697070071463, "grad_norm": 0.73046875, "learning_rate": 0.00010978102747462884, "loss": 0.8002, "step": 31630 }, { "epoch": 0.8121953842030681, "grad_norm": 0.7421875, "learning_rate": 0.00010977658466868883, "loss": 0.8098, "step": 31631 }, { "epoch": 0.81222106139899, "grad_norm": 0.8203125, "learning_rate": 0.00010977214184326496, "loss": 0.7531, "step": 31632 }, { "epoch": 0.8122467385949117, "grad_norm": 0.80078125, "learning_rate": 0.00010976769899836611, "loss": 0.8559, "step": 31633 }, { "epoch": 0.8122724157908335, "grad_norm": 0.82421875, "learning_rate": 0.00010976325613400109, "loss": 0.8083, "step": 31634 }, { "epoch": 0.8122980929867554, "grad_norm": 0.74609375, "learning_rate": 0.00010975881325017884, "loss": 0.8068, "step": 31635 }, { "epoch": 0.8123237701826772, "grad_norm": 0.7421875, "learning_rate": 0.00010975437034690812, "loss": 0.7696, "step": 31636 }, { "epoch": 0.812349447378599, "grad_norm": 0.79296875, "learning_rate": 0.0001097499274241978, "loss": 0.8361, "step": 31637 }, { "epoch": 0.8123751245745209, "grad_norm": 0.72265625, "learning_rate": 0.0001097454844820568, "loss": 0.8221, "step": 31638 }, { "epoch": 0.8124008017704426, "grad_norm": 0.7890625, "learning_rate": 0.00010974104152049389, "loss": 0.8255, "step": 31639 }, { "epoch": 0.8124264789663644, "grad_norm": 0.78125, "learning_rate": 0.00010973659853951799, "loss": 0.7509, "step": 31640 }, { "epoch": 0.8124521561622863, "grad_norm": 0.76953125, "learning_rate": 0.00010973215553913791, "loss": 0.9137, "step": 31641 }, { "epoch": 0.8124778333582081, "grad_norm": 0.875, "learning_rate": 0.00010972771251936256, "loss": 0.8238, "step": 31642 }, { "epoch": 0.81250351055413, "grad_norm": 0.80078125, "learning_rate": 0.00010972326948020073, "loss": 0.7918, "step": 31643 }, { "epoch": 0.8125291877500518, "grad_norm": 0.80859375, "learning_rate": 0.0001097188264216613, "loss": 0.8129, "step": 31644 }, { "epoch": 0.8125548649459736, "grad_norm": 0.83203125, "learning_rate": 0.00010971438334375313, "loss": 0.8347, "step": 31645 }, { "epoch": 0.8125805421418953, "grad_norm": 0.80078125, "learning_rate": 0.00010970994024648507, "loss": 0.739, "step": 31646 }, { "epoch": 0.8126062193378172, "grad_norm": 0.75390625, "learning_rate": 0.00010970549712986595, "loss": 0.724, "step": 31647 }, { "epoch": 0.812631896533739, "grad_norm": 0.8046875, "learning_rate": 0.00010970105399390471, "loss": 0.8531, "step": 31648 }, { "epoch": 0.8126575737296609, "grad_norm": 0.7265625, "learning_rate": 0.00010969661083861011, "loss": 0.7945, "step": 31649 }, { "epoch": 0.8126832509255827, "grad_norm": 0.71875, "learning_rate": 0.00010969216766399104, "loss": 0.729, "step": 31650 }, { "epoch": 0.8127089281215045, "grad_norm": 0.796875, "learning_rate": 0.00010968772447005636, "loss": 0.711, "step": 31651 }, { "epoch": 0.8127346053174264, "grad_norm": 0.71875, "learning_rate": 0.0001096832812568149, "loss": 0.6993, "step": 31652 }, { "epoch": 0.8127602825133481, "grad_norm": 0.80859375, "learning_rate": 0.00010967883802427556, "loss": 0.7761, "step": 31653 }, { "epoch": 0.8127859597092699, "grad_norm": 0.734375, "learning_rate": 0.00010967439477244719, "loss": 0.7667, "step": 31654 }, { "epoch": 0.8128116369051918, "grad_norm": 0.8125, "learning_rate": 0.00010966995150133859, "loss": 0.7846, "step": 31655 }, { "epoch": 0.8128373141011136, "grad_norm": 0.76953125, "learning_rate": 0.00010966550821095867, "loss": 0.9835, "step": 31656 }, { "epoch": 0.8128629912970354, "grad_norm": 0.734375, "learning_rate": 0.00010966106490131627, "loss": 0.8387, "step": 31657 }, { "epoch": 0.8128886684929573, "grad_norm": 0.83203125, "learning_rate": 0.0001096566215724202, "loss": 0.8227, "step": 31658 }, { "epoch": 0.812914345688879, "grad_norm": 0.8671875, "learning_rate": 0.00010965217822427938, "loss": 0.8409, "step": 31659 }, { "epoch": 0.8129400228848008, "grad_norm": 0.74609375, "learning_rate": 0.00010964773485690265, "loss": 0.7803, "step": 31660 }, { "epoch": 0.8129657000807227, "grad_norm": 0.77734375, "learning_rate": 0.00010964329147029886, "loss": 0.7809, "step": 31661 }, { "epoch": 0.8129913772766445, "grad_norm": 0.79296875, "learning_rate": 0.00010963884806447686, "loss": 0.7866, "step": 31662 }, { "epoch": 0.8130170544725663, "grad_norm": 0.76171875, "learning_rate": 0.00010963440463944547, "loss": 0.7598, "step": 31663 }, { "epoch": 0.8130427316684882, "grad_norm": 0.734375, "learning_rate": 0.00010962996119521362, "loss": 0.7633, "step": 31664 }, { "epoch": 0.81306840886441, "grad_norm": 0.78515625, "learning_rate": 0.0001096255177317901, "loss": 0.8275, "step": 31665 }, { "epoch": 0.8130940860603317, "grad_norm": 0.75390625, "learning_rate": 0.0001096210742491838, "loss": 0.7312, "step": 31666 }, { "epoch": 0.8131197632562536, "grad_norm": 0.8515625, "learning_rate": 0.00010961663074740358, "loss": 0.8513, "step": 31667 }, { "epoch": 0.8131454404521754, "grad_norm": 0.890625, "learning_rate": 0.0001096121872264583, "loss": 0.8312, "step": 31668 }, { "epoch": 0.8131711176480972, "grad_norm": 0.78125, "learning_rate": 0.00010960774368635678, "loss": 0.8782, "step": 31669 }, { "epoch": 0.8131967948440191, "grad_norm": 0.8046875, "learning_rate": 0.00010960330012710789, "loss": 0.8321, "step": 31670 }, { "epoch": 0.8132224720399409, "grad_norm": 0.828125, "learning_rate": 0.00010959885654872049, "loss": 0.8035, "step": 31671 }, { "epoch": 0.8132481492358627, "grad_norm": 0.78515625, "learning_rate": 0.00010959441295120343, "loss": 0.9024, "step": 31672 }, { "epoch": 0.8132738264317845, "grad_norm": 0.72265625, "learning_rate": 0.0001095899693345656, "loss": 0.8272, "step": 31673 }, { "epoch": 0.8132995036277063, "grad_norm": 0.79296875, "learning_rate": 0.00010958552569881577, "loss": 0.7872, "step": 31674 }, { "epoch": 0.8133251808236281, "grad_norm": 0.78515625, "learning_rate": 0.00010958108204396294, "loss": 0.7961, "step": 31675 }, { "epoch": 0.81335085801955, "grad_norm": 0.76953125, "learning_rate": 0.00010957663837001582, "loss": 0.822, "step": 31676 }, { "epoch": 0.8133765352154718, "grad_norm": 0.7578125, "learning_rate": 0.00010957219467698332, "loss": 0.8084, "step": 31677 }, { "epoch": 0.8134022124113937, "grad_norm": 0.796875, "learning_rate": 0.00010956775096487433, "loss": 0.8375, "step": 31678 }, { "epoch": 0.8134278896073154, "grad_norm": 0.8203125, "learning_rate": 0.00010956330723369765, "loss": 0.7925, "step": 31679 }, { "epoch": 0.8134535668032372, "grad_norm": 0.7890625, "learning_rate": 0.00010955886348346218, "loss": 0.9387, "step": 31680 }, { "epoch": 0.813479243999159, "grad_norm": 0.75, "learning_rate": 0.00010955441971417675, "loss": 0.7316, "step": 31681 }, { "epoch": 0.8135049211950809, "grad_norm": 0.87890625, "learning_rate": 0.00010954997592585023, "loss": 0.7918, "step": 31682 }, { "epoch": 0.8135305983910027, "grad_norm": 0.796875, "learning_rate": 0.00010954553211849148, "loss": 0.8128, "step": 31683 }, { "epoch": 0.8135562755869246, "grad_norm": 0.75, "learning_rate": 0.00010954108829210931, "loss": 0.6971, "step": 31684 }, { "epoch": 0.8135819527828464, "grad_norm": 0.78125, "learning_rate": 0.00010953664444671266, "loss": 0.7878, "step": 31685 }, { "epoch": 0.8136076299787681, "grad_norm": 0.79296875, "learning_rate": 0.0001095322005823103, "loss": 0.6638, "step": 31686 }, { "epoch": 0.81363330717469, "grad_norm": 0.78125, "learning_rate": 0.00010952775669891113, "loss": 0.8603, "step": 31687 }, { "epoch": 0.8136589843706118, "grad_norm": 0.77734375, "learning_rate": 0.00010952331279652406, "loss": 0.8221, "step": 31688 }, { "epoch": 0.8136846615665336, "grad_norm": 0.76171875, "learning_rate": 0.00010951886887515781, "loss": 0.7566, "step": 31689 }, { "epoch": 0.8137103387624555, "grad_norm": 0.7734375, "learning_rate": 0.00010951442493482136, "loss": 0.8211, "step": 31690 }, { "epoch": 0.8137360159583773, "grad_norm": 0.8359375, "learning_rate": 0.00010950998097552352, "loss": 0.8516, "step": 31691 }, { "epoch": 0.8137616931542991, "grad_norm": 0.7734375, "learning_rate": 0.00010950553699727311, "loss": 0.6632, "step": 31692 }, { "epoch": 0.8137873703502209, "grad_norm": 0.83984375, "learning_rate": 0.00010950109300007904, "loss": 0.8472, "step": 31693 }, { "epoch": 0.8138130475461427, "grad_norm": 0.74609375, "learning_rate": 0.0001094966489839502, "loss": 0.8266, "step": 31694 }, { "epoch": 0.8138387247420645, "grad_norm": 0.79296875, "learning_rate": 0.00010949220494889534, "loss": 0.809, "step": 31695 }, { "epoch": 0.8138644019379864, "grad_norm": 0.76171875, "learning_rate": 0.00010948776089492338, "loss": 0.8797, "step": 31696 }, { "epoch": 0.8138900791339082, "grad_norm": 0.734375, "learning_rate": 0.00010948331682204318, "loss": 0.8126, "step": 31697 }, { "epoch": 0.81391575632983, "grad_norm": 0.83984375, "learning_rate": 0.00010947887273026358, "loss": 0.8286, "step": 31698 }, { "epoch": 0.8139414335257518, "grad_norm": 0.69921875, "learning_rate": 0.00010947442861959345, "loss": 0.772, "step": 31699 }, { "epoch": 0.8139671107216736, "grad_norm": 0.7890625, "learning_rate": 0.0001094699844900416, "loss": 0.7282, "step": 31700 }, { "epoch": 0.8139927879175954, "grad_norm": 0.8359375, "learning_rate": 0.000109465540341617, "loss": 0.8059, "step": 31701 }, { "epoch": 0.8140184651135173, "grad_norm": 0.75, "learning_rate": 0.0001094610961743284, "loss": 0.6996, "step": 31702 }, { "epoch": 0.8140441423094391, "grad_norm": 0.85546875, "learning_rate": 0.00010945665198818466, "loss": 0.8874, "step": 31703 }, { "epoch": 0.814069819505361, "grad_norm": 0.80078125, "learning_rate": 0.0001094522077831947, "loss": 0.754, "step": 31704 }, { "epoch": 0.8140954967012828, "grad_norm": 0.75, "learning_rate": 0.00010944776355936734, "loss": 0.7967, "step": 31705 }, { "epoch": 0.8141211738972045, "grad_norm": 0.7890625, "learning_rate": 0.00010944331931671141, "loss": 0.8683, "step": 31706 }, { "epoch": 0.8141468510931263, "grad_norm": 0.79296875, "learning_rate": 0.00010943887505523586, "loss": 0.7872, "step": 31707 }, { "epoch": 0.8141725282890482, "grad_norm": 0.84765625, "learning_rate": 0.00010943443077494944, "loss": 0.8023, "step": 31708 }, { "epoch": 0.81419820548497, "grad_norm": 0.82421875, "learning_rate": 0.00010942998647586105, "loss": 0.8372, "step": 31709 }, { "epoch": 0.8142238826808919, "grad_norm": 0.69921875, "learning_rate": 0.00010942554215797955, "loss": 0.7217, "step": 31710 }, { "epoch": 0.8142495598768137, "grad_norm": 0.73046875, "learning_rate": 0.0001094210978213138, "loss": 0.7501, "step": 31711 }, { "epoch": 0.8142752370727355, "grad_norm": 0.78125, "learning_rate": 0.00010941665346587266, "loss": 0.8309, "step": 31712 }, { "epoch": 0.8143009142686572, "grad_norm": 0.71875, "learning_rate": 0.00010941220909166499, "loss": 0.7709, "step": 31713 }, { "epoch": 0.8143265914645791, "grad_norm": 0.8984375, "learning_rate": 0.00010940776469869962, "loss": 0.8853, "step": 31714 }, { "epoch": 0.8143522686605009, "grad_norm": 0.86328125, "learning_rate": 0.00010940332028698543, "loss": 0.9305, "step": 31715 }, { "epoch": 0.8143779458564228, "grad_norm": 0.7734375, "learning_rate": 0.00010939887585653126, "loss": 0.7354, "step": 31716 }, { "epoch": 0.8144036230523446, "grad_norm": 0.8359375, "learning_rate": 0.000109394431407346, "loss": 0.8837, "step": 31717 }, { "epoch": 0.8144293002482664, "grad_norm": 0.83203125, "learning_rate": 0.00010938998693943849, "loss": 0.945, "step": 31718 }, { "epoch": 0.8144549774441882, "grad_norm": 0.76953125, "learning_rate": 0.00010938554245281755, "loss": 0.8581, "step": 31719 }, { "epoch": 0.81448065464011, "grad_norm": 0.73828125, "learning_rate": 0.00010938109794749209, "loss": 0.6963, "step": 31720 }, { "epoch": 0.8145063318360318, "grad_norm": 0.80078125, "learning_rate": 0.00010937665342347096, "loss": 0.8301, "step": 31721 }, { "epoch": 0.8145320090319537, "grad_norm": 0.73046875, "learning_rate": 0.00010937220888076301, "loss": 0.7662, "step": 31722 }, { "epoch": 0.8145576862278755, "grad_norm": 0.796875, "learning_rate": 0.0001093677643193771, "loss": 0.83, "step": 31723 }, { "epoch": 0.8145833634237973, "grad_norm": 0.8359375, "learning_rate": 0.00010936331973932203, "loss": 0.8184, "step": 31724 }, { "epoch": 0.8146090406197192, "grad_norm": 0.76171875, "learning_rate": 0.00010935887514060677, "loss": 0.7024, "step": 31725 }, { "epoch": 0.8146347178156409, "grad_norm": 0.890625, "learning_rate": 0.00010935443052324007, "loss": 0.9734, "step": 31726 }, { "epoch": 0.8146603950115627, "grad_norm": 0.73828125, "learning_rate": 0.00010934998588723089, "loss": 0.8067, "step": 31727 }, { "epoch": 0.8146860722074846, "grad_norm": 0.734375, "learning_rate": 0.000109345541232588, "loss": 0.7632, "step": 31728 }, { "epoch": 0.8147117494034064, "grad_norm": 0.80859375, "learning_rate": 0.00010934109655932028, "loss": 0.7222, "step": 31729 }, { "epoch": 0.8147374265993282, "grad_norm": 0.7578125, "learning_rate": 0.0001093366518674366, "loss": 0.7582, "step": 31730 }, { "epoch": 0.8147631037952501, "grad_norm": 0.7890625, "learning_rate": 0.00010933220715694583, "loss": 0.7545, "step": 31731 }, { "epoch": 0.8147887809911719, "grad_norm": 0.78125, "learning_rate": 0.0001093277624278568, "loss": 0.7338, "step": 31732 }, { "epoch": 0.8148144581870936, "grad_norm": 0.765625, "learning_rate": 0.00010932331768017838, "loss": 0.7214, "step": 31733 }, { "epoch": 0.8148401353830155, "grad_norm": 0.83984375, "learning_rate": 0.00010931887291391949, "loss": 0.9544, "step": 31734 }, { "epoch": 0.8148658125789373, "grad_norm": 0.7890625, "learning_rate": 0.00010931442812908885, "loss": 0.8585, "step": 31735 }, { "epoch": 0.8148914897748591, "grad_norm": 0.80859375, "learning_rate": 0.00010930998332569544, "loss": 0.9245, "step": 31736 }, { "epoch": 0.814917166970781, "grad_norm": 0.73046875, "learning_rate": 0.00010930553850374803, "loss": 0.8314, "step": 31737 }, { "epoch": 0.8149428441667028, "grad_norm": 0.80859375, "learning_rate": 0.00010930109366325557, "loss": 0.7719, "step": 31738 }, { "epoch": 0.8149685213626245, "grad_norm": 0.8046875, "learning_rate": 0.00010929664880422685, "loss": 0.7796, "step": 31739 }, { "epoch": 0.8149941985585464, "grad_norm": 0.73046875, "learning_rate": 0.00010929220392667074, "loss": 0.7802, "step": 31740 }, { "epoch": 0.8150198757544682, "grad_norm": 0.828125, "learning_rate": 0.00010928775903059613, "loss": 0.9524, "step": 31741 }, { "epoch": 0.81504555295039, "grad_norm": 0.84765625, "learning_rate": 0.00010928331411601185, "loss": 0.7878, "step": 31742 }, { "epoch": 0.8150712301463119, "grad_norm": 0.87109375, "learning_rate": 0.00010927886918292676, "loss": 0.8679, "step": 31743 }, { "epoch": 0.8150969073422337, "grad_norm": 0.84375, "learning_rate": 0.0001092744242313497, "loss": 0.8071, "step": 31744 }, { "epoch": 0.8151225845381556, "grad_norm": 0.87109375, "learning_rate": 0.00010926997926128958, "loss": 0.7785, "step": 31745 }, { "epoch": 0.8151482617340773, "grad_norm": 0.80078125, "learning_rate": 0.0001092655342727552, "loss": 0.9033, "step": 31746 }, { "epoch": 0.8151739389299991, "grad_norm": 0.734375, "learning_rate": 0.0001092610892657555, "loss": 0.8866, "step": 31747 }, { "epoch": 0.815199616125921, "grad_norm": 0.78515625, "learning_rate": 0.00010925664424029922, "loss": 0.7253, "step": 31748 }, { "epoch": 0.8152252933218428, "grad_norm": 0.7265625, "learning_rate": 0.00010925219919639533, "loss": 0.8103, "step": 31749 }, { "epoch": 0.8152509705177646, "grad_norm": 0.78125, "learning_rate": 0.00010924775413405264, "loss": 0.7379, "step": 31750 }, { "epoch": 0.8152766477136865, "grad_norm": 0.7734375, "learning_rate": 0.00010924330905327997, "loss": 0.8072, "step": 31751 }, { "epoch": 0.8153023249096083, "grad_norm": 0.68359375, "learning_rate": 0.00010923886395408626, "loss": 0.7383, "step": 31752 }, { "epoch": 0.81532800210553, "grad_norm": 0.7578125, "learning_rate": 0.00010923441883648031, "loss": 0.8002, "step": 31753 }, { "epoch": 0.8153536793014519, "grad_norm": 0.74609375, "learning_rate": 0.00010922997370047104, "loss": 0.7702, "step": 31754 }, { "epoch": 0.8153793564973737, "grad_norm": 0.85546875, "learning_rate": 0.00010922552854606722, "loss": 0.9148, "step": 31755 }, { "epoch": 0.8154050336932955, "grad_norm": 0.7734375, "learning_rate": 0.00010922108337327774, "loss": 0.8039, "step": 31756 }, { "epoch": 0.8154307108892174, "grad_norm": 0.8125, "learning_rate": 0.00010921663818211152, "loss": 0.8812, "step": 31757 }, { "epoch": 0.8154563880851392, "grad_norm": 0.8203125, "learning_rate": 0.00010921219297257734, "loss": 0.8185, "step": 31758 }, { "epoch": 0.8154820652810609, "grad_norm": 0.7890625, "learning_rate": 0.0001092077477446841, "loss": 0.7358, "step": 31759 }, { "epoch": 0.8155077424769828, "grad_norm": 0.7265625, "learning_rate": 0.00010920330249844069, "loss": 0.7804, "step": 31760 }, { "epoch": 0.8155334196729046, "grad_norm": 0.734375, "learning_rate": 0.00010919885723385586, "loss": 0.7741, "step": 31761 }, { "epoch": 0.8155590968688264, "grad_norm": 0.7890625, "learning_rate": 0.00010919441195093858, "loss": 0.717, "step": 31762 }, { "epoch": 0.8155847740647483, "grad_norm": 0.83203125, "learning_rate": 0.00010918996664969768, "loss": 0.7968, "step": 31763 }, { "epoch": 0.8156104512606701, "grad_norm": 0.7734375, "learning_rate": 0.00010918552133014194, "loss": 0.7791, "step": 31764 }, { "epoch": 0.815636128456592, "grad_norm": 0.74609375, "learning_rate": 0.00010918107599228036, "loss": 0.803, "step": 31765 }, { "epoch": 0.8156618056525137, "grad_norm": 0.77734375, "learning_rate": 0.00010917663063612169, "loss": 0.8384, "step": 31766 }, { "epoch": 0.8156874828484355, "grad_norm": 0.765625, "learning_rate": 0.00010917218526167485, "loss": 0.8375, "step": 31767 }, { "epoch": 0.8157131600443573, "grad_norm": 0.72265625, "learning_rate": 0.00010916773986894867, "loss": 0.7206, "step": 31768 }, { "epoch": 0.8157388372402792, "grad_norm": 0.7890625, "learning_rate": 0.00010916329445795197, "loss": 0.7903, "step": 31769 }, { "epoch": 0.815764514436201, "grad_norm": 0.80078125, "learning_rate": 0.00010915884902869366, "loss": 0.889, "step": 31770 }, { "epoch": 0.8157901916321229, "grad_norm": 0.8046875, "learning_rate": 0.00010915440358118263, "loss": 0.8089, "step": 31771 }, { "epoch": 0.8158158688280447, "grad_norm": 0.8359375, "learning_rate": 0.00010914995811542765, "loss": 0.7759, "step": 31772 }, { "epoch": 0.8158415460239664, "grad_norm": 0.82421875, "learning_rate": 0.0001091455126314377, "loss": 0.9072, "step": 31773 }, { "epoch": 0.8158672232198882, "grad_norm": 0.75, "learning_rate": 0.00010914106712922154, "loss": 0.7467, "step": 31774 }, { "epoch": 0.8158929004158101, "grad_norm": 0.8046875, "learning_rate": 0.00010913662160878804, "loss": 0.8, "step": 31775 }, { "epoch": 0.8159185776117319, "grad_norm": 0.90234375, "learning_rate": 0.00010913217607014608, "loss": 0.859, "step": 31776 }, { "epoch": 0.8159442548076538, "grad_norm": 0.88671875, "learning_rate": 0.00010912773051330451, "loss": 0.7376, "step": 31777 }, { "epoch": 0.8159699320035756, "grad_norm": 0.76171875, "learning_rate": 0.00010912328493827221, "loss": 0.7185, "step": 31778 }, { "epoch": 0.8159956091994973, "grad_norm": 0.76171875, "learning_rate": 0.00010911883934505804, "loss": 0.7921, "step": 31779 }, { "epoch": 0.8160212863954192, "grad_norm": 0.8515625, "learning_rate": 0.00010911439373367086, "loss": 0.8326, "step": 31780 }, { "epoch": 0.816046963591341, "grad_norm": 0.79296875, "learning_rate": 0.00010910994810411948, "loss": 0.7805, "step": 31781 }, { "epoch": 0.8160726407872628, "grad_norm": 0.76171875, "learning_rate": 0.00010910550245641283, "loss": 0.6591, "step": 31782 }, { "epoch": 0.8160983179831847, "grad_norm": 0.78125, "learning_rate": 0.00010910105679055968, "loss": 0.8324, "step": 31783 }, { "epoch": 0.8161239951791065, "grad_norm": 0.78515625, "learning_rate": 0.000109096611106569, "loss": 0.8457, "step": 31784 }, { "epoch": 0.8161496723750283, "grad_norm": 0.78515625, "learning_rate": 0.00010909216540444958, "loss": 0.8116, "step": 31785 }, { "epoch": 0.8161753495709501, "grad_norm": 0.75390625, "learning_rate": 0.0001090877196842103, "loss": 0.7475, "step": 31786 }, { "epoch": 0.8162010267668719, "grad_norm": 0.7421875, "learning_rate": 0.00010908327394586001, "loss": 0.7556, "step": 31787 }, { "epoch": 0.8162267039627937, "grad_norm": 0.70703125, "learning_rate": 0.00010907882818940756, "loss": 0.6976, "step": 31788 }, { "epoch": 0.8162523811587156, "grad_norm": 2.546875, "learning_rate": 0.00010907438241486185, "loss": 0.8754, "step": 31789 }, { "epoch": 0.8162780583546374, "grad_norm": 0.74609375, "learning_rate": 0.0001090699366222317, "loss": 0.8245, "step": 31790 }, { "epoch": 0.8163037355505592, "grad_norm": 0.7734375, "learning_rate": 0.00010906549081152597, "loss": 0.7564, "step": 31791 }, { "epoch": 0.8163294127464811, "grad_norm": 0.83984375, "learning_rate": 0.00010906104498275358, "loss": 0.8536, "step": 31792 }, { "epoch": 0.8163550899424028, "grad_norm": 0.8515625, "learning_rate": 0.00010905659913592332, "loss": 0.7415, "step": 31793 }, { "epoch": 0.8163807671383246, "grad_norm": 0.76953125, "learning_rate": 0.00010905215327104408, "loss": 0.7282, "step": 31794 }, { "epoch": 0.8164064443342465, "grad_norm": 0.73828125, "learning_rate": 0.00010904770738812474, "loss": 0.827, "step": 31795 }, { "epoch": 0.8164321215301683, "grad_norm": 0.75390625, "learning_rate": 0.00010904326148717407, "loss": 0.7895, "step": 31796 }, { "epoch": 0.8164577987260901, "grad_norm": 0.734375, "learning_rate": 0.00010903881556820106, "loss": 0.7554, "step": 31797 }, { "epoch": 0.816483475922012, "grad_norm": 0.75390625, "learning_rate": 0.00010903436963121447, "loss": 0.8177, "step": 31798 }, { "epoch": 0.8165091531179337, "grad_norm": 0.76953125, "learning_rate": 0.00010902992367622322, "loss": 0.7212, "step": 31799 }, { "epoch": 0.8165348303138555, "grad_norm": 0.796875, "learning_rate": 0.00010902547770323615, "loss": 0.7475, "step": 31800 }, { "epoch": 0.8165605075097774, "grad_norm": 0.7578125, "learning_rate": 0.0001090210317122621, "loss": 0.7415, "step": 31801 }, { "epoch": 0.8165861847056992, "grad_norm": 0.76953125, "learning_rate": 0.00010901658570330995, "loss": 0.7083, "step": 31802 }, { "epoch": 0.816611861901621, "grad_norm": 0.7734375, "learning_rate": 0.00010901213967638857, "loss": 0.8747, "step": 31803 }, { "epoch": 0.8166375390975429, "grad_norm": 0.8125, "learning_rate": 0.0001090076936315068, "loss": 0.7532, "step": 31804 }, { "epoch": 0.8166632162934647, "grad_norm": 0.890625, "learning_rate": 0.00010900324756867353, "loss": 0.7721, "step": 31805 }, { "epoch": 0.8166888934893864, "grad_norm": 0.73046875, "learning_rate": 0.00010899880148789758, "loss": 0.787, "step": 31806 }, { "epoch": 0.8167145706853083, "grad_norm": 0.8046875, "learning_rate": 0.00010899435538918785, "loss": 0.7857, "step": 31807 }, { "epoch": 0.8167402478812301, "grad_norm": 0.78125, "learning_rate": 0.00010898990927255318, "loss": 0.7307, "step": 31808 }, { "epoch": 0.816765925077152, "grad_norm": 0.8046875, "learning_rate": 0.0001089854631380024, "loss": 0.7699, "step": 31809 }, { "epoch": 0.8167916022730738, "grad_norm": 0.83984375, "learning_rate": 0.00010898101698554443, "loss": 0.9532, "step": 31810 }, { "epoch": 0.8168172794689956, "grad_norm": 0.8046875, "learning_rate": 0.00010897657081518811, "loss": 0.8197, "step": 31811 }, { "epoch": 0.8168429566649174, "grad_norm": 0.765625, "learning_rate": 0.00010897212462694226, "loss": 0.8464, "step": 31812 }, { "epoch": 0.8168686338608392, "grad_norm": 0.8046875, "learning_rate": 0.00010896767842081584, "loss": 0.8159, "step": 31813 }, { "epoch": 0.816894311056761, "grad_norm": 0.90234375, "learning_rate": 0.0001089632321968176, "loss": 0.7748, "step": 31814 }, { "epoch": 0.8169199882526829, "grad_norm": 0.83984375, "learning_rate": 0.00010895878595495645, "loss": 0.6935, "step": 31815 }, { "epoch": 0.8169456654486047, "grad_norm": 0.71484375, "learning_rate": 0.00010895433969524127, "loss": 0.6898, "step": 31816 }, { "epoch": 0.8169713426445265, "grad_norm": 0.76953125, "learning_rate": 0.00010894989341768088, "loss": 0.8052, "step": 31817 }, { "epoch": 0.8169970198404484, "grad_norm": 0.796875, "learning_rate": 0.00010894544712228417, "loss": 0.8591, "step": 31818 }, { "epoch": 0.8170226970363701, "grad_norm": 0.8515625, "learning_rate": 0.00010894100080906, "loss": 0.8335, "step": 31819 }, { "epoch": 0.8170483742322919, "grad_norm": 0.81640625, "learning_rate": 0.0001089365544780172, "loss": 0.7582, "step": 31820 }, { "epoch": 0.8170740514282138, "grad_norm": 0.8359375, "learning_rate": 0.00010893210812916466, "loss": 0.9124, "step": 31821 }, { "epoch": 0.8170997286241356, "grad_norm": 0.7109375, "learning_rate": 0.00010892766176251125, "loss": 0.7545, "step": 31822 }, { "epoch": 0.8171254058200574, "grad_norm": 0.8359375, "learning_rate": 0.0001089232153780658, "loss": 0.7767, "step": 31823 }, { "epoch": 0.8171510830159793, "grad_norm": 0.796875, "learning_rate": 0.00010891876897583719, "loss": 0.7228, "step": 31824 }, { "epoch": 0.8171767602119011, "grad_norm": 0.80078125, "learning_rate": 0.00010891432255583426, "loss": 0.9184, "step": 31825 }, { "epoch": 0.8172024374078228, "grad_norm": 0.8359375, "learning_rate": 0.00010890987611806595, "loss": 0.8652, "step": 31826 }, { "epoch": 0.8172281146037447, "grad_norm": 0.77734375, "learning_rate": 0.00010890542966254105, "loss": 0.8274, "step": 31827 }, { "epoch": 0.8172537917996665, "grad_norm": 0.79296875, "learning_rate": 0.00010890098318926838, "loss": 0.8636, "step": 31828 }, { "epoch": 0.8172794689955883, "grad_norm": 0.75, "learning_rate": 0.0001088965366982569, "loss": 0.8004, "step": 31829 }, { "epoch": 0.8173051461915102, "grad_norm": 0.83984375, "learning_rate": 0.0001088920901895154, "loss": 0.8317, "step": 31830 }, { "epoch": 0.817330823387432, "grad_norm": 0.79296875, "learning_rate": 0.00010888764366305278, "loss": 1.1532, "step": 31831 }, { "epoch": 0.8173565005833537, "grad_norm": 0.71484375, "learning_rate": 0.00010888319711887791, "loss": 0.8582, "step": 31832 }, { "epoch": 0.8173821777792756, "grad_norm": 0.828125, "learning_rate": 0.00010887875055699959, "loss": 0.9292, "step": 31833 }, { "epoch": 0.8174078549751974, "grad_norm": 0.88671875, "learning_rate": 0.00010887430397742673, "loss": 0.8865, "step": 31834 }, { "epoch": 0.8174335321711192, "grad_norm": 0.87890625, "learning_rate": 0.00010886985738016822, "loss": 0.9082, "step": 31835 }, { "epoch": 0.8174592093670411, "grad_norm": 0.71875, "learning_rate": 0.00010886541076523282, "loss": 0.7573, "step": 31836 }, { "epoch": 0.8174848865629629, "grad_norm": 0.75, "learning_rate": 0.00010886096413262951, "loss": 0.7755, "step": 31837 }, { "epoch": 0.8175105637588848, "grad_norm": 0.77734375, "learning_rate": 0.00010885651748236709, "loss": 0.8099, "step": 31838 }, { "epoch": 0.8175362409548065, "grad_norm": 0.765625, "learning_rate": 0.00010885207081445443, "loss": 0.801, "step": 31839 }, { "epoch": 0.8175619181507283, "grad_norm": 0.87109375, "learning_rate": 0.0001088476241289004, "loss": 0.8802, "step": 31840 }, { "epoch": 0.8175875953466502, "grad_norm": 0.734375, "learning_rate": 0.00010884317742571381, "loss": 0.8206, "step": 31841 }, { "epoch": 0.817613272542572, "grad_norm": 1.0546875, "learning_rate": 0.00010883873070490362, "loss": 0.8417, "step": 31842 }, { "epoch": 0.8176389497384938, "grad_norm": 0.796875, "learning_rate": 0.00010883428396647861, "loss": 0.7533, "step": 31843 }, { "epoch": 0.8176646269344157, "grad_norm": 0.74609375, "learning_rate": 0.00010882983721044765, "loss": 0.8186, "step": 31844 }, { "epoch": 0.8176903041303375, "grad_norm": 0.80078125, "learning_rate": 0.00010882539043681968, "loss": 0.8277, "step": 31845 }, { "epoch": 0.8177159813262592, "grad_norm": 0.828125, "learning_rate": 0.00010882094364560346, "loss": 0.8853, "step": 31846 }, { "epoch": 0.8177416585221811, "grad_norm": 1.0390625, "learning_rate": 0.00010881649683680792, "loss": 0.7916, "step": 31847 }, { "epoch": 0.8177673357181029, "grad_norm": 0.8125, "learning_rate": 0.0001088120500104419, "loss": 0.8797, "step": 31848 }, { "epoch": 0.8177930129140247, "grad_norm": 0.80859375, "learning_rate": 0.00010880760316651422, "loss": 0.7661, "step": 31849 }, { "epoch": 0.8178186901099466, "grad_norm": 0.80078125, "learning_rate": 0.00010880315630503381, "loss": 0.9016, "step": 31850 }, { "epoch": 0.8178443673058684, "grad_norm": 0.7578125, "learning_rate": 0.00010879870942600951, "loss": 0.7157, "step": 31851 }, { "epoch": 0.8178700445017901, "grad_norm": 0.73828125, "learning_rate": 0.0001087942625294502, "loss": 0.8141, "step": 31852 }, { "epoch": 0.817895721697712, "grad_norm": 0.74609375, "learning_rate": 0.00010878981561536468, "loss": 0.8027, "step": 31853 }, { "epoch": 0.8179213988936338, "grad_norm": 0.7109375, "learning_rate": 0.00010878536868376186, "loss": 0.7814, "step": 31854 }, { "epoch": 0.8179470760895556, "grad_norm": 0.7734375, "learning_rate": 0.0001087809217346506, "loss": 0.6808, "step": 31855 }, { "epoch": 0.8179727532854775, "grad_norm": 0.73046875, "learning_rate": 0.00010877647476803976, "loss": 0.7224, "step": 31856 }, { "epoch": 0.8179984304813993, "grad_norm": 0.80859375, "learning_rate": 0.00010877202778393816, "loss": 0.9234, "step": 31857 }, { "epoch": 0.8180241076773211, "grad_norm": 0.7421875, "learning_rate": 0.00010876758078235474, "loss": 0.8324, "step": 31858 }, { "epoch": 0.8180497848732429, "grad_norm": 0.77734375, "learning_rate": 0.00010876313376329835, "loss": 0.8399, "step": 31859 }, { "epoch": 0.8180754620691647, "grad_norm": 0.71484375, "learning_rate": 0.00010875868672677776, "loss": 0.8002, "step": 31860 }, { "epoch": 0.8181011392650865, "grad_norm": 0.890625, "learning_rate": 0.00010875423967280193, "loss": 0.9219, "step": 31861 }, { "epoch": 0.8181268164610084, "grad_norm": 0.8828125, "learning_rate": 0.0001087497926013797, "loss": 0.7553, "step": 31862 }, { "epoch": 0.8181524936569302, "grad_norm": 0.77734375, "learning_rate": 0.00010874534551251991, "loss": 0.7808, "step": 31863 }, { "epoch": 0.818178170852852, "grad_norm": 0.828125, "learning_rate": 0.00010874089840623145, "loss": 0.8846, "step": 31864 }, { "epoch": 0.8182038480487739, "grad_norm": 0.828125, "learning_rate": 0.00010873645128252316, "loss": 0.8293, "step": 31865 }, { "epoch": 0.8182295252446956, "grad_norm": 0.78125, "learning_rate": 0.00010873200414140392, "loss": 0.8822, "step": 31866 }, { "epoch": 0.8182552024406174, "grad_norm": 0.80859375, "learning_rate": 0.00010872755698288258, "loss": 0.763, "step": 31867 }, { "epoch": 0.8182808796365393, "grad_norm": 0.76953125, "learning_rate": 0.00010872310980696798, "loss": 0.8715, "step": 31868 }, { "epoch": 0.8183065568324611, "grad_norm": 0.8671875, "learning_rate": 0.00010871866261366904, "loss": 0.7513, "step": 31869 }, { "epoch": 0.818332234028383, "grad_norm": 0.7265625, "learning_rate": 0.0001087142154029946, "loss": 0.7336, "step": 31870 }, { "epoch": 0.8183579112243048, "grad_norm": 0.796875, "learning_rate": 0.00010870976817495349, "loss": 0.829, "step": 31871 }, { "epoch": 0.8183835884202265, "grad_norm": 0.75390625, "learning_rate": 0.00010870532092955463, "loss": 0.8279, "step": 31872 }, { "epoch": 0.8184092656161484, "grad_norm": 0.80859375, "learning_rate": 0.00010870087366680681, "loss": 0.7112, "step": 31873 }, { "epoch": 0.8184349428120702, "grad_norm": 0.8515625, "learning_rate": 0.00010869642638671897, "loss": 0.897, "step": 31874 }, { "epoch": 0.818460620007992, "grad_norm": 0.79296875, "learning_rate": 0.00010869197908929993, "loss": 0.7578, "step": 31875 }, { "epoch": 0.8184862972039139, "grad_norm": 0.765625, "learning_rate": 0.00010868753177455852, "loss": 0.7516, "step": 31876 }, { "epoch": 0.8185119743998357, "grad_norm": 0.7890625, "learning_rate": 0.0001086830844425037, "loss": 0.8638, "step": 31877 }, { "epoch": 0.8185376515957575, "grad_norm": 0.84765625, "learning_rate": 0.00010867863709314425, "loss": 0.6715, "step": 31878 }, { "epoch": 0.8185633287916793, "grad_norm": 0.77734375, "learning_rate": 0.00010867418972648909, "loss": 0.7824, "step": 31879 }, { "epoch": 0.8185890059876011, "grad_norm": 0.83203125, "learning_rate": 0.00010866974234254704, "loss": 0.8563, "step": 31880 }, { "epoch": 0.8186146831835229, "grad_norm": 0.796875, "learning_rate": 0.00010866529494132694, "loss": 0.8257, "step": 31881 }, { "epoch": 0.8186403603794448, "grad_norm": 0.77734375, "learning_rate": 0.00010866084752283771, "loss": 0.7851, "step": 31882 }, { "epoch": 0.8186660375753666, "grad_norm": 0.8671875, "learning_rate": 0.00010865640008708821, "loss": 0.9331, "step": 31883 }, { "epoch": 0.8186917147712884, "grad_norm": 0.73828125, "learning_rate": 0.00010865195263408727, "loss": 0.7456, "step": 31884 }, { "epoch": 0.8187173919672103, "grad_norm": 0.75390625, "learning_rate": 0.00010864750516384381, "loss": 0.8787, "step": 31885 }, { "epoch": 0.818743069163132, "grad_norm": 0.83984375, "learning_rate": 0.0001086430576763666, "loss": 0.8721, "step": 31886 }, { "epoch": 0.8187687463590538, "grad_norm": 0.71484375, "learning_rate": 0.00010863861017166459, "loss": 0.6906, "step": 31887 }, { "epoch": 0.8187944235549757, "grad_norm": 0.828125, "learning_rate": 0.0001086341626497466, "loss": 0.8131, "step": 31888 }, { "epoch": 0.8188201007508975, "grad_norm": 0.75390625, "learning_rate": 0.00010862971511062148, "loss": 0.7613, "step": 31889 }, { "epoch": 0.8188457779468193, "grad_norm": 0.83203125, "learning_rate": 0.00010862526755429813, "loss": 0.8202, "step": 31890 }, { "epoch": 0.8188714551427412, "grad_norm": 0.75390625, "learning_rate": 0.00010862081998078543, "loss": 0.8505, "step": 31891 }, { "epoch": 0.8188971323386629, "grad_norm": 0.80859375, "learning_rate": 0.0001086163723900922, "loss": 0.7806, "step": 31892 }, { "epoch": 0.8189228095345847, "grad_norm": 0.74609375, "learning_rate": 0.0001086119247822273, "loss": 0.8795, "step": 31893 }, { "epoch": 0.8189484867305066, "grad_norm": 0.84765625, "learning_rate": 0.00010860747715719962, "loss": 0.8107, "step": 31894 }, { "epoch": 0.8189741639264284, "grad_norm": 0.85546875, "learning_rate": 0.00010860302951501802, "loss": 0.8831, "step": 31895 }, { "epoch": 0.8189998411223502, "grad_norm": 0.78515625, "learning_rate": 0.00010859858185569136, "loss": 0.8825, "step": 31896 }, { "epoch": 0.8190255183182721, "grad_norm": 0.8046875, "learning_rate": 0.0001085941341792285, "loss": 0.902, "step": 31897 }, { "epoch": 0.8190511955141939, "grad_norm": 0.734375, "learning_rate": 0.00010858968648563834, "loss": 0.7937, "step": 31898 }, { "epoch": 0.8190768727101156, "grad_norm": 0.74609375, "learning_rate": 0.00010858523877492967, "loss": 0.7035, "step": 31899 }, { "epoch": 0.8191025499060375, "grad_norm": 0.73046875, "learning_rate": 0.00010858079104711138, "loss": 0.7809, "step": 31900 }, { "epoch": 0.8191282271019593, "grad_norm": 0.9453125, "learning_rate": 0.00010857634330219239, "loss": 0.7158, "step": 31901 }, { "epoch": 0.8191539042978812, "grad_norm": 0.8515625, "learning_rate": 0.00010857189554018148, "loss": 0.8631, "step": 31902 }, { "epoch": 0.819179581493803, "grad_norm": 0.78125, "learning_rate": 0.0001085674477610876, "loss": 0.8565, "step": 31903 }, { "epoch": 0.8192052586897248, "grad_norm": 0.828125, "learning_rate": 0.00010856299996491954, "loss": 0.8467, "step": 31904 }, { "epoch": 0.8192309358856467, "grad_norm": 0.8046875, "learning_rate": 0.00010855855215168622, "loss": 0.9383, "step": 31905 }, { "epoch": 0.8192566130815684, "grad_norm": 0.79296875, "learning_rate": 0.00010855410432139648, "loss": 0.8057, "step": 31906 }, { "epoch": 0.8192822902774902, "grad_norm": 0.76953125, "learning_rate": 0.00010854965647405918, "loss": 0.6829, "step": 31907 }, { "epoch": 0.8193079674734121, "grad_norm": 0.76953125, "learning_rate": 0.00010854520860968316, "loss": 0.7472, "step": 31908 }, { "epoch": 0.8193336446693339, "grad_norm": 0.86328125, "learning_rate": 0.00010854076072827733, "loss": 0.8328, "step": 31909 }, { "epoch": 0.8193593218652557, "grad_norm": 0.76953125, "learning_rate": 0.00010853631282985053, "loss": 0.9002, "step": 31910 }, { "epoch": 0.8193849990611776, "grad_norm": 0.765625, "learning_rate": 0.00010853186491441162, "loss": 0.7115, "step": 31911 }, { "epoch": 0.8194106762570993, "grad_norm": 0.765625, "learning_rate": 0.00010852741698196951, "loss": 0.7548, "step": 31912 }, { "epoch": 0.8194363534530211, "grad_norm": 0.6796875, "learning_rate": 0.00010852296903253298, "loss": 0.8092, "step": 31913 }, { "epoch": 0.819462030648943, "grad_norm": 0.796875, "learning_rate": 0.00010851852106611096, "loss": 0.716, "step": 31914 }, { "epoch": 0.8194877078448648, "grad_norm": 0.6875, "learning_rate": 0.00010851407308271231, "loss": 0.6818, "step": 31915 }, { "epoch": 0.8195133850407866, "grad_norm": 0.86328125, "learning_rate": 0.00010850962508234586, "loss": 0.7548, "step": 31916 }, { "epoch": 0.8195390622367085, "grad_norm": 0.88671875, "learning_rate": 0.00010850517706502053, "loss": 0.9745, "step": 31917 }, { "epoch": 0.8195647394326303, "grad_norm": 0.83203125, "learning_rate": 0.00010850072903074512, "loss": 0.8075, "step": 31918 }, { "epoch": 0.819590416628552, "grad_norm": 0.8125, "learning_rate": 0.00010849628097952856, "loss": 0.893, "step": 31919 }, { "epoch": 0.8196160938244739, "grad_norm": 0.7890625, "learning_rate": 0.00010849183291137965, "loss": 0.8976, "step": 31920 }, { "epoch": 0.8196417710203957, "grad_norm": 0.7421875, "learning_rate": 0.00010848738482630727, "loss": 0.8737, "step": 31921 }, { "epoch": 0.8196674482163175, "grad_norm": 0.83984375, "learning_rate": 0.00010848293672432033, "loss": 0.8027, "step": 31922 }, { "epoch": 0.8196931254122394, "grad_norm": 0.70703125, "learning_rate": 0.00010847848860542764, "loss": 0.6459, "step": 31923 }, { "epoch": 0.8197188026081612, "grad_norm": 0.83203125, "learning_rate": 0.00010847404046963812, "loss": 0.7747, "step": 31924 }, { "epoch": 0.819744479804083, "grad_norm": 0.87109375, "learning_rate": 0.00010846959231696057, "loss": 0.7203, "step": 31925 }, { "epoch": 0.8197701570000048, "grad_norm": 0.79296875, "learning_rate": 0.00010846514414740389, "loss": 0.7178, "step": 31926 }, { "epoch": 0.8197958341959266, "grad_norm": 0.94921875, "learning_rate": 0.00010846069596097696, "loss": 0.7692, "step": 31927 }, { "epoch": 0.8198215113918484, "grad_norm": 0.85546875, "learning_rate": 0.00010845624775768861, "loss": 0.646, "step": 31928 }, { "epoch": 0.8198471885877703, "grad_norm": 0.84375, "learning_rate": 0.00010845179953754771, "loss": 0.936, "step": 31929 }, { "epoch": 0.8198728657836921, "grad_norm": 0.734375, "learning_rate": 0.00010844735130056318, "loss": 0.721, "step": 31930 }, { "epoch": 0.819898542979614, "grad_norm": 0.7890625, "learning_rate": 0.00010844290304674383, "loss": 0.8613, "step": 31931 }, { "epoch": 0.8199242201755357, "grad_norm": 0.7890625, "learning_rate": 0.00010843845477609853, "loss": 0.7167, "step": 31932 }, { "epoch": 0.8199498973714575, "grad_norm": 0.796875, "learning_rate": 0.00010843400648863615, "loss": 0.8414, "step": 31933 }, { "epoch": 0.8199755745673794, "grad_norm": 3.421875, "learning_rate": 0.00010842955818436554, "loss": 0.8029, "step": 31934 }, { "epoch": 0.8200012517633012, "grad_norm": 0.7265625, "learning_rate": 0.00010842510986329559, "loss": 0.7132, "step": 31935 }, { "epoch": 0.820026928959223, "grad_norm": 0.8359375, "learning_rate": 0.00010842066152543517, "loss": 0.7542, "step": 31936 }, { "epoch": 0.8200526061551449, "grad_norm": 0.76171875, "learning_rate": 0.00010841621317079311, "loss": 0.6575, "step": 31937 }, { "epoch": 0.8200782833510667, "grad_norm": 0.76953125, "learning_rate": 0.00010841176479937836, "loss": 0.8672, "step": 31938 }, { "epoch": 0.8201039605469884, "grad_norm": 0.76171875, "learning_rate": 0.00010840731641119966, "loss": 0.7921, "step": 31939 }, { "epoch": 0.8201296377429103, "grad_norm": 0.734375, "learning_rate": 0.00010840286800626595, "loss": 0.7512, "step": 31940 }, { "epoch": 0.8201553149388321, "grad_norm": 0.80859375, "learning_rate": 0.0001083984195845861, "loss": 0.7581, "step": 31941 }, { "epoch": 0.8201809921347539, "grad_norm": 0.8671875, "learning_rate": 0.00010839397114616891, "loss": 0.7654, "step": 31942 }, { "epoch": 0.8202066693306758, "grad_norm": 0.87890625, "learning_rate": 0.00010838952269102337, "loss": 0.8391, "step": 31943 }, { "epoch": 0.8202323465265976, "grad_norm": 0.765625, "learning_rate": 0.00010838507421915824, "loss": 0.6914, "step": 31944 }, { "epoch": 0.8202580237225194, "grad_norm": 0.7421875, "learning_rate": 0.00010838062573058238, "loss": 0.7903, "step": 31945 }, { "epoch": 0.8202837009184412, "grad_norm": 0.7109375, "learning_rate": 0.00010837617722530474, "loss": 0.7295, "step": 31946 }, { "epoch": 0.820309378114363, "grad_norm": 0.8203125, "learning_rate": 0.0001083717287033341, "loss": 0.7429, "step": 31947 }, { "epoch": 0.8203350553102848, "grad_norm": 0.8125, "learning_rate": 0.00010836728016467934, "loss": 0.8899, "step": 31948 }, { "epoch": 0.8203607325062067, "grad_norm": 0.8125, "learning_rate": 0.0001083628316093494, "loss": 0.8482, "step": 31949 }, { "epoch": 0.8203864097021285, "grad_norm": 0.8125, "learning_rate": 0.00010835838303735304, "loss": 0.8212, "step": 31950 }, { "epoch": 0.8204120868980503, "grad_norm": 0.79296875, "learning_rate": 0.00010835393444869923, "loss": 0.8915, "step": 31951 }, { "epoch": 0.8204377640939721, "grad_norm": 0.84765625, "learning_rate": 0.00010834948584339677, "loss": 0.7732, "step": 31952 }, { "epoch": 0.8204634412898939, "grad_norm": 0.73046875, "learning_rate": 0.00010834503722145451, "loss": 0.7118, "step": 31953 }, { "epoch": 0.8204891184858157, "grad_norm": 0.78515625, "learning_rate": 0.00010834058858288136, "loss": 0.7737, "step": 31954 }, { "epoch": 0.8205147956817376, "grad_norm": 0.76953125, "learning_rate": 0.00010833613992768619, "loss": 0.8131, "step": 31955 }, { "epoch": 0.8205404728776594, "grad_norm": 0.828125, "learning_rate": 0.0001083316912558778, "loss": 0.9175, "step": 31956 }, { "epoch": 0.8205661500735812, "grad_norm": 0.76953125, "learning_rate": 0.00010832724256746516, "loss": 0.8171, "step": 31957 }, { "epoch": 0.8205918272695031, "grad_norm": 0.77734375, "learning_rate": 0.00010832279386245702, "loss": 0.8443, "step": 31958 }, { "epoch": 0.8206175044654248, "grad_norm": 0.85546875, "learning_rate": 0.00010831834514086237, "loss": 0.8395, "step": 31959 }, { "epoch": 0.8206431816613466, "grad_norm": 0.8046875, "learning_rate": 0.00010831389640268997, "loss": 0.881, "step": 31960 }, { "epoch": 0.8206688588572685, "grad_norm": 0.81640625, "learning_rate": 0.00010830944764794871, "loss": 0.7472, "step": 31961 }, { "epoch": 0.8206945360531903, "grad_norm": 0.8203125, "learning_rate": 0.00010830499887664749, "loss": 0.8018, "step": 31962 }, { "epoch": 0.8207202132491122, "grad_norm": 0.79296875, "learning_rate": 0.00010830055008879516, "loss": 0.8236, "step": 31963 }, { "epoch": 0.820745890445034, "grad_norm": 0.83984375, "learning_rate": 0.00010829610128440058, "loss": 0.8639, "step": 31964 }, { "epoch": 0.8207715676409558, "grad_norm": 0.890625, "learning_rate": 0.00010829165246347264, "loss": 0.8926, "step": 31965 }, { "epoch": 0.8207972448368775, "grad_norm": 1.0, "learning_rate": 0.00010828720362602014, "loss": 0.8875, "step": 31966 }, { "epoch": 0.8208229220327994, "grad_norm": 0.7890625, "learning_rate": 0.000108282754772052, "loss": 0.9123, "step": 31967 }, { "epoch": 0.8208485992287212, "grad_norm": 0.7734375, "learning_rate": 0.00010827830590157712, "loss": 0.8669, "step": 31968 }, { "epoch": 0.8208742764246431, "grad_norm": 0.71875, "learning_rate": 0.00010827385701460429, "loss": 0.8888, "step": 31969 }, { "epoch": 0.8208999536205649, "grad_norm": 0.83203125, "learning_rate": 0.00010826940811114243, "loss": 0.8535, "step": 31970 }, { "epoch": 0.8209256308164867, "grad_norm": 0.79296875, "learning_rate": 0.00010826495919120036, "loss": 0.7879, "step": 31971 }, { "epoch": 0.8209513080124085, "grad_norm": 0.8203125, "learning_rate": 0.000108260510254787, "loss": 0.7997, "step": 31972 }, { "epoch": 0.8209769852083303, "grad_norm": 0.72265625, "learning_rate": 0.00010825606130191117, "loss": 0.8259, "step": 31973 }, { "epoch": 0.8210026624042521, "grad_norm": 0.75, "learning_rate": 0.00010825161233258177, "loss": 0.818, "step": 31974 }, { "epoch": 0.821028339600174, "grad_norm": 0.73046875, "learning_rate": 0.00010824716334680765, "loss": 0.6897, "step": 31975 }, { "epoch": 0.8210540167960958, "grad_norm": 0.79296875, "learning_rate": 0.00010824271434459767, "loss": 0.8567, "step": 31976 }, { "epoch": 0.8210796939920176, "grad_norm": 0.82421875, "learning_rate": 0.00010823826532596071, "loss": 0.74, "step": 31977 }, { "epoch": 0.8211053711879395, "grad_norm": 0.80078125, "learning_rate": 0.00010823381629090563, "loss": 0.6991, "step": 31978 }, { "epoch": 0.8211310483838612, "grad_norm": 0.8125, "learning_rate": 0.00010822936723944129, "loss": 0.6765, "step": 31979 }, { "epoch": 0.821156725579783, "grad_norm": 0.8671875, "learning_rate": 0.00010822491817157656, "loss": 0.8661, "step": 31980 }, { "epoch": 0.8211824027757049, "grad_norm": 0.75390625, "learning_rate": 0.00010822046908732033, "loss": 0.752, "step": 31981 }, { "epoch": 0.8212080799716267, "grad_norm": 0.8671875, "learning_rate": 0.00010821601998668143, "loss": 0.8659, "step": 31982 }, { "epoch": 0.8212337571675485, "grad_norm": 0.80078125, "learning_rate": 0.00010821157086966875, "loss": 0.8424, "step": 31983 }, { "epoch": 0.8212594343634704, "grad_norm": 0.8203125, "learning_rate": 0.00010820712173629119, "loss": 0.8598, "step": 31984 }, { "epoch": 0.8212851115593922, "grad_norm": 0.7890625, "learning_rate": 0.00010820267258655754, "loss": 0.9003, "step": 31985 }, { "epoch": 0.8213107887553139, "grad_norm": 0.80859375, "learning_rate": 0.00010819822342047672, "loss": 0.9167, "step": 31986 }, { "epoch": 0.8213364659512358, "grad_norm": 0.765625, "learning_rate": 0.00010819377423805755, "loss": 0.9082, "step": 31987 }, { "epoch": 0.8213621431471576, "grad_norm": 0.8203125, "learning_rate": 0.00010818932503930895, "loss": 0.86, "step": 31988 }, { "epoch": 0.8213878203430794, "grad_norm": 0.81640625, "learning_rate": 0.00010818487582423977, "loss": 0.8929, "step": 31989 }, { "epoch": 0.8214134975390013, "grad_norm": 0.7890625, "learning_rate": 0.00010818042659285885, "loss": 0.857, "step": 31990 }, { "epoch": 0.8214391747349231, "grad_norm": 0.72265625, "learning_rate": 0.0001081759773451751, "loss": 0.7665, "step": 31991 }, { "epoch": 0.8214648519308448, "grad_norm": 0.796875, "learning_rate": 0.00010817152808119737, "loss": 0.774, "step": 31992 }, { "epoch": 0.8214905291267667, "grad_norm": 0.95703125, "learning_rate": 0.00010816707880093451, "loss": 0.83, "step": 31993 }, { "epoch": 0.8215162063226885, "grad_norm": 0.828125, "learning_rate": 0.0001081626295043954, "loss": 0.9938, "step": 31994 }, { "epoch": 0.8215418835186103, "grad_norm": 0.83984375, "learning_rate": 0.00010815818019158892, "loss": 0.8117, "step": 31995 }, { "epoch": 0.8215675607145322, "grad_norm": 0.67578125, "learning_rate": 0.0001081537308625239, "loss": 0.6413, "step": 31996 }, { "epoch": 0.821593237910454, "grad_norm": 0.8125, "learning_rate": 0.00010814928151720927, "loss": 0.6991, "step": 31997 }, { "epoch": 0.8216189151063759, "grad_norm": 0.796875, "learning_rate": 0.0001081448321556538, "loss": 0.818, "step": 31998 }, { "epoch": 0.8216445923022976, "grad_norm": 0.73046875, "learning_rate": 0.00010814038277786644, "loss": 0.7625, "step": 31999 }, { "epoch": 0.8216702694982194, "grad_norm": 0.984375, "learning_rate": 0.00010813593338385605, "loss": 0.784, "step": 32000 }, { "epoch": 0.8216702694982194, "eval_loss": 0.8049712181091309, "eval_runtime": 354.7977, "eval_samples_per_second": 28.185, "eval_steps_per_second": 0.882, "step": 32000 }, { "epoch": 0.8216959466941413, "grad_norm": 0.7109375, "learning_rate": 0.00010813148397363146, "loss": 0.7735, "step": 32001 }, { "epoch": 0.8217216238900631, "grad_norm": 0.8515625, "learning_rate": 0.00010812703454720157, "loss": 0.7665, "step": 32002 }, { "epoch": 0.8217473010859849, "grad_norm": 0.8359375, "learning_rate": 0.00010812258510457523, "loss": 0.9308, "step": 32003 }, { "epoch": 0.8217729782819068, "grad_norm": 0.7421875, "learning_rate": 0.00010811813564576132, "loss": 0.8966, "step": 32004 }, { "epoch": 0.8217986554778286, "grad_norm": 0.8046875, "learning_rate": 0.0001081136861707687, "loss": 0.8567, "step": 32005 }, { "epoch": 0.8218243326737503, "grad_norm": 0.828125, "learning_rate": 0.0001081092366796062, "loss": 0.8934, "step": 32006 }, { "epoch": 0.8218500098696722, "grad_norm": 0.77734375, "learning_rate": 0.00010810478717228277, "loss": 0.7904, "step": 32007 }, { "epoch": 0.821875687065594, "grad_norm": 0.7890625, "learning_rate": 0.0001081003376488072, "loss": 0.8318, "step": 32008 }, { "epoch": 0.8219013642615158, "grad_norm": 0.8125, "learning_rate": 0.00010809588810918838, "loss": 0.9321, "step": 32009 }, { "epoch": 0.8219270414574377, "grad_norm": 0.8046875, "learning_rate": 0.00010809143855343524, "loss": 0.7949, "step": 32010 }, { "epoch": 0.8219527186533595, "grad_norm": 0.72265625, "learning_rate": 0.00010808698898155654, "loss": 0.8113, "step": 32011 }, { "epoch": 0.8219783958492812, "grad_norm": 0.73046875, "learning_rate": 0.00010808253939356123, "loss": 0.8532, "step": 32012 }, { "epoch": 0.8220040730452031, "grad_norm": 0.7734375, "learning_rate": 0.00010807808978945815, "loss": 0.7903, "step": 32013 }, { "epoch": 0.8220297502411249, "grad_norm": 0.78125, "learning_rate": 0.00010807364016925614, "loss": 0.6978, "step": 32014 }, { "epoch": 0.8220554274370467, "grad_norm": 0.7421875, "learning_rate": 0.00010806919053296412, "loss": 0.78, "step": 32015 }, { "epoch": 0.8220811046329686, "grad_norm": 0.76953125, "learning_rate": 0.00010806474088059093, "loss": 0.7363, "step": 32016 }, { "epoch": 0.8221067818288904, "grad_norm": 0.875, "learning_rate": 0.00010806029121214543, "loss": 0.6926, "step": 32017 }, { "epoch": 0.8221324590248122, "grad_norm": 0.82421875, "learning_rate": 0.00010805584152763651, "loss": 0.7708, "step": 32018 }, { "epoch": 0.822158136220734, "grad_norm": 0.78125, "learning_rate": 0.000108051391827073, "loss": 0.8914, "step": 32019 }, { "epoch": 0.8221838134166558, "grad_norm": 0.8125, "learning_rate": 0.00010804694211046382, "loss": 0.9737, "step": 32020 }, { "epoch": 0.8222094906125776, "grad_norm": 0.76953125, "learning_rate": 0.0001080424923778178, "loss": 0.7912, "step": 32021 }, { "epoch": 0.8222351678084995, "grad_norm": 0.76953125, "learning_rate": 0.00010803804262914381, "loss": 0.6696, "step": 32022 }, { "epoch": 0.8222608450044213, "grad_norm": 0.80078125, "learning_rate": 0.00010803359286445076, "loss": 0.7793, "step": 32023 }, { "epoch": 0.8222865222003432, "grad_norm": 0.8203125, "learning_rate": 0.00010802914308374745, "loss": 0.8912, "step": 32024 }, { "epoch": 0.8223121993962649, "grad_norm": 0.81640625, "learning_rate": 0.00010802469328704281, "loss": 0.9256, "step": 32025 }, { "epoch": 0.8223378765921867, "grad_norm": 0.828125, "learning_rate": 0.00010802024347434568, "loss": 0.7623, "step": 32026 }, { "epoch": 0.8223635537881085, "grad_norm": 0.69921875, "learning_rate": 0.00010801579364566488, "loss": 0.7149, "step": 32027 }, { "epoch": 0.8223892309840304, "grad_norm": 0.74609375, "learning_rate": 0.00010801134380100937, "loss": 0.8747, "step": 32028 }, { "epoch": 0.8224149081799522, "grad_norm": 0.9140625, "learning_rate": 0.00010800689394038799, "loss": 0.8093, "step": 32029 }, { "epoch": 0.8224405853758741, "grad_norm": 0.78125, "learning_rate": 0.00010800244406380957, "loss": 0.8196, "step": 32030 }, { "epoch": 0.8224662625717959, "grad_norm": 0.859375, "learning_rate": 0.000107997994171283, "loss": 0.9003, "step": 32031 }, { "epoch": 0.8224919397677176, "grad_norm": 0.81640625, "learning_rate": 0.00010799354426281715, "loss": 0.7016, "step": 32032 }, { "epoch": 0.8225176169636395, "grad_norm": 0.8828125, "learning_rate": 0.0001079890943384209, "loss": 0.9134, "step": 32033 }, { "epoch": 0.8225432941595613, "grad_norm": 0.78125, "learning_rate": 0.0001079846443981031, "loss": 0.7569, "step": 32034 }, { "epoch": 0.8225689713554831, "grad_norm": 0.79296875, "learning_rate": 0.00010798019444187264, "loss": 0.8211, "step": 32035 }, { "epoch": 0.822594648551405, "grad_norm": 0.84375, "learning_rate": 0.00010797574446973835, "loss": 0.8556, "step": 32036 }, { "epoch": 0.8226203257473268, "grad_norm": 0.8046875, "learning_rate": 0.00010797129448170915, "loss": 0.9542, "step": 32037 }, { "epoch": 0.8226460029432486, "grad_norm": 0.8125, "learning_rate": 0.00010796684447779383, "loss": 0.7812, "step": 32038 }, { "epoch": 0.8226716801391704, "grad_norm": 0.8046875, "learning_rate": 0.00010796239445800136, "loss": 0.9663, "step": 32039 }, { "epoch": 0.8226973573350922, "grad_norm": 0.83984375, "learning_rate": 0.00010795794442234053, "loss": 0.8134, "step": 32040 }, { "epoch": 0.822723034531014, "grad_norm": 0.8046875, "learning_rate": 0.00010795349437082023, "loss": 0.852, "step": 32041 }, { "epoch": 0.8227487117269359, "grad_norm": 0.8515625, "learning_rate": 0.00010794904430344934, "loss": 0.8002, "step": 32042 }, { "epoch": 0.8227743889228577, "grad_norm": 0.734375, "learning_rate": 0.00010794459422023673, "loss": 0.8205, "step": 32043 }, { "epoch": 0.8228000661187795, "grad_norm": 0.7734375, "learning_rate": 0.00010794014412119127, "loss": 0.8467, "step": 32044 }, { "epoch": 0.8228257433147013, "grad_norm": 0.76953125, "learning_rate": 0.00010793569400632181, "loss": 0.8878, "step": 32045 }, { "epoch": 0.8228514205106231, "grad_norm": 0.828125, "learning_rate": 0.00010793124387563722, "loss": 0.7657, "step": 32046 }, { "epoch": 0.8228770977065449, "grad_norm": 0.78515625, "learning_rate": 0.00010792679372914638, "loss": 0.8345, "step": 32047 }, { "epoch": 0.8229027749024668, "grad_norm": 0.76953125, "learning_rate": 0.00010792234356685818, "loss": 0.7746, "step": 32048 }, { "epoch": 0.8229284520983886, "grad_norm": 0.75390625, "learning_rate": 0.00010791789338878144, "loss": 0.7398, "step": 32049 }, { "epoch": 0.8229541292943104, "grad_norm": 0.83984375, "learning_rate": 0.00010791344319492506, "loss": 0.9238, "step": 32050 }, { "epoch": 0.8229798064902323, "grad_norm": 0.8125, "learning_rate": 0.00010790899298529791, "loss": 0.8151, "step": 32051 }, { "epoch": 0.823005483686154, "grad_norm": 0.74609375, "learning_rate": 0.00010790454275990885, "loss": 0.826, "step": 32052 }, { "epoch": 0.8230311608820758, "grad_norm": 0.859375, "learning_rate": 0.00010790009251876674, "loss": 0.837, "step": 32053 }, { "epoch": 0.8230568380779977, "grad_norm": 0.796875, "learning_rate": 0.00010789564226188045, "loss": 0.7407, "step": 32054 }, { "epoch": 0.8230825152739195, "grad_norm": 0.73046875, "learning_rate": 0.00010789119198925888, "loss": 0.6859, "step": 32055 }, { "epoch": 0.8231081924698413, "grad_norm": 0.87109375, "learning_rate": 0.00010788674170091087, "loss": 0.9053, "step": 32056 }, { "epoch": 0.8231338696657632, "grad_norm": 0.75390625, "learning_rate": 0.00010788229139684529, "loss": 0.7445, "step": 32057 }, { "epoch": 0.823159546861685, "grad_norm": 0.765625, "learning_rate": 0.00010787784107707104, "loss": 0.8215, "step": 32058 }, { "epoch": 0.8231852240576067, "grad_norm": 0.75390625, "learning_rate": 0.00010787339074159693, "loss": 0.8556, "step": 32059 }, { "epoch": 0.8232109012535286, "grad_norm": 0.7890625, "learning_rate": 0.00010786894039043187, "loss": 0.788, "step": 32060 }, { "epoch": 0.8232365784494504, "grad_norm": 0.67578125, "learning_rate": 0.00010786449002358475, "loss": 0.7603, "step": 32061 }, { "epoch": 0.8232622556453723, "grad_norm": 0.7890625, "learning_rate": 0.00010786003964106437, "loss": 0.9431, "step": 32062 }, { "epoch": 0.8232879328412941, "grad_norm": 0.75390625, "learning_rate": 0.00010785558924287969, "loss": 0.7733, "step": 32063 }, { "epoch": 0.8233136100372159, "grad_norm": 0.8984375, "learning_rate": 0.0001078511388290395, "loss": 0.8595, "step": 32064 }, { "epoch": 0.8233392872331377, "grad_norm": 0.7109375, "learning_rate": 0.0001078466883995527, "loss": 0.8266, "step": 32065 }, { "epoch": 0.8233649644290595, "grad_norm": 1.0234375, "learning_rate": 0.00010784223795442816, "loss": 0.9543, "step": 32066 }, { "epoch": 0.8233906416249813, "grad_norm": 0.81640625, "learning_rate": 0.00010783778749367475, "loss": 0.8866, "step": 32067 }, { "epoch": 0.8234163188209032, "grad_norm": 0.76953125, "learning_rate": 0.00010783333701730134, "loss": 0.8145, "step": 32068 }, { "epoch": 0.823441996016825, "grad_norm": 0.7109375, "learning_rate": 0.00010782888652531682, "loss": 0.8294, "step": 32069 }, { "epoch": 0.8234676732127468, "grad_norm": 0.75, "learning_rate": 0.00010782443601772998, "loss": 0.7437, "step": 32070 }, { "epoch": 0.8234933504086687, "grad_norm": 0.78125, "learning_rate": 0.0001078199854945498, "loss": 0.8607, "step": 32071 }, { "epoch": 0.8235190276045904, "grad_norm": 0.80078125, "learning_rate": 0.00010781553495578507, "loss": 0.8437, "step": 32072 }, { "epoch": 0.8235447048005122, "grad_norm": 1.5, "learning_rate": 0.00010781108440144467, "loss": 0.7576, "step": 32073 }, { "epoch": 0.8235703819964341, "grad_norm": 0.68359375, "learning_rate": 0.00010780663383153751, "loss": 0.7342, "step": 32074 }, { "epoch": 0.8235960591923559, "grad_norm": 0.7890625, "learning_rate": 0.0001078021832460724, "loss": 0.8569, "step": 32075 }, { "epoch": 0.8236217363882777, "grad_norm": 0.73828125, "learning_rate": 0.00010779773264505831, "loss": 0.8089, "step": 32076 }, { "epoch": 0.8236474135841996, "grad_norm": 0.75390625, "learning_rate": 0.000107793282028504, "loss": 0.8413, "step": 32077 }, { "epoch": 0.8236730907801214, "grad_norm": 0.78125, "learning_rate": 0.00010778883139641838, "loss": 0.7008, "step": 32078 }, { "epoch": 0.8236987679760431, "grad_norm": 0.74609375, "learning_rate": 0.00010778438074881033, "loss": 0.8329, "step": 32079 }, { "epoch": 0.823724445171965, "grad_norm": 0.78125, "learning_rate": 0.00010777993008568872, "loss": 0.7728, "step": 32080 }, { "epoch": 0.8237501223678868, "grad_norm": 0.8984375, "learning_rate": 0.00010777547940706236, "loss": 0.7218, "step": 32081 }, { "epoch": 0.8237757995638086, "grad_norm": 0.88671875, "learning_rate": 0.00010777102871294025, "loss": 0.7968, "step": 32082 }, { "epoch": 0.8238014767597305, "grad_norm": 0.73828125, "learning_rate": 0.00010776657800333114, "loss": 0.8345, "step": 32083 }, { "epoch": 0.8238271539556523, "grad_norm": 0.79296875, "learning_rate": 0.00010776212727824395, "loss": 0.8531, "step": 32084 }, { "epoch": 0.823852831151574, "grad_norm": 0.81640625, "learning_rate": 0.00010775767653768753, "loss": 0.7807, "step": 32085 }, { "epoch": 0.8238785083474959, "grad_norm": 0.8515625, "learning_rate": 0.00010775322578167078, "loss": 0.8684, "step": 32086 }, { "epoch": 0.8239041855434177, "grad_norm": 0.8125, "learning_rate": 0.00010774877501020252, "loss": 1.0198, "step": 32087 }, { "epoch": 0.8239298627393395, "grad_norm": 0.69921875, "learning_rate": 0.00010774432422329168, "loss": 0.6619, "step": 32088 }, { "epoch": 0.8239555399352614, "grad_norm": 0.7265625, "learning_rate": 0.00010773987342094712, "loss": 0.7474, "step": 32089 }, { "epoch": 0.8239812171311832, "grad_norm": 0.89453125, "learning_rate": 0.00010773542260317766, "loss": 0.8663, "step": 32090 }, { "epoch": 0.824006894327105, "grad_norm": 0.83203125, "learning_rate": 0.00010773097176999218, "loss": 0.8773, "step": 32091 }, { "epoch": 0.8240325715230268, "grad_norm": 0.70703125, "learning_rate": 0.00010772652092139961, "loss": 0.7866, "step": 32092 }, { "epoch": 0.8240582487189486, "grad_norm": 0.76171875, "learning_rate": 0.00010772207005740878, "loss": 0.7513, "step": 32093 }, { "epoch": 0.8240839259148705, "grad_norm": 0.7890625, "learning_rate": 0.00010771761917802854, "loss": 0.836, "step": 32094 }, { "epoch": 0.8241096031107923, "grad_norm": 0.8125, "learning_rate": 0.0001077131682832678, "loss": 0.998, "step": 32095 }, { "epoch": 0.8241352803067141, "grad_norm": 0.78515625, "learning_rate": 0.00010770871737313542, "loss": 0.8285, "step": 32096 }, { "epoch": 0.824160957502636, "grad_norm": 0.796875, "learning_rate": 0.00010770426644764025, "loss": 0.8839, "step": 32097 }, { "epoch": 0.8241866346985578, "grad_norm": 0.796875, "learning_rate": 0.00010769981550679118, "loss": 0.8422, "step": 32098 }, { "epoch": 0.8242123118944795, "grad_norm": 0.75390625, "learning_rate": 0.00010769536455059703, "loss": 0.7357, "step": 32099 }, { "epoch": 0.8242379890904014, "grad_norm": 0.75, "learning_rate": 0.00010769091357906676, "loss": 0.8062, "step": 32100 }, { "epoch": 0.8242636662863232, "grad_norm": 0.75, "learning_rate": 0.0001076864625922092, "loss": 0.7139, "step": 32101 }, { "epoch": 0.824289343482245, "grad_norm": 0.796875, "learning_rate": 0.00010768201159003319, "loss": 0.9173, "step": 32102 }, { "epoch": 0.8243150206781669, "grad_norm": 0.7890625, "learning_rate": 0.00010767756057254766, "loss": 0.8527, "step": 32103 }, { "epoch": 0.8243406978740887, "grad_norm": 0.78515625, "learning_rate": 0.00010767310953976137, "loss": 0.6615, "step": 32104 }, { "epoch": 0.8243663750700104, "grad_norm": 0.83984375, "learning_rate": 0.00010766865849168333, "loss": 0.7286, "step": 32105 }, { "epoch": 0.8243920522659323, "grad_norm": 0.765625, "learning_rate": 0.00010766420742832233, "loss": 0.7097, "step": 32106 }, { "epoch": 0.8244177294618541, "grad_norm": 0.83984375, "learning_rate": 0.00010765975634968723, "loss": 0.8185, "step": 32107 }, { "epoch": 0.8244434066577759, "grad_norm": 0.8203125, "learning_rate": 0.00010765530525578698, "loss": 0.7868, "step": 32108 }, { "epoch": 0.8244690838536978, "grad_norm": 0.734375, "learning_rate": 0.00010765085414663039, "loss": 0.724, "step": 32109 }, { "epoch": 0.8244947610496196, "grad_norm": 0.73046875, "learning_rate": 0.0001076464030222263, "loss": 0.7742, "step": 32110 }, { "epoch": 0.8245204382455414, "grad_norm": 0.8828125, "learning_rate": 0.00010764195188258365, "loss": 0.8268, "step": 32111 }, { "epoch": 0.8245461154414632, "grad_norm": 0.74609375, "learning_rate": 0.00010763750072771126, "loss": 0.7452, "step": 32112 }, { "epoch": 0.824571792637385, "grad_norm": 0.80078125, "learning_rate": 0.00010763304955761804, "loss": 0.8928, "step": 32113 }, { "epoch": 0.8245974698333068, "grad_norm": 0.83984375, "learning_rate": 0.00010762859837231283, "loss": 0.812, "step": 32114 }, { "epoch": 0.8246231470292287, "grad_norm": 0.6953125, "learning_rate": 0.00010762414717180454, "loss": 0.7712, "step": 32115 }, { "epoch": 0.8246488242251505, "grad_norm": 0.80078125, "learning_rate": 0.00010761969595610198, "loss": 0.8068, "step": 32116 }, { "epoch": 0.8246745014210723, "grad_norm": 0.7265625, "learning_rate": 0.00010761524472521407, "loss": 0.6897, "step": 32117 }, { "epoch": 0.8247001786169942, "grad_norm": 0.8046875, "learning_rate": 0.00010761079347914964, "loss": 0.8289, "step": 32118 }, { "epoch": 0.8247258558129159, "grad_norm": 0.83203125, "learning_rate": 0.0001076063422179176, "loss": 0.8612, "step": 32119 }, { "epoch": 0.8247515330088377, "grad_norm": 0.82421875, "learning_rate": 0.00010760189094152684, "loss": 0.8237, "step": 32120 }, { "epoch": 0.8247772102047596, "grad_norm": 0.77734375, "learning_rate": 0.00010759743964998614, "loss": 0.6882, "step": 32121 }, { "epoch": 0.8248028874006814, "grad_norm": 0.7265625, "learning_rate": 0.00010759298834330449, "loss": 0.8169, "step": 32122 }, { "epoch": 0.8248285645966033, "grad_norm": 0.7265625, "learning_rate": 0.00010758853702149066, "loss": 0.8299, "step": 32123 }, { "epoch": 0.8248542417925251, "grad_norm": 0.8203125, "learning_rate": 0.00010758408568455356, "loss": 0.8608, "step": 32124 }, { "epoch": 0.8248799189884468, "grad_norm": 0.7890625, "learning_rate": 0.00010757963433250209, "loss": 0.8436, "step": 32125 }, { "epoch": 0.8249055961843687, "grad_norm": 0.7734375, "learning_rate": 0.00010757518296534505, "loss": 0.7078, "step": 32126 }, { "epoch": 0.8249312733802905, "grad_norm": 0.75390625, "learning_rate": 0.00010757073158309139, "loss": 0.7733, "step": 32127 }, { "epoch": 0.8249569505762123, "grad_norm": 0.82421875, "learning_rate": 0.00010756628018574995, "loss": 0.7966, "step": 32128 }, { "epoch": 0.8249826277721342, "grad_norm": 0.76171875, "learning_rate": 0.00010756182877332958, "loss": 0.8344, "step": 32129 }, { "epoch": 0.825008304968056, "grad_norm": 0.76171875, "learning_rate": 0.00010755737734583917, "loss": 0.7655, "step": 32130 }, { "epoch": 0.8250339821639778, "grad_norm": 0.7890625, "learning_rate": 0.00010755292590328758, "loss": 0.912, "step": 32131 }, { "epoch": 0.8250596593598996, "grad_norm": 0.7109375, "learning_rate": 0.0001075484744456837, "loss": 0.8383, "step": 32132 }, { "epoch": 0.8250853365558214, "grad_norm": 0.828125, "learning_rate": 0.00010754402297303641, "loss": 0.7179, "step": 32133 }, { "epoch": 0.8251110137517432, "grad_norm": 0.8125, "learning_rate": 0.00010753957148535452, "loss": 0.756, "step": 32134 }, { "epoch": 0.8251366909476651, "grad_norm": 0.78125, "learning_rate": 0.000107535119982647, "loss": 0.8871, "step": 32135 }, { "epoch": 0.8251623681435869, "grad_norm": 0.74609375, "learning_rate": 0.00010753066846492264, "loss": 0.6321, "step": 32136 }, { "epoch": 0.8251880453395087, "grad_norm": 0.7578125, "learning_rate": 0.00010752621693219034, "loss": 0.7616, "step": 32137 }, { "epoch": 0.8252137225354306, "grad_norm": 0.78125, "learning_rate": 0.00010752176538445897, "loss": 0.8422, "step": 32138 }, { "epoch": 0.8252393997313523, "grad_norm": 0.6640625, "learning_rate": 0.00010751731382173738, "loss": 0.6712, "step": 32139 }, { "epoch": 0.8252650769272741, "grad_norm": 0.75, "learning_rate": 0.0001075128622440345, "loss": 0.8294, "step": 32140 }, { "epoch": 0.825290754123196, "grad_norm": 0.80078125, "learning_rate": 0.00010750841065135912, "loss": 0.8468, "step": 32141 }, { "epoch": 0.8253164313191178, "grad_norm": 0.7578125, "learning_rate": 0.00010750395904372019, "loss": 0.7765, "step": 32142 }, { "epoch": 0.8253421085150396, "grad_norm": 0.8203125, "learning_rate": 0.00010749950742112656, "loss": 0.8896, "step": 32143 }, { "epoch": 0.8253677857109615, "grad_norm": 0.74609375, "learning_rate": 0.00010749505578358705, "loss": 0.8069, "step": 32144 }, { "epoch": 0.8253934629068832, "grad_norm": 0.78515625, "learning_rate": 0.00010749060413111058, "loss": 0.7951, "step": 32145 }, { "epoch": 0.825419140102805, "grad_norm": 0.8671875, "learning_rate": 0.00010748615246370604, "loss": 0.8321, "step": 32146 }, { "epoch": 0.8254448172987269, "grad_norm": 0.703125, "learning_rate": 0.00010748170078138225, "loss": 0.8442, "step": 32147 }, { "epoch": 0.8254704944946487, "grad_norm": 0.765625, "learning_rate": 0.00010747724908414811, "loss": 0.8647, "step": 32148 }, { "epoch": 0.8254961716905705, "grad_norm": 0.84375, "learning_rate": 0.00010747279737201248, "loss": 0.8925, "step": 32149 }, { "epoch": 0.8255218488864924, "grad_norm": 0.74609375, "learning_rate": 0.00010746834564498427, "loss": 0.8259, "step": 32150 }, { "epoch": 0.8255475260824142, "grad_norm": 0.8984375, "learning_rate": 0.0001074638939030723, "loss": 0.8597, "step": 32151 }, { "epoch": 0.8255732032783359, "grad_norm": 0.83984375, "learning_rate": 0.00010745944214628545, "loss": 0.7865, "step": 32152 }, { "epoch": 0.8255988804742578, "grad_norm": 0.73046875, "learning_rate": 0.00010745499037463262, "loss": 0.7405, "step": 32153 }, { "epoch": 0.8256245576701796, "grad_norm": 0.859375, "learning_rate": 0.00010745053858812268, "loss": 0.808, "step": 32154 }, { "epoch": 0.8256502348661015, "grad_norm": 1.2734375, "learning_rate": 0.00010744608678676446, "loss": 0.7196, "step": 32155 }, { "epoch": 0.8256759120620233, "grad_norm": 0.78515625, "learning_rate": 0.00010744163497056689, "loss": 0.8351, "step": 32156 }, { "epoch": 0.8257015892579451, "grad_norm": 0.8046875, "learning_rate": 0.00010743718313953881, "loss": 0.7931, "step": 32157 }, { "epoch": 0.825727266453867, "grad_norm": 0.80078125, "learning_rate": 0.0001074327312936891, "loss": 0.8661, "step": 32158 }, { "epoch": 0.8257529436497887, "grad_norm": 0.80078125, "learning_rate": 0.0001074282794330266, "loss": 0.8463, "step": 32159 }, { "epoch": 0.8257786208457105, "grad_norm": 0.734375, "learning_rate": 0.00010742382755756024, "loss": 0.7318, "step": 32160 }, { "epoch": 0.8258042980416324, "grad_norm": 0.7109375, "learning_rate": 0.00010741937566729885, "loss": 0.7939, "step": 32161 }, { "epoch": 0.8258299752375542, "grad_norm": 1.140625, "learning_rate": 0.0001074149237622513, "loss": 0.8574, "step": 32162 }, { "epoch": 0.825855652433476, "grad_norm": 0.83203125, "learning_rate": 0.0001074104718424265, "loss": 0.8697, "step": 32163 }, { "epoch": 0.8258813296293979, "grad_norm": 0.76953125, "learning_rate": 0.00010740601990783328, "loss": 0.8595, "step": 32164 }, { "epoch": 0.8259070068253196, "grad_norm": 0.71875, "learning_rate": 0.00010740156795848055, "loss": 0.8426, "step": 32165 }, { "epoch": 0.8259326840212414, "grad_norm": 0.78125, "learning_rate": 0.00010739711599437714, "loss": 0.7199, "step": 32166 }, { "epoch": 0.8259583612171633, "grad_norm": 0.796875, "learning_rate": 0.00010739266401553196, "loss": 0.7466, "step": 32167 }, { "epoch": 0.8259840384130851, "grad_norm": 0.75390625, "learning_rate": 0.00010738821202195388, "loss": 0.8096, "step": 32168 }, { "epoch": 0.8260097156090069, "grad_norm": 0.88671875, "learning_rate": 0.00010738376001365176, "loss": 0.8201, "step": 32169 }, { "epoch": 0.8260353928049288, "grad_norm": 0.80078125, "learning_rate": 0.00010737930799063446, "loss": 0.7796, "step": 32170 }, { "epoch": 0.8260610700008506, "grad_norm": 0.85546875, "learning_rate": 0.00010737485595291086, "loss": 0.8236, "step": 32171 }, { "epoch": 0.8260867471967723, "grad_norm": 0.8125, "learning_rate": 0.00010737040390048985, "loss": 0.8014, "step": 32172 }, { "epoch": 0.8261124243926942, "grad_norm": 0.828125, "learning_rate": 0.0001073659518333803, "loss": 0.8476, "step": 32173 }, { "epoch": 0.826138101588616, "grad_norm": 0.703125, "learning_rate": 0.00010736149975159104, "loss": 0.7491, "step": 32174 }, { "epoch": 0.8261637787845378, "grad_norm": 0.80859375, "learning_rate": 0.00010735704765513102, "loss": 0.7194, "step": 32175 }, { "epoch": 0.8261894559804597, "grad_norm": 0.6953125, "learning_rate": 0.00010735259554400903, "loss": 0.701, "step": 32176 }, { "epoch": 0.8262151331763815, "grad_norm": 0.8203125, "learning_rate": 0.000107348143418234, "loss": 0.8709, "step": 32177 }, { "epoch": 0.8262408103723033, "grad_norm": 0.75390625, "learning_rate": 0.00010734369127781478, "loss": 0.9641, "step": 32178 }, { "epoch": 0.8262664875682251, "grad_norm": 0.6953125, "learning_rate": 0.00010733923912276023, "loss": 0.7528, "step": 32179 }, { "epoch": 0.8262921647641469, "grad_norm": 0.75, "learning_rate": 0.00010733478695307925, "loss": 0.7245, "step": 32180 }, { "epoch": 0.8263178419600687, "grad_norm": 0.79296875, "learning_rate": 0.00010733033476878072, "loss": 0.6733, "step": 32181 }, { "epoch": 0.8263435191559906, "grad_norm": 0.79296875, "learning_rate": 0.0001073258825698735, "loss": 0.7588, "step": 32182 }, { "epoch": 0.8263691963519124, "grad_norm": 0.8125, "learning_rate": 0.00010732143035636644, "loss": 0.7798, "step": 32183 }, { "epoch": 0.8263948735478343, "grad_norm": 0.71875, "learning_rate": 0.00010731697812826842, "loss": 0.6646, "step": 32184 }, { "epoch": 0.826420550743756, "grad_norm": 0.69921875, "learning_rate": 0.00010731252588558834, "loss": 0.7391, "step": 32185 }, { "epoch": 0.8264462279396778, "grad_norm": 0.7265625, "learning_rate": 0.00010730807362833506, "loss": 0.665, "step": 32186 }, { "epoch": 0.8264719051355996, "grad_norm": 0.83984375, "learning_rate": 0.00010730362135651741, "loss": 0.8878, "step": 32187 }, { "epoch": 0.8264975823315215, "grad_norm": 0.75390625, "learning_rate": 0.00010729916907014436, "loss": 0.8459, "step": 32188 }, { "epoch": 0.8265232595274433, "grad_norm": 0.71875, "learning_rate": 0.0001072947167692247, "loss": 0.7608, "step": 32189 }, { "epoch": 0.8265489367233652, "grad_norm": 0.765625, "learning_rate": 0.00010729026445376733, "loss": 0.7976, "step": 32190 }, { "epoch": 0.826574613919287, "grad_norm": 0.78515625, "learning_rate": 0.00010728581212378112, "loss": 0.8236, "step": 32191 }, { "epoch": 0.8266002911152087, "grad_norm": 0.70703125, "learning_rate": 0.00010728135977927491, "loss": 0.7767, "step": 32192 }, { "epoch": 0.8266259683111306, "grad_norm": 0.75390625, "learning_rate": 0.00010727690742025766, "loss": 0.821, "step": 32193 }, { "epoch": 0.8266516455070524, "grad_norm": 0.85546875, "learning_rate": 0.00010727245504673819, "loss": 0.7029, "step": 32194 }, { "epoch": 0.8266773227029742, "grad_norm": 0.7890625, "learning_rate": 0.00010726800265872534, "loss": 0.8698, "step": 32195 }, { "epoch": 0.8267029998988961, "grad_norm": 0.8125, "learning_rate": 0.00010726355025622805, "loss": 0.8466, "step": 32196 }, { "epoch": 0.8267286770948179, "grad_norm": 0.76171875, "learning_rate": 0.00010725909783925514, "loss": 0.9167, "step": 32197 }, { "epoch": 0.8267543542907397, "grad_norm": 0.8203125, "learning_rate": 0.00010725464540781549, "loss": 0.8216, "step": 32198 }, { "epoch": 0.8267800314866615, "grad_norm": 0.6796875, "learning_rate": 0.00010725019296191803, "loss": 0.7405, "step": 32199 }, { "epoch": 0.8268057086825833, "grad_norm": 0.7109375, "learning_rate": 0.00010724574050157154, "loss": 0.7875, "step": 32200 }, { "epoch": 0.8268313858785051, "grad_norm": 0.734375, "learning_rate": 0.000107241288026785, "loss": 0.8717, "step": 32201 }, { "epoch": 0.826857063074427, "grad_norm": 0.7578125, "learning_rate": 0.00010723683553756721, "loss": 0.8466, "step": 32202 }, { "epoch": 0.8268827402703488, "grad_norm": 0.79296875, "learning_rate": 0.00010723238303392702, "loss": 0.8613, "step": 32203 }, { "epoch": 0.8269084174662706, "grad_norm": 0.82421875, "learning_rate": 0.00010722793051587336, "loss": 0.8171, "step": 32204 }, { "epoch": 0.8269340946621924, "grad_norm": 0.75, "learning_rate": 0.0001072234779834151, "loss": 0.7365, "step": 32205 }, { "epoch": 0.8269597718581142, "grad_norm": 0.7734375, "learning_rate": 0.00010721902543656108, "loss": 0.8305, "step": 32206 }, { "epoch": 0.826985449054036, "grad_norm": 0.80859375, "learning_rate": 0.00010721457287532024, "loss": 0.8677, "step": 32207 }, { "epoch": 0.8270111262499579, "grad_norm": 0.734375, "learning_rate": 0.00010721012029970136, "loss": 0.718, "step": 32208 }, { "epoch": 0.8270368034458797, "grad_norm": 0.76953125, "learning_rate": 0.00010720566770971341, "loss": 0.8496, "step": 32209 }, { "epoch": 0.8270624806418015, "grad_norm": 0.7578125, "learning_rate": 0.00010720121510536518, "loss": 0.7491, "step": 32210 }, { "epoch": 0.8270881578377234, "grad_norm": 0.8203125, "learning_rate": 0.00010719676248666557, "loss": 0.7936, "step": 32211 }, { "epoch": 0.8271138350336451, "grad_norm": 0.828125, "learning_rate": 0.0001071923098536235, "loss": 0.818, "step": 32212 }, { "epoch": 0.8271395122295669, "grad_norm": 0.703125, "learning_rate": 0.00010718785720624777, "loss": 0.7368, "step": 32213 }, { "epoch": 0.8271651894254888, "grad_norm": 0.8125, "learning_rate": 0.00010718340454454732, "loss": 0.8093, "step": 32214 }, { "epoch": 0.8271908666214106, "grad_norm": 0.79296875, "learning_rate": 0.00010717895186853097, "loss": 0.7451, "step": 32215 }, { "epoch": 0.8272165438173325, "grad_norm": 0.8203125, "learning_rate": 0.00010717449917820761, "loss": 0.7926, "step": 32216 }, { "epoch": 0.8272422210132543, "grad_norm": 0.890625, "learning_rate": 0.00010717004647358615, "loss": 0.8652, "step": 32217 }, { "epoch": 0.8272678982091761, "grad_norm": 0.84765625, "learning_rate": 0.00010716559375467543, "loss": 0.8232, "step": 32218 }, { "epoch": 0.8272935754050978, "grad_norm": 0.74609375, "learning_rate": 0.00010716114102148428, "loss": 0.8863, "step": 32219 }, { "epoch": 0.8273192526010197, "grad_norm": 0.86328125, "learning_rate": 0.00010715668827402172, "loss": 0.9441, "step": 32220 }, { "epoch": 0.8273449297969415, "grad_norm": 0.82421875, "learning_rate": 0.00010715223551229645, "loss": 0.9411, "step": 32221 }, { "epoch": 0.8273706069928634, "grad_norm": 0.7265625, "learning_rate": 0.00010714778273631745, "loss": 0.7865, "step": 32222 }, { "epoch": 0.8273962841887852, "grad_norm": 0.76953125, "learning_rate": 0.00010714332994609355, "loss": 0.7014, "step": 32223 }, { "epoch": 0.827421961384707, "grad_norm": 0.78515625, "learning_rate": 0.00010713887714163362, "loss": 0.7468, "step": 32224 }, { "epoch": 0.8274476385806288, "grad_norm": 0.80078125, "learning_rate": 0.00010713442432294658, "loss": 0.7318, "step": 32225 }, { "epoch": 0.8274733157765506, "grad_norm": 0.73828125, "learning_rate": 0.0001071299714900413, "loss": 0.7961, "step": 32226 }, { "epoch": 0.8274989929724724, "grad_norm": 0.78125, "learning_rate": 0.0001071255186429266, "loss": 0.8442, "step": 32227 }, { "epoch": 0.8275246701683943, "grad_norm": 0.77734375, "learning_rate": 0.00010712106578161138, "loss": 0.8876, "step": 32228 }, { "epoch": 0.8275503473643161, "grad_norm": 0.73046875, "learning_rate": 0.00010711661290610453, "loss": 0.6859, "step": 32229 }, { "epoch": 0.8275760245602379, "grad_norm": 0.75, "learning_rate": 0.00010711216001641491, "loss": 0.8399, "step": 32230 }, { "epoch": 0.8276017017561598, "grad_norm": 0.81640625, "learning_rate": 0.0001071077071125514, "loss": 0.8636, "step": 32231 }, { "epoch": 0.8276273789520815, "grad_norm": 0.77734375, "learning_rate": 0.00010710325419452285, "loss": 0.8705, "step": 32232 }, { "epoch": 0.8276530561480033, "grad_norm": 0.91015625, "learning_rate": 0.0001070988012623382, "loss": 0.9239, "step": 32233 }, { "epoch": 0.8276787333439252, "grad_norm": 0.71484375, "learning_rate": 0.00010709434831600629, "loss": 0.7649, "step": 32234 }, { "epoch": 0.827704410539847, "grad_norm": 0.83984375, "learning_rate": 0.00010708989535553593, "loss": 0.7491, "step": 32235 }, { "epoch": 0.8277300877357688, "grad_norm": 0.8203125, "learning_rate": 0.00010708544238093608, "loss": 0.8073, "step": 32236 }, { "epoch": 0.8277557649316907, "grad_norm": 0.79296875, "learning_rate": 0.00010708098939221556, "loss": 0.8664, "step": 32237 }, { "epoch": 0.8277814421276124, "grad_norm": 0.7734375, "learning_rate": 0.00010707653638938328, "loss": 0.8351, "step": 32238 }, { "epoch": 0.8278071193235342, "grad_norm": 0.859375, "learning_rate": 0.00010707208337244811, "loss": 0.7869, "step": 32239 }, { "epoch": 0.8278327965194561, "grad_norm": 0.91796875, "learning_rate": 0.00010706763034141891, "loss": 0.7425, "step": 32240 }, { "epoch": 0.8278584737153779, "grad_norm": 0.765625, "learning_rate": 0.00010706317729630458, "loss": 0.7937, "step": 32241 }, { "epoch": 0.8278841509112997, "grad_norm": 0.69921875, "learning_rate": 0.00010705872423711396, "loss": 0.7817, "step": 32242 }, { "epoch": 0.8279098281072216, "grad_norm": 0.75390625, "learning_rate": 0.00010705427116385592, "loss": 0.7265, "step": 32243 }, { "epoch": 0.8279355053031434, "grad_norm": 0.76953125, "learning_rate": 0.00010704981807653938, "loss": 0.6907, "step": 32244 }, { "epoch": 0.8279611824990651, "grad_norm": 0.8515625, "learning_rate": 0.00010704536497517319, "loss": 0.9452, "step": 32245 }, { "epoch": 0.827986859694987, "grad_norm": 0.8046875, "learning_rate": 0.0001070409118597662, "loss": 0.7744, "step": 32246 }, { "epoch": 0.8280125368909088, "grad_norm": 0.8125, "learning_rate": 0.00010703645873032735, "loss": 0.7639, "step": 32247 }, { "epoch": 0.8280382140868306, "grad_norm": 0.77734375, "learning_rate": 0.00010703200558686542, "loss": 0.8013, "step": 32248 }, { "epoch": 0.8280638912827525, "grad_norm": 0.8046875, "learning_rate": 0.00010702755242938937, "loss": 0.8777, "step": 32249 }, { "epoch": 0.8280895684786743, "grad_norm": 0.76171875, "learning_rate": 0.00010702309925790805, "loss": 0.7458, "step": 32250 }, { "epoch": 0.8281152456745962, "grad_norm": 0.859375, "learning_rate": 0.00010701864607243031, "loss": 0.8793, "step": 32251 }, { "epoch": 0.8281409228705179, "grad_norm": 0.8359375, "learning_rate": 0.00010701419287296504, "loss": 0.8553, "step": 32252 }, { "epoch": 0.8281666000664397, "grad_norm": 0.95703125, "learning_rate": 0.00010700973965952115, "loss": 0.7764, "step": 32253 }, { "epoch": 0.8281922772623616, "grad_norm": 0.79296875, "learning_rate": 0.00010700528643210745, "loss": 0.8326, "step": 32254 }, { "epoch": 0.8282179544582834, "grad_norm": 0.8125, "learning_rate": 0.00010700083319073286, "loss": 0.8153, "step": 32255 }, { "epoch": 0.8282436316542052, "grad_norm": 0.82421875, "learning_rate": 0.0001069963799354062, "loss": 0.8466, "step": 32256 }, { "epoch": 0.8282693088501271, "grad_norm": 0.78125, "learning_rate": 0.00010699192666613645, "loss": 0.7805, "step": 32257 }, { "epoch": 0.8282949860460488, "grad_norm": 0.7578125, "learning_rate": 0.0001069874733829324, "loss": 0.8096, "step": 32258 }, { "epoch": 0.8283206632419706, "grad_norm": 0.80859375, "learning_rate": 0.00010698302008580292, "loss": 0.7973, "step": 32259 }, { "epoch": 0.8283463404378925, "grad_norm": 0.77734375, "learning_rate": 0.00010697856677475697, "loss": 0.8185, "step": 32260 }, { "epoch": 0.8283720176338143, "grad_norm": 0.6875, "learning_rate": 0.0001069741134498033, "loss": 0.805, "step": 32261 }, { "epoch": 0.8283976948297361, "grad_norm": 0.9609375, "learning_rate": 0.00010696966011095091, "loss": 0.6773, "step": 32262 }, { "epoch": 0.828423372025658, "grad_norm": 0.75390625, "learning_rate": 0.00010696520675820858, "loss": 0.8061, "step": 32263 }, { "epoch": 0.8284490492215798, "grad_norm": 0.75, "learning_rate": 0.00010696075339158522, "loss": 0.8458, "step": 32264 }, { "epoch": 0.8284747264175015, "grad_norm": 0.83203125, "learning_rate": 0.00010695630001108971, "loss": 0.8613, "step": 32265 }, { "epoch": 0.8285004036134234, "grad_norm": 0.75, "learning_rate": 0.00010695184661673094, "loss": 0.7585, "step": 32266 }, { "epoch": 0.8285260808093452, "grad_norm": 0.73828125, "learning_rate": 0.00010694739320851777, "loss": 0.8359, "step": 32267 }, { "epoch": 0.828551758005267, "grad_norm": 0.76953125, "learning_rate": 0.00010694293978645906, "loss": 0.8876, "step": 32268 }, { "epoch": 0.8285774352011889, "grad_norm": 0.80078125, "learning_rate": 0.00010693848635056369, "loss": 0.8106, "step": 32269 }, { "epoch": 0.8286031123971107, "grad_norm": 0.70703125, "learning_rate": 0.00010693403290084057, "loss": 0.7623, "step": 32270 }, { "epoch": 0.8286287895930325, "grad_norm": 0.7421875, "learning_rate": 0.00010692957943729854, "loss": 0.6612, "step": 32271 }, { "epoch": 0.8286544667889543, "grad_norm": 0.78125, "learning_rate": 0.00010692512595994646, "loss": 0.8845, "step": 32272 }, { "epoch": 0.8286801439848761, "grad_norm": 0.859375, "learning_rate": 0.00010692067246879327, "loss": 0.8559, "step": 32273 }, { "epoch": 0.8287058211807979, "grad_norm": 0.8671875, "learning_rate": 0.00010691621896384776, "loss": 0.8264, "step": 32274 }, { "epoch": 0.8287314983767198, "grad_norm": 0.8046875, "learning_rate": 0.00010691176544511888, "loss": 0.7379, "step": 32275 }, { "epoch": 0.8287571755726416, "grad_norm": 0.8984375, "learning_rate": 0.00010690731191261547, "loss": 0.9017, "step": 32276 }, { "epoch": 0.8287828527685634, "grad_norm": 0.82421875, "learning_rate": 0.00010690285836634639, "loss": 0.8984, "step": 32277 }, { "epoch": 0.8288085299644852, "grad_norm": 0.80078125, "learning_rate": 0.00010689840480632056, "loss": 0.7248, "step": 32278 }, { "epoch": 0.828834207160407, "grad_norm": 0.828125, "learning_rate": 0.00010689395123254684, "loss": 0.8356, "step": 32279 }, { "epoch": 0.8288598843563288, "grad_norm": 0.765625, "learning_rate": 0.00010688949764503408, "loss": 0.6223, "step": 32280 }, { "epoch": 0.8288855615522507, "grad_norm": 0.7265625, "learning_rate": 0.00010688504404379119, "loss": 0.738, "step": 32281 }, { "epoch": 0.8289112387481725, "grad_norm": 0.765625, "learning_rate": 0.00010688059042882704, "loss": 0.8111, "step": 32282 }, { "epoch": 0.8289369159440944, "grad_norm": 0.8125, "learning_rate": 0.00010687613680015043, "loss": 0.7928, "step": 32283 }, { "epoch": 0.8289625931400162, "grad_norm": 0.83984375, "learning_rate": 0.00010687168315777036, "loss": 0.8028, "step": 32284 }, { "epoch": 0.8289882703359379, "grad_norm": 0.75, "learning_rate": 0.00010686722950169561, "loss": 0.8228, "step": 32285 }, { "epoch": 0.8290139475318598, "grad_norm": 0.90234375, "learning_rate": 0.00010686277583193513, "loss": 0.8099, "step": 32286 }, { "epoch": 0.8290396247277816, "grad_norm": 0.87109375, "learning_rate": 0.00010685832214849776, "loss": 0.8845, "step": 32287 }, { "epoch": 0.8290653019237034, "grad_norm": 0.81640625, "learning_rate": 0.00010685386845139233, "loss": 0.8738, "step": 32288 }, { "epoch": 0.8290909791196253, "grad_norm": 0.890625, "learning_rate": 0.00010684941474062779, "loss": 0.8242, "step": 32289 }, { "epoch": 0.8291166563155471, "grad_norm": 0.78515625, "learning_rate": 0.00010684496101621297, "loss": 0.8531, "step": 32290 }, { "epoch": 0.8291423335114689, "grad_norm": 0.7578125, "learning_rate": 0.00010684050727815675, "loss": 0.8346, "step": 32291 }, { "epoch": 0.8291680107073907, "grad_norm": 0.7109375, "learning_rate": 0.00010683605352646807, "loss": 0.728, "step": 32292 }, { "epoch": 0.8291936879033125, "grad_norm": 0.78515625, "learning_rate": 0.0001068315997611557, "loss": 0.8193, "step": 32293 }, { "epoch": 0.8292193650992343, "grad_norm": 0.7109375, "learning_rate": 0.00010682714598222861, "loss": 0.8723, "step": 32294 }, { "epoch": 0.8292450422951562, "grad_norm": 0.81640625, "learning_rate": 0.0001068226921896956, "loss": 0.7759, "step": 32295 }, { "epoch": 0.829270719491078, "grad_norm": 0.86328125, "learning_rate": 0.00010681823838356557, "loss": 0.7732, "step": 32296 }, { "epoch": 0.8292963966869998, "grad_norm": 0.73828125, "learning_rate": 0.00010681378456384745, "loss": 0.9504, "step": 32297 }, { "epoch": 0.8293220738829216, "grad_norm": 0.8125, "learning_rate": 0.00010680933073055004, "loss": 0.8229, "step": 32298 }, { "epoch": 0.8293477510788434, "grad_norm": 0.77734375, "learning_rate": 0.00010680487688368229, "loss": 0.7302, "step": 32299 }, { "epoch": 0.8293734282747652, "grad_norm": 0.8359375, "learning_rate": 0.00010680042302325299, "loss": 0.8759, "step": 32300 }, { "epoch": 0.8293991054706871, "grad_norm": 0.7578125, "learning_rate": 0.00010679596914927106, "loss": 0.8211, "step": 32301 }, { "epoch": 0.8294247826666089, "grad_norm": 0.7734375, "learning_rate": 0.0001067915152617454, "loss": 0.9093, "step": 32302 }, { "epoch": 0.8294504598625307, "grad_norm": 0.84765625, "learning_rate": 0.00010678706136068486, "loss": 1.0493, "step": 32303 }, { "epoch": 0.8294761370584526, "grad_norm": 0.78515625, "learning_rate": 0.00010678260744609831, "loss": 0.8064, "step": 32304 }, { "epoch": 0.8295018142543743, "grad_norm": 0.87890625, "learning_rate": 0.00010677815351799465, "loss": 0.9878, "step": 32305 }, { "epoch": 0.8295274914502961, "grad_norm": 0.74609375, "learning_rate": 0.00010677369957638275, "loss": 0.7527, "step": 32306 }, { "epoch": 0.829553168646218, "grad_norm": 0.796875, "learning_rate": 0.00010676924562127146, "loss": 0.7706, "step": 32307 }, { "epoch": 0.8295788458421398, "grad_norm": 0.76953125, "learning_rate": 0.00010676479165266968, "loss": 0.8646, "step": 32308 }, { "epoch": 0.8296045230380616, "grad_norm": 0.93359375, "learning_rate": 0.00010676033767058626, "loss": 0.87, "step": 32309 }, { "epoch": 0.8296302002339835, "grad_norm": 0.76953125, "learning_rate": 0.00010675588367503013, "loss": 0.8184, "step": 32310 }, { "epoch": 0.8296558774299053, "grad_norm": 0.74609375, "learning_rate": 0.00010675142966601012, "loss": 0.7899, "step": 32311 }, { "epoch": 0.829681554625827, "grad_norm": 0.80078125, "learning_rate": 0.00010674697564353513, "loss": 0.839, "step": 32312 }, { "epoch": 0.8297072318217489, "grad_norm": 0.734375, "learning_rate": 0.00010674252160761404, "loss": 0.7376, "step": 32313 }, { "epoch": 0.8297329090176707, "grad_norm": 0.74609375, "learning_rate": 0.00010673806755825565, "loss": 0.809, "step": 32314 }, { "epoch": 0.8297585862135926, "grad_norm": 0.80078125, "learning_rate": 0.00010673361349546898, "loss": 0.8281, "step": 32315 }, { "epoch": 0.8297842634095144, "grad_norm": 0.81640625, "learning_rate": 0.00010672915941926279, "loss": 0.901, "step": 32316 }, { "epoch": 0.8298099406054362, "grad_norm": 0.83984375, "learning_rate": 0.00010672470532964598, "loss": 0.9121, "step": 32317 }, { "epoch": 0.829835617801358, "grad_norm": 0.796875, "learning_rate": 0.00010672025122662745, "loss": 0.8072, "step": 32318 }, { "epoch": 0.8298612949972798, "grad_norm": 0.78515625, "learning_rate": 0.00010671579711021609, "loss": 0.7612, "step": 32319 }, { "epoch": 0.8298869721932016, "grad_norm": 0.76953125, "learning_rate": 0.00010671134298042072, "loss": 0.8152, "step": 32320 }, { "epoch": 0.8299126493891235, "grad_norm": 0.85546875, "learning_rate": 0.00010670688883725027, "loss": 0.8185, "step": 32321 }, { "epoch": 0.8299383265850453, "grad_norm": 0.76171875, "learning_rate": 0.00010670243468071359, "loss": 0.762, "step": 32322 }, { "epoch": 0.8299640037809671, "grad_norm": 0.77734375, "learning_rate": 0.00010669798051081954, "loss": 0.7862, "step": 32323 }, { "epoch": 0.829989680976889, "grad_norm": 0.9921875, "learning_rate": 0.00010669352632757705, "loss": 0.8782, "step": 32324 }, { "epoch": 0.8300153581728107, "grad_norm": 0.79296875, "learning_rate": 0.00010668907213099495, "loss": 0.8672, "step": 32325 }, { "epoch": 0.8300410353687325, "grad_norm": 0.8984375, "learning_rate": 0.00010668461792108218, "loss": 0.8594, "step": 32326 }, { "epoch": 0.8300667125646544, "grad_norm": 0.74609375, "learning_rate": 0.00010668016369784753, "loss": 0.7636, "step": 32327 }, { "epoch": 0.8300923897605762, "grad_norm": 0.7890625, "learning_rate": 0.00010667570946129989, "loss": 0.6686, "step": 32328 }, { "epoch": 0.830118066956498, "grad_norm": 0.7265625, "learning_rate": 0.0001066712552114482, "loss": 0.8393, "step": 32329 }, { "epoch": 0.8301437441524199, "grad_norm": 1.2265625, "learning_rate": 0.0001066668009483013, "loss": 0.6909, "step": 32330 }, { "epoch": 0.8301694213483417, "grad_norm": 0.8125, "learning_rate": 0.00010666234667186805, "loss": 0.8681, "step": 32331 }, { "epoch": 0.8301950985442634, "grad_norm": 0.77734375, "learning_rate": 0.00010665789238215738, "loss": 0.8595, "step": 32332 }, { "epoch": 0.8302207757401853, "grad_norm": 0.76953125, "learning_rate": 0.00010665343807917809, "loss": 0.7922, "step": 32333 }, { "epoch": 0.8302464529361071, "grad_norm": 0.75390625, "learning_rate": 0.00010664898376293913, "loss": 0.8374, "step": 32334 }, { "epoch": 0.8302721301320289, "grad_norm": 0.859375, "learning_rate": 0.00010664452943344934, "loss": 0.8957, "step": 32335 }, { "epoch": 0.8302978073279508, "grad_norm": 0.796875, "learning_rate": 0.00010664007509071759, "loss": 0.7546, "step": 32336 }, { "epoch": 0.8303234845238726, "grad_norm": 0.76953125, "learning_rate": 0.00010663562073475277, "loss": 0.7971, "step": 32337 }, { "epoch": 0.8303491617197943, "grad_norm": 0.81640625, "learning_rate": 0.00010663116636556376, "loss": 0.905, "step": 32338 }, { "epoch": 0.8303748389157162, "grad_norm": 0.81640625, "learning_rate": 0.00010662671198315946, "loss": 0.9377, "step": 32339 }, { "epoch": 0.830400516111638, "grad_norm": 0.83203125, "learning_rate": 0.00010662225758754869, "loss": 0.7852, "step": 32340 }, { "epoch": 0.8304261933075598, "grad_norm": 0.7109375, "learning_rate": 0.00010661780317874035, "loss": 0.8035, "step": 32341 }, { "epoch": 0.8304518705034817, "grad_norm": 0.7421875, "learning_rate": 0.00010661334875674336, "loss": 0.7859, "step": 32342 }, { "epoch": 0.8304775476994035, "grad_norm": 0.74609375, "learning_rate": 0.00010660889432156656, "loss": 0.7991, "step": 32343 }, { "epoch": 0.8305032248953254, "grad_norm": 0.83984375, "learning_rate": 0.0001066044398732188, "loss": 0.7539, "step": 32344 }, { "epoch": 0.8305289020912471, "grad_norm": 1.1015625, "learning_rate": 0.00010659998541170903, "loss": 0.7834, "step": 32345 }, { "epoch": 0.8305545792871689, "grad_norm": 0.8359375, "learning_rate": 0.00010659553093704604, "loss": 0.8262, "step": 32346 }, { "epoch": 0.8305802564830908, "grad_norm": 0.72265625, "learning_rate": 0.00010659107644923878, "loss": 0.8004, "step": 32347 }, { "epoch": 0.8306059336790126, "grad_norm": 0.9921875, "learning_rate": 0.0001065866219482961, "loss": 0.7187, "step": 32348 }, { "epoch": 0.8306316108749344, "grad_norm": 0.7421875, "learning_rate": 0.00010658216743422686, "loss": 0.6926, "step": 32349 }, { "epoch": 0.8306572880708563, "grad_norm": 0.7734375, "learning_rate": 0.00010657771290703997, "loss": 0.8096, "step": 32350 }, { "epoch": 0.8306829652667781, "grad_norm": 0.765625, "learning_rate": 0.0001065732583667443, "loss": 0.7566, "step": 32351 }, { "epoch": 0.8307086424626998, "grad_norm": 0.76953125, "learning_rate": 0.0001065688038133487, "loss": 0.7528, "step": 32352 }, { "epoch": 0.8307343196586217, "grad_norm": 0.78515625, "learning_rate": 0.0001065643492468621, "loss": 0.7567, "step": 32353 }, { "epoch": 0.8307599968545435, "grad_norm": 0.75, "learning_rate": 0.0001065598946672933, "loss": 0.9645, "step": 32354 }, { "epoch": 0.8307856740504653, "grad_norm": 0.82421875, "learning_rate": 0.00010655544007465124, "loss": 0.7782, "step": 32355 }, { "epoch": 0.8308113512463872, "grad_norm": 0.8984375, "learning_rate": 0.0001065509854689448, "loss": 0.8817, "step": 32356 }, { "epoch": 0.830837028442309, "grad_norm": 1.2109375, "learning_rate": 0.0001065465308501828, "loss": 0.8971, "step": 32357 }, { "epoch": 0.8308627056382307, "grad_norm": 0.703125, "learning_rate": 0.00010654207621837419, "loss": 0.6486, "step": 32358 }, { "epoch": 0.8308883828341526, "grad_norm": 0.83203125, "learning_rate": 0.00010653762157352782, "loss": 0.9089, "step": 32359 }, { "epoch": 0.8309140600300744, "grad_norm": 0.7578125, "learning_rate": 0.00010653316691565251, "loss": 0.7327, "step": 32360 }, { "epoch": 0.8309397372259962, "grad_norm": 0.73046875, "learning_rate": 0.00010652871224475723, "loss": 0.7903, "step": 32361 }, { "epoch": 0.8309654144219181, "grad_norm": 0.76953125, "learning_rate": 0.00010652425756085078, "loss": 0.7677, "step": 32362 }, { "epoch": 0.8309910916178399, "grad_norm": 0.75390625, "learning_rate": 0.00010651980286394212, "loss": 0.8427, "step": 32363 }, { "epoch": 0.8310167688137617, "grad_norm": 0.671875, "learning_rate": 0.00010651534815404007, "loss": 0.8265, "step": 32364 }, { "epoch": 0.8310424460096835, "grad_norm": 0.74609375, "learning_rate": 0.0001065108934311535, "loss": 0.7579, "step": 32365 }, { "epoch": 0.8310681232056053, "grad_norm": 0.7578125, "learning_rate": 0.00010650643869529133, "loss": 0.7228, "step": 32366 }, { "epoch": 0.8310938004015271, "grad_norm": 0.828125, "learning_rate": 0.00010650198394646241, "loss": 0.7671, "step": 32367 }, { "epoch": 0.831119477597449, "grad_norm": 0.72265625, "learning_rate": 0.00010649752918467559, "loss": 0.7101, "step": 32368 }, { "epoch": 0.8311451547933708, "grad_norm": 0.734375, "learning_rate": 0.00010649307440993982, "loss": 0.7425, "step": 32369 }, { "epoch": 0.8311708319892926, "grad_norm": 0.70703125, "learning_rate": 0.00010648861962226393, "loss": 0.6688, "step": 32370 }, { "epoch": 0.8311965091852145, "grad_norm": 0.78125, "learning_rate": 0.00010648416482165679, "loss": 0.7683, "step": 32371 }, { "epoch": 0.8312221863811362, "grad_norm": 0.7890625, "learning_rate": 0.00010647971000812736, "loss": 0.698, "step": 32372 }, { "epoch": 0.831247863577058, "grad_norm": 0.94140625, "learning_rate": 0.00010647525518168438, "loss": 0.7923, "step": 32373 }, { "epoch": 0.8312735407729799, "grad_norm": 0.76171875, "learning_rate": 0.00010647080034233683, "loss": 0.7768, "step": 32374 }, { "epoch": 0.8312992179689017, "grad_norm": 0.85546875, "learning_rate": 0.00010646634549009356, "loss": 0.8519, "step": 32375 }, { "epoch": 0.8313248951648236, "grad_norm": 0.7109375, "learning_rate": 0.00010646189062496341, "loss": 0.7101, "step": 32376 }, { "epoch": 0.8313505723607454, "grad_norm": 0.765625, "learning_rate": 0.00010645743574695535, "loss": 0.9155, "step": 32377 }, { "epoch": 0.8313762495566671, "grad_norm": 0.828125, "learning_rate": 0.00010645298085607818, "loss": 0.7986, "step": 32378 }, { "epoch": 0.831401926752589, "grad_norm": 0.74609375, "learning_rate": 0.00010644852595234081, "loss": 0.7419, "step": 32379 }, { "epoch": 0.8314276039485108, "grad_norm": 0.71484375, "learning_rate": 0.0001064440710357521, "loss": 0.8817, "step": 32380 }, { "epoch": 0.8314532811444326, "grad_norm": 0.7890625, "learning_rate": 0.00010643961610632091, "loss": 0.9198, "step": 32381 }, { "epoch": 0.8314789583403545, "grad_norm": 0.75, "learning_rate": 0.0001064351611640562, "loss": 0.7433, "step": 32382 }, { "epoch": 0.8315046355362763, "grad_norm": 0.70703125, "learning_rate": 0.00010643070620896678, "loss": 0.7971, "step": 32383 }, { "epoch": 0.8315303127321981, "grad_norm": 0.78515625, "learning_rate": 0.00010642625124106151, "loss": 0.7904, "step": 32384 }, { "epoch": 0.8315559899281199, "grad_norm": 0.73828125, "learning_rate": 0.00010642179626034937, "loss": 0.796, "step": 32385 }, { "epoch": 0.8315816671240417, "grad_norm": 0.6875, "learning_rate": 0.00010641734126683911, "loss": 0.7571, "step": 32386 }, { "epoch": 0.8316073443199635, "grad_norm": 0.78515625, "learning_rate": 0.00010641288626053968, "loss": 0.7442, "step": 32387 }, { "epoch": 0.8316330215158854, "grad_norm": 0.82421875, "learning_rate": 0.00010640843124145998, "loss": 0.8192, "step": 32388 }, { "epoch": 0.8316586987118072, "grad_norm": 0.80078125, "learning_rate": 0.0001064039762096088, "loss": 0.795, "step": 32389 }, { "epoch": 0.831684375907729, "grad_norm": 0.83203125, "learning_rate": 0.00010639952116499512, "loss": 0.7872, "step": 32390 }, { "epoch": 0.8317100531036509, "grad_norm": 0.78515625, "learning_rate": 0.00010639506610762775, "loss": 0.8413, "step": 32391 }, { "epoch": 0.8317357302995726, "grad_norm": 0.80859375, "learning_rate": 0.00010639061103751559, "loss": 0.8816, "step": 32392 }, { "epoch": 0.8317614074954944, "grad_norm": 0.78515625, "learning_rate": 0.00010638615595466755, "loss": 0.7461, "step": 32393 }, { "epoch": 0.8317870846914163, "grad_norm": 0.86328125, "learning_rate": 0.00010638170085909245, "loss": 0.8274, "step": 32394 }, { "epoch": 0.8318127618873381, "grad_norm": 0.83203125, "learning_rate": 0.00010637724575079919, "loss": 0.8035, "step": 32395 }, { "epoch": 0.8318384390832599, "grad_norm": 0.734375, "learning_rate": 0.00010637279062979668, "loss": 0.7741, "step": 32396 }, { "epoch": 0.8318641162791818, "grad_norm": 0.796875, "learning_rate": 0.00010636833549609372, "loss": 0.7925, "step": 32397 }, { "epoch": 0.8318897934751035, "grad_norm": 0.82421875, "learning_rate": 0.00010636388034969933, "loss": 0.8026, "step": 32398 }, { "epoch": 0.8319154706710253, "grad_norm": 0.8046875, "learning_rate": 0.00010635942519062223, "loss": 0.8146, "step": 32399 }, { "epoch": 0.8319411478669472, "grad_norm": 0.76171875, "learning_rate": 0.0001063549700188714, "loss": 0.8329, "step": 32400 }, { "epoch": 0.831966825062869, "grad_norm": 0.85546875, "learning_rate": 0.0001063505148344557, "loss": 0.7733, "step": 32401 }, { "epoch": 0.8319925022587908, "grad_norm": 0.7265625, "learning_rate": 0.00010634605963738395, "loss": 0.644, "step": 32402 }, { "epoch": 0.8320181794547127, "grad_norm": 0.80859375, "learning_rate": 0.00010634160442766512, "loss": 0.7729, "step": 32403 }, { "epoch": 0.8320438566506345, "grad_norm": 0.71875, "learning_rate": 0.00010633714920530804, "loss": 0.78, "step": 32404 }, { "epoch": 0.8320695338465562, "grad_norm": 0.9765625, "learning_rate": 0.0001063326939703216, "loss": 0.8014, "step": 32405 }, { "epoch": 0.8320952110424781, "grad_norm": 0.8046875, "learning_rate": 0.00010632823872271464, "loss": 0.9201, "step": 32406 }, { "epoch": 0.8321208882383999, "grad_norm": 0.7421875, "learning_rate": 0.00010632378346249612, "loss": 0.8847, "step": 32407 }, { "epoch": 0.8321465654343217, "grad_norm": 0.78515625, "learning_rate": 0.00010631932818967483, "loss": 0.7221, "step": 32408 }, { "epoch": 0.8321722426302436, "grad_norm": 0.828125, "learning_rate": 0.0001063148729042597, "loss": 0.929, "step": 32409 }, { "epoch": 0.8321979198261654, "grad_norm": 0.76953125, "learning_rate": 0.00010631041760625957, "loss": 0.7268, "step": 32410 }, { "epoch": 0.8322235970220873, "grad_norm": 0.82421875, "learning_rate": 0.00010630596229568341, "loss": 0.7714, "step": 32411 }, { "epoch": 0.832249274218009, "grad_norm": 0.71875, "learning_rate": 0.00010630150697254002, "loss": 0.681, "step": 32412 }, { "epoch": 0.8322749514139308, "grad_norm": 0.7109375, "learning_rate": 0.00010629705163683825, "loss": 0.7293, "step": 32413 }, { "epoch": 0.8323006286098527, "grad_norm": 0.734375, "learning_rate": 0.00010629259628858707, "loss": 0.6948, "step": 32414 }, { "epoch": 0.8323263058057745, "grad_norm": 0.8125, "learning_rate": 0.00010628814092779531, "loss": 0.8043, "step": 32415 }, { "epoch": 0.8323519830016963, "grad_norm": 1.6875, "learning_rate": 0.00010628368555447182, "loss": 0.8699, "step": 32416 }, { "epoch": 0.8323776601976182, "grad_norm": 0.8046875, "learning_rate": 0.00010627923016862555, "loss": 0.9149, "step": 32417 }, { "epoch": 0.8324033373935399, "grad_norm": 0.78515625, "learning_rate": 0.00010627477477026532, "loss": 0.7086, "step": 32418 }, { "epoch": 0.8324290145894617, "grad_norm": 0.73046875, "learning_rate": 0.00010627031935940003, "loss": 0.8245, "step": 32419 }, { "epoch": 0.8324546917853836, "grad_norm": 0.828125, "learning_rate": 0.0001062658639360386, "loss": 0.7808, "step": 32420 }, { "epoch": 0.8324803689813054, "grad_norm": 0.80859375, "learning_rate": 0.00010626140850018982, "loss": 0.8, "step": 32421 }, { "epoch": 0.8325060461772272, "grad_norm": 0.8046875, "learning_rate": 0.00010625695305186263, "loss": 0.7768, "step": 32422 }, { "epoch": 0.8325317233731491, "grad_norm": 0.828125, "learning_rate": 0.0001062524975910659, "loss": 0.8761, "step": 32423 }, { "epoch": 0.8325574005690709, "grad_norm": 0.75, "learning_rate": 0.00010624804211780853, "loss": 0.7816, "step": 32424 }, { "epoch": 0.8325830777649926, "grad_norm": 0.734375, "learning_rate": 0.00010624358663209935, "loss": 0.8197, "step": 32425 }, { "epoch": 0.8326087549609145, "grad_norm": 0.76171875, "learning_rate": 0.00010623913113394726, "loss": 0.7703, "step": 32426 }, { "epoch": 0.8326344321568363, "grad_norm": 0.7421875, "learning_rate": 0.00010623467562336116, "loss": 0.7102, "step": 32427 }, { "epoch": 0.8326601093527581, "grad_norm": 0.83203125, "learning_rate": 0.00010623022010034991, "loss": 0.778, "step": 32428 }, { "epoch": 0.83268578654868, "grad_norm": 0.7578125, "learning_rate": 0.0001062257645649224, "loss": 0.8694, "step": 32429 }, { "epoch": 0.8327114637446018, "grad_norm": 0.85546875, "learning_rate": 0.00010622130901708748, "loss": 0.8371, "step": 32430 }, { "epoch": 0.8327371409405235, "grad_norm": 0.78515625, "learning_rate": 0.00010621685345685409, "loss": 0.8816, "step": 32431 }, { "epoch": 0.8327628181364454, "grad_norm": 0.76171875, "learning_rate": 0.00010621239788423105, "loss": 0.7316, "step": 32432 }, { "epoch": 0.8327884953323672, "grad_norm": 0.76171875, "learning_rate": 0.00010620794229922728, "loss": 0.727, "step": 32433 }, { "epoch": 0.832814172528289, "grad_norm": 0.8046875, "learning_rate": 0.00010620348670185163, "loss": 0.8963, "step": 32434 }, { "epoch": 0.8328398497242109, "grad_norm": 0.796875, "learning_rate": 0.00010619903109211298, "loss": 0.7943, "step": 32435 }, { "epoch": 0.8328655269201327, "grad_norm": 0.78515625, "learning_rate": 0.00010619457547002024, "loss": 0.8822, "step": 32436 }, { "epoch": 0.8328912041160546, "grad_norm": 0.875, "learning_rate": 0.00010619011983558226, "loss": 0.8039, "step": 32437 }, { "epoch": 0.8329168813119763, "grad_norm": 0.78515625, "learning_rate": 0.00010618566418880796, "loss": 0.9556, "step": 32438 }, { "epoch": 0.8329425585078981, "grad_norm": 0.70703125, "learning_rate": 0.00010618120852970612, "loss": 0.7983, "step": 32439 }, { "epoch": 0.83296823570382, "grad_norm": 0.7578125, "learning_rate": 0.00010617675285828574, "loss": 0.8859, "step": 32440 }, { "epoch": 0.8329939128997418, "grad_norm": 0.7734375, "learning_rate": 0.00010617229717455566, "loss": 0.8042, "step": 32441 }, { "epoch": 0.8330195900956636, "grad_norm": 0.75390625, "learning_rate": 0.00010616784147852472, "loss": 0.8905, "step": 32442 }, { "epoch": 0.8330452672915855, "grad_norm": 0.73046875, "learning_rate": 0.00010616338577020184, "loss": 0.8045, "step": 32443 }, { "epoch": 0.8330709444875073, "grad_norm": 0.796875, "learning_rate": 0.00010615893004959591, "loss": 0.9232, "step": 32444 }, { "epoch": 0.833096621683429, "grad_norm": 0.80078125, "learning_rate": 0.00010615447431671575, "loss": 0.7523, "step": 32445 }, { "epoch": 0.8331222988793509, "grad_norm": 0.765625, "learning_rate": 0.0001061500185715703, "loss": 0.8671, "step": 32446 }, { "epoch": 0.8331479760752727, "grad_norm": 0.7109375, "learning_rate": 0.00010614556281416843, "loss": 0.7847, "step": 32447 }, { "epoch": 0.8331736532711945, "grad_norm": 0.703125, "learning_rate": 0.00010614110704451899, "loss": 0.8123, "step": 32448 }, { "epoch": 0.8331993304671164, "grad_norm": 0.7578125, "learning_rate": 0.00010613665126263086, "loss": 0.8682, "step": 32449 }, { "epoch": 0.8332250076630382, "grad_norm": 0.734375, "learning_rate": 0.00010613219546851296, "loss": 0.7499, "step": 32450 }, { "epoch": 0.8332506848589599, "grad_norm": 0.671875, "learning_rate": 0.00010612773966217417, "loss": 0.7499, "step": 32451 }, { "epoch": 0.8332763620548818, "grad_norm": 0.69921875, "learning_rate": 0.00010612328384362333, "loss": 0.6795, "step": 32452 }, { "epoch": 0.8333020392508036, "grad_norm": 0.81640625, "learning_rate": 0.00010611882801286932, "loss": 0.8263, "step": 32453 }, { "epoch": 0.8333277164467254, "grad_norm": 0.79296875, "learning_rate": 0.00010611437216992106, "loss": 0.8951, "step": 32454 }, { "epoch": 0.8333533936426473, "grad_norm": 0.796875, "learning_rate": 0.00010610991631478742, "loss": 0.7173, "step": 32455 }, { "epoch": 0.8333790708385691, "grad_norm": 0.8046875, "learning_rate": 0.00010610546044747723, "loss": 0.8496, "step": 32456 }, { "epoch": 0.8334047480344909, "grad_norm": 0.765625, "learning_rate": 0.00010610100456799945, "loss": 0.8692, "step": 32457 }, { "epoch": 0.8334304252304127, "grad_norm": 0.81640625, "learning_rate": 0.00010609654867636287, "loss": 0.9239, "step": 32458 }, { "epoch": 0.8334561024263345, "grad_norm": 0.765625, "learning_rate": 0.00010609209277257646, "loss": 0.7745, "step": 32459 }, { "epoch": 0.8334817796222563, "grad_norm": 0.7421875, "learning_rate": 0.00010608763685664907, "loss": 0.8197, "step": 32460 }, { "epoch": 0.8335074568181782, "grad_norm": 0.734375, "learning_rate": 0.00010608318092858951, "loss": 0.7594, "step": 32461 }, { "epoch": 0.8335331340141, "grad_norm": 0.76953125, "learning_rate": 0.00010607872498840676, "loss": 0.796, "step": 32462 }, { "epoch": 0.8335588112100218, "grad_norm": 0.77734375, "learning_rate": 0.00010607426903610966, "loss": 0.8971, "step": 32463 }, { "epoch": 0.8335844884059437, "grad_norm": 0.9765625, "learning_rate": 0.00010606981307170712, "loss": 0.7092, "step": 32464 }, { "epoch": 0.8336101656018654, "grad_norm": 0.75390625, "learning_rate": 0.00010606535709520794, "loss": 0.6655, "step": 32465 }, { "epoch": 0.8336358427977872, "grad_norm": 0.76171875, "learning_rate": 0.00010606090110662105, "loss": 0.8382, "step": 32466 }, { "epoch": 0.8336615199937091, "grad_norm": 0.765625, "learning_rate": 0.00010605644510595536, "loss": 0.7614, "step": 32467 }, { "epoch": 0.8336871971896309, "grad_norm": 0.84375, "learning_rate": 0.00010605198909321971, "loss": 0.8904, "step": 32468 }, { "epoch": 0.8337128743855527, "grad_norm": 0.7109375, "learning_rate": 0.00010604753306842299, "loss": 0.7185, "step": 32469 }, { "epoch": 0.8337385515814746, "grad_norm": 0.796875, "learning_rate": 0.00010604307703157413, "loss": 0.8514, "step": 32470 }, { "epoch": 0.8337642287773963, "grad_norm": 0.79296875, "learning_rate": 0.00010603862098268189, "loss": 0.7581, "step": 32471 }, { "epoch": 0.8337899059733181, "grad_norm": 0.69140625, "learning_rate": 0.00010603416492175527, "loss": 0.7862, "step": 32472 }, { "epoch": 0.83381558316924, "grad_norm": 0.73046875, "learning_rate": 0.0001060297088488031, "loss": 0.7262, "step": 32473 }, { "epoch": 0.8338412603651618, "grad_norm": 0.8046875, "learning_rate": 0.00010602525276383424, "loss": 0.7695, "step": 32474 }, { "epoch": 0.8338669375610837, "grad_norm": 0.74609375, "learning_rate": 0.0001060207966668576, "loss": 0.7751, "step": 32475 }, { "epoch": 0.8338926147570055, "grad_norm": 0.6875, "learning_rate": 0.00010601634055788208, "loss": 0.717, "step": 32476 }, { "epoch": 0.8339182919529273, "grad_norm": 0.77734375, "learning_rate": 0.00010601188443691652, "loss": 0.8179, "step": 32477 }, { "epoch": 0.833943969148849, "grad_norm": 0.73828125, "learning_rate": 0.00010600742830396984, "loss": 0.6764, "step": 32478 }, { "epoch": 0.8339696463447709, "grad_norm": 0.71875, "learning_rate": 0.00010600297215905086, "loss": 0.8252, "step": 32479 }, { "epoch": 0.8339953235406927, "grad_norm": 0.76953125, "learning_rate": 0.00010599851600216852, "loss": 0.8205, "step": 32480 }, { "epoch": 0.8340210007366146, "grad_norm": 0.890625, "learning_rate": 0.00010599405983333168, "loss": 0.8355, "step": 32481 }, { "epoch": 0.8340466779325364, "grad_norm": 0.7578125, "learning_rate": 0.0001059896036525492, "loss": 0.6627, "step": 32482 }, { "epoch": 0.8340723551284582, "grad_norm": 0.75, "learning_rate": 0.00010598514745983001, "loss": 0.8084, "step": 32483 }, { "epoch": 0.8340980323243801, "grad_norm": 0.76953125, "learning_rate": 0.00010598069125518298, "loss": 0.8848, "step": 32484 }, { "epoch": 0.8341237095203018, "grad_norm": 0.6953125, "learning_rate": 0.00010597623503861691, "loss": 0.8148, "step": 32485 }, { "epoch": 0.8341493867162236, "grad_norm": 0.8046875, "learning_rate": 0.00010597177881014077, "loss": 0.8814, "step": 32486 }, { "epoch": 0.8341750639121455, "grad_norm": 0.82421875, "learning_rate": 0.00010596732256976343, "loss": 0.8194, "step": 32487 }, { "epoch": 0.8342007411080673, "grad_norm": 0.80078125, "learning_rate": 0.00010596286631749375, "loss": 0.7647, "step": 32488 }, { "epoch": 0.8342264183039891, "grad_norm": 0.78125, "learning_rate": 0.00010595841005334061, "loss": 0.8701, "step": 32489 }, { "epoch": 0.834252095499911, "grad_norm": 0.796875, "learning_rate": 0.0001059539537773129, "loss": 0.7842, "step": 32490 }, { "epoch": 0.8342777726958327, "grad_norm": 0.78515625, "learning_rate": 0.00010594949748941949, "loss": 0.7477, "step": 32491 }, { "epoch": 0.8343034498917545, "grad_norm": 0.7890625, "learning_rate": 0.00010594504118966929, "loss": 0.756, "step": 32492 }, { "epoch": 0.8343291270876764, "grad_norm": 0.8046875, "learning_rate": 0.00010594058487807112, "loss": 0.8575, "step": 32493 }, { "epoch": 0.8343548042835982, "grad_norm": 0.73828125, "learning_rate": 0.00010593612855463395, "loss": 0.7991, "step": 32494 }, { "epoch": 0.83438048147952, "grad_norm": 0.765625, "learning_rate": 0.00010593167221936657, "loss": 0.7736, "step": 32495 }, { "epoch": 0.8344061586754419, "grad_norm": 0.7421875, "learning_rate": 0.00010592721587227792, "loss": 0.7995, "step": 32496 }, { "epoch": 0.8344318358713637, "grad_norm": 0.7578125, "learning_rate": 0.0001059227595133769, "loss": 0.8448, "step": 32497 }, { "epoch": 0.8344575130672854, "grad_norm": 0.82421875, "learning_rate": 0.0001059183031426723, "loss": 0.7065, "step": 32498 }, { "epoch": 0.8344831902632073, "grad_norm": 0.7421875, "learning_rate": 0.00010591384676017309, "loss": 0.8447, "step": 32499 }, { "epoch": 0.8345088674591291, "grad_norm": 0.875, "learning_rate": 0.0001059093903658881, "loss": 0.6958, "step": 32500 }, { "epoch": 0.834534544655051, "grad_norm": 0.84375, "learning_rate": 0.00010590493395982622, "loss": 0.688, "step": 32501 }, { "epoch": 0.8345602218509728, "grad_norm": 0.80859375, "learning_rate": 0.00010590047754199634, "loss": 0.8068, "step": 32502 }, { "epoch": 0.8345858990468946, "grad_norm": 0.81640625, "learning_rate": 0.00010589602111240737, "loss": 0.8487, "step": 32503 }, { "epoch": 0.8346115762428165, "grad_norm": 0.76171875, "learning_rate": 0.00010589156467106816, "loss": 0.7164, "step": 32504 }, { "epoch": 0.8346372534387382, "grad_norm": 0.765625, "learning_rate": 0.00010588710821798757, "loss": 0.824, "step": 32505 }, { "epoch": 0.83466293063466, "grad_norm": 0.7578125, "learning_rate": 0.00010588265175317451, "loss": 0.738, "step": 32506 }, { "epoch": 0.8346886078305819, "grad_norm": 0.84765625, "learning_rate": 0.00010587819527663787, "loss": 0.8087, "step": 32507 }, { "epoch": 0.8347142850265037, "grad_norm": 0.84375, "learning_rate": 0.00010587373878838651, "loss": 0.8835, "step": 32508 }, { "epoch": 0.8347399622224255, "grad_norm": 0.84765625, "learning_rate": 0.0001058692822884293, "loss": 0.8697, "step": 32509 }, { "epoch": 0.8347656394183474, "grad_norm": 0.85546875, "learning_rate": 0.0001058648257767752, "loss": 0.7385, "step": 32510 }, { "epoch": 0.8347913166142691, "grad_norm": 0.80078125, "learning_rate": 0.00010586036925343297, "loss": 0.7992, "step": 32511 }, { "epoch": 0.8348169938101909, "grad_norm": 0.79296875, "learning_rate": 0.00010585591271841157, "loss": 0.7642, "step": 32512 }, { "epoch": 0.8348426710061128, "grad_norm": 0.7734375, "learning_rate": 0.00010585145617171988, "loss": 0.8374, "step": 32513 }, { "epoch": 0.8348683482020346, "grad_norm": 0.76953125, "learning_rate": 0.00010584699961336675, "loss": 0.8752, "step": 32514 }, { "epoch": 0.8348940253979564, "grad_norm": 0.796875, "learning_rate": 0.00010584254304336107, "loss": 0.7665, "step": 32515 }, { "epoch": 0.8349197025938783, "grad_norm": 0.80078125, "learning_rate": 0.00010583808646171176, "loss": 1.0727, "step": 32516 }, { "epoch": 0.8349453797898001, "grad_norm": 0.8046875, "learning_rate": 0.00010583362986842767, "loss": 0.6935, "step": 32517 }, { "epoch": 0.8349710569857218, "grad_norm": 0.828125, "learning_rate": 0.00010582917326351766, "loss": 0.9505, "step": 32518 }, { "epoch": 0.8349967341816437, "grad_norm": 0.83984375, "learning_rate": 0.00010582471664699063, "loss": 0.8412, "step": 32519 }, { "epoch": 0.8350224113775655, "grad_norm": 0.78125, "learning_rate": 0.00010582026001885547, "loss": 0.9183, "step": 32520 }, { "epoch": 0.8350480885734873, "grad_norm": 0.8203125, "learning_rate": 0.0001058158033791211, "loss": 0.7523, "step": 32521 }, { "epoch": 0.8350737657694092, "grad_norm": 0.71875, "learning_rate": 0.00010581134672779629, "loss": 0.7212, "step": 32522 }, { "epoch": 0.835099442965331, "grad_norm": 0.796875, "learning_rate": 0.00010580689006489006, "loss": 0.7444, "step": 32523 }, { "epoch": 0.8351251201612528, "grad_norm": 0.78515625, "learning_rate": 0.00010580243339041117, "loss": 0.6902, "step": 32524 }, { "epoch": 0.8351507973571746, "grad_norm": 0.75390625, "learning_rate": 0.00010579797670436858, "loss": 0.7605, "step": 32525 }, { "epoch": 0.8351764745530964, "grad_norm": 0.7734375, "learning_rate": 0.00010579352000677114, "loss": 0.7185, "step": 32526 }, { "epoch": 0.8352021517490182, "grad_norm": 0.7734375, "learning_rate": 0.00010578906329762772, "loss": 0.7207, "step": 32527 }, { "epoch": 0.8352278289449401, "grad_norm": 0.7421875, "learning_rate": 0.00010578460657694724, "loss": 0.7526, "step": 32528 }, { "epoch": 0.8352535061408619, "grad_norm": 0.77734375, "learning_rate": 0.00010578014984473858, "loss": 0.8223, "step": 32529 }, { "epoch": 0.8352791833367837, "grad_norm": 0.77734375, "learning_rate": 0.00010577569310101057, "loss": 0.8463, "step": 32530 }, { "epoch": 0.8353048605327055, "grad_norm": 0.83984375, "learning_rate": 0.00010577123634577215, "loss": 0.9487, "step": 32531 }, { "epoch": 0.8353305377286273, "grad_norm": 0.77734375, "learning_rate": 0.00010576677957903216, "loss": 0.7787, "step": 32532 }, { "epoch": 0.8353562149245491, "grad_norm": 0.7265625, "learning_rate": 0.0001057623228007995, "loss": 0.7932, "step": 32533 }, { "epoch": 0.835381892120471, "grad_norm": 0.76171875, "learning_rate": 0.00010575786601108305, "loss": 0.8791, "step": 32534 }, { "epoch": 0.8354075693163928, "grad_norm": 0.80078125, "learning_rate": 0.00010575340920989167, "loss": 0.8108, "step": 32535 }, { "epoch": 0.8354332465123147, "grad_norm": 0.7578125, "learning_rate": 0.00010574895239723432, "loss": 0.6893, "step": 32536 }, { "epoch": 0.8354589237082365, "grad_norm": 0.77734375, "learning_rate": 0.00010574449557311981, "loss": 0.8261, "step": 32537 }, { "epoch": 0.8354846009041582, "grad_norm": 0.6953125, "learning_rate": 0.00010574003873755703, "loss": 0.7541, "step": 32538 }, { "epoch": 0.83551027810008, "grad_norm": 0.7421875, "learning_rate": 0.00010573558189055485, "loss": 0.8813, "step": 32539 }, { "epoch": 0.8355359552960019, "grad_norm": 0.78515625, "learning_rate": 0.00010573112503212221, "loss": 0.7467, "step": 32540 }, { "epoch": 0.8355616324919237, "grad_norm": 0.90625, "learning_rate": 0.00010572666816226793, "loss": 0.7665, "step": 32541 }, { "epoch": 0.8355873096878456, "grad_norm": 0.86328125, "learning_rate": 0.00010572221128100093, "loss": 0.7887, "step": 32542 }, { "epoch": 0.8356129868837674, "grad_norm": 0.875, "learning_rate": 0.00010571775438833007, "loss": 0.8596, "step": 32543 }, { "epoch": 0.8356386640796892, "grad_norm": 0.79296875, "learning_rate": 0.00010571329748426427, "loss": 0.8107, "step": 32544 }, { "epoch": 0.835664341275611, "grad_norm": 0.796875, "learning_rate": 0.00010570884056881236, "loss": 0.7185, "step": 32545 }, { "epoch": 0.8356900184715328, "grad_norm": 0.734375, "learning_rate": 0.00010570438364198323, "loss": 0.7527, "step": 32546 }, { "epoch": 0.8357156956674546, "grad_norm": 0.7578125, "learning_rate": 0.00010569992670378582, "loss": 0.8544, "step": 32547 }, { "epoch": 0.8357413728633765, "grad_norm": 0.74609375, "learning_rate": 0.00010569546975422895, "loss": 0.6826, "step": 32548 }, { "epoch": 0.8357670500592983, "grad_norm": 0.77734375, "learning_rate": 0.00010569101279332153, "loss": 0.7508, "step": 32549 }, { "epoch": 0.8357927272552201, "grad_norm": 0.7578125, "learning_rate": 0.00010568655582107243, "loss": 0.846, "step": 32550 }, { "epoch": 0.8358184044511419, "grad_norm": 0.78125, "learning_rate": 0.00010568209883749051, "loss": 0.6836, "step": 32551 }, { "epoch": 0.8358440816470637, "grad_norm": 0.73046875, "learning_rate": 0.00010567764184258471, "loss": 0.826, "step": 32552 }, { "epoch": 0.8358697588429855, "grad_norm": 0.91015625, "learning_rate": 0.00010567318483636388, "loss": 0.8357, "step": 32553 }, { "epoch": 0.8358954360389074, "grad_norm": 0.83203125, "learning_rate": 0.0001056687278188369, "loss": 0.8679, "step": 32554 }, { "epoch": 0.8359211132348292, "grad_norm": 0.75, "learning_rate": 0.00010566427079001267, "loss": 0.7398, "step": 32555 }, { "epoch": 0.835946790430751, "grad_norm": 0.83203125, "learning_rate": 0.00010565981374990004, "loss": 0.7915, "step": 32556 }, { "epoch": 0.8359724676266729, "grad_norm": 0.73828125, "learning_rate": 0.00010565535669850795, "loss": 0.735, "step": 32557 }, { "epoch": 0.8359981448225946, "grad_norm": 0.73046875, "learning_rate": 0.0001056508996358452, "loss": 0.7148, "step": 32558 }, { "epoch": 0.8360238220185164, "grad_norm": 0.73046875, "learning_rate": 0.00010564644256192073, "loss": 0.7739, "step": 32559 }, { "epoch": 0.8360494992144383, "grad_norm": 0.76171875, "learning_rate": 0.00010564198547674342, "loss": 0.9415, "step": 32560 }, { "epoch": 0.8360751764103601, "grad_norm": 0.81640625, "learning_rate": 0.00010563752838032215, "loss": 0.897, "step": 32561 }, { "epoch": 0.836100853606282, "grad_norm": 0.83984375, "learning_rate": 0.00010563307127266577, "loss": 0.8096, "step": 32562 }, { "epoch": 0.8361265308022038, "grad_norm": 0.8046875, "learning_rate": 0.00010562861415378322, "loss": 0.8568, "step": 32563 }, { "epoch": 0.8361522079981256, "grad_norm": 0.75390625, "learning_rate": 0.00010562415702368331, "loss": 0.7998, "step": 32564 }, { "epoch": 0.8361778851940473, "grad_norm": 0.86328125, "learning_rate": 0.00010561969988237499, "loss": 0.8025, "step": 32565 }, { "epoch": 0.8362035623899692, "grad_norm": 0.76953125, "learning_rate": 0.00010561524272986711, "loss": 0.7638, "step": 32566 }, { "epoch": 0.836229239585891, "grad_norm": 0.76171875, "learning_rate": 0.00010561078556616855, "loss": 0.7301, "step": 32567 }, { "epoch": 0.8362549167818129, "grad_norm": 0.7890625, "learning_rate": 0.00010560632839128821, "loss": 0.7888, "step": 32568 }, { "epoch": 0.8362805939777347, "grad_norm": 0.83203125, "learning_rate": 0.000105601871205235, "loss": 0.8217, "step": 32569 }, { "epoch": 0.8363062711736565, "grad_norm": 0.80078125, "learning_rate": 0.00010559741400801773, "loss": 0.8943, "step": 32570 }, { "epoch": 0.8363319483695782, "grad_norm": 1.1875, "learning_rate": 0.00010559295679964532, "loss": 0.7049, "step": 32571 }, { "epoch": 0.8363576255655001, "grad_norm": 0.77734375, "learning_rate": 0.00010558849958012662, "loss": 0.7055, "step": 32572 }, { "epoch": 0.8363833027614219, "grad_norm": 0.7265625, "learning_rate": 0.0001055840423494706, "loss": 0.5579, "step": 32573 }, { "epoch": 0.8364089799573438, "grad_norm": 0.71875, "learning_rate": 0.00010557958510768606, "loss": 0.7784, "step": 32574 }, { "epoch": 0.8364346571532656, "grad_norm": 0.7890625, "learning_rate": 0.0001055751278547819, "loss": 0.7433, "step": 32575 }, { "epoch": 0.8364603343491874, "grad_norm": 0.734375, "learning_rate": 0.00010557067059076706, "loss": 0.8211, "step": 32576 }, { "epoch": 0.8364860115451093, "grad_norm": 0.78515625, "learning_rate": 0.00010556621331565035, "loss": 0.873, "step": 32577 }, { "epoch": 0.836511688741031, "grad_norm": 0.7734375, "learning_rate": 0.00010556175602944067, "loss": 0.7742, "step": 32578 }, { "epoch": 0.8365373659369528, "grad_norm": 0.859375, "learning_rate": 0.00010555729873214692, "loss": 0.8653, "step": 32579 }, { "epoch": 0.8365630431328747, "grad_norm": 0.8046875, "learning_rate": 0.00010555284142377799, "loss": 0.8836, "step": 32580 }, { "epoch": 0.8365887203287965, "grad_norm": 0.82421875, "learning_rate": 0.0001055483841043427, "loss": 0.9081, "step": 32581 }, { "epoch": 0.8366143975247183, "grad_norm": 0.828125, "learning_rate": 0.00010554392677385005, "loss": 1.0299, "step": 32582 }, { "epoch": 0.8366400747206402, "grad_norm": 0.765625, "learning_rate": 0.00010553946943230881, "loss": 0.8226, "step": 32583 }, { "epoch": 0.836665751916562, "grad_norm": 0.9140625, "learning_rate": 0.00010553501207972791, "loss": 0.9042, "step": 32584 }, { "epoch": 0.8366914291124837, "grad_norm": 0.765625, "learning_rate": 0.00010553055471611626, "loss": 0.8416, "step": 32585 }, { "epoch": 0.8367171063084056, "grad_norm": 0.71875, "learning_rate": 0.00010552609734148267, "loss": 0.8339, "step": 32586 }, { "epoch": 0.8367427835043274, "grad_norm": 0.8515625, "learning_rate": 0.00010552163995583611, "loss": 0.7554, "step": 32587 }, { "epoch": 0.8367684607002492, "grad_norm": 0.82421875, "learning_rate": 0.00010551718255918539, "loss": 0.8006, "step": 32588 }, { "epoch": 0.8367941378961711, "grad_norm": 0.890625, "learning_rate": 0.00010551272515153944, "loss": 0.9957, "step": 32589 }, { "epoch": 0.8368198150920929, "grad_norm": 0.8359375, "learning_rate": 0.00010550826773290711, "loss": 0.8383, "step": 32590 }, { "epoch": 0.8368454922880146, "grad_norm": 0.78125, "learning_rate": 0.0001055038103032973, "loss": 0.764, "step": 32591 }, { "epoch": 0.8368711694839365, "grad_norm": 0.73828125, "learning_rate": 0.00010549935286271893, "loss": 0.7348, "step": 32592 }, { "epoch": 0.8368968466798583, "grad_norm": 0.77734375, "learning_rate": 0.00010549489541118083, "loss": 0.8552, "step": 32593 }, { "epoch": 0.8369225238757801, "grad_norm": 0.76171875, "learning_rate": 0.00010549043794869187, "loss": 0.8452, "step": 32594 }, { "epoch": 0.836948201071702, "grad_norm": 0.76171875, "learning_rate": 0.000105485980475261, "loss": 0.7577, "step": 32595 }, { "epoch": 0.8369738782676238, "grad_norm": 0.76953125, "learning_rate": 0.00010548152299089704, "loss": 0.8653, "step": 32596 }, { "epoch": 0.8369995554635457, "grad_norm": 0.765625, "learning_rate": 0.00010547706549560892, "loss": 0.7365, "step": 32597 }, { "epoch": 0.8370252326594674, "grad_norm": 0.78125, "learning_rate": 0.00010547260798940548, "loss": 0.8168, "step": 32598 }, { "epoch": 0.8370509098553892, "grad_norm": 0.88671875, "learning_rate": 0.00010546815047229563, "loss": 0.7685, "step": 32599 }, { "epoch": 0.837076587051311, "grad_norm": 0.75390625, "learning_rate": 0.00010546369294428826, "loss": 0.699, "step": 32600 }, { "epoch": 0.8371022642472329, "grad_norm": 0.73828125, "learning_rate": 0.00010545923540539226, "loss": 0.7044, "step": 32601 }, { "epoch": 0.8371279414431547, "grad_norm": 0.8125, "learning_rate": 0.00010545477785561648, "loss": 0.8864, "step": 32602 }, { "epoch": 0.8371536186390766, "grad_norm": 0.78515625, "learning_rate": 0.00010545032029496982, "loss": 0.7205, "step": 32603 }, { "epoch": 0.8371792958349984, "grad_norm": 0.70703125, "learning_rate": 0.00010544586272346116, "loss": 0.7795, "step": 32604 }, { "epoch": 0.8372049730309201, "grad_norm": 0.7421875, "learning_rate": 0.00010544140514109939, "loss": 0.8373, "step": 32605 }, { "epoch": 0.837230650226842, "grad_norm": 0.86328125, "learning_rate": 0.00010543694754789339, "loss": 0.8373, "step": 32606 }, { "epoch": 0.8372563274227638, "grad_norm": 0.7890625, "learning_rate": 0.00010543248994385204, "loss": 0.7916, "step": 32607 }, { "epoch": 0.8372820046186856, "grad_norm": 0.75390625, "learning_rate": 0.00010542803232898426, "loss": 0.8489, "step": 32608 }, { "epoch": 0.8373076818146075, "grad_norm": 0.83203125, "learning_rate": 0.00010542357470329889, "loss": 0.8145, "step": 32609 }, { "epoch": 0.8373333590105293, "grad_norm": 0.79296875, "learning_rate": 0.00010541911706680478, "loss": 0.817, "step": 32610 }, { "epoch": 0.837359036206451, "grad_norm": 1.921875, "learning_rate": 0.00010541465941951092, "loss": 0.6683, "step": 32611 }, { "epoch": 0.8373847134023729, "grad_norm": 0.74609375, "learning_rate": 0.00010541020176142608, "loss": 0.8103, "step": 32612 }, { "epoch": 0.8374103905982947, "grad_norm": 0.7734375, "learning_rate": 0.00010540574409255923, "loss": 0.7649, "step": 32613 }, { "epoch": 0.8374360677942165, "grad_norm": 0.79296875, "learning_rate": 0.00010540128641291922, "loss": 0.7404, "step": 32614 }, { "epoch": 0.8374617449901384, "grad_norm": 0.765625, "learning_rate": 0.00010539682872251493, "loss": 0.7542, "step": 32615 }, { "epoch": 0.8374874221860602, "grad_norm": 0.671875, "learning_rate": 0.00010539237102135524, "loss": 0.8065, "step": 32616 }, { "epoch": 0.837513099381982, "grad_norm": 0.765625, "learning_rate": 0.00010538791330944906, "loss": 0.7963, "step": 32617 }, { "epoch": 0.8375387765779038, "grad_norm": 0.81640625, "learning_rate": 0.00010538345558680522, "loss": 0.6875, "step": 32618 }, { "epoch": 0.8375644537738256, "grad_norm": 0.7578125, "learning_rate": 0.00010537899785343266, "loss": 0.8599, "step": 32619 }, { "epoch": 0.8375901309697474, "grad_norm": 0.765625, "learning_rate": 0.00010537454010934027, "loss": 0.7664, "step": 32620 }, { "epoch": 0.8376158081656693, "grad_norm": 0.984375, "learning_rate": 0.00010537008235453685, "loss": 0.881, "step": 32621 }, { "epoch": 0.8376414853615911, "grad_norm": 0.82421875, "learning_rate": 0.0001053656245890314, "loss": 0.9415, "step": 32622 }, { "epoch": 0.837667162557513, "grad_norm": 0.74609375, "learning_rate": 0.0001053611668128327, "loss": 0.6943, "step": 32623 }, { "epoch": 0.8376928397534348, "grad_norm": 0.7421875, "learning_rate": 0.0001053567090259497, "loss": 0.7981, "step": 32624 }, { "epoch": 0.8377185169493565, "grad_norm": 0.83203125, "learning_rate": 0.00010535225122839127, "loss": 0.8374, "step": 32625 }, { "epoch": 0.8377441941452783, "grad_norm": 0.7578125, "learning_rate": 0.00010534779342016626, "loss": 0.8968, "step": 32626 }, { "epoch": 0.8377698713412002, "grad_norm": 0.78515625, "learning_rate": 0.0001053433356012836, "loss": 0.8741, "step": 32627 }, { "epoch": 0.837795548537122, "grad_norm": 0.7421875, "learning_rate": 0.00010533887777175215, "loss": 0.71, "step": 32628 }, { "epoch": 0.8378212257330439, "grad_norm": 0.7890625, "learning_rate": 0.00010533441993158082, "loss": 0.8338, "step": 32629 }, { "epoch": 0.8378469029289657, "grad_norm": 0.76171875, "learning_rate": 0.00010532996208077845, "loss": 0.7117, "step": 32630 }, { "epoch": 0.8378725801248874, "grad_norm": 0.8125, "learning_rate": 0.00010532550421935392, "loss": 0.8216, "step": 32631 }, { "epoch": 0.8378982573208092, "grad_norm": 0.82421875, "learning_rate": 0.00010532104634731619, "loss": 0.9723, "step": 32632 }, { "epoch": 0.8379239345167311, "grad_norm": 0.76953125, "learning_rate": 0.00010531658846467408, "loss": 0.7862, "step": 32633 }, { "epoch": 0.8379496117126529, "grad_norm": 0.77734375, "learning_rate": 0.00010531213057143647, "loss": 0.8903, "step": 32634 }, { "epoch": 0.8379752889085748, "grad_norm": 0.78515625, "learning_rate": 0.0001053076726676123, "loss": 0.9095, "step": 32635 }, { "epoch": 0.8380009661044966, "grad_norm": 0.85546875, "learning_rate": 0.00010530321475321039, "loss": 0.8989, "step": 32636 }, { "epoch": 0.8380266433004184, "grad_norm": 0.8203125, "learning_rate": 0.00010529875682823967, "loss": 0.7437, "step": 32637 }, { "epoch": 0.8380523204963402, "grad_norm": 0.8125, "learning_rate": 0.00010529429889270899, "loss": 0.7711, "step": 32638 }, { "epoch": 0.838077997692262, "grad_norm": 0.76953125, "learning_rate": 0.00010528984094662724, "loss": 0.7808, "step": 32639 }, { "epoch": 0.8381036748881838, "grad_norm": 0.875, "learning_rate": 0.00010528538299000332, "loss": 0.7354, "step": 32640 }, { "epoch": 0.8381293520841057, "grad_norm": 0.8203125, "learning_rate": 0.00010528092502284614, "loss": 0.8709, "step": 32641 }, { "epoch": 0.8381550292800275, "grad_norm": 0.79296875, "learning_rate": 0.00010527646704516455, "loss": 0.7465, "step": 32642 }, { "epoch": 0.8381807064759493, "grad_norm": 0.86328125, "learning_rate": 0.00010527200905696742, "loss": 0.8781, "step": 32643 }, { "epoch": 0.8382063836718711, "grad_norm": 0.83984375, "learning_rate": 0.00010526755105826363, "loss": 0.7008, "step": 32644 }, { "epoch": 0.8382320608677929, "grad_norm": 0.765625, "learning_rate": 0.00010526309304906212, "loss": 0.8542, "step": 32645 }, { "epoch": 0.8382577380637147, "grad_norm": 0.6875, "learning_rate": 0.00010525863502937173, "loss": 0.7264, "step": 32646 }, { "epoch": 0.8382834152596366, "grad_norm": 0.71875, "learning_rate": 0.00010525417699920135, "loss": 0.8354, "step": 32647 }, { "epoch": 0.8383090924555584, "grad_norm": 0.85546875, "learning_rate": 0.00010524971895855989, "loss": 0.8122, "step": 32648 }, { "epoch": 0.8383347696514802, "grad_norm": 0.77734375, "learning_rate": 0.00010524526090745619, "loss": 0.7137, "step": 32649 }, { "epoch": 0.8383604468474021, "grad_norm": 0.8125, "learning_rate": 0.00010524080284589917, "loss": 0.9018, "step": 32650 }, { "epoch": 0.8383861240433238, "grad_norm": 0.66015625, "learning_rate": 0.0001052363447738977, "loss": 0.6798, "step": 32651 }, { "epoch": 0.8384118012392456, "grad_norm": 0.77734375, "learning_rate": 0.00010523188669146066, "loss": 0.7307, "step": 32652 }, { "epoch": 0.8384374784351675, "grad_norm": 0.69921875, "learning_rate": 0.00010522742859859696, "loss": 0.7643, "step": 32653 }, { "epoch": 0.8384631556310893, "grad_norm": 0.81640625, "learning_rate": 0.00010522297049531549, "loss": 0.7294, "step": 32654 }, { "epoch": 0.8384888328270111, "grad_norm": 0.7421875, "learning_rate": 0.00010521851238162505, "loss": 0.7143, "step": 32655 }, { "epoch": 0.838514510022933, "grad_norm": 0.83984375, "learning_rate": 0.00010521405425753464, "loss": 0.9013, "step": 32656 }, { "epoch": 0.8385401872188548, "grad_norm": 0.71875, "learning_rate": 0.00010520959612305305, "loss": 0.6896, "step": 32657 }, { "epoch": 0.8385658644147765, "grad_norm": 0.76171875, "learning_rate": 0.00010520513797818922, "loss": 0.8157, "step": 32658 }, { "epoch": 0.8385915416106984, "grad_norm": 0.7265625, "learning_rate": 0.00010520067982295202, "loss": 0.62, "step": 32659 }, { "epoch": 0.8386172188066202, "grad_norm": 0.72265625, "learning_rate": 0.00010519622165735033, "loss": 0.733, "step": 32660 }, { "epoch": 0.838642896002542, "grad_norm": 0.76171875, "learning_rate": 0.00010519176348139307, "loss": 0.8467, "step": 32661 }, { "epoch": 0.8386685731984639, "grad_norm": 0.86328125, "learning_rate": 0.00010518730529508909, "loss": 0.7031, "step": 32662 }, { "epoch": 0.8386942503943857, "grad_norm": 0.8125, "learning_rate": 0.00010518284709844724, "loss": 0.9785, "step": 32663 }, { "epoch": 0.8387199275903074, "grad_norm": 0.8125, "learning_rate": 0.00010517838889147646, "loss": 0.8325, "step": 32664 }, { "epoch": 0.8387456047862293, "grad_norm": 0.8515625, "learning_rate": 0.00010517393067418564, "loss": 0.7703, "step": 32665 }, { "epoch": 0.8387712819821511, "grad_norm": 0.7421875, "learning_rate": 0.00010516947244658361, "loss": 0.883, "step": 32666 }, { "epoch": 0.838796959178073, "grad_norm": 0.78515625, "learning_rate": 0.00010516501420867932, "loss": 0.7273, "step": 32667 }, { "epoch": 0.8388226363739948, "grad_norm": 0.79296875, "learning_rate": 0.00010516055596048162, "loss": 0.8128, "step": 32668 }, { "epoch": 0.8388483135699166, "grad_norm": 0.71484375, "learning_rate": 0.00010515609770199938, "loss": 0.8152, "step": 32669 }, { "epoch": 0.8388739907658385, "grad_norm": 0.88671875, "learning_rate": 0.00010515163943324151, "loss": 0.809, "step": 32670 }, { "epoch": 0.8388996679617602, "grad_norm": 0.78125, "learning_rate": 0.00010514718115421689, "loss": 0.8526, "step": 32671 }, { "epoch": 0.838925345157682, "grad_norm": 0.83203125, "learning_rate": 0.0001051427228649344, "loss": 0.9659, "step": 32672 }, { "epoch": 0.8389510223536039, "grad_norm": 0.7578125, "learning_rate": 0.00010513826456540295, "loss": 0.7801, "step": 32673 }, { "epoch": 0.8389766995495257, "grad_norm": 0.80078125, "learning_rate": 0.00010513380625563136, "loss": 0.7792, "step": 32674 }, { "epoch": 0.8390023767454475, "grad_norm": 0.82421875, "learning_rate": 0.00010512934793562861, "loss": 0.8051, "step": 32675 }, { "epoch": 0.8390280539413694, "grad_norm": 0.8203125, "learning_rate": 0.00010512488960540348, "loss": 0.7076, "step": 32676 }, { "epoch": 0.8390537311372912, "grad_norm": 0.7421875, "learning_rate": 0.00010512043126496494, "loss": 0.8714, "step": 32677 }, { "epoch": 0.8390794083332129, "grad_norm": 0.83203125, "learning_rate": 0.00010511597291432183, "loss": 0.868, "step": 32678 }, { "epoch": 0.8391050855291348, "grad_norm": 0.890625, "learning_rate": 0.00010511151455348302, "loss": 0.883, "step": 32679 }, { "epoch": 0.8391307627250566, "grad_norm": 0.83984375, "learning_rate": 0.00010510705618245749, "loss": 0.8499, "step": 32680 }, { "epoch": 0.8391564399209784, "grad_norm": 0.8671875, "learning_rate": 0.00010510259780125403, "loss": 0.9627, "step": 32681 }, { "epoch": 0.8391821171169003, "grad_norm": 0.77734375, "learning_rate": 0.00010509813940988154, "loss": 0.8189, "step": 32682 }, { "epoch": 0.8392077943128221, "grad_norm": 0.7578125, "learning_rate": 0.00010509368100834893, "loss": 0.7242, "step": 32683 }, { "epoch": 0.8392334715087438, "grad_norm": 1.046875, "learning_rate": 0.00010508922259666505, "loss": 0.8133, "step": 32684 }, { "epoch": 0.8392591487046657, "grad_norm": 0.703125, "learning_rate": 0.00010508476417483883, "loss": 0.8411, "step": 32685 }, { "epoch": 0.8392848259005875, "grad_norm": 0.7578125, "learning_rate": 0.00010508030574287915, "loss": 0.6868, "step": 32686 }, { "epoch": 0.8393105030965093, "grad_norm": 0.71875, "learning_rate": 0.00010507584730079487, "loss": 0.7282, "step": 32687 }, { "epoch": 0.8393361802924312, "grad_norm": 0.86328125, "learning_rate": 0.00010507138884859487, "loss": 0.883, "step": 32688 }, { "epoch": 0.839361857488353, "grad_norm": 0.81640625, "learning_rate": 0.00010506693038628806, "loss": 0.7902, "step": 32689 }, { "epoch": 0.8393875346842748, "grad_norm": 0.79296875, "learning_rate": 0.0001050624719138833, "loss": 0.7985, "step": 32690 }, { "epoch": 0.8394132118801966, "grad_norm": 0.80859375, "learning_rate": 0.00010505801343138953, "loss": 0.917, "step": 32691 }, { "epoch": 0.8394388890761184, "grad_norm": 0.76953125, "learning_rate": 0.00010505355493881555, "loss": 0.7669, "step": 32692 }, { "epoch": 0.8394645662720402, "grad_norm": 0.7109375, "learning_rate": 0.00010504909643617032, "loss": 0.8185, "step": 32693 }, { "epoch": 0.8394902434679621, "grad_norm": 0.78125, "learning_rate": 0.0001050446379234627, "loss": 0.8825, "step": 32694 }, { "epoch": 0.8395159206638839, "grad_norm": 0.80859375, "learning_rate": 0.00010504017940070154, "loss": 0.7344, "step": 32695 }, { "epoch": 0.8395415978598058, "grad_norm": 0.78515625, "learning_rate": 0.0001050357208678958, "loss": 0.8244, "step": 32696 }, { "epoch": 0.8395672750557276, "grad_norm": 0.8046875, "learning_rate": 0.00010503126232505428, "loss": 0.9066, "step": 32697 }, { "epoch": 0.8395929522516493, "grad_norm": 0.828125, "learning_rate": 0.00010502680377218593, "loss": 0.7099, "step": 32698 }, { "epoch": 0.8396186294475712, "grad_norm": 0.76171875, "learning_rate": 0.00010502234520929961, "loss": 0.7607, "step": 32699 }, { "epoch": 0.839644306643493, "grad_norm": 0.82421875, "learning_rate": 0.00010501788663640422, "loss": 0.832, "step": 32700 }, { "epoch": 0.8396699838394148, "grad_norm": 0.76171875, "learning_rate": 0.00010501342805350865, "loss": 0.7997, "step": 32701 }, { "epoch": 0.8396956610353367, "grad_norm": 0.8828125, "learning_rate": 0.00010500896946062175, "loss": 0.8892, "step": 32702 }, { "epoch": 0.8397213382312585, "grad_norm": 0.74609375, "learning_rate": 0.00010500451085775241, "loss": 0.6667, "step": 32703 }, { "epoch": 0.8397470154271802, "grad_norm": 0.72265625, "learning_rate": 0.00010500005224490955, "loss": 0.763, "step": 32704 }, { "epoch": 0.8397726926231021, "grad_norm": 0.9296875, "learning_rate": 0.00010499559362210206, "loss": 0.7548, "step": 32705 }, { "epoch": 0.8397983698190239, "grad_norm": 0.80078125, "learning_rate": 0.00010499113498933874, "loss": 0.6877, "step": 32706 }, { "epoch": 0.8398240470149457, "grad_norm": 0.796875, "learning_rate": 0.00010498667634662859, "loss": 0.8939, "step": 32707 }, { "epoch": 0.8398497242108676, "grad_norm": 0.7734375, "learning_rate": 0.00010498221769398044, "loss": 0.9743, "step": 32708 }, { "epoch": 0.8398754014067894, "grad_norm": 0.80078125, "learning_rate": 0.00010497775903140317, "loss": 0.8343, "step": 32709 }, { "epoch": 0.8399010786027112, "grad_norm": 0.671875, "learning_rate": 0.00010497330035890569, "loss": 0.8037, "step": 32710 }, { "epoch": 0.839926755798633, "grad_norm": 0.8125, "learning_rate": 0.00010496884167649684, "loss": 0.8907, "step": 32711 }, { "epoch": 0.8399524329945548, "grad_norm": 0.77734375, "learning_rate": 0.00010496438298418556, "loss": 0.7974, "step": 32712 }, { "epoch": 0.8399781101904766, "grad_norm": 0.76171875, "learning_rate": 0.00010495992428198071, "loss": 0.7049, "step": 32713 }, { "epoch": 0.8400037873863985, "grad_norm": 0.80859375, "learning_rate": 0.00010495546556989119, "loss": 0.8268, "step": 32714 }, { "epoch": 0.8400294645823203, "grad_norm": 0.78515625, "learning_rate": 0.00010495100684792586, "loss": 0.837, "step": 32715 }, { "epoch": 0.8400551417782421, "grad_norm": 0.76953125, "learning_rate": 0.00010494654811609362, "loss": 0.8208, "step": 32716 }, { "epoch": 0.840080818974164, "grad_norm": 0.76171875, "learning_rate": 0.00010494208937440337, "loss": 0.8898, "step": 32717 }, { "epoch": 0.8401064961700857, "grad_norm": 0.80078125, "learning_rate": 0.00010493763062286398, "loss": 0.7238, "step": 32718 }, { "epoch": 0.8401321733660075, "grad_norm": 0.8359375, "learning_rate": 0.0001049331718614843, "loss": 0.8174, "step": 32719 }, { "epoch": 0.8401578505619294, "grad_norm": 0.8203125, "learning_rate": 0.00010492871309027332, "loss": 0.9516, "step": 32720 }, { "epoch": 0.8401835277578512, "grad_norm": 0.73046875, "learning_rate": 0.0001049242543092398, "loss": 0.7733, "step": 32721 }, { "epoch": 0.840209204953773, "grad_norm": 0.83984375, "learning_rate": 0.00010491979551839273, "loss": 0.7897, "step": 32722 }, { "epoch": 0.8402348821496949, "grad_norm": 0.78515625, "learning_rate": 0.00010491533671774094, "loss": 0.9366, "step": 32723 }, { "epoch": 0.8402605593456166, "grad_norm": 0.83203125, "learning_rate": 0.0001049108779072933, "loss": 0.7225, "step": 32724 }, { "epoch": 0.8402862365415384, "grad_norm": 0.91796875, "learning_rate": 0.00010490641908705876, "loss": 0.8676, "step": 32725 }, { "epoch": 0.8403119137374603, "grad_norm": 0.76953125, "learning_rate": 0.00010490196025704615, "loss": 0.7321, "step": 32726 }, { "epoch": 0.8403375909333821, "grad_norm": 0.7734375, "learning_rate": 0.0001048975014172644, "loss": 0.7824, "step": 32727 }, { "epoch": 0.840363268129304, "grad_norm": 0.74609375, "learning_rate": 0.00010489304256772235, "loss": 0.8614, "step": 32728 }, { "epoch": 0.8403889453252258, "grad_norm": 0.79296875, "learning_rate": 0.00010488858370842888, "loss": 0.7481, "step": 32729 }, { "epoch": 0.8404146225211476, "grad_norm": 0.67578125, "learning_rate": 0.00010488412483939297, "loss": 0.8119, "step": 32730 }, { "epoch": 0.8404402997170694, "grad_norm": 0.80078125, "learning_rate": 0.0001048796659606234, "loss": 0.9102, "step": 32731 }, { "epoch": 0.8404659769129912, "grad_norm": 0.7578125, "learning_rate": 0.00010487520707212908, "loss": 0.6847, "step": 32732 }, { "epoch": 0.840491654108913, "grad_norm": 0.85546875, "learning_rate": 0.00010487074817391898, "loss": 0.7811, "step": 32733 }, { "epoch": 0.8405173313048349, "grad_norm": 0.79296875, "learning_rate": 0.00010486628926600187, "loss": 0.7684, "step": 32734 }, { "epoch": 0.8405430085007567, "grad_norm": 0.78125, "learning_rate": 0.00010486183034838666, "loss": 0.8372, "step": 32735 }, { "epoch": 0.8405686856966785, "grad_norm": 0.765625, "learning_rate": 0.0001048573714210823, "loss": 0.776, "step": 32736 }, { "epoch": 0.8405943628926004, "grad_norm": 0.7734375, "learning_rate": 0.0001048529124840976, "loss": 0.854, "step": 32737 }, { "epoch": 0.8406200400885221, "grad_norm": 0.81640625, "learning_rate": 0.00010484845353744153, "loss": 0.8611, "step": 32738 }, { "epoch": 0.8406457172844439, "grad_norm": 0.80859375, "learning_rate": 0.00010484399458112293, "loss": 0.8716, "step": 32739 }, { "epoch": 0.8406713944803658, "grad_norm": 0.75, "learning_rate": 0.00010483953561515065, "loss": 0.7157, "step": 32740 }, { "epoch": 0.8406970716762876, "grad_norm": 0.69140625, "learning_rate": 0.00010483507663953365, "loss": 0.6504, "step": 32741 }, { "epoch": 0.8407227488722094, "grad_norm": 0.75390625, "learning_rate": 0.00010483061765428077, "loss": 0.887, "step": 32742 }, { "epoch": 0.8407484260681313, "grad_norm": 0.796875, "learning_rate": 0.00010482615865940086, "loss": 0.9372, "step": 32743 }, { "epoch": 0.840774103264053, "grad_norm": 0.71484375, "learning_rate": 0.00010482169965490292, "loss": 0.7786, "step": 32744 }, { "epoch": 0.8407997804599748, "grad_norm": 0.796875, "learning_rate": 0.00010481724064079572, "loss": 0.7936, "step": 32745 }, { "epoch": 0.8408254576558967, "grad_norm": 0.78515625, "learning_rate": 0.00010481278161708823, "loss": 0.8959, "step": 32746 }, { "epoch": 0.8408511348518185, "grad_norm": 0.80078125, "learning_rate": 0.00010480832258378927, "loss": 0.7494, "step": 32747 }, { "epoch": 0.8408768120477403, "grad_norm": 0.73046875, "learning_rate": 0.00010480386354090776, "loss": 0.6241, "step": 32748 }, { "epoch": 0.8409024892436622, "grad_norm": 0.875, "learning_rate": 0.0001047994044884526, "loss": 0.8782, "step": 32749 }, { "epoch": 0.840928166439584, "grad_norm": 0.734375, "learning_rate": 0.00010479494542643264, "loss": 0.8091, "step": 32750 }, { "epoch": 0.8409538436355057, "grad_norm": 0.828125, "learning_rate": 0.0001047904863548568, "loss": 0.8579, "step": 32751 }, { "epoch": 0.8409795208314276, "grad_norm": 0.765625, "learning_rate": 0.00010478602727373394, "loss": 0.7632, "step": 32752 }, { "epoch": 0.8410051980273494, "grad_norm": 0.76953125, "learning_rate": 0.00010478156818307299, "loss": 0.9024, "step": 32753 }, { "epoch": 0.8410308752232712, "grad_norm": 0.7421875, "learning_rate": 0.00010477710908288281, "loss": 0.77, "step": 32754 }, { "epoch": 0.8410565524191931, "grad_norm": 0.79296875, "learning_rate": 0.00010477264997317227, "loss": 0.7983, "step": 32755 }, { "epoch": 0.8410822296151149, "grad_norm": 0.77734375, "learning_rate": 0.00010476819085395022, "loss": 0.7683, "step": 32756 }, { "epoch": 0.8411079068110368, "grad_norm": 0.75390625, "learning_rate": 0.00010476373172522564, "loss": 0.8253, "step": 32757 }, { "epoch": 0.8411335840069585, "grad_norm": 0.76171875, "learning_rate": 0.0001047592725870074, "loss": 0.8284, "step": 32758 }, { "epoch": 0.8411592612028803, "grad_norm": 0.87109375, "learning_rate": 0.00010475481343930431, "loss": 0.8478, "step": 32759 }, { "epoch": 0.8411849383988022, "grad_norm": 0.78125, "learning_rate": 0.00010475035428212534, "loss": 0.7274, "step": 32760 }, { "epoch": 0.841210615594724, "grad_norm": 0.86328125, "learning_rate": 0.00010474589511547932, "loss": 0.8095, "step": 32761 }, { "epoch": 0.8412362927906458, "grad_norm": 0.77734375, "learning_rate": 0.00010474143593937518, "loss": 0.7646, "step": 32762 }, { "epoch": 0.8412619699865677, "grad_norm": 0.82421875, "learning_rate": 0.0001047369767538218, "loss": 0.8052, "step": 32763 }, { "epoch": 0.8412876471824894, "grad_norm": 0.76953125, "learning_rate": 0.000104732517558828, "loss": 0.787, "step": 32764 }, { "epoch": 0.8413133243784112, "grad_norm": 0.76953125, "learning_rate": 0.00010472805835440276, "loss": 0.7498, "step": 32765 }, { "epoch": 0.8413390015743331, "grad_norm": 0.69921875, "learning_rate": 0.00010472359914055491, "loss": 0.7135, "step": 32766 }, { "epoch": 0.8413646787702549, "grad_norm": 0.79296875, "learning_rate": 0.00010471913991729337, "loss": 0.9518, "step": 32767 }, { "epoch": 0.8413903559661767, "grad_norm": 0.7421875, "learning_rate": 0.000104714680684627, "loss": 0.8408, "step": 32768 }, { "epoch": 0.8414160331620986, "grad_norm": 0.76171875, "learning_rate": 0.00010471022144256468, "loss": 0.81, "step": 32769 }, { "epoch": 0.8414417103580204, "grad_norm": 0.74609375, "learning_rate": 0.00010470576219111534, "loss": 0.8227, "step": 32770 }, { "epoch": 0.8414673875539421, "grad_norm": 0.765625, "learning_rate": 0.00010470130293028784, "loss": 0.8085, "step": 32771 }, { "epoch": 0.841493064749864, "grad_norm": 0.75390625, "learning_rate": 0.00010469684366009105, "loss": 0.853, "step": 32772 }, { "epoch": 0.8415187419457858, "grad_norm": 1.453125, "learning_rate": 0.0001046923843805339, "loss": 0.7088, "step": 32773 }, { "epoch": 0.8415444191417076, "grad_norm": 0.79296875, "learning_rate": 0.00010468792509162523, "loss": 0.8255, "step": 32774 }, { "epoch": 0.8415700963376295, "grad_norm": 0.79296875, "learning_rate": 0.00010468346579337396, "loss": 0.7494, "step": 32775 }, { "epoch": 0.8415957735335513, "grad_norm": 0.734375, "learning_rate": 0.00010467900648578896, "loss": 0.8762, "step": 32776 }, { "epoch": 0.8416214507294731, "grad_norm": 0.85546875, "learning_rate": 0.00010467454716887912, "loss": 0.7039, "step": 32777 }, { "epoch": 0.8416471279253949, "grad_norm": 0.7578125, "learning_rate": 0.00010467008784265333, "loss": 0.6961, "step": 32778 }, { "epoch": 0.8416728051213167, "grad_norm": 0.7578125, "learning_rate": 0.00010466562850712052, "loss": 0.7101, "step": 32779 }, { "epoch": 0.8416984823172385, "grad_norm": 0.828125, "learning_rate": 0.00010466116916228948, "loss": 0.7252, "step": 32780 }, { "epoch": 0.8417241595131604, "grad_norm": 0.80859375, "learning_rate": 0.00010465670980816917, "loss": 0.7778, "step": 32781 }, { "epoch": 0.8417498367090822, "grad_norm": 0.79296875, "learning_rate": 0.00010465225044476845, "loss": 0.9585, "step": 32782 }, { "epoch": 0.841775513905004, "grad_norm": 0.82421875, "learning_rate": 0.00010464779107209622, "loss": 0.9216, "step": 32783 }, { "epoch": 0.8418011911009258, "grad_norm": 0.7578125, "learning_rate": 0.00010464333169016136, "loss": 0.8299, "step": 32784 }, { "epoch": 0.8418268682968476, "grad_norm": 0.7421875, "learning_rate": 0.00010463887229897273, "loss": 0.7882, "step": 32785 }, { "epoch": 0.8418525454927694, "grad_norm": 0.76171875, "learning_rate": 0.00010463441289853931, "loss": 0.7251, "step": 32786 }, { "epoch": 0.8418782226886913, "grad_norm": 0.78515625, "learning_rate": 0.00010462995348886991, "loss": 0.8116, "step": 32787 }, { "epoch": 0.8419038998846131, "grad_norm": 0.78125, "learning_rate": 0.00010462549406997338, "loss": 1.0279, "step": 32788 }, { "epoch": 0.841929577080535, "grad_norm": 0.765625, "learning_rate": 0.0001046210346418587, "loss": 0.8418, "step": 32789 }, { "epoch": 0.8419552542764568, "grad_norm": 0.74609375, "learning_rate": 0.00010461657520453472, "loss": 0.8795, "step": 32790 }, { "epoch": 0.8419809314723785, "grad_norm": 0.953125, "learning_rate": 0.00010461211575801028, "loss": 0.8402, "step": 32791 }, { "epoch": 0.8420066086683003, "grad_norm": 0.77734375, "learning_rate": 0.00010460765630229434, "loss": 0.8053, "step": 32792 }, { "epoch": 0.8420322858642222, "grad_norm": 0.734375, "learning_rate": 0.00010460319683739576, "loss": 0.9389, "step": 32793 }, { "epoch": 0.842057963060144, "grad_norm": 0.765625, "learning_rate": 0.00010459873736332344, "loss": 0.8042, "step": 32794 }, { "epoch": 0.8420836402560659, "grad_norm": 0.73828125, "learning_rate": 0.00010459427788008622, "loss": 0.7818, "step": 32795 }, { "epoch": 0.8421093174519877, "grad_norm": 0.90234375, "learning_rate": 0.00010458981838769302, "loss": 0.7543, "step": 32796 }, { "epoch": 0.8421349946479095, "grad_norm": 0.7734375, "learning_rate": 0.00010458535888615275, "loss": 0.826, "step": 32797 }, { "epoch": 0.8421606718438313, "grad_norm": 0.8515625, "learning_rate": 0.00010458089937547426, "loss": 0.8933, "step": 32798 }, { "epoch": 0.8421863490397531, "grad_norm": 0.796875, "learning_rate": 0.00010457643985566645, "loss": 0.817, "step": 32799 }, { "epoch": 0.8422120262356749, "grad_norm": 0.734375, "learning_rate": 0.00010457198032673823, "loss": 0.8757, "step": 32800 }, { "epoch": 0.8422377034315968, "grad_norm": 0.75, "learning_rate": 0.00010456752078869842, "loss": 0.7749, "step": 32801 }, { "epoch": 0.8422633806275186, "grad_norm": 0.91015625, "learning_rate": 0.000104563061241556, "loss": 0.8866, "step": 32802 }, { "epoch": 0.8422890578234404, "grad_norm": 0.7578125, "learning_rate": 0.0001045586016853198, "loss": 0.964, "step": 32803 }, { "epoch": 0.8423147350193622, "grad_norm": 0.84765625, "learning_rate": 0.00010455414211999867, "loss": 0.8873, "step": 32804 }, { "epoch": 0.842340412215284, "grad_norm": 0.71875, "learning_rate": 0.0001045496825456016, "loss": 0.7246, "step": 32805 }, { "epoch": 0.8423660894112058, "grad_norm": 0.7734375, "learning_rate": 0.00010454522296213742, "loss": 0.6967, "step": 32806 }, { "epoch": 0.8423917666071277, "grad_norm": 0.75390625, "learning_rate": 0.00010454076336961502, "loss": 0.8505, "step": 32807 }, { "epoch": 0.8424174438030495, "grad_norm": 0.79296875, "learning_rate": 0.00010453630376804329, "loss": 0.7712, "step": 32808 }, { "epoch": 0.8424431209989713, "grad_norm": 0.7734375, "learning_rate": 0.0001045318441574311, "loss": 0.7744, "step": 32809 }, { "epoch": 0.8424687981948932, "grad_norm": 0.72265625, "learning_rate": 0.00010452738453778734, "loss": 0.6428, "step": 32810 }, { "epoch": 0.8424944753908149, "grad_norm": 0.78125, "learning_rate": 0.00010452292490912094, "loss": 0.7949, "step": 32811 }, { "epoch": 0.8425201525867367, "grad_norm": 0.74609375, "learning_rate": 0.00010451846527144076, "loss": 0.7125, "step": 32812 }, { "epoch": 0.8425458297826586, "grad_norm": 0.80859375, "learning_rate": 0.00010451400562475569, "loss": 0.8487, "step": 32813 }, { "epoch": 0.8425715069785804, "grad_norm": 0.82421875, "learning_rate": 0.00010450954596907458, "loss": 0.7342, "step": 32814 }, { "epoch": 0.8425971841745022, "grad_norm": 1.296875, "learning_rate": 0.00010450508630440639, "loss": 0.7226, "step": 32815 }, { "epoch": 0.8426228613704241, "grad_norm": 0.78515625, "learning_rate": 0.00010450062663075994, "loss": 0.809, "step": 32816 }, { "epoch": 0.8426485385663459, "grad_norm": 0.80859375, "learning_rate": 0.00010449616694814418, "loss": 0.7569, "step": 32817 }, { "epoch": 0.8426742157622676, "grad_norm": 0.6953125, "learning_rate": 0.00010449170725656794, "loss": 0.655, "step": 32818 }, { "epoch": 0.8426998929581895, "grad_norm": 0.828125, "learning_rate": 0.00010448724755604015, "loss": 0.9641, "step": 32819 }, { "epoch": 0.8427255701541113, "grad_norm": 0.83984375, "learning_rate": 0.00010448278784656966, "loss": 0.8582, "step": 32820 }, { "epoch": 0.8427512473500332, "grad_norm": 0.734375, "learning_rate": 0.0001044783281281654, "loss": 0.7246, "step": 32821 }, { "epoch": 0.842776924545955, "grad_norm": 0.78125, "learning_rate": 0.00010447386840083621, "loss": 0.7618, "step": 32822 }, { "epoch": 0.8428026017418768, "grad_norm": 0.7578125, "learning_rate": 0.00010446940866459102, "loss": 0.7929, "step": 32823 }, { "epoch": 0.8428282789377985, "grad_norm": 0.80078125, "learning_rate": 0.00010446494891943872, "loss": 0.8713, "step": 32824 }, { "epoch": 0.8428539561337204, "grad_norm": 0.71875, "learning_rate": 0.00010446048916538818, "loss": 0.7527, "step": 32825 }, { "epoch": 0.8428796333296422, "grad_norm": 0.921875, "learning_rate": 0.00010445602940244828, "loss": 0.7926, "step": 32826 }, { "epoch": 0.842905310525564, "grad_norm": 0.83984375, "learning_rate": 0.0001044515696306279, "loss": 0.7276, "step": 32827 }, { "epoch": 0.8429309877214859, "grad_norm": 0.8359375, "learning_rate": 0.00010444710984993595, "loss": 0.7462, "step": 32828 }, { "epoch": 0.8429566649174077, "grad_norm": 0.84375, "learning_rate": 0.00010444265006038132, "loss": 0.9116, "step": 32829 }, { "epoch": 0.8429823421133296, "grad_norm": 0.78515625, "learning_rate": 0.00010443819026197288, "loss": 0.7966, "step": 32830 }, { "epoch": 0.8430080193092513, "grad_norm": 0.796875, "learning_rate": 0.00010443373045471952, "loss": 0.8543, "step": 32831 }, { "epoch": 0.8430336965051731, "grad_norm": 0.7578125, "learning_rate": 0.00010442927063863017, "loss": 0.8301, "step": 32832 }, { "epoch": 0.843059373701095, "grad_norm": 1.4453125, "learning_rate": 0.00010442481081371364, "loss": 0.7813, "step": 32833 }, { "epoch": 0.8430850508970168, "grad_norm": 0.77734375, "learning_rate": 0.00010442035097997889, "loss": 0.7947, "step": 32834 }, { "epoch": 0.8431107280929386, "grad_norm": 0.78515625, "learning_rate": 0.00010441589113743477, "loss": 0.7702, "step": 32835 }, { "epoch": 0.8431364052888605, "grad_norm": 0.8359375, "learning_rate": 0.00010441143128609016, "loss": 0.8257, "step": 32836 }, { "epoch": 0.8431620824847823, "grad_norm": 0.65234375, "learning_rate": 0.00010440697142595402, "loss": 0.7042, "step": 32837 }, { "epoch": 0.843187759680704, "grad_norm": 0.765625, "learning_rate": 0.00010440251155703513, "loss": 0.8274, "step": 32838 }, { "epoch": 0.8432134368766259, "grad_norm": 0.765625, "learning_rate": 0.00010439805167934247, "loss": 0.7446, "step": 32839 }, { "epoch": 0.8432391140725477, "grad_norm": 0.73046875, "learning_rate": 0.00010439359179288488, "loss": 0.7838, "step": 32840 }, { "epoch": 0.8432647912684695, "grad_norm": 0.796875, "learning_rate": 0.00010438913189767123, "loss": 0.7284, "step": 32841 }, { "epoch": 0.8432904684643914, "grad_norm": 0.703125, "learning_rate": 0.00010438467199371047, "loss": 0.6515, "step": 32842 }, { "epoch": 0.8433161456603132, "grad_norm": 0.703125, "learning_rate": 0.00010438021208101146, "loss": 0.8598, "step": 32843 }, { "epoch": 0.8433418228562349, "grad_norm": 0.77734375, "learning_rate": 0.00010437575215958305, "loss": 0.7946, "step": 32844 }, { "epoch": 0.8433675000521568, "grad_norm": 0.76953125, "learning_rate": 0.00010437129222943418, "loss": 0.8823, "step": 32845 }, { "epoch": 0.8433931772480786, "grad_norm": 0.8125, "learning_rate": 0.00010436683229057371, "loss": 0.6337, "step": 32846 }, { "epoch": 0.8434188544440004, "grad_norm": 0.78515625, "learning_rate": 0.00010436237234301054, "loss": 0.76, "step": 32847 }, { "epoch": 0.8434445316399223, "grad_norm": 0.73828125, "learning_rate": 0.00010435791238675357, "loss": 0.6883, "step": 32848 }, { "epoch": 0.8434702088358441, "grad_norm": 0.734375, "learning_rate": 0.00010435345242181165, "loss": 0.8242, "step": 32849 }, { "epoch": 0.843495886031766, "grad_norm": 0.7421875, "learning_rate": 0.00010434899244819372, "loss": 0.8782, "step": 32850 }, { "epoch": 0.8435215632276877, "grad_norm": 0.85546875, "learning_rate": 0.00010434453246590863, "loss": 0.8495, "step": 32851 }, { "epoch": 0.8435472404236095, "grad_norm": 0.80859375, "learning_rate": 0.00010434007247496527, "loss": 0.77, "step": 32852 }, { "epoch": 0.8435729176195313, "grad_norm": 0.8046875, "learning_rate": 0.00010433561247537255, "loss": 0.9027, "step": 32853 }, { "epoch": 0.8435985948154532, "grad_norm": 0.7734375, "learning_rate": 0.00010433115246713934, "loss": 0.7869, "step": 32854 }, { "epoch": 0.843624272011375, "grad_norm": 0.79296875, "learning_rate": 0.00010432669245027453, "loss": 0.8645, "step": 32855 }, { "epoch": 0.8436499492072969, "grad_norm": 0.71875, "learning_rate": 0.00010432223242478703, "loss": 0.732, "step": 32856 }, { "epoch": 0.8436756264032186, "grad_norm": 0.71484375, "learning_rate": 0.00010431777239068567, "loss": 0.6805, "step": 32857 }, { "epoch": 0.8437013035991404, "grad_norm": 0.74609375, "learning_rate": 0.00010431331234797943, "loss": 0.7062, "step": 32858 }, { "epoch": 0.8437269807950623, "grad_norm": 0.84375, "learning_rate": 0.0001043088522966771, "loss": 0.948, "step": 32859 }, { "epoch": 0.8437526579909841, "grad_norm": 0.7421875, "learning_rate": 0.00010430439223678767, "loss": 0.8577, "step": 32860 }, { "epoch": 0.8437783351869059, "grad_norm": 0.70703125, "learning_rate": 0.00010429993216831995, "loss": 0.7637, "step": 32861 }, { "epoch": 0.8438040123828278, "grad_norm": 0.76953125, "learning_rate": 0.00010429547209128283, "loss": 0.7725, "step": 32862 }, { "epoch": 0.8438296895787496, "grad_norm": 0.77734375, "learning_rate": 0.00010429101200568525, "loss": 0.857, "step": 32863 }, { "epoch": 0.8438553667746713, "grad_norm": 0.80859375, "learning_rate": 0.00010428655191153607, "loss": 0.9614, "step": 32864 }, { "epoch": 0.8438810439705932, "grad_norm": 0.87890625, "learning_rate": 0.00010428209180884418, "loss": 0.804, "step": 32865 }, { "epoch": 0.843906721166515, "grad_norm": 0.91796875, "learning_rate": 0.00010427763169761847, "loss": 0.8909, "step": 32866 }, { "epoch": 0.8439323983624368, "grad_norm": 0.73046875, "learning_rate": 0.00010427317157786782, "loss": 0.7254, "step": 32867 }, { "epoch": 0.8439580755583587, "grad_norm": 0.75390625, "learning_rate": 0.00010426871144960111, "loss": 0.7871, "step": 32868 }, { "epoch": 0.8439837527542805, "grad_norm": 0.8203125, "learning_rate": 0.00010426425131282727, "loss": 0.8527, "step": 32869 }, { "epoch": 0.8440094299502023, "grad_norm": 0.81640625, "learning_rate": 0.00010425979116755515, "loss": 0.8379, "step": 32870 }, { "epoch": 0.8440351071461241, "grad_norm": 0.72265625, "learning_rate": 0.00010425533101379366, "loss": 0.7023, "step": 32871 }, { "epoch": 0.8440607843420459, "grad_norm": 0.90625, "learning_rate": 0.00010425087085155168, "loss": 0.7516, "step": 32872 }, { "epoch": 0.8440864615379677, "grad_norm": 0.72265625, "learning_rate": 0.00010424641068083804, "loss": 0.871, "step": 32873 }, { "epoch": 0.8441121387338896, "grad_norm": 0.74609375, "learning_rate": 0.00010424195050166176, "loss": 0.8095, "step": 32874 }, { "epoch": 0.8441378159298114, "grad_norm": 0.80859375, "learning_rate": 0.00010423749031403163, "loss": 0.7652, "step": 32875 }, { "epoch": 0.8441634931257332, "grad_norm": 0.99609375, "learning_rate": 0.00010423303011795655, "loss": 0.8709, "step": 32876 }, { "epoch": 0.844189170321655, "grad_norm": 0.76953125, "learning_rate": 0.00010422856991344545, "loss": 0.8302, "step": 32877 }, { "epoch": 0.8442148475175768, "grad_norm": 0.74609375, "learning_rate": 0.00010422410970050718, "loss": 0.8264, "step": 32878 }, { "epoch": 0.8442405247134986, "grad_norm": 0.81640625, "learning_rate": 0.00010421964947915066, "loss": 0.8353, "step": 32879 }, { "epoch": 0.8442662019094205, "grad_norm": 0.76953125, "learning_rate": 0.00010421518924938474, "loss": 0.7326, "step": 32880 }, { "epoch": 0.8442918791053423, "grad_norm": 0.8359375, "learning_rate": 0.00010421072901121832, "loss": 0.8401, "step": 32881 }, { "epoch": 0.8443175563012641, "grad_norm": 0.8359375, "learning_rate": 0.0001042062687646603, "loss": 0.888, "step": 32882 }, { "epoch": 0.844343233497186, "grad_norm": 0.72265625, "learning_rate": 0.00010420180850971958, "loss": 0.7355, "step": 32883 }, { "epoch": 0.8443689106931077, "grad_norm": 0.7265625, "learning_rate": 0.00010419734824640501, "loss": 0.8034, "step": 32884 }, { "epoch": 0.8443945878890295, "grad_norm": 0.66015625, "learning_rate": 0.00010419288797472555, "loss": 0.7537, "step": 32885 }, { "epoch": 0.8444202650849514, "grad_norm": 0.8203125, "learning_rate": 0.00010418842769469002, "loss": 0.916, "step": 32886 }, { "epoch": 0.8444459422808732, "grad_norm": 1.0703125, "learning_rate": 0.00010418396740630731, "loss": 0.7631, "step": 32887 }, { "epoch": 0.844471619476795, "grad_norm": 0.76171875, "learning_rate": 0.00010417950710958637, "loss": 0.7932, "step": 32888 }, { "epoch": 0.8444972966727169, "grad_norm": 0.80859375, "learning_rate": 0.00010417504680453601, "loss": 0.7393, "step": 32889 }, { "epoch": 0.8445229738686387, "grad_norm": 0.6640625, "learning_rate": 0.00010417058649116518, "loss": 0.7704, "step": 32890 }, { "epoch": 0.8445486510645605, "grad_norm": 0.734375, "learning_rate": 0.00010416612616948276, "loss": 0.7555, "step": 32891 }, { "epoch": 0.8445743282604823, "grad_norm": 0.80078125, "learning_rate": 0.0001041616658394976, "loss": 0.8847, "step": 32892 }, { "epoch": 0.8446000054564041, "grad_norm": 0.7734375, "learning_rate": 0.00010415720550121863, "loss": 0.8878, "step": 32893 }, { "epoch": 0.844625682652326, "grad_norm": 0.72265625, "learning_rate": 0.00010415274515465472, "loss": 0.8276, "step": 32894 }, { "epoch": 0.8446513598482478, "grad_norm": 0.94140625, "learning_rate": 0.00010414828479981477, "loss": 0.7551, "step": 32895 }, { "epoch": 0.8446770370441696, "grad_norm": 0.8046875, "learning_rate": 0.00010414382443670767, "loss": 0.8034, "step": 32896 }, { "epoch": 0.8447027142400914, "grad_norm": 0.77734375, "learning_rate": 0.00010413936406534229, "loss": 0.7691, "step": 32897 }, { "epoch": 0.8447283914360132, "grad_norm": 0.765625, "learning_rate": 0.00010413490368572753, "loss": 0.7622, "step": 32898 }, { "epoch": 0.844754068631935, "grad_norm": 0.76171875, "learning_rate": 0.0001041304432978723, "loss": 0.7777, "step": 32899 }, { "epoch": 0.8447797458278569, "grad_norm": 0.75390625, "learning_rate": 0.00010412598290178543, "loss": 0.7557, "step": 32900 }, { "epoch": 0.8448054230237787, "grad_norm": 0.73828125, "learning_rate": 0.00010412152249747588, "loss": 0.8251, "step": 32901 }, { "epoch": 0.8448311002197005, "grad_norm": 0.8359375, "learning_rate": 0.00010411706208495249, "loss": 0.8268, "step": 32902 }, { "epoch": 0.8448567774156224, "grad_norm": 0.69921875, "learning_rate": 0.00010411260166422419, "loss": 0.7858, "step": 32903 }, { "epoch": 0.8448824546115441, "grad_norm": 0.84375, "learning_rate": 0.00010410814123529986, "loss": 0.8385, "step": 32904 }, { "epoch": 0.8449081318074659, "grad_norm": 0.80859375, "learning_rate": 0.00010410368079818833, "loss": 0.7568, "step": 32905 }, { "epoch": 0.8449338090033878, "grad_norm": 0.78515625, "learning_rate": 0.00010409922035289856, "loss": 0.8553, "step": 32906 }, { "epoch": 0.8449594861993096, "grad_norm": 0.7265625, "learning_rate": 0.00010409475989943942, "loss": 0.7648, "step": 32907 }, { "epoch": 0.8449851633952314, "grad_norm": 0.98046875, "learning_rate": 0.00010409029943781976, "loss": 0.7464, "step": 32908 }, { "epoch": 0.8450108405911533, "grad_norm": 0.80859375, "learning_rate": 0.00010408583896804853, "loss": 0.7538, "step": 32909 }, { "epoch": 0.8450365177870751, "grad_norm": 0.7734375, "learning_rate": 0.00010408137849013457, "loss": 0.7454, "step": 32910 }, { "epoch": 0.8450621949829968, "grad_norm": 0.7734375, "learning_rate": 0.00010407691800408683, "loss": 0.8379, "step": 32911 }, { "epoch": 0.8450878721789187, "grad_norm": 0.77734375, "learning_rate": 0.00010407245750991415, "loss": 0.8781, "step": 32912 }, { "epoch": 0.8451135493748405, "grad_norm": 0.77734375, "learning_rate": 0.00010406799700762542, "loss": 0.8803, "step": 32913 }, { "epoch": 0.8451392265707623, "grad_norm": 0.85546875, "learning_rate": 0.00010406353649722953, "loss": 0.95, "step": 32914 }, { "epoch": 0.8451649037666842, "grad_norm": 0.79296875, "learning_rate": 0.00010405907597873539, "loss": 0.832, "step": 32915 }, { "epoch": 0.845190580962606, "grad_norm": 0.80859375, "learning_rate": 0.00010405461545215186, "loss": 0.853, "step": 32916 }, { "epoch": 0.8452162581585277, "grad_norm": 0.78515625, "learning_rate": 0.00010405015491748789, "loss": 0.8438, "step": 32917 }, { "epoch": 0.8452419353544496, "grad_norm": 0.890625, "learning_rate": 0.00010404569437475229, "loss": 0.768, "step": 32918 }, { "epoch": 0.8452676125503714, "grad_norm": 0.8515625, "learning_rate": 0.00010404123382395402, "loss": 0.7919, "step": 32919 }, { "epoch": 0.8452932897462933, "grad_norm": 0.78515625, "learning_rate": 0.00010403677326510191, "loss": 0.839, "step": 32920 }, { "epoch": 0.8453189669422151, "grad_norm": 0.87890625, "learning_rate": 0.00010403231269820486, "loss": 0.7049, "step": 32921 }, { "epoch": 0.8453446441381369, "grad_norm": 0.765625, "learning_rate": 0.00010402785212327179, "loss": 0.7371, "step": 32922 }, { "epoch": 0.8453703213340588, "grad_norm": 0.83203125, "learning_rate": 0.00010402339154031158, "loss": 0.8224, "step": 32923 }, { "epoch": 0.8453959985299805, "grad_norm": 0.78515625, "learning_rate": 0.00010401893094933313, "loss": 0.7448, "step": 32924 }, { "epoch": 0.8454216757259023, "grad_norm": 0.80859375, "learning_rate": 0.00010401447035034528, "loss": 0.7314, "step": 32925 }, { "epoch": 0.8454473529218242, "grad_norm": 0.828125, "learning_rate": 0.00010401000974335697, "loss": 0.894, "step": 32926 }, { "epoch": 0.845473030117746, "grad_norm": 0.8203125, "learning_rate": 0.00010400554912837708, "loss": 0.7847, "step": 32927 }, { "epoch": 0.8454987073136678, "grad_norm": 0.8671875, "learning_rate": 0.00010400108850541447, "loss": 0.7641, "step": 32928 }, { "epoch": 0.8455243845095897, "grad_norm": 0.88671875, "learning_rate": 0.00010399662787447806, "loss": 0.9721, "step": 32929 }, { "epoch": 0.8455500617055115, "grad_norm": 0.828125, "learning_rate": 0.00010399216723557675, "loss": 0.7948, "step": 32930 }, { "epoch": 0.8455757389014332, "grad_norm": 0.8046875, "learning_rate": 0.0001039877065887194, "loss": 0.6818, "step": 32931 }, { "epoch": 0.8456014160973551, "grad_norm": 0.828125, "learning_rate": 0.00010398324593391491, "loss": 0.8446, "step": 32932 }, { "epoch": 0.8456270932932769, "grad_norm": 0.74609375, "learning_rate": 0.00010397878527117217, "loss": 0.7074, "step": 32933 }, { "epoch": 0.8456527704891987, "grad_norm": 0.8125, "learning_rate": 0.00010397432460050007, "loss": 0.8202, "step": 32934 }, { "epoch": 0.8456784476851206, "grad_norm": 0.7734375, "learning_rate": 0.0001039698639219075, "loss": 0.7355, "step": 32935 }, { "epoch": 0.8457041248810424, "grad_norm": 0.82421875, "learning_rate": 0.00010396540323540336, "loss": 0.7794, "step": 32936 }, { "epoch": 0.8457298020769641, "grad_norm": 0.73828125, "learning_rate": 0.00010396094254099653, "loss": 0.7228, "step": 32937 }, { "epoch": 0.845755479272886, "grad_norm": 0.7421875, "learning_rate": 0.00010395648183869589, "loss": 0.7455, "step": 32938 }, { "epoch": 0.8457811564688078, "grad_norm": 0.83203125, "learning_rate": 0.00010395202112851033, "loss": 0.8347, "step": 32939 }, { "epoch": 0.8458068336647296, "grad_norm": 0.8203125, "learning_rate": 0.00010394756041044876, "loss": 0.7724, "step": 32940 }, { "epoch": 0.8458325108606515, "grad_norm": 0.734375, "learning_rate": 0.00010394309968452006, "loss": 0.6446, "step": 32941 }, { "epoch": 0.8458581880565733, "grad_norm": 0.7890625, "learning_rate": 0.00010393863895073311, "loss": 0.9066, "step": 32942 }, { "epoch": 0.8458838652524951, "grad_norm": 0.80859375, "learning_rate": 0.00010393417820909681, "loss": 0.8886, "step": 32943 }, { "epoch": 0.8459095424484169, "grad_norm": 0.81640625, "learning_rate": 0.00010392971745962008, "loss": 0.8251, "step": 32944 }, { "epoch": 0.8459352196443387, "grad_norm": 0.76953125, "learning_rate": 0.00010392525670231173, "loss": 0.9506, "step": 32945 }, { "epoch": 0.8459608968402605, "grad_norm": 0.7890625, "learning_rate": 0.00010392079593718071, "loss": 0.8679, "step": 32946 }, { "epoch": 0.8459865740361824, "grad_norm": 0.7578125, "learning_rate": 0.0001039163351642359, "loss": 0.821, "step": 32947 }, { "epoch": 0.8460122512321042, "grad_norm": 0.8203125, "learning_rate": 0.00010391187438348619, "loss": 0.7055, "step": 32948 }, { "epoch": 0.846037928428026, "grad_norm": 0.8046875, "learning_rate": 0.00010390741359494047, "loss": 0.7868, "step": 32949 }, { "epoch": 0.8460636056239479, "grad_norm": 0.8125, "learning_rate": 0.00010390295279860764, "loss": 0.658, "step": 32950 }, { "epoch": 0.8460892828198696, "grad_norm": 0.78125, "learning_rate": 0.00010389849199449658, "loss": 0.7969, "step": 32951 }, { "epoch": 0.8461149600157915, "grad_norm": 0.84765625, "learning_rate": 0.00010389403118261616, "loss": 0.8587, "step": 32952 }, { "epoch": 0.8461406372117133, "grad_norm": 0.84765625, "learning_rate": 0.00010388957036297528, "loss": 0.7498, "step": 32953 }, { "epoch": 0.8461663144076351, "grad_norm": 0.8125, "learning_rate": 0.00010388510953558286, "loss": 0.8458, "step": 32954 }, { "epoch": 0.846191991603557, "grad_norm": 0.77734375, "learning_rate": 0.00010388064870044776, "loss": 0.8323, "step": 32955 }, { "epoch": 0.8462176687994788, "grad_norm": 0.7265625, "learning_rate": 0.00010387618785757886, "loss": 0.6908, "step": 32956 }, { "epoch": 0.8462433459954005, "grad_norm": 1.25, "learning_rate": 0.00010387172700698511, "loss": 0.697, "step": 32957 }, { "epoch": 0.8462690231913224, "grad_norm": 0.71484375, "learning_rate": 0.0001038672661486753, "loss": 0.76, "step": 32958 }, { "epoch": 0.8462947003872442, "grad_norm": 0.78125, "learning_rate": 0.00010386280528265842, "loss": 0.9801, "step": 32959 }, { "epoch": 0.846320377583166, "grad_norm": 0.80859375, "learning_rate": 0.00010385834440894331, "loss": 0.7898, "step": 32960 }, { "epoch": 0.8463460547790879, "grad_norm": 0.8359375, "learning_rate": 0.00010385388352753885, "loss": 0.8491, "step": 32961 }, { "epoch": 0.8463717319750097, "grad_norm": 0.75, "learning_rate": 0.00010384942263845397, "loss": 0.6825, "step": 32962 }, { "epoch": 0.8463974091709315, "grad_norm": 0.79296875, "learning_rate": 0.00010384496174169753, "loss": 0.8412, "step": 32963 }, { "epoch": 0.8464230863668533, "grad_norm": 0.80859375, "learning_rate": 0.00010384050083727844, "loss": 0.8741, "step": 32964 }, { "epoch": 0.8464487635627751, "grad_norm": 0.78125, "learning_rate": 0.00010383603992520557, "loss": 0.8592, "step": 32965 }, { "epoch": 0.8464744407586969, "grad_norm": 0.70703125, "learning_rate": 0.00010383157900548782, "loss": 0.7834, "step": 32966 }, { "epoch": 0.8465001179546188, "grad_norm": 0.76171875, "learning_rate": 0.00010382711807813406, "loss": 0.7379, "step": 32967 }, { "epoch": 0.8465257951505406, "grad_norm": 0.8046875, "learning_rate": 0.00010382265714315322, "loss": 0.7251, "step": 32968 }, { "epoch": 0.8465514723464624, "grad_norm": 0.76953125, "learning_rate": 0.00010381819620055415, "loss": 0.7659, "step": 32969 }, { "epoch": 0.8465771495423843, "grad_norm": 0.8125, "learning_rate": 0.00010381373525034581, "loss": 0.725, "step": 32970 }, { "epoch": 0.846602826738306, "grad_norm": 0.73046875, "learning_rate": 0.00010380927429253697, "loss": 0.8745, "step": 32971 }, { "epoch": 0.8466285039342278, "grad_norm": 0.83203125, "learning_rate": 0.00010380481332713665, "loss": 0.752, "step": 32972 }, { "epoch": 0.8466541811301497, "grad_norm": 0.8515625, "learning_rate": 0.00010380035235415367, "loss": 0.735, "step": 32973 }, { "epoch": 0.8466798583260715, "grad_norm": 0.78125, "learning_rate": 0.00010379589137359689, "loss": 0.6988, "step": 32974 }, { "epoch": 0.8467055355219933, "grad_norm": 0.78515625, "learning_rate": 0.00010379143038547527, "loss": 0.8713, "step": 32975 }, { "epoch": 0.8467312127179152, "grad_norm": 0.77734375, "learning_rate": 0.00010378696938979768, "loss": 0.8549, "step": 32976 }, { "epoch": 0.8467568899138369, "grad_norm": 0.73046875, "learning_rate": 0.000103782508386573, "loss": 0.7611, "step": 32977 }, { "epoch": 0.8467825671097587, "grad_norm": 0.77734375, "learning_rate": 0.00010377804737581011, "loss": 0.8166, "step": 32978 }, { "epoch": 0.8468082443056806, "grad_norm": 0.7734375, "learning_rate": 0.0001037735863575179, "loss": 0.7803, "step": 32979 }, { "epoch": 0.8468339215016024, "grad_norm": 0.76171875, "learning_rate": 0.0001037691253317053, "loss": 0.7833, "step": 32980 }, { "epoch": 0.8468595986975243, "grad_norm": 0.78515625, "learning_rate": 0.00010376466429838116, "loss": 0.7655, "step": 32981 }, { "epoch": 0.8468852758934461, "grad_norm": 0.76953125, "learning_rate": 0.00010376020325755438, "loss": 0.8757, "step": 32982 }, { "epoch": 0.8469109530893679, "grad_norm": 0.71875, "learning_rate": 0.00010375574220923389, "loss": 0.6869, "step": 32983 }, { "epoch": 0.8469366302852896, "grad_norm": 0.796875, "learning_rate": 0.00010375128115342852, "loss": 0.8, "step": 32984 }, { "epoch": 0.8469623074812115, "grad_norm": 0.73046875, "learning_rate": 0.00010374682009014718, "loss": 0.7587, "step": 32985 }, { "epoch": 0.8469879846771333, "grad_norm": 0.81640625, "learning_rate": 0.00010374235901939877, "loss": 1.0166, "step": 32986 }, { "epoch": 0.8470136618730552, "grad_norm": 0.7578125, "learning_rate": 0.00010373789794119216, "loss": 0.8028, "step": 32987 }, { "epoch": 0.847039339068977, "grad_norm": 0.73046875, "learning_rate": 0.00010373343685553627, "loss": 0.7116, "step": 32988 }, { "epoch": 0.8470650162648988, "grad_norm": 0.75, "learning_rate": 0.00010372897576244001, "loss": 0.778, "step": 32989 }, { "epoch": 0.8470906934608207, "grad_norm": 0.79296875, "learning_rate": 0.0001037245146619122, "loss": 0.6842, "step": 32990 }, { "epoch": 0.8471163706567424, "grad_norm": 0.75390625, "learning_rate": 0.00010372005355396179, "loss": 0.765, "step": 32991 }, { "epoch": 0.8471420478526642, "grad_norm": 0.76953125, "learning_rate": 0.00010371559243859763, "loss": 0.8581, "step": 32992 }, { "epoch": 0.8471677250485861, "grad_norm": 0.75390625, "learning_rate": 0.00010371113131582864, "loss": 0.7515, "step": 32993 }, { "epoch": 0.8471934022445079, "grad_norm": 0.82421875, "learning_rate": 0.00010370667018566369, "loss": 0.7125, "step": 32994 }, { "epoch": 0.8472190794404297, "grad_norm": 0.7265625, "learning_rate": 0.0001037022090481117, "loss": 0.8156, "step": 32995 }, { "epoch": 0.8472447566363516, "grad_norm": 0.8046875, "learning_rate": 0.00010369774790318152, "loss": 0.9542, "step": 32996 }, { "epoch": 0.8472704338322733, "grad_norm": 0.80859375, "learning_rate": 0.00010369328675088208, "loss": 0.8312, "step": 32997 }, { "epoch": 0.8472961110281951, "grad_norm": 0.78515625, "learning_rate": 0.00010368882559122223, "loss": 0.9105, "step": 32998 }, { "epoch": 0.847321788224117, "grad_norm": 0.74609375, "learning_rate": 0.00010368436442421092, "loss": 0.766, "step": 32999 }, { "epoch": 0.8473474654200388, "grad_norm": 0.83203125, "learning_rate": 0.000103679903249857, "loss": 0.8133, "step": 33000 }, { "epoch": 0.8473474654200388, "eval_loss": 0.8028781414031982, "eval_runtime": 351.1822, "eval_samples_per_second": 28.475, "eval_steps_per_second": 0.891, "step": 33000 }, { "epoch": 0.8473731426159606, "grad_norm": 0.8203125, "learning_rate": 0.0001036754420681693, "loss": 0.7612, "step": 33001 }, { "epoch": 0.8473988198118825, "grad_norm": 0.76953125, "learning_rate": 0.00010367098087915686, "loss": 0.737, "step": 33002 }, { "epoch": 0.8474244970078043, "grad_norm": 0.8359375, "learning_rate": 0.00010366651968282843, "loss": 0.7196, "step": 33003 }, { "epoch": 0.847450174203726, "grad_norm": 0.73046875, "learning_rate": 0.00010366205847919299, "loss": 0.6937, "step": 33004 }, { "epoch": 0.8474758513996479, "grad_norm": 0.76953125, "learning_rate": 0.00010365759726825941, "loss": 0.9479, "step": 33005 }, { "epoch": 0.8475015285955697, "grad_norm": 0.76171875, "learning_rate": 0.00010365313605003653, "loss": 0.8484, "step": 33006 }, { "epoch": 0.8475272057914915, "grad_norm": 0.78515625, "learning_rate": 0.00010364867482453328, "loss": 0.6601, "step": 33007 }, { "epoch": 0.8475528829874134, "grad_norm": 0.73828125, "learning_rate": 0.00010364421359175858, "loss": 0.7863, "step": 33008 }, { "epoch": 0.8475785601833352, "grad_norm": 0.7734375, "learning_rate": 0.00010363975235172128, "loss": 0.8184, "step": 33009 }, { "epoch": 0.847604237379257, "grad_norm": 0.75, "learning_rate": 0.00010363529110443028, "loss": 0.81, "step": 33010 }, { "epoch": 0.8476299145751788, "grad_norm": 0.84375, "learning_rate": 0.00010363082984989447, "loss": 0.912, "step": 33011 }, { "epoch": 0.8476555917711006, "grad_norm": 0.8203125, "learning_rate": 0.00010362636858812275, "loss": 0.8423, "step": 33012 }, { "epoch": 0.8476812689670224, "grad_norm": 0.7890625, "learning_rate": 0.00010362190731912402, "loss": 0.6282, "step": 33013 }, { "epoch": 0.8477069461629443, "grad_norm": 0.76171875, "learning_rate": 0.00010361744604290713, "loss": 0.7137, "step": 33014 }, { "epoch": 0.8477326233588661, "grad_norm": 0.87890625, "learning_rate": 0.000103612984759481, "loss": 0.9701, "step": 33015 }, { "epoch": 0.847758300554788, "grad_norm": 0.7578125, "learning_rate": 0.00010360852346885453, "loss": 0.882, "step": 33016 }, { "epoch": 0.8477839777507097, "grad_norm": 0.765625, "learning_rate": 0.0001036040621710366, "loss": 0.8773, "step": 33017 }, { "epoch": 0.8478096549466315, "grad_norm": 0.734375, "learning_rate": 0.00010359960086603611, "loss": 0.7849, "step": 33018 }, { "epoch": 0.8478353321425534, "grad_norm": 0.8046875, "learning_rate": 0.00010359513955386189, "loss": 0.7611, "step": 33019 }, { "epoch": 0.8478610093384752, "grad_norm": 0.79296875, "learning_rate": 0.00010359067823452294, "loss": 0.7451, "step": 33020 }, { "epoch": 0.847886686534397, "grad_norm": 0.703125, "learning_rate": 0.00010358621690802806, "loss": 0.69, "step": 33021 }, { "epoch": 0.8479123637303189, "grad_norm": 0.83203125, "learning_rate": 0.00010358175557438616, "loss": 0.7327, "step": 33022 }, { "epoch": 0.8479380409262407, "grad_norm": 0.76953125, "learning_rate": 0.0001035772942336062, "loss": 0.8881, "step": 33023 }, { "epoch": 0.8479637181221624, "grad_norm": 0.78515625, "learning_rate": 0.00010357283288569697, "loss": 0.7718, "step": 33024 }, { "epoch": 0.8479893953180843, "grad_norm": 0.75, "learning_rate": 0.0001035683715306674, "loss": 0.6928, "step": 33025 }, { "epoch": 0.8480150725140061, "grad_norm": 0.73828125, "learning_rate": 0.00010356391016852643, "loss": 0.7148, "step": 33026 }, { "epoch": 0.8480407497099279, "grad_norm": 0.84375, "learning_rate": 0.00010355944879928286, "loss": 0.8795, "step": 33027 }, { "epoch": 0.8480664269058498, "grad_norm": 0.79296875, "learning_rate": 0.00010355498742294567, "loss": 0.837, "step": 33028 }, { "epoch": 0.8480921041017716, "grad_norm": 0.8203125, "learning_rate": 0.0001035505260395237, "loss": 0.8496, "step": 33029 }, { "epoch": 0.8481177812976934, "grad_norm": 0.73828125, "learning_rate": 0.00010354606464902585, "loss": 0.7195, "step": 33030 }, { "epoch": 0.8481434584936152, "grad_norm": 0.8125, "learning_rate": 0.00010354160325146101, "loss": 0.7689, "step": 33031 }, { "epoch": 0.848169135689537, "grad_norm": 0.78515625, "learning_rate": 0.00010353714184683809, "loss": 0.6372, "step": 33032 }, { "epoch": 0.8481948128854588, "grad_norm": 0.78125, "learning_rate": 0.00010353268043516593, "loss": 0.7723, "step": 33033 }, { "epoch": 0.8482204900813807, "grad_norm": 0.875, "learning_rate": 0.00010352821901645348, "loss": 0.8477, "step": 33034 }, { "epoch": 0.8482461672773025, "grad_norm": 0.81640625, "learning_rate": 0.0001035237575907096, "loss": 0.9396, "step": 33035 }, { "epoch": 0.8482718444732243, "grad_norm": 0.796875, "learning_rate": 0.00010351929615794323, "loss": 0.7041, "step": 33036 }, { "epoch": 0.8482975216691461, "grad_norm": 0.79296875, "learning_rate": 0.00010351483471816319, "loss": 0.8339, "step": 33037 }, { "epoch": 0.8483231988650679, "grad_norm": 0.79296875, "learning_rate": 0.00010351037327137838, "loss": 0.6995, "step": 33038 }, { "epoch": 0.8483488760609897, "grad_norm": 0.76171875, "learning_rate": 0.00010350591181759775, "loss": 0.7836, "step": 33039 }, { "epoch": 0.8483745532569116, "grad_norm": 0.87109375, "learning_rate": 0.00010350145035683013, "loss": 0.8048, "step": 33040 }, { "epoch": 0.8484002304528334, "grad_norm": 0.78125, "learning_rate": 0.00010349698888908443, "loss": 0.6613, "step": 33041 }, { "epoch": 0.8484259076487553, "grad_norm": 0.78125, "learning_rate": 0.00010349252741436959, "loss": 0.841, "step": 33042 }, { "epoch": 0.8484515848446771, "grad_norm": 0.828125, "learning_rate": 0.0001034880659326944, "loss": 0.918, "step": 33043 }, { "epoch": 0.8484772620405988, "grad_norm": 0.82421875, "learning_rate": 0.00010348360444406783, "loss": 0.848, "step": 33044 }, { "epoch": 0.8485029392365206, "grad_norm": 0.76171875, "learning_rate": 0.00010347914294849879, "loss": 0.732, "step": 33045 }, { "epoch": 0.8485286164324425, "grad_norm": 0.8828125, "learning_rate": 0.00010347468144599609, "loss": 0.8398, "step": 33046 }, { "epoch": 0.8485542936283643, "grad_norm": 0.76171875, "learning_rate": 0.00010347021993656867, "loss": 0.7968, "step": 33047 }, { "epoch": 0.8485799708242862, "grad_norm": 0.859375, "learning_rate": 0.00010346575842022541, "loss": 0.8746, "step": 33048 }, { "epoch": 0.848605648020208, "grad_norm": 0.71484375, "learning_rate": 0.00010346129689697523, "loss": 0.7124, "step": 33049 }, { "epoch": 0.8486313252161297, "grad_norm": 0.84765625, "learning_rate": 0.000103456835366827, "loss": 0.882, "step": 33050 }, { "epoch": 0.8486570024120516, "grad_norm": 0.7265625, "learning_rate": 0.00010345237382978956, "loss": 0.6794, "step": 33051 }, { "epoch": 0.8486826796079734, "grad_norm": 0.7734375, "learning_rate": 0.0001034479122858719, "loss": 0.8842, "step": 33052 }, { "epoch": 0.8487083568038952, "grad_norm": 0.8515625, "learning_rate": 0.00010344345073508286, "loss": 0.9102, "step": 33053 }, { "epoch": 0.8487340339998171, "grad_norm": 0.78125, "learning_rate": 0.00010343898917743128, "loss": 0.769, "step": 33054 }, { "epoch": 0.8487597111957389, "grad_norm": 1.234375, "learning_rate": 0.00010343452761292618, "loss": 0.7539, "step": 33055 }, { "epoch": 0.8487853883916607, "grad_norm": 0.8125, "learning_rate": 0.00010343006604157635, "loss": 0.7536, "step": 33056 }, { "epoch": 0.8488110655875825, "grad_norm": 0.8359375, "learning_rate": 0.00010342560446339068, "loss": 0.932, "step": 33057 }, { "epoch": 0.8488367427835043, "grad_norm": 0.8359375, "learning_rate": 0.00010342114287837811, "loss": 0.7412, "step": 33058 }, { "epoch": 0.8488624199794261, "grad_norm": 0.84375, "learning_rate": 0.00010341668128654751, "loss": 0.9348, "step": 33059 }, { "epoch": 0.848888097175348, "grad_norm": 0.78515625, "learning_rate": 0.00010341221968790778, "loss": 0.8616, "step": 33060 }, { "epoch": 0.8489137743712698, "grad_norm": 0.703125, "learning_rate": 0.0001034077580824678, "loss": 0.7647, "step": 33061 }, { "epoch": 0.8489394515671916, "grad_norm": 0.8671875, "learning_rate": 0.00010340329647023648, "loss": 0.7538, "step": 33062 }, { "epoch": 0.8489651287631135, "grad_norm": 0.84375, "learning_rate": 0.00010339883485122269, "loss": 0.7598, "step": 33063 }, { "epoch": 0.8489908059590352, "grad_norm": 0.78125, "learning_rate": 0.0001033943732254353, "loss": 0.798, "step": 33064 }, { "epoch": 0.849016483154957, "grad_norm": 0.765625, "learning_rate": 0.00010338991159288327, "loss": 0.8098, "step": 33065 }, { "epoch": 0.8490421603508789, "grad_norm": 1.953125, "learning_rate": 0.00010338544995357544, "loss": 0.8596, "step": 33066 }, { "epoch": 0.8490678375468007, "grad_norm": 0.8046875, "learning_rate": 0.00010338098830752068, "loss": 0.8745, "step": 33067 }, { "epoch": 0.8490935147427225, "grad_norm": 0.71875, "learning_rate": 0.00010337652665472795, "loss": 0.7776, "step": 33068 }, { "epoch": 0.8491191919386444, "grad_norm": 0.7890625, "learning_rate": 0.00010337206499520613, "loss": 0.8043, "step": 33069 }, { "epoch": 0.8491448691345661, "grad_norm": 0.82421875, "learning_rate": 0.00010336760332896405, "loss": 0.7361, "step": 33070 }, { "epoch": 0.8491705463304879, "grad_norm": 0.81640625, "learning_rate": 0.00010336314165601066, "loss": 0.7195, "step": 33071 }, { "epoch": 0.8491962235264098, "grad_norm": 0.79296875, "learning_rate": 0.0001033586799763548, "loss": 0.9218, "step": 33072 }, { "epoch": 0.8492219007223316, "grad_norm": 0.78515625, "learning_rate": 0.00010335421829000542, "loss": 0.8262, "step": 33073 }, { "epoch": 0.8492475779182534, "grad_norm": 0.82421875, "learning_rate": 0.0001033497565969714, "loss": 0.6999, "step": 33074 }, { "epoch": 0.8492732551141753, "grad_norm": 0.80078125, "learning_rate": 0.00010334529489726162, "loss": 0.8547, "step": 33075 }, { "epoch": 0.8492989323100971, "grad_norm": 0.69921875, "learning_rate": 0.00010334083319088495, "loss": 0.7493, "step": 33076 }, { "epoch": 0.8493246095060188, "grad_norm": 0.76171875, "learning_rate": 0.00010333637147785029, "loss": 0.7621, "step": 33077 }, { "epoch": 0.8493502867019407, "grad_norm": 0.8046875, "learning_rate": 0.00010333190975816655, "loss": 0.734, "step": 33078 }, { "epoch": 0.8493759638978625, "grad_norm": 0.83203125, "learning_rate": 0.00010332744803184262, "loss": 0.8539, "step": 33079 }, { "epoch": 0.8494016410937844, "grad_norm": 0.7734375, "learning_rate": 0.0001033229862988874, "loss": 0.6305, "step": 33080 }, { "epoch": 0.8494273182897062, "grad_norm": 0.7578125, "learning_rate": 0.00010331852455930974, "loss": 0.9046, "step": 33081 }, { "epoch": 0.849452995485628, "grad_norm": 0.8046875, "learning_rate": 0.00010331406281311859, "loss": 0.9005, "step": 33082 }, { "epoch": 0.8494786726815499, "grad_norm": 0.78515625, "learning_rate": 0.0001033096010603228, "loss": 0.8578, "step": 33083 }, { "epoch": 0.8495043498774716, "grad_norm": 0.7734375, "learning_rate": 0.00010330513930093126, "loss": 0.8394, "step": 33084 }, { "epoch": 0.8495300270733934, "grad_norm": 1.484375, "learning_rate": 0.0001033006775349529, "loss": 0.818, "step": 33085 }, { "epoch": 0.8495557042693153, "grad_norm": 0.734375, "learning_rate": 0.00010329621576239656, "loss": 0.7048, "step": 33086 }, { "epoch": 0.8495813814652371, "grad_norm": 0.6875, "learning_rate": 0.00010329175398327117, "loss": 0.7424, "step": 33087 }, { "epoch": 0.8496070586611589, "grad_norm": 0.703125, "learning_rate": 0.00010328729219758564, "loss": 0.6765, "step": 33088 }, { "epoch": 0.8496327358570808, "grad_norm": 0.828125, "learning_rate": 0.0001032828304053488, "loss": 0.9625, "step": 33089 }, { "epoch": 0.8496584130530025, "grad_norm": 0.78125, "learning_rate": 0.00010327836860656961, "loss": 0.8732, "step": 33090 }, { "epoch": 0.8496840902489243, "grad_norm": 0.80078125, "learning_rate": 0.00010327390680125689, "loss": 0.8248, "step": 33091 }, { "epoch": 0.8497097674448462, "grad_norm": 0.90234375, "learning_rate": 0.00010326944498941957, "loss": 0.8154, "step": 33092 }, { "epoch": 0.849735444640768, "grad_norm": 0.7890625, "learning_rate": 0.00010326498317106657, "loss": 0.7624, "step": 33093 }, { "epoch": 0.8497611218366898, "grad_norm": 0.75390625, "learning_rate": 0.00010326052134620672, "loss": 0.7714, "step": 33094 }, { "epoch": 0.8497867990326117, "grad_norm": 0.76953125, "learning_rate": 0.000103256059514849, "loss": 0.8754, "step": 33095 }, { "epoch": 0.8498124762285335, "grad_norm": 0.94140625, "learning_rate": 0.00010325159767700219, "loss": 1.058, "step": 33096 }, { "epoch": 0.8498381534244552, "grad_norm": 0.75390625, "learning_rate": 0.00010324713583267526, "loss": 0.7572, "step": 33097 }, { "epoch": 0.8498638306203771, "grad_norm": 0.7421875, "learning_rate": 0.0001032426739818771, "loss": 0.7507, "step": 33098 }, { "epoch": 0.8498895078162989, "grad_norm": 0.74609375, "learning_rate": 0.00010323821212461655, "loss": 0.7001, "step": 33099 }, { "epoch": 0.8499151850122207, "grad_norm": 0.67578125, "learning_rate": 0.00010323375026090256, "loss": 0.7881, "step": 33100 }, { "epoch": 0.8499408622081426, "grad_norm": 0.78125, "learning_rate": 0.000103229288390744, "loss": 0.7931, "step": 33101 }, { "epoch": 0.8499665394040644, "grad_norm": 0.8515625, "learning_rate": 0.00010322482651414975, "loss": 0.7996, "step": 33102 }, { "epoch": 0.8499922165999863, "grad_norm": 0.8203125, "learning_rate": 0.00010322036463112872, "loss": 0.7683, "step": 33103 }, { "epoch": 0.850017893795908, "grad_norm": 0.76171875, "learning_rate": 0.00010321590274168976, "loss": 0.8877, "step": 33104 }, { "epoch": 0.8500435709918298, "grad_norm": 0.74609375, "learning_rate": 0.00010321144084584182, "loss": 0.675, "step": 33105 }, { "epoch": 0.8500692481877516, "grad_norm": 0.76171875, "learning_rate": 0.00010320697894359379, "loss": 0.749, "step": 33106 }, { "epoch": 0.8500949253836735, "grad_norm": 0.765625, "learning_rate": 0.00010320251703495449, "loss": 0.8524, "step": 33107 }, { "epoch": 0.8501206025795953, "grad_norm": 0.79296875, "learning_rate": 0.00010319805511993292, "loss": 0.7159, "step": 33108 }, { "epoch": 0.8501462797755172, "grad_norm": 0.91015625, "learning_rate": 0.00010319359319853787, "loss": 0.9811, "step": 33109 }, { "epoch": 0.8501719569714389, "grad_norm": 0.8125, "learning_rate": 0.00010318913127077832, "loss": 0.8444, "step": 33110 }, { "epoch": 0.8501976341673607, "grad_norm": 0.7890625, "learning_rate": 0.00010318466933666311, "loss": 0.8022, "step": 33111 }, { "epoch": 0.8502233113632826, "grad_norm": 0.79296875, "learning_rate": 0.0001031802073962011, "loss": 0.7618, "step": 33112 }, { "epoch": 0.8502489885592044, "grad_norm": 0.7265625, "learning_rate": 0.00010317574544940125, "loss": 0.7145, "step": 33113 }, { "epoch": 0.8502746657551262, "grad_norm": 0.79296875, "learning_rate": 0.00010317128349627243, "loss": 0.8847, "step": 33114 }, { "epoch": 0.8503003429510481, "grad_norm": 0.8046875, "learning_rate": 0.00010316682153682354, "loss": 0.8411, "step": 33115 }, { "epoch": 0.8503260201469699, "grad_norm": 0.88671875, "learning_rate": 0.00010316235957106346, "loss": 0.7983, "step": 33116 }, { "epoch": 0.8503516973428916, "grad_norm": 0.73046875, "learning_rate": 0.00010315789759900107, "loss": 0.8482, "step": 33117 }, { "epoch": 0.8503773745388135, "grad_norm": 0.765625, "learning_rate": 0.00010315343562064526, "loss": 0.7908, "step": 33118 }, { "epoch": 0.8504030517347353, "grad_norm": 0.8125, "learning_rate": 0.00010314897363600496, "loss": 0.7589, "step": 33119 }, { "epoch": 0.8504287289306571, "grad_norm": 0.9296875, "learning_rate": 0.00010314451164508901, "loss": 0.8358, "step": 33120 }, { "epoch": 0.850454406126579, "grad_norm": 0.8203125, "learning_rate": 0.00010314004964790638, "loss": 0.809, "step": 33121 }, { "epoch": 0.8504800833225008, "grad_norm": 0.7109375, "learning_rate": 0.00010313558764446589, "loss": 0.7881, "step": 33122 }, { "epoch": 0.8505057605184226, "grad_norm": 0.83203125, "learning_rate": 0.00010313112563477645, "loss": 0.861, "step": 33123 }, { "epoch": 0.8505314377143444, "grad_norm": 0.66015625, "learning_rate": 0.00010312666361884697, "loss": 0.623, "step": 33124 }, { "epoch": 0.8505571149102662, "grad_norm": 0.79296875, "learning_rate": 0.00010312220159668633, "loss": 1.0032, "step": 33125 }, { "epoch": 0.850582792106188, "grad_norm": 0.8125, "learning_rate": 0.00010311773956830342, "loss": 0.8185, "step": 33126 }, { "epoch": 0.8506084693021099, "grad_norm": 0.78515625, "learning_rate": 0.00010311327753370714, "loss": 0.7767, "step": 33127 }, { "epoch": 0.8506341464980317, "grad_norm": 0.76953125, "learning_rate": 0.00010310881549290639, "loss": 0.8367, "step": 33128 }, { "epoch": 0.8506598236939535, "grad_norm": 0.76171875, "learning_rate": 0.00010310435344591003, "loss": 0.88, "step": 33129 }, { "epoch": 0.8506855008898753, "grad_norm": 0.74609375, "learning_rate": 0.000103099891392727, "loss": 0.8355, "step": 33130 }, { "epoch": 0.8507111780857971, "grad_norm": 0.890625, "learning_rate": 0.00010309542933336613, "loss": 0.7916, "step": 33131 }, { "epoch": 0.8507368552817189, "grad_norm": 0.78125, "learning_rate": 0.00010309096726783637, "loss": 0.812, "step": 33132 }, { "epoch": 0.8507625324776408, "grad_norm": 0.875, "learning_rate": 0.0001030865051961466, "loss": 0.7649, "step": 33133 }, { "epoch": 0.8507882096735626, "grad_norm": 0.8125, "learning_rate": 0.00010308204311830569, "loss": 0.7816, "step": 33134 }, { "epoch": 0.8508138868694844, "grad_norm": 0.8203125, "learning_rate": 0.00010307758103432257, "loss": 0.9167, "step": 33135 }, { "epoch": 0.8508395640654063, "grad_norm": 0.7109375, "learning_rate": 0.00010307311894420608, "loss": 0.8023, "step": 33136 }, { "epoch": 0.850865241261328, "grad_norm": 0.73828125, "learning_rate": 0.00010306865684796514, "loss": 0.7993, "step": 33137 }, { "epoch": 0.8508909184572498, "grad_norm": 0.73828125, "learning_rate": 0.00010306419474560867, "loss": 0.7784, "step": 33138 }, { "epoch": 0.8509165956531717, "grad_norm": 0.7890625, "learning_rate": 0.00010305973263714551, "loss": 0.7262, "step": 33139 }, { "epoch": 0.8509422728490935, "grad_norm": 0.76171875, "learning_rate": 0.0001030552705225846, "loss": 0.8375, "step": 33140 }, { "epoch": 0.8509679500450154, "grad_norm": 0.78515625, "learning_rate": 0.0001030508084019348, "loss": 0.7864, "step": 33141 }, { "epoch": 0.8509936272409372, "grad_norm": 0.78515625, "learning_rate": 0.00010304634627520502, "loss": 0.7416, "step": 33142 }, { "epoch": 0.851019304436859, "grad_norm": 0.796875, "learning_rate": 0.00010304188414240416, "loss": 0.8757, "step": 33143 }, { "epoch": 0.8510449816327808, "grad_norm": 0.7578125, "learning_rate": 0.00010303742200354108, "loss": 0.778, "step": 33144 }, { "epoch": 0.8510706588287026, "grad_norm": 0.7578125, "learning_rate": 0.00010303295985862468, "loss": 0.8849, "step": 33145 }, { "epoch": 0.8510963360246244, "grad_norm": 0.83203125, "learning_rate": 0.00010302849770766389, "loss": 0.6633, "step": 33146 }, { "epoch": 0.8511220132205463, "grad_norm": 0.7421875, "learning_rate": 0.00010302403555066755, "loss": 0.759, "step": 33147 }, { "epoch": 0.8511476904164681, "grad_norm": 0.75, "learning_rate": 0.00010301957338764463, "loss": 0.7047, "step": 33148 }, { "epoch": 0.8511733676123899, "grad_norm": 0.78515625, "learning_rate": 0.00010301511121860392, "loss": 0.7875, "step": 33149 }, { "epoch": 0.8511990448083117, "grad_norm": 0.83203125, "learning_rate": 0.0001030106490435544, "loss": 0.8629, "step": 33150 }, { "epoch": 0.8512247220042335, "grad_norm": 0.80078125, "learning_rate": 0.00010300618686250492, "loss": 0.7561, "step": 33151 }, { "epoch": 0.8512503992001553, "grad_norm": 0.7890625, "learning_rate": 0.00010300172467546436, "loss": 0.8314, "step": 33152 }, { "epoch": 0.8512760763960772, "grad_norm": 0.73046875, "learning_rate": 0.00010299726248244166, "loss": 0.9179, "step": 33153 }, { "epoch": 0.851301753591999, "grad_norm": 0.92578125, "learning_rate": 0.0001029928002834457, "loss": 0.904, "step": 33154 }, { "epoch": 0.8513274307879208, "grad_norm": 0.76171875, "learning_rate": 0.00010298833807848533, "loss": 0.8245, "step": 33155 }, { "epoch": 0.8513531079838427, "grad_norm": 0.828125, "learning_rate": 0.00010298387586756948, "loss": 0.7226, "step": 33156 }, { "epoch": 0.8513787851797644, "grad_norm": 0.80859375, "learning_rate": 0.00010297941365070705, "loss": 0.8257, "step": 33157 }, { "epoch": 0.8514044623756862, "grad_norm": 0.8359375, "learning_rate": 0.0001029749514279069, "loss": 0.9155, "step": 33158 }, { "epoch": 0.8514301395716081, "grad_norm": 0.8125, "learning_rate": 0.00010297048919917794, "loss": 0.7413, "step": 33159 }, { "epoch": 0.8514558167675299, "grad_norm": 0.78125, "learning_rate": 0.00010296602696452904, "loss": 0.7856, "step": 33160 }, { "epoch": 0.8514814939634517, "grad_norm": 0.8046875, "learning_rate": 0.00010296156472396919, "loss": 0.7884, "step": 33161 }, { "epoch": 0.8515071711593736, "grad_norm": 0.80078125, "learning_rate": 0.00010295710247750716, "loss": 0.7998, "step": 33162 }, { "epoch": 0.8515328483552954, "grad_norm": 0.796875, "learning_rate": 0.00010295264022515188, "loss": 0.7874, "step": 33163 }, { "epoch": 0.8515585255512171, "grad_norm": 0.703125, "learning_rate": 0.00010294817796691228, "loss": 0.8099, "step": 33164 }, { "epoch": 0.851584202747139, "grad_norm": 0.890625, "learning_rate": 0.00010294371570279722, "loss": 0.8533, "step": 33165 }, { "epoch": 0.8516098799430608, "grad_norm": 0.8046875, "learning_rate": 0.00010293925343281556, "loss": 0.8971, "step": 33166 }, { "epoch": 0.8516355571389826, "grad_norm": 0.83203125, "learning_rate": 0.0001029347911569763, "loss": 0.8114, "step": 33167 }, { "epoch": 0.8516612343349045, "grad_norm": 0.78125, "learning_rate": 0.00010293032887528821, "loss": 0.8551, "step": 33168 }, { "epoch": 0.8516869115308263, "grad_norm": 0.77734375, "learning_rate": 0.00010292586658776029, "loss": 0.8355, "step": 33169 }, { "epoch": 0.851712588726748, "grad_norm": 0.83203125, "learning_rate": 0.00010292140429440136, "loss": 0.7535, "step": 33170 }, { "epoch": 0.8517382659226699, "grad_norm": 0.84375, "learning_rate": 0.0001029169419952203, "loss": 0.7572, "step": 33171 }, { "epoch": 0.8517639431185917, "grad_norm": 0.83984375, "learning_rate": 0.00010291247969022608, "loss": 0.7942, "step": 33172 }, { "epoch": 0.8517896203145136, "grad_norm": 0.78125, "learning_rate": 0.00010290801737942757, "loss": 0.8275, "step": 33173 }, { "epoch": 0.8518152975104354, "grad_norm": 0.8046875, "learning_rate": 0.0001029035550628336, "loss": 0.7839, "step": 33174 }, { "epoch": 0.8518409747063572, "grad_norm": 0.74609375, "learning_rate": 0.00010289909274045313, "loss": 0.7346, "step": 33175 }, { "epoch": 0.8518666519022791, "grad_norm": 0.77734375, "learning_rate": 0.000102894630412295, "loss": 0.7564, "step": 33176 }, { "epoch": 0.8518923290982008, "grad_norm": 0.73828125, "learning_rate": 0.00010289016807836817, "loss": 0.7574, "step": 33177 }, { "epoch": 0.8519180062941226, "grad_norm": 0.83203125, "learning_rate": 0.0001028857057386815, "loss": 0.936, "step": 33178 }, { "epoch": 0.8519436834900445, "grad_norm": 0.84765625, "learning_rate": 0.00010288124339324383, "loss": 0.9364, "step": 33179 }, { "epoch": 0.8519693606859663, "grad_norm": 0.7578125, "learning_rate": 0.00010287678104206415, "loss": 0.9654, "step": 33180 }, { "epoch": 0.8519950378818881, "grad_norm": 0.80078125, "learning_rate": 0.00010287231868515131, "loss": 0.8099, "step": 33181 }, { "epoch": 0.85202071507781, "grad_norm": 0.765625, "learning_rate": 0.00010286785632251416, "loss": 0.7619, "step": 33182 }, { "epoch": 0.8520463922737318, "grad_norm": 0.82421875, "learning_rate": 0.00010286339395416167, "loss": 0.8491, "step": 33183 }, { "epoch": 0.8520720694696535, "grad_norm": 0.77734375, "learning_rate": 0.00010285893158010265, "loss": 0.8883, "step": 33184 }, { "epoch": 0.8520977466655754, "grad_norm": 0.953125, "learning_rate": 0.00010285446920034607, "loss": 0.9075, "step": 33185 }, { "epoch": 0.8521234238614972, "grad_norm": 0.74609375, "learning_rate": 0.00010285000681490078, "loss": 0.9204, "step": 33186 }, { "epoch": 0.852149101057419, "grad_norm": 0.796875, "learning_rate": 0.0001028455444237757, "loss": 0.9936, "step": 33187 }, { "epoch": 0.8521747782533409, "grad_norm": 0.80078125, "learning_rate": 0.00010284108202697969, "loss": 0.6776, "step": 33188 }, { "epoch": 0.8522004554492627, "grad_norm": 0.83203125, "learning_rate": 0.00010283661962452164, "loss": 0.9133, "step": 33189 }, { "epoch": 0.8522261326451844, "grad_norm": 0.78515625, "learning_rate": 0.00010283215721641048, "loss": 0.8325, "step": 33190 }, { "epoch": 0.8522518098411063, "grad_norm": 0.8984375, "learning_rate": 0.0001028276948026551, "loss": 0.9086, "step": 33191 }, { "epoch": 0.8522774870370281, "grad_norm": 0.8046875, "learning_rate": 0.00010282323238326437, "loss": 0.7638, "step": 33192 }, { "epoch": 0.8523031642329499, "grad_norm": 0.8203125, "learning_rate": 0.0001028187699582472, "loss": 0.8366, "step": 33193 }, { "epoch": 0.8523288414288718, "grad_norm": 0.8046875, "learning_rate": 0.0001028143075276125, "loss": 0.8409, "step": 33194 }, { "epoch": 0.8523545186247936, "grad_norm": 0.7734375, "learning_rate": 0.00010280984509136909, "loss": 0.7265, "step": 33195 }, { "epoch": 0.8523801958207154, "grad_norm": 0.8203125, "learning_rate": 0.00010280538264952593, "loss": 0.817, "step": 33196 }, { "epoch": 0.8524058730166372, "grad_norm": 0.83984375, "learning_rate": 0.00010280092020209188, "loss": 0.7327, "step": 33197 }, { "epoch": 0.852431550212559, "grad_norm": 0.7421875, "learning_rate": 0.00010279645774907589, "loss": 0.7675, "step": 33198 }, { "epoch": 0.8524572274084808, "grad_norm": 0.8515625, "learning_rate": 0.0001027919952904868, "loss": 0.8418, "step": 33199 }, { "epoch": 0.8524829046044027, "grad_norm": 0.796875, "learning_rate": 0.00010278753282633351, "loss": 0.941, "step": 33200 }, { "epoch": 0.8525085818003245, "grad_norm": 0.7734375, "learning_rate": 0.00010278307035662491, "loss": 0.7907, "step": 33201 }, { "epoch": 0.8525342589962464, "grad_norm": 0.80859375, "learning_rate": 0.0001027786078813699, "loss": 0.7795, "step": 33202 }, { "epoch": 0.8525599361921682, "grad_norm": 0.7421875, "learning_rate": 0.00010277414540057738, "loss": 0.7361, "step": 33203 }, { "epoch": 0.8525856133880899, "grad_norm": 0.8359375, "learning_rate": 0.00010276968291425624, "loss": 0.7697, "step": 33204 }, { "epoch": 0.8526112905840117, "grad_norm": 0.6875, "learning_rate": 0.00010276522042241538, "loss": 0.6244, "step": 33205 }, { "epoch": 0.8526369677799336, "grad_norm": 0.765625, "learning_rate": 0.00010276075792506365, "loss": 0.7625, "step": 33206 }, { "epoch": 0.8526626449758554, "grad_norm": 0.7265625, "learning_rate": 0.00010275629542221005, "loss": 0.6669, "step": 33207 }, { "epoch": 0.8526883221717773, "grad_norm": 0.81640625, "learning_rate": 0.00010275183291386335, "loss": 0.8562, "step": 33208 }, { "epoch": 0.8527139993676991, "grad_norm": 0.953125, "learning_rate": 0.0001027473704000325, "loss": 1.011, "step": 33209 }, { "epoch": 0.8527396765636208, "grad_norm": 0.75, "learning_rate": 0.00010274290788072641, "loss": 0.8583, "step": 33210 }, { "epoch": 0.8527653537595427, "grad_norm": 0.828125, "learning_rate": 0.00010273844535595392, "loss": 0.9363, "step": 33211 }, { "epoch": 0.8527910309554645, "grad_norm": 0.85546875, "learning_rate": 0.00010273398282572397, "loss": 0.8763, "step": 33212 }, { "epoch": 0.8528167081513863, "grad_norm": 0.6796875, "learning_rate": 0.00010272952029004546, "loss": 0.7557, "step": 33213 }, { "epoch": 0.8528423853473082, "grad_norm": 0.69921875, "learning_rate": 0.00010272505774892724, "loss": 0.6707, "step": 33214 }, { "epoch": 0.85286806254323, "grad_norm": 0.83984375, "learning_rate": 0.00010272059520237822, "loss": 0.7952, "step": 33215 }, { "epoch": 0.8528937397391518, "grad_norm": 0.81640625, "learning_rate": 0.0001027161326504073, "loss": 0.8933, "step": 33216 }, { "epoch": 0.8529194169350736, "grad_norm": 0.71875, "learning_rate": 0.00010271167009302339, "loss": 0.8016, "step": 33217 }, { "epoch": 0.8529450941309954, "grad_norm": 0.765625, "learning_rate": 0.00010270720753023538, "loss": 0.7619, "step": 33218 }, { "epoch": 0.8529707713269172, "grad_norm": 0.84375, "learning_rate": 0.0001027027449620521, "loss": 0.9145, "step": 33219 }, { "epoch": 0.8529964485228391, "grad_norm": 0.8359375, "learning_rate": 0.00010269828238848256, "loss": 0.7323, "step": 33220 }, { "epoch": 0.8530221257187609, "grad_norm": 0.75, "learning_rate": 0.00010269381980953552, "loss": 0.8664, "step": 33221 }, { "epoch": 0.8530478029146827, "grad_norm": 0.828125, "learning_rate": 0.00010268935722521998, "loss": 0.8492, "step": 33222 }, { "epoch": 0.8530734801106046, "grad_norm": 0.75, "learning_rate": 0.00010268489463554479, "loss": 0.8311, "step": 33223 }, { "epoch": 0.8530991573065263, "grad_norm": 0.8671875, "learning_rate": 0.00010268043204051883, "loss": 0.961, "step": 33224 }, { "epoch": 0.8531248345024481, "grad_norm": 0.76171875, "learning_rate": 0.00010267596944015101, "loss": 0.757, "step": 33225 }, { "epoch": 0.85315051169837, "grad_norm": 0.69921875, "learning_rate": 0.00010267150683445026, "loss": 0.6751, "step": 33226 }, { "epoch": 0.8531761888942918, "grad_norm": 0.73828125, "learning_rate": 0.00010266704422342542, "loss": 0.7676, "step": 33227 }, { "epoch": 0.8532018660902136, "grad_norm": 0.7421875, "learning_rate": 0.0001026625816070854, "loss": 0.8503, "step": 33228 }, { "epoch": 0.8532275432861355, "grad_norm": 0.7890625, "learning_rate": 0.00010265811898543906, "loss": 0.8462, "step": 33229 }, { "epoch": 0.8532532204820572, "grad_norm": 0.76953125, "learning_rate": 0.00010265365635849538, "loss": 0.6942, "step": 33230 }, { "epoch": 0.853278897677979, "grad_norm": 0.79296875, "learning_rate": 0.00010264919372626318, "loss": 0.7146, "step": 33231 }, { "epoch": 0.8533045748739009, "grad_norm": 0.796875, "learning_rate": 0.00010264473108875136, "loss": 0.7493, "step": 33232 }, { "epoch": 0.8533302520698227, "grad_norm": 0.79296875, "learning_rate": 0.00010264026844596888, "loss": 0.8592, "step": 33233 }, { "epoch": 0.8533559292657446, "grad_norm": 0.828125, "learning_rate": 0.00010263580579792453, "loss": 0.7381, "step": 33234 }, { "epoch": 0.8533816064616664, "grad_norm": 0.75390625, "learning_rate": 0.0001026313431446273, "loss": 0.8666, "step": 33235 }, { "epoch": 0.8534072836575882, "grad_norm": 0.8203125, "learning_rate": 0.00010262688048608602, "loss": 0.8579, "step": 33236 }, { "epoch": 0.85343296085351, "grad_norm": 0.86328125, "learning_rate": 0.0001026224178223096, "loss": 0.879, "step": 33237 }, { "epoch": 0.8534586380494318, "grad_norm": 0.78125, "learning_rate": 0.00010261795515330695, "loss": 0.7962, "step": 33238 }, { "epoch": 0.8534843152453536, "grad_norm": 0.79296875, "learning_rate": 0.00010261349247908693, "loss": 0.7941, "step": 33239 }, { "epoch": 0.8535099924412755, "grad_norm": 0.8359375, "learning_rate": 0.0001026090297996585, "loss": 0.7468, "step": 33240 }, { "epoch": 0.8535356696371973, "grad_norm": 0.81640625, "learning_rate": 0.00010260456711503048, "loss": 0.8157, "step": 33241 }, { "epoch": 0.8535613468331191, "grad_norm": 0.89453125, "learning_rate": 0.00010260010442521179, "loss": 0.7725, "step": 33242 }, { "epoch": 0.853587024029041, "grad_norm": 0.73828125, "learning_rate": 0.00010259564173021132, "loss": 0.8257, "step": 33243 }, { "epoch": 0.8536127012249627, "grad_norm": 0.83203125, "learning_rate": 0.00010259117903003798, "loss": 0.7907, "step": 33244 }, { "epoch": 0.8536383784208845, "grad_norm": 0.79296875, "learning_rate": 0.00010258671632470064, "loss": 0.7606, "step": 33245 }, { "epoch": 0.8536640556168064, "grad_norm": 0.8046875, "learning_rate": 0.00010258225361420826, "loss": 0.8204, "step": 33246 }, { "epoch": 0.8536897328127282, "grad_norm": 0.75, "learning_rate": 0.00010257779089856965, "loss": 0.7245, "step": 33247 }, { "epoch": 0.85371541000865, "grad_norm": 0.984375, "learning_rate": 0.0001025733281777937, "loss": 0.9581, "step": 33248 }, { "epoch": 0.8537410872045719, "grad_norm": 0.796875, "learning_rate": 0.00010256886545188938, "loss": 0.7701, "step": 33249 }, { "epoch": 0.8537667644004936, "grad_norm": 0.8359375, "learning_rate": 0.00010256440272086554, "loss": 0.905, "step": 33250 }, { "epoch": 0.8537924415964154, "grad_norm": 0.81640625, "learning_rate": 0.00010255993998473106, "loss": 0.743, "step": 33251 }, { "epoch": 0.8538181187923373, "grad_norm": 0.81640625, "learning_rate": 0.00010255547724349486, "loss": 0.923, "step": 33252 }, { "epoch": 0.8538437959882591, "grad_norm": 0.86328125, "learning_rate": 0.00010255101449716584, "loss": 0.8737, "step": 33253 }, { "epoch": 0.8538694731841809, "grad_norm": 0.80078125, "learning_rate": 0.00010254655174575288, "loss": 0.7466, "step": 33254 }, { "epoch": 0.8538951503801028, "grad_norm": 0.7578125, "learning_rate": 0.00010254208898926485, "loss": 0.7388, "step": 33255 }, { "epoch": 0.8539208275760246, "grad_norm": 0.7734375, "learning_rate": 0.00010253762622771066, "loss": 0.7165, "step": 33256 }, { "epoch": 0.8539465047719463, "grad_norm": 0.85546875, "learning_rate": 0.00010253316346109922, "loss": 0.86, "step": 33257 }, { "epoch": 0.8539721819678682, "grad_norm": 0.7734375, "learning_rate": 0.00010252870068943943, "loss": 0.9042, "step": 33258 }, { "epoch": 0.85399785916379, "grad_norm": 0.7265625, "learning_rate": 0.00010252423791274015, "loss": 0.8389, "step": 33259 }, { "epoch": 0.8540235363597118, "grad_norm": 0.796875, "learning_rate": 0.0001025197751310103, "loss": 0.7292, "step": 33260 }, { "epoch": 0.8540492135556337, "grad_norm": 0.8359375, "learning_rate": 0.00010251531234425875, "loss": 0.8027, "step": 33261 }, { "epoch": 0.8540748907515555, "grad_norm": 0.78125, "learning_rate": 0.00010251084955249444, "loss": 0.8076, "step": 33262 }, { "epoch": 0.8541005679474772, "grad_norm": 0.80078125, "learning_rate": 0.00010250638675572621, "loss": 0.8029, "step": 33263 }, { "epoch": 0.8541262451433991, "grad_norm": 0.8359375, "learning_rate": 0.00010250192395396297, "loss": 0.9085, "step": 33264 }, { "epoch": 0.8541519223393209, "grad_norm": 0.8046875, "learning_rate": 0.00010249746114721364, "loss": 0.773, "step": 33265 }, { "epoch": 0.8541775995352427, "grad_norm": 0.7734375, "learning_rate": 0.0001024929983354871, "loss": 0.8341, "step": 33266 }, { "epoch": 0.8542032767311646, "grad_norm": 0.75390625, "learning_rate": 0.00010248853551879223, "loss": 0.7629, "step": 33267 }, { "epoch": 0.8542289539270864, "grad_norm": 0.8046875, "learning_rate": 0.00010248407269713793, "loss": 0.8893, "step": 33268 }, { "epoch": 0.8542546311230083, "grad_norm": 0.74609375, "learning_rate": 0.00010247960987053308, "loss": 0.8164, "step": 33269 }, { "epoch": 0.85428030831893, "grad_norm": 0.7578125, "learning_rate": 0.00010247514703898664, "loss": 0.7772, "step": 33270 }, { "epoch": 0.8543059855148518, "grad_norm": 0.859375, "learning_rate": 0.00010247068420250742, "loss": 0.8473, "step": 33271 }, { "epoch": 0.8543316627107737, "grad_norm": 0.78125, "learning_rate": 0.00010246622136110435, "loss": 0.8628, "step": 33272 }, { "epoch": 0.8543573399066955, "grad_norm": 0.90234375, "learning_rate": 0.00010246175851478636, "loss": 0.7941, "step": 33273 }, { "epoch": 0.8543830171026173, "grad_norm": 0.80859375, "learning_rate": 0.00010245729566356226, "loss": 0.8018, "step": 33274 }, { "epoch": 0.8544086942985392, "grad_norm": 0.6953125, "learning_rate": 0.00010245283280744102, "loss": 0.6974, "step": 33275 }, { "epoch": 0.854434371494461, "grad_norm": 0.77734375, "learning_rate": 0.0001024483699464315, "loss": 0.7753, "step": 33276 }, { "epoch": 0.8544600486903827, "grad_norm": 0.81640625, "learning_rate": 0.00010244390708054256, "loss": 0.764, "step": 33277 }, { "epoch": 0.8544857258863046, "grad_norm": 0.86328125, "learning_rate": 0.0001024394442097832, "loss": 0.9462, "step": 33278 }, { "epoch": 0.8545114030822264, "grad_norm": 0.7421875, "learning_rate": 0.00010243498133416222, "loss": 0.7589, "step": 33279 }, { "epoch": 0.8545370802781482, "grad_norm": 0.890625, "learning_rate": 0.00010243051845368854, "loss": 0.8219, "step": 33280 }, { "epoch": 0.8545627574740701, "grad_norm": 0.77734375, "learning_rate": 0.00010242605556837106, "loss": 0.7284, "step": 33281 }, { "epoch": 0.8545884346699919, "grad_norm": 0.7734375, "learning_rate": 0.00010242159267821866, "loss": 0.7232, "step": 33282 }, { "epoch": 0.8546141118659136, "grad_norm": 0.76953125, "learning_rate": 0.00010241712978324024, "loss": 0.8775, "step": 33283 }, { "epoch": 0.8546397890618355, "grad_norm": 0.79296875, "learning_rate": 0.00010241266688344473, "loss": 0.8073, "step": 33284 }, { "epoch": 0.8546654662577573, "grad_norm": 0.76953125, "learning_rate": 0.00010240820397884094, "loss": 0.8352, "step": 33285 }, { "epoch": 0.8546911434536791, "grad_norm": 0.84375, "learning_rate": 0.00010240374106943789, "loss": 0.7619, "step": 33286 }, { "epoch": 0.854716820649601, "grad_norm": 0.8046875, "learning_rate": 0.00010239927815524437, "loss": 0.7549, "step": 33287 }, { "epoch": 0.8547424978455228, "grad_norm": 0.76171875, "learning_rate": 0.00010239481523626928, "loss": 0.8991, "step": 33288 }, { "epoch": 0.8547681750414446, "grad_norm": 0.78125, "learning_rate": 0.00010239035231252156, "loss": 0.8373, "step": 33289 }, { "epoch": 0.8547938522373664, "grad_norm": 0.78125, "learning_rate": 0.00010238588938401009, "loss": 0.863, "step": 33290 }, { "epoch": 0.8548195294332882, "grad_norm": 0.8046875, "learning_rate": 0.00010238142645074373, "loss": 0.896, "step": 33291 }, { "epoch": 0.85484520662921, "grad_norm": 0.81640625, "learning_rate": 0.00010237696351273144, "loss": 0.7339, "step": 33292 }, { "epoch": 0.8548708838251319, "grad_norm": 0.8359375, "learning_rate": 0.00010237250056998205, "loss": 0.9171, "step": 33293 }, { "epoch": 0.8548965610210537, "grad_norm": 0.74609375, "learning_rate": 0.00010236803762250448, "loss": 0.7235, "step": 33294 }, { "epoch": 0.8549222382169755, "grad_norm": 0.78125, "learning_rate": 0.00010236357467030763, "loss": 0.8733, "step": 33295 }, { "epoch": 0.8549479154128974, "grad_norm": 0.8671875, "learning_rate": 0.00010235911171340039, "loss": 0.9368, "step": 33296 }, { "epoch": 0.8549735926088191, "grad_norm": 0.91796875, "learning_rate": 0.00010235464875179166, "loss": 0.9699, "step": 33297 }, { "epoch": 0.854999269804741, "grad_norm": 0.67578125, "learning_rate": 0.00010235018578549034, "loss": 0.8075, "step": 33298 }, { "epoch": 0.8550249470006628, "grad_norm": 0.92578125, "learning_rate": 0.0001023457228145053, "loss": 0.7665, "step": 33299 }, { "epoch": 0.8550506241965846, "grad_norm": 0.88671875, "learning_rate": 0.00010234125983884544, "loss": 0.732, "step": 33300 }, { "epoch": 0.8550763013925065, "grad_norm": 0.7578125, "learning_rate": 0.00010233679685851964, "loss": 0.9053, "step": 33301 }, { "epoch": 0.8551019785884283, "grad_norm": 0.71484375, "learning_rate": 0.00010233233387353685, "loss": 0.787, "step": 33302 }, { "epoch": 0.85512765578435, "grad_norm": 0.82421875, "learning_rate": 0.00010232787088390592, "loss": 0.805, "step": 33303 }, { "epoch": 0.8551533329802719, "grad_norm": 0.78125, "learning_rate": 0.00010232340788963573, "loss": 0.7531, "step": 33304 }, { "epoch": 0.8551790101761937, "grad_norm": 0.78515625, "learning_rate": 0.00010231894489073524, "loss": 0.8298, "step": 33305 }, { "epoch": 0.8552046873721155, "grad_norm": 0.74609375, "learning_rate": 0.00010231448188721326, "loss": 0.8581, "step": 33306 }, { "epoch": 0.8552303645680374, "grad_norm": 0.83203125, "learning_rate": 0.00010231001887907876, "loss": 0.8328, "step": 33307 }, { "epoch": 0.8552560417639592, "grad_norm": 0.8203125, "learning_rate": 0.00010230555586634059, "loss": 0.7119, "step": 33308 }, { "epoch": 0.855281718959881, "grad_norm": 0.7734375, "learning_rate": 0.00010230109284900764, "loss": 0.7702, "step": 33309 }, { "epoch": 0.8553073961558028, "grad_norm": 0.7890625, "learning_rate": 0.00010229662982708884, "loss": 0.7712, "step": 33310 }, { "epoch": 0.8553330733517246, "grad_norm": 0.81640625, "learning_rate": 0.00010229216680059306, "loss": 0.78, "step": 33311 }, { "epoch": 0.8553587505476464, "grad_norm": 0.78515625, "learning_rate": 0.00010228770376952919, "loss": 0.7869, "step": 33312 }, { "epoch": 0.8553844277435683, "grad_norm": 0.79296875, "learning_rate": 0.00010228324073390615, "loss": 0.8289, "step": 33313 }, { "epoch": 0.8554101049394901, "grad_norm": 0.7265625, "learning_rate": 0.0001022787776937328, "loss": 0.8546, "step": 33314 }, { "epoch": 0.8554357821354119, "grad_norm": 0.98046875, "learning_rate": 0.00010227431464901804, "loss": 0.8608, "step": 33315 }, { "epoch": 0.8554614593313338, "grad_norm": 0.76171875, "learning_rate": 0.00010226985159977081, "loss": 0.7682, "step": 33316 }, { "epoch": 0.8554871365272555, "grad_norm": 0.7421875, "learning_rate": 0.00010226538854599994, "loss": 0.8769, "step": 33317 }, { "epoch": 0.8555128137231773, "grad_norm": 0.81640625, "learning_rate": 0.00010226092548771438, "loss": 0.8165, "step": 33318 }, { "epoch": 0.8555384909190992, "grad_norm": 0.8203125, "learning_rate": 0.00010225646242492301, "loss": 0.8137, "step": 33319 }, { "epoch": 0.855564168115021, "grad_norm": 0.9296875, "learning_rate": 0.00010225199935763467, "loss": 0.7524, "step": 33320 }, { "epoch": 0.8555898453109428, "grad_norm": 0.7734375, "learning_rate": 0.00010224753628585834, "loss": 0.8147, "step": 33321 }, { "epoch": 0.8556155225068647, "grad_norm": 0.75, "learning_rate": 0.00010224307320960285, "loss": 0.8382, "step": 33322 }, { "epoch": 0.8556411997027864, "grad_norm": 0.78125, "learning_rate": 0.00010223861012887713, "loss": 0.7145, "step": 33323 }, { "epoch": 0.8556668768987082, "grad_norm": 0.8125, "learning_rate": 0.00010223414704369006, "loss": 0.7953, "step": 33324 }, { "epoch": 0.8556925540946301, "grad_norm": 0.7890625, "learning_rate": 0.00010222968395405053, "loss": 0.9286, "step": 33325 }, { "epoch": 0.8557182312905519, "grad_norm": 1.0, "learning_rate": 0.00010222522085996745, "loss": 0.8497, "step": 33326 }, { "epoch": 0.8557439084864737, "grad_norm": 0.8125, "learning_rate": 0.0001022207577614497, "loss": 0.8466, "step": 33327 }, { "epoch": 0.8557695856823956, "grad_norm": 0.8828125, "learning_rate": 0.00010221629465850617, "loss": 0.8745, "step": 33328 }, { "epoch": 0.8557952628783174, "grad_norm": 0.81640625, "learning_rate": 0.00010221183155114577, "loss": 0.8803, "step": 33329 }, { "epoch": 0.8558209400742391, "grad_norm": 0.796875, "learning_rate": 0.00010220736843937742, "loss": 0.853, "step": 33330 }, { "epoch": 0.855846617270161, "grad_norm": 0.8046875, "learning_rate": 0.00010220290532320993, "loss": 0.9978, "step": 33331 }, { "epoch": 0.8558722944660828, "grad_norm": 0.8515625, "learning_rate": 0.00010219844220265232, "loss": 0.8735, "step": 33332 }, { "epoch": 0.8558979716620047, "grad_norm": 0.78515625, "learning_rate": 0.00010219397907771333, "loss": 0.8043, "step": 33333 }, { "epoch": 0.8559236488579265, "grad_norm": 0.7734375, "learning_rate": 0.00010218951594840198, "loss": 0.7616, "step": 33334 }, { "epoch": 0.8559493260538483, "grad_norm": 0.7734375, "learning_rate": 0.00010218505281472715, "loss": 0.8842, "step": 33335 }, { "epoch": 0.8559750032497702, "grad_norm": 0.7421875, "learning_rate": 0.00010218058967669766, "loss": 0.7137, "step": 33336 }, { "epoch": 0.8560006804456919, "grad_norm": 0.83203125, "learning_rate": 0.00010217612653432247, "loss": 0.8016, "step": 33337 }, { "epoch": 0.8560263576416137, "grad_norm": 0.7734375, "learning_rate": 0.00010217166338761046, "loss": 0.8039, "step": 33338 }, { "epoch": 0.8560520348375356, "grad_norm": 0.84765625, "learning_rate": 0.00010216720023657052, "loss": 0.9226, "step": 33339 }, { "epoch": 0.8560777120334574, "grad_norm": 0.734375, "learning_rate": 0.00010216273708121155, "loss": 0.8708, "step": 33340 }, { "epoch": 0.8561033892293792, "grad_norm": 0.78515625, "learning_rate": 0.00010215827392154242, "loss": 0.8174, "step": 33341 }, { "epoch": 0.8561290664253011, "grad_norm": 0.77734375, "learning_rate": 0.00010215381075757206, "loss": 0.8499, "step": 33342 }, { "epoch": 0.8561547436212228, "grad_norm": 0.734375, "learning_rate": 0.00010214934758930936, "loss": 0.7569, "step": 33343 }, { "epoch": 0.8561804208171446, "grad_norm": 0.76171875, "learning_rate": 0.00010214488441676318, "loss": 0.8722, "step": 33344 }, { "epoch": 0.8562060980130665, "grad_norm": 0.79296875, "learning_rate": 0.00010214042123994247, "loss": 0.7297, "step": 33345 }, { "epoch": 0.8562317752089883, "grad_norm": 0.75390625, "learning_rate": 0.00010213595805885605, "loss": 0.732, "step": 33346 }, { "epoch": 0.8562574524049101, "grad_norm": 0.78125, "learning_rate": 0.0001021314948735129, "loss": 0.8547, "step": 33347 }, { "epoch": 0.856283129600832, "grad_norm": 0.79296875, "learning_rate": 0.00010212703168392186, "loss": 0.8521, "step": 33348 }, { "epoch": 0.8563088067967538, "grad_norm": 0.8125, "learning_rate": 0.0001021225684900918, "loss": 0.862, "step": 33349 }, { "epoch": 0.8563344839926755, "grad_norm": 0.73828125, "learning_rate": 0.00010211810529203171, "loss": 0.7371, "step": 33350 }, { "epoch": 0.8563601611885974, "grad_norm": 0.703125, "learning_rate": 0.0001021136420897504, "loss": 0.7247, "step": 33351 }, { "epoch": 0.8563858383845192, "grad_norm": 0.80078125, "learning_rate": 0.00010210917888325682, "loss": 0.8571, "step": 33352 }, { "epoch": 0.856411515580441, "grad_norm": 0.76953125, "learning_rate": 0.00010210471567255981, "loss": 0.9839, "step": 33353 }, { "epoch": 0.8564371927763629, "grad_norm": 0.73828125, "learning_rate": 0.00010210025245766828, "loss": 0.7827, "step": 33354 }, { "epoch": 0.8564628699722847, "grad_norm": 0.79296875, "learning_rate": 0.00010209578923859115, "loss": 0.7442, "step": 33355 }, { "epoch": 0.8564885471682065, "grad_norm": 0.78515625, "learning_rate": 0.00010209132601533733, "loss": 0.9006, "step": 33356 }, { "epoch": 0.8565142243641283, "grad_norm": 0.87109375, "learning_rate": 0.00010208686278791564, "loss": 0.8365, "step": 33357 }, { "epoch": 0.8565399015600501, "grad_norm": 0.78125, "learning_rate": 0.00010208239955633507, "loss": 0.774, "step": 33358 }, { "epoch": 0.856565578755972, "grad_norm": 0.82421875, "learning_rate": 0.00010207793632060443, "loss": 0.7261, "step": 33359 }, { "epoch": 0.8565912559518938, "grad_norm": 0.78125, "learning_rate": 0.00010207347308073265, "loss": 0.8634, "step": 33360 }, { "epoch": 0.8566169331478156, "grad_norm": 0.78515625, "learning_rate": 0.00010206900983672864, "loss": 0.8111, "step": 33361 }, { "epoch": 0.8566426103437375, "grad_norm": 0.80859375, "learning_rate": 0.00010206454658860125, "loss": 0.9059, "step": 33362 }, { "epoch": 0.8566682875396592, "grad_norm": 0.79296875, "learning_rate": 0.00010206008333635944, "loss": 0.8643, "step": 33363 }, { "epoch": 0.856693964735581, "grad_norm": 0.8125, "learning_rate": 0.00010205562008001206, "loss": 0.7817, "step": 33364 }, { "epoch": 0.8567196419315029, "grad_norm": 0.85546875, "learning_rate": 0.00010205115681956802, "loss": 0.7495, "step": 33365 }, { "epoch": 0.8567453191274247, "grad_norm": 0.765625, "learning_rate": 0.00010204669355503622, "loss": 0.7134, "step": 33366 }, { "epoch": 0.8567709963233465, "grad_norm": 0.76171875, "learning_rate": 0.00010204223028642552, "loss": 0.6861, "step": 33367 }, { "epoch": 0.8567966735192684, "grad_norm": 0.8359375, "learning_rate": 0.00010203776701374483, "loss": 0.8953, "step": 33368 }, { "epoch": 0.8568223507151902, "grad_norm": 0.82421875, "learning_rate": 0.00010203330373700308, "loss": 0.7287, "step": 33369 }, { "epoch": 0.8568480279111119, "grad_norm": 0.7890625, "learning_rate": 0.00010202884045620912, "loss": 0.8548, "step": 33370 }, { "epoch": 0.8568737051070338, "grad_norm": 0.85546875, "learning_rate": 0.0001020243771713719, "loss": 0.9223, "step": 33371 }, { "epoch": 0.8568993823029556, "grad_norm": 0.84765625, "learning_rate": 0.00010201991388250027, "loss": 0.8257, "step": 33372 }, { "epoch": 0.8569250594988774, "grad_norm": 0.70703125, "learning_rate": 0.0001020154505896031, "loss": 0.6907, "step": 33373 }, { "epoch": 0.8569507366947993, "grad_norm": 0.86328125, "learning_rate": 0.00010201098729268934, "loss": 0.8683, "step": 33374 }, { "epoch": 0.8569764138907211, "grad_norm": 0.8125, "learning_rate": 0.00010200652399176786, "loss": 0.6949, "step": 33375 }, { "epoch": 0.8570020910866429, "grad_norm": 0.7421875, "learning_rate": 0.00010200206068684755, "loss": 0.834, "step": 33376 }, { "epoch": 0.8570277682825647, "grad_norm": 0.765625, "learning_rate": 0.00010199759737793732, "loss": 0.7975, "step": 33377 }, { "epoch": 0.8570534454784865, "grad_norm": 0.89453125, "learning_rate": 0.00010199313406504608, "loss": 0.7775, "step": 33378 }, { "epoch": 0.8570791226744083, "grad_norm": 0.76171875, "learning_rate": 0.0001019886707481827, "loss": 0.8996, "step": 33379 }, { "epoch": 0.8571047998703302, "grad_norm": 0.80078125, "learning_rate": 0.00010198420742735606, "loss": 0.7748, "step": 33380 }, { "epoch": 0.857130477066252, "grad_norm": 0.84765625, "learning_rate": 0.00010197974410257507, "loss": 0.8875, "step": 33381 }, { "epoch": 0.8571561542621738, "grad_norm": 0.72265625, "learning_rate": 0.00010197528077384864, "loss": 0.7321, "step": 33382 }, { "epoch": 0.8571818314580956, "grad_norm": 0.7734375, "learning_rate": 0.00010197081744118566, "loss": 0.7985, "step": 33383 }, { "epoch": 0.8572075086540174, "grad_norm": 0.7734375, "learning_rate": 0.000101966354104595, "loss": 0.8422, "step": 33384 }, { "epoch": 0.8572331858499392, "grad_norm": 0.79296875, "learning_rate": 0.0001019618907640856, "loss": 0.6942, "step": 33385 }, { "epoch": 0.8572588630458611, "grad_norm": 0.8046875, "learning_rate": 0.0001019574274196663, "loss": 0.7512, "step": 33386 }, { "epoch": 0.8572845402417829, "grad_norm": 0.74609375, "learning_rate": 0.00010195296407134605, "loss": 0.6551, "step": 33387 }, { "epoch": 0.8573102174377047, "grad_norm": 0.78515625, "learning_rate": 0.00010194850071913371, "loss": 0.7554, "step": 33388 }, { "epoch": 0.8573358946336266, "grad_norm": 0.78125, "learning_rate": 0.00010194403736303817, "loss": 0.6943, "step": 33389 }, { "epoch": 0.8573615718295483, "grad_norm": 0.8984375, "learning_rate": 0.00010193957400306834, "loss": 0.7658, "step": 33390 }, { "epoch": 0.8573872490254701, "grad_norm": 0.76171875, "learning_rate": 0.00010193511063923313, "loss": 0.8425, "step": 33391 }, { "epoch": 0.857412926221392, "grad_norm": 0.82421875, "learning_rate": 0.00010193064727154143, "loss": 0.8249, "step": 33392 }, { "epoch": 0.8574386034173138, "grad_norm": 0.8359375, "learning_rate": 0.00010192618390000213, "loss": 0.6821, "step": 33393 }, { "epoch": 0.8574642806132357, "grad_norm": 0.81640625, "learning_rate": 0.00010192172052462407, "loss": 0.8193, "step": 33394 }, { "epoch": 0.8574899578091575, "grad_norm": 0.78125, "learning_rate": 0.00010191725714541623, "loss": 0.8258, "step": 33395 }, { "epoch": 0.8575156350050793, "grad_norm": 0.9765625, "learning_rate": 0.00010191279376238747, "loss": 0.8561, "step": 33396 }, { "epoch": 0.857541312201001, "grad_norm": 0.828125, "learning_rate": 0.00010190833037554665, "loss": 0.7547, "step": 33397 }, { "epoch": 0.8575669893969229, "grad_norm": 0.796875, "learning_rate": 0.00010190386698490275, "loss": 0.7642, "step": 33398 }, { "epoch": 0.8575926665928447, "grad_norm": 0.81640625, "learning_rate": 0.00010189940359046459, "loss": 0.8905, "step": 33399 }, { "epoch": 0.8576183437887666, "grad_norm": 0.73046875, "learning_rate": 0.00010189494019224109, "loss": 0.7932, "step": 33400 }, { "epoch": 0.8576440209846884, "grad_norm": 0.90234375, "learning_rate": 0.00010189047679024115, "loss": 0.8079, "step": 33401 }, { "epoch": 0.8576696981806102, "grad_norm": 0.7578125, "learning_rate": 0.00010188601338447366, "loss": 0.6998, "step": 33402 }, { "epoch": 0.857695375376532, "grad_norm": 0.76171875, "learning_rate": 0.0001018815499749475, "loss": 0.8152, "step": 33403 }, { "epoch": 0.8577210525724538, "grad_norm": 0.828125, "learning_rate": 0.00010187708656167164, "loss": 0.7674, "step": 33404 }, { "epoch": 0.8577467297683756, "grad_norm": 0.73828125, "learning_rate": 0.00010187262314465484, "loss": 0.7402, "step": 33405 }, { "epoch": 0.8577724069642975, "grad_norm": 0.91015625, "learning_rate": 0.0001018681597239061, "loss": 0.7695, "step": 33406 }, { "epoch": 0.8577980841602193, "grad_norm": 0.73828125, "learning_rate": 0.00010186369629943429, "loss": 0.8346, "step": 33407 }, { "epoch": 0.8578237613561411, "grad_norm": 0.8203125, "learning_rate": 0.0001018592328712483, "loss": 0.8637, "step": 33408 }, { "epoch": 0.857849438552063, "grad_norm": 0.84375, "learning_rate": 0.00010185476943935703, "loss": 0.8129, "step": 33409 }, { "epoch": 0.8578751157479847, "grad_norm": 0.8203125, "learning_rate": 0.00010185030600376934, "loss": 0.8006, "step": 33410 }, { "epoch": 0.8579007929439065, "grad_norm": 0.73828125, "learning_rate": 0.00010184584256449423, "loss": 0.752, "step": 33411 }, { "epoch": 0.8579264701398284, "grad_norm": 0.80859375, "learning_rate": 0.00010184137912154048, "loss": 0.8354, "step": 33412 }, { "epoch": 0.8579521473357502, "grad_norm": 0.765625, "learning_rate": 0.000101836915674917, "loss": 0.8439, "step": 33413 }, { "epoch": 0.857977824531672, "grad_norm": 0.765625, "learning_rate": 0.00010183245222463275, "loss": 0.7347, "step": 33414 }, { "epoch": 0.8580035017275939, "grad_norm": 0.71875, "learning_rate": 0.0001018279887706966, "loss": 0.6543, "step": 33415 }, { "epoch": 0.8580291789235157, "grad_norm": 0.75390625, "learning_rate": 0.00010182352531311739, "loss": 0.6297, "step": 33416 }, { "epoch": 0.8580548561194374, "grad_norm": 0.76171875, "learning_rate": 0.0001018190618519041, "loss": 0.7406, "step": 33417 }, { "epoch": 0.8580805333153593, "grad_norm": 0.71484375, "learning_rate": 0.00010181459838706555, "loss": 0.7687, "step": 33418 }, { "epoch": 0.8581062105112811, "grad_norm": 0.78125, "learning_rate": 0.00010181013491861068, "loss": 0.7467, "step": 33419 }, { "epoch": 0.858131887707203, "grad_norm": 0.7734375, "learning_rate": 0.00010180567144654838, "loss": 0.9095, "step": 33420 }, { "epoch": 0.8581575649031248, "grad_norm": 0.76171875, "learning_rate": 0.00010180120797088752, "loss": 0.8221, "step": 33421 }, { "epoch": 0.8581832420990466, "grad_norm": 0.88671875, "learning_rate": 0.00010179674449163705, "loss": 0.8406, "step": 33422 }, { "epoch": 0.8582089192949683, "grad_norm": 0.7109375, "learning_rate": 0.00010179228100880583, "loss": 0.7104, "step": 33423 }, { "epoch": 0.8582345964908902, "grad_norm": 0.796875, "learning_rate": 0.00010178781752240274, "loss": 0.8162, "step": 33424 }, { "epoch": 0.858260273686812, "grad_norm": 0.6953125, "learning_rate": 0.00010178335403243668, "loss": 0.8605, "step": 33425 }, { "epoch": 0.8582859508827339, "grad_norm": 0.828125, "learning_rate": 0.00010177889053891656, "loss": 0.8557, "step": 33426 }, { "epoch": 0.8583116280786557, "grad_norm": 0.90234375, "learning_rate": 0.00010177442704185127, "loss": 0.7888, "step": 33427 }, { "epoch": 0.8583373052745775, "grad_norm": 0.77734375, "learning_rate": 0.00010176996354124973, "loss": 0.8279, "step": 33428 }, { "epoch": 0.8583629824704994, "grad_norm": 0.80078125, "learning_rate": 0.0001017655000371208, "loss": 0.8716, "step": 33429 }, { "epoch": 0.8583886596664211, "grad_norm": 0.77734375, "learning_rate": 0.00010176103652947339, "loss": 0.6794, "step": 33430 }, { "epoch": 0.8584143368623429, "grad_norm": 0.76171875, "learning_rate": 0.00010175657301831638, "loss": 0.7752, "step": 33431 }, { "epoch": 0.8584400140582648, "grad_norm": 0.7578125, "learning_rate": 0.0001017521095036587, "loss": 0.8375, "step": 33432 }, { "epoch": 0.8584656912541866, "grad_norm": 0.7890625, "learning_rate": 0.00010174764598550921, "loss": 0.8103, "step": 33433 }, { "epoch": 0.8584913684501084, "grad_norm": 0.734375, "learning_rate": 0.00010174318246387682, "loss": 0.6703, "step": 33434 }, { "epoch": 0.8585170456460303, "grad_norm": 0.8046875, "learning_rate": 0.00010173871893877042, "loss": 0.791, "step": 33435 }, { "epoch": 0.8585427228419521, "grad_norm": 0.79296875, "learning_rate": 0.00010173425541019892, "loss": 0.8755, "step": 33436 }, { "epoch": 0.8585684000378738, "grad_norm": 0.765625, "learning_rate": 0.00010172979187817122, "loss": 0.6202, "step": 33437 }, { "epoch": 0.8585940772337957, "grad_norm": 0.77734375, "learning_rate": 0.00010172532834269619, "loss": 0.7207, "step": 33438 }, { "epoch": 0.8586197544297175, "grad_norm": 0.75390625, "learning_rate": 0.00010172086480378271, "loss": 0.7792, "step": 33439 }, { "epoch": 0.8586454316256393, "grad_norm": 0.7734375, "learning_rate": 0.00010171640126143972, "loss": 0.6752, "step": 33440 }, { "epoch": 0.8586711088215612, "grad_norm": 0.80078125, "learning_rate": 0.0001017119377156761, "loss": 0.836, "step": 33441 }, { "epoch": 0.858696786017483, "grad_norm": 0.76171875, "learning_rate": 0.00010170747416650072, "loss": 0.7297, "step": 33442 }, { "epoch": 0.8587224632134047, "grad_norm": 0.6875, "learning_rate": 0.00010170301061392253, "loss": 0.7783, "step": 33443 }, { "epoch": 0.8587481404093266, "grad_norm": 0.7890625, "learning_rate": 0.00010169854705795041, "loss": 0.7998, "step": 33444 }, { "epoch": 0.8587738176052484, "grad_norm": 0.80859375, "learning_rate": 0.00010169408349859322, "loss": 0.8408, "step": 33445 }, { "epoch": 0.8587994948011702, "grad_norm": 0.73828125, "learning_rate": 0.00010168961993585986, "loss": 0.8961, "step": 33446 }, { "epoch": 0.8588251719970921, "grad_norm": 0.73046875, "learning_rate": 0.00010168515636975924, "loss": 0.7094, "step": 33447 }, { "epoch": 0.8588508491930139, "grad_norm": 0.80859375, "learning_rate": 0.00010168069280030026, "loss": 0.7438, "step": 33448 }, { "epoch": 0.8588765263889357, "grad_norm": 0.765625, "learning_rate": 0.00010167622922749183, "loss": 0.7029, "step": 33449 }, { "epoch": 0.8589022035848575, "grad_norm": 0.7265625, "learning_rate": 0.00010167176565134279, "loss": 0.7763, "step": 33450 }, { "epoch": 0.8589278807807793, "grad_norm": 0.77734375, "learning_rate": 0.00010166730207186213, "loss": 0.7051, "step": 33451 }, { "epoch": 0.8589535579767011, "grad_norm": 0.80078125, "learning_rate": 0.00010166283848905864, "loss": 0.8068, "step": 33452 }, { "epoch": 0.858979235172623, "grad_norm": 0.78125, "learning_rate": 0.00010165837490294126, "loss": 0.7086, "step": 33453 }, { "epoch": 0.8590049123685448, "grad_norm": 0.78515625, "learning_rate": 0.00010165391131351891, "loss": 0.7151, "step": 33454 }, { "epoch": 0.8590305895644667, "grad_norm": 0.80078125, "learning_rate": 0.00010164944772080045, "loss": 0.9115, "step": 33455 }, { "epoch": 0.8590562667603885, "grad_norm": 0.7578125, "learning_rate": 0.00010164498412479479, "loss": 0.7438, "step": 33456 }, { "epoch": 0.8590819439563102, "grad_norm": 0.7890625, "learning_rate": 0.00010164052052551085, "loss": 0.6802, "step": 33457 }, { "epoch": 0.859107621152232, "grad_norm": 0.796875, "learning_rate": 0.00010163605692295748, "loss": 0.8804, "step": 33458 }, { "epoch": 0.8591332983481539, "grad_norm": 0.82421875, "learning_rate": 0.0001016315933171436, "loss": 0.7503, "step": 33459 }, { "epoch": 0.8591589755440757, "grad_norm": 0.77734375, "learning_rate": 0.00010162712970807811, "loss": 0.7263, "step": 33460 }, { "epoch": 0.8591846527399976, "grad_norm": 0.71484375, "learning_rate": 0.00010162266609576987, "loss": 0.6923, "step": 33461 }, { "epoch": 0.8592103299359194, "grad_norm": 0.76953125, "learning_rate": 0.00010161820248022782, "loss": 0.7385, "step": 33462 }, { "epoch": 0.8592360071318411, "grad_norm": 0.7578125, "learning_rate": 0.00010161373886146084, "loss": 0.8586, "step": 33463 }, { "epoch": 0.859261684327763, "grad_norm": 0.796875, "learning_rate": 0.00010160927523947783, "loss": 0.7629, "step": 33464 }, { "epoch": 0.8592873615236848, "grad_norm": 0.81640625, "learning_rate": 0.00010160481161428767, "loss": 0.8334, "step": 33465 }, { "epoch": 0.8593130387196066, "grad_norm": 0.7734375, "learning_rate": 0.00010160034798589925, "loss": 0.8692, "step": 33466 }, { "epoch": 0.8593387159155285, "grad_norm": 0.8203125, "learning_rate": 0.00010159588435432152, "loss": 0.7203, "step": 33467 }, { "epoch": 0.8593643931114503, "grad_norm": 0.71484375, "learning_rate": 0.00010159142071956331, "loss": 0.6158, "step": 33468 }, { "epoch": 0.8593900703073721, "grad_norm": 0.90234375, "learning_rate": 0.00010158695708163355, "loss": 0.8598, "step": 33469 }, { "epoch": 0.8594157475032939, "grad_norm": 0.796875, "learning_rate": 0.00010158249344054116, "loss": 0.8489, "step": 33470 }, { "epoch": 0.8594414246992157, "grad_norm": 0.8125, "learning_rate": 0.00010157802979629494, "loss": 0.6827, "step": 33471 }, { "epoch": 0.8594671018951375, "grad_norm": 0.80078125, "learning_rate": 0.00010157356614890389, "loss": 0.73, "step": 33472 }, { "epoch": 0.8594927790910594, "grad_norm": 0.859375, "learning_rate": 0.00010156910249837685, "loss": 0.849, "step": 33473 }, { "epoch": 0.8595184562869812, "grad_norm": 0.77734375, "learning_rate": 0.00010156463884472273, "loss": 0.8964, "step": 33474 }, { "epoch": 0.859544133482903, "grad_norm": 0.73828125, "learning_rate": 0.00010156017518795042, "loss": 0.8193, "step": 33475 }, { "epoch": 0.8595698106788248, "grad_norm": 0.82421875, "learning_rate": 0.00010155571152806884, "loss": 0.8055, "step": 33476 }, { "epoch": 0.8595954878747466, "grad_norm": 0.796875, "learning_rate": 0.00010155124786508687, "loss": 0.7812, "step": 33477 }, { "epoch": 0.8596211650706684, "grad_norm": 0.875, "learning_rate": 0.0001015467841990134, "loss": 0.845, "step": 33478 }, { "epoch": 0.8596468422665903, "grad_norm": 0.78515625, "learning_rate": 0.0001015423205298573, "loss": 0.8094, "step": 33479 }, { "epoch": 0.8596725194625121, "grad_norm": 0.7734375, "learning_rate": 0.00010153785685762754, "loss": 0.8017, "step": 33480 }, { "epoch": 0.8596981966584339, "grad_norm": 0.82421875, "learning_rate": 0.00010153339318233294, "loss": 0.8115, "step": 33481 }, { "epoch": 0.8597238738543558, "grad_norm": 0.8046875, "learning_rate": 0.00010152892950398243, "loss": 0.8125, "step": 33482 }, { "epoch": 0.8597495510502775, "grad_norm": 0.76171875, "learning_rate": 0.00010152446582258492, "loss": 0.8575, "step": 33483 }, { "epoch": 0.8597752282461993, "grad_norm": 0.765625, "learning_rate": 0.00010152000213814925, "loss": 0.7567, "step": 33484 }, { "epoch": 0.8598009054421212, "grad_norm": 0.8125, "learning_rate": 0.00010151553845068438, "loss": 0.873, "step": 33485 }, { "epoch": 0.859826582638043, "grad_norm": 0.77734375, "learning_rate": 0.00010151107476019918, "loss": 0.7552, "step": 33486 }, { "epoch": 0.8598522598339648, "grad_norm": 0.75390625, "learning_rate": 0.00010150661106670251, "loss": 0.8045, "step": 33487 }, { "epoch": 0.8598779370298867, "grad_norm": 0.7578125, "learning_rate": 0.00010150214737020337, "loss": 0.7616, "step": 33488 }, { "epoch": 0.8599036142258085, "grad_norm": 0.77734375, "learning_rate": 0.00010149768367071053, "loss": 0.8152, "step": 33489 }, { "epoch": 0.8599292914217302, "grad_norm": 0.7265625, "learning_rate": 0.00010149321996823296, "loss": 0.6982, "step": 33490 }, { "epoch": 0.8599549686176521, "grad_norm": 0.75390625, "learning_rate": 0.00010148875626277953, "loss": 0.7562, "step": 33491 }, { "epoch": 0.8599806458135739, "grad_norm": 0.77734375, "learning_rate": 0.00010148429255435916, "loss": 0.8202, "step": 33492 }, { "epoch": 0.8600063230094958, "grad_norm": 0.81640625, "learning_rate": 0.00010147982884298071, "loss": 0.8875, "step": 33493 }, { "epoch": 0.8600320002054176, "grad_norm": 0.75390625, "learning_rate": 0.0001014753651286531, "loss": 0.773, "step": 33494 }, { "epoch": 0.8600576774013394, "grad_norm": 0.7890625, "learning_rate": 0.0001014709014113852, "loss": 0.7704, "step": 33495 }, { "epoch": 0.8600833545972612, "grad_norm": 0.80859375, "learning_rate": 0.00010146643769118598, "loss": 0.8, "step": 33496 }, { "epoch": 0.860109031793183, "grad_norm": 0.8125, "learning_rate": 0.00010146197396806425, "loss": 0.7699, "step": 33497 }, { "epoch": 0.8601347089891048, "grad_norm": 0.73046875, "learning_rate": 0.00010145751024202891, "loss": 0.7895, "step": 33498 }, { "epoch": 0.8601603861850267, "grad_norm": 0.79296875, "learning_rate": 0.00010145304651308893, "loss": 0.8719, "step": 33499 }, { "epoch": 0.8601860633809485, "grad_norm": 0.75390625, "learning_rate": 0.00010144858278125316, "loss": 0.7046, "step": 33500 }, { "epoch": 0.8602117405768703, "grad_norm": 0.828125, "learning_rate": 0.00010144411904653045, "loss": 0.8562, "step": 33501 }, { "epoch": 0.8602374177727922, "grad_norm": 0.734375, "learning_rate": 0.00010143965530892976, "loss": 0.7723, "step": 33502 }, { "epoch": 0.8602630949687139, "grad_norm": 0.796875, "learning_rate": 0.00010143519156846, "loss": 0.7631, "step": 33503 }, { "epoch": 0.8602887721646357, "grad_norm": 0.7421875, "learning_rate": 0.00010143072782513, "loss": 0.6906, "step": 33504 }, { "epoch": 0.8603144493605576, "grad_norm": 0.7890625, "learning_rate": 0.0001014262640789487, "loss": 0.6822, "step": 33505 }, { "epoch": 0.8603401265564794, "grad_norm": 1.015625, "learning_rate": 0.00010142180032992496, "loss": 0.9178, "step": 33506 }, { "epoch": 0.8603658037524012, "grad_norm": 0.79296875, "learning_rate": 0.00010141733657806774, "loss": 0.7045, "step": 33507 }, { "epoch": 0.8603914809483231, "grad_norm": 0.79296875, "learning_rate": 0.00010141287282338587, "loss": 0.8498, "step": 33508 }, { "epoch": 0.8604171581442449, "grad_norm": 0.80859375, "learning_rate": 0.00010140840906588829, "loss": 0.8143, "step": 33509 }, { "epoch": 0.8604428353401666, "grad_norm": 0.8515625, "learning_rate": 0.00010140394530558386, "loss": 0.9625, "step": 33510 }, { "epoch": 0.8604685125360885, "grad_norm": 0.76953125, "learning_rate": 0.00010139948154248149, "loss": 0.7657, "step": 33511 }, { "epoch": 0.8604941897320103, "grad_norm": 0.75390625, "learning_rate": 0.00010139501777659011, "loss": 0.8004, "step": 33512 }, { "epoch": 0.8605198669279321, "grad_norm": 0.79296875, "learning_rate": 0.00010139055400791856, "loss": 0.9242, "step": 33513 }, { "epoch": 0.860545544123854, "grad_norm": 0.8125, "learning_rate": 0.00010138609023647576, "loss": 0.7363, "step": 33514 }, { "epoch": 0.8605712213197758, "grad_norm": 0.80078125, "learning_rate": 0.0001013816264622706, "loss": 0.7558, "step": 33515 }, { "epoch": 0.8605968985156975, "grad_norm": 0.703125, "learning_rate": 0.00010137716268531201, "loss": 0.7902, "step": 33516 }, { "epoch": 0.8606225757116194, "grad_norm": 0.85546875, "learning_rate": 0.00010137269890560885, "loss": 0.746, "step": 33517 }, { "epoch": 0.8606482529075412, "grad_norm": 0.79296875, "learning_rate": 0.00010136823512317005, "loss": 0.8646, "step": 33518 }, { "epoch": 0.860673930103463, "grad_norm": 0.7578125, "learning_rate": 0.00010136377133800443, "loss": 0.8528, "step": 33519 }, { "epoch": 0.8606996072993849, "grad_norm": 0.77734375, "learning_rate": 0.00010135930755012095, "loss": 0.8999, "step": 33520 }, { "epoch": 0.8607252844953067, "grad_norm": 0.73828125, "learning_rate": 0.0001013548437595285, "loss": 0.7046, "step": 33521 }, { "epoch": 0.8607509616912286, "grad_norm": 0.88671875, "learning_rate": 0.000101350379966236, "loss": 0.874, "step": 33522 }, { "epoch": 0.8607766388871503, "grad_norm": 0.8359375, "learning_rate": 0.00010134591617025229, "loss": 0.8797, "step": 33523 }, { "epoch": 0.8608023160830721, "grad_norm": 0.74609375, "learning_rate": 0.00010134145237158628, "loss": 0.7431, "step": 33524 }, { "epoch": 0.860827993278994, "grad_norm": 0.79296875, "learning_rate": 0.00010133698857024687, "loss": 0.7568, "step": 33525 }, { "epoch": 0.8608536704749158, "grad_norm": 0.7578125, "learning_rate": 0.000101332524766243, "loss": 0.7912, "step": 33526 }, { "epoch": 0.8608793476708376, "grad_norm": 0.76171875, "learning_rate": 0.00010132806095958347, "loss": 0.7666, "step": 33527 }, { "epoch": 0.8609050248667595, "grad_norm": 0.796875, "learning_rate": 0.00010132359715027728, "loss": 0.8446, "step": 33528 }, { "epoch": 0.8609307020626813, "grad_norm": 0.79296875, "learning_rate": 0.00010131913333833329, "loss": 0.7994, "step": 33529 }, { "epoch": 0.860956379258603, "grad_norm": 0.76171875, "learning_rate": 0.00010131466952376037, "loss": 0.8349, "step": 33530 }, { "epoch": 0.8609820564545249, "grad_norm": 0.75390625, "learning_rate": 0.00010131020570656741, "loss": 0.7579, "step": 33531 }, { "epoch": 0.8610077336504467, "grad_norm": 0.82421875, "learning_rate": 0.00010130574188676333, "loss": 0.7221, "step": 33532 }, { "epoch": 0.8610334108463685, "grad_norm": 0.80078125, "learning_rate": 0.00010130127806435705, "loss": 0.7908, "step": 33533 }, { "epoch": 0.8610590880422904, "grad_norm": 0.74609375, "learning_rate": 0.00010129681423935744, "loss": 0.7538, "step": 33534 }, { "epoch": 0.8610847652382122, "grad_norm": 0.7890625, "learning_rate": 0.00010129235041177339, "loss": 1.0019, "step": 33535 }, { "epoch": 0.8611104424341339, "grad_norm": 0.83203125, "learning_rate": 0.00010128788658161381, "loss": 0.7713, "step": 33536 }, { "epoch": 0.8611361196300558, "grad_norm": 0.78125, "learning_rate": 0.00010128342274888759, "loss": 0.8299, "step": 33537 }, { "epoch": 0.8611617968259776, "grad_norm": 0.75, "learning_rate": 0.00010127895891360359, "loss": 0.6624, "step": 33538 }, { "epoch": 0.8611874740218994, "grad_norm": 0.88671875, "learning_rate": 0.00010127449507577078, "loss": 0.8212, "step": 33539 }, { "epoch": 0.8612131512178213, "grad_norm": 1.015625, "learning_rate": 0.000101270031235398, "loss": 0.898, "step": 33540 }, { "epoch": 0.8612388284137431, "grad_norm": 0.75, "learning_rate": 0.00010126556739249415, "loss": 0.8344, "step": 33541 }, { "epoch": 0.8612645056096649, "grad_norm": 0.77734375, "learning_rate": 0.00010126110354706817, "loss": 0.7703, "step": 33542 }, { "epoch": 0.8612901828055867, "grad_norm": 0.7421875, "learning_rate": 0.00010125663969912888, "loss": 0.7538, "step": 33543 }, { "epoch": 0.8613158600015085, "grad_norm": 0.7890625, "learning_rate": 0.00010125217584868528, "loss": 0.7979, "step": 33544 }, { "epoch": 0.8613415371974303, "grad_norm": 0.7890625, "learning_rate": 0.00010124771199574616, "loss": 0.7294, "step": 33545 }, { "epoch": 0.8613672143933522, "grad_norm": 0.875, "learning_rate": 0.00010124324814032047, "loss": 0.841, "step": 33546 }, { "epoch": 0.861392891589274, "grad_norm": 0.7890625, "learning_rate": 0.00010123878428241711, "loss": 0.8588, "step": 33547 }, { "epoch": 0.8614185687851958, "grad_norm": 0.765625, "learning_rate": 0.00010123432042204495, "loss": 0.8222, "step": 33548 }, { "epoch": 0.8614442459811177, "grad_norm": 0.79296875, "learning_rate": 0.00010122985655921294, "loss": 0.7154, "step": 33549 }, { "epoch": 0.8614699231770394, "grad_norm": 0.75390625, "learning_rate": 0.0001012253926939299, "loss": 0.7262, "step": 33550 }, { "epoch": 0.8614956003729612, "grad_norm": 0.80078125, "learning_rate": 0.00010122092882620475, "loss": 0.825, "step": 33551 }, { "epoch": 0.8615212775688831, "grad_norm": 0.7578125, "learning_rate": 0.00010121646495604642, "loss": 0.7348, "step": 33552 }, { "epoch": 0.8615469547648049, "grad_norm": 0.765625, "learning_rate": 0.00010121200108346381, "loss": 0.6944, "step": 33553 }, { "epoch": 0.8615726319607268, "grad_norm": 0.7890625, "learning_rate": 0.00010120753720846574, "loss": 0.7349, "step": 33554 }, { "epoch": 0.8615983091566486, "grad_norm": 0.828125, "learning_rate": 0.00010120307333106121, "loss": 0.8909, "step": 33555 }, { "epoch": 0.8616239863525703, "grad_norm": 1.0078125, "learning_rate": 0.000101198609451259, "loss": 0.8614, "step": 33556 }, { "epoch": 0.8616496635484922, "grad_norm": 0.765625, "learning_rate": 0.00010119414556906812, "loss": 0.8176, "step": 33557 }, { "epoch": 0.861675340744414, "grad_norm": 0.76171875, "learning_rate": 0.00010118968168449741, "loss": 0.8203, "step": 33558 }, { "epoch": 0.8617010179403358, "grad_norm": 0.7265625, "learning_rate": 0.00010118521779755575, "loss": 0.748, "step": 33559 }, { "epoch": 0.8617266951362577, "grad_norm": 0.76171875, "learning_rate": 0.00010118075390825207, "loss": 0.8925, "step": 33560 }, { "epoch": 0.8617523723321795, "grad_norm": 0.765625, "learning_rate": 0.00010117629001659527, "loss": 0.7878, "step": 33561 }, { "epoch": 0.8617780495281013, "grad_norm": 0.84765625, "learning_rate": 0.00010117182612259421, "loss": 0.723, "step": 33562 }, { "epoch": 0.8618037267240231, "grad_norm": 0.81640625, "learning_rate": 0.00010116736222625782, "loss": 0.8284, "step": 33563 }, { "epoch": 0.8618294039199449, "grad_norm": 0.80078125, "learning_rate": 0.00010116289832759496, "loss": 0.709, "step": 33564 }, { "epoch": 0.8618550811158667, "grad_norm": 1.15625, "learning_rate": 0.00010115843442661456, "loss": 0.6913, "step": 33565 }, { "epoch": 0.8618807583117886, "grad_norm": 0.81640625, "learning_rate": 0.00010115397052332554, "loss": 0.8487, "step": 33566 }, { "epoch": 0.8619064355077104, "grad_norm": 0.80078125, "learning_rate": 0.0001011495066177367, "loss": 0.7681, "step": 33567 }, { "epoch": 0.8619321127036322, "grad_norm": 0.72265625, "learning_rate": 0.00010114504270985704, "loss": 0.6829, "step": 33568 }, { "epoch": 0.8619577898995541, "grad_norm": 0.8046875, "learning_rate": 0.00010114057879969543, "loss": 0.7352, "step": 33569 }, { "epoch": 0.8619834670954758, "grad_norm": 0.82421875, "learning_rate": 0.00010113611488726069, "loss": 0.8592, "step": 33570 }, { "epoch": 0.8620091442913976, "grad_norm": 0.765625, "learning_rate": 0.0001011316509725618, "loss": 0.7936, "step": 33571 }, { "epoch": 0.8620348214873195, "grad_norm": 0.75390625, "learning_rate": 0.00010112718705560764, "loss": 0.755, "step": 33572 }, { "epoch": 0.8620604986832413, "grad_norm": 0.87109375, "learning_rate": 0.00010112272313640709, "loss": 0.7645, "step": 33573 }, { "epoch": 0.8620861758791631, "grad_norm": 0.7578125, "learning_rate": 0.00010111825921496907, "loss": 0.697, "step": 33574 }, { "epoch": 0.862111853075085, "grad_norm": 0.76953125, "learning_rate": 0.00010111379529130245, "loss": 0.7497, "step": 33575 }, { "epoch": 0.8621375302710067, "grad_norm": 0.77734375, "learning_rate": 0.00010110933136541614, "loss": 0.862, "step": 33576 }, { "epoch": 0.8621632074669285, "grad_norm": 0.73046875, "learning_rate": 0.00010110486743731904, "loss": 0.7221, "step": 33577 }, { "epoch": 0.8621888846628504, "grad_norm": 0.8125, "learning_rate": 0.00010110040350702001, "loss": 0.8201, "step": 33578 }, { "epoch": 0.8622145618587722, "grad_norm": 0.7890625, "learning_rate": 0.00010109593957452801, "loss": 0.7856, "step": 33579 }, { "epoch": 0.862240239054694, "grad_norm": 0.734375, "learning_rate": 0.00010109147563985188, "loss": 0.7438, "step": 33580 }, { "epoch": 0.8622659162506159, "grad_norm": 0.7421875, "learning_rate": 0.00010108701170300052, "loss": 0.7674, "step": 33581 }, { "epoch": 0.8622915934465377, "grad_norm": 0.73828125, "learning_rate": 0.00010108254776398289, "loss": 0.7675, "step": 33582 }, { "epoch": 0.8623172706424594, "grad_norm": 0.84375, "learning_rate": 0.00010107808382280781, "loss": 0.7575, "step": 33583 }, { "epoch": 0.8623429478383813, "grad_norm": 0.7578125, "learning_rate": 0.00010107361987948422, "loss": 0.7526, "step": 33584 }, { "epoch": 0.8623686250343031, "grad_norm": 0.77734375, "learning_rate": 0.000101069155934021, "loss": 0.8855, "step": 33585 }, { "epoch": 0.862394302230225, "grad_norm": 0.7421875, "learning_rate": 0.00010106469198642703, "loss": 0.8606, "step": 33586 }, { "epoch": 0.8624199794261468, "grad_norm": 0.8125, "learning_rate": 0.00010106022803671124, "loss": 0.8965, "step": 33587 }, { "epoch": 0.8624456566220686, "grad_norm": 0.76171875, "learning_rate": 0.00010105576408488251, "loss": 0.7677, "step": 33588 }, { "epoch": 0.8624713338179905, "grad_norm": 0.8125, "learning_rate": 0.00010105130013094975, "loss": 0.7725, "step": 33589 }, { "epoch": 0.8624970110139122, "grad_norm": 0.73828125, "learning_rate": 0.00010104683617492183, "loss": 0.7912, "step": 33590 }, { "epoch": 0.862522688209834, "grad_norm": 0.76171875, "learning_rate": 0.00010104237221680765, "loss": 0.736, "step": 33591 }, { "epoch": 0.8625483654057559, "grad_norm": 0.75, "learning_rate": 0.00010103790825661615, "loss": 0.8556, "step": 33592 }, { "epoch": 0.8625740426016777, "grad_norm": 0.984375, "learning_rate": 0.00010103344429435617, "loss": 0.9308, "step": 33593 }, { "epoch": 0.8625997197975995, "grad_norm": 0.8359375, "learning_rate": 0.0001010289803300366, "loss": 0.8506, "step": 33594 }, { "epoch": 0.8626253969935214, "grad_norm": 0.76953125, "learning_rate": 0.00010102451636366644, "loss": 0.7746, "step": 33595 }, { "epoch": 0.8626510741894431, "grad_norm": 0.86328125, "learning_rate": 0.00010102005239525443, "loss": 0.7944, "step": 33596 }, { "epoch": 0.8626767513853649, "grad_norm": 1.0, "learning_rate": 0.0001010155884248096, "loss": 0.9054, "step": 33597 }, { "epoch": 0.8627024285812868, "grad_norm": 0.734375, "learning_rate": 0.00010101112445234078, "loss": 0.6532, "step": 33598 }, { "epoch": 0.8627281057772086, "grad_norm": 0.78125, "learning_rate": 0.00010100666047785686, "loss": 0.8262, "step": 33599 }, { "epoch": 0.8627537829731304, "grad_norm": 0.765625, "learning_rate": 0.00010100219650136678, "loss": 0.813, "step": 33600 }, { "epoch": 0.8627794601690523, "grad_norm": 0.734375, "learning_rate": 0.00010099773252287941, "loss": 0.632, "step": 33601 }, { "epoch": 0.8628051373649741, "grad_norm": 0.79296875, "learning_rate": 0.00010099326854240365, "loss": 0.8439, "step": 33602 }, { "epoch": 0.8628308145608958, "grad_norm": 0.8046875, "learning_rate": 0.00010098880455994841, "loss": 0.8341, "step": 33603 }, { "epoch": 0.8628564917568177, "grad_norm": 0.76171875, "learning_rate": 0.00010098434057552253, "loss": 0.8445, "step": 33604 }, { "epoch": 0.8628821689527395, "grad_norm": 0.82421875, "learning_rate": 0.00010097987658913499, "loss": 0.8424, "step": 33605 }, { "epoch": 0.8629078461486613, "grad_norm": 0.7578125, "learning_rate": 0.00010097541260079462, "loss": 0.7027, "step": 33606 }, { "epoch": 0.8629335233445832, "grad_norm": 0.76171875, "learning_rate": 0.00010097094861051032, "loss": 0.8145, "step": 33607 }, { "epoch": 0.862959200540505, "grad_norm": 0.796875, "learning_rate": 0.00010096648461829108, "loss": 0.7954, "step": 33608 }, { "epoch": 0.8629848777364268, "grad_norm": 0.75, "learning_rate": 0.00010096202062414566, "loss": 0.8195, "step": 33609 }, { "epoch": 0.8630105549323486, "grad_norm": 0.73046875, "learning_rate": 0.00010095755662808304, "loss": 0.8044, "step": 33610 }, { "epoch": 0.8630362321282704, "grad_norm": 0.7890625, "learning_rate": 0.0001009530926301121, "loss": 0.8153, "step": 33611 }, { "epoch": 0.8630619093241922, "grad_norm": 0.78125, "learning_rate": 0.0001009486286302417, "loss": 0.7962, "step": 33612 }, { "epoch": 0.8630875865201141, "grad_norm": 0.8515625, "learning_rate": 0.00010094416462848082, "loss": 0.7382, "step": 33613 }, { "epoch": 0.8631132637160359, "grad_norm": 0.8046875, "learning_rate": 0.00010093970062483829, "loss": 1.0214, "step": 33614 }, { "epoch": 0.8631389409119578, "grad_norm": 0.8125, "learning_rate": 0.00010093523661932301, "loss": 0.8097, "step": 33615 }, { "epoch": 0.8631646181078795, "grad_norm": 0.76171875, "learning_rate": 0.0001009307726119439, "loss": 0.8502, "step": 33616 }, { "epoch": 0.8631902953038013, "grad_norm": 0.73046875, "learning_rate": 0.00010092630860270984, "loss": 0.7275, "step": 33617 }, { "epoch": 0.8632159724997231, "grad_norm": 0.8125, "learning_rate": 0.0001009218445916297, "loss": 0.9218, "step": 33618 }, { "epoch": 0.863241649695645, "grad_norm": 0.7578125, "learning_rate": 0.00010091738057871243, "loss": 0.8296, "step": 33619 }, { "epoch": 0.8632673268915668, "grad_norm": 0.69140625, "learning_rate": 0.00010091291656396688, "loss": 0.722, "step": 33620 }, { "epoch": 0.8632930040874887, "grad_norm": 0.80078125, "learning_rate": 0.00010090845254740205, "loss": 0.6821, "step": 33621 }, { "epoch": 0.8633186812834105, "grad_norm": 0.80859375, "learning_rate": 0.0001009039885290267, "loss": 0.8521, "step": 33622 }, { "epoch": 0.8633443584793322, "grad_norm": 0.9375, "learning_rate": 0.00010089952450884976, "loss": 0.6976, "step": 33623 }, { "epoch": 0.863370035675254, "grad_norm": 0.7421875, "learning_rate": 0.00010089506048688018, "loss": 0.9209, "step": 33624 }, { "epoch": 0.8633957128711759, "grad_norm": 0.79296875, "learning_rate": 0.00010089059646312682, "loss": 0.7613, "step": 33625 }, { "epoch": 0.8634213900670977, "grad_norm": 0.75390625, "learning_rate": 0.00010088613243759856, "loss": 0.6799, "step": 33626 }, { "epoch": 0.8634470672630196, "grad_norm": 0.796875, "learning_rate": 0.00010088166841030436, "loss": 0.7434, "step": 33627 }, { "epoch": 0.8634727444589414, "grad_norm": 0.84375, "learning_rate": 0.00010087720438125305, "loss": 0.7806, "step": 33628 }, { "epoch": 0.8634984216548632, "grad_norm": 0.7734375, "learning_rate": 0.00010087274035045356, "loss": 0.7201, "step": 33629 }, { "epoch": 0.863524098850785, "grad_norm": 0.7578125, "learning_rate": 0.00010086827631791478, "loss": 0.766, "step": 33630 }, { "epoch": 0.8635497760467068, "grad_norm": 0.76171875, "learning_rate": 0.00010086381228364556, "loss": 0.8499, "step": 33631 }, { "epoch": 0.8635754532426286, "grad_norm": 0.69921875, "learning_rate": 0.00010085934824765488, "loss": 0.676, "step": 33632 }, { "epoch": 0.8636011304385505, "grad_norm": 0.73046875, "learning_rate": 0.00010085488420995159, "loss": 0.7336, "step": 33633 }, { "epoch": 0.8636268076344723, "grad_norm": 0.8125, "learning_rate": 0.00010085042017054462, "loss": 0.8474, "step": 33634 }, { "epoch": 0.8636524848303941, "grad_norm": 0.74609375, "learning_rate": 0.0001008459561294428, "loss": 0.8577, "step": 33635 }, { "epoch": 0.8636781620263159, "grad_norm": 0.82421875, "learning_rate": 0.00010084149208665508, "loss": 0.8626, "step": 33636 }, { "epoch": 0.8637038392222377, "grad_norm": 0.7890625, "learning_rate": 0.00010083702804219035, "loss": 0.8088, "step": 33637 }, { "epoch": 0.8637295164181595, "grad_norm": 0.83984375, "learning_rate": 0.0001008325639960575, "loss": 0.79, "step": 33638 }, { "epoch": 0.8637551936140814, "grad_norm": 0.79296875, "learning_rate": 0.00010082809994826538, "loss": 0.8237, "step": 33639 }, { "epoch": 0.8637808708100032, "grad_norm": 0.7734375, "learning_rate": 0.00010082363589882297, "loss": 0.6847, "step": 33640 }, { "epoch": 0.863806548005925, "grad_norm": 0.82421875, "learning_rate": 0.00010081917184773915, "loss": 0.797, "step": 33641 }, { "epoch": 0.8638322252018469, "grad_norm": 0.8203125, "learning_rate": 0.00010081470779502276, "loss": 0.7324, "step": 33642 }, { "epoch": 0.8638579023977686, "grad_norm": 0.7890625, "learning_rate": 0.00010081024374068275, "loss": 0.8961, "step": 33643 }, { "epoch": 0.8638835795936904, "grad_norm": 0.8046875, "learning_rate": 0.00010080577968472798, "loss": 0.9567, "step": 33644 }, { "epoch": 0.8639092567896123, "grad_norm": 0.8359375, "learning_rate": 0.00010080131562716737, "loss": 0.7196, "step": 33645 }, { "epoch": 0.8639349339855341, "grad_norm": 0.80859375, "learning_rate": 0.00010079685156800982, "loss": 0.7985, "step": 33646 }, { "epoch": 0.863960611181456, "grad_norm": 0.78515625, "learning_rate": 0.00010079238750726421, "loss": 0.7947, "step": 33647 }, { "epoch": 0.8639862883773778, "grad_norm": 0.7734375, "learning_rate": 0.00010078792344493946, "loss": 0.738, "step": 33648 }, { "epoch": 0.8640119655732996, "grad_norm": 0.87109375, "learning_rate": 0.00010078345938104441, "loss": 0.8478, "step": 33649 }, { "epoch": 0.8640376427692213, "grad_norm": 0.76171875, "learning_rate": 0.00010077899531558801, "loss": 0.844, "step": 33650 }, { "epoch": 0.8640633199651432, "grad_norm": 0.765625, "learning_rate": 0.00010077453124857917, "loss": 0.8427, "step": 33651 }, { "epoch": 0.864088997161065, "grad_norm": 0.8046875, "learning_rate": 0.0001007700671800267, "loss": 0.6925, "step": 33652 }, { "epoch": 0.8641146743569869, "grad_norm": 0.7421875, "learning_rate": 0.00010076560310993961, "loss": 0.659, "step": 33653 }, { "epoch": 0.8641403515529087, "grad_norm": 0.84765625, "learning_rate": 0.00010076113903832676, "loss": 0.8384, "step": 33654 }, { "epoch": 0.8641660287488305, "grad_norm": 0.87109375, "learning_rate": 0.00010075667496519698, "loss": 0.7792, "step": 33655 }, { "epoch": 0.8641917059447523, "grad_norm": 0.91796875, "learning_rate": 0.00010075221089055923, "loss": 0.8556, "step": 33656 }, { "epoch": 0.8642173831406741, "grad_norm": 0.74609375, "learning_rate": 0.00010074774681442239, "loss": 0.7219, "step": 33657 }, { "epoch": 0.8642430603365959, "grad_norm": 0.80859375, "learning_rate": 0.00010074328273679537, "loss": 0.8077, "step": 33658 }, { "epoch": 0.8642687375325178, "grad_norm": 0.82421875, "learning_rate": 0.00010073881865768703, "loss": 0.757, "step": 33659 }, { "epoch": 0.8642944147284396, "grad_norm": 0.7421875, "learning_rate": 0.00010073435457710631, "loss": 0.8415, "step": 33660 }, { "epoch": 0.8643200919243614, "grad_norm": 0.8984375, "learning_rate": 0.00010072989049506212, "loss": 0.8145, "step": 33661 }, { "epoch": 0.8643457691202833, "grad_norm": 0.8203125, "learning_rate": 0.00010072542641156328, "loss": 0.8888, "step": 33662 }, { "epoch": 0.864371446316205, "grad_norm": 0.828125, "learning_rate": 0.00010072096232661871, "loss": 0.8777, "step": 33663 }, { "epoch": 0.8643971235121268, "grad_norm": 0.953125, "learning_rate": 0.00010071649824023737, "loss": 0.8745, "step": 33664 }, { "epoch": 0.8644228007080487, "grad_norm": 0.828125, "learning_rate": 0.00010071203415242811, "loss": 0.8141, "step": 33665 }, { "epoch": 0.8644484779039705, "grad_norm": 0.78125, "learning_rate": 0.00010070757006319982, "loss": 0.8441, "step": 33666 }, { "epoch": 0.8644741550998923, "grad_norm": 0.89453125, "learning_rate": 0.00010070310597256143, "loss": 0.8359, "step": 33667 }, { "epoch": 0.8644998322958142, "grad_norm": 0.78515625, "learning_rate": 0.00010069864188052176, "loss": 0.9175, "step": 33668 }, { "epoch": 0.8645255094917359, "grad_norm": 0.9296875, "learning_rate": 0.0001006941777870898, "loss": 0.9175, "step": 33669 }, { "epoch": 0.8645511866876577, "grad_norm": 0.7734375, "learning_rate": 0.0001006897136922744, "loss": 0.7018, "step": 33670 }, { "epoch": 0.8645768638835796, "grad_norm": 0.890625, "learning_rate": 0.00010068524959608446, "loss": 0.8735, "step": 33671 }, { "epoch": 0.8646025410795014, "grad_norm": 0.76953125, "learning_rate": 0.00010068078549852888, "loss": 0.7232, "step": 33672 }, { "epoch": 0.8646282182754232, "grad_norm": 0.8125, "learning_rate": 0.00010067632139961655, "loss": 0.6903, "step": 33673 }, { "epoch": 0.8646538954713451, "grad_norm": 0.7578125, "learning_rate": 0.0001006718572993564, "loss": 0.8199, "step": 33674 }, { "epoch": 0.8646795726672669, "grad_norm": 0.70703125, "learning_rate": 0.00010066739319775726, "loss": 0.7708, "step": 33675 }, { "epoch": 0.8647052498631886, "grad_norm": 0.73828125, "learning_rate": 0.00010066292909482808, "loss": 0.8374, "step": 33676 }, { "epoch": 0.8647309270591105, "grad_norm": 0.8203125, "learning_rate": 0.00010065846499057774, "loss": 0.7318, "step": 33677 }, { "epoch": 0.8647566042550323, "grad_norm": 0.8828125, "learning_rate": 0.00010065400088501515, "loss": 0.756, "step": 33678 }, { "epoch": 0.8647822814509541, "grad_norm": 0.79296875, "learning_rate": 0.00010064953677814918, "loss": 0.7993, "step": 33679 }, { "epoch": 0.864807958646876, "grad_norm": 0.75390625, "learning_rate": 0.00010064507266998877, "loss": 0.6879, "step": 33680 }, { "epoch": 0.8648336358427978, "grad_norm": 0.7734375, "learning_rate": 0.00010064060856054276, "loss": 0.8153, "step": 33681 }, { "epoch": 0.8648593130387197, "grad_norm": 0.8125, "learning_rate": 0.0001006361444498201, "loss": 0.8625, "step": 33682 }, { "epoch": 0.8648849902346414, "grad_norm": 0.7265625, "learning_rate": 0.00010063168033782963, "loss": 0.7103, "step": 33683 }, { "epoch": 0.8649106674305632, "grad_norm": 0.71484375, "learning_rate": 0.00010062721622458028, "loss": 0.7844, "step": 33684 }, { "epoch": 0.864936344626485, "grad_norm": 0.92578125, "learning_rate": 0.00010062275211008096, "loss": 0.8272, "step": 33685 }, { "epoch": 0.8649620218224069, "grad_norm": 0.81640625, "learning_rate": 0.00010061828799434057, "loss": 0.8332, "step": 33686 }, { "epoch": 0.8649876990183287, "grad_norm": 1.2421875, "learning_rate": 0.00010061382387736797, "loss": 0.9293, "step": 33687 }, { "epoch": 0.8650133762142506, "grad_norm": 0.81640625, "learning_rate": 0.00010060935975917208, "loss": 0.6887, "step": 33688 }, { "epoch": 0.8650390534101723, "grad_norm": 0.77734375, "learning_rate": 0.00010060489563976177, "loss": 0.8267, "step": 33689 }, { "epoch": 0.8650647306060941, "grad_norm": 0.765625, "learning_rate": 0.00010060043151914598, "loss": 0.7603, "step": 33690 }, { "epoch": 0.865090407802016, "grad_norm": 0.75390625, "learning_rate": 0.00010059596739733357, "loss": 0.7835, "step": 33691 }, { "epoch": 0.8651160849979378, "grad_norm": 0.7734375, "learning_rate": 0.00010059150327433345, "loss": 0.6776, "step": 33692 }, { "epoch": 0.8651417621938596, "grad_norm": 0.71875, "learning_rate": 0.00010058703915015454, "loss": 0.7247, "step": 33693 }, { "epoch": 0.8651674393897815, "grad_norm": 0.73828125, "learning_rate": 0.00010058257502480568, "loss": 0.7481, "step": 33694 }, { "epoch": 0.8651931165857033, "grad_norm": 0.80078125, "learning_rate": 0.00010057811089829583, "loss": 0.7945, "step": 33695 }, { "epoch": 0.865218793781625, "grad_norm": 0.734375, "learning_rate": 0.00010057364677063385, "loss": 0.9378, "step": 33696 }, { "epoch": 0.8652444709775469, "grad_norm": 0.8125, "learning_rate": 0.00010056918264182861, "loss": 0.7814, "step": 33697 }, { "epoch": 0.8652701481734687, "grad_norm": 0.7734375, "learning_rate": 0.00010056471851188909, "loss": 0.8672, "step": 33698 }, { "epoch": 0.8652958253693905, "grad_norm": 0.765625, "learning_rate": 0.00010056025438082413, "loss": 0.7908, "step": 33699 }, { "epoch": 0.8653215025653124, "grad_norm": 0.79296875, "learning_rate": 0.00010055579024864261, "loss": 0.7757, "step": 33700 }, { "epoch": 0.8653471797612342, "grad_norm": 0.87109375, "learning_rate": 0.00010055132611535348, "loss": 0.9153, "step": 33701 }, { "epoch": 0.865372856957156, "grad_norm": 0.8203125, "learning_rate": 0.00010054686198096559, "loss": 0.8504, "step": 33702 }, { "epoch": 0.8653985341530778, "grad_norm": 0.671875, "learning_rate": 0.00010054239784548782, "loss": 0.7553, "step": 33703 }, { "epoch": 0.8654242113489996, "grad_norm": 0.72265625, "learning_rate": 0.00010053793370892915, "loss": 0.777, "step": 33704 }, { "epoch": 0.8654498885449214, "grad_norm": 0.7734375, "learning_rate": 0.00010053346957129841, "loss": 0.8139, "step": 33705 }, { "epoch": 0.8654755657408433, "grad_norm": 0.73828125, "learning_rate": 0.00010052900543260448, "loss": 0.846, "step": 33706 }, { "epoch": 0.8655012429367651, "grad_norm": 0.83203125, "learning_rate": 0.00010052454129285635, "loss": 0.8086, "step": 33707 }, { "epoch": 0.865526920132687, "grad_norm": 0.84375, "learning_rate": 0.00010052007715206282, "loss": 0.8792, "step": 33708 }, { "epoch": 0.8655525973286087, "grad_norm": 0.7734375, "learning_rate": 0.00010051561301023285, "loss": 0.8467, "step": 33709 }, { "epoch": 0.8655782745245305, "grad_norm": 0.83203125, "learning_rate": 0.0001005111488673753, "loss": 0.911, "step": 33710 }, { "epoch": 0.8656039517204523, "grad_norm": 0.859375, "learning_rate": 0.00010050668472349904, "loss": 0.6956, "step": 33711 }, { "epoch": 0.8656296289163742, "grad_norm": 0.75, "learning_rate": 0.00010050222057861303, "loss": 0.8004, "step": 33712 }, { "epoch": 0.865655306112296, "grad_norm": 0.8515625, "learning_rate": 0.00010049775643272615, "loss": 0.8686, "step": 33713 }, { "epoch": 0.8656809833082179, "grad_norm": 0.828125, "learning_rate": 0.00010049329228584727, "loss": 0.8155, "step": 33714 }, { "epoch": 0.8657066605041397, "grad_norm": 0.8515625, "learning_rate": 0.00010048882813798533, "loss": 0.8638, "step": 33715 }, { "epoch": 0.8657323377000614, "grad_norm": 0.8046875, "learning_rate": 0.00010048436398914916, "loss": 0.8278, "step": 33716 }, { "epoch": 0.8657580148959833, "grad_norm": 0.74609375, "learning_rate": 0.00010047989983934771, "loss": 0.8868, "step": 33717 }, { "epoch": 0.8657836920919051, "grad_norm": 0.7890625, "learning_rate": 0.00010047543568858988, "loss": 0.7537, "step": 33718 }, { "epoch": 0.8658093692878269, "grad_norm": 0.80078125, "learning_rate": 0.00010047097153688452, "loss": 0.8158, "step": 33719 }, { "epoch": 0.8658350464837488, "grad_norm": 0.78515625, "learning_rate": 0.00010046650738424059, "loss": 0.8198, "step": 33720 }, { "epoch": 0.8658607236796706, "grad_norm": 0.7578125, "learning_rate": 0.00010046204323066692, "loss": 0.7852, "step": 33721 }, { "epoch": 0.8658864008755924, "grad_norm": 0.76171875, "learning_rate": 0.00010045757907617247, "loss": 0.7148, "step": 33722 }, { "epoch": 0.8659120780715142, "grad_norm": 0.74609375, "learning_rate": 0.0001004531149207661, "loss": 0.9004, "step": 33723 }, { "epoch": 0.865937755267436, "grad_norm": 0.84765625, "learning_rate": 0.00010044865076445668, "loss": 0.8062, "step": 33724 }, { "epoch": 0.8659634324633578, "grad_norm": 0.765625, "learning_rate": 0.00010044418660725318, "loss": 0.7584, "step": 33725 }, { "epoch": 0.8659891096592797, "grad_norm": 0.8828125, "learning_rate": 0.00010043972244916445, "loss": 0.8554, "step": 33726 }, { "epoch": 0.8660147868552015, "grad_norm": 0.7421875, "learning_rate": 0.00010043525829019939, "loss": 0.7452, "step": 33727 }, { "epoch": 0.8660404640511233, "grad_norm": 0.88671875, "learning_rate": 0.00010043079413036689, "loss": 0.8542, "step": 33728 }, { "epoch": 0.8660661412470451, "grad_norm": 0.765625, "learning_rate": 0.00010042632996967584, "loss": 0.7738, "step": 33729 }, { "epoch": 0.8660918184429669, "grad_norm": 0.85546875, "learning_rate": 0.0001004218658081352, "loss": 0.8187, "step": 33730 }, { "epoch": 0.8661174956388887, "grad_norm": 0.88671875, "learning_rate": 0.00010041740164575379, "loss": 0.8095, "step": 33731 }, { "epoch": 0.8661431728348106, "grad_norm": 0.734375, "learning_rate": 0.00010041293748254053, "loss": 0.7717, "step": 33732 }, { "epoch": 0.8661688500307324, "grad_norm": 0.765625, "learning_rate": 0.00010040847331850436, "loss": 0.7406, "step": 33733 }, { "epoch": 0.8661945272266542, "grad_norm": 0.8515625, "learning_rate": 0.0001004040091536541, "loss": 0.8036, "step": 33734 }, { "epoch": 0.8662202044225761, "grad_norm": 0.78515625, "learning_rate": 0.00010039954498799868, "loss": 0.8447, "step": 33735 }, { "epoch": 0.8662458816184978, "grad_norm": 0.7890625, "learning_rate": 0.00010039508082154702, "loss": 0.9081, "step": 33736 }, { "epoch": 0.8662715588144196, "grad_norm": 0.87109375, "learning_rate": 0.000100390616654308, "loss": 0.7166, "step": 33737 }, { "epoch": 0.8662972360103415, "grad_norm": 0.8125, "learning_rate": 0.00010038615248629053, "loss": 0.6903, "step": 33738 }, { "epoch": 0.8663229132062633, "grad_norm": 0.7890625, "learning_rate": 0.00010038168831750347, "loss": 0.8951, "step": 33739 }, { "epoch": 0.8663485904021851, "grad_norm": 0.78515625, "learning_rate": 0.00010037722414795576, "loss": 0.8383, "step": 33740 }, { "epoch": 0.866374267598107, "grad_norm": 0.80859375, "learning_rate": 0.00010037275997765627, "loss": 0.7594, "step": 33741 }, { "epoch": 0.8663999447940288, "grad_norm": 0.80078125, "learning_rate": 0.00010036829580661392, "loss": 0.7792, "step": 33742 }, { "epoch": 0.8664256219899505, "grad_norm": 0.7734375, "learning_rate": 0.00010036383163483753, "loss": 0.8747, "step": 33743 }, { "epoch": 0.8664512991858724, "grad_norm": 0.85546875, "learning_rate": 0.00010035936746233609, "loss": 0.7299, "step": 33744 }, { "epoch": 0.8664769763817942, "grad_norm": 0.78125, "learning_rate": 0.00010035490328911848, "loss": 0.8112, "step": 33745 }, { "epoch": 0.866502653577716, "grad_norm": 0.7734375, "learning_rate": 0.00010035043911519357, "loss": 0.8147, "step": 33746 }, { "epoch": 0.8665283307736379, "grad_norm": 0.859375, "learning_rate": 0.00010034597494057028, "loss": 0.9064, "step": 33747 }, { "epoch": 0.8665540079695597, "grad_norm": 0.8203125, "learning_rate": 0.00010034151076525746, "loss": 0.8624, "step": 33748 }, { "epoch": 0.8665796851654815, "grad_norm": 0.80859375, "learning_rate": 0.00010033704658926406, "loss": 0.6802, "step": 33749 }, { "epoch": 0.8666053623614033, "grad_norm": 0.80078125, "learning_rate": 0.00010033258241259897, "loss": 0.8504, "step": 33750 }, { "epoch": 0.8666310395573251, "grad_norm": 0.78125, "learning_rate": 0.00010032811823527104, "loss": 0.7881, "step": 33751 }, { "epoch": 0.866656716753247, "grad_norm": 0.80078125, "learning_rate": 0.00010032365405728921, "loss": 0.8177, "step": 33752 }, { "epoch": 0.8666823939491688, "grad_norm": 0.90234375, "learning_rate": 0.00010031918987866238, "loss": 0.7079, "step": 33753 }, { "epoch": 0.8667080711450906, "grad_norm": 0.734375, "learning_rate": 0.00010031472569939943, "loss": 0.6867, "step": 33754 }, { "epoch": 0.8667337483410125, "grad_norm": 0.70703125, "learning_rate": 0.00010031026151950926, "loss": 0.6074, "step": 33755 }, { "epoch": 0.8667594255369342, "grad_norm": 0.80859375, "learning_rate": 0.00010030579733900076, "loss": 0.7111, "step": 33756 }, { "epoch": 0.866785102732856, "grad_norm": 0.8125, "learning_rate": 0.00010030133315788283, "loss": 0.9138, "step": 33757 }, { "epoch": 0.8668107799287779, "grad_norm": 0.8515625, "learning_rate": 0.0001002968689761644, "loss": 0.7685, "step": 33758 }, { "epoch": 0.8668364571246997, "grad_norm": 0.80078125, "learning_rate": 0.00010029240479385431, "loss": 0.835, "step": 33759 }, { "epoch": 0.8668621343206215, "grad_norm": 0.91015625, "learning_rate": 0.0001002879406109615, "loss": 0.8112, "step": 33760 }, { "epoch": 0.8668878115165434, "grad_norm": 0.9140625, "learning_rate": 0.00010028347642749483, "loss": 0.9586, "step": 33761 }, { "epoch": 0.8669134887124652, "grad_norm": 0.80078125, "learning_rate": 0.00010027901224346324, "loss": 0.8432, "step": 33762 }, { "epoch": 0.8669391659083869, "grad_norm": 0.76171875, "learning_rate": 0.00010027454805887557, "loss": 0.7927, "step": 33763 }, { "epoch": 0.8669648431043088, "grad_norm": 1.09375, "learning_rate": 0.00010027008387374074, "loss": 0.6705, "step": 33764 }, { "epoch": 0.8669905203002306, "grad_norm": 0.796875, "learning_rate": 0.00010026561968806771, "loss": 0.8856, "step": 33765 }, { "epoch": 0.8670161974961524, "grad_norm": 0.859375, "learning_rate": 0.0001002611555018653, "loss": 0.7255, "step": 33766 }, { "epoch": 0.8670418746920743, "grad_norm": 0.76171875, "learning_rate": 0.00010025669131514243, "loss": 0.706, "step": 33767 }, { "epoch": 0.8670675518879961, "grad_norm": 0.80859375, "learning_rate": 0.00010025222712790802, "loss": 0.8543, "step": 33768 }, { "epoch": 0.8670932290839178, "grad_norm": 0.8671875, "learning_rate": 0.00010024776294017091, "loss": 0.7693, "step": 33769 }, { "epoch": 0.8671189062798397, "grad_norm": 0.765625, "learning_rate": 0.00010024329875194005, "loss": 0.7379, "step": 33770 }, { "epoch": 0.8671445834757615, "grad_norm": 0.83203125, "learning_rate": 0.00010023883456322431, "loss": 0.8112, "step": 33771 }, { "epoch": 0.8671702606716833, "grad_norm": 0.890625, "learning_rate": 0.00010023437037403261, "loss": 0.7935, "step": 33772 }, { "epoch": 0.8671959378676052, "grad_norm": 0.8515625, "learning_rate": 0.00010022990618437383, "loss": 0.9012, "step": 33773 }, { "epoch": 0.867221615063527, "grad_norm": 0.76171875, "learning_rate": 0.00010022544199425682, "loss": 0.8236, "step": 33774 }, { "epoch": 0.8672472922594489, "grad_norm": 0.80859375, "learning_rate": 0.00010022097780369057, "loss": 0.7736, "step": 33775 }, { "epoch": 0.8672729694553706, "grad_norm": 0.81640625, "learning_rate": 0.00010021651361268394, "loss": 0.8178, "step": 33776 }, { "epoch": 0.8672986466512924, "grad_norm": 0.78125, "learning_rate": 0.00010021204942124578, "loss": 0.688, "step": 33777 }, { "epoch": 0.8673243238472143, "grad_norm": 0.85546875, "learning_rate": 0.00010020758522938505, "loss": 0.782, "step": 33778 }, { "epoch": 0.8673500010431361, "grad_norm": 0.8984375, "learning_rate": 0.00010020312103711062, "loss": 0.7529, "step": 33779 }, { "epoch": 0.8673756782390579, "grad_norm": 0.82421875, "learning_rate": 0.00010019865684443138, "loss": 0.765, "step": 33780 }, { "epoch": 0.8674013554349798, "grad_norm": 0.8125, "learning_rate": 0.00010019419265135624, "loss": 0.9677, "step": 33781 }, { "epoch": 0.8674270326309016, "grad_norm": 0.79296875, "learning_rate": 0.00010018972845789408, "loss": 0.9204, "step": 33782 }, { "epoch": 0.8674527098268233, "grad_norm": 0.9453125, "learning_rate": 0.00010018526426405381, "loss": 0.735, "step": 33783 }, { "epoch": 0.8674783870227452, "grad_norm": 0.75, "learning_rate": 0.00010018080006984434, "loss": 0.7939, "step": 33784 }, { "epoch": 0.867504064218667, "grad_norm": 0.78515625, "learning_rate": 0.00010017633587527453, "loss": 0.9264, "step": 33785 }, { "epoch": 0.8675297414145888, "grad_norm": 0.796875, "learning_rate": 0.00010017187168035333, "loss": 0.912, "step": 33786 }, { "epoch": 0.8675554186105107, "grad_norm": 0.82421875, "learning_rate": 0.00010016740748508962, "loss": 0.8498, "step": 33787 }, { "epoch": 0.8675810958064325, "grad_norm": 0.8046875, "learning_rate": 0.00010016294328949223, "loss": 0.9468, "step": 33788 }, { "epoch": 0.8676067730023542, "grad_norm": 0.79296875, "learning_rate": 0.00010015847909357013, "loss": 0.8168, "step": 33789 }, { "epoch": 0.8676324501982761, "grad_norm": 0.76953125, "learning_rate": 0.0001001540148973322, "loss": 0.7546, "step": 33790 }, { "epoch": 0.8676581273941979, "grad_norm": 0.78125, "learning_rate": 0.0001001495507007873, "loss": 0.8173, "step": 33791 }, { "epoch": 0.8676838045901197, "grad_norm": 0.75, "learning_rate": 0.00010014508650394441, "loss": 0.7807, "step": 33792 }, { "epoch": 0.8677094817860416, "grad_norm": 0.83203125, "learning_rate": 0.00010014062230681234, "loss": 0.7302, "step": 33793 }, { "epoch": 0.8677351589819634, "grad_norm": 0.83984375, "learning_rate": 0.00010013615810940005, "loss": 0.7942, "step": 33794 }, { "epoch": 0.8677608361778852, "grad_norm": 0.8828125, "learning_rate": 0.0001001316939117164, "loss": 0.8734, "step": 33795 }, { "epoch": 0.867786513373807, "grad_norm": 0.796875, "learning_rate": 0.00010012722971377026, "loss": 0.8162, "step": 33796 }, { "epoch": 0.8678121905697288, "grad_norm": 0.75390625, "learning_rate": 0.0001001227655155706, "loss": 0.7821, "step": 33797 }, { "epoch": 0.8678378677656506, "grad_norm": 0.90234375, "learning_rate": 0.00010011830131712627, "loss": 0.7108, "step": 33798 }, { "epoch": 0.8678635449615725, "grad_norm": 0.80859375, "learning_rate": 0.0001001138371184462, "loss": 0.7471, "step": 33799 }, { "epoch": 0.8678892221574943, "grad_norm": 0.7578125, "learning_rate": 0.00010010937291953923, "loss": 0.8084, "step": 33800 }, { "epoch": 0.8679148993534161, "grad_norm": 0.7734375, "learning_rate": 0.0001001049087204143, "loss": 0.7591, "step": 33801 }, { "epoch": 0.867940576549338, "grad_norm": 0.8125, "learning_rate": 0.0001001004445210803, "loss": 0.7972, "step": 33802 }, { "epoch": 0.8679662537452597, "grad_norm": 0.78125, "learning_rate": 0.00010009598032154612, "loss": 0.7672, "step": 33803 }, { "epoch": 0.8679919309411815, "grad_norm": 0.6953125, "learning_rate": 0.00010009151612182064, "loss": 0.7097, "step": 33804 }, { "epoch": 0.8680176081371034, "grad_norm": 0.74609375, "learning_rate": 0.0001000870519219128, "loss": 0.7378, "step": 33805 }, { "epoch": 0.8680432853330252, "grad_norm": 0.69921875, "learning_rate": 0.00010008258772183148, "loss": 0.6747, "step": 33806 }, { "epoch": 0.868068962528947, "grad_norm": 0.734375, "learning_rate": 0.00010007812352158556, "loss": 0.7246, "step": 33807 }, { "epoch": 0.8680946397248689, "grad_norm": 0.75390625, "learning_rate": 0.00010007365932118395, "loss": 0.8544, "step": 33808 }, { "epoch": 0.8681203169207906, "grad_norm": 0.82421875, "learning_rate": 0.00010006919512063553, "loss": 0.9641, "step": 33809 }, { "epoch": 0.8681459941167124, "grad_norm": 0.75390625, "learning_rate": 0.00010006473091994921, "loss": 0.7709, "step": 33810 }, { "epoch": 0.8681716713126343, "grad_norm": 0.81640625, "learning_rate": 0.00010006026671913391, "loss": 0.7698, "step": 33811 }, { "epoch": 0.8681973485085561, "grad_norm": 0.76171875, "learning_rate": 0.0001000558025181985, "loss": 0.8435, "step": 33812 }, { "epoch": 0.868223025704478, "grad_norm": 0.8671875, "learning_rate": 0.00010005133831715188, "loss": 0.8995, "step": 33813 }, { "epoch": 0.8682487029003998, "grad_norm": 0.796875, "learning_rate": 0.00010004687411600291, "loss": 0.7179, "step": 33814 }, { "epoch": 0.8682743800963216, "grad_norm": 0.796875, "learning_rate": 0.00010004240991476057, "loss": 0.8529, "step": 33815 }, { "epoch": 0.8683000572922434, "grad_norm": 0.9296875, "learning_rate": 0.0001000379457134337, "loss": 0.9026, "step": 33816 }, { "epoch": 0.8683257344881652, "grad_norm": 0.73828125, "learning_rate": 0.00010003348151203118, "loss": 0.679, "step": 33817 }, { "epoch": 0.868351411684087, "grad_norm": 0.78515625, "learning_rate": 0.00010002901731056197, "loss": 0.7278, "step": 33818 }, { "epoch": 0.8683770888800089, "grad_norm": 0.79296875, "learning_rate": 0.00010002455310903491, "loss": 0.8204, "step": 33819 }, { "epoch": 0.8684027660759307, "grad_norm": 0.7578125, "learning_rate": 0.00010002008890745893, "loss": 0.8067, "step": 33820 }, { "epoch": 0.8684284432718525, "grad_norm": 0.7890625, "learning_rate": 0.00010001562470584292, "loss": 0.7597, "step": 33821 }, { "epoch": 0.8684541204677744, "grad_norm": 0.7734375, "learning_rate": 0.00010001116050419574, "loss": 0.7647, "step": 33822 }, { "epoch": 0.8684797976636961, "grad_norm": 0.8046875, "learning_rate": 0.00010000669630252634, "loss": 0.8634, "step": 33823 }, { "epoch": 0.8685054748596179, "grad_norm": 0.8671875, "learning_rate": 0.00010000223210084361, "loss": 1.0001, "step": 33824 }, { "epoch": 0.8685311520555398, "grad_norm": 0.8515625, "learning_rate": 9.999776789915641e-05, "loss": 0.8899, "step": 33825 }, { "epoch": 0.8685568292514616, "grad_norm": 0.7421875, "learning_rate": 9.999330369747365e-05, "loss": 0.8175, "step": 33826 }, { "epoch": 0.8685825064473834, "grad_norm": 0.81640625, "learning_rate": 9.998883949580427e-05, "loss": 0.7803, "step": 33827 }, { "epoch": 0.8686081836433053, "grad_norm": 0.7578125, "learning_rate": 9.998437529415713e-05, "loss": 0.8173, "step": 33828 }, { "epoch": 0.868633860839227, "grad_norm": 0.7109375, "learning_rate": 9.997991109254109e-05, "loss": 0.8522, "step": 33829 }, { "epoch": 0.8686595380351488, "grad_norm": 0.85546875, "learning_rate": 9.99754468909651e-05, "loss": 0.9029, "step": 33830 }, { "epoch": 0.8686852152310707, "grad_norm": 0.7890625, "learning_rate": 9.997098268943804e-05, "loss": 0.7486, "step": 33831 }, { "epoch": 0.8687108924269925, "grad_norm": 0.80078125, "learning_rate": 9.996651848796884e-05, "loss": 0.837, "step": 33832 }, { "epoch": 0.8687365696229143, "grad_norm": 0.80078125, "learning_rate": 9.99620542865663e-05, "loss": 0.8335, "step": 33833 }, { "epoch": 0.8687622468188362, "grad_norm": 0.7578125, "learning_rate": 9.995759008523947e-05, "loss": 0.7603, "step": 33834 }, { "epoch": 0.868787924014758, "grad_norm": 0.765625, "learning_rate": 9.995312588399711e-05, "loss": 0.7205, "step": 33835 }, { "epoch": 0.8688136012106797, "grad_norm": 0.7890625, "learning_rate": 9.994866168284816e-05, "loss": 0.8225, "step": 33836 }, { "epoch": 0.8688392784066016, "grad_norm": 0.8125, "learning_rate": 9.994419748180155e-05, "loss": 0.7681, "step": 33837 }, { "epoch": 0.8688649556025234, "grad_norm": 0.78515625, "learning_rate": 9.993973328086611e-05, "loss": 0.8311, "step": 33838 }, { "epoch": 0.8688906327984453, "grad_norm": 0.76171875, "learning_rate": 9.993526908005081e-05, "loss": 0.7696, "step": 33839 }, { "epoch": 0.8689163099943671, "grad_norm": 0.81640625, "learning_rate": 9.993080487936448e-05, "loss": 0.8626, "step": 33840 }, { "epoch": 0.8689419871902889, "grad_norm": 0.8125, "learning_rate": 9.992634067881608e-05, "loss": 0.7796, "step": 33841 }, { "epoch": 0.8689676643862108, "grad_norm": 0.859375, "learning_rate": 9.992187647841449e-05, "loss": 0.8077, "step": 33842 }, { "epoch": 0.8689933415821325, "grad_norm": 0.828125, "learning_rate": 9.991741227816855e-05, "loss": 0.691, "step": 33843 }, { "epoch": 0.8690190187780543, "grad_norm": 0.828125, "learning_rate": 9.991294807808722e-05, "loss": 0.9269, "step": 33844 }, { "epoch": 0.8690446959739762, "grad_norm": 0.71875, "learning_rate": 9.990848387817937e-05, "loss": 0.694, "step": 33845 }, { "epoch": 0.869070373169898, "grad_norm": 0.796875, "learning_rate": 9.99040196784539e-05, "loss": 0.8903, "step": 33846 }, { "epoch": 0.8690960503658198, "grad_norm": 0.8515625, "learning_rate": 9.989955547891971e-05, "loss": 0.848, "step": 33847 }, { "epoch": 0.8691217275617417, "grad_norm": 0.8125, "learning_rate": 9.989509127958574e-05, "loss": 0.7426, "step": 33848 }, { "epoch": 0.8691474047576634, "grad_norm": 0.79296875, "learning_rate": 9.989062708046081e-05, "loss": 0.8282, "step": 33849 }, { "epoch": 0.8691730819535852, "grad_norm": 0.73046875, "learning_rate": 9.988616288155384e-05, "loss": 0.8011, "step": 33850 }, { "epoch": 0.8691987591495071, "grad_norm": 0.77734375, "learning_rate": 9.988169868287375e-05, "loss": 0.8131, "step": 33851 }, { "epoch": 0.8692244363454289, "grad_norm": 0.8515625, "learning_rate": 9.987723448442941e-05, "loss": 0.7411, "step": 33852 }, { "epoch": 0.8692501135413507, "grad_norm": 0.859375, "learning_rate": 9.987277028622976e-05, "loss": 0.7912, "step": 33853 }, { "epoch": 0.8692757907372726, "grad_norm": 0.81640625, "learning_rate": 9.986830608828364e-05, "loss": 0.897, "step": 33854 }, { "epoch": 0.8693014679331944, "grad_norm": 0.78125, "learning_rate": 9.986384189059999e-05, "loss": 0.663, "step": 33855 }, { "epoch": 0.8693271451291161, "grad_norm": 0.75390625, "learning_rate": 9.98593776931877e-05, "loss": 0.8348, "step": 33856 }, { "epoch": 0.869352822325038, "grad_norm": 0.828125, "learning_rate": 9.98549134960556e-05, "loss": 0.8212, "step": 33857 }, { "epoch": 0.8693784995209598, "grad_norm": 0.80859375, "learning_rate": 9.985044929921272e-05, "loss": 0.7368, "step": 33858 }, { "epoch": 0.8694041767168816, "grad_norm": 0.76171875, "learning_rate": 9.984598510266783e-05, "loss": 0.8394, "step": 33859 }, { "epoch": 0.8694298539128035, "grad_norm": 0.7421875, "learning_rate": 9.98415209064299e-05, "loss": 0.8213, "step": 33860 }, { "epoch": 0.8694555311087253, "grad_norm": 0.7578125, "learning_rate": 9.983705671050783e-05, "loss": 0.818, "step": 33861 }, { "epoch": 0.8694812083046471, "grad_norm": 0.8125, "learning_rate": 9.983259251491042e-05, "loss": 0.917, "step": 33862 }, { "epoch": 0.8695068855005689, "grad_norm": 0.80078125, "learning_rate": 9.982812831964669e-05, "loss": 0.7449, "step": 33863 }, { "epoch": 0.8695325626964907, "grad_norm": 0.83984375, "learning_rate": 9.982366412472547e-05, "loss": 0.7509, "step": 33864 }, { "epoch": 0.8695582398924125, "grad_norm": 0.81640625, "learning_rate": 9.981919993015567e-05, "loss": 0.8608, "step": 33865 }, { "epoch": 0.8695839170883344, "grad_norm": 0.96484375, "learning_rate": 9.981473573594618e-05, "loss": 0.8049, "step": 33866 }, { "epoch": 0.8696095942842562, "grad_norm": 0.88671875, "learning_rate": 9.981027154210595e-05, "loss": 0.9434, "step": 33867 }, { "epoch": 0.869635271480178, "grad_norm": 0.7890625, "learning_rate": 9.98058073486438e-05, "loss": 0.8305, "step": 33868 }, { "epoch": 0.8696609486760998, "grad_norm": 0.83984375, "learning_rate": 9.980134315556863e-05, "loss": 0.818, "step": 33869 }, { "epoch": 0.8696866258720216, "grad_norm": 0.79296875, "learning_rate": 9.97968789628894e-05, "loss": 0.7997, "step": 33870 }, { "epoch": 0.8697123030679434, "grad_norm": 0.796875, "learning_rate": 9.979241477061496e-05, "loss": 0.8209, "step": 33871 }, { "epoch": 0.8697379802638653, "grad_norm": 0.79296875, "learning_rate": 9.978795057875426e-05, "loss": 0.6936, "step": 33872 }, { "epoch": 0.8697636574597871, "grad_norm": 0.796875, "learning_rate": 9.978348638731607e-05, "loss": 0.7329, "step": 33873 }, { "epoch": 0.869789334655709, "grad_norm": 0.8515625, "learning_rate": 9.977902219630944e-05, "loss": 0.7569, "step": 33874 }, { "epoch": 0.8698150118516308, "grad_norm": 0.75390625, "learning_rate": 9.977455800574319e-05, "loss": 0.7119, "step": 33875 }, { "epoch": 0.8698406890475525, "grad_norm": 0.921875, "learning_rate": 9.97700938156262e-05, "loss": 0.7527, "step": 33876 }, { "epoch": 0.8698663662434744, "grad_norm": 0.796875, "learning_rate": 9.976562962596744e-05, "loss": 0.8445, "step": 33877 }, { "epoch": 0.8698920434393962, "grad_norm": 0.7890625, "learning_rate": 9.976116543677571e-05, "loss": 0.8255, "step": 33878 }, { "epoch": 0.869917720635318, "grad_norm": 0.81640625, "learning_rate": 9.975670124805997e-05, "loss": 0.9914, "step": 33879 }, { "epoch": 0.8699433978312399, "grad_norm": 0.74609375, "learning_rate": 9.975223705982909e-05, "loss": 0.7788, "step": 33880 }, { "epoch": 0.8699690750271617, "grad_norm": 0.81640625, "learning_rate": 9.9747772872092e-05, "loss": 0.8252, "step": 33881 }, { "epoch": 0.8699947522230834, "grad_norm": 0.75390625, "learning_rate": 9.974330868485759e-05, "loss": 0.7994, "step": 33882 }, { "epoch": 0.8700204294190053, "grad_norm": 0.83984375, "learning_rate": 9.973884449813471e-05, "loss": 0.7781, "step": 33883 }, { "epoch": 0.8700461066149271, "grad_norm": 0.8046875, "learning_rate": 9.97343803119323e-05, "loss": 0.851, "step": 33884 }, { "epoch": 0.8700717838108489, "grad_norm": 0.72265625, "learning_rate": 9.972991612625924e-05, "loss": 0.7481, "step": 33885 }, { "epoch": 0.8700974610067708, "grad_norm": 0.79296875, "learning_rate": 9.972545194112444e-05, "loss": 0.8697, "step": 33886 }, { "epoch": 0.8701231382026926, "grad_norm": 0.859375, "learning_rate": 9.972098775653682e-05, "loss": 0.7987, "step": 33887 }, { "epoch": 0.8701488153986144, "grad_norm": 0.78515625, "learning_rate": 9.971652357250521e-05, "loss": 0.7739, "step": 33888 }, { "epoch": 0.8701744925945362, "grad_norm": 0.80859375, "learning_rate": 9.971205938903855e-05, "loss": 0.6997, "step": 33889 }, { "epoch": 0.870200169790458, "grad_norm": 0.86328125, "learning_rate": 9.970759520614571e-05, "loss": 0.9154, "step": 33890 }, { "epoch": 0.8702258469863798, "grad_norm": 0.72265625, "learning_rate": 9.970313102383562e-05, "loss": 0.6962, "step": 33891 }, { "epoch": 0.8702515241823017, "grad_norm": 0.8046875, "learning_rate": 9.969866684211716e-05, "loss": 0.7766, "step": 33892 }, { "epoch": 0.8702772013782235, "grad_norm": 0.7734375, "learning_rate": 9.969420266099925e-05, "loss": 0.8263, "step": 33893 }, { "epoch": 0.8703028785741453, "grad_norm": 0.83203125, "learning_rate": 9.968973848049078e-05, "loss": 0.8562, "step": 33894 }, { "epoch": 0.8703285557700672, "grad_norm": 0.8515625, "learning_rate": 9.968527430060059e-05, "loss": 0.8436, "step": 33895 }, { "epoch": 0.8703542329659889, "grad_norm": 0.734375, "learning_rate": 9.968081012133763e-05, "loss": 0.8107, "step": 33896 }, { "epoch": 0.8703799101619107, "grad_norm": 0.78125, "learning_rate": 9.967634594271078e-05, "loss": 0.7247, "step": 33897 }, { "epoch": 0.8704055873578326, "grad_norm": 0.84765625, "learning_rate": 9.9671881764729e-05, "loss": 0.9047, "step": 33898 }, { "epoch": 0.8704312645537544, "grad_norm": 0.75, "learning_rate": 9.966741758740105e-05, "loss": 0.6583, "step": 33899 }, { "epoch": 0.8704569417496762, "grad_norm": 0.77734375, "learning_rate": 9.966295341073596e-05, "loss": 0.8093, "step": 33900 }, { "epoch": 0.8704826189455981, "grad_norm": 0.7734375, "learning_rate": 9.965848923474258e-05, "loss": 0.8615, "step": 33901 }, { "epoch": 0.8705082961415198, "grad_norm": 0.8671875, "learning_rate": 9.965402505942973e-05, "loss": 0.972, "step": 33902 }, { "epoch": 0.8705339733374416, "grad_norm": 0.765625, "learning_rate": 9.964956088480646e-05, "loss": 0.8374, "step": 33903 }, { "epoch": 0.8705596505333635, "grad_norm": 0.76953125, "learning_rate": 9.964509671088153e-05, "loss": 0.8092, "step": 33904 }, { "epoch": 0.8705853277292853, "grad_norm": 0.7890625, "learning_rate": 9.964063253766392e-05, "loss": 0.7475, "step": 33905 }, { "epoch": 0.8706110049252072, "grad_norm": 0.765625, "learning_rate": 9.963616836516246e-05, "loss": 0.9008, "step": 33906 }, { "epoch": 0.870636682121129, "grad_norm": 0.76953125, "learning_rate": 9.963170419338612e-05, "loss": 0.8799, "step": 33907 }, { "epoch": 0.8706623593170508, "grad_norm": 0.796875, "learning_rate": 9.962724002234377e-05, "loss": 0.783, "step": 33908 }, { "epoch": 0.8706880365129726, "grad_norm": 0.7578125, "learning_rate": 9.962277585204426e-05, "loss": 0.8408, "step": 33909 }, { "epoch": 0.8707137137088944, "grad_norm": 0.80859375, "learning_rate": 9.961831168249655e-05, "loss": 0.8837, "step": 33910 }, { "epoch": 0.8707393909048162, "grad_norm": 0.8203125, "learning_rate": 9.961384751370948e-05, "loss": 0.8216, "step": 33911 }, { "epoch": 0.8707650681007381, "grad_norm": 0.76953125, "learning_rate": 9.960938334569202e-05, "loss": 0.7251, "step": 33912 }, { "epoch": 0.8707907452966599, "grad_norm": 0.78125, "learning_rate": 9.960491917845296e-05, "loss": 0.7586, "step": 33913 }, { "epoch": 0.8708164224925817, "grad_norm": 0.81640625, "learning_rate": 9.960045501200134e-05, "loss": 0.8009, "step": 33914 }, { "epoch": 0.8708420996885036, "grad_norm": 0.984375, "learning_rate": 9.959599084634593e-05, "loss": 0.8014, "step": 33915 }, { "epoch": 0.8708677768844253, "grad_norm": 0.81640625, "learning_rate": 9.959152668149567e-05, "loss": 0.7514, "step": 33916 }, { "epoch": 0.8708934540803471, "grad_norm": 0.78515625, "learning_rate": 9.958706251745949e-05, "loss": 0.7558, "step": 33917 }, { "epoch": 0.870919131276269, "grad_norm": 0.82421875, "learning_rate": 9.95825983542462e-05, "loss": 0.9098, "step": 33918 }, { "epoch": 0.8709448084721908, "grad_norm": 0.90234375, "learning_rate": 9.957813419186483e-05, "loss": 0.884, "step": 33919 }, { "epoch": 0.8709704856681126, "grad_norm": 0.76953125, "learning_rate": 9.957367003032417e-05, "loss": 0.8379, "step": 33920 }, { "epoch": 0.8709961628640345, "grad_norm": 0.73828125, "learning_rate": 9.956920586963312e-05, "loss": 0.7773, "step": 33921 }, { "epoch": 0.8710218400599562, "grad_norm": 0.796875, "learning_rate": 9.956474170980066e-05, "loss": 0.8437, "step": 33922 }, { "epoch": 0.871047517255878, "grad_norm": 0.8359375, "learning_rate": 9.956027755083556e-05, "loss": 0.7769, "step": 33923 }, { "epoch": 0.8710731944517999, "grad_norm": 0.74609375, "learning_rate": 9.955581339274684e-05, "loss": 0.745, "step": 33924 }, { "epoch": 0.8710988716477217, "grad_norm": 0.7578125, "learning_rate": 9.95513492355433e-05, "loss": 0.8886, "step": 33925 }, { "epoch": 0.8711245488436435, "grad_norm": 0.78125, "learning_rate": 9.954688507923391e-05, "loss": 0.7842, "step": 33926 }, { "epoch": 0.8711502260395654, "grad_norm": 0.8125, "learning_rate": 9.954242092382758e-05, "loss": 0.8284, "step": 33927 }, { "epoch": 0.8711759032354872, "grad_norm": 0.84765625, "learning_rate": 9.953795676933309e-05, "loss": 0.8068, "step": 33928 }, { "epoch": 0.8712015804314089, "grad_norm": 0.75390625, "learning_rate": 9.953349261575943e-05, "loss": 0.7672, "step": 33929 }, { "epoch": 0.8712272576273308, "grad_norm": 0.74609375, "learning_rate": 9.952902846311548e-05, "loss": 0.8168, "step": 33930 }, { "epoch": 0.8712529348232526, "grad_norm": 0.7265625, "learning_rate": 9.952456431141015e-05, "loss": 0.7648, "step": 33931 }, { "epoch": 0.8712786120191744, "grad_norm": 0.79296875, "learning_rate": 9.952010016065228e-05, "loss": 0.8716, "step": 33932 }, { "epoch": 0.8713042892150963, "grad_norm": 0.86328125, "learning_rate": 9.951563601085086e-05, "loss": 0.7107, "step": 33933 }, { "epoch": 0.8713299664110181, "grad_norm": 0.7890625, "learning_rate": 9.951117186201472e-05, "loss": 0.8387, "step": 33934 }, { "epoch": 0.87135564360694, "grad_norm": 0.8828125, "learning_rate": 9.950670771415275e-05, "loss": 0.7807, "step": 33935 }, { "epoch": 0.8713813208028617, "grad_norm": 0.84375, "learning_rate": 9.950224356727388e-05, "loss": 0.8453, "step": 33936 }, { "epoch": 0.8714069979987835, "grad_norm": 0.72265625, "learning_rate": 9.949777942138697e-05, "loss": 0.8393, "step": 33937 }, { "epoch": 0.8714326751947054, "grad_norm": 0.78515625, "learning_rate": 9.949331527650098e-05, "loss": 0.7919, "step": 33938 }, { "epoch": 0.8714583523906272, "grad_norm": 0.78515625, "learning_rate": 9.948885113262474e-05, "loss": 0.8369, "step": 33939 }, { "epoch": 0.871484029586549, "grad_norm": 0.78125, "learning_rate": 9.948438698976719e-05, "loss": 0.7536, "step": 33940 }, { "epoch": 0.8715097067824709, "grad_norm": 0.765625, "learning_rate": 9.947992284793723e-05, "loss": 0.7797, "step": 33941 }, { "epoch": 0.8715353839783926, "grad_norm": 0.82421875, "learning_rate": 9.947545870714366e-05, "loss": 0.9116, "step": 33942 }, { "epoch": 0.8715610611743144, "grad_norm": 0.81640625, "learning_rate": 9.947099456739553e-05, "loss": 0.8074, "step": 33943 }, { "epoch": 0.8715867383702363, "grad_norm": 0.87890625, "learning_rate": 9.946653042870161e-05, "loss": 0.8638, "step": 33944 }, { "epoch": 0.8716124155661581, "grad_norm": 0.79296875, "learning_rate": 9.946206629107088e-05, "loss": 0.8264, "step": 33945 }, { "epoch": 0.8716380927620799, "grad_norm": 0.76953125, "learning_rate": 9.945760215451216e-05, "loss": 0.7279, "step": 33946 }, { "epoch": 0.8716637699580018, "grad_norm": 0.80859375, "learning_rate": 9.945313801903443e-05, "loss": 0.8413, "step": 33947 }, { "epoch": 0.8716894471539236, "grad_norm": 0.77734375, "learning_rate": 9.944867388464657e-05, "loss": 0.8007, "step": 33948 }, { "epoch": 0.8717151243498453, "grad_norm": 0.8046875, "learning_rate": 9.944420975135742e-05, "loss": 0.7267, "step": 33949 }, { "epoch": 0.8717408015457672, "grad_norm": 0.7265625, "learning_rate": 9.943974561917589e-05, "loss": 0.6926, "step": 33950 }, { "epoch": 0.871766478741689, "grad_norm": 0.80078125, "learning_rate": 9.943528148811091e-05, "loss": 0.7843, "step": 33951 }, { "epoch": 0.8717921559376108, "grad_norm": 0.859375, "learning_rate": 9.94308173581714e-05, "loss": 0.8803, "step": 33952 }, { "epoch": 0.8718178331335327, "grad_norm": 0.7890625, "learning_rate": 9.942635322936619e-05, "loss": 0.8855, "step": 33953 }, { "epoch": 0.8718435103294545, "grad_norm": 0.80078125, "learning_rate": 9.942188910170419e-05, "loss": 0.6861, "step": 33954 }, { "epoch": 0.8718691875253763, "grad_norm": 0.78125, "learning_rate": 9.941742497519433e-05, "loss": 0.7164, "step": 33955 }, { "epoch": 0.8718948647212981, "grad_norm": 0.7421875, "learning_rate": 9.941296084984546e-05, "loss": 0.6083, "step": 33956 }, { "epoch": 0.8719205419172199, "grad_norm": 0.76171875, "learning_rate": 9.940849672566657e-05, "loss": 0.849, "step": 33957 }, { "epoch": 0.8719462191131417, "grad_norm": 0.828125, "learning_rate": 9.940403260266642e-05, "loss": 0.8539, "step": 33958 }, { "epoch": 0.8719718963090636, "grad_norm": 0.76953125, "learning_rate": 9.939956848085404e-05, "loss": 0.732, "step": 33959 }, { "epoch": 0.8719975735049854, "grad_norm": 0.83203125, "learning_rate": 9.939510436023826e-05, "loss": 0.7268, "step": 33960 }, { "epoch": 0.8720232507009072, "grad_norm": 0.75, "learning_rate": 9.939064024082793e-05, "loss": 0.7289, "step": 33961 }, { "epoch": 0.872048927896829, "grad_norm": 0.75390625, "learning_rate": 9.938617612263206e-05, "loss": 0.7051, "step": 33962 }, { "epoch": 0.8720746050927508, "grad_norm": 0.73828125, "learning_rate": 9.938171200565946e-05, "loss": 0.8264, "step": 33963 }, { "epoch": 0.8721002822886726, "grad_norm": 0.734375, "learning_rate": 9.937724788991905e-05, "loss": 0.7569, "step": 33964 }, { "epoch": 0.8721259594845945, "grad_norm": 0.765625, "learning_rate": 9.937278377541971e-05, "loss": 0.8359, "step": 33965 }, { "epoch": 0.8721516366805163, "grad_norm": 0.76953125, "learning_rate": 9.936831966217038e-05, "loss": 0.6583, "step": 33966 }, { "epoch": 0.8721773138764382, "grad_norm": 0.7421875, "learning_rate": 9.936385555017995e-05, "loss": 0.8218, "step": 33967 }, { "epoch": 0.87220299107236, "grad_norm": 0.7578125, "learning_rate": 9.935939143945726e-05, "loss": 0.9379, "step": 33968 }, { "epoch": 0.8722286682682817, "grad_norm": 0.94921875, "learning_rate": 9.935492733001127e-05, "loss": 0.864, "step": 33969 }, { "epoch": 0.8722543454642036, "grad_norm": 0.80078125, "learning_rate": 9.935046322185083e-05, "loss": 0.8021, "step": 33970 }, { "epoch": 0.8722800226601254, "grad_norm": 0.80859375, "learning_rate": 9.934599911498487e-05, "loss": 0.8271, "step": 33971 }, { "epoch": 0.8723056998560472, "grad_norm": 0.75, "learning_rate": 9.934153500942225e-05, "loss": 0.7345, "step": 33972 }, { "epoch": 0.8723313770519691, "grad_norm": 0.94140625, "learning_rate": 9.933707090517195e-05, "loss": 0.7958, "step": 33973 }, { "epoch": 0.8723570542478909, "grad_norm": 0.91796875, "learning_rate": 9.933260680224276e-05, "loss": 0.9048, "step": 33974 }, { "epoch": 0.8723827314438127, "grad_norm": 0.77734375, "learning_rate": 9.932814270064363e-05, "loss": 0.8337, "step": 33975 }, { "epoch": 0.8724084086397345, "grad_norm": 0.7890625, "learning_rate": 9.932367860038346e-05, "loss": 0.6949, "step": 33976 }, { "epoch": 0.8724340858356563, "grad_norm": 0.80078125, "learning_rate": 9.931921450147113e-05, "loss": 0.6667, "step": 33977 }, { "epoch": 0.8724597630315781, "grad_norm": 0.734375, "learning_rate": 9.931475040391557e-05, "loss": 0.7417, "step": 33978 }, { "epoch": 0.8724854402275, "grad_norm": 0.74609375, "learning_rate": 9.931028630772562e-05, "loss": 0.8293, "step": 33979 }, { "epoch": 0.8725111174234218, "grad_norm": 0.84375, "learning_rate": 9.930582221291021e-05, "loss": 0.762, "step": 33980 }, { "epoch": 0.8725367946193436, "grad_norm": 0.78125, "learning_rate": 9.930135811947825e-05, "loss": 0.7521, "step": 33981 }, { "epoch": 0.8725624718152654, "grad_norm": 0.77734375, "learning_rate": 9.929689402743861e-05, "loss": 0.7087, "step": 33982 }, { "epoch": 0.8725881490111872, "grad_norm": 0.765625, "learning_rate": 9.929242993680022e-05, "loss": 0.8726, "step": 33983 }, { "epoch": 0.872613826207109, "grad_norm": 0.796875, "learning_rate": 9.928796584757191e-05, "loss": 0.769, "step": 33984 }, { "epoch": 0.8726395034030309, "grad_norm": 0.78125, "learning_rate": 9.928350175976264e-05, "loss": 0.8426, "step": 33985 }, { "epoch": 0.8726651805989527, "grad_norm": 0.8515625, "learning_rate": 9.927903767338131e-05, "loss": 0.7839, "step": 33986 }, { "epoch": 0.8726908577948745, "grad_norm": 0.78515625, "learning_rate": 9.927457358843674e-05, "loss": 0.7055, "step": 33987 }, { "epoch": 0.8727165349907964, "grad_norm": 0.828125, "learning_rate": 9.927010950493793e-05, "loss": 0.7774, "step": 33988 }, { "epoch": 0.8727422121867181, "grad_norm": 0.75, "learning_rate": 9.92656454228937e-05, "loss": 0.7367, "step": 33989 }, { "epoch": 0.8727678893826399, "grad_norm": 0.76171875, "learning_rate": 9.926118134231298e-05, "loss": 0.7824, "step": 33990 }, { "epoch": 0.8727935665785618, "grad_norm": 0.84375, "learning_rate": 9.925671726320464e-05, "loss": 0.7697, "step": 33991 }, { "epoch": 0.8728192437744836, "grad_norm": 0.8203125, "learning_rate": 9.925225318557764e-05, "loss": 0.6766, "step": 33992 }, { "epoch": 0.8728449209704054, "grad_norm": 0.7421875, "learning_rate": 9.92477891094408e-05, "loss": 0.7869, "step": 33993 }, { "epoch": 0.8728705981663273, "grad_norm": 0.7578125, "learning_rate": 9.924332503480304e-05, "loss": 0.7402, "step": 33994 }, { "epoch": 0.8728962753622491, "grad_norm": 0.80859375, "learning_rate": 9.923886096167328e-05, "loss": 0.772, "step": 33995 }, { "epoch": 0.8729219525581708, "grad_norm": 0.76953125, "learning_rate": 9.923439689006037e-05, "loss": 0.7367, "step": 33996 }, { "epoch": 0.8729476297540927, "grad_norm": 0.7578125, "learning_rate": 9.92299328199733e-05, "loss": 0.7693, "step": 33997 }, { "epoch": 0.8729733069500145, "grad_norm": 0.83984375, "learning_rate": 9.922546875142083e-05, "loss": 0.9476, "step": 33998 }, { "epoch": 0.8729989841459364, "grad_norm": 0.7421875, "learning_rate": 9.9221004684412e-05, "loss": 0.8536, "step": 33999 }, { "epoch": 0.8730246613418582, "grad_norm": 0.76171875, "learning_rate": 9.921654061895563e-05, "loss": 0.873, "step": 34000 }, { "epoch": 0.8730246613418582, "eval_loss": 0.7939178943634033, "eval_runtime": 351.5162, "eval_samples_per_second": 28.448, "eval_steps_per_second": 0.89, "step": 34000 }, { "epoch": 0.9003331616170857, "grad_norm": 0.9375, "learning_rate": 9.921207655506057e-05, "loss": 0.8261, "step": 34001 }, { "epoch": 0.90035964122538, "grad_norm": 0.77734375, "learning_rate": 9.37613815364211e-05, "loss": 0.7983, "step": 34002 }, { "epoch": 0.9003861208336743, "grad_norm": 0.75390625, "learning_rate": 9.375678677003054e-05, "loss": 0.7575, "step": 34003 }, { "epoch": 0.9004126004419687, "grad_norm": 0.83203125, "learning_rate": 9.375219201687211e-05, "loss": 0.8773, "step": 34004 }, { "epoch": 0.9004390800502631, "grad_norm": 0.78125, "learning_rate": 9.374759727695556e-05, "loss": 0.7975, "step": 34005 }, { "epoch": 0.9004655596585575, "grad_norm": 0.82421875, "learning_rate": 9.374300255029065e-05, "loss": 0.9247, "step": 34006 }, { "epoch": 0.9004920392668518, "grad_norm": 0.79296875, "learning_rate": 9.373840783688708e-05, "loss": 0.8328, "step": 34007 }, { "epoch": 0.9005185188751462, "grad_norm": 0.78125, "learning_rate": 9.373381313675459e-05, "loss": 0.7105, "step": 34008 }, { "epoch": 0.9005449984834406, "grad_norm": 0.84765625, "learning_rate": 9.372921844990291e-05, "loss": 0.8798, "step": 34009 }, { "epoch": 0.900571478091735, "grad_norm": 0.8359375, "learning_rate": 9.372462377634182e-05, "loss": 0.8332, "step": 34010 }, { "epoch": 0.9005979577000294, "grad_norm": 0.76953125, "learning_rate": 9.372002911608101e-05, "loss": 0.9123, "step": 34011 }, { "epoch": 0.9006244373083238, "grad_norm": 0.80078125, "learning_rate": 9.371543446913026e-05, "loss": 0.7355, "step": 34012 }, { "epoch": 0.9006509169166181, "grad_norm": 0.7265625, "learning_rate": 9.371083983549928e-05, "loss": 0.7831, "step": 34013 }, { "epoch": 0.9006773965249125, "grad_norm": 0.83203125, "learning_rate": 9.370624521519776e-05, "loss": 0.7978, "step": 34014 }, { "epoch": 0.9007038761332069, "grad_norm": 0.80859375, "learning_rate": 9.370165060823556e-05, "loss": 0.9161, "step": 34015 }, { "epoch": 0.9007303557415013, "grad_norm": 0.73046875, "learning_rate": 9.369705601462234e-05, "loss": 0.6855, "step": 34016 }, { "epoch": 0.9007568353497957, "grad_norm": 0.81640625, "learning_rate": 9.369246143436784e-05, "loss": 0.7878, "step": 34017 }, { "epoch": 0.90078331495809, "grad_norm": 0.82421875, "learning_rate": 9.368786686748182e-05, "loss": 0.8564, "step": 34018 }, { "epoch": 0.9008097945663843, "grad_norm": 0.8359375, "learning_rate": 9.368327231397396e-05, "loss": 0.826, "step": 34019 }, { "epoch": 0.9008362741746787, "grad_norm": 0.73828125, "learning_rate": 9.367867777385408e-05, "loss": 0.8193, "step": 34020 }, { "epoch": 0.9008627537829731, "grad_norm": 0.8515625, "learning_rate": 9.36740832471319e-05, "loss": 0.839, "step": 34021 }, { "epoch": 0.9008892333912675, "grad_norm": 0.76953125, "learning_rate": 9.366948873381713e-05, "loss": 0.8047, "step": 34022 }, { "epoch": 0.9009157129995619, "grad_norm": 0.765625, "learning_rate": 9.366489423391951e-05, "loss": 0.8113, "step": 34023 }, { "epoch": 0.9009421926078562, "grad_norm": 0.8125, "learning_rate": 9.366029974744874e-05, "loss": 0.7647, "step": 34024 }, { "epoch": 0.9009686722161506, "grad_norm": 0.765625, "learning_rate": 9.365570527441465e-05, "loss": 0.9213, "step": 34025 }, { "epoch": 0.900995151824445, "grad_norm": 0.80078125, "learning_rate": 9.365111081482692e-05, "loss": 0.9405, "step": 34026 }, { "epoch": 0.9010216314327394, "grad_norm": 0.77734375, "learning_rate": 9.364651636869531e-05, "loss": 0.8492, "step": 34027 }, { "epoch": 0.9010481110410338, "grad_norm": 0.73828125, "learning_rate": 9.364192193602955e-05, "loss": 0.979, "step": 34028 }, { "epoch": 0.9010745906493282, "grad_norm": 0.80859375, "learning_rate": 9.363732751683934e-05, "loss": 0.8707, "step": 34029 }, { "epoch": 0.9011010702576225, "grad_norm": 0.8515625, "learning_rate": 9.363273311113448e-05, "loss": 0.7831, "step": 34030 }, { "epoch": 0.9011275498659169, "grad_norm": 0.7734375, "learning_rate": 9.362813871892467e-05, "loss": 0.7289, "step": 34031 }, { "epoch": 0.9011540294742113, "grad_norm": 0.8125, "learning_rate": 9.362354434021968e-05, "loss": 0.856, "step": 34032 }, { "epoch": 0.9011805090825057, "grad_norm": 0.80859375, "learning_rate": 9.36189499750292e-05, "loss": 0.7669, "step": 34033 }, { "epoch": 0.9012069886908001, "grad_norm": 0.8203125, "learning_rate": 9.361435562336294e-05, "loss": 0.8075, "step": 34034 }, { "epoch": 0.9012334682990943, "grad_norm": 0.8359375, "learning_rate": 9.360976128523074e-05, "loss": 0.9438, "step": 34035 }, { "epoch": 0.9012599479073887, "grad_norm": 0.90625, "learning_rate": 9.36051669606423e-05, "loss": 0.7972, "step": 34036 }, { "epoch": 0.9012864275156831, "grad_norm": 0.77734375, "learning_rate": 9.360057264960732e-05, "loss": 0.7387, "step": 34037 }, { "epoch": 0.9013129071239775, "grad_norm": 0.8125, "learning_rate": 9.359597835213558e-05, "loss": 0.8335, "step": 34038 }, { "epoch": 0.9013393867322719, "grad_norm": 0.76171875, "learning_rate": 9.359138406823675e-05, "loss": 0.794, "step": 34039 }, { "epoch": 0.9013658663405663, "grad_norm": 0.734375, "learning_rate": 9.358678979792065e-05, "loss": 0.6944, "step": 34040 }, { "epoch": 0.9013923459488606, "grad_norm": 0.7421875, "learning_rate": 9.358219554119698e-05, "loss": 0.8513, "step": 34041 }, { "epoch": 0.901418825557155, "grad_norm": 0.7890625, "learning_rate": 9.357760129807549e-05, "loss": 0.7877, "step": 34042 }, { "epoch": 0.9014453051654494, "grad_norm": 0.8125, "learning_rate": 9.357300706856591e-05, "loss": 0.8157, "step": 34043 }, { "epoch": 0.9014717847737438, "grad_norm": 0.76171875, "learning_rate": 9.356841285267792e-05, "loss": 0.8114, "step": 34044 }, { "epoch": 0.9014982643820382, "grad_norm": 0.7734375, "learning_rate": 9.356381865042136e-05, "loss": 0.78, "step": 34045 }, { "epoch": 0.9015247439903326, "grad_norm": 0.71484375, "learning_rate": 9.355922446180593e-05, "loss": 0.7193, "step": 34046 }, { "epoch": 0.9015512235986269, "grad_norm": 0.81640625, "learning_rate": 9.355463028684133e-05, "loss": 0.8143, "step": 34047 }, { "epoch": 0.9015777032069213, "grad_norm": 0.73046875, "learning_rate": 9.355003612553735e-05, "loss": 0.6852, "step": 34048 }, { "epoch": 0.9016041828152157, "grad_norm": 0.734375, "learning_rate": 9.354544197790366e-05, "loss": 0.9107, "step": 34049 }, { "epoch": 0.9016306624235101, "grad_norm": 0.796875, "learning_rate": 9.354084784395006e-05, "loss": 0.7172, "step": 34050 }, { "epoch": 0.9016571420318045, "grad_norm": 0.82421875, "learning_rate": 9.353625372368627e-05, "loss": 0.8369, "step": 34051 }, { "epoch": 0.9016836216400987, "grad_norm": 0.7890625, "learning_rate": 9.353165961712201e-05, "loss": 0.7303, "step": 34052 }, { "epoch": 0.9017101012483931, "grad_norm": 0.734375, "learning_rate": 9.352706552426702e-05, "loss": 0.8402, "step": 34053 }, { "epoch": 0.9017365808566875, "grad_norm": 0.76953125, "learning_rate": 9.352247144513104e-05, "loss": 0.843, "step": 34054 }, { "epoch": 0.9017630604649819, "grad_norm": 0.7578125, "learning_rate": 9.351787737972382e-05, "loss": 0.7733, "step": 34055 }, { "epoch": 0.9017895400732763, "grad_norm": 0.77734375, "learning_rate": 9.35132833280551e-05, "loss": 0.8591, "step": 34056 }, { "epoch": 0.9018160196815707, "grad_norm": 0.80859375, "learning_rate": 9.350868929013463e-05, "loss": 0.7701, "step": 34057 }, { "epoch": 0.901842499289865, "grad_norm": 0.73828125, "learning_rate": 9.350409526597208e-05, "loss": 0.7464, "step": 34058 }, { "epoch": 0.9018689788981594, "grad_norm": 0.83203125, "learning_rate": 9.34995012555772e-05, "loss": 0.9021, "step": 34059 }, { "epoch": 0.9018954585064538, "grad_norm": 0.76953125, "learning_rate": 9.349490725895981e-05, "loss": 0.8115, "step": 34060 }, { "epoch": 0.9019219381147482, "grad_norm": 0.75, "learning_rate": 9.349031327612958e-05, "loss": 0.7385, "step": 34061 }, { "epoch": 0.9019484177230426, "grad_norm": 0.73046875, "learning_rate": 9.348571930709627e-05, "loss": 0.8168, "step": 34062 }, { "epoch": 0.901974897331337, "grad_norm": 0.875, "learning_rate": 9.348112535186959e-05, "loss": 0.8003, "step": 34063 }, { "epoch": 0.9020013769396313, "grad_norm": 0.734375, "learning_rate": 9.347653141045927e-05, "loss": 0.8043, "step": 34064 }, { "epoch": 0.9020278565479257, "grad_norm": 0.70703125, "learning_rate": 9.347193748287509e-05, "loss": 0.7872, "step": 34065 }, { "epoch": 0.9020543361562201, "grad_norm": 0.80859375, "learning_rate": 9.346734356912678e-05, "loss": 0.8423, "step": 34066 }, { "epoch": 0.9020808157645145, "grad_norm": 0.765625, "learning_rate": 9.346274966922406e-05, "loss": 0.821, "step": 34067 }, { "epoch": 0.9021072953728088, "grad_norm": 0.7421875, "learning_rate": 9.345815578317665e-05, "loss": 0.7687, "step": 34068 }, { "epoch": 0.9021337749811031, "grad_norm": 0.79296875, "learning_rate": 9.345356191099431e-05, "loss": 0.8486, "step": 34069 }, { "epoch": 0.9021602545893975, "grad_norm": 0.88671875, "learning_rate": 9.344896805268678e-05, "loss": 0.77, "step": 34070 }, { "epoch": 0.9021867341976919, "grad_norm": 0.71484375, "learning_rate": 9.344437420826378e-05, "loss": 0.7192, "step": 34071 }, { "epoch": 0.9022132138059863, "grad_norm": 0.77734375, "learning_rate": 9.343978037773506e-05, "loss": 0.7758, "step": 34072 }, { "epoch": 0.9022396934142807, "grad_norm": 0.8125, "learning_rate": 9.343518656111034e-05, "loss": 0.8598, "step": 34073 }, { "epoch": 0.9022661730225751, "grad_norm": 0.8359375, "learning_rate": 9.343059275839938e-05, "loss": 0.7443, "step": 34074 }, { "epoch": 0.9022926526308694, "grad_norm": 0.78125, "learning_rate": 9.34259989696119e-05, "loss": 0.9682, "step": 34075 }, { "epoch": 0.9023191322391638, "grad_norm": 0.76171875, "learning_rate": 9.342140519475764e-05, "loss": 0.8645, "step": 34076 }, { "epoch": 0.9023456118474582, "grad_norm": 0.75390625, "learning_rate": 9.341681143384633e-05, "loss": 0.8468, "step": 34077 }, { "epoch": 0.9023720914557526, "grad_norm": 0.7734375, "learning_rate": 9.341221768688773e-05, "loss": 0.8955, "step": 34078 }, { "epoch": 0.902398571064047, "grad_norm": 0.72265625, "learning_rate": 9.340762395389149e-05, "loss": 0.7644, "step": 34079 }, { "epoch": 0.9024250506723414, "grad_norm": 0.859375, "learning_rate": 9.340303023486748e-05, "loss": 0.7209, "step": 34080 }, { "epoch": 0.9024515302806357, "grad_norm": 0.73828125, "learning_rate": 9.339843652982538e-05, "loss": 0.7913, "step": 34081 }, { "epoch": 0.9024780098889301, "grad_norm": 0.83984375, "learning_rate": 9.339384283877489e-05, "loss": 0.7222, "step": 34082 }, { "epoch": 0.9025044894972245, "grad_norm": 0.80859375, "learning_rate": 9.338924916172578e-05, "loss": 0.7736, "step": 34083 }, { "epoch": 0.9025309691055188, "grad_norm": 0.80859375, "learning_rate": 9.338465549868773e-05, "loss": 0.894, "step": 34084 }, { "epoch": 0.9025574487138132, "grad_norm": 0.7734375, "learning_rate": 9.338006184967058e-05, "loss": 0.7517, "step": 34085 }, { "epoch": 0.9025839283221075, "grad_norm": 0.81640625, "learning_rate": 9.337546821468402e-05, "loss": 0.8015, "step": 34086 }, { "epoch": 0.9026104079304019, "grad_norm": 0.82421875, "learning_rate": 9.337087459373776e-05, "loss": 0.7904, "step": 34087 }, { "epoch": 0.9026368875386963, "grad_norm": 0.82421875, "learning_rate": 9.336628098684154e-05, "loss": 0.8362, "step": 34088 }, { "epoch": 0.9026633671469907, "grad_norm": 0.828125, "learning_rate": 9.336168739400511e-05, "loss": 0.7772, "step": 34089 }, { "epoch": 0.9026898467552851, "grad_norm": 0.734375, "learning_rate": 9.335709381523819e-05, "loss": 0.8347, "step": 34090 }, { "epoch": 0.9027163263635795, "grad_norm": 0.76171875, "learning_rate": 9.335250025055056e-05, "loss": 0.7925, "step": 34091 }, { "epoch": 0.9027428059718738, "grad_norm": 0.80859375, "learning_rate": 9.334790669995192e-05, "loss": 0.8202, "step": 34092 }, { "epoch": 0.9027692855801682, "grad_norm": 0.828125, "learning_rate": 9.334331316345203e-05, "loss": 0.925, "step": 34093 }, { "epoch": 0.9027957651884626, "grad_norm": 0.78125, "learning_rate": 9.333871964106056e-05, "loss": 0.8528, "step": 34094 }, { "epoch": 0.902822244796757, "grad_norm": 0.734375, "learning_rate": 9.333412613278733e-05, "loss": 0.7673, "step": 34095 }, { "epoch": 0.9028487244050514, "grad_norm": 0.7734375, "learning_rate": 9.332953263864204e-05, "loss": 0.8261, "step": 34096 }, { "epoch": 0.9028752040133458, "grad_norm": 0.78125, "learning_rate": 9.332493915863442e-05, "loss": 0.8273, "step": 34097 }, { "epoch": 0.9029016836216401, "grad_norm": 0.8046875, "learning_rate": 9.33203456927742e-05, "loss": 0.7104, "step": 34098 }, { "epoch": 0.9029281632299345, "grad_norm": 0.78515625, "learning_rate": 9.331575224107108e-05, "loss": 0.8512, "step": 34099 }, { "epoch": 0.9029546428382289, "grad_norm": 0.76953125, "learning_rate": 9.33111588035349e-05, "loss": 0.81, "step": 34100 }, { "epoch": 0.9029811224465232, "grad_norm": 0.84765625, "learning_rate": 9.330656538017534e-05, "loss": 0.7634, "step": 34101 }, { "epoch": 0.9030076020548176, "grad_norm": 0.75, "learning_rate": 9.330197197100212e-05, "loss": 0.8982, "step": 34102 }, { "epoch": 0.9030340816631119, "grad_norm": 0.796875, "learning_rate": 9.329737857602499e-05, "loss": 0.9136, "step": 34103 }, { "epoch": 0.9030605612714063, "grad_norm": 0.7265625, "learning_rate": 9.329278519525364e-05, "loss": 0.7782, "step": 34104 }, { "epoch": 0.9030870408797007, "grad_norm": 0.83203125, "learning_rate": 9.32881918286979e-05, "loss": 0.7378, "step": 34105 }, { "epoch": 0.9031135204879951, "grad_norm": 0.78515625, "learning_rate": 9.328359847636744e-05, "loss": 0.6718, "step": 34106 }, { "epoch": 0.9031400000962895, "grad_norm": 0.8203125, "learning_rate": 9.327900513827201e-05, "loss": 0.7471, "step": 34107 }, { "epoch": 0.9031664797045839, "grad_norm": 0.7578125, "learning_rate": 9.327441181442136e-05, "loss": 0.8971, "step": 34108 }, { "epoch": 0.9031929593128782, "grad_norm": 0.7265625, "learning_rate": 9.326981850482515e-05, "loss": 0.8756, "step": 34109 }, { "epoch": 0.9032194389211726, "grad_norm": 0.80078125, "learning_rate": 9.326522520949322e-05, "loss": 0.7393, "step": 34110 }, { "epoch": 0.903245918529467, "grad_norm": 0.7578125, "learning_rate": 9.326063192843528e-05, "loss": 0.7259, "step": 34111 }, { "epoch": 0.9032723981377614, "grad_norm": 0.75, "learning_rate": 9.325603866166102e-05, "loss": 0.7555, "step": 34112 }, { "epoch": 0.9032988777460558, "grad_norm": 0.72265625, "learning_rate": 9.325144540918022e-05, "loss": 0.702, "step": 34113 }, { "epoch": 0.9033253573543502, "grad_norm": 1.3125, "learning_rate": 9.324685217100255e-05, "loss": 0.8074, "step": 34114 }, { "epoch": 0.9033518369626445, "grad_norm": 0.84375, "learning_rate": 9.324225894713783e-05, "loss": 0.8667, "step": 34115 }, { "epoch": 0.9033783165709389, "grad_norm": 0.79296875, "learning_rate": 9.323766573759576e-05, "loss": 0.8376, "step": 34116 }, { "epoch": 0.9034047961792332, "grad_norm": 0.67578125, "learning_rate": 9.323307254238607e-05, "loss": 0.6783, "step": 34117 }, { "epoch": 0.9034312757875276, "grad_norm": 0.7578125, "learning_rate": 9.322847936151848e-05, "loss": 0.7361, "step": 34118 }, { "epoch": 0.903457755395822, "grad_norm": 0.83984375, "learning_rate": 9.322388619500271e-05, "loss": 0.8048, "step": 34119 }, { "epoch": 0.9034842350041163, "grad_norm": 0.70703125, "learning_rate": 9.321929304284856e-05, "loss": 0.7703, "step": 34120 }, { "epoch": 0.9035107146124107, "grad_norm": 0.73046875, "learning_rate": 9.321469990506575e-05, "loss": 0.8334, "step": 34121 }, { "epoch": 0.9035371942207051, "grad_norm": 0.8203125, "learning_rate": 9.321010678166398e-05, "loss": 0.8936, "step": 34122 }, { "epoch": 0.9035636738289995, "grad_norm": 0.7265625, "learning_rate": 9.3205513672653e-05, "loss": 0.7326, "step": 34123 }, { "epoch": 0.9035901534372939, "grad_norm": 0.7890625, "learning_rate": 9.320092057804251e-05, "loss": 0.8221, "step": 34124 }, { "epoch": 0.9036166330455883, "grad_norm": 0.80859375, "learning_rate": 9.319632749784232e-05, "loss": 0.7571, "step": 34125 }, { "epoch": 0.9036431126538826, "grad_norm": 0.79296875, "learning_rate": 9.319173443206213e-05, "loss": 0.8527, "step": 34126 }, { "epoch": 0.903669592262177, "grad_norm": 0.734375, "learning_rate": 9.318714138071166e-05, "loss": 0.8267, "step": 34127 }, { "epoch": 0.9036960718704714, "grad_norm": 0.76953125, "learning_rate": 9.318254834380065e-05, "loss": 0.8282, "step": 34128 }, { "epoch": 0.9037225514787658, "grad_norm": 0.76171875, "learning_rate": 9.317795532133882e-05, "loss": 0.7298, "step": 34129 }, { "epoch": 0.9037490310870602, "grad_norm": 0.83203125, "learning_rate": 9.317336231333595e-05, "loss": 0.8041, "step": 34130 }, { "epoch": 0.9037755106953546, "grad_norm": 0.84375, "learning_rate": 9.316876931980174e-05, "loss": 0.8411, "step": 34131 }, { "epoch": 0.9038019903036489, "grad_norm": 0.76171875, "learning_rate": 9.316417634074595e-05, "loss": 0.7461, "step": 34132 }, { "epoch": 0.9038284699119432, "grad_norm": 0.84375, "learning_rate": 9.315958337617828e-05, "loss": 0.8039, "step": 34133 }, { "epoch": 0.9038549495202376, "grad_norm": 0.8125, "learning_rate": 9.315499042610847e-05, "loss": 0.7957, "step": 34134 }, { "epoch": 0.903881429128532, "grad_norm": 0.828125, "learning_rate": 9.31503974905463e-05, "loss": 0.7786, "step": 34135 }, { "epoch": 0.9039079087368264, "grad_norm": 0.7890625, "learning_rate": 9.314580456950147e-05, "loss": 0.8989, "step": 34136 }, { "epoch": 0.9039343883451207, "grad_norm": 0.765625, "learning_rate": 9.31412116629837e-05, "loss": 0.8293, "step": 34137 }, { "epoch": 0.9039608679534151, "grad_norm": 0.73828125, "learning_rate": 9.313661877100271e-05, "loss": 0.8275, "step": 34138 }, { "epoch": 0.9039873475617095, "grad_norm": 0.7734375, "learning_rate": 9.313202589356831e-05, "loss": 0.7015, "step": 34139 }, { "epoch": 0.9040138271700039, "grad_norm": 0.76171875, "learning_rate": 9.312743303069015e-05, "loss": 0.7964, "step": 34140 }, { "epoch": 0.9040403067782983, "grad_norm": 0.8046875, "learning_rate": 9.312284018237804e-05, "loss": 0.886, "step": 34141 }, { "epoch": 0.9040667863865927, "grad_norm": 0.78125, "learning_rate": 9.311824734864166e-05, "loss": 0.8524, "step": 34142 }, { "epoch": 0.904093265994887, "grad_norm": 0.79296875, "learning_rate": 9.311365452949077e-05, "loss": 0.8021, "step": 34143 }, { "epoch": 0.9041197456031814, "grad_norm": 0.78515625, "learning_rate": 9.310906172493509e-05, "loss": 0.7836, "step": 34144 }, { "epoch": 0.9041462252114758, "grad_norm": 0.6953125, "learning_rate": 9.310446893498433e-05, "loss": 0.728, "step": 34145 }, { "epoch": 0.9041727048197702, "grad_norm": 0.7734375, "learning_rate": 9.309987615964829e-05, "loss": 0.9355, "step": 34146 }, { "epoch": 0.9041991844280646, "grad_norm": 0.78125, "learning_rate": 9.309528339893667e-05, "loss": 0.7903, "step": 34147 }, { "epoch": 0.904225664036359, "grad_norm": 0.84375, "learning_rate": 9.309069065285921e-05, "loss": 0.7515, "step": 34148 }, { "epoch": 0.9042521436446533, "grad_norm": 0.76953125, "learning_rate": 9.308609792142562e-05, "loss": 0.709, "step": 34149 }, { "epoch": 0.9042786232529476, "grad_norm": 0.73828125, "learning_rate": 9.30815052046456e-05, "loss": 0.7722, "step": 34150 }, { "epoch": 0.904305102861242, "grad_norm": 0.77734375, "learning_rate": 9.3076912502529e-05, "loss": 0.8319, "step": 34151 }, { "epoch": 0.9043315824695364, "grad_norm": 0.78515625, "learning_rate": 9.307231981508547e-05, "loss": 0.7675, "step": 34152 }, { "epoch": 0.9043580620778308, "grad_norm": 0.875, "learning_rate": 9.306772714232478e-05, "loss": 0.8085, "step": 34153 }, { "epoch": 0.9043845416861251, "grad_norm": 0.80859375, "learning_rate": 9.306313448425663e-05, "loss": 0.7605, "step": 34154 }, { "epoch": 0.9044110212944195, "grad_norm": 0.73828125, "learning_rate": 9.305854184089076e-05, "loss": 0.6866, "step": 34155 }, { "epoch": 0.9044375009027139, "grad_norm": 0.77734375, "learning_rate": 9.305394921223693e-05, "loss": 0.8051, "step": 34156 }, { "epoch": 0.9044639805110083, "grad_norm": 0.8125, "learning_rate": 9.304935659830484e-05, "loss": 0.8098, "step": 34157 }, { "epoch": 0.9044904601193027, "grad_norm": 0.78515625, "learning_rate": 9.304476399910425e-05, "loss": 0.7585, "step": 34158 }, { "epoch": 0.9045169397275971, "grad_norm": 0.78125, "learning_rate": 9.30401714146449e-05, "loss": 0.7964, "step": 34159 }, { "epoch": 0.9045434193358914, "grad_norm": 0.84375, "learning_rate": 9.303557884493648e-05, "loss": 0.7715, "step": 34160 }, { "epoch": 0.9045698989441858, "grad_norm": 0.796875, "learning_rate": 9.303098628998878e-05, "loss": 0.7426, "step": 34161 }, { "epoch": 0.9045963785524802, "grad_norm": 0.796875, "learning_rate": 9.30263937498115e-05, "loss": 0.8797, "step": 34162 }, { "epoch": 0.9046228581607746, "grad_norm": 1.046875, "learning_rate": 9.302180122441439e-05, "loss": 0.821, "step": 34163 }, { "epoch": 0.904649337769069, "grad_norm": 0.76171875, "learning_rate": 9.301720871380716e-05, "loss": 0.8275, "step": 34164 }, { "epoch": 0.9046758173773634, "grad_norm": 0.73828125, "learning_rate": 9.301261621799952e-05, "loss": 0.7868, "step": 34165 }, { "epoch": 0.9047022969856576, "grad_norm": 0.76171875, "learning_rate": 9.300802373700128e-05, "loss": 0.803, "step": 34166 }, { "epoch": 0.904728776593952, "grad_norm": 0.79296875, "learning_rate": 9.300343127082215e-05, "loss": 0.7773, "step": 34167 }, { "epoch": 0.9047552562022464, "grad_norm": 0.76953125, "learning_rate": 9.299883881947183e-05, "loss": 0.8182, "step": 34168 }, { "epoch": 0.9047817358105408, "grad_norm": 0.765625, "learning_rate": 9.299424638296007e-05, "loss": 0.7944, "step": 34169 }, { "epoch": 0.9048082154188352, "grad_norm": 0.78125, "learning_rate": 9.298965396129656e-05, "loss": 0.7318, "step": 34170 }, { "epoch": 0.9048346950271295, "grad_norm": 0.796875, "learning_rate": 9.298506155449114e-05, "loss": 0.8418, "step": 34171 }, { "epoch": 0.9048611746354239, "grad_norm": 0.734375, "learning_rate": 9.298046916255346e-05, "loss": 0.8676, "step": 34172 }, { "epoch": 0.9048876542437183, "grad_norm": 0.83984375, "learning_rate": 9.297587678549329e-05, "loss": 0.8541, "step": 34173 }, { "epoch": 0.9049141338520127, "grad_norm": 0.78125, "learning_rate": 9.297128442332033e-05, "loss": 0.8284, "step": 34174 }, { "epoch": 0.9049406134603071, "grad_norm": 0.875, "learning_rate": 9.29666920760443e-05, "loss": 0.8408, "step": 34175 }, { "epoch": 0.9049670930686015, "grad_norm": 0.7890625, "learning_rate": 9.296209974367501e-05, "loss": 0.7739, "step": 34176 }, { "epoch": 0.9049935726768958, "grad_norm": 0.82421875, "learning_rate": 9.295750742622215e-05, "loss": 0.8086, "step": 34177 }, { "epoch": 0.9050200522851902, "grad_norm": 0.8125, "learning_rate": 9.295291512369544e-05, "loss": 0.7777, "step": 34178 }, { "epoch": 0.9050465318934846, "grad_norm": 0.83203125, "learning_rate": 9.294832283610463e-05, "loss": 0.8589, "step": 34179 }, { "epoch": 0.905073011501779, "grad_norm": 0.77734375, "learning_rate": 9.294373056345941e-05, "loss": 0.8004, "step": 34180 }, { "epoch": 0.9050994911100734, "grad_norm": 0.74609375, "learning_rate": 9.293913830576959e-05, "loss": 0.7206, "step": 34181 }, { "epoch": 0.9051259707183678, "grad_norm": 0.77734375, "learning_rate": 9.293454606304485e-05, "loss": 0.8799, "step": 34182 }, { "epoch": 0.905152450326662, "grad_norm": 0.84375, "learning_rate": 9.292995383529496e-05, "loss": 0.8016, "step": 34183 }, { "epoch": 0.9051789299349564, "grad_norm": 0.76171875, "learning_rate": 9.29253616225296e-05, "loss": 0.8197, "step": 34184 }, { "epoch": 0.9052054095432508, "grad_norm": 0.73828125, "learning_rate": 9.29207694247585e-05, "loss": 0.7336, "step": 34185 }, { "epoch": 0.9052318891515452, "grad_norm": 0.7734375, "learning_rate": 9.291617724199147e-05, "loss": 0.7063, "step": 34186 }, { "epoch": 0.9052583687598396, "grad_norm": 0.80859375, "learning_rate": 9.291158507423821e-05, "loss": 0.8214, "step": 34187 }, { "epoch": 0.9052848483681339, "grad_norm": 0.78125, "learning_rate": 9.290699292150842e-05, "loss": 0.8227, "step": 34188 }, { "epoch": 0.9053113279764283, "grad_norm": 0.8515625, "learning_rate": 9.290240078381184e-05, "loss": 0.7521, "step": 34189 }, { "epoch": 0.9053378075847227, "grad_norm": 0.8125, "learning_rate": 9.28978086611582e-05, "loss": 0.7607, "step": 34190 }, { "epoch": 0.9053642871930171, "grad_norm": 0.765625, "learning_rate": 9.289321655355728e-05, "loss": 0.7533, "step": 34191 }, { "epoch": 0.9053907668013115, "grad_norm": 0.7890625, "learning_rate": 9.288862446101879e-05, "loss": 0.7935, "step": 34192 }, { "epoch": 0.9054172464096059, "grad_norm": 0.84765625, "learning_rate": 9.288403238355245e-05, "loss": 0.8118, "step": 34193 }, { "epoch": 0.9054437260179002, "grad_norm": 0.83984375, "learning_rate": 9.2879440321168e-05, "loss": 0.9613, "step": 34194 }, { "epoch": 0.9054702056261946, "grad_norm": 0.7734375, "learning_rate": 9.287484827387511e-05, "loss": 0.8393, "step": 34195 }, { "epoch": 0.905496685234489, "grad_norm": 0.71484375, "learning_rate": 9.287025624168363e-05, "loss": 0.7187, "step": 34196 }, { "epoch": 0.9055231648427834, "grad_norm": 1.921875, "learning_rate": 9.286566422460323e-05, "loss": 0.8526, "step": 34197 }, { "epoch": 0.9055496444510778, "grad_norm": 0.77734375, "learning_rate": 9.286107222264365e-05, "loss": 0.7242, "step": 34198 }, { "epoch": 0.905576124059372, "grad_norm": 0.72265625, "learning_rate": 9.285648023581461e-05, "loss": 0.7209, "step": 34199 }, { "epoch": 0.9056026036676664, "grad_norm": 0.80078125, "learning_rate": 9.285188826412584e-05, "loss": 0.7716, "step": 34200 }, { "epoch": 0.9056290832759608, "grad_norm": 0.73828125, "learning_rate": 9.284729630758711e-05, "loss": 0.8227, "step": 34201 }, { "epoch": 0.9056555628842552, "grad_norm": 0.79296875, "learning_rate": 9.284270436620812e-05, "loss": 0.7918, "step": 34202 }, { "epoch": 0.9056820424925496, "grad_norm": 0.7265625, "learning_rate": 9.28381124399986e-05, "loss": 0.7834, "step": 34203 }, { "epoch": 0.905708522100844, "grad_norm": 0.73828125, "learning_rate": 9.28335205289683e-05, "loss": 0.7959, "step": 34204 }, { "epoch": 0.9057350017091383, "grad_norm": 0.75390625, "learning_rate": 9.28289286331269e-05, "loss": 0.8138, "step": 34205 }, { "epoch": 0.9057614813174327, "grad_norm": 1.1796875, "learning_rate": 9.282433675248422e-05, "loss": 0.7561, "step": 34206 }, { "epoch": 0.9057879609257271, "grad_norm": 0.734375, "learning_rate": 9.281974488704996e-05, "loss": 0.7843, "step": 34207 }, { "epoch": 0.9058144405340215, "grad_norm": 0.71875, "learning_rate": 9.281515303683381e-05, "loss": 0.8091, "step": 34208 }, { "epoch": 0.9058409201423159, "grad_norm": 0.7421875, "learning_rate": 9.281056120184556e-05, "loss": 0.7824, "step": 34209 }, { "epoch": 0.9058673997506103, "grad_norm": 0.7265625, "learning_rate": 9.280596938209486e-05, "loss": 0.7369, "step": 34210 }, { "epoch": 0.9058938793589046, "grad_norm": 0.78515625, "learning_rate": 9.280137757759154e-05, "loss": 0.7581, "step": 34211 }, { "epoch": 0.905920358967199, "grad_norm": 0.78515625, "learning_rate": 9.279678578834529e-05, "loss": 0.8393, "step": 34212 }, { "epoch": 0.9059468385754934, "grad_norm": 0.81640625, "learning_rate": 9.279219401436584e-05, "loss": 0.8418, "step": 34213 }, { "epoch": 0.9059733181837878, "grad_norm": 0.88671875, "learning_rate": 9.278760225566292e-05, "loss": 0.7305, "step": 34214 }, { "epoch": 0.9059997977920821, "grad_norm": 0.8359375, "learning_rate": 9.278301051224622e-05, "loss": 0.9067, "step": 34215 }, { "epoch": 0.9060262774003764, "grad_norm": 0.7578125, "learning_rate": 9.277841878412558e-05, "loss": 0.812, "step": 34216 }, { "epoch": 0.9060527570086708, "grad_norm": 0.73828125, "learning_rate": 9.277382707131065e-05, "loss": 0.8103, "step": 34217 }, { "epoch": 0.9060792366169652, "grad_norm": 0.82421875, "learning_rate": 9.276923537381119e-05, "loss": 0.7672, "step": 34218 }, { "epoch": 0.9061057162252596, "grad_norm": 0.7890625, "learning_rate": 9.27646436916369e-05, "loss": 0.7893, "step": 34219 }, { "epoch": 0.906132195833554, "grad_norm": 0.7578125, "learning_rate": 9.276005202479754e-05, "loss": 0.8045, "step": 34220 }, { "epoch": 0.9061586754418484, "grad_norm": 0.78515625, "learning_rate": 9.275546037330285e-05, "loss": 0.7699, "step": 34221 }, { "epoch": 0.9061851550501427, "grad_norm": 0.8671875, "learning_rate": 9.275086873716255e-05, "loss": 0.7832, "step": 34222 }, { "epoch": 0.9062116346584371, "grad_norm": 0.7578125, "learning_rate": 9.274627711638635e-05, "loss": 0.7641, "step": 34223 }, { "epoch": 0.9062381142667315, "grad_norm": 0.765625, "learning_rate": 9.2741685510984e-05, "loss": 0.7253, "step": 34224 }, { "epoch": 0.9062645938750259, "grad_norm": 0.76171875, "learning_rate": 9.273709392096522e-05, "loss": 0.9423, "step": 34225 }, { "epoch": 0.9062910734833203, "grad_norm": 0.7890625, "learning_rate": 9.273250234633978e-05, "loss": 0.7247, "step": 34226 }, { "epoch": 0.9063175530916147, "grad_norm": 0.7890625, "learning_rate": 9.272791078711739e-05, "loss": 0.8953, "step": 34227 }, { "epoch": 0.906344032699909, "grad_norm": 0.8203125, "learning_rate": 9.272331924330777e-05, "loss": 0.6533, "step": 34228 }, { "epoch": 0.9063705123082034, "grad_norm": 0.8203125, "learning_rate": 9.271872771492067e-05, "loss": 0.8332, "step": 34229 }, { "epoch": 0.9063969919164978, "grad_norm": 0.83984375, "learning_rate": 9.271413620196576e-05, "loss": 0.7974, "step": 34230 }, { "epoch": 0.9064234715247922, "grad_norm": 0.7578125, "learning_rate": 9.270954470445287e-05, "loss": 0.7993, "step": 34231 }, { "epoch": 0.9064499511330865, "grad_norm": 0.8984375, "learning_rate": 9.270495322239167e-05, "loss": 0.8885, "step": 34232 }, { "epoch": 0.9064764307413808, "grad_norm": 0.77734375, "learning_rate": 9.270036175579192e-05, "loss": 0.8636, "step": 34233 }, { "epoch": 0.9065029103496752, "grad_norm": 0.79296875, "learning_rate": 9.269577030466334e-05, "loss": 0.8709, "step": 34234 }, { "epoch": 0.9065293899579696, "grad_norm": 0.76953125, "learning_rate": 9.26911788690156e-05, "loss": 0.8122, "step": 34235 }, { "epoch": 0.906555869566264, "grad_norm": 0.75, "learning_rate": 9.268658744885853e-05, "loss": 0.9009, "step": 34236 }, { "epoch": 0.9065823491745584, "grad_norm": 0.7734375, "learning_rate": 9.268199604420182e-05, "loss": 0.7351, "step": 34237 }, { "epoch": 0.9066088287828528, "grad_norm": 0.80859375, "learning_rate": 9.267740465505523e-05, "loss": 0.7782, "step": 34238 }, { "epoch": 0.9066353083911471, "grad_norm": 0.8359375, "learning_rate": 9.267281328142843e-05, "loss": 0.8772, "step": 34239 }, { "epoch": 0.9066617879994415, "grad_norm": 0.828125, "learning_rate": 9.266822192333117e-05, "loss": 0.8858, "step": 34240 }, { "epoch": 0.9066882676077359, "grad_norm": 0.75, "learning_rate": 9.266363058077322e-05, "loss": 0.7671, "step": 34241 }, { "epoch": 0.9067147472160303, "grad_norm": 0.76953125, "learning_rate": 9.265903925376426e-05, "loss": 0.8802, "step": 34242 }, { "epoch": 0.9067412268243247, "grad_norm": 0.82421875, "learning_rate": 9.265444794231407e-05, "loss": 0.7855, "step": 34243 }, { "epoch": 0.9067677064326191, "grad_norm": 0.83984375, "learning_rate": 9.264985664643235e-05, "loss": 0.8006, "step": 34244 }, { "epoch": 0.9067941860409134, "grad_norm": 0.84375, "learning_rate": 9.264526536612883e-05, "loss": 0.8195, "step": 34245 }, { "epoch": 0.9068206656492078, "grad_norm": 0.80859375, "learning_rate": 9.264067410141327e-05, "loss": 0.8601, "step": 34246 }, { "epoch": 0.9068471452575022, "grad_norm": 0.765625, "learning_rate": 9.263608285229536e-05, "loss": 0.7587, "step": 34247 }, { "epoch": 0.9068736248657965, "grad_norm": 0.765625, "learning_rate": 9.263149161878488e-05, "loss": 0.734, "step": 34248 }, { "epoch": 0.9069001044740909, "grad_norm": 0.81640625, "learning_rate": 9.262690040089153e-05, "loss": 0.8657, "step": 34249 }, { "epoch": 0.9069265840823852, "grad_norm": 0.77734375, "learning_rate": 9.262230919862498e-05, "loss": 0.7377, "step": 34250 }, { "epoch": 0.9069530636906796, "grad_norm": 0.7734375, "learning_rate": 9.261771801199509e-05, "loss": 0.743, "step": 34251 }, { "epoch": 0.906979543298974, "grad_norm": 0.984375, "learning_rate": 9.26131268410115e-05, "loss": 0.7849, "step": 34252 }, { "epoch": 0.9070060229072684, "grad_norm": 0.796875, "learning_rate": 9.260853568568398e-05, "loss": 0.8502, "step": 34253 }, { "epoch": 0.9070325025155628, "grad_norm": 0.81640625, "learning_rate": 9.260394454602224e-05, "loss": 0.708, "step": 34254 }, { "epoch": 0.9070589821238572, "grad_norm": 0.92578125, "learning_rate": 9.259935342203598e-05, "loss": 0.7466, "step": 34255 }, { "epoch": 0.9070854617321515, "grad_norm": 0.79296875, "learning_rate": 9.2594762313735e-05, "loss": 0.7995, "step": 34256 }, { "epoch": 0.9071119413404459, "grad_norm": 0.7421875, "learning_rate": 9.259017122112902e-05, "loss": 0.8172, "step": 34257 }, { "epoch": 0.9071384209487403, "grad_norm": 0.875, "learning_rate": 9.258558014422774e-05, "loss": 1.0501, "step": 34258 }, { "epoch": 0.9071649005570347, "grad_norm": 0.75390625, "learning_rate": 9.258098908304089e-05, "loss": 0.6502, "step": 34259 }, { "epoch": 0.9071913801653291, "grad_norm": 0.89453125, "learning_rate": 9.257639803757819e-05, "loss": 0.7795, "step": 34260 }, { "epoch": 0.9072178597736235, "grad_norm": 0.796875, "learning_rate": 9.257180700784939e-05, "loss": 0.8296, "step": 34261 }, { "epoch": 0.9072443393819178, "grad_norm": 0.734375, "learning_rate": 9.256721599386424e-05, "loss": 0.6949, "step": 34262 }, { "epoch": 0.9072708189902122, "grad_norm": 0.953125, "learning_rate": 9.256262499563246e-05, "loss": 0.8817, "step": 34263 }, { "epoch": 0.9072972985985065, "grad_norm": 0.7265625, "learning_rate": 9.255803401316376e-05, "loss": 0.9082, "step": 34264 }, { "epoch": 0.9073237782068009, "grad_norm": 0.75390625, "learning_rate": 9.255344304646787e-05, "loss": 0.6917, "step": 34265 }, { "epoch": 0.9073502578150953, "grad_norm": 0.78125, "learning_rate": 9.254885209555455e-05, "loss": 0.9274, "step": 34266 }, { "epoch": 0.9073767374233896, "grad_norm": 0.76171875, "learning_rate": 9.254426116043352e-05, "loss": 0.8457, "step": 34267 }, { "epoch": 0.907403217031684, "grad_norm": 0.7890625, "learning_rate": 9.253967024111448e-05, "loss": 0.8369, "step": 34268 }, { "epoch": 0.9074296966399784, "grad_norm": 0.79296875, "learning_rate": 9.253507933760721e-05, "loss": 0.8276, "step": 34269 }, { "epoch": 0.9074561762482728, "grad_norm": 0.796875, "learning_rate": 9.253048844992135e-05, "loss": 0.7671, "step": 34270 }, { "epoch": 0.9074826558565672, "grad_norm": 0.7734375, "learning_rate": 9.252589757806675e-05, "loss": 0.8052, "step": 34271 }, { "epoch": 0.9075091354648616, "grad_norm": 0.7734375, "learning_rate": 9.252130672205307e-05, "loss": 0.7711, "step": 34272 }, { "epoch": 0.9075356150731559, "grad_norm": 0.76953125, "learning_rate": 9.251671588189006e-05, "loss": 0.744, "step": 34273 }, { "epoch": 0.9075620946814503, "grad_norm": 0.7109375, "learning_rate": 9.251212505758744e-05, "loss": 0.7115, "step": 34274 }, { "epoch": 0.9075885742897447, "grad_norm": 0.7890625, "learning_rate": 9.25075342491549e-05, "loss": 0.8437, "step": 34275 }, { "epoch": 0.9076150538980391, "grad_norm": 0.76171875, "learning_rate": 9.250294345660225e-05, "loss": 0.8167, "step": 34276 }, { "epoch": 0.9076415335063335, "grad_norm": 0.78125, "learning_rate": 9.249835267993919e-05, "loss": 0.7912, "step": 34277 }, { "epoch": 0.9076680131146279, "grad_norm": 0.80078125, "learning_rate": 9.249376191917545e-05, "loss": 0.7877, "step": 34278 }, { "epoch": 0.9076944927229222, "grad_norm": 0.765625, "learning_rate": 9.248917117432075e-05, "loss": 0.8865, "step": 34279 }, { "epoch": 0.9077209723312166, "grad_norm": 0.83203125, "learning_rate": 9.248458044538475e-05, "loss": 0.9462, "step": 34280 }, { "epoch": 0.9077474519395109, "grad_norm": 0.75, "learning_rate": 9.247998973237733e-05, "loss": 0.7696, "step": 34281 }, { "epoch": 0.9077739315478053, "grad_norm": 0.890625, "learning_rate": 9.247539903530813e-05, "loss": 0.7618, "step": 34282 }, { "epoch": 0.9078004111560997, "grad_norm": 0.8515625, "learning_rate": 9.24708083541869e-05, "loss": 0.739, "step": 34283 }, { "epoch": 0.907826890764394, "grad_norm": 0.75390625, "learning_rate": 9.246621768902335e-05, "loss": 0.8335, "step": 34284 }, { "epoch": 0.9078533703726884, "grad_norm": 0.78125, "learning_rate": 9.246162703982723e-05, "loss": 0.8746, "step": 34285 }, { "epoch": 0.9078798499809828, "grad_norm": 0.78125, "learning_rate": 9.245703640660824e-05, "loss": 0.7859, "step": 34286 }, { "epoch": 0.9079063295892772, "grad_norm": 0.8515625, "learning_rate": 9.245244578937615e-05, "loss": 0.7938, "step": 34287 }, { "epoch": 0.9079328091975716, "grad_norm": 0.73046875, "learning_rate": 9.244785518814066e-05, "loss": 0.6714, "step": 34288 }, { "epoch": 0.907959288805866, "grad_norm": 0.75390625, "learning_rate": 9.24432646029115e-05, "loss": 0.7387, "step": 34289 }, { "epoch": 0.9079857684141603, "grad_norm": 0.7890625, "learning_rate": 9.243867403369842e-05, "loss": 0.7778, "step": 34290 }, { "epoch": 0.9080122480224547, "grad_norm": 0.73828125, "learning_rate": 9.243408348051111e-05, "loss": 0.8826, "step": 34291 }, { "epoch": 0.9080387276307491, "grad_norm": 0.71875, "learning_rate": 9.242949294335937e-05, "loss": 0.7575, "step": 34292 }, { "epoch": 0.9080652072390435, "grad_norm": 0.73046875, "learning_rate": 9.242490242225287e-05, "loss": 0.7322, "step": 34293 }, { "epoch": 0.9080916868473379, "grad_norm": 0.8515625, "learning_rate": 9.242031191720136e-05, "loss": 0.8613, "step": 34294 }, { "epoch": 0.9081181664556323, "grad_norm": 0.75, "learning_rate": 9.241572142821457e-05, "loss": 0.7883, "step": 34295 }, { "epoch": 0.9081446460639266, "grad_norm": 0.79296875, "learning_rate": 9.241113095530218e-05, "loss": 0.7829, "step": 34296 }, { "epoch": 0.9081711256722209, "grad_norm": 0.73046875, "learning_rate": 9.2406540498474e-05, "loss": 0.7546, "step": 34297 }, { "epoch": 0.9081976052805153, "grad_norm": 0.765625, "learning_rate": 9.240195005773974e-05, "loss": 0.8346, "step": 34298 }, { "epoch": 0.9082240848888097, "grad_norm": 0.84375, "learning_rate": 9.23973596331091e-05, "loss": 0.775, "step": 34299 }, { "epoch": 0.9082505644971041, "grad_norm": 0.71875, "learning_rate": 9.239276922459183e-05, "loss": 0.7478, "step": 34300 }, { "epoch": 0.9082770441053984, "grad_norm": 0.7734375, "learning_rate": 9.23881788321976e-05, "loss": 0.8358, "step": 34301 }, { "epoch": 0.9083035237136928, "grad_norm": 0.80859375, "learning_rate": 9.238358845593623e-05, "loss": 0.7422, "step": 34302 }, { "epoch": 0.9083300033219872, "grad_norm": 0.6953125, "learning_rate": 9.237899809581742e-05, "loss": 0.7386, "step": 34303 }, { "epoch": 0.9083564829302816, "grad_norm": 0.76953125, "learning_rate": 9.237440775185089e-05, "loss": 0.7534, "step": 34304 }, { "epoch": 0.908382962538576, "grad_norm": 0.74609375, "learning_rate": 9.236981742404636e-05, "loss": 0.658, "step": 34305 }, { "epoch": 0.9084094421468704, "grad_norm": 0.7734375, "learning_rate": 9.236522711241354e-05, "loss": 0.8831, "step": 34306 }, { "epoch": 0.9084359217551647, "grad_norm": 0.83984375, "learning_rate": 9.23606368169622e-05, "loss": 0.9022, "step": 34307 }, { "epoch": 0.9084624013634591, "grad_norm": 0.765625, "learning_rate": 9.235604653770209e-05, "loss": 0.6691, "step": 34308 }, { "epoch": 0.9084888809717535, "grad_norm": 0.69140625, "learning_rate": 9.235145627464283e-05, "loss": 0.7314, "step": 34309 }, { "epoch": 0.9085153605800479, "grad_norm": 0.72265625, "learning_rate": 9.234686602779428e-05, "loss": 0.7823, "step": 34310 }, { "epoch": 0.9085418401883423, "grad_norm": 0.80078125, "learning_rate": 9.234227579716607e-05, "loss": 0.7558, "step": 34311 }, { "epoch": 0.9085683197966367, "grad_norm": 0.84375, "learning_rate": 9.2337685582768e-05, "loss": 0.7393, "step": 34312 }, { "epoch": 0.9085947994049309, "grad_norm": 0.671875, "learning_rate": 9.233309538460977e-05, "loss": 0.7856, "step": 34313 }, { "epoch": 0.9086212790132253, "grad_norm": 0.86328125, "learning_rate": 9.23285052027011e-05, "loss": 0.6888, "step": 34314 }, { "epoch": 0.9086477586215197, "grad_norm": 0.8515625, "learning_rate": 9.232391503705172e-05, "loss": 0.88, "step": 34315 }, { "epoch": 0.9086742382298141, "grad_norm": 0.71484375, "learning_rate": 9.231932488767133e-05, "loss": 0.6952, "step": 34316 }, { "epoch": 0.9087007178381085, "grad_norm": 0.7734375, "learning_rate": 9.231473475456972e-05, "loss": 0.8605, "step": 34317 }, { "epoch": 0.9087271974464028, "grad_norm": 0.71484375, "learning_rate": 9.231014463775661e-05, "loss": 0.7952, "step": 34318 }, { "epoch": 0.9087536770546972, "grad_norm": 0.73828125, "learning_rate": 9.23055545372417e-05, "loss": 0.6836, "step": 34319 }, { "epoch": 0.9087801566629916, "grad_norm": 0.6875, "learning_rate": 9.230096445303472e-05, "loss": 0.7462, "step": 34320 }, { "epoch": 0.908806636271286, "grad_norm": 0.7578125, "learning_rate": 9.229637438514536e-05, "loss": 0.7557, "step": 34321 }, { "epoch": 0.9088331158795804, "grad_norm": 0.78515625, "learning_rate": 9.229178433358345e-05, "loss": 0.7355, "step": 34322 }, { "epoch": 0.9088595954878748, "grad_norm": 0.75, "learning_rate": 9.228719429835867e-05, "loss": 0.9682, "step": 34323 }, { "epoch": 0.9088860750961691, "grad_norm": 0.66015625, "learning_rate": 9.228260427948074e-05, "loss": 0.712, "step": 34324 }, { "epoch": 0.9089125547044635, "grad_norm": 0.83203125, "learning_rate": 9.227801427695936e-05, "loss": 0.7771, "step": 34325 }, { "epoch": 0.9089390343127579, "grad_norm": 0.7890625, "learning_rate": 9.227342429080429e-05, "loss": 0.8743, "step": 34326 }, { "epoch": 0.9089655139210523, "grad_norm": 0.80859375, "learning_rate": 9.226883432102528e-05, "loss": 0.7838, "step": 34327 }, { "epoch": 0.9089919935293467, "grad_norm": 0.7890625, "learning_rate": 9.2264244367632e-05, "loss": 0.7133, "step": 34328 }, { "epoch": 0.9090184731376411, "grad_norm": 0.85546875, "learning_rate": 9.225965443063423e-05, "loss": 0.8145, "step": 34329 }, { "epoch": 0.9090449527459353, "grad_norm": 0.88671875, "learning_rate": 9.22550645100417e-05, "loss": 0.7405, "step": 34330 }, { "epoch": 0.9090714323542297, "grad_norm": 0.7421875, "learning_rate": 9.225047460586408e-05, "loss": 0.6086, "step": 34331 }, { "epoch": 0.9090979119625241, "grad_norm": 0.8203125, "learning_rate": 9.224588471811116e-05, "loss": 0.8481, "step": 34332 }, { "epoch": 0.9091243915708185, "grad_norm": 0.765625, "learning_rate": 9.224129484679265e-05, "loss": 0.8907, "step": 34333 }, { "epoch": 0.9091508711791129, "grad_norm": 0.78515625, "learning_rate": 9.223670499191827e-05, "loss": 0.9103, "step": 34334 }, { "epoch": 0.9091773507874072, "grad_norm": 0.75, "learning_rate": 9.223211515349775e-05, "loss": 0.7212, "step": 34335 }, { "epoch": 0.9092038303957016, "grad_norm": 0.78125, "learning_rate": 9.222752533154077e-05, "loss": 0.6895, "step": 34336 }, { "epoch": 0.909230310003996, "grad_norm": 0.734375, "learning_rate": 9.222293552605715e-05, "loss": 0.648, "step": 34337 }, { "epoch": 0.9092567896122904, "grad_norm": 0.7421875, "learning_rate": 9.221834573705658e-05, "loss": 0.7368, "step": 34338 }, { "epoch": 0.9092832692205848, "grad_norm": 0.7734375, "learning_rate": 9.221375596454878e-05, "loss": 0.6579, "step": 34339 }, { "epoch": 0.9093097488288792, "grad_norm": 0.828125, "learning_rate": 9.220916620854347e-05, "loss": 0.8758, "step": 34340 }, { "epoch": 0.9093362284371735, "grad_norm": 0.77734375, "learning_rate": 9.220457646905035e-05, "loss": 0.8487, "step": 34341 }, { "epoch": 0.9093627080454679, "grad_norm": 0.8046875, "learning_rate": 9.219998674607923e-05, "loss": 0.8747, "step": 34342 }, { "epoch": 0.9093891876537623, "grad_norm": 0.8125, "learning_rate": 9.219539703963978e-05, "loss": 0.7587, "step": 34343 }, { "epoch": 0.9094156672620567, "grad_norm": 0.859375, "learning_rate": 9.219080734974176e-05, "loss": 0.7978, "step": 34344 }, { "epoch": 0.9094421468703511, "grad_norm": 0.73046875, "learning_rate": 9.218621767639487e-05, "loss": 0.7593, "step": 34345 }, { "epoch": 0.9094686264786453, "grad_norm": 0.94140625, "learning_rate": 9.21816280196088e-05, "loss": 0.8092, "step": 34346 }, { "epoch": 0.9094951060869397, "grad_norm": 0.765625, "learning_rate": 9.217703837939337e-05, "loss": 0.8136, "step": 34347 }, { "epoch": 0.9095215856952341, "grad_norm": 0.82421875, "learning_rate": 9.217244875575825e-05, "loss": 0.7992, "step": 34348 }, { "epoch": 0.9095480653035285, "grad_norm": 0.76953125, "learning_rate": 9.216785914871318e-05, "loss": 0.8365, "step": 34349 }, { "epoch": 0.9095745449118229, "grad_norm": 0.7421875, "learning_rate": 9.216326955826788e-05, "loss": 0.7636, "step": 34350 }, { "epoch": 0.9096010245201173, "grad_norm": 0.76953125, "learning_rate": 9.215867998443208e-05, "loss": 0.8378, "step": 34351 }, { "epoch": 0.9096275041284116, "grad_norm": 0.765625, "learning_rate": 9.215409042721552e-05, "loss": 0.8144, "step": 34352 }, { "epoch": 0.909653983736706, "grad_norm": 0.73046875, "learning_rate": 9.214950088662791e-05, "loss": 0.7831, "step": 34353 }, { "epoch": 0.9096804633450004, "grad_norm": 0.77734375, "learning_rate": 9.2144911362679e-05, "loss": 0.8265, "step": 34354 }, { "epoch": 0.9097069429532948, "grad_norm": 0.79296875, "learning_rate": 9.214032185537849e-05, "loss": 0.7817, "step": 34355 }, { "epoch": 0.9097334225615892, "grad_norm": 0.734375, "learning_rate": 9.213573236473607e-05, "loss": 0.815, "step": 34356 }, { "epoch": 0.9097599021698836, "grad_norm": 0.8046875, "learning_rate": 9.213114289076156e-05, "loss": 0.8273, "step": 34357 }, { "epoch": 0.9097863817781779, "grad_norm": 0.80078125, "learning_rate": 9.212655343346466e-05, "loss": 0.8541, "step": 34358 }, { "epoch": 0.9098128613864723, "grad_norm": 0.796875, "learning_rate": 9.212196399285506e-05, "loss": 0.7732, "step": 34359 }, { "epoch": 0.9098393409947667, "grad_norm": 0.7578125, "learning_rate": 9.211737456894253e-05, "loss": 0.794, "step": 34360 }, { "epoch": 0.9098658206030611, "grad_norm": 0.7578125, "learning_rate": 9.211278516173671e-05, "loss": 0.7188, "step": 34361 }, { "epoch": 0.9098923002113554, "grad_norm": 0.9140625, "learning_rate": 9.210819577124745e-05, "loss": 0.7147, "step": 34362 }, { "epoch": 0.9099187798196497, "grad_norm": 0.7734375, "learning_rate": 9.21036063974844e-05, "loss": 0.8136, "step": 34363 }, { "epoch": 0.9099452594279441, "grad_norm": 0.77734375, "learning_rate": 9.209901704045732e-05, "loss": 0.6773, "step": 34364 }, { "epoch": 0.9099717390362385, "grad_norm": 0.71875, "learning_rate": 9.209442770017591e-05, "loss": 0.765, "step": 34365 }, { "epoch": 0.9099982186445329, "grad_norm": 0.71484375, "learning_rate": 9.208983837664987e-05, "loss": 0.667, "step": 34366 }, { "epoch": 0.9100246982528273, "grad_norm": 0.82421875, "learning_rate": 9.208524906988902e-05, "loss": 0.8288, "step": 34367 }, { "epoch": 0.9100511778611217, "grad_norm": 0.7578125, "learning_rate": 9.2080659779903e-05, "loss": 0.9128, "step": 34368 }, { "epoch": 0.910077657469416, "grad_norm": 0.8125, "learning_rate": 9.207607050670161e-05, "loss": 0.7431, "step": 34369 }, { "epoch": 0.9101041370777104, "grad_norm": 0.75, "learning_rate": 9.20714812502945e-05, "loss": 0.8587, "step": 34370 }, { "epoch": 0.9101306166860048, "grad_norm": 0.796875, "learning_rate": 9.206689201069144e-05, "loss": 0.7925, "step": 34371 }, { "epoch": 0.9101570962942992, "grad_norm": 0.75390625, "learning_rate": 9.206230278790216e-05, "loss": 0.7632, "step": 34372 }, { "epoch": 0.9101835759025936, "grad_norm": 0.8046875, "learning_rate": 9.205771358193637e-05, "loss": 0.8939, "step": 34373 }, { "epoch": 0.910210055510888, "grad_norm": 0.7578125, "learning_rate": 9.20531243928038e-05, "loss": 0.773, "step": 34374 }, { "epoch": 0.9102365351191823, "grad_norm": 0.78125, "learning_rate": 9.204853522051419e-05, "loss": 0.7586, "step": 34375 }, { "epoch": 0.9102630147274767, "grad_norm": 0.78125, "learning_rate": 9.20439460650772e-05, "loss": 0.6883, "step": 34376 }, { "epoch": 0.9102894943357711, "grad_norm": 0.78515625, "learning_rate": 9.203935692650267e-05, "loss": 0.7411, "step": 34377 }, { "epoch": 0.9103159739440655, "grad_norm": 0.76953125, "learning_rate": 9.203476780480026e-05, "loss": 0.8977, "step": 34378 }, { "epoch": 0.9103424535523598, "grad_norm": 0.81640625, "learning_rate": 9.20301786999797e-05, "loss": 0.7482, "step": 34379 }, { "epoch": 0.9103689331606541, "grad_norm": 0.75390625, "learning_rate": 9.202558961205073e-05, "loss": 0.7638, "step": 34380 }, { "epoch": 0.9103954127689485, "grad_norm": 0.796875, "learning_rate": 9.2021000541023e-05, "loss": 0.8754, "step": 34381 }, { "epoch": 0.9104218923772429, "grad_norm": 0.78125, "learning_rate": 9.201641148690638e-05, "loss": 0.7568, "step": 34382 }, { "epoch": 0.9104483719855373, "grad_norm": 0.81640625, "learning_rate": 9.201182244971049e-05, "loss": 0.7877, "step": 34383 }, { "epoch": 0.9104748515938317, "grad_norm": 0.87890625, "learning_rate": 9.20072334294451e-05, "loss": 0.8294, "step": 34384 }, { "epoch": 0.9105013312021261, "grad_norm": 0.77734375, "learning_rate": 9.200264442611993e-05, "loss": 0.8324, "step": 34385 }, { "epoch": 0.9105278108104204, "grad_norm": 0.75, "learning_rate": 9.199805543974464e-05, "loss": 0.7688, "step": 34386 }, { "epoch": 0.9105542904187148, "grad_norm": 0.80078125, "learning_rate": 9.199346647032906e-05, "loss": 0.827, "step": 34387 }, { "epoch": 0.9105807700270092, "grad_norm": 0.77734375, "learning_rate": 9.198887751788289e-05, "loss": 0.7007, "step": 34388 }, { "epoch": 0.9106072496353036, "grad_norm": 0.75, "learning_rate": 9.198428858241581e-05, "loss": 0.8158, "step": 34389 }, { "epoch": 0.910633729243598, "grad_norm": 0.765625, "learning_rate": 9.19796996639376e-05, "loss": 0.898, "step": 34390 }, { "epoch": 0.9106602088518924, "grad_norm": 0.703125, "learning_rate": 9.197511076245791e-05, "loss": 0.703, "step": 34391 }, { "epoch": 0.9106866884601867, "grad_norm": 0.82421875, "learning_rate": 9.197052187798653e-05, "loss": 0.7907, "step": 34392 }, { "epoch": 0.9107131680684811, "grad_norm": 0.76953125, "learning_rate": 9.19659330105332e-05, "loss": 0.8798, "step": 34393 }, { "epoch": 0.9107396476767755, "grad_norm": 0.7265625, "learning_rate": 9.196134416010758e-05, "loss": 0.803, "step": 34394 }, { "epoch": 0.9107661272850698, "grad_norm": 0.6796875, "learning_rate": 9.195675532671945e-05, "loss": 0.7933, "step": 34395 }, { "epoch": 0.9107926068933642, "grad_norm": 0.80078125, "learning_rate": 9.195216651037849e-05, "loss": 0.7305, "step": 34396 }, { "epoch": 0.9108190865016585, "grad_norm": 0.70703125, "learning_rate": 9.194757771109449e-05, "loss": 0.6835, "step": 34397 }, { "epoch": 0.9108455661099529, "grad_norm": 0.828125, "learning_rate": 9.194298892887712e-05, "loss": 0.7647, "step": 34398 }, { "epoch": 0.9108720457182473, "grad_norm": 0.765625, "learning_rate": 9.193840016373613e-05, "loss": 0.9137, "step": 34399 }, { "epoch": 0.9108985253265417, "grad_norm": 0.79296875, "learning_rate": 9.193381141568126e-05, "loss": 0.8619, "step": 34400 }, { "epoch": 0.9109250049348361, "grad_norm": 0.828125, "learning_rate": 9.192922268472215e-05, "loss": 0.807, "step": 34401 }, { "epoch": 0.9109514845431305, "grad_norm": 0.78515625, "learning_rate": 9.192463397086865e-05, "loss": 0.8715, "step": 34402 }, { "epoch": 0.9109779641514248, "grad_norm": 0.71875, "learning_rate": 9.192004527413042e-05, "loss": 0.8823, "step": 34403 }, { "epoch": 0.9110044437597192, "grad_norm": 0.8046875, "learning_rate": 9.191545659451719e-05, "loss": 0.7956, "step": 34404 }, { "epoch": 0.9110309233680136, "grad_norm": 0.765625, "learning_rate": 9.191086793203869e-05, "loss": 0.8123, "step": 34405 }, { "epoch": 0.911057402976308, "grad_norm": 0.73046875, "learning_rate": 9.19062792867046e-05, "loss": 0.7838, "step": 34406 }, { "epoch": 0.9110838825846024, "grad_norm": 0.80078125, "learning_rate": 9.190169065852473e-05, "loss": 0.6993, "step": 34407 }, { "epoch": 0.9111103621928968, "grad_norm": 0.765625, "learning_rate": 9.189710204750877e-05, "loss": 0.8318, "step": 34408 }, { "epoch": 0.9111368418011911, "grad_norm": 0.77734375, "learning_rate": 9.189251345366643e-05, "loss": 0.8549, "step": 34409 }, { "epoch": 0.9111633214094855, "grad_norm": 0.79296875, "learning_rate": 9.188792487700744e-05, "loss": 0.719, "step": 34410 }, { "epoch": 0.9111898010177798, "grad_norm": 0.74609375, "learning_rate": 9.188333631754151e-05, "loss": 0.7785, "step": 34411 }, { "epoch": 0.9112162806260742, "grad_norm": 0.6796875, "learning_rate": 9.187874777527841e-05, "loss": 0.764, "step": 34412 }, { "epoch": 0.9112427602343686, "grad_norm": 0.89453125, "learning_rate": 9.187415925022783e-05, "loss": 0.8445, "step": 34413 }, { "epoch": 0.911269239842663, "grad_norm": 0.7890625, "learning_rate": 9.186957074239952e-05, "loss": 0.7332, "step": 34414 }, { "epoch": 0.9112957194509573, "grad_norm": 0.79296875, "learning_rate": 9.186498225180318e-05, "loss": 0.7495, "step": 34415 }, { "epoch": 0.9113221990592517, "grad_norm": 0.7578125, "learning_rate": 9.186039377844853e-05, "loss": 0.7544, "step": 34416 }, { "epoch": 0.9113486786675461, "grad_norm": 0.72265625, "learning_rate": 9.185580532234533e-05, "loss": 0.7104, "step": 34417 }, { "epoch": 0.9113751582758405, "grad_norm": 0.8046875, "learning_rate": 9.18512168835033e-05, "loss": 0.8042, "step": 34418 }, { "epoch": 0.9114016378841349, "grad_norm": 0.796875, "learning_rate": 9.184662846193213e-05, "loss": 0.8751, "step": 34419 }, { "epoch": 0.9114281174924292, "grad_norm": 0.87890625, "learning_rate": 9.184204005764158e-05, "loss": 0.7549, "step": 34420 }, { "epoch": 0.9114545971007236, "grad_norm": 0.796875, "learning_rate": 9.18374516706413e-05, "loss": 0.7843, "step": 34421 }, { "epoch": 0.911481076709018, "grad_norm": 0.8046875, "learning_rate": 9.183286330094113e-05, "loss": 0.7942, "step": 34422 }, { "epoch": 0.9115075563173124, "grad_norm": 0.70703125, "learning_rate": 9.182827494855075e-05, "loss": 0.8914, "step": 34423 }, { "epoch": 0.9115340359256068, "grad_norm": 0.7421875, "learning_rate": 9.182368661347984e-05, "loss": 0.8631, "step": 34424 }, { "epoch": 0.9115605155339012, "grad_norm": 0.98828125, "learning_rate": 9.181909829573818e-05, "loss": 0.8003, "step": 34425 }, { "epoch": 0.9115869951421955, "grad_norm": 0.78125, "learning_rate": 9.181450999533549e-05, "loss": 0.8452, "step": 34426 }, { "epoch": 0.9116134747504899, "grad_norm": 0.76171875, "learning_rate": 9.180992171228141e-05, "loss": 0.9028, "step": 34427 }, { "epoch": 0.9116399543587842, "grad_norm": 0.79296875, "learning_rate": 9.180533344658578e-05, "loss": 0.8542, "step": 34428 }, { "epoch": 0.9116664339670786, "grad_norm": 0.7890625, "learning_rate": 9.18007451982583e-05, "loss": 0.8145, "step": 34429 }, { "epoch": 0.911692913575373, "grad_norm": 0.80078125, "learning_rate": 9.179615696730863e-05, "loss": 0.8928, "step": 34430 }, { "epoch": 0.9117193931836673, "grad_norm": 0.7578125, "learning_rate": 9.179156875374658e-05, "loss": 0.7603, "step": 34431 }, { "epoch": 0.9117458727919617, "grad_norm": 0.89453125, "learning_rate": 9.178698055758175e-05, "loss": 0.8035, "step": 34432 }, { "epoch": 0.9117723524002561, "grad_norm": 0.75, "learning_rate": 9.178239237882401e-05, "loss": 0.7646, "step": 34433 }, { "epoch": 0.9117988320085505, "grad_norm": 0.87109375, "learning_rate": 9.177780421748301e-05, "loss": 0.7786, "step": 34434 }, { "epoch": 0.9118253116168449, "grad_norm": 0.7734375, "learning_rate": 9.177321607356849e-05, "loss": 0.7298, "step": 34435 }, { "epoch": 0.9118517912251393, "grad_norm": 0.70703125, "learning_rate": 9.176862794709017e-05, "loss": 0.7306, "step": 34436 }, { "epoch": 0.9118782708334336, "grad_norm": 0.87109375, "learning_rate": 9.176403983805775e-05, "loss": 0.8442, "step": 34437 }, { "epoch": 0.911904750441728, "grad_norm": 0.8203125, "learning_rate": 9.175945174648099e-05, "loss": 0.88, "step": 34438 }, { "epoch": 0.9119312300500224, "grad_norm": 0.8203125, "learning_rate": 9.175486367236961e-05, "loss": 0.8494, "step": 34439 }, { "epoch": 0.9119577096583168, "grad_norm": 0.72265625, "learning_rate": 9.175027561573333e-05, "loss": 0.7362, "step": 34440 }, { "epoch": 0.9119841892666112, "grad_norm": 0.7890625, "learning_rate": 9.174568757658186e-05, "loss": 0.8505, "step": 34441 }, { "epoch": 0.9120106688749056, "grad_norm": 0.73828125, "learning_rate": 9.174109955492489e-05, "loss": 0.7823, "step": 34442 }, { "epoch": 0.9120371484831999, "grad_norm": 0.82421875, "learning_rate": 9.173651155077225e-05, "loss": 0.7448, "step": 34443 }, { "epoch": 0.9120636280914942, "grad_norm": 0.765625, "learning_rate": 9.173192356413357e-05, "loss": 0.7237, "step": 34444 }, { "epoch": 0.9120901076997886, "grad_norm": 0.76953125, "learning_rate": 9.172733559501862e-05, "loss": 0.8869, "step": 34445 }, { "epoch": 0.912116587308083, "grad_norm": 0.75, "learning_rate": 9.172274764343711e-05, "loss": 0.6991, "step": 34446 }, { "epoch": 0.9121430669163774, "grad_norm": 0.76171875, "learning_rate": 9.171815970939872e-05, "loss": 0.8708, "step": 34447 }, { "epoch": 0.9121695465246717, "grad_norm": 0.8046875, "learning_rate": 9.171357179291326e-05, "loss": 0.7599, "step": 34448 }, { "epoch": 0.9121960261329661, "grad_norm": 0.7578125, "learning_rate": 9.170898389399042e-05, "loss": 0.7022, "step": 34449 }, { "epoch": 0.9122225057412605, "grad_norm": 0.796875, "learning_rate": 9.17043960126399e-05, "loss": 0.8258, "step": 34450 }, { "epoch": 0.9122489853495549, "grad_norm": 0.72265625, "learning_rate": 9.169980814887145e-05, "loss": 0.8571, "step": 34451 }, { "epoch": 0.9122754649578493, "grad_norm": 0.76953125, "learning_rate": 9.169522030269473e-05, "loss": 0.7026, "step": 34452 }, { "epoch": 0.9123019445661437, "grad_norm": 0.79296875, "learning_rate": 9.169063247411957e-05, "loss": 0.7619, "step": 34453 }, { "epoch": 0.912328424174438, "grad_norm": 0.7734375, "learning_rate": 9.168604466315562e-05, "loss": 0.7905, "step": 34454 }, { "epoch": 0.9123549037827324, "grad_norm": 0.828125, "learning_rate": 9.168145686981264e-05, "loss": 0.776, "step": 34455 }, { "epoch": 0.9123813833910268, "grad_norm": 0.8203125, "learning_rate": 9.167686909410032e-05, "loss": 0.8804, "step": 34456 }, { "epoch": 0.9124078629993212, "grad_norm": 0.828125, "learning_rate": 9.16722813360284e-05, "loss": 0.8235, "step": 34457 }, { "epoch": 0.9124343426076156, "grad_norm": 0.84765625, "learning_rate": 9.166769359560662e-05, "loss": 0.7567, "step": 34458 }, { "epoch": 0.91246082221591, "grad_norm": 0.8046875, "learning_rate": 9.166310587284468e-05, "loss": 0.8532, "step": 34459 }, { "epoch": 0.9124873018242042, "grad_norm": 0.83984375, "learning_rate": 9.165851816775231e-05, "loss": 0.7684, "step": 34460 }, { "epoch": 0.9125137814324986, "grad_norm": 0.765625, "learning_rate": 9.16539304803392e-05, "loss": 0.7581, "step": 34461 }, { "epoch": 0.912540261040793, "grad_norm": 0.75390625, "learning_rate": 9.164934281061513e-05, "loss": 0.7439, "step": 34462 }, { "epoch": 0.9125667406490874, "grad_norm": 0.78125, "learning_rate": 9.164475515858983e-05, "loss": 0.7928, "step": 34463 }, { "epoch": 0.9125932202573818, "grad_norm": 0.75390625, "learning_rate": 9.164016752427297e-05, "loss": 0.7705, "step": 34464 }, { "epoch": 0.9126196998656761, "grad_norm": 1.6640625, "learning_rate": 9.16355799076743e-05, "loss": 0.7925, "step": 34465 }, { "epoch": 0.9126461794739705, "grad_norm": 0.74609375, "learning_rate": 9.163099230880356e-05, "loss": 0.7705, "step": 34466 }, { "epoch": 0.9126726590822649, "grad_norm": 0.68359375, "learning_rate": 9.162640472767038e-05, "loss": 0.8106, "step": 34467 }, { "epoch": 0.9126991386905593, "grad_norm": 0.7421875, "learning_rate": 9.162181716428463e-05, "loss": 0.7167, "step": 34468 }, { "epoch": 0.9127256182988537, "grad_norm": 0.86328125, "learning_rate": 9.161722961865596e-05, "loss": 0.8709, "step": 34469 }, { "epoch": 0.9127520979071481, "grad_norm": 0.8046875, "learning_rate": 9.161264209079407e-05, "loss": 0.8115, "step": 34470 }, { "epoch": 0.9127785775154424, "grad_norm": 0.91796875, "learning_rate": 9.160805458070871e-05, "loss": 0.9012, "step": 34471 }, { "epoch": 0.9128050571237368, "grad_norm": 0.72265625, "learning_rate": 9.160346708840957e-05, "loss": 0.7992, "step": 34472 }, { "epoch": 0.9128315367320312, "grad_norm": 0.7421875, "learning_rate": 9.159887961390642e-05, "loss": 0.698, "step": 34473 }, { "epoch": 0.9128580163403256, "grad_norm": 0.6875, "learning_rate": 9.159429215720898e-05, "loss": 0.741, "step": 34474 }, { "epoch": 0.91288449594862, "grad_norm": 0.8125, "learning_rate": 9.158970471832697e-05, "loss": 0.8469, "step": 34475 }, { "epoch": 0.9129109755569144, "grad_norm": 0.8125, "learning_rate": 9.158511729727008e-05, "loss": 0.8362, "step": 34476 }, { "epoch": 0.9129374551652086, "grad_norm": 0.79296875, "learning_rate": 9.158052989404803e-05, "loss": 0.8402, "step": 34477 }, { "epoch": 0.912963934773503, "grad_norm": 0.85546875, "learning_rate": 9.157594250867061e-05, "loss": 0.8562, "step": 34478 }, { "epoch": 0.9129904143817974, "grad_norm": 0.74609375, "learning_rate": 9.157135514114747e-05, "loss": 0.8003, "step": 34479 }, { "epoch": 0.9130168939900918, "grad_norm": 0.77734375, "learning_rate": 9.156676779148836e-05, "loss": 0.8477, "step": 34480 }, { "epoch": 0.9130433735983862, "grad_norm": 0.76953125, "learning_rate": 9.156218045970302e-05, "loss": 0.8291, "step": 34481 }, { "epoch": 0.9130698532066805, "grad_norm": 0.78125, "learning_rate": 9.155759314580113e-05, "loss": 0.8261, "step": 34482 }, { "epoch": 0.9130963328149749, "grad_norm": 0.71875, "learning_rate": 9.155300584979246e-05, "loss": 0.7559, "step": 34483 }, { "epoch": 0.9131228124232693, "grad_norm": 0.890625, "learning_rate": 9.154841857168672e-05, "loss": 0.7971, "step": 34484 }, { "epoch": 0.9131492920315637, "grad_norm": 0.72265625, "learning_rate": 9.154383131149362e-05, "loss": 0.6765, "step": 34485 }, { "epoch": 0.9131757716398581, "grad_norm": 0.734375, "learning_rate": 9.153924406922289e-05, "loss": 0.8235, "step": 34486 }, { "epoch": 0.9132022512481525, "grad_norm": 0.8125, "learning_rate": 9.15346568448842e-05, "loss": 0.8129, "step": 34487 }, { "epoch": 0.9132287308564468, "grad_norm": 0.76171875, "learning_rate": 9.153006963848736e-05, "loss": 0.7782, "step": 34488 }, { "epoch": 0.9132552104647412, "grad_norm": 0.796875, "learning_rate": 9.152548245004205e-05, "loss": 0.9135, "step": 34489 }, { "epoch": 0.9132816900730356, "grad_norm": 0.76171875, "learning_rate": 9.1520895279558e-05, "loss": 0.7343, "step": 34490 }, { "epoch": 0.91330816968133, "grad_norm": 0.78515625, "learning_rate": 9.151630812704494e-05, "loss": 0.8022, "step": 34491 }, { "epoch": 0.9133346492896244, "grad_norm": 0.7578125, "learning_rate": 9.151172099251253e-05, "loss": 0.9558, "step": 34492 }, { "epoch": 0.9133611288979187, "grad_norm": 0.765625, "learning_rate": 9.150713387597059e-05, "loss": 0.7229, "step": 34493 }, { "epoch": 0.913387608506213, "grad_norm": 0.76171875, "learning_rate": 9.150254677742877e-05, "loss": 0.8922, "step": 34494 }, { "epoch": 0.9134140881145074, "grad_norm": 0.69921875, "learning_rate": 9.149795969689684e-05, "loss": 0.7319, "step": 34495 }, { "epoch": 0.9134405677228018, "grad_norm": 0.8046875, "learning_rate": 9.149337263438449e-05, "loss": 0.8913, "step": 34496 }, { "epoch": 0.9134670473310962, "grad_norm": 0.73046875, "learning_rate": 9.148878558990144e-05, "loss": 1.0285, "step": 34497 }, { "epoch": 0.9134935269393906, "grad_norm": 0.7421875, "learning_rate": 9.148419856345743e-05, "loss": 0.811, "step": 34498 }, { "epoch": 0.913520006547685, "grad_norm": 0.9296875, "learning_rate": 9.147961155506216e-05, "loss": 0.7756, "step": 34499 }, { "epoch": 0.9135464861559793, "grad_norm": 0.87109375, "learning_rate": 9.147502456472539e-05, "loss": 0.8173, "step": 34500 }, { "epoch": 0.9135729657642737, "grad_norm": 0.7578125, "learning_rate": 9.147043759245682e-05, "loss": 0.7302, "step": 34501 }, { "epoch": 0.9135994453725681, "grad_norm": 0.80859375, "learning_rate": 9.146585063826614e-05, "loss": 0.7956, "step": 34502 }, { "epoch": 0.9136259249808625, "grad_norm": 0.80078125, "learning_rate": 9.146126370216312e-05, "loss": 0.8818, "step": 34503 }, { "epoch": 0.9136524045891569, "grad_norm": 0.734375, "learning_rate": 9.145667678415747e-05, "loss": 0.7899, "step": 34504 }, { "epoch": 0.9136788841974512, "grad_norm": 0.81640625, "learning_rate": 9.145208988425891e-05, "loss": 0.7801, "step": 34505 }, { "epoch": 0.9137053638057456, "grad_norm": 0.81640625, "learning_rate": 9.144750300247715e-05, "loss": 0.862, "step": 34506 }, { "epoch": 0.91373184341404, "grad_norm": 0.78515625, "learning_rate": 9.144291613882188e-05, "loss": 0.7501, "step": 34507 }, { "epoch": 0.9137583230223344, "grad_norm": 0.734375, "learning_rate": 9.14383292933029e-05, "loss": 0.7994, "step": 34508 }, { "epoch": 0.9137848026306287, "grad_norm": 0.80859375, "learning_rate": 9.143374246592989e-05, "loss": 0.8182, "step": 34509 }, { "epoch": 0.913811282238923, "grad_norm": 0.7109375, "learning_rate": 9.142915565671257e-05, "loss": 0.8392, "step": 34510 }, { "epoch": 0.9138377618472174, "grad_norm": 0.75390625, "learning_rate": 9.142456886566067e-05, "loss": 0.7294, "step": 34511 }, { "epoch": 0.9138642414555118, "grad_norm": 0.72265625, "learning_rate": 9.141998209278386e-05, "loss": 0.806, "step": 34512 }, { "epoch": 0.9138907210638062, "grad_norm": 0.7734375, "learning_rate": 9.141539533809195e-05, "loss": 0.7727, "step": 34513 }, { "epoch": 0.9139172006721006, "grad_norm": 0.8125, "learning_rate": 9.141080860159464e-05, "loss": 0.7173, "step": 34514 }, { "epoch": 0.913943680280395, "grad_norm": 0.796875, "learning_rate": 9.140622188330161e-05, "loss": 0.8204, "step": 34515 }, { "epoch": 0.9139701598886893, "grad_norm": 0.765625, "learning_rate": 9.14016351832226e-05, "loss": 0.7844, "step": 34516 }, { "epoch": 0.9139966394969837, "grad_norm": 0.7109375, "learning_rate": 9.139704850136729e-05, "loss": 0.7687, "step": 34517 }, { "epoch": 0.9140231191052781, "grad_norm": 0.73828125, "learning_rate": 9.139246183774549e-05, "loss": 0.7759, "step": 34518 }, { "epoch": 0.9140495987135725, "grad_norm": 0.81640625, "learning_rate": 9.138787519236687e-05, "loss": 0.8672, "step": 34519 }, { "epoch": 0.9140760783218669, "grad_norm": 0.73046875, "learning_rate": 9.138328856524116e-05, "loss": 0.7785, "step": 34520 }, { "epoch": 0.9141025579301613, "grad_norm": 0.79296875, "learning_rate": 9.137870195637809e-05, "loss": 0.8307, "step": 34521 }, { "epoch": 0.9141290375384556, "grad_norm": 0.765625, "learning_rate": 9.137411536578734e-05, "loss": 0.6681, "step": 34522 }, { "epoch": 0.91415551714675, "grad_norm": 0.7578125, "learning_rate": 9.136952879347868e-05, "loss": 0.8198, "step": 34523 }, { "epoch": 0.9141819967550444, "grad_norm": 0.75, "learning_rate": 9.13649422394618e-05, "loss": 0.8619, "step": 34524 }, { "epoch": 0.9142084763633388, "grad_norm": 0.8125, "learning_rate": 9.136035570374645e-05, "loss": 0.7955, "step": 34525 }, { "epoch": 0.9142349559716331, "grad_norm": 0.75390625, "learning_rate": 9.135576918634231e-05, "loss": 0.7599, "step": 34526 }, { "epoch": 0.9142614355799275, "grad_norm": 0.76171875, "learning_rate": 9.135118268725909e-05, "loss": 0.7868, "step": 34527 }, { "epoch": 0.9142879151882218, "grad_norm": 0.828125, "learning_rate": 9.13465962065066e-05, "loss": 0.72, "step": 34528 }, { "epoch": 0.9143143947965162, "grad_norm": 0.76171875, "learning_rate": 9.13420097440945e-05, "loss": 0.7915, "step": 34529 }, { "epoch": 0.9143408744048106, "grad_norm": 0.76171875, "learning_rate": 9.13374233000325e-05, "loss": 0.8681, "step": 34530 }, { "epoch": 0.914367354013105, "grad_norm": 0.7265625, "learning_rate": 9.133283687433035e-05, "loss": 0.7675, "step": 34531 }, { "epoch": 0.9143938336213994, "grad_norm": 0.81640625, "learning_rate": 9.13282504669977e-05, "loss": 0.8702, "step": 34532 }, { "epoch": 0.9144203132296937, "grad_norm": 0.765625, "learning_rate": 9.132366407804438e-05, "loss": 0.6989, "step": 34533 }, { "epoch": 0.9144467928379881, "grad_norm": 0.88671875, "learning_rate": 9.131907770748006e-05, "loss": 0.971, "step": 34534 }, { "epoch": 0.9144732724462825, "grad_norm": 0.74609375, "learning_rate": 9.131449135531445e-05, "loss": 0.7079, "step": 34535 }, { "epoch": 0.9144997520545769, "grad_norm": 0.78125, "learning_rate": 9.130990502155729e-05, "loss": 0.6942, "step": 34536 }, { "epoch": 0.9145262316628713, "grad_norm": 0.83203125, "learning_rate": 9.130531870621824e-05, "loss": 0.7901, "step": 34537 }, { "epoch": 0.9145527112711657, "grad_norm": 0.74609375, "learning_rate": 9.130073240930712e-05, "loss": 0.8573, "step": 34538 }, { "epoch": 0.91457919087946, "grad_norm": 0.81640625, "learning_rate": 9.129614613083359e-05, "loss": 0.7235, "step": 34539 }, { "epoch": 0.9146056704877544, "grad_norm": 0.85546875, "learning_rate": 9.129155987080739e-05, "loss": 0.8377, "step": 34540 }, { "epoch": 0.9146321500960488, "grad_norm": 0.76171875, "learning_rate": 9.128697362923822e-05, "loss": 0.7017, "step": 34541 }, { "epoch": 0.9146586297043431, "grad_norm": 0.79296875, "learning_rate": 9.12823874061358e-05, "loss": 0.8029, "step": 34542 }, { "epoch": 0.9146851093126375, "grad_norm": 0.765625, "learning_rate": 9.127780120150988e-05, "loss": 0.7887, "step": 34543 }, { "epoch": 0.9147115889209319, "grad_norm": 0.76171875, "learning_rate": 9.127321501537018e-05, "loss": 0.7656, "step": 34544 }, { "epoch": 0.9147380685292262, "grad_norm": 0.8125, "learning_rate": 9.126862884772638e-05, "loss": 0.8211, "step": 34545 }, { "epoch": 0.9147645481375206, "grad_norm": 0.75390625, "learning_rate": 9.126404269858821e-05, "loss": 0.8006, "step": 34546 }, { "epoch": 0.914791027745815, "grad_norm": 0.7734375, "learning_rate": 9.125945656796539e-05, "loss": 0.8547, "step": 34547 }, { "epoch": 0.9148175073541094, "grad_norm": 0.703125, "learning_rate": 9.125487045586767e-05, "loss": 0.8003, "step": 34548 }, { "epoch": 0.9148439869624038, "grad_norm": 0.703125, "learning_rate": 9.125028436230478e-05, "loss": 0.7698, "step": 34549 }, { "epoch": 0.9148704665706981, "grad_norm": 0.80859375, "learning_rate": 9.124569828728639e-05, "loss": 0.7967, "step": 34550 }, { "epoch": 0.9148969461789925, "grad_norm": 0.80859375, "learning_rate": 9.124111223082224e-05, "loss": 0.8235, "step": 34551 }, { "epoch": 0.9149234257872869, "grad_norm": 0.9296875, "learning_rate": 9.123652619292202e-05, "loss": 0.7796, "step": 34552 }, { "epoch": 0.9149499053955813, "grad_norm": 0.75390625, "learning_rate": 9.123194017359552e-05, "loss": 0.7913, "step": 34553 }, { "epoch": 0.9149763850038757, "grad_norm": 0.7734375, "learning_rate": 9.122735417285242e-05, "loss": 0.8125, "step": 34554 }, { "epoch": 0.9150028646121701, "grad_norm": 0.79296875, "learning_rate": 9.122276819070244e-05, "loss": 0.7914, "step": 34555 }, { "epoch": 0.9150293442204644, "grad_norm": 0.78125, "learning_rate": 9.121818222715531e-05, "loss": 0.7174, "step": 34556 }, { "epoch": 0.9150558238287588, "grad_norm": 0.94140625, "learning_rate": 9.12135962822207e-05, "loss": 0.7865, "step": 34557 }, { "epoch": 0.9150823034370531, "grad_norm": 0.8359375, "learning_rate": 9.120901035590839e-05, "loss": 0.9108, "step": 34558 }, { "epoch": 0.9151087830453475, "grad_norm": 0.91796875, "learning_rate": 9.120442444822811e-05, "loss": 0.7954, "step": 34559 }, { "epoch": 0.9151352626536419, "grad_norm": 0.83984375, "learning_rate": 9.119983855918953e-05, "loss": 0.8844, "step": 34560 }, { "epoch": 0.9151617422619363, "grad_norm": 0.78515625, "learning_rate": 9.11952526888024e-05, "loss": 0.859, "step": 34561 }, { "epoch": 0.9151882218702306, "grad_norm": 0.765625, "learning_rate": 9.11906668370764e-05, "loss": 0.7951, "step": 34562 }, { "epoch": 0.915214701478525, "grad_norm": 0.82421875, "learning_rate": 9.118608100402131e-05, "loss": 0.7506, "step": 34563 }, { "epoch": 0.9152411810868194, "grad_norm": 0.7890625, "learning_rate": 9.118149518964681e-05, "loss": 0.7615, "step": 34564 }, { "epoch": 0.9152676606951138, "grad_norm": 0.80859375, "learning_rate": 9.11769093939626e-05, "loss": 0.8297, "step": 34565 }, { "epoch": 0.9152941403034082, "grad_norm": 0.80078125, "learning_rate": 9.117232361697844e-05, "loss": 0.7695, "step": 34566 }, { "epoch": 0.9153206199117025, "grad_norm": 0.80859375, "learning_rate": 9.116773785870406e-05, "loss": 0.8357, "step": 34567 }, { "epoch": 0.9153470995199969, "grad_norm": 0.765625, "learning_rate": 9.116315211914913e-05, "loss": 0.7644, "step": 34568 }, { "epoch": 0.9153735791282913, "grad_norm": 0.86328125, "learning_rate": 9.11585663983234e-05, "loss": 0.8292, "step": 34569 }, { "epoch": 0.9154000587365857, "grad_norm": 0.71484375, "learning_rate": 9.11539806962366e-05, "loss": 0.7056, "step": 34570 }, { "epoch": 0.9154265383448801, "grad_norm": 0.78515625, "learning_rate": 9.114939501289841e-05, "loss": 0.8853, "step": 34571 }, { "epoch": 0.9154530179531745, "grad_norm": 0.88671875, "learning_rate": 9.114480934831858e-05, "loss": 0.8438, "step": 34572 }, { "epoch": 0.9154794975614688, "grad_norm": 0.74609375, "learning_rate": 9.11402237025068e-05, "loss": 0.7578, "step": 34573 }, { "epoch": 0.9155059771697632, "grad_norm": 0.78515625, "learning_rate": 9.113563807547282e-05, "loss": 0.7825, "step": 34574 }, { "epoch": 0.9155324567780575, "grad_norm": 0.74609375, "learning_rate": 9.113105246722637e-05, "loss": 0.7351, "step": 34575 }, { "epoch": 0.9155589363863519, "grad_norm": 0.7734375, "learning_rate": 9.112646687777714e-05, "loss": 0.8057, "step": 34576 }, { "epoch": 0.9155854159946463, "grad_norm": 0.77734375, "learning_rate": 9.112188130713485e-05, "loss": 0.7687, "step": 34577 }, { "epoch": 0.9156118956029407, "grad_norm": 0.9375, "learning_rate": 9.111729575530919e-05, "loss": 0.8393, "step": 34578 }, { "epoch": 0.915638375211235, "grad_norm": 0.78125, "learning_rate": 9.111271022230995e-05, "loss": 0.8282, "step": 34579 }, { "epoch": 0.9156648548195294, "grad_norm": 0.7265625, "learning_rate": 9.110812470814683e-05, "loss": 0.7011, "step": 34580 }, { "epoch": 0.9156913344278238, "grad_norm": 0.78125, "learning_rate": 9.110353921282951e-05, "loss": 0.8101, "step": 34581 }, { "epoch": 0.9157178140361182, "grad_norm": 0.74609375, "learning_rate": 9.109895373636774e-05, "loss": 0.7246, "step": 34582 }, { "epoch": 0.9157442936444126, "grad_norm": 0.79296875, "learning_rate": 9.10943682787712e-05, "loss": 0.8001, "step": 34583 }, { "epoch": 0.915770773252707, "grad_norm": 0.796875, "learning_rate": 9.108978284004966e-05, "loss": 0.82, "step": 34584 }, { "epoch": 0.9157972528610013, "grad_norm": 0.828125, "learning_rate": 9.108519742021281e-05, "loss": 0.9098, "step": 34585 }, { "epoch": 0.9158237324692957, "grad_norm": 0.734375, "learning_rate": 9.108061201927039e-05, "loss": 0.7448, "step": 34586 }, { "epoch": 0.9158502120775901, "grad_norm": 0.72265625, "learning_rate": 9.10760266372321e-05, "loss": 0.7617, "step": 34587 }, { "epoch": 0.9158766916858845, "grad_norm": 0.77734375, "learning_rate": 9.107144127410764e-05, "loss": 0.7587, "step": 34588 }, { "epoch": 0.9159031712941789, "grad_norm": 0.86328125, "learning_rate": 9.106685592990677e-05, "loss": 0.7988, "step": 34589 }, { "epoch": 0.9159296509024732, "grad_norm": 0.8046875, "learning_rate": 9.106227060463919e-05, "loss": 0.8165, "step": 34590 }, { "epoch": 0.9159561305107675, "grad_norm": 0.73046875, "learning_rate": 9.105768529831462e-05, "loss": 0.8149, "step": 34591 }, { "epoch": 0.9159826101190619, "grad_norm": 0.69921875, "learning_rate": 9.105310001094276e-05, "loss": 0.7492, "step": 34592 }, { "epoch": 0.9160090897273563, "grad_norm": 0.7265625, "learning_rate": 9.10485147425333e-05, "loss": 0.7476, "step": 34593 }, { "epoch": 0.9160355693356507, "grad_norm": 0.85546875, "learning_rate": 9.104392949309607e-05, "loss": 0.9355, "step": 34594 }, { "epoch": 0.916062048943945, "grad_norm": 0.78515625, "learning_rate": 9.10393442626407e-05, "loss": 0.7556, "step": 34595 }, { "epoch": 0.9160885285522394, "grad_norm": 0.79296875, "learning_rate": 9.103475905117693e-05, "loss": 0.8257, "step": 34596 }, { "epoch": 0.9161150081605338, "grad_norm": 0.72265625, "learning_rate": 9.103017385871448e-05, "loss": 0.6644, "step": 34597 }, { "epoch": 0.9161414877688282, "grad_norm": 0.76171875, "learning_rate": 9.102558868526302e-05, "loss": 0.8479, "step": 34598 }, { "epoch": 0.9161679673771226, "grad_norm": 0.79296875, "learning_rate": 9.102100353083236e-05, "loss": 0.5966, "step": 34599 }, { "epoch": 0.916194446985417, "grad_norm": 0.8203125, "learning_rate": 9.101641839543216e-05, "loss": 0.8193, "step": 34600 }, { "epoch": 0.9162209265937113, "grad_norm": 0.76171875, "learning_rate": 9.101183327907214e-05, "loss": 0.861, "step": 34601 }, { "epoch": 0.9162474062020057, "grad_norm": 0.7265625, "learning_rate": 9.100724818176205e-05, "loss": 0.7225, "step": 34602 }, { "epoch": 0.9162738858103001, "grad_norm": 0.7890625, "learning_rate": 9.100266310351152e-05, "loss": 0.7401, "step": 34603 }, { "epoch": 0.9163003654185945, "grad_norm": 0.75, "learning_rate": 9.099807804433038e-05, "loss": 0.7608, "step": 34604 }, { "epoch": 0.9163268450268889, "grad_norm": 0.75, "learning_rate": 9.09934930042283e-05, "loss": 0.7255, "step": 34605 }, { "epoch": 0.9163533246351833, "grad_norm": 0.8828125, "learning_rate": 9.0988907983215e-05, "loss": 0.8801, "step": 34606 }, { "epoch": 0.9163798042434775, "grad_norm": 0.77734375, "learning_rate": 9.09843229813002e-05, "loss": 0.7649, "step": 34607 }, { "epoch": 0.9164062838517719, "grad_norm": 0.8828125, "learning_rate": 9.097973799849357e-05, "loss": 0.857, "step": 34608 }, { "epoch": 0.9164327634600663, "grad_norm": 0.78125, "learning_rate": 9.097515303480491e-05, "loss": 0.8684, "step": 34609 }, { "epoch": 0.9164592430683607, "grad_norm": 0.796875, "learning_rate": 9.097056809024389e-05, "loss": 0.7084, "step": 34610 }, { "epoch": 0.9164857226766551, "grad_norm": 0.80859375, "learning_rate": 9.096598316482023e-05, "loss": 0.8207, "step": 34611 }, { "epoch": 0.9165122022849495, "grad_norm": 0.82421875, "learning_rate": 9.096139825854366e-05, "loss": 0.8284, "step": 34612 }, { "epoch": 0.9165386818932438, "grad_norm": 0.78515625, "learning_rate": 9.095681337142386e-05, "loss": 0.809, "step": 34613 }, { "epoch": 0.9165651615015382, "grad_norm": 0.75, "learning_rate": 9.09522285034706e-05, "loss": 0.6337, "step": 34614 }, { "epoch": 0.9165916411098326, "grad_norm": 0.8046875, "learning_rate": 9.094764365469357e-05, "loss": 0.7539, "step": 34615 }, { "epoch": 0.916618120718127, "grad_norm": 0.89453125, "learning_rate": 9.09430588251025e-05, "loss": 0.8479, "step": 34616 }, { "epoch": 0.9166446003264214, "grad_norm": 0.73828125, "learning_rate": 9.093847401470711e-05, "loss": 0.7515, "step": 34617 }, { "epoch": 0.9166710799347157, "grad_norm": 0.7421875, "learning_rate": 9.093388922351705e-05, "loss": 0.7964, "step": 34618 }, { "epoch": 0.9166975595430101, "grad_norm": 0.671875, "learning_rate": 9.092930445154214e-05, "loss": 0.7988, "step": 34619 }, { "epoch": 0.9167240391513045, "grad_norm": 0.86328125, "learning_rate": 9.092471969879204e-05, "loss": 0.9186, "step": 34620 }, { "epoch": 0.9167505187595989, "grad_norm": 0.70703125, "learning_rate": 9.092013496527651e-05, "loss": 0.7425, "step": 34621 }, { "epoch": 0.9167769983678933, "grad_norm": 0.7578125, "learning_rate": 9.091555025100521e-05, "loss": 0.8618, "step": 34622 }, { "epoch": 0.9168034779761877, "grad_norm": 0.78515625, "learning_rate": 9.091096555598783e-05, "loss": 0.7029, "step": 34623 }, { "epoch": 0.9168299575844819, "grad_norm": 0.8515625, "learning_rate": 9.09063808802342e-05, "loss": 0.7948, "step": 34624 }, { "epoch": 0.9168564371927763, "grad_norm": 0.80078125, "learning_rate": 9.090179622375398e-05, "loss": 0.8505, "step": 34625 }, { "epoch": 0.9168829168010707, "grad_norm": 0.7421875, "learning_rate": 9.089721158655686e-05, "loss": 0.7304, "step": 34626 }, { "epoch": 0.9169093964093651, "grad_norm": 0.7578125, "learning_rate": 9.089262696865261e-05, "loss": 0.8145, "step": 34627 }, { "epoch": 0.9169358760176595, "grad_norm": 0.80859375, "learning_rate": 9.088804237005087e-05, "loss": 0.7888, "step": 34628 }, { "epoch": 0.9169623556259539, "grad_norm": 0.79296875, "learning_rate": 9.088345779076144e-05, "loss": 0.7913, "step": 34629 }, { "epoch": 0.9169888352342482, "grad_norm": 0.76171875, "learning_rate": 9.0878873230794e-05, "loss": 0.8183, "step": 34630 }, { "epoch": 0.9170153148425426, "grad_norm": 0.78515625, "learning_rate": 9.087428869015824e-05, "loss": 0.6891, "step": 34631 }, { "epoch": 0.917041794450837, "grad_norm": 0.7265625, "learning_rate": 9.086970416886391e-05, "loss": 0.8453, "step": 34632 }, { "epoch": 0.9170682740591314, "grad_norm": 0.83203125, "learning_rate": 9.086511966692072e-05, "loss": 0.8763, "step": 34633 }, { "epoch": 0.9170947536674258, "grad_norm": 0.74609375, "learning_rate": 9.08605351843384e-05, "loss": 0.7748, "step": 34634 }, { "epoch": 0.9171212332757201, "grad_norm": 0.71484375, "learning_rate": 9.085595072112664e-05, "loss": 0.7582, "step": 34635 }, { "epoch": 0.9171477128840145, "grad_norm": 0.73828125, "learning_rate": 9.08513662772952e-05, "loss": 0.8062, "step": 34636 }, { "epoch": 0.9171741924923089, "grad_norm": 0.78515625, "learning_rate": 9.084678185285374e-05, "loss": 0.7843, "step": 34637 }, { "epoch": 0.9172006721006033, "grad_norm": 0.73828125, "learning_rate": 9.084219744781197e-05, "loss": 0.6886, "step": 34638 }, { "epoch": 0.9172271517088977, "grad_norm": 0.83203125, "learning_rate": 9.083761306217969e-05, "loss": 0.8333, "step": 34639 }, { "epoch": 0.917253631317192, "grad_norm": 0.734375, "learning_rate": 9.083302869596656e-05, "loss": 0.7607, "step": 34640 }, { "epoch": 0.9172801109254863, "grad_norm": 0.7734375, "learning_rate": 9.082844434918229e-05, "loss": 0.8214, "step": 34641 }, { "epoch": 0.9173065905337807, "grad_norm": 0.7734375, "learning_rate": 9.082386002183663e-05, "loss": 0.8331, "step": 34642 }, { "epoch": 0.9173330701420751, "grad_norm": 0.765625, "learning_rate": 9.081927571393921e-05, "loss": 0.7969, "step": 34643 }, { "epoch": 0.9173595497503695, "grad_norm": 0.76171875, "learning_rate": 9.081469142549986e-05, "loss": 0.8276, "step": 34644 }, { "epoch": 0.9173860293586639, "grad_norm": 0.75, "learning_rate": 9.081010715652825e-05, "loss": 0.6867, "step": 34645 }, { "epoch": 0.9174125089669583, "grad_norm": 0.89453125, "learning_rate": 9.080552290703409e-05, "loss": 0.809, "step": 34646 }, { "epoch": 0.9174389885752526, "grad_norm": 0.7421875, "learning_rate": 9.080093867702711e-05, "loss": 0.7716, "step": 34647 }, { "epoch": 0.917465468183547, "grad_norm": 0.734375, "learning_rate": 9.079635446651697e-05, "loss": 0.7912, "step": 34648 }, { "epoch": 0.9174919477918414, "grad_norm": 0.8046875, "learning_rate": 9.079177027551346e-05, "loss": 0.8517, "step": 34649 }, { "epoch": 0.9175184274001358, "grad_norm": 0.73828125, "learning_rate": 9.078718610402628e-05, "loss": 0.8471, "step": 34650 }, { "epoch": 0.9175449070084302, "grad_norm": 0.796875, "learning_rate": 9.078260195206511e-05, "loss": 0.7757, "step": 34651 }, { "epoch": 0.9175713866167245, "grad_norm": 0.82421875, "learning_rate": 9.077801781963969e-05, "loss": 0.759, "step": 34652 }, { "epoch": 0.9175978662250189, "grad_norm": 0.81640625, "learning_rate": 9.077343370675972e-05, "loss": 0.8707, "step": 34653 }, { "epoch": 0.9176243458333133, "grad_norm": 0.81640625, "learning_rate": 9.076884961343497e-05, "loss": 0.89, "step": 34654 }, { "epoch": 0.9176508254416077, "grad_norm": 0.79296875, "learning_rate": 9.07642655396751e-05, "loss": 0.8459, "step": 34655 }, { "epoch": 0.917677305049902, "grad_norm": 0.83984375, "learning_rate": 9.075968148548984e-05, "loss": 0.8293, "step": 34656 }, { "epoch": 0.9177037846581964, "grad_norm": 0.73046875, "learning_rate": 9.075509745088892e-05, "loss": 0.7789, "step": 34657 }, { "epoch": 0.9177302642664907, "grad_norm": 0.796875, "learning_rate": 9.075051343588198e-05, "loss": 0.7615, "step": 34658 }, { "epoch": 0.9177567438747851, "grad_norm": 0.7109375, "learning_rate": 9.074592944047884e-05, "loss": 0.6987, "step": 34659 }, { "epoch": 0.9177832234830795, "grad_norm": 0.80859375, "learning_rate": 9.07413454646892e-05, "loss": 0.7447, "step": 34660 }, { "epoch": 0.9178097030913739, "grad_norm": 0.80078125, "learning_rate": 9.073676150852272e-05, "loss": 0.857, "step": 34661 }, { "epoch": 0.9178361826996683, "grad_norm": 0.796875, "learning_rate": 9.073217757198917e-05, "loss": 0.8316, "step": 34662 }, { "epoch": 0.9178626623079627, "grad_norm": 0.68359375, "learning_rate": 9.072759365509818e-05, "loss": 0.7405, "step": 34663 }, { "epoch": 0.917889141916257, "grad_norm": 0.83984375, "learning_rate": 9.072300975785958e-05, "loss": 0.88, "step": 34664 }, { "epoch": 0.9179156215245514, "grad_norm": 0.86328125, "learning_rate": 9.071842588028302e-05, "loss": 0.9168, "step": 34665 }, { "epoch": 0.9179421011328458, "grad_norm": 0.765625, "learning_rate": 9.071384202237824e-05, "loss": 0.6854, "step": 34666 }, { "epoch": 0.9179685807411402, "grad_norm": 0.71875, "learning_rate": 9.070925818415492e-05, "loss": 0.817, "step": 34667 }, { "epoch": 0.9179950603494346, "grad_norm": 0.8125, "learning_rate": 9.070467436562278e-05, "loss": 0.8757, "step": 34668 }, { "epoch": 0.918021539957729, "grad_norm": 0.80078125, "learning_rate": 9.070009056679156e-05, "loss": 0.7164, "step": 34669 }, { "epoch": 0.9180480195660233, "grad_norm": 0.72265625, "learning_rate": 9.069550678767099e-05, "loss": 0.7223, "step": 34670 }, { "epoch": 0.9180744991743177, "grad_norm": 0.765625, "learning_rate": 9.069092302827076e-05, "loss": 0.7825, "step": 34671 }, { "epoch": 0.9181009787826121, "grad_norm": 0.85546875, "learning_rate": 9.068633928860058e-05, "loss": 0.8221, "step": 34672 }, { "epoch": 0.9181274583909064, "grad_norm": 0.75390625, "learning_rate": 9.068175556867015e-05, "loss": 0.6925, "step": 34673 }, { "epoch": 0.9181539379992008, "grad_norm": 0.74609375, "learning_rate": 9.067717186848923e-05, "loss": 0.7826, "step": 34674 }, { "epoch": 0.9181804176074951, "grad_norm": 0.765625, "learning_rate": 9.067258818806752e-05, "loss": 0.785, "step": 34675 }, { "epoch": 0.9182068972157895, "grad_norm": 0.71484375, "learning_rate": 9.066800452741472e-05, "loss": 0.772, "step": 34676 }, { "epoch": 0.9182333768240839, "grad_norm": 0.78515625, "learning_rate": 9.066342088654055e-05, "loss": 0.7975, "step": 34677 }, { "epoch": 0.9182598564323783, "grad_norm": 0.734375, "learning_rate": 9.065883726545468e-05, "loss": 0.6468, "step": 34678 }, { "epoch": 0.9182863360406727, "grad_norm": 0.80859375, "learning_rate": 9.06542536641669e-05, "loss": 0.8079, "step": 34679 }, { "epoch": 0.918312815648967, "grad_norm": 0.88671875, "learning_rate": 9.064967008268692e-05, "loss": 0.9184, "step": 34680 }, { "epoch": 0.9183392952572614, "grad_norm": 0.734375, "learning_rate": 9.064508652102442e-05, "loss": 0.8228, "step": 34681 }, { "epoch": 0.9183657748655558, "grad_norm": 0.74609375, "learning_rate": 9.064050297918914e-05, "loss": 0.6551, "step": 34682 }, { "epoch": 0.9183922544738502, "grad_norm": 0.76171875, "learning_rate": 9.063591945719071e-05, "loss": 0.7582, "step": 34683 }, { "epoch": 0.9184187340821446, "grad_norm": 0.73046875, "learning_rate": 9.063133595503897e-05, "loss": 0.6384, "step": 34684 }, { "epoch": 0.918445213690439, "grad_norm": 0.765625, "learning_rate": 9.062675247274358e-05, "loss": 0.7845, "step": 34685 }, { "epoch": 0.9184716932987333, "grad_norm": 0.8515625, "learning_rate": 9.062216901031424e-05, "loss": 0.7253, "step": 34686 }, { "epoch": 0.9184981729070277, "grad_norm": 0.76171875, "learning_rate": 9.061758556776069e-05, "loss": 0.8972, "step": 34687 }, { "epoch": 0.9185246525153221, "grad_norm": 0.7578125, "learning_rate": 9.061300214509258e-05, "loss": 0.7862, "step": 34688 }, { "epoch": 0.9185511321236164, "grad_norm": 0.7578125, "learning_rate": 9.060841874231972e-05, "loss": 0.8276, "step": 34689 }, { "epoch": 0.9185776117319108, "grad_norm": 0.84765625, "learning_rate": 9.060383535945178e-05, "loss": 0.8095, "step": 34690 }, { "epoch": 0.9186040913402052, "grad_norm": 0.8671875, "learning_rate": 9.059925199649847e-05, "loss": 0.8606, "step": 34691 }, { "epoch": 0.9186305709484995, "grad_norm": 0.79296875, "learning_rate": 9.059466865346953e-05, "loss": 0.8565, "step": 34692 }, { "epoch": 0.9186570505567939, "grad_norm": 0.85546875, "learning_rate": 9.059008533037461e-05, "loss": 0.7678, "step": 34693 }, { "epoch": 0.9186835301650883, "grad_norm": 0.72265625, "learning_rate": 9.058550202722349e-05, "loss": 0.7942, "step": 34694 }, { "epoch": 0.9187100097733827, "grad_norm": 0.7578125, "learning_rate": 9.058091874402586e-05, "loss": 0.8788, "step": 34695 }, { "epoch": 0.9187364893816771, "grad_norm": 0.78125, "learning_rate": 9.057633548079144e-05, "loss": 0.7717, "step": 34696 }, { "epoch": 0.9187629689899715, "grad_norm": 0.83203125, "learning_rate": 9.057175223752993e-05, "loss": 0.7505, "step": 34697 }, { "epoch": 0.9187894485982658, "grad_norm": 0.9296875, "learning_rate": 9.056716901425102e-05, "loss": 0.8552, "step": 34698 }, { "epoch": 0.9188159282065602, "grad_norm": 0.7109375, "learning_rate": 9.056258581096448e-05, "loss": 0.8432, "step": 34699 }, { "epoch": 0.9188424078148546, "grad_norm": 0.69921875, "learning_rate": 9.055800262768001e-05, "loss": 0.6567, "step": 34700 }, { "epoch": 0.918868887423149, "grad_norm": 0.7890625, "learning_rate": 9.055341946440733e-05, "loss": 0.7208, "step": 34701 }, { "epoch": 0.9188953670314434, "grad_norm": 0.7421875, "learning_rate": 9.054883632115611e-05, "loss": 0.8901, "step": 34702 }, { "epoch": 0.9189218466397377, "grad_norm": 0.71484375, "learning_rate": 9.054425319793607e-05, "loss": 0.6446, "step": 34703 }, { "epoch": 0.9189483262480321, "grad_norm": 0.7734375, "learning_rate": 9.0539670094757e-05, "loss": 0.8494, "step": 34704 }, { "epoch": 0.9189748058563264, "grad_norm": 0.703125, "learning_rate": 9.053508701162853e-05, "loss": 0.8313, "step": 34705 }, { "epoch": 0.9190012854646208, "grad_norm": 0.7421875, "learning_rate": 9.05305039485604e-05, "loss": 0.7158, "step": 34706 }, { "epoch": 0.9190277650729152, "grad_norm": 0.8359375, "learning_rate": 9.052592090556234e-05, "loss": 0.8442, "step": 34707 }, { "epoch": 0.9190542446812096, "grad_norm": 0.7890625, "learning_rate": 9.052133788264405e-05, "loss": 0.7564, "step": 34708 }, { "epoch": 0.9190807242895039, "grad_norm": 0.7265625, "learning_rate": 9.051675487981519e-05, "loss": 0.7582, "step": 34709 }, { "epoch": 0.9191072038977983, "grad_norm": 0.8125, "learning_rate": 9.051217189708557e-05, "loss": 0.7554, "step": 34710 }, { "epoch": 0.9191336835060927, "grad_norm": 0.7421875, "learning_rate": 9.050758893446487e-05, "loss": 0.6294, "step": 34711 }, { "epoch": 0.9191601631143871, "grad_norm": 0.7734375, "learning_rate": 9.050300599196278e-05, "loss": 0.8562, "step": 34712 }, { "epoch": 0.9191866427226815, "grad_norm": 0.84375, "learning_rate": 9.049842306958905e-05, "loss": 0.7458, "step": 34713 }, { "epoch": 0.9192131223309759, "grad_norm": 0.7890625, "learning_rate": 9.049384016735332e-05, "loss": 0.7989, "step": 34714 }, { "epoch": 0.9192396019392702, "grad_norm": 0.6953125, "learning_rate": 9.048925728526538e-05, "loss": 0.647, "step": 34715 }, { "epoch": 0.9192660815475646, "grad_norm": 0.9453125, "learning_rate": 9.048467442333492e-05, "loss": 0.8804, "step": 34716 }, { "epoch": 0.919292561155859, "grad_norm": 0.78515625, "learning_rate": 9.048009158157165e-05, "loss": 0.7385, "step": 34717 }, { "epoch": 0.9193190407641534, "grad_norm": 0.77734375, "learning_rate": 9.047550875998526e-05, "loss": 0.8028, "step": 34718 }, { "epoch": 0.9193455203724478, "grad_norm": 0.80078125, "learning_rate": 9.047092595858549e-05, "loss": 0.8589, "step": 34719 }, { "epoch": 0.9193719999807421, "grad_norm": 0.79296875, "learning_rate": 9.046634317738206e-05, "loss": 0.6464, "step": 34720 }, { "epoch": 0.9193984795890365, "grad_norm": 0.77734375, "learning_rate": 9.046176041638467e-05, "loss": 0.8471, "step": 34721 }, { "epoch": 0.9194249591973308, "grad_norm": 0.8046875, "learning_rate": 9.045717767560304e-05, "loss": 0.7235, "step": 34722 }, { "epoch": 0.9194514388056252, "grad_norm": 0.734375, "learning_rate": 9.045259495504687e-05, "loss": 0.8442, "step": 34723 }, { "epoch": 0.9194779184139196, "grad_norm": 0.75, "learning_rate": 9.044801225472584e-05, "loss": 0.7273, "step": 34724 }, { "epoch": 0.919504398022214, "grad_norm": 0.71875, "learning_rate": 9.044342957464974e-05, "loss": 0.682, "step": 34725 }, { "epoch": 0.9195308776305083, "grad_norm": 0.75390625, "learning_rate": 9.043884691482824e-05, "loss": 0.7482, "step": 34726 }, { "epoch": 0.9195573572388027, "grad_norm": 0.78515625, "learning_rate": 9.043426427527107e-05, "loss": 0.7294, "step": 34727 }, { "epoch": 0.9195838368470971, "grad_norm": 0.77734375, "learning_rate": 9.042968165598793e-05, "loss": 0.6909, "step": 34728 }, { "epoch": 0.9196103164553915, "grad_norm": 0.65625, "learning_rate": 9.04250990569885e-05, "loss": 0.6381, "step": 34729 }, { "epoch": 0.9196367960636859, "grad_norm": 0.85546875, "learning_rate": 9.042051647828255e-05, "loss": 0.7428, "step": 34730 }, { "epoch": 0.9196632756719803, "grad_norm": 0.7734375, "learning_rate": 9.041593391987977e-05, "loss": 0.7139, "step": 34731 }, { "epoch": 0.9196897552802746, "grad_norm": 0.68359375, "learning_rate": 9.041135138178987e-05, "loss": 0.6919, "step": 34732 }, { "epoch": 0.919716234888569, "grad_norm": 0.79296875, "learning_rate": 9.040676886402257e-05, "loss": 0.8241, "step": 34733 }, { "epoch": 0.9197427144968634, "grad_norm": 0.8046875, "learning_rate": 9.040218636658755e-05, "loss": 0.8773, "step": 34734 }, { "epoch": 0.9197691941051578, "grad_norm": 0.796875, "learning_rate": 9.039760388949456e-05, "loss": 0.6723, "step": 34735 }, { "epoch": 0.9197956737134522, "grad_norm": 0.71875, "learning_rate": 9.039302143275329e-05, "loss": 0.662, "step": 34736 }, { "epoch": 0.9198221533217465, "grad_norm": 0.7421875, "learning_rate": 9.038843899637348e-05, "loss": 0.7754, "step": 34737 }, { "epoch": 0.9198486329300408, "grad_norm": 0.7578125, "learning_rate": 9.038385658036482e-05, "loss": 0.7093, "step": 34738 }, { "epoch": 0.9198751125383352, "grad_norm": 0.796875, "learning_rate": 9.037927418473702e-05, "loss": 0.7238, "step": 34739 }, { "epoch": 0.9199015921466296, "grad_norm": 0.83984375, "learning_rate": 9.03746918094998e-05, "loss": 0.7673, "step": 34740 }, { "epoch": 0.919928071754924, "grad_norm": 0.859375, "learning_rate": 9.03701094546629e-05, "loss": 0.9249, "step": 34741 }, { "epoch": 0.9199545513632184, "grad_norm": 0.79296875, "learning_rate": 9.036552712023598e-05, "loss": 0.8788, "step": 34742 }, { "epoch": 0.9199810309715127, "grad_norm": 0.77734375, "learning_rate": 9.036094480622876e-05, "loss": 0.7448, "step": 34743 }, { "epoch": 0.9200075105798071, "grad_norm": 0.828125, "learning_rate": 9.035636251265093e-05, "loss": 0.7677, "step": 34744 }, { "epoch": 0.9200339901881015, "grad_norm": 0.71484375, "learning_rate": 9.035178023951231e-05, "loss": 0.8119, "step": 34745 }, { "epoch": 0.9200604697963959, "grad_norm": 0.73046875, "learning_rate": 9.034719798682251e-05, "loss": 0.9036, "step": 34746 }, { "epoch": 0.9200869494046903, "grad_norm": 0.875, "learning_rate": 9.034261575459129e-05, "loss": 0.9467, "step": 34747 }, { "epoch": 0.9201134290129847, "grad_norm": 0.81640625, "learning_rate": 9.033803354282833e-05, "loss": 0.8196, "step": 34748 }, { "epoch": 0.920139908621279, "grad_norm": 0.71484375, "learning_rate": 9.033345135154333e-05, "loss": 0.7727, "step": 34749 }, { "epoch": 0.9201663882295734, "grad_norm": 0.8125, "learning_rate": 9.032886918074606e-05, "loss": 0.8375, "step": 34750 }, { "epoch": 0.9201928678378678, "grad_norm": 0.78515625, "learning_rate": 9.03242870304462e-05, "loss": 0.7195, "step": 34751 }, { "epoch": 0.9202193474461622, "grad_norm": 0.7890625, "learning_rate": 9.031970490065346e-05, "loss": 0.7655, "step": 34752 }, { "epoch": 0.9202458270544566, "grad_norm": 0.8046875, "learning_rate": 9.031512279137755e-05, "loss": 0.7689, "step": 34753 }, { "epoch": 0.920272306662751, "grad_norm": 0.96484375, "learning_rate": 9.031054070262816e-05, "loss": 0.7952, "step": 34754 }, { "epoch": 0.9202987862710452, "grad_norm": 0.7578125, "learning_rate": 9.030595863441505e-05, "loss": 0.728, "step": 34755 }, { "epoch": 0.9203252658793396, "grad_norm": 0.75, "learning_rate": 9.03013765867479e-05, "loss": 0.7577, "step": 34756 }, { "epoch": 0.920351745487634, "grad_norm": 0.765625, "learning_rate": 9.029679455963643e-05, "loss": 0.6966, "step": 34757 }, { "epoch": 0.9203782250959284, "grad_norm": 0.76953125, "learning_rate": 9.029221255309036e-05, "loss": 0.7568, "step": 34758 }, { "epoch": 0.9204047047042228, "grad_norm": 0.82421875, "learning_rate": 9.028763056711936e-05, "loss": 0.7966, "step": 34759 }, { "epoch": 0.9204311843125171, "grad_norm": 0.76953125, "learning_rate": 9.02830486017332e-05, "loss": 0.8331, "step": 34760 }, { "epoch": 0.9204576639208115, "grad_norm": 0.796875, "learning_rate": 9.027846665694157e-05, "loss": 0.8842, "step": 34761 }, { "epoch": 0.9204841435291059, "grad_norm": 0.8046875, "learning_rate": 9.027388473275416e-05, "loss": 0.7588, "step": 34762 }, { "epoch": 0.9205106231374003, "grad_norm": 0.765625, "learning_rate": 9.02693028291807e-05, "loss": 0.7655, "step": 34763 }, { "epoch": 0.9205371027456947, "grad_norm": 0.80078125, "learning_rate": 9.026472094623086e-05, "loss": 0.8182, "step": 34764 }, { "epoch": 0.920563582353989, "grad_norm": 0.7265625, "learning_rate": 9.026013908391442e-05, "loss": 0.7502, "step": 34765 }, { "epoch": 0.9205900619622834, "grad_norm": 0.77734375, "learning_rate": 9.025555724224106e-05, "loss": 0.8038, "step": 34766 }, { "epoch": 0.9206165415705778, "grad_norm": 0.77734375, "learning_rate": 9.025097542122049e-05, "loss": 0.8698, "step": 34767 }, { "epoch": 0.9206430211788722, "grad_norm": 0.87109375, "learning_rate": 9.024639362086243e-05, "loss": 0.8111, "step": 34768 }, { "epoch": 0.9206695007871666, "grad_norm": 0.80078125, "learning_rate": 9.024181184117652e-05, "loss": 0.78, "step": 34769 }, { "epoch": 0.920695980395461, "grad_norm": 0.796875, "learning_rate": 9.02372300821726e-05, "loss": 0.8327, "step": 34770 }, { "epoch": 0.9207224600037552, "grad_norm": 0.765625, "learning_rate": 9.023264834386029e-05, "loss": 0.7619, "step": 34771 }, { "epoch": 0.9207489396120496, "grad_norm": 1.0859375, "learning_rate": 9.022806662624934e-05, "loss": 0.7801, "step": 34772 }, { "epoch": 0.920775419220344, "grad_norm": 0.859375, "learning_rate": 9.022348492934944e-05, "loss": 0.7453, "step": 34773 }, { "epoch": 0.9208018988286384, "grad_norm": 0.77734375, "learning_rate": 9.021890325317025e-05, "loss": 0.7752, "step": 34774 }, { "epoch": 0.9208283784369328, "grad_norm": 0.83984375, "learning_rate": 9.021432159772158e-05, "loss": 0.7024, "step": 34775 }, { "epoch": 0.9208548580452272, "grad_norm": 0.7421875, "learning_rate": 9.020973996301312e-05, "loss": 0.6178, "step": 34776 }, { "epoch": 0.9208813376535215, "grad_norm": 0.75390625, "learning_rate": 9.020515834905453e-05, "loss": 0.7034, "step": 34777 }, { "epoch": 0.9209078172618159, "grad_norm": 0.75390625, "learning_rate": 9.020057675585556e-05, "loss": 0.7149, "step": 34778 }, { "epoch": 0.9209342968701103, "grad_norm": 0.765625, "learning_rate": 9.019599518342589e-05, "loss": 0.8838, "step": 34779 }, { "epoch": 0.9209607764784047, "grad_norm": 0.78515625, "learning_rate": 9.019141363177525e-05, "loss": 0.7886, "step": 34780 }, { "epoch": 0.9209872560866991, "grad_norm": 0.71875, "learning_rate": 9.018683210091337e-05, "loss": 0.8214, "step": 34781 }, { "epoch": 0.9210137356949935, "grad_norm": 0.73828125, "learning_rate": 9.018225059084992e-05, "loss": 0.7799, "step": 34782 }, { "epoch": 0.9210402153032878, "grad_norm": 0.77734375, "learning_rate": 9.017766910159465e-05, "loss": 0.8285, "step": 34783 }, { "epoch": 0.9210666949115822, "grad_norm": 0.7421875, "learning_rate": 9.017308763315718e-05, "loss": 0.7407, "step": 34784 }, { "epoch": 0.9210931745198766, "grad_norm": 0.68359375, "learning_rate": 9.016850618554736e-05, "loss": 0.7962, "step": 34785 }, { "epoch": 0.921119654128171, "grad_norm": 0.828125, "learning_rate": 9.016392475877482e-05, "loss": 0.8762, "step": 34786 }, { "epoch": 0.9211461337364653, "grad_norm": 1.09375, "learning_rate": 9.015934335284926e-05, "loss": 0.7466, "step": 34787 }, { "epoch": 0.9211726133447596, "grad_norm": 0.77734375, "learning_rate": 9.015476196778042e-05, "loss": 0.7748, "step": 34788 }, { "epoch": 0.921199092953054, "grad_norm": 0.93359375, "learning_rate": 9.015018060357796e-05, "loss": 0.8636, "step": 34789 }, { "epoch": 0.9212255725613484, "grad_norm": 0.83203125, "learning_rate": 9.014559926025168e-05, "loss": 0.7923, "step": 34790 }, { "epoch": 0.9212520521696428, "grad_norm": 0.734375, "learning_rate": 9.014101793781122e-05, "loss": 0.7795, "step": 34791 }, { "epoch": 0.9212785317779372, "grad_norm": 0.8515625, "learning_rate": 9.013643663626633e-05, "loss": 0.7613, "step": 34792 }, { "epoch": 0.9213050113862316, "grad_norm": 0.7578125, "learning_rate": 9.01318553556267e-05, "loss": 0.7822, "step": 34793 }, { "epoch": 0.9213314909945259, "grad_norm": 0.78125, "learning_rate": 9.012727409590197e-05, "loss": 0.767, "step": 34794 }, { "epoch": 0.9213579706028203, "grad_norm": 0.71875, "learning_rate": 9.012269285710198e-05, "loss": 0.7358, "step": 34795 }, { "epoch": 0.9213844502111147, "grad_norm": 0.8125, "learning_rate": 9.011811163923637e-05, "loss": 0.8555, "step": 34796 }, { "epoch": 0.9214109298194091, "grad_norm": 0.79296875, "learning_rate": 9.011353044231486e-05, "loss": 0.7666, "step": 34797 }, { "epoch": 0.9214374094277035, "grad_norm": 0.80859375, "learning_rate": 9.010894926634717e-05, "loss": 0.7218, "step": 34798 }, { "epoch": 0.9214638890359979, "grad_norm": 0.77734375, "learning_rate": 9.010436811134295e-05, "loss": 0.8592, "step": 34799 }, { "epoch": 0.9214903686442922, "grad_norm": 0.71875, "learning_rate": 9.009978697731199e-05, "loss": 0.7896, "step": 34800 }, { "epoch": 0.9215168482525866, "grad_norm": 0.8203125, "learning_rate": 9.009520586426397e-05, "loss": 0.8694, "step": 34801 }, { "epoch": 0.921543327860881, "grad_norm": 0.80078125, "learning_rate": 9.00906247722086e-05, "loss": 0.8129, "step": 34802 }, { "epoch": 0.9215698074691754, "grad_norm": 0.765625, "learning_rate": 9.008604370115555e-05, "loss": 0.8012, "step": 34803 }, { "epoch": 0.9215962870774697, "grad_norm": 0.734375, "learning_rate": 9.008146265111457e-05, "loss": 0.6521, "step": 34804 }, { "epoch": 0.921622766685764, "grad_norm": 0.71484375, "learning_rate": 9.007688162209538e-05, "loss": 0.679, "step": 34805 }, { "epoch": 0.9216492462940584, "grad_norm": 0.75, "learning_rate": 9.007230061410768e-05, "loss": 0.7052, "step": 34806 }, { "epoch": 0.9216757259023528, "grad_norm": 0.8125, "learning_rate": 9.006771962716117e-05, "loss": 0.8309, "step": 34807 }, { "epoch": 0.9217022055106472, "grad_norm": 0.765625, "learning_rate": 9.006313866126554e-05, "loss": 0.867, "step": 34808 }, { "epoch": 0.9217286851189416, "grad_norm": 0.7734375, "learning_rate": 9.00585577164305e-05, "loss": 0.7527, "step": 34809 }, { "epoch": 0.921755164727236, "grad_norm": 0.72265625, "learning_rate": 9.005397679266582e-05, "loss": 0.7352, "step": 34810 }, { "epoch": 0.9217816443355303, "grad_norm": 0.7421875, "learning_rate": 9.004939588998116e-05, "loss": 0.7055, "step": 34811 }, { "epoch": 0.9218081239438247, "grad_norm": 0.70703125, "learning_rate": 9.004481500838624e-05, "loss": 0.694, "step": 34812 }, { "epoch": 0.9218346035521191, "grad_norm": 0.81640625, "learning_rate": 9.004023414789077e-05, "loss": 0.6905, "step": 34813 }, { "epoch": 0.9218610831604135, "grad_norm": 0.859375, "learning_rate": 9.003565330850441e-05, "loss": 0.9185, "step": 34814 }, { "epoch": 0.9218875627687079, "grad_norm": 0.7265625, "learning_rate": 9.003107249023695e-05, "loss": 0.8328, "step": 34815 }, { "epoch": 0.9219140423770023, "grad_norm": 0.76171875, "learning_rate": 9.002649169309806e-05, "loss": 0.8218, "step": 34816 }, { "epoch": 0.9219405219852966, "grad_norm": 0.80859375, "learning_rate": 9.002191091709746e-05, "loss": 0.7828, "step": 34817 }, { "epoch": 0.921967001593591, "grad_norm": 0.8515625, "learning_rate": 9.001733016224484e-05, "loss": 0.7512, "step": 34818 }, { "epoch": 0.9219934812018854, "grad_norm": 0.73046875, "learning_rate": 9.00127494285499e-05, "loss": 0.7736, "step": 34819 }, { "epoch": 0.9220199608101797, "grad_norm": 0.78125, "learning_rate": 9.00081687160224e-05, "loss": 0.8995, "step": 34820 }, { "epoch": 0.9220464404184741, "grad_norm": 0.7734375, "learning_rate": 9.0003588024672e-05, "loss": 0.8792, "step": 34821 }, { "epoch": 0.9220729200267684, "grad_norm": 0.68359375, "learning_rate": 8.999900735450842e-05, "loss": 0.7805, "step": 34822 }, { "epoch": 0.9220993996350628, "grad_norm": 0.74609375, "learning_rate": 8.999442670554136e-05, "loss": 0.8894, "step": 34823 }, { "epoch": 0.9221258792433572, "grad_norm": 0.765625, "learning_rate": 8.998984607778054e-05, "loss": 0.826, "step": 34824 }, { "epoch": 0.9221523588516516, "grad_norm": 0.77734375, "learning_rate": 8.99852654712357e-05, "loss": 0.7679, "step": 34825 }, { "epoch": 0.922178838459946, "grad_norm": 0.796875, "learning_rate": 8.998068488591651e-05, "loss": 1.0175, "step": 34826 }, { "epoch": 0.9222053180682404, "grad_norm": 0.82421875, "learning_rate": 8.997610432183269e-05, "loss": 0.799, "step": 34827 }, { "epoch": 0.9222317976765347, "grad_norm": 0.75390625, "learning_rate": 8.997152377899394e-05, "loss": 0.6676, "step": 34828 }, { "epoch": 0.9222582772848291, "grad_norm": 0.8203125, "learning_rate": 8.996694325740992e-05, "loss": 0.7582, "step": 34829 }, { "epoch": 0.9222847568931235, "grad_norm": 0.8671875, "learning_rate": 8.996236275709044e-05, "loss": 0.8602, "step": 34830 }, { "epoch": 0.9223112365014179, "grad_norm": 0.828125, "learning_rate": 8.995778227804516e-05, "loss": 0.7935, "step": 34831 }, { "epoch": 0.9223377161097123, "grad_norm": 0.83984375, "learning_rate": 8.995320182028379e-05, "loss": 0.8086, "step": 34832 }, { "epoch": 0.9223641957180067, "grad_norm": 0.8359375, "learning_rate": 8.994862138381604e-05, "loss": 0.7856, "step": 34833 }, { "epoch": 0.922390675326301, "grad_norm": 0.7734375, "learning_rate": 8.994404096865154e-05, "loss": 0.6813, "step": 34834 }, { "epoch": 0.9224171549345954, "grad_norm": 0.796875, "learning_rate": 8.993946057480013e-05, "loss": 0.8336, "step": 34835 }, { "epoch": 0.9224436345428897, "grad_norm": 0.88671875, "learning_rate": 8.993488020227147e-05, "loss": 0.7268, "step": 34836 }, { "epoch": 0.9224701141511841, "grad_norm": 1.015625, "learning_rate": 8.993029985107525e-05, "loss": 0.8106, "step": 34837 }, { "epoch": 0.9224965937594785, "grad_norm": 0.77734375, "learning_rate": 8.992571952122119e-05, "loss": 0.9092, "step": 34838 }, { "epoch": 0.9225230733677728, "grad_norm": 0.71875, "learning_rate": 8.992113921271896e-05, "loss": 0.8633, "step": 34839 }, { "epoch": 0.9225495529760672, "grad_norm": 0.7890625, "learning_rate": 8.99165589255783e-05, "loss": 0.8249, "step": 34840 }, { "epoch": 0.9225760325843616, "grad_norm": 0.72265625, "learning_rate": 8.991197865980894e-05, "loss": 0.7883, "step": 34841 }, { "epoch": 0.922602512192656, "grad_norm": 0.8046875, "learning_rate": 8.990739841542056e-05, "loss": 0.8524, "step": 34842 }, { "epoch": 0.9226289918009504, "grad_norm": 0.7421875, "learning_rate": 8.990281819242288e-05, "loss": 0.703, "step": 34843 }, { "epoch": 0.9226554714092448, "grad_norm": 0.8046875, "learning_rate": 8.989823799082557e-05, "loss": 0.8488, "step": 34844 }, { "epoch": 0.9226819510175391, "grad_norm": 0.81640625, "learning_rate": 8.98936578106384e-05, "loss": 0.756, "step": 34845 }, { "epoch": 0.9227084306258335, "grad_norm": 0.83203125, "learning_rate": 8.988907765187104e-05, "loss": 0.7471, "step": 34846 }, { "epoch": 0.9227349102341279, "grad_norm": 0.796875, "learning_rate": 8.98844975145332e-05, "loss": 0.7788, "step": 34847 }, { "epoch": 0.9227613898424223, "grad_norm": 0.7421875, "learning_rate": 8.987991739863458e-05, "loss": 0.6973, "step": 34848 }, { "epoch": 0.9227878694507167, "grad_norm": 0.7734375, "learning_rate": 8.987533730418487e-05, "loss": 0.7953, "step": 34849 }, { "epoch": 0.922814349059011, "grad_norm": 0.7578125, "learning_rate": 8.987075723119384e-05, "loss": 0.8552, "step": 34850 }, { "epoch": 0.9228408286673054, "grad_norm": 0.734375, "learning_rate": 8.986617717967116e-05, "loss": 0.716, "step": 34851 }, { "epoch": 0.9228673082755998, "grad_norm": 0.859375, "learning_rate": 8.986159714962654e-05, "loss": 0.7607, "step": 34852 }, { "epoch": 0.9228937878838941, "grad_norm": 0.796875, "learning_rate": 8.985701714106968e-05, "loss": 0.7589, "step": 34853 }, { "epoch": 0.9229202674921885, "grad_norm": 0.87109375, "learning_rate": 8.98524371540103e-05, "loss": 0.9253, "step": 34854 }, { "epoch": 0.9229467471004829, "grad_norm": 0.8515625, "learning_rate": 8.984785718845805e-05, "loss": 0.8458, "step": 34855 }, { "epoch": 0.9229732267087772, "grad_norm": 0.77734375, "learning_rate": 8.984327724442275e-05, "loss": 0.7396, "step": 34856 }, { "epoch": 0.9229997063170716, "grad_norm": 0.859375, "learning_rate": 8.983869732191401e-05, "loss": 0.9039, "step": 34857 }, { "epoch": 0.923026185925366, "grad_norm": 0.7578125, "learning_rate": 8.983411742094159e-05, "loss": 0.7363, "step": 34858 }, { "epoch": 0.9230526655336604, "grad_norm": 3.078125, "learning_rate": 8.982953754151518e-05, "loss": 0.86, "step": 34859 }, { "epoch": 0.9230791451419548, "grad_norm": 0.6484375, "learning_rate": 8.982495768364442e-05, "loss": 0.7122, "step": 34860 }, { "epoch": 0.9231056247502492, "grad_norm": 0.77734375, "learning_rate": 8.982037784733914e-05, "loss": 0.7814, "step": 34861 }, { "epoch": 0.9231321043585435, "grad_norm": 0.90234375, "learning_rate": 8.981579803260898e-05, "loss": 0.8379, "step": 34862 }, { "epoch": 0.9231585839668379, "grad_norm": 0.8125, "learning_rate": 8.981121823946366e-05, "loss": 0.8841, "step": 34863 }, { "epoch": 0.9231850635751323, "grad_norm": 0.7734375, "learning_rate": 8.980663846791288e-05, "loss": 0.84, "step": 34864 }, { "epoch": 0.9232115431834267, "grad_norm": 0.76953125, "learning_rate": 8.980205871796633e-05, "loss": 0.798, "step": 34865 }, { "epoch": 0.9232380227917211, "grad_norm": 0.7421875, "learning_rate": 8.979747898963376e-05, "loss": 0.9187, "step": 34866 }, { "epoch": 0.9232645024000155, "grad_norm": 0.74609375, "learning_rate": 8.979289928292483e-05, "loss": 0.8793, "step": 34867 }, { "epoch": 0.9232909820083098, "grad_norm": 0.74609375, "learning_rate": 8.978831959784929e-05, "loss": 0.8269, "step": 34868 }, { "epoch": 0.9233174616166041, "grad_norm": 0.78125, "learning_rate": 8.97837399344168e-05, "loss": 0.9406, "step": 34869 }, { "epoch": 0.9233439412248985, "grad_norm": 0.77734375, "learning_rate": 8.977916029263707e-05, "loss": 0.7323, "step": 34870 }, { "epoch": 0.9233704208331929, "grad_norm": 0.84375, "learning_rate": 8.977458067251984e-05, "loss": 0.7311, "step": 34871 }, { "epoch": 0.9233969004414873, "grad_norm": 0.71484375, "learning_rate": 8.977000107407483e-05, "loss": 0.7621, "step": 34872 }, { "epoch": 0.9234233800497816, "grad_norm": 0.79296875, "learning_rate": 8.97654214973117e-05, "loss": 0.8018, "step": 34873 }, { "epoch": 0.923449859658076, "grad_norm": 0.74609375, "learning_rate": 8.976084194224018e-05, "loss": 0.7804, "step": 34874 }, { "epoch": 0.9234763392663704, "grad_norm": 0.75, "learning_rate": 8.975626240886991e-05, "loss": 0.7234, "step": 34875 }, { "epoch": 0.9235028188746648, "grad_norm": 0.76953125, "learning_rate": 8.975168289721073e-05, "loss": 0.7445, "step": 34876 }, { "epoch": 0.9235292984829592, "grad_norm": 0.75390625, "learning_rate": 8.974710340727225e-05, "loss": 0.8509, "step": 34877 }, { "epoch": 0.9235557780912536, "grad_norm": 0.80078125, "learning_rate": 8.974252393906422e-05, "loss": 0.8644, "step": 34878 }, { "epoch": 0.9235822576995479, "grad_norm": 0.7890625, "learning_rate": 8.97379444925963e-05, "loss": 0.7794, "step": 34879 }, { "epoch": 0.9236087373078423, "grad_norm": 0.78515625, "learning_rate": 8.973336506787819e-05, "loss": 0.6756, "step": 34880 }, { "epoch": 0.9236352169161367, "grad_norm": 0.71484375, "learning_rate": 8.972878566491967e-05, "loss": 0.6981, "step": 34881 }, { "epoch": 0.9236616965244311, "grad_norm": 0.80859375, "learning_rate": 8.97242062837304e-05, "loss": 0.815, "step": 34882 }, { "epoch": 0.9236881761327255, "grad_norm": 0.78125, "learning_rate": 8.971962692432007e-05, "loss": 0.8645, "step": 34883 }, { "epoch": 0.9237146557410199, "grad_norm": 0.859375, "learning_rate": 8.971504758669843e-05, "loss": 0.9575, "step": 34884 }, { "epoch": 0.9237411353493141, "grad_norm": 0.765625, "learning_rate": 8.971046827087513e-05, "loss": 0.6643, "step": 34885 }, { "epoch": 0.9237676149576085, "grad_norm": 1.0, "learning_rate": 8.970588897685992e-05, "loss": 0.7289, "step": 34886 }, { "epoch": 0.9237940945659029, "grad_norm": 0.68359375, "learning_rate": 8.97013097046625e-05, "loss": 0.5745, "step": 34887 }, { "epoch": 0.9238205741741973, "grad_norm": 0.7890625, "learning_rate": 8.969673045429254e-05, "loss": 0.7549, "step": 34888 }, { "epoch": 0.9238470537824917, "grad_norm": 0.8125, "learning_rate": 8.969215122575977e-05, "loss": 0.8056, "step": 34889 }, { "epoch": 0.923873533390786, "grad_norm": 0.76171875, "learning_rate": 8.96875720190739e-05, "loss": 0.7991, "step": 34890 }, { "epoch": 0.9239000129990804, "grad_norm": 0.77734375, "learning_rate": 8.968299283424464e-05, "loss": 0.8495, "step": 34891 }, { "epoch": 0.9239264926073748, "grad_norm": 0.765625, "learning_rate": 8.967841367128171e-05, "loss": 0.754, "step": 34892 }, { "epoch": 0.9239529722156692, "grad_norm": 0.7890625, "learning_rate": 8.967383453019477e-05, "loss": 0.7036, "step": 34893 }, { "epoch": 0.9239794518239636, "grad_norm": 0.73046875, "learning_rate": 8.966925541099354e-05, "loss": 0.8455, "step": 34894 }, { "epoch": 0.924005931432258, "grad_norm": 0.80078125, "learning_rate": 8.966467631368771e-05, "loss": 0.8121, "step": 34895 }, { "epoch": 0.9240324110405523, "grad_norm": 0.7265625, "learning_rate": 8.966009723828705e-05, "loss": 0.8682, "step": 34896 }, { "epoch": 0.9240588906488467, "grad_norm": 0.796875, "learning_rate": 8.965551818480122e-05, "loss": 0.7309, "step": 34897 }, { "epoch": 0.9240853702571411, "grad_norm": 0.80859375, "learning_rate": 8.965093915323991e-05, "loss": 0.748, "step": 34898 }, { "epoch": 0.9241118498654355, "grad_norm": 0.7578125, "learning_rate": 8.964636014361286e-05, "loss": 0.7259, "step": 34899 }, { "epoch": 0.9241383294737299, "grad_norm": 0.81640625, "learning_rate": 8.964178115592971e-05, "loss": 0.8757, "step": 34900 }, { "epoch": 0.9241648090820243, "grad_norm": 0.734375, "learning_rate": 8.963720219020026e-05, "loss": 0.7742, "step": 34901 }, { "epoch": 0.9241912886903185, "grad_norm": 0.734375, "learning_rate": 8.963262324643417e-05, "loss": 0.7835, "step": 34902 }, { "epoch": 0.9242177682986129, "grad_norm": 0.75390625, "learning_rate": 8.962804432464114e-05, "loss": 0.7418, "step": 34903 }, { "epoch": 0.9242442479069073, "grad_norm": 0.7578125, "learning_rate": 8.962346542483087e-05, "loss": 0.7717, "step": 34904 }, { "epoch": 0.9242707275152017, "grad_norm": 0.76171875, "learning_rate": 8.961888654701306e-05, "loss": 0.818, "step": 34905 }, { "epoch": 0.9242972071234961, "grad_norm": 0.7265625, "learning_rate": 8.961430769119744e-05, "loss": 0.7806, "step": 34906 }, { "epoch": 0.9243236867317904, "grad_norm": 0.84765625, "learning_rate": 8.960972885739369e-05, "loss": 0.7654, "step": 34907 }, { "epoch": 0.9243501663400848, "grad_norm": 0.75, "learning_rate": 8.960515004561154e-05, "loss": 0.8321, "step": 34908 }, { "epoch": 0.9243766459483792, "grad_norm": 0.86328125, "learning_rate": 8.960057125586067e-05, "loss": 0.8881, "step": 34909 }, { "epoch": 0.9244031255566736, "grad_norm": 0.77734375, "learning_rate": 8.95959924881508e-05, "loss": 0.7191, "step": 34910 }, { "epoch": 0.924429605164968, "grad_norm": 0.8125, "learning_rate": 8.959141374249162e-05, "loss": 0.7742, "step": 34911 }, { "epoch": 0.9244560847732624, "grad_norm": 0.7890625, "learning_rate": 8.958683501889286e-05, "loss": 0.794, "step": 34912 }, { "epoch": 0.9244825643815567, "grad_norm": 0.78515625, "learning_rate": 8.958225631736421e-05, "loss": 0.7603, "step": 34913 }, { "epoch": 0.9245090439898511, "grad_norm": 0.79296875, "learning_rate": 8.957767763791537e-05, "loss": 0.8675, "step": 34914 }, { "epoch": 0.9245355235981455, "grad_norm": 0.7109375, "learning_rate": 8.9573098980556e-05, "loss": 0.8208, "step": 34915 }, { "epoch": 0.9245620032064399, "grad_norm": 0.80078125, "learning_rate": 8.95685203452959e-05, "loss": 0.7845, "step": 34916 }, { "epoch": 0.9245884828147343, "grad_norm": 0.84375, "learning_rate": 8.956394173214472e-05, "loss": 0.8903, "step": 34917 }, { "epoch": 0.9246149624230285, "grad_norm": 0.8046875, "learning_rate": 8.955936314111218e-05, "loss": 0.7856, "step": 34918 }, { "epoch": 0.9246414420313229, "grad_norm": 0.71875, "learning_rate": 8.955478457220796e-05, "loss": 0.739, "step": 34919 }, { "epoch": 0.9246679216396173, "grad_norm": 0.78515625, "learning_rate": 8.955020602544172e-05, "loss": 0.8305, "step": 34920 }, { "epoch": 0.9246944012479117, "grad_norm": 0.890625, "learning_rate": 8.954562750082329e-05, "loss": 0.9689, "step": 34921 }, { "epoch": 0.9247208808562061, "grad_norm": 0.78515625, "learning_rate": 8.95410489983623e-05, "loss": 0.8027, "step": 34922 }, { "epoch": 0.9247473604645005, "grad_norm": 0.8046875, "learning_rate": 8.953647051806845e-05, "loss": 0.8225, "step": 34923 }, { "epoch": 0.9247738400727948, "grad_norm": 0.734375, "learning_rate": 8.953189205995147e-05, "loss": 0.8446, "step": 34924 }, { "epoch": 0.9248003196810892, "grad_norm": 0.76171875, "learning_rate": 8.9527313624021e-05, "loss": 0.7823, "step": 34925 }, { "epoch": 0.9248267992893836, "grad_norm": 0.7734375, "learning_rate": 8.952273521028682e-05, "loss": 0.8235, "step": 34926 }, { "epoch": 0.924853278897678, "grad_norm": 0.828125, "learning_rate": 8.95181568187586e-05, "loss": 0.7967, "step": 34927 }, { "epoch": 0.9248797585059724, "grad_norm": 0.796875, "learning_rate": 8.951357844944605e-05, "loss": 0.8747, "step": 34928 }, { "epoch": 0.9249062381142668, "grad_norm": 0.7734375, "learning_rate": 8.950900010235889e-05, "loss": 0.6616, "step": 34929 }, { "epoch": 0.9249327177225611, "grad_norm": 0.72265625, "learning_rate": 8.950442177750676e-05, "loss": 0.7551, "step": 34930 }, { "epoch": 0.9249591973308555, "grad_norm": 0.734375, "learning_rate": 8.949984347489944e-05, "loss": 0.8427, "step": 34931 }, { "epoch": 0.9249856769391499, "grad_norm": 0.6875, "learning_rate": 8.94952651945466e-05, "loss": 0.7542, "step": 34932 }, { "epoch": 0.9250121565474443, "grad_norm": 0.76953125, "learning_rate": 8.949068693645795e-05, "loss": 0.7341, "step": 34933 }, { "epoch": 0.9250386361557386, "grad_norm": 0.8515625, "learning_rate": 8.948610870064319e-05, "loss": 0.7917, "step": 34934 }, { "epoch": 0.9250651157640329, "grad_norm": 0.79296875, "learning_rate": 8.948153048711198e-05, "loss": 0.793, "step": 34935 }, { "epoch": 0.9250915953723273, "grad_norm": 0.80859375, "learning_rate": 8.94769522958741e-05, "loss": 0.7795, "step": 34936 }, { "epoch": 0.9251180749806217, "grad_norm": 0.75390625, "learning_rate": 8.947237412693921e-05, "loss": 0.7801, "step": 34937 }, { "epoch": 0.9251445545889161, "grad_norm": 0.80078125, "learning_rate": 8.946779598031703e-05, "loss": 0.7572, "step": 34938 }, { "epoch": 0.9251710341972105, "grad_norm": 0.7890625, "learning_rate": 8.946321785601725e-05, "loss": 0.6653, "step": 34939 }, { "epoch": 0.9251975138055049, "grad_norm": 0.734375, "learning_rate": 8.945863975404955e-05, "loss": 0.7781, "step": 34940 }, { "epoch": 0.9252239934137992, "grad_norm": 0.8671875, "learning_rate": 8.94540616744237e-05, "loss": 0.6919, "step": 34941 }, { "epoch": 0.9252504730220936, "grad_norm": 0.76171875, "learning_rate": 8.944948361714936e-05, "loss": 0.7681, "step": 34942 }, { "epoch": 0.925276952630388, "grad_norm": 0.82421875, "learning_rate": 8.944490558223623e-05, "loss": 0.7517, "step": 34943 }, { "epoch": 0.9253034322386824, "grad_norm": 0.77734375, "learning_rate": 8.944032756969401e-05, "loss": 0.8899, "step": 34944 }, { "epoch": 0.9253299118469768, "grad_norm": 0.77734375, "learning_rate": 8.94357495795324e-05, "loss": 0.865, "step": 34945 }, { "epoch": 0.9253563914552712, "grad_norm": 0.796875, "learning_rate": 8.943117161176113e-05, "loss": 0.8369, "step": 34946 }, { "epoch": 0.9253828710635655, "grad_norm": 0.7734375, "learning_rate": 8.942659366638989e-05, "loss": 0.7524, "step": 34947 }, { "epoch": 0.9254093506718599, "grad_norm": 0.8046875, "learning_rate": 8.942201574342838e-05, "loss": 0.8079, "step": 34948 }, { "epoch": 0.9254358302801543, "grad_norm": 0.78515625, "learning_rate": 8.941743784288632e-05, "loss": 0.7206, "step": 34949 }, { "epoch": 0.9254623098884487, "grad_norm": 0.765625, "learning_rate": 8.941285996477336e-05, "loss": 0.8855, "step": 34950 }, { "epoch": 0.925488789496743, "grad_norm": 0.82421875, "learning_rate": 8.940828210909927e-05, "loss": 0.7276, "step": 34951 }, { "epoch": 0.9255152691050373, "grad_norm": 0.82421875, "learning_rate": 8.940370427587373e-05, "loss": 0.7193, "step": 34952 }, { "epoch": 0.9255417487133317, "grad_norm": 0.79296875, "learning_rate": 8.939912646510642e-05, "loss": 0.7828, "step": 34953 }, { "epoch": 0.9255682283216261, "grad_norm": 0.76171875, "learning_rate": 8.939454867680705e-05, "loss": 0.8321, "step": 34954 }, { "epoch": 0.9255947079299205, "grad_norm": 0.703125, "learning_rate": 8.938997091098527e-05, "loss": 0.7833, "step": 34955 }, { "epoch": 0.9256211875382149, "grad_norm": 0.77734375, "learning_rate": 8.938539316765091e-05, "loss": 0.7393, "step": 34956 }, { "epoch": 0.9256476671465093, "grad_norm": 0.73046875, "learning_rate": 8.93808154468136e-05, "loss": 0.8035, "step": 34957 }, { "epoch": 0.9256741467548036, "grad_norm": 0.83203125, "learning_rate": 8.937623774848304e-05, "loss": 0.821, "step": 34958 }, { "epoch": 0.925700626363098, "grad_norm": 0.79296875, "learning_rate": 8.937166007266892e-05, "loss": 0.8113, "step": 34959 }, { "epoch": 0.9257271059713924, "grad_norm": 0.77734375, "learning_rate": 8.936708241938094e-05, "loss": 0.7998, "step": 34960 }, { "epoch": 0.9257535855796868, "grad_norm": 0.78515625, "learning_rate": 8.936250478862887e-05, "loss": 0.6753, "step": 34961 }, { "epoch": 0.9257800651879812, "grad_norm": 0.8671875, "learning_rate": 8.935792718042234e-05, "loss": 0.814, "step": 34962 }, { "epoch": 0.9258065447962756, "grad_norm": 0.77734375, "learning_rate": 8.935334959477108e-05, "loss": 0.9401, "step": 34963 }, { "epoch": 0.9258330244045699, "grad_norm": 0.77734375, "learning_rate": 8.93487720316848e-05, "loss": 0.7117, "step": 34964 }, { "epoch": 0.9258595040128643, "grad_norm": 0.71484375, "learning_rate": 8.934419449117314e-05, "loss": 0.7169, "step": 34965 }, { "epoch": 0.9258859836211587, "grad_norm": 0.71484375, "learning_rate": 8.933961697324588e-05, "loss": 0.7343, "step": 34966 }, { "epoch": 0.925912463229453, "grad_norm": 0.78125, "learning_rate": 8.93350394779127e-05, "loss": 0.8064, "step": 34967 }, { "epoch": 0.9259389428377474, "grad_norm": 0.78125, "learning_rate": 8.933046200518331e-05, "loss": 0.8782, "step": 34968 }, { "epoch": 0.9259654224460417, "grad_norm": 0.6875, "learning_rate": 8.932588455506738e-05, "loss": 0.7362, "step": 34969 }, { "epoch": 0.9259919020543361, "grad_norm": 0.828125, "learning_rate": 8.93213071275746e-05, "loss": 0.7198, "step": 34970 }, { "epoch": 0.9260183816626305, "grad_norm": 0.77734375, "learning_rate": 8.931672972271475e-05, "loss": 0.8426, "step": 34971 }, { "epoch": 0.9260448612709249, "grad_norm": 0.83203125, "learning_rate": 8.931215234049745e-05, "loss": 0.8489, "step": 34972 }, { "epoch": 0.9260713408792193, "grad_norm": 0.83203125, "learning_rate": 8.930757498093244e-05, "loss": 0.7726, "step": 34973 }, { "epoch": 0.9260978204875137, "grad_norm": 0.8125, "learning_rate": 8.93029976440294e-05, "loss": 0.8225, "step": 34974 }, { "epoch": 0.926124300095808, "grad_norm": 0.72265625, "learning_rate": 8.929842032979804e-05, "loss": 0.9152, "step": 34975 }, { "epoch": 0.9261507797041024, "grad_norm": 1.0234375, "learning_rate": 8.92938430382481e-05, "loss": 0.8253, "step": 34976 }, { "epoch": 0.9261772593123968, "grad_norm": 0.76953125, "learning_rate": 8.928926576938924e-05, "loss": 0.7249, "step": 34977 }, { "epoch": 0.9262037389206912, "grad_norm": 0.78515625, "learning_rate": 8.928468852323115e-05, "loss": 0.7107, "step": 34978 }, { "epoch": 0.9262302185289856, "grad_norm": 0.7734375, "learning_rate": 8.928011129978357e-05, "loss": 0.7771, "step": 34979 }, { "epoch": 0.92625669813728, "grad_norm": 0.765625, "learning_rate": 8.927553409905613e-05, "loss": 0.8562, "step": 34980 }, { "epoch": 0.9262831777455743, "grad_norm": 0.7265625, "learning_rate": 8.927095692105863e-05, "loss": 0.7069, "step": 34981 }, { "epoch": 0.9263096573538687, "grad_norm": 0.74609375, "learning_rate": 8.926637976580072e-05, "loss": 0.8663, "step": 34982 }, { "epoch": 0.926336136962163, "grad_norm": 0.796875, "learning_rate": 8.926180263329212e-05, "loss": 0.8252, "step": 34983 }, { "epoch": 0.9263626165704574, "grad_norm": 0.73046875, "learning_rate": 8.92572255235425e-05, "loss": 0.7971, "step": 34984 }, { "epoch": 0.9263890961787518, "grad_norm": 0.75390625, "learning_rate": 8.925264843656153e-05, "loss": 0.6756, "step": 34985 }, { "epoch": 0.9264155757870461, "grad_norm": 0.86328125, "learning_rate": 8.924807137235902e-05, "loss": 0.8019, "step": 34986 }, { "epoch": 0.9264420553953405, "grad_norm": 0.87109375, "learning_rate": 8.924349433094459e-05, "loss": 0.877, "step": 34987 }, { "epoch": 0.9264685350036349, "grad_norm": 1.484375, "learning_rate": 8.923891731232796e-05, "loss": 0.7116, "step": 34988 }, { "epoch": 0.9264950146119293, "grad_norm": 0.7578125, "learning_rate": 8.923434031651883e-05, "loss": 0.7098, "step": 34989 }, { "epoch": 0.9265214942202237, "grad_norm": 0.66015625, "learning_rate": 8.922976334352688e-05, "loss": 0.6848, "step": 34990 }, { "epoch": 0.9265479738285181, "grad_norm": 0.81640625, "learning_rate": 8.922518639336186e-05, "loss": 0.7496, "step": 34991 }, { "epoch": 0.9265744534368124, "grad_norm": 0.84375, "learning_rate": 8.922060946603343e-05, "loss": 0.7526, "step": 34992 }, { "epoch": 0.9266009330451068, "grad_norm": 0.76953125, "learning_rate": 8.92160325615513e-05, "loss": 0.8034, "step": 34993 }, { "epoch": 0.9266274126534012, "grad_norm": 0.81640625, "learning_rate": 8.921145567992517e-05, "loss": 0.8831, "step": 34994 }, { "epoch": 0.9266538922616956, "grad_norm": 0.796875, "learning_rate": 8.920687882116477e-05, "loss": 0.716, "step": 34995 }, { "epoch": 0.92668037186999, "grad_norm": 0.796875, "learning_rate": 8.920230198527972e-05, "loss": 0.8092, "step": 34996 }, { "epoch": 0.9267068514782844, "grad_norm": 0.78125, "learning_rate": 8.919772517227982e-05, "loss": 0.7097, "step": 34997 }, { "epoch": 0.9267333310865787, "grad_norm": 0.84375, "learning_rate": 8.919314838217472e-05, "loss": 0.8158, "step": 34998 }, { "epoch": 0.9267598106948731, "grad_norm": 0.73828125, "learning_rate": 8.918857161497412e-05, "loss": 0.7256, "step": 34999 }, { "epoch": 0.9267862903031674, "grad_norm": 0.7421875, "learning_rate": 8.918399487068772e-05, "loss": 0.7456, "step": 35000 }, { "epoch": 0.9267862903031674, "eval_loss": 0.7882528305053711, "eval_runtime": 252.9514, "eval_samples_per_second": 39.533, "eval_steps_per_second": 0.826, "step": 35000 }, { "epoch": 0.9268127699114618, "grad_norm": 0.78515625, "learning_rate": 8.917941814932519e-05, "loss": 0.8181, "step": 35001 }, { "epoch": 0.9268392495197562, "grad_norm": 0.81640625, "learning_rate": 8.917484145089629e-05, "loss": 0.8445, "step": 35002 }, { "epoch": 0.9268657291280505, "grad_norm": 0.765625, "learning_rate": 8.917026477541072e-05, "loss": 0.7937, "step": 35003 }, { "epoch": 0.9268922087363449, "grad_norm": 0.78515625, "learning_rate": 8.916568812287814e-05, "loss": 0.7859, "step": 35004 }, { "epoch": 0.9269186883446393, "grad_norm": 0.7421875, "learning_rate": 8.916111149330827e-05, "loss": 0.7546, "step": 35005 }, { "epoch": 0.9269451679529337, "grad_norm": 0.828125, "learning_rate": 8.915653488671077e-05, "loss": 0.9477, "step": 35006 }, { "epoch": 0.9269716475612281, "grad_norm": 0.8046875, "learning_rate": 8.91519583030954e-05, "loss": 0.6848, "step": 35007 }, { "epoch": 0.9269981271695225, "grad_norm": 0.828125, "learning_rate": 8.914738174247184e-05, "loss": 0.7627, "step": 35008 }, { "epoch": 0.9270246067778168, "grad_norm": 0.78125, "learning_rate": 8.914280520484979e-05, "loss": 0.7443, "step": 35009 }, { "epoch": 0.9270510863861112, "grad_norm": 0.89453125, "learning_rate": 8.913822869023893e-05, "loss": 0.8584, "step": 35010 }, { "epoch": 0.9270775659944056, "grad_norm": 0.765625, "learning_rate": 8.913365219864894e-05, "loss": 0.8282, "step": 35011 }, { "epoch": 0.9271040456027, "grad_norm": 0.74609375, "learning_rate": 8.91290757300896e-05, "loss": 0.7495, "step": 35012 }, { "epoch": 0.9271305252109944, "grad_norm": 0.8125, "learning_rate": 8.912449928457057e-05, "loss": 0.7597, "step": 35013 }, { "epoch": 0.9271570048192888, "grad_norm": 0.85546875, "learning_rate": 8.911992286210153e-05, "loss": 0.8581, "step": 35014 }, { "epoch": 0.9271834844275831, "grad_norm": 0.76171875, "learning_rate": 8.911534646269218e-05, "loss": 0.8291, "step": 35015 }, { "epoch": 0.9272099640358774, "grad_norm": 0.75390625, "learning_rate": 8.911077008635222e-05, "loss": 0.7828, "step": 35016 }, { "epoch": 0.9272364436441718, "grad_norm": 0.84375, "learning_rate": 8.910619373309139e-05, "loss": 0.8714, "step": 35017 }, { "epoch": 0.9272629232524662, "grad_norm": 0.8515625, "learning_rate": 8.910161740291936e-05, "loss": 0.7823, "step": 35018 }, { "epoch": 0.9272894028607606, "grad_norm": 0.828125, "learning_rate": 8.909704109584582e-05, "loss": 0.6801, "step": 35019 }, { "epoch": 0.9273158824690549, "grad_norm": 0.77734375, "learning_rate": 8.909246481188048e-05, "loss": 0.8952, "step": 35020 }, { "epoch": 0.9273423620773493, "grad_norm": 0.78515625, "learning_rate": 8.9087888551033e-05, "loss": 0.8124, "step": 35021 }, { "epoch": 0.9273688416856437, "grad_norm": 0.76171875, "learning_rate": 8.908331231331315e-05, "loss": 0.7473, "step": 35022 }, { "epoch": 0.9273953212939381, "grad_norm": 0.859375, "learning_rate": 8.907873609873061e-05, "loss": 0.7249, "step": 35023 }, { "epoch": 0.9274218009022325, "grad_norm": 0.77734375, "learning_rate": 8.907415990729506e-05, "loss": 0.7476, "step": 35024 }, { "epoch": 0.9274482805105269, "grad_norm": 0.78515625, "learning_rate": 8.906958373901619e-05, "loss": 0.8355, "step": 35025 }, { "epoch": 0.9274747601188212, "grad_norm": 0.7734375, "learning_rate": 8.906500759390368e-05, "loss": 0.8982, "step": 35026 }, { "epoch": 0.9275012397271156, "grad_norm": 0.73046875, "learning_rate": 8.90604314719673e-05, "loss": 0.7375, "step": 35027 }, { "epoch": 0.92752771933541, "grad_norm": 0.78125, "learning_rate": 8.905585537321671e-05, "loss": 0.7527, "step": 35028 }, { "epoch": 0.9275541989437044, "grad_norm": 0.890625, "learning_rate": 8.905127929766162e-05, "loss": 0.7397, "step": 35029 }, { "epoch": 0.9275806785519988, "grad_norm": 0.78515625, "learning_rate": 8.90467032453117e-05, "loss": 0.737, "step": 35030 }, { "epoch": 0.9276071581602932, "grad_norm": 0.80078125, "learning_rate": 8.904212721617664e-05, "loss": 0.924, "step": 35031 }, { "epoch": 0.9276336377685874, "grad_norm": 0.77734375, "learning_rate": 8.90375512102662e-05, "loss": 0.7979, "step": 35032 }, { "epoch": 0.9276601173768818, "grad_norm": 0.75, "learning_rate": 8.903297522759005e-05, "loss": 0.6898, "step": 35033 }, { "epoch": 0.9276865969851762, "grad_norm": 0.73046875, "learning_rate": 8.902839926815787e-05, "loss": 0.617, "step": 35034 }, { "epoch": 0.9277130765934706, "grad_norm": 0.8046875, "learning_rate": 8.902382333197938e-05, "loss": 0.9083, "step": 35035 }, { "epoch": 0.927739556201765, "grad_norm": 0.8203125, "learning_rate": 8.901924741906425e-05, "loss": 0.8214, "step": 35036 }, { "epoch": 0.9277660358100593, "grad_norm": 0.8203125, "learning_rate": 8.901467152942221e-05, "loss": 0.7215, "step": 35037 }, { "epoch": 0.9277925154183537, "grad_norm": 0.80859375, "learning_rate": 8.901009566306295e-05, "loss": 0.7997, "step": 35038 }, { "epoch": 0.9278189950266481, "grad_norm": 0.7265625, "learning_rate": 8.900551981999616e-05, "loss": 0.7635, "step": 35039 }, { "epoch": 0.9278454746349425, "grad_norm": 0.72265625, "learning_rate": 8.900094400023155e-05, "loss": 0.7572, "step": 35040 }, { "epoch": 0.9278719542432369, "grad_norm": 0.8515625, "learning_rate": 8.899636820377876e-05, "loss": 0.784, "step": 35041 }, { "epoch": 0.9278984338515313, "grad_norm": 0.7578125, "learning_rate": 8.899179243064759e-05, "loss": 0.863, "step": 35042 }, { "epoch": 0.9279249134598256, "grad_norm": 0.796875, "learning_rate": 8.898721668084768e-05, "loss": 0.8093, "step": 35043 }, { "epoch": 0.92795139306812, "grad_norm": 0.96875, "learning_rate": 8.898264095438874e-05, "loss": 0.8484, "step": 35044 }, { "epoch": 0.9279778726764144, "grad_norm": 0.7890625, "learning_rate": 8.897806525128045e-05, "loss": 0.8408, "step": 35045 }, { "epoch": 0.9280043522847088, "grad_norm": 0.72265625, "learning_rate": 8.89734895715325e-05, "loss": 0.7135, "step": 35046 }, { "epoch": 0.9280308318930032, "grad_norm": 0.78125, "learning_rate": 8.896891391515463e-05, "loss": 0.7796, "step": 35047 }, { "epoch": 0.9280573115012976, "grad_norm": 0.75390625, "learning_rate": 8.896433828215653e-05, "loss": 0.7668, "step": 35048 }, { "epoch": 0.9280837911095918, "grad_norm": 0.7578125, "learning_rate": 8.895976267254788e-05, "loss": 0.7944, "step": 35049 }, { "epoch": 0.9281102707178862, "grad_norm": 0.75, "learning_rate": 8.895518708633838e-05, "loss": 0.8075, "step": 35050 }, { "epoch": 0.9281367503261806, "grad_norm": 0.79296875, "learning_rate": 8.895061152353768e-05, "loss": 0.7363, "step": 35051 }, { "epoch": 0.928163229934475, "grad_norm": 0.70703125, "learning_rate": 8.894603598415558e-05, "loss": 0.8274, "step": 35052 }, { "epoch": 0.9281897095427694, "grad_norm": 0.796875, "learning_rate": 8.894146046820174e-05, "loss": 0.7818, "step": 35053 }, { "epoch": 0.9282161891510637, "grad_norm": 0.7734375, "learning_rate": 8.893688497568582e-05, "loss": 0.7784, "step": 35054 }, { "epoch": 0.9282426687593581, "grad_norm": 0.74609375, "learning_rate": 8.893230950661755e-05, "loss": 0.836, "step": 35055 }, { "epoch": 0.9282691483676525, "grad_norm": 0.796875, "learning_rate": 8.892773406100658e-05, "loss": 0.8144, "step": 35056 }, { "epoch": 0.9282956279759469, "grad_norm": 0.7890625, "learning_rate": 8.892315863886268e-05, "loss": 0.771, "step": 35057 }, { "epoch": 0.9283221075842413, "grad_norm": 0.81640625, "learning_rate": 8.891858324019552e-05, "loss": 0.8606, "step": 35058 }, { "epoch": 0.9283485871925357, "grad_norm": 0.83984375, "learning_rate": 8.891400786501476e-05, "loss": 0.8162, "step": 35059 }, { "epoch": 0.92837506680083, "grad_norm": 0.76171875, "learning_rate": 8.890943251333014e-05, "loss": 0.8104, "step": 35060 }, { "epoch": 0.9284015464091244, "grad_norm": 0.76953125, "learning_rate": 8.890485718515132e-05, "loss": 0.803, "step": 35061 }, { "epoch": 0.9284280260174188, "grad_norm": 0.7734375, "learning_rate": 8.890028188048806e-05, "loss": 0.7644, "step": 35062 }, { "epoch": 0.9284545056257132, "grad_norm": 0.78125, "learning_rate": 8.889570659935002e-05, "loss": 0.7302, "step": 35063 }, { "epoch": 0.9284809852340076, "grad_norm": 0.80859375, "learning_rate": 8.889113134174689e-05, "loss": 0.8063, "step": 35064 }, { "epoch": 0.9285074648423018, "grad_norm": 0.8359375, "learning_rate": 8.888655610768836e-05, "loss": 0.7728, "step": 35065 }, { "epoch": 0.9285339444505962, "grad_norm": 0.81640625, "learning_rate": 8.88819808971841e-05, "loss": 0.8521, "step": 35066 }, { "epoch": 0.9285604240588906, "grad_norm": 0.87890625, "learning_rate": 8.88774057102439e-05, "loss": 0.7422, "step": 35067 }, { "epoch": 0.928586903667185, "grad_norm": 0.75, "learning_rate": 8.887283054687738e-05, "loss": 0.7258, "step": 35068 }, { "epoch": 0.9286133832754794, "grad_norm": 0.984375, "learning_rate": 8.886825540709428e-05, "loss": 0.858, "step": 35069 }, { "epoch": 0.9286398628837738, "grad_norm": 0.79296875, "learning_rate": 8.886368029090428e-05, "loss": 0.8671, "step": 35070 }, { "epoch": 0.9286663424920681, "grad_norm": 0.79296875, "learning_rate": 8.8859105198317e-05, "loss": 0.8131, "step": 35071 }, { "epoch": 0.9286928221003625, "grad_norm": 0.76953125, "learning_rate": 8.885453012934228e-05, "loss": 0.6768, "step": 35072 }, { "epoch": 0.9287193017086569, "grad_norm": 0.796875, "learning_rate": 8.884995508398975e-05, "loss": 0.9119, "step": 35073 }, { "epoch": 0.9287457813169513, "grad_norm": 0.8203125, "learning_rate": 8.884538006226909e-05, "loss": 0.7747, "step": 35074 }, { "epoch": 0.9287722609252457, "grad_norm": 0.74609375, "learning_rate": 8.884080506419002e-05, "loss": 0.6689, "step": 35075 }, { "epoch": 0.9287987405335401, "grad_norm": 0.74609375, "learning_rate": 8.883623008976219e-05, "loss": 0.7972, "step": 35076 }, { "epoch": 0.9288252201418344, "grad_norm": 0.79296875, "learning_rate": 8.883165513899536e-05, "loss": 0.8735, "step": 35077 }, { "epoch": 0.9288516997501288, "grad_norm": 0.82421875, "learning_rate": 8.882708021189917e-05, "loss": 0.7392, "step": 35078 }, { "epoch": 0.9288781793584232, "grad_norm": 0.81640625, "learning_rate": 8.882250530848338e-05, "loss": 0.8702, "step": 35079 }, { "epoch": 0.9289046589667176, "grad_norm": 0.75, "learning_rate": 8.881793042875764e-05, "loss": 0.7885, "step": 35080 }, { "epoch": 0.9289311385750119, "grad_norm": 0.828125, "learning_rate": 8.881335557273165e-05, "loss": 0.9156, "step": 35081 }, { "epoch": 0.9289576181833062, "grad_norm": 0.8125, "learning_rate": 8.880878074041512e-05, "loss": 0.8429, "step": 35082 }, { "epoch": 0.9289840977916006, "grad_norm": 0.796875, "learning_rate": 8.880420593181775e-05, "loss": 0.8381, "step": 35083 }, { "epoch": 0.929010577399895, "grad_norm": 0.75390625, "learning_rate": 8.879963114694923e-05, "loss": 0.7421, "step": 35084 }, { "epoch": 0.9290370570081894, "grad_norm": 0.8515625, "learning_rate": 8.879505638581922e-05, "loss": 0.7964, "step": 35085 }, { "epoch": 0.9290635366164838, "grad_norm": 0.78125, "learning_rate": 8.879048164843744e-05, "loss": 0.7835, "step": 35086 }, { "epoch": 0.9290900162247782, "grad_norm": 0.72265625, "learning_rate": 8.878590693481362e-05, "loss": 0.6858, "step": 35087 }, { "epoch": 0.9291164958330725, "grad_norm": 0.76171875, "learning_rate": 8.878133224495743e-05, "loss": 0.8152, "step": 35088 }, { "epoch": 0.9291429754413669, "grad_norm": 0.6875, "learning_rate": 8.877675757887858e-05, "loss": 0.7781, "step": 35089 }, { "epoch": 0.9291694550496613, "grad_norm": 0.76171875, "learning_rate": 8.877218293658673e-05, "loss": 0.7748, "step": 35090 }, { "epoch": 0.9291959346579557, "grad_norm": 0.75390625, "learning_rate": 8.876760831809156e-05, "loss": 0.8203, "step": 35091 }, { "epoch": 0.9292224142662501, "grad_norm": 0.78515625, "learning_rate": 8.876303372340286e-05, "loss": 0.8042, "step": 35092 }, { "epoch": 0.9292488938745445, "grad_norm": 0.82421875, "learning_rate": 8.875845915253025e-05, "loss": 0.7569, "step": 35093 }, { "epoch": 0.9292753734828388, "grad_norm": 0.80078125, "learning_rate": 8.875388460548345e-05, "loss": 0.8347, "step": 35094 }, { "epoch": 0.9293018530911332, "grad_norm": 0.8671875, "learning_rate": 8.874931008227214e-05, "loss": 0.8721, "step": 35095 }, { "epoch": 0.9293283326994276, "grad_norm": 0.72265625, "learning_rate": 8.874473558290602e-05, "loss": 0.7601, "step": 35096 }, { "epoch": 0.929354812307722, "grad_norm": 0.80859375, "learning_rate": 8.874016110739477e-05, "loss": 0.8251, "step": 35097 }, { "epoch": 0.9293812919160163, "grad_norm": 0.76953125, "learning_rate": 8.873558665574813e-05, "loss": 0.7878, "step": 35098 }, { "epoch": 0.9294077715243106, "grad_norm": 0.83203125, "learning_rate": 8.873101222797578e-05, "loss": 0.8295, "step": 35099 }, { "epoch": 0.929434251132605, "grad_norm": 0.77734375, "learning_rate": 8.87264378240874e-05, "loss": 0.8483, "step": 35100 }, { "epoch": 0.9294607307408994, "grad_norm": 0.90625, "learning_rate": 8.872186344409268e-05, "loss": 0.8379, "step": 35101 }, { "epoch": 0.9294872103491938, "grad_norm": 0.73828125, "learning_rate": 8.871728908800134e-05, "loss": 0.7371, "step": 35102 }, { "epoch": 0.9295136899574882, "grad_norm": 0.76171875, "learning_rate": 8.871271475582305e-05, "loss": 0.7158, "step": 35103 }, { "epoch": 0.9295401695657826, "grad_norm": 0.80078125, "learning_rate": 8.870814044756754e-05, "loss": 0.8548, "step": 35104 }, { "epoch": 0.9295666491740769, "grad_norm": 0.8125, "learning_rate": 8.870356616324447e-05, "loss": 0.7514, "step": 35105 }, { "epoch": 0.9295931287823713, "grad_norm": 0.7890625, "learning_rate": 8.86989919028635e-05, "loss": 0.7399, "step": 35106 }, { "epoch": 0.9296196083906657, "grad_norm": 0.80078125, "learning_rate": 8.86944176664344e-05, "loss": 0.6481, "step": 35107 }, { "epoch": 0.9296460879989601, "grad_norm": 0.78125, "learning_rate": 8.868984345396684e-05, "loss": 0.8339, "step": 35108 }, { "epoch": 0.9296725676072545, "grad_norm": 0.765625, "learning_rate": 8.868526926547053e-05, "loss": 0.7862, "step": 35109 }, { "epoch": 0.9296990472155489, "grad_norm": 0.80859375, "learning_rate": 8.868069510095513e-05, "loss": 0.8588, "step": 35110 }, { "epoch": 0.9297255268238432, "grad_norm": 0.734375, "learning_rate": 8.867612096043031e-05, "loss": 0.7332, "step": 35111 }, { "epoch": 0.9297520064321376, "grad_norm": 0.80078125, "learning_rate": 8.867154684390586e-05, "loss": 0.9167, "step": 35112 }, { "epoch": 0.929778486040432, "grad_norm": 0.8984375, "learning_rate": 8.86669727513914e-05, "loss": 0.799, "step": 35113 }, { "epoch": 0.9298049656487263, "grad_norm": 0.76171875, "learning_rate": 8.866239868289666e-05, "loss": 0.6258, "step": 35114 }, { "epoch": 0.9298314452570207, "grad_norm": 0.75390625, "learning_rate": 8.86578246384313e-05, "loss": 0.7397, "step": 35115 }, { "epoch": 0.929857924865315, "grad_norm": 0.8359375, "learning_rate": 8.865325061800499e-05, "loss": 0.9208, "step": 35116 }, { "epoch": 0.9298844044736094, "grad_norm": 0.828125, "learning_rate": 8.864867662162752e-05, "loss": 0.8417, "step": 35117 }, { "epoch": 0.9299108840819038, "grad_norm": 0.81640625, "learning_rate": 8.864410264930854e-05, "loss": 0.7791, "step": 35118 }, { "epoch": 0.9299373636901982, "grad_norm": 0.71484375, "learning_rate": 8.863952870105772e-05, "loss": 0.9516, "step": 35119 }, { "epoch": 0.9299638432984926, "grad_norm": 0.765625, "learning_rate": 8.863495477688477e-05, "loss": 0.6605, "step": 35120 }, { "epoch": 0.929990322906787, "grad_norm": 0.90625, "learning_rate": 8.863038087679936e-05, "loss": 0.8321, "step": 35121 }, { "epoch": 0.9300168025150813, "grad_norm": 0.8046875, "learning_rate": 8.862580700081123e-05, "loss": 0.8554, "step": 35122 }, { "epoch": 0.9300432821233757, "grad_norm": 0.74609375, "learning_rate": 8.862123314893007e-05, "loss": 0.7519, "step": 35123 }, { "epoch": 0.9300697617316701, "grad_norm": 0.84375, "learning_rate": 8.861665932116554e-05, "loss": 0.8216, "step": 35124 }, { "epoch": 0.9300962413399645, "grad_norm": 0.75390625, "learning_rate": 8.861208551752736e-05, "loss": 0.8471, "step": 35125 }, { "epoch": 0.9301227209482589, "grad_norm": 0.82421875, "learning_rate": 8.860751173802515e-05, "loss": 0.7924, "step": 35126 }, { "epoch": 0.9301492005565533, "grad_norm": 0.76953125, "learning_rate": 8.860293798266873e-05, "loss": 0.7692, "step": 35127 }, { "epoch": 0.9301756801648476, "grad_norm": 0.83203125, "learning_rate": 8.859836425146773e-05, "loss": 0.8298, "step": 35128 }, { "epoch": 0.930202159773142, "grad_norm": 0.71484375, "learning_rate": 8.859379054443185e-05, "loss": 0.7072, "step": 35129 }, { "epoch": 0.9302286393814363, "grad_norm": 0.734375, "learning_rate": 8.858921686157076e-05, "loss": 0.8342, "step": 35130 }, { "epoch": 0.9302551189897307, "grad_norm": 0.70703125, "learning_rate": 8.858464320289414e-05, "loss": 0.7468, "step": 35131 }, { "epoch": 0.9302815985980251, "grad_norm": 0.8359375, "learning_rate": 8.858006956841176e-05, "loss": 0.8586, "step": 35132 }, { "epoch": 0.9303080782063194, "grad_norm": 0.76171875, "learning_rate": 8.857549595813327e-05, "loss": 0.7276, "step": 35133 }, { "epoch": 0.9303345578146138, "grad_norm": 0.8125, "learning_rate": 8.857092237206836e-05, "loss": 0.8244, "step": 35134 }, { "epoch": 0.9303610374229082, "grad_norm": 0.77734375, "learning_rate": 8.856634881022673e-05, "loss": 0.8044, "step": 35135 }, { "epoch": 0.9303875170312026, "grad_norm": 0.75390625, "learning_rate": 8.856177527261807e-05, "loss": 0.8399, "step": 35136 }, { "epoch": 0.930413996639497, "grad_norm": 0.7890625, "learning_rate": 8.855720175925203e-05, "loss": 0.8316, "step": 35137 }, { "epoch": 0.9304404762477914, "grad_norm": 0.80078125, "learning_rate": 8.855262827013838e-05, "loss": 0.8201, "step": 35138 }, { "epoch": 0.9304669558560857, "grad_norm": 0.80859375, "learning_rate": 8.85480548052868e-05, "loss": 0.8839, "step": 35139 }, { "epoch": 0.9304934354643801, "grad_norm": 0.77734375, "learning_rate": 8.854348136470695e-05, "loss": 0.8153, "step": 35140 }, { "epoch": 0.9305199150726745, "grad_norm": 0.78515625, "learning_rate": 8.853890794840854e-05, "loss": 0.8287, "step": 35141 }, { "epoch": 0.9305463946809689, "grad_norm": 0.71875, "learning_rate": 8.853433455640124e-05, "loss": 0.8052, "step": 35142 }, { "epoch": 0.9305728742892633, "grad_norm": 0.84765625, "learning_rate": 8.852976118869478e-05, "loss": 0.8602, "step": 35143 }, { "epoch": 0.9305993538975577, "grad_norm": 0.78515625, "learning_rate": 8.852518784529884e-05, "loss": 0.8506, "step": 35144 }, { "epoch": 0.930625833505852, "grad_norm": 0.77734375, "learning_rate": 8.852061452622308e-05, "loss": 0.6993, "step": 35145 }, { "epoch": 0.9306523131141464, "grad_norm": 0.765625, "learning_rate": 8.851604123147725e-05, "loss": 0.7407, "step": 35146 }, { "epoch": 0.9306787927224407, "grad_norm": 0.7734375, "learning_rate": 8.851146796107098e-05, "loss": 0.8717, "step": 35147 }, { "epoch": 0.9307052723307351, "grad_norm": 0.79296875, "learning_rate": 8.850689471501403e-05, "loss": 0.7279, "step": 35148 }, { "epoch": 0.9307317519390295, "grad_norm": 0.73046875, "learning_rate": 8.850232149331606e-05, "loss": 0.7586, "step": 35149 }, { "epoch": 0.9307582315473238, "grad_norm": 0.80859375, "learning_rate": 8.849774829598676e-05, "loss": 0.7543, "step": 35150 }, { "epoch": 0.9307847111556182, "grad_norm": 0.75, "learning_rate": 8.849317512303583e-05, "loss": 0.6649, "step": 35151 }, { "epoch": 0.9308111907639126, "grad_norm": 0.76953125, "learning_rate": 8.848860197447291e-05, "loss": 0.7259, "step": 35152 }, { "epoch": 0.930837670372207, "grad_norm": 0.8125, "learning_rate": 8.848402885030778e-05, "loss": 0.9223, "step": 35153 }, { "epoch": 0.9308641499805014, "grad_norm": 0.86328125, "learning_rate": 8.84794557505501e-05, "loss": 0.882, "step": 35154 }, { "epoch": 0.9308906295887958, "grad_norm": 0.8359375, "learning_rate": 8.847488267520954e-05, "loss": 0.7464, "step": 35155 }, { "epoch": 0.9309171091970901, "grad_norm": 0.78125, "learning_rate": 8.847030962429582e-05, "loss": 0.7747, "step": 35156 }, { "epoch": 0.9309435888053845, "grad_norm": 0.76171875, "learning_rate": 8.846573659781859e-05, "loss": 0.7477, "step": 35157 }, { "epoch": 0.9309700684136789, "grad_norm": 0.73828125, "learning_rate": 8.846116359578758e-05, "loss": 0.681, "step": 35158 }, { "epoch": 0.9309965480219733, "grad_norm": 0.79296875, "learning_rate": 8.845659061821251e-05, "loss": 0.8143, "step": 35159 }, { "epoch": 0.9310230276302677, "grad_norm": 0.859375, "learning_rate": 8.845201766510301e-05, "loss": 0.7205, "step": 35160 }, { "epoch": 0.9310495072385621, "grad_norm": 0.94921875, "learning_rate": 8.844744473646882e-05, "loss": 0.7032, "step": 35161 }, { "epoch": 0.9310759868468564, "grad_norm": 0.75, "learning_rate": 8.844287183231957e-05, "loss": 0.7934, "step": 35162 }, { "epoch": 0.9311024664551507, "grad_norm": 0.7890625, "learning_rate": 8.843829895266503e-05, "loss": 0.7665, "step": 35163 }, { "epoch": 0.9311289460634451, "grad_norm": 0.8203125, "learning_rate": 8.843372609751482e-05, "loss": 0.824, "step": 35164 }, { "epoch": 0.9311554256717395, "grad_norm": 0.76953125, "learning_rate": 8.84291532668787e-05, "loss": 0.7027, "step": 35165 }, { "epoch": 0.9311819052800339, "grad_norm": 0.76171875, "learning_rate": 8.842458046076632e-05, "loss": 0.7922, "step": 35166 }, { "epoch": 0.9312083848883282, "grad_norm": 0.83984375, "learning_rate": 8.842000767918736e-05, "loss": 0.7047, "step": 35167 }, { "epoch": 0.9312348644966226, "grad_norm": 0.8359375, "learning_rate": 8.841543492215156e-05, "loss": 0.7605, "step": 35168 }, { "epoch": 0.931261344104917, "grad_norm": 0.796875, "learning_rate": 8.841086218966857e-05, "loss": 0.8376, "step": 35169 }, { "epoch": 0.9312878237132114, "grad_norm": 0.890625, "learning_rate": 8.84062894817481e-05, "loss": 0.9529, "step": 35170 }, { "epoch": 0.9313143033215058, "grad_norm": 0.78515625, "learning_rate": 8.840171679839983e-05, "loss": 0.7968, "step": 35171 }, { "epoch": 0.9313407829298002, "grad_norm": 0.76171875, "learning_rate": 8.839714413963343e-05, "loss": 0.6876, "step": 35172 }, { "epoch": 0.9313672625380945, "grad_norm": 0.7421875, "learning_rate": 8.839257150545866e-05, "loss": 0.7149, "step": 35173 }, { "epoch": 0.9313937421463889, "grad_norm": 0.734375, "learning_rate": 8.838799889588518e-05, "loss": 0.7744, "step": 35174 }, { "epoch": 0.9314202217546833, "grad_norm": 0.76953125, "learning_rate": 8.838342631092265e-05, "loss": 0.8307, "step": 35175 }, { "epoch": 0.9314467013629777, "grad_norm": 0.71875, "learning_rate": 8.83788537505808e-05, "loss": 0.853, "step": 35176 }, { "epoch": 0.9314731809712721, "grad_norm": 0.81640625, "learning_rate": 8.837428121486925e-05, "loss": 0.7995, "step": 35177 }, { "epoch": 0.9314996605795665, "grad_norm": 0.70703125, "learning_rate": 8.836970870379779e-05, "loss": 0.8126, "step": 35178 }, { "epoch": 0.9315261401878607, "grad_norm": 0.89453125, "learning_rate": 8.836513621737608e-05, "loss": 0.8902, "step": 35179 }, { "epoch": 0.9315526197961551, "grad_norm": 0.8125, "learning_rate": 8.83605637556138e-05, "loss": 0.8198, "step": 35180 }, { "epoch": 0.9315790994044495, "grad_norm": 0.703125, "learning_rate": 8.835599131852063e-05, "loss": 0.8254, "step": 35181 }, { "epoch": 0.9316055790127439, "grad_norm": 0.78125, "learning_rate": 8.835141890610624e-05, "loss": 0.7739, "step": 35182 }, { "epoch": 0.9316320586210383, "grad_norm": 0.703125, "learning_rate": 8.834684651838037e-05, "loss": 0.7403, "step": 35183 }, { "epoch": 0.9316585382293326, "grad_norm": 0.75390625, "learning_rate": 8.83422741553527e-05, "loss": 0.7083, "step": 35184 }, { "epoch": 0.931685017837627, "grad_norm": 0.76171875, "learning_rate": 8.833770181703293e-05, "loss": 0.808, "step": 35185 }, { "epoch": 0.9317114974459214, "grad_norm": 0.79296875, "learning_rate": 8.833312950343073e-05, "loss": 0.8173, "step": 35186 }, { "epoch": 0.9317379770542158, "grad_norm": 0.7265625, "learning_rate": 8.832855721455577e-05, "loss": 0.8166, "step": 35187 }, { "epoch": 0.9317644566625102, "grad_norm": 0.7734375, "learning_rate": 8.832398495041779e-05, "loss": 0.7587, "step": 35188 }, { "epoch": 0.9317909362708046, "grad_norm": 0.8359375, "learning_rate": 8.831941271102645e-05, "loss": 0.9162, "step": 35189 }, { "epoch": 0.9318174158790989, "grad_norm": 0.80859375, "learning_rate": 8.831484049639146e-05, "loss": 0.6923, "step": 35190 }, { "epoch": 0.9318438954873933, "grad_norm": 0.76171875, "learning_rate": 8.831026830652248e-05, "loss": 0.7248, "step": 35191 }, { "epoch": 0.9318703750956877, "grad_norm": 0.734375, "learning_rate": 8.830569614142919e-05, "loss": 0.7995, "step": 35192 }, { "epoch": 0.9318968547039821, "grad_norm": 0.72265625, "learning_rate": 8.830112400112134e-05, "loss": 0.6961, "step": 35193 }, { "epoch": 0.9319233343122765, "grad_norm": 0.828125, "learning_rate": 8.82965518856086e-05, "loss": 0.7397, "step": 35194 }, { "epoch": 0.9319498139205709, "grad_norm": 0.7734375, "learning_rate": 8.829197979490065e-05, "loss": 0.6424, "step": 35195 }, { "epoch": 0.9319762935288651, "grad_norm": 0.83203125, "learning_rate": 8.828740772900716e-05, "loss": 0.814, "step": 35196 }, { "epoch": 0.9320027731371595, "grad_norm": 0.77734375, "learning_rate": 8.828283568793782e-05, "loss": 0.8598, "step": 35197 }, { "epoch": 0.9320292527454539, "grad_norm": 0.84375, "learning_rate": 8.827826367170236e-05, "loss": 0.8354, "step": 35198 }, { "epoch": 0.9320557323537483, "grad_norm": 0.875, "learning_rate": 8.827369168031048e-05, "loss": 0.835, "step": 35199 }, { "epoch": 0.9320822119620427, "grad_norm": 0.83203125, "learning_rate": 8.826911971377182e-05, "loss": 0.8858, "step": 35200 }, { "epoch": 0.932108691570337, "grad_norm": 0.70703125, "learning_rate": 8.82645477720961e-05, "loss": 0.7804, "step": 35201 }, { "epoch": 0.9321351711786314, "grad_norm": 0.75390625, "learning_rate": 8.825997585529295e-05, "loss": 0.8038, "step": 35202 }, { "epoch": 0.9321616507869258, "grad_norm": 0.84765625, "learning_rate": 8.825540396337215e-05, "loss": 0.8483, "step": 35203 }, { "epoch": 0.9321881303952202, "grad_norm": 0.9609375, "learning_rate": 8.825083209634336e-05, "loss": 0.7119, "step": 35204 }, { "epoch": 0.9322146100035146, "grad_norm": 0.75390625, "learning_rate": 8.824626025421626e-05, "loss": 0.7749, "step": 35205 }, { "epoch": 0.932241089611809, "grad_norm": 0.80078125, "learning_rate": 8.824168843700052e-05, "loss": 0.77, "step": 35206 }, { "epoch": 0.9322675692201033, "grad_norm": 0.828125, "learning_rate": 8.823711664470585e-05, "loss": 0.8858, "step": 35207 }, { "epoch": 0.9322940488283977, "grad_norm": 0.73828125, "learning_rate": 8.823254487734194e-05, "loss": 0.824, "step": 35208 }, { "epoch": 0.9323205284366921, "grad_norm": 0.78515625, "learning_rate": 8.82279731349185e-05, "loss": 0.7382, "step": 35209 }, { "epoch": 0.9323470080449865, "grad_norm": 0.828125, "learning_rate": 8.822340141744518e-05, "loss": 0.7482, "step": 35210 }, { "epoch": 0.9323734876532809, "grad_norm": 0.8359375, "learning_rate": 8.82188297249317e-05, "loss": 0.8862, "step": 35211 }, { "epoch": 0.9323999672615751, "grad_norm": 0.75, "learning_rate": 8.82142580573877e-05, "loss": 0.831, "step": 35212 }, { "epoch": 0.9324264468698695, "grad_norm": 0.76953125, "learning_rate": 8.820968641482294e-05, "loss": 0.9107, "step": 35213 }, { "epoch": 0.9324529264781639, "grad_norm": 0.78125, "learning_rate": 8.820511479724707e-05, "loss": 0.8441, "step": 35214 }, { "epoch": 0.9324794060864583, "grad_norm": 0.765625, "learning_rate": 8.820054320466978e-05, "loss": 0.8265, "step": 35215 }, { "epoch": 0.9325058856947527, "grad_norm": 0.7578125, "learning_rate": 8.819597163710078e-05, "loss": 0.8503, "step": 35216 }, { "epoch": 0.9325323653030471, "grad_norm": 0.7734375, "learning_rate": 8.819140009454969e-05, "loss": 0.9335, "step": 35217 }, { "epoch": 0.9325588449113414, "grad_norm": 0.83203125, "learning_rate": 8.81868285770263e-05, "loss": 0.7558, "step": 35218 }, { "epoch": 0.9325853245196358, "grad_norm": 0.7890625, "learning_rate": 8.818225708454026e-05, "loss": 0.6889, "step": 35219 }, { "epoch": 0.9326118041279302, "grad_norm": 0.73828125, "learning_rate": 8.817768561710125e-05, "loss": 0.6994, "step": 35220 }, { "epoch": 0.9326382837362246, "grad_norm": 0.8671875, "learning_rate": 8.817311417471894e-05, "loss": 0.909, "step": 35221 }, { "epoch": 0.932664763344519, "grad_norm": 0.859375, "learning_rate": 8.816854275740301e-05, "loss": 0.7341, "step": 35222 }, { "epoch": 0.9326912429528134, "grad_norm": 0.71875, "learning_rate": 8.816397136516322e-05, "loss": 0.6286, "step": 35223 }, { "epoch": 0.9327177225611077, "grad_norm": 0.80859375, "learning_rate": 8.815939999800922e-05, "loss": 0.7808, "step": 35224 }, { "epoch": 0.9327442021694021, "grad_norm": 0.796875, "learning_rate": 8.815482865595068e-05, "loss": 0.7507, "step": 35225 }, { "epoch": 0.9327706817776965, "grad_norm": 0.8046875, "learning_rate": 8.815025733899732e-05, "loss": 0.7894, "step": 35226 }, { "epoch": 0.9327971613859909, "grad_norm": 0.8203125, "learning_rate": 8.814568604715878e-05, "loss": 0.772, "step": 35227 }, { "epoch": 0.9328236409942852, "grad_norm": 0.7734375, "learning_rate": 8.814111478044481e-05, "loss": 0.8749, "step": 35228 }, { "epoch": 0.9328501206025795, "grad_norm": 0.78125, "learning_rate": 8.813654353886506e-05, "loss": 0.7347, "step": 35229 }, { "epoch": 0.9328766002108739, "grad_norm": 0.80078125, "learning_rate": 8.813197232242922e-05, "loss": 1.0664, "step": 35230 }, { "epoch": 0.9329030798191683, "grad_norm": 0.84765625, "learning_rate": 8.812740113114701e-05, "loss": 0.8136, "step": 35231 }, { "epoch": 0.9329295594274627, "grad_norm": 0.79296875, "learning_rate": 8.812282996502805e-05, "loss": 0.7441, "step": 35232 }, { "epoch": 0.9329560390357571, "grad_norm": 0.76953125, "learning_rate": 8.81182588240821e-05, "loss": 0.7735, "step": 35233 }, { "epoch": 0.9329825186440515, "grad_norm": 0.79296875, "learning_rate": 8.811368770831884e-05, "loss": 0.7804, "step": 35234 }, { "epoch": 0.9330089982523458, "grad_norm": 0.8046875, "learning_rate": 8.810911661774792e-05, "loss": 0.7243, "step": 35235 }, { "epoch": 0.9330354778606402, "grad_norm": 0.76953125, "learning_rate": 8.810454555237906e-05, "loss": 0.8517, "step": 35236 }, { "epoch": 0.9330619574689346, "grad_norm": 0.76171875, "learning_rate": 8.809997451222189e-05, "loss": 0.7934, "step": 35237 }, { "epoch": 0.933088437077229, "grad_norm": 0.7421875, "learning_rate": 8.809540349728619e-05, "loss": 0.8419, "step": 35238 }, { "epoch": 0.9331149166855234, "grad_norm": 0.82421875, "learning_rate": 8.80908325075816e-05, "loss": 0.8179, "step": 35239 }, { "epoch": 0.9331413962938178, "grad_norm": 1.2109375, "learning_rate": 8.808626154311781e-05, "loss": 0.8058, "step": 35240 }, { "epoch": 0.9331678759021121, "grad_norm": 0.83203125, "learning_rate": 8.808169060390449e-05, "loss": 0.8305, "step": 35241 }, { "epoch": 0.9331943555104065, "grad_norm": 0.76171875, "learning_rate": 8.807711968995133e-05, "loss": 0.79, "step": 35242 }, { "epoch": 0.9332208351187009, "grad_norm": 0.76171875, "learning_rate": 8.807254880126807e-05, "loss": 0.6897, "step": 35243 }, { "epoch": 0.9332473147269953, "grad_norm": 0.79296875, "learning_rate": 8.806797793786434e-05, "loss": 0.7554, "step": 35244 }, { "epoch": 0.9332737943352896, "grad_norm": 0.7265625, "learning_rate": 8.806340709974987e-05, "loss": 0.7171, "step": 35245 }, { "epoch": 0.933300273943584, "grad_norm": 0.8046875, "learning_rate": 8.805883628693432e-05, "loss": 0.7394, "step": 35246 }, { "epoch": 0.9333267535518783, "grad_norm": 0.76171875, "learning_rate": 8.805426549942737e-05, "loss": 0.7611, "step": 35247 }, { "epoch": 0.9333532331601727, "grad_norm": 0.7421875, "learning_rate": 8.804969473723874e-05, "loss": 0.861, "step": 35248 }, { "epoch": 0.9333797127684671, "grad_norm": 0.80078125, "learning_rate": 8.804512400037807e-05, "loss": 0.8681, "step": 35249 }, { "epoch": 0.9334061923767615, "grad_norm": 0.859375, "learning_rate": 8.80405532888551e-05, "loss": 0.7375, "step": 35250 }, { "epoch": 0.9334326719850559, "grad_norm": 0.7890625, "learning_rate": 8.803598260267949e-05, "loss": 0.7759, "step": 35251 }, { "epoch": 0.9334591515933502, "grad_norm": 0.8203125, "learning_rate": 8.803141194186091e-05, "loss": 0.8972, "step": 35252 }, { "epoch": 0.9334856312016446, "grad_norm": 0.78125, "learning_rate": 8.80268413064091e-05, "loss": 0.8488, "step": 35253 }, { "epoch": 0.933512110809939, "grad_norm": 0.828125, "learning_rate": 8.802227069633371e-05, "loss": 0.8579, "step": 35254 }, { "epoch": 0.9335385904182334, "grad_norm": 0.8125, "learning_rate": 8.801770011164443e-05, "loss": 0.7793, "step": 35255 }, { "epoch": 0.9335650700265278, "grad_norm": 0.8203125, "learning_rate": 8.801312955235094e-05, "loss": 0.8765, "step": 35256 }, { "epoch": 0.9335915496348222, "grad_norm": 0.7265625, "learning_rate": 8.800855901846292e-05, "loss": 0.8654, "step": 35257 }, { "epoch": 0.9336180292431165, "grad_norm": 0.890625, "learning_rate": 8.80039885099901e-05, "loss": 0.7538, "step": 35258 }, { "epoch": 0.9336445088514109, "grad_norm": 0.78515625, "learning_rate": 8.799941802694213e-05, "loss": 0.7369, "step": 35259 }, { "epoch": 0.9336709884597053, "grad_norm": 0.765625, "learning_rate": 8.799484756932872e-05, "loss": 0.8319, "step": 35260 }, { "epoch": 0.9336974680679996, "grad_norm": 0.80859375, "learning_rate": 8.799027713715956e-05, "loss": 0.841, "step": 35261 }, { "epoch": 0.933723947676294, "grad_norm": 0.83984375, "learning_rate": 8.798570673044426e-05, "loss": 0.857, "step": 35262 }, { "epoch": 0.9337504272845883, "grad_norm": 1.375, "learning_rate": 8.798113634919262e-05, "loss": 1.0359, "step": 35263 }, { "epoch": 0.9337769068928827, "grad_norm": 0.71484375, "learning_rate": 8.797656599341426e-05, "loss": 0.802, "step": 35264 }, { "epoch": 0.9338033865011771, "grad_norm": 0.94921875, "learning_rate": 8.79719956631189e-05, "loss": 0.874, "step": 35265 }, { "epoch": 0.9338298661094715, "grad_norm": 0.78125, "learning_rate": 8.796742535831619e-05, "loss": 0.7697, "step": 35266 }, { "epoch": 0.9338563457177659, "grad_norm": 0.81640625, "learning_rate": 8.796285507901582e-05, "loss": 0.8761, "step": 35267 }, { "epoch": 0.9338828253260603, "grad_norm": 0.75390625, "learning_rate": 8.79582848252275e-05, "loss": 0.6701, "step": 35268 }, { "epoch": 0.9339093049343546, "grad_norm": 0.74609375, "learning_rate": 8.795371459696091e-05, "loss": 0.7756, "step": 35269 }, { "epoch": 0.933935784542649, "grad_norm": 0.73828125, "learning_rate": 8.794914439422575e-05, "loss": 0.6645, "step": 35270 }, { "epoch": 0.9339622641509434, "grad_norm": 0.78515625, "learning_rate": 8.794457421703168e-05, "loss": 0.9246, "step": 35271 }, { "epoch": 0.9339887437592378, "grad_norm": 0.71875, "learning_rate": 8.794000406538838e-05, "loss": 0.7448, "step": 35272 }, { "epoch": 0.9340152233675322, "grad_norm": 0.78125, "learning_rate": 8.793543393930557e-05, "loss": 0.8511, "step": 35273 }, { "epoch": 0.9340417029758266, "grad_norm": 0.83984375, "learning_rate": 8.793086383879292e-05, "loss": 0.8037, "step": 35274 }, { "epoch": 0.9340681825841209, "grad_norm": 0.78515625, "learning_rate": 8.792629376386012e-05, "loss": 0.8013, "step": 35275 }, { "epoch": 0.9340946621924153, "grad_norm": 0.75, "learning_rate": 8.792172371451684e-05, "loss": 0.8569, "step": 35276 }, { "epoch": 0.9341211418007097, "grad_norm": 0.80078125, "learning_rate": 8.791715369077277e-05, "loss": 0.7789, "step": 35277 }, { "epoch": 0.934147621409004, "grad_norm": 0.73046875, "learning_rate": 8.791258369263758e-05, "loss": 0.7124, "step": 35278 }, { "epoch": 0.9341741010172984, "grad_norm": 0.7734375, "learning_rate": 8.790801372012101e-05, "loss": 0.7889, "step": 35279 }, { "epoch": 0.9342005806255927, "grad_norm": 0.765625, "learning_rate": 8.790344377323271e-05, "loss": 0.8546, "step": 35280 }, { "epoch": 0.9342270602338871, "grad_norm": 0.796875, "learning_rate": 8.789887385198237e-05, "loss": 0.7583, "step": 35281 }, { "epoch": 0.9342535398421815, "grad_norm": 0.86328125, "learning_rate": 8.789430395637967e-05, "loss": 0.7466, "step": 35282 }, { "epoch": 0.9342800194504759, "grad_norm": 0.72265625, "learning_rate": 8.788973408643427e-05, "loss": 0.7363, "step": 35283 }, { "epoch": 0.9343064990587703, "grad_norm": 0.84375, "learning_rate": 8.788516424215593e-05, "loss": 0.7668, "step": 35284 }, { "epoch": 0.9343329786670647, "grad_norm": 0.796875, "learning_rate": 8.788059442355429e-05, "loss": 0.7797, "step": 35285 }, { "epoch": 0.934359458275359, "grad_norm": 0.734375, "learning_rate": 8.787602463063904e-05, "loss": 0.7256, "step": 35286 }, { "epoch": 0.9343859378836534, "grad_norm": 0.77734375, "learning_rate": 8.787145486341984e-05, "loss": 0.7555, "step": 35287 }, { "epoch": 0.9344124174919478, "grad_norm": 0.80078125, "learning_rate": 8.786688512190638e-05, "loss": 0.7863, "step": 35288 }, { "epoch": 0.9344388971002422, "grad_norm": 0.7265625, "learning_rate": 8.78623154061084e-05, "loss": 0.8198, "step": 35289 }, { "epoch": 0.9344653767085366, "grad_norm": 0.71484375, "learning_rate": 8.785774571603555e-05, "loss": 0.8032, "step": 35290 }, { "epoch": 0.934491856316831, "grad_norm": 0.8203125, "learning_rate": 8.785317605169751e-05, "loss": 0.8278, "step": 35291 }, { "epoch": 0.9345183359251253, "grad_norm": 0.8359375, "learning_rate": 8.784860641310398e-05, "loss": 0.8862, "step": 35292 }, { "epoch": 0.9345448155334197, "grad_norm": 0.8359375, "learning_rate": 8.78440368002646e-05, "loss": 0.882, "step": 35293 }, { "epoch": 0.934571295141714, "grad_norm": 0.81640625, "learning_rate": 8.783946721318911e-05, "loss": 0.7834, "step": 35294 }, { "epoch": 0.9345977747500084, "grad_norm": 0.73828125, "learning_rate": 8.783489765188719e-05, "loss": 0.7319, "step": 35295 }, { "epoch": 0.9346242543583028, "grad_norm": 0.6796875, "learning_rate": 8.783032811636848e-05, "loss": 0.7012, "step": 35296 }, { "epoch": 0.9346507339665971, "grad_norm": 0.78515625, "learning_rate": 8.78257586066427e-05, "loss": 0.7518, "step": 35297 }, { "epoch": 0.9346772135748915, "grad_norm": 0.8125, "learning_rate": 8.782118912271952e-05, "loss": 0.8635, "step": 35298 }, { "epoch": 0.9347036931831859, "grad_norm": 0.78515625, "learning_rate": 8.781661966460865e-05, "loss": 0.782, "step": 35299 }, { "epoch": 0.9347301727914803, "grad_norm": 0.765625, "learning_rate": 8.781205023231976e-05, "loss": 0.7881, "step": 35300 }, { "epoch": 0.9347566523997747, "grad_norm": 0.765625, "learning_rate": 8.780748082586255e-05, "loss": 0.7891, "step": 35301 }, { "epoch": 0.9347831320080691, "grad_norm": 0.77734375, "learning_rate": 8.780291144524667e-05, "loss": 0.8324, "step": 35302 }, { "epoch": 0.9348096116163634, "grad_norm": 0.75, "learning_rate": 8.779834209048177e-05, "loss": 0.8039, "step": 35303 }, { "epoch": 0.9348360912246578, "grad_norm": 0.7421875, "learning_rate": 8.779377276157765e-05, "loss": 0.7951, "step": 35304 }, { "epoch": 0.9348625708329522, "grad_norm": 0.796875, "learning_rate": 8.778920345854391e-05, "loss": 0.7877, "step": 35305 }, { "epoch": 0.9348890504412466, "grad_norm": 0.85546875, "learning_rate": 8.778463418139026e-05, "loss": 0.7105, "step": 35306 }, { "epoch": 0.934915530049541, "grad_norm": 0.82421875, "learning_rate": 8.778006493012641e-05, "loss": 0.7467, "step": 35307 }, { "epoch": 0.9349420096578354, "grad_norm": 0.7578125, "learning_rate": 8.777549570476194e-05, "loss": 0.7659, "step": 35308 }, { "epoch": 0.9349684892661297, "grad_norm": 0.89453125, "learning_rate": 8.777092650530666e-05, "loss": 0.9027, "step": 35309 }, { "epoch": 0.934994968874424, "grad_norm": 0.8125, "learning_rate": 8.77663573317702e-05, "loss": 0.76, "step": 35310 }, { "epoch": 0.9350214484827184, "grad_norm": 0.75390625, "learning_rate": 8.776178818416225e-05, "loss": 0.8751, "step": 35311 }, { "epoch": 0.9350479280910128, "grad_norm": 0.72265625, "learning_rate": 8.775721906249248e-05, "loss": 0.7808, "step": 35312 }, { "epoch": 0.9350744076993072, "grad_norm": 0.7734375, "learning_rate": 8.775264996677057e-05, "loss": 0.7857, "step": 35313 }, { "epoch": 0.9351008873076015, "grad_norm": 0.7890625, "learning_rate": 8.774808089700624e-05, "loss": 0.8647, "step": 35314 }, { "epoch": 0.9351273669158959, "grad_norm": 0.75390625, "learning_rate": 8.774351185320915e-05, "loss": 0.6811, "step": 35315 }, { "epoch": 0.9351538465241903, "grad_norm": 0.73046875, "learning_rate": 8.773894283538898e-05, "loss": 0.7511, "step": 35316 }, { "epoch": 0.9351803261324847, "grad_norm": 0.8125, "learning_rate": 8.773437384355543e-05, "loss": 0.7875, "step": 35317 }, { "epoch": 0.9352068057407791, "grad_norm": 0.8046875, "learning_rate": 8.772980487771814e-05, "loss": 0.7538, "step": 35318 }, { "epoch": 0.9352332853490735, "grad_norm": 0.79296875, "learning_rate": 8.772523593788687e-05, "loss": 0.8461, "step": 35319 }, { "epoch": 0.9352597649573678, "grad_norm": 0.765625, "learning_rate": 8.772066702407125e-05, "loss": 0.8091, "step": 35320 }, { "epoch": 0.9352862445656622, "grad_norm": 0.7890625, "learning_rate": 8.771609813628097e-05, "loss": 0.8071, "step": 35321 }, { "epoch": 0.9353127241739566, "grad_norm": 0.8046875, "learning_rate": 8.771152927452572e-05, "loss": 0.7601, "step": 35322 }, { "epoch": 0.935339203782251, "grad_norm": 0.828125, "learning_rate": 8.770696043881514e-05, "loss": 0.8598, "step": 35323 }, { "epoch": 0.9353656833905454, "grad_norm": 0.76953125, "learning_rate": 8.770239162915902e-05, "loss": 0.7559, "step": 35324 }, { "epoch": 0.9353921629988398, "grad_norm": 0.8515625, "learning_rate": 8.769782284556695e-05, "loss": 0.6412, "step": 35325 }, { "epoch": 0.9354186426071341, "grad_norm": 0.80859375, "learning_rate": 8.769325408804864e-05, "loss": 0.8331, "step": 35326 }, { "epoch": 0.9354451222154284, "grad_norm": 0.76953125, "learning_rate": 8.768868535661379e-05, "loss": 0.7963, "step": 35327 }, { "epoch": 0.9354716018237228, "grad_norm": 0.78515625, "learning_rate": 8.768411665127203e-05, "loss": 0.855, "step": 35328 }, { "epoch": 0.9354980814320172, "grad_norm": 0.80078125, "learning_rate": 8.76795479720331e-05, "loss": 0.7651, "step": 35329 }, { "epoch": 0.9355245610403116, "grad_norm": 0.79296875, "learning_rate": 8.767497931890666e-05, "loss": 0.905, "step": 35330 }, { "epoch": 0.935551040648606, "grad_norm": 0.7421875, "learning_rate": 8.767041069190242e-05, "loss": 0.8121, "step": 35331 }, { "epoch": 0.9355775202569003, "grad_norm": 0.93359375, "learning_rate": 8.766584209103004e-05, "loss": 0.7327, "step": 35332 }, { "epoch": 0.9356039998651947, "grad_norm": 0.7421875, "learning_rate": 8.766127351629917e-05, "loss": 0.7547, "step": 35333 }, { "epoch": 0.9356304794734891, "grad_norm": 0.80078125, "learning_rate": 8.765670496771952e-05, "loss": 0.8524, "step": 35334 }, { "epoch": 0.9356569590817835, "grad_norm": 0.84375, "learning_rate": 8.765213644530083e-05, "loss": 0.7902, "step": 35335 }, { "epoch": 0.9356834386900779, "grad_norm": 0.6796875, "learning_rate": 8.76475679490527e-05, "loss": 0.8565, "step": 35336 }, { "epoch": 0.9357099182983722, "grad_norm": 0.84765625, "learning_rate": 8.764299947898486e-05, "loss": 0.7823, "step": 35337 }, { "epoch": 0.9357363979066666, "grad_norm": 0.69140625, "learning_rate": 8.763843103510694e-05, "loss": 0.7444, "step": 35338 }, { "epoch": 0.935762877514961, "grad_norm": 0.734375, "learning_rate": 8.763386261742869e-05, "loss": 0.7239, "step": 35339 }, { "epoch": 0.9357893571232554, "grad_norm": 0.7734375, "learning_rate": 8.762929422595976e-05, "loss": 0.8989, "step": 35340 }, { "epoch": 0.9358158367315498, "grad_norm": 0.7109375, "learning_rate": 8.762472586070984e-05, "loss": 0.7746, "step": 35341 }, { "epoch": 0.9358423163398442, "grad_norm": 0.75, "learning_rate": 8.76201575216886e-05, "loss": 0.8323, "step": 35342 }, { "epoch": 0.9358687959481384, "grad_norm": 0.98828125, "learning_rate": 8.761558920890569e-05, "loss": 0.7848, "step": 35343 }, { "epoch": 0.9358952755564328, "grad_norm": 0.7890625, "learning_rate": 8.761102092237086e-05, "loss": 0.9672, "step": 35344 }, { "epoch": 0.9359217551647272, "grad_norm": 0.7578125, "learning_rate": 8.760645266209378e-05, "loss": 0.7999, "step": 35345 }, { "epoch": 0.9359482347730216, "grad_norm": 0.91015625, "learning_rate": 8.76018844280841e-05, "loss": 0.8599, "step": 35346 }, { "epoch": 0.935974714381316, "grad_norm": 0.76171875, "learning_rate": 8.759731622035152e-05, "loss": 0.7402, "step": 35347 }, { "epoch": 0.9360011939896103, "grad_norm": 0.671875, "learning_rate": 8.759274803890569e-05, "loss": 0.6316, "step": 35348 }, { "epoch": 0.9360276735979047, "grad_norm": 0.859375, "learning_rate": 8.758817988375634e-05, "loss": 0.6896, "step": 35349 }, { "epoch": 0.9360541532061991, "grad_norm": 0.79296875, "learning_rate": 8.758361175491317e-05, "loss": 0.6964, "step": 35350 }, { "epoch": 0.9360806328144935, "grad_norm": 0.7734375, "learning_rate": 8.757904365238579e-05, "loss": 0.6611, "step": 35351 }, { "epoch": 0.9361071124227879, "grad_norm": 0.7578125, "learning_rate": 8.757447557618393e-05, "loss": 0.8074, "step": 35352 }, { "epoch": 0.9361335920310823, "grad_norm": 0.69921875, "learning_rate": 8.756990752631723e-05, "loss": 0.8702, "step": 35353 }, { "epoch": 0.9361600716393766, "grad_norm": 0.82421875, "learning_rate": 8.756533950279543e-05, "loss": 0.9535, "step": 35354 }, { "epoch": 0.936186551247671, "grad_norm": 0.77734375, "learning_rate": 8.756077150562818e-05, "loss": 0.8412, "step": 35355 }, { "epoch": 0.9362130308559654, "grad_norm": 0.8046875, "learning_rate": 8.755620353482517e-05, "loss": 0.7486, "step": 35356 }, { "epoch": 0.9362395104642598, "grad_norm": 0.7890625, "learning_rate": 8.755163559039606e-05, "loss": 0.84, "step": 35357 }, { "epoch": 0.9362659900725542, "grad_norm": 0.75, "learning_rate": 8.754706767235055e-05, "loss": 0.6627, "step": 35358 }, { "epoch": 0.9362924696808484, "grad_norm": 0.796875, "learning_rate": 8.754249978069832e-05, "loss": 0.7923, "step": 35359 }, { "epoch": 0.9363189492891428, "grad_norm": 0.76953125, "learning_rate": 8.753793191544905e-05, "loss": 0.8897, "step": 35360 }, { "epoch": 0.9363454288974372, "grad_norm": 0.8046875, "learning_rate": 8.753336407661244e-05, "loss": 0.7635, "step": 35361 }, { "epoch": 0.9363719085057316, "grad_norm": 0.82421875, "learning_rate": 8.752879626419814e-05, "loss": 0.9094, "step": 35362 }, { "epoch": 0.936398388114026, "grad_norm": 0.85546875, "learning_rate": 8.75242284782158e-05, "loss": 0.8463, "step": 35363 }, { "epoch": 0.9364248677223204, "grad_norm": 1.0859375, "learning_rate": 8.751966071867519e-05, "loss": 0.8545, "step": 35364 }, { "epoch": 0.9364513473306147, "grad_norm": 0.80078125, "learning_rate": 8.751509298558595e-05, "loss": 0.8499, "step": 35365 }, { "epoch": 0.9364778269389091, "grad_norm": 0.80859375, "learning_rate": 8.751052527895775e-05, "loss": 0.7811, "step": 35366 }, { "epoch": 0.9365043065472035, "grad_norm": 0.76953125, "learning_rate": 8.750595759880027e-05, "loss": 0.7702, "step": 35367 }, { "epoch": 0.9365307861554979, "grad_norm": 0.75390625, "learning_rate": 8.750138994512316e-05, "loss": 0.8221, "step": 35368 }, { "epoch": 0.9365572657637923, "grad_norm": 0.7265625, "learning_rate": 8.749682231793619e-05, "loss": 0.7341, "step": 35369 }, { "epoch": 0.9365837453720867, "grad_norm": 0.796875, "learning_rate": 8.749225471724898e-05, "loss": 0.9293, "step": 35370 }, { "epoch": 0.936610224980381, "grad_norm": 0.87109375, "learning_rate": 8.748768714307124e-05, "loss": 0.8685, "step": 35371 }, { "epoch": 0.9366367045886754, "grad_norm": 0.77734375, "learning_rate": 8.748311959541262e-05, "loss": 0.8571, "step": 35372 }, { "epoch": 0.9366631841969698, "grad_norm": 0.69140625, "learning_rate": 8.747855207428277e-05, "loss": 0.7081, "step": 35373 }, { "epoch": 0.9366896638052642, "grad_norm": 0.82421875, "learning_rate": 8.747398457969145e-05, "loss": 0.8442, "step": 35374 }, { "epoch": 0.9367161434135586, "grad_norm": 0.75, "learning_rate": 8.746941711164831e-05, "loss": 0.8031, "step": 35375 }, { "epoch": 0.9367426230218528, "grad_norm": 0.76953125, "learning_rate": 8.746484967016303e-05, "loss": 0.7495, "step": 35376 }, { "epoch": 0.9367691026301472, "grad_norm": 0.71875, "learning_rate": 8.746028225524528e-05, "loss": 0.709, "step": 35377 }, { "epoch": 0.9367955822384416, "grad_norm": 0.71875, "learning_rate": 8.745571486690471e-05, "loss": 0.7854, "step": 35378 }, { "epoch": 0.936822061846736, "grad_norm": 0.77734375, "learning_rate": 8.745114750515109e-05, "loss": 0.7902, "step": 35379 }, { "epoch": 0.9368485414550304, "grad_norm": 0.7421875, "learning_rate": 8.744658016999401e-05, "loss": 0.7346, "step": 35380 }, { "epoch": 0.9368750210633248, "grad_norm": 0.80859375, "learning_rate": 8.74420128614432e-05, "loss": 0.7952, "step": 35381 }, { "epoch": 0.9369015006716191, "grad_norm": 0.79296875, "learning_rate": 8.743744557950833e-05, "loss": 0.7666, "step": 35382 }, { "epoch": 0.9369279802799135, "grad_norm": 0.76171875, "learning_rate": 8.743287832419903e-05, "loss": 0.7747, "step": 35383 }, { "epoch": 0.9369544598882079, "grad_norm": 0.7890625, "learning_rate": 8.742831109552508e-05, "loss": 0.8081, "step": 35384 }, { "epoch": 0.9369809394965023, "grad_norm": 0.80078125, "learning_rate": 8.742374389349608e-05, "loss": 0.8974, "step": 35385 }, { "epoch": 0.9370074191047967, "grad_norm": 0.8125, "learning_rate": 8.741917671812177e-05, "loss": 0.691, "step": 35386 }, { "epoch": 0.9370338987130911, "grad_norm": 0.79296875, "learning_rate": 8.741460956941177e-05, "loss": 0.8177, "step": 35387 }, { "epoch": 0.9370603783213854, "grad_norm": 0.953125, "learning_rate": 8.741004244737574e-05, "loss": 0.8088, "step": 35388 }, { "epoch": 0.9370868579296798, "grad_norm": 0.69140625, "learning_rate": 8.740547535202346e-05, "loss": 0.6692, "step": 35389 }, { "epoch": 0.9371133375379742, "grad_norm": 0.87109375, "learning_rate": 8.740090828336456e-05, "loss": 0.8628, "step": 35390 }, { "epoch": 0.9371398171462686, "grad_norm": 0.8046875, "learning_rate": 8.73963412414087e-05, "loss": 0.8614, "step": 35391 }, { "epoch": 0.9371662967545629, "grad_norm": 0.74609375, "learning_rate": 8.739177422616558e-05, "loss": 0.8388, "step": 35392 }, { "epoch": 0.9371927763628572, "grad_norm": 0.765625, "learning_rate": 8.738720723764485e-05, "loss": 0.6995, "step": 35393 }, { "epoch": 0.9372192559711516, "grad_norm": 0.72265625, "learning_rate": 8.738264027585623e-05, "loss": 0.6672, "step": 35394 }, { "epoch": 0.937245735579446, "grad_norm": 0.78515625, "learning_rate": 8.737807334080938e-05, "loss": 0.7032, "step": 35395 }, { "epoch": 0.9372722151877404, "grad_norm": 0.79296875, "learning_rate": 8.737350643251401e-05, "loss": 0.8079, "step": 35396 }, { "epoch": 0.9372986947960348, "grad_norm": 0.79296875, "learning_rate": 8.736893955097975e-05, "loss": 0.8977, "step": 35397 }, { "epoch": 0.9373251744043292, "grad_norm": 0.7578125, "learning_rate": 8.73643726962163e-05, "loss": 0.8255, "step": 35398 }, { "epoch": 0.9373516540126235, "grad_norm": 0.84765625, "learning_rate": 8.735980586823334e-05, "loss": 0.8183, "step": 35399 }, { "epoch": 0.9373781336209179, "grad_norm": 0.7734375, "learning_rate": 8.735523906704056e-05, "loss": 0.8407, "step": 35400 }, { "epoch": 0.9374046132292123, "grad_norm": 0.75, "learning_rate": 8.735067229264759e-05, "loss": 0.7602, "step": 35401 }, { "epoch": 0.9374310928375067, "grad_norm": 0.84765625, "learning_rate": 8.734610554506417e-05, "loss": 0.8766, "step": 35402 }, { "epoch": 0.9374575724458011, "grad_norm": 0.765625, "learning_rate": 8.734153882429995e-05, "loss": 0.804, "step": 35403 }, { "epoch": 0.9374840520540955, "grad_norm": 0.87890625, "learning_rate": 8.733697213036463e-05, "loss": 0.8824, "step": 35404 }, { "epoch": 0.9375105316623898, "grad_norm": 0.77734375, "learning_rate": 8.733240546326788e-05, "loss": 0.7507, "step": 35405 }, { "epoch": 0.9375370112706842, "grad_norm": 0.69140625, "learning_rate": 8.732783882301937e-05, "loss": 0.6994, "step": 35406 }, { "epoch": 0.9375634908789786, "grad_norm": 0.765625, "learning_rate": 8.732327220962877e-05, "loss": 0.7097, "step": 35407 }, { "epoch": 0.9375899704872729, "grad_norm": 0.83203125, "learning_rate": 8.731870562310574e-05, "loss": 0.7853, "step": 35408 }, { "epoch": 0.9376164500955673, "grad_norm": 0.859375, "learning_rate": 8.731413906346003e-05, "loss": 0.7966, "step": 35409 }, { "epoch": 0.9376429297038616, "grad_norm": 0.70703125, "learning_rate": 8.730957253070127e-05, "loss": 0.702, "step": 35410 }, { "epoch": 0.937669409312156, "grad_norm": 0.78515625, "learning_rate": 8.730500602483915e-05, "loss": 0.6759, "step": 35411 }, { "epoch": 0.9376958889204504, "grad_norm": 0.875, "learning_rate": 8.730043954588335e-05, "loss": 0.8434, "step": 35412 }, { "epoch": 0.9377223685287448, "grad_norm": 0.76171875, "learning_rate": 8.729587309384349e-05, "loss": 0.8855, "step": 35413 }, { "epoch": 0.9377488481370392, "grad_norm": 1.4765625, "learning_rate": 8.729130666872935e-05, "loss": 0.7442, "step": 35414 }, { "epoch": 0.9377753277453336, "grad_norm": 0.84375, "learning_rate": 8.728674027055057e-05, "loss": 0.8619, "step": 35415 }, { "epoch": 0.937801807353628, "grad_norm": 0.765625, "learning_rate": 8.728217389931679e-05, "loss": 0.8205, "step": 35416 }, { "epoch": 0.9378282869619223, "grad_norm": 0.76171875, "learning_rate": 8.727760755503773e-05, "loss": 0.7417, "step": 35417 }, { "epoch": 0.9378547665702167, "grad_norm": 0.73828125, "learning_rate": 8.727304123772304e-05, "loss": 0.8084, "step": 35418 }, { "epoch": 0.9378812461785111, "grad_norm": 0.75, "learning_rate": 8.726847494738241e-05, "loss": 0.6905, "step": 35419 }, { "epoch": 0.9379077257868055, "grad_norm": 0.77734375, "learning_rate": 8.726390868402552e-05, "loss": 0.8236, "step": 35420 }, { "epoch": 0.9379342053950999, "grad_norm": 0.796875, "learning_rate": 8.725934244766206e-05, "loss": 0.8361, "step": 35421 }, { "epoch": 0.9379606850033942, "grad_norm": 0.75390625, "learning_rate": 8.72547762383017e-05, "loss": 0.8857, "step": 35422 }, { "epoch": 0.9379871646116886, "grad_norm": 0.76171875, "learning_rate": 8.725021005595412e-05, "loss": 0.7997, "step": 35423 }, { "epoch": 0.938013644219983, "grad_norm": 0.796875, "learning_rate": 8.724564390062895e-05, "loss": 0.6988, "step": 35424 }, { "epoch": 0.9380401238282773, "grad_norm": 0.82421875, "learning_rate": 8.724107777233595e-05, "loss": 0.8514, "step": 35425 }, { "epoch": 0.9380666034365717, "grad_norm": 0.8671875, "learning_rate": 8.723651167108474e-05, "loss": 0.9237, "step": 35426 }, { "epoch": 0.938093083044866, "grad_norm": 0.75390625, "learning_rate": 8.723194559688502e-05, "loss": 0.7734, "step": 35427 }, { "epoch": 0.9381195626531604, "grad_norm": 0.8515625, "learning_rate": 8.722737954974647e-05, "loss": 0.7074, "step": 35428 }, { "epoch": 0.9381460422614548, "grad_norm": 0.84765625, "learning_rate": 8.722281352967871e-05, "loss": 0.8962, "step": 35429 }, { "epoch": 0.9381725218697492, "grad_norm": 0.859375, "learning_rate": 8.721824753669149e-05, "loss": 0.7083, "step": 35430 }, { "epoch": 0.9381990014780436, "grad_norm": 0.796875, "learning_rate": 8.72136815707945e-05, "loss": 0.9243, "step": 35431 }, { "epoch": 0.938225481086338, "grad_norm": 0.8515625, "learning_rate": 8.720911563199736e-05, "loss": 0.8019, "step": 35432 }, { "epoch": 0.9382519606946323, "grad_norm": 0.78125, "learning_rate": 8.720454972030977e-05, "loss": 0.7916, "step": 35433 }, { "epoch": 0.9382784403029267, "grad_norm": 0.73828125, "learning_rate": 8.719998383574137e-05, "loss": 0.8259, "step": 35434 }, { "epoch": 0.9383049199112211, "grad_norm": 0.82421875, "learning_rate": 8.719541797830191e-05, "loss": 0.8393, "step": 35435 }, { "epoch": 0.9383313995195155, "grad_norm": 0.69921875, "learning_rate": 8.719085214800102e-05, "loss": 0.6216, "step": 35436 }, { "epoch": 0.9383578791278099, "grad_norm": 0.9140625, "learning_rate": 8.71862863448484e-05, "loss": 0.8121, "step": 35437 }, { "epoch": 0.9383843587361043, "grad_norm": 0.7890625, "learning_rate": 8.718172056885371e-05, "loss": 0.7839, "step": 35438 }, { "epoch": 0.9384108383443986, "grad_norm": 0.7578125, "learning_rate": 8.717715482002659e-05, "loss": 0.7685, "step": 35439 }, { "epoch": 0.938437317952693, "grad_norm": 0.79296875, "learning_rate": 8.71725890983768e-05, "loss": 0.9, "step": 35440 }, { "epoch": 0.9384637975609873, "grad_norm": 0.82421875, "learning_rate": 8.716802340391397e-05, "loss": 0.9407, "step": 35441 }, { "epoch": 0.9384902771692817, "grad_norm": 0.7578125, "learning_rate": 8.716345773664779e-05, "loss": 0.7728, "step": 35442 }, { "epoch": 0.9385167567775761, "grad_norm": 0.8671875, "learning_rate": 8.715889209658791e-05, "loss": 0.7487, "step": 35443 }, { "epoch": 0.9385432363858704, "grad_norm": 0.7109375, "learning_rate": 8.715432648374402e-05, "loss": 0.8067, "step": 35444 }, { "epoch": 0.9385697159941648, "grad_norm": 0.7578125, "learning_rate": 8.714976089812581e-05, "loss": 0.7413, "step": 35445 }, { "epoch": 0.9385961956024592, "grad_norm": 0.75, "learning_rate": 8.714519533974295e-05, "loss": 0.6105, "step": 35446 }, { "epoch": 0.9386226752107536, "grad_norm": 0.80078125, "learning_rate": 8.714062980860513e-05, "loss": 0.8751, "step": 35447 }, { "epoch": 0.938649154819048, "grad_norm": 0.75, "learning_rate": 8.713606430472199e-05, "loss": 0.7594, "step": 35448 }, { "epoch": 0.9386756344273424, "grad_norm": 0.74609375, "learning_rate": 8.713149882810319e-05, "loss": 0.7338, "step": 35449 }, { "epoch": 0.9387021140356367, "grad_norm": 0.75, "learning_rate": 8.712693337875848e-05, "loss": 0.8008, "step": 35450 }, { "epoch": 0.9387285936439311, "grad_norm": 1.4375, "learning_rate": 8.71223679566975e-05, "loss": 0.8545, "step": 35451 }, { "epoch": 0.9387550732522255, "grad_norm": 0.78515625, "learning_rate": 8.711780256192994e-05, "loss": 0.8156, "step": 35452 }, { "epoch": 0.9387815528605199, "grad_norm": 0.80078125, "learning_rate": 8.711323719446545e-05, "loss": 0.7959, "step": 35453 }, { "epoch": 0.9388080324688143, "grad_norm": 0.8359375, "learning_rate": 8.710867185431367e-05, "loss": 0.7208, "step": 35454 }, { "epoch": 0.9388345120771087, "grad_norm": 0.75390625, "learning_rate": 8.710410654148437e-05, "loss": 0.7122, "step": 35455 }, { "epoch": 0.938860991685403, "grad_norm": 0.85546875, "learning_rate": 8.709954125598719e-05, "loss": 0.9611, "step": 35456 }, { "epoch": 0.9388874712936973, "grad_norm": 0.83984375, "learning_rate": 8.709497599783178e-05, "loss": 0.796, "step": 35457 }, { "epoch": 0.9389139509019917, "grad_norm": 0.7890625, "learning_rate": 8.709041076702784e-05, "loss": 0.8357, "step": 35458 }, { "epoch": 0.9389404305102861, "grad_norm": 0.6953125, "learning_rate": 8.7085845563585e-05, "loss": 0.6733, "step": 35459 }, { "epoch": 0.9389669101185805, "grad_norm": 0.83984375, "learning_rate": 8.708128038751299e-05, "loss": 0.7521, "step": 35460 }, { "epoch": 0.9389933897268748, "grad_norm": 0.96484375, "learning_rate": 8.707671523882149e-05, "loss": 0.9317, "step": 35461 }, { "epoch": 0.9390198693351692, "grad_norm": 0.79296875, "learning_rate": 8.707215011752015e-05, "loss": 0.8527, "step": 35462 }, { "epoch": 0.9390463489434636, "grad_norm": 0.73828125, "learning_rate": 8.706758502361863e-05, "loss": 0.7587, "step": 35463 }, { "epoch": 0.939072828551758, "grad_norm": 0.796875, "learning_rate": 8.706301995712663e-05, "loss": 0.7994, "step": 35464 }, { "epoch": 0.9390993081600524, "grad_norm": 0.76953125, "learning_rate": 8.705845491805382e-05, "loss": 0.8647, "step": 35465 }, { "epoch": 0.9391257877683468, "grad_norm": 0.7734375, "learning_rate": 8.705388990640988e-05, "loss": 0.7404, "step": 35466 }, { "epoch": 0.9391522673766411, "grad_norm": 0.7265625, "learning_rate": 8.704932492220448e-05, "loss": 0.752, "step": 35467 }, { "epoch": 0.9391787469849355, "grad_norm": 0.77734375, "learning_rate": 8.704475996544729e-05, "loss": 0.8235, "step": 35468 }, { "epoch": 0.9392052265932299, "grad_norm": 0.84765625, "learning_rate": 8.704019503614798e-05, "loss": 0.8748, "step": 35469 }, { "epoch": 0.9392317062015243, "grad_norm": 0.8046875, "learning_rate": 8.703563013431626e-05, "loss": 0.8218, "step": 35470 }, { "epoch": 0.9392581858098187, "grad_norm": 0.7265625, "learning_rate": 8.703106525996177e-05, "loss": 0.6756, "step": 35471 }, { "epoch": 0.9392846654181131, "grad_norm": 0.8046875, "learning_rate": 8.702650041309421e-05, "loss": 0.777, "step": 35472 }, { "epoch": 0.9393111450264074, "grad_norm": 0.74609375, "learning_rate": 8.702193559372324e-05, "loss": 0.7783, "step": 35473 }, { "epoch": 0.9393376246347017, "grad_norm": 0.8125, "learning_rate": 8.701737080185848e-05, "loss": 0.9069, "step": 35474 }, { "epoch": 0.9393641042429961, "grad_norm": 0.83203125, "learning_rate": 8.701280603750971e-05, "loss": 0.8421, "step": 35475 }, { "epoch": 0.9393905838512905, "grad_norm": 0.9296875, "learning_rate": 8.700824130068655e-05, "loss": 0.8975, "step": 35476 }, { "epoch": 0.9394170634595849, "grad_norm": 0.765625, "learning_rate": 8.700367659139869e-05, "loss": 0.7317, "step": 35477 }, { "epoch": 0.9394435430678792, "grad_norm": 0.82421875, "learning_rate": 8.699911190965579e-05, "loss": 0.7365, "step": 35478 }, { "epoch": 0.9394700226761736, "grad_norm": 0.7734375, "learning_rate": 8.699454725546749e-05, "loss": 0.7912, "step": 35479 }, { "epoch": 0.939496502284468, "grad_norm": 0.8203125, "learning_rate": 8.698998262884354e-05, "loss": 0.874, "step": 35480 }, { "epoch": 0.9395229818927624, "grad_norm": 0.796875, "learning_rate": 8.698541802979358e-05, "loss": 0.674, "step": 35481 }, { "epoch": 0.9395494615010568, "grad_norm": 0.796875, "learning_rate": 8.69808534583273e-05, "loss": 0.7908, "step": 35482 }, { "epoch": 0.9395759411093512, "grad_norm": 0.7265625, "learning_rate": 8.697628891445433e-05, "loss": 0.7436, "step": 35483 }, { "epoch": 0.9396024207176455, "grad_norm": 0.7734375, "learning_rate": 8.697172439818436e-05, "loss": 0.7476, "step": 35484 }, { "epoch": 0.9396289003259399, "grad_norm": 0.79296875, "learning_rate": 8.696715990952709e-05, "loss": 0.7929, "step": 35485 }, { "epoch": 0.9396553799342343, "grad_norm": 0.8671875, "learning_rate": 8.69625954484922e-05, "loss": 0.6278, "step": 35486 }, { "epoch": 0.9396818595425287, "grad_norm": 0.80859375, "learning_rate": 8.69580310150893e-05, "loss": 0.749, "step": 35487 }, { "epoch": 0.9397083391508231, "grad_norm": 0.80859375, "learning_rate": 8.695346660932815e-05, "loss": 0.8144, "step": 35488 }, { "epoch": 0.9397348187591175, "grad_norm": 0.84375, "learning_rate": 8.694890223121835e-05, "loss": 0.899, "step": 35489 }, { "epoch": 0.9397612983674117, "grad_norm": 0.90625, "learning_rate": 8.694433788076963e-05, "loss": 0.7798, "step": 35490 }, { "epoch": 0.9397877779757061, "grad_norm": 0.84765625, "learning_rate": 8.693977355799163e-05, "loss": 0.8542, "step": 35491 }, { "epoch": 0.9398142575840005, "grad_norm": 0.75, "learning_rate": 8.693520926289405e-05, "loss": 0.8691, "step": 35492 }, { "epoch": 0.9398407371922949, "grad_norm": 0.8203125, "learning_rate": 8.693064499548653e-05, "loss": 0.8366, "step": 35493 }, { "epoch": 0.9398672168005893, "grad_norm": 0.82421875, "learning_rate": 8.692608075577873e-05, "loss": 0.777, "step": 35494 }, { "epoch": 0.9398936964088836, "grad_norm": 0.82421875, "learning_rate": 8.692151654378038e-05, "loss": 0.8436, "step": 35495 }, { "epoch": 0.939920176017178, "grad_norm": 0.80859375, "learning_rate": 8.691695235950115e-05, "loss": 0.8924, "step": 35496 }, { "epoch": 0.9399466556254724, "grad_norm": 0.87109375, "learning_rate": 8.691238820295069e-05, "loss": 0.8207, "step": 35497 }, { "epoch": 0.9399731352337668, "grad_norm": 0.796875, "learning_rate": 8.690782407413865e-05, "loss": 0.8768, "step": 35498 }, { "epoch": 0.9399996148420612, "grad_norm": 0.8046875, "learning_rate": 8.690325997307472e-05, "loss": 0.8163, "step": 35499 }, { "epoch": 0.9400260944503556, "grad_norm": 0.73828125, "learning_rate": 8.68986958997686e-05, "loss": 0.8709, "step": 35500 }, { "epoch": 0.94005257405865, "grad_norm": 0.79296875, "learning_rate": 8.689413185422995e-05, "loss": 0.7927, "step": 35501 }, { "epoch": 0.9400790536669443, "grad_norm": 0.77734375, "learning_rate": 8.688956783646844e-05, "loss": 0.702, "step": 35502 }, { "epoch": 0.9401055332752387, "grad_norm": 0.7734375, "learning_rate": 8.688500384649375e-05, "loss": 0.8155, "step": 35503 }, { "epoch": 0.9401320128835331, "grad_norm": 0.73046875, "learning_rate": 8.688043988431552e-05, "loss": 0.8398, "step": 35504 }, { "epoch": 0.9401584924918275, "grad_norm": 0.7734375, "learning_rate": 8.687587594994344e-05, "loss": 0.8769, "step": 35505 }, { "epoch": 0.9401849721001218, "grad_norm": 0.78125, "learning_rate": 8.687131204338721e-05, "loss": 0.818, "step": 35506 }, { "epoch": 0.9402114517084161, "grad_norm": 0.9296875, "learning_rate": 8.68667481646565e-05, "loss": 0.8238, "step": 35507 }, { "epoch": 0.9402379313167105, "grad_norm": 0.78125, "learning_rate": 8.686218431376096e-05, "loss": 0.781, "step": 35508 }, { "epoch": 0.9402644109250049, "grad_norm": 0.74609375, "learning_rate": 8.685762049071023e-05, "loss": 0.827, "step": 35509 }, { "epoch": 0.9402908905332993, "grad_norm": 0.79296875, "learning_rate": 8.685305669551406e-05, "loss": 0.7691, "step": 35510 }, { "epoch": 0.9403173701415937, "grad_norm": 0.8828125, "learning_rate": 8.68484929281821e-05, "loss": 0.794, "step": 35511 }, { "epoch": 0.940343849749888, "grad_norm": 0.9765625, "learning_rate": 8.684392918872398e-05, "loss": 0.8917, "step": 35512 }, { "epoch": 0.9403703293581824, "grad_norm": 0.828125, "learning_rate": 8.683936547714941e-05, "loss": 0.8838, "step": 35513 }, { "epoch": 0.9403968089664768, "grad_norm": 0.734375, "learning_rate": 8.683480179346801e-05, "loss": 0.7415, "step": 35514 }, { "epoch": 0.9404232885747712, "grad_norm": 0.8203125, "learning_rate": 8.683023813768955e-05, "loss": 0.7504, "step": 35515 }, { "epoch": 0.9404497681830656, "grad_norm": 0.953125, "learning_rate": 8.682567450982363e-05, "loss": 0.8263, "step": 35516 }, { "epoch": 0.94047624779136, "grad_norm": 0.83984375, "learning_rate": 8.682111090987994e-05, "loss": 0.9313, "step": 35517 }, { "epoch": 0.9405027273996543, "grad_norm": 0.77734375, "learning_rate": 8.681654733786817e-05, "loss": 0.7998, "step": 35518 }, { "epoch": 0.9405292070079487, "grad_norm": 1.0078125, "learning_rate": 8.681198379379791e-05, "loss": 0.8664, "step": 35519 }, { "epoch": 0.9405556866162431, "grad_norm": 0.765625, "learning_rate": 8.680742027767895e-05, "loss": 0.8165, "step": 35520 }, { "epoch": 0.9405821662245375, "grad_norm": 0.796875, "learning_rate": 8.680285678952091e-05, "loss": 0.7337, "step": 35521 }, { "epoch": 0.9406086458328319, "grad_norm": 0.7578125, "learning_rate": 8.679829332933346e-05, "loss": 0.7572, "step": 35522 }, { "epoch": 0.9406351254411262, "grad_norm": 0.78515625, "learning_rate": 8.679372989712628e-05, "loss": 0.7867, "step": 35523 }, { "epoch": 0.9406616050494205, "grad_norm": 0.77734375, "learning_rate": 8.678916649290898e-05, "loss": 0.7964, "step": 35524 }, { "epoch": 0.9406880846577149, "grad_norm": 0.80859375, "learning_rate": 8.678460311669133e-05, "loss": 0.7854, "step": 35525 }, { "epoch": 0.9407145642660093, "grad_norm": 0.69140625, "learning_rate": 8.678003976848296e-05, "loss": 0.699, "step": 35526 }, { "epoch": 0.9407410438743037, "grad_norm": 0.734375, "learning_rate": 8.677547644829353e-05, "loss": 0.6677, "step": 35527 }, { "epoch": 0.9407675234825981, "grad_norm": 0.78515625, "learning_rate": 8.677091315613275e-05, "loss": 0.7465, "step": 35528 }, { "epoch": 0.9407940030908925, "grad_norm": 0.75390625, "learning_rate": 8.676634989201022e-05, "loss": 0.8187, "step": 35529 }, { "epoch": 0.9408204826991868, "grad_norm": 0.79296875, "learning_rate": 8.676178665593568e-05, "loss": 0.7287, "step": 35530 }, { "epoch": 0.9408469623074812, "grad_norm": 0.80078125, "learning_rate": 8.675722344791877e-05, "loss": 0.7247, "step": 35531 }, { "epoch": 0.9408734419157756, "grad_norm": 0.76171875, "learning_rate": 8.675266026796919e-05, "loss": 0.7342, "step": 35532 }, { "epoch": 0.94089992152407, "grad_norm": 0.75, "learning_rate": 8.674809711609657e-05, "loss": 0.7696, "step": 35533 }, { "epoch": 0.9409264011323644, "grad_norm": 0.78515625, "learning_rate": 8.674353399231057e-05, "loss": 0.796, "step": 35534 }, { "epoch": 0.9409528807406587, "grad_norm": 0.76171875, "learning_rate": 8.673897089662092e-05, "loss": 0.6744, "step": 35535 }, { "epoch": 0.9409793603489531, "grad_norm": 0.7734375, "learning_rate": 8.673440782903728e-05, "loss": 0.7775, "step": 35536 }, { "epoch": 0.9410058399572475, "grad_norm": 0.80859375, "learning_rate": 8.672984478956929e-05, "loss": 0.7835, "step": 35537 }, { "epoch": 0.9410323195655419, "grad_norm": 0.8046875, "learning_rate": 8.672528177822664e-05, "loss": 0.8102, "step": 35538 }, { "epoch": 0.9410587991738362, "grad_norm": 0.76171875, "learning_rate": 8.672071879501894e-05, "loss": 0.6051, "step": 35539 }, { "epoch": 0.9410852787821306, "grad_norm": 0.72265625, "learning_rate": 8.6716155839956e-05, "loss": 0.6604, "step": 35540 }, { "epoch": 0.9411117583904249, "grad_norm": 0.80078125, "learning_rate": 8.671159291304738e-05, "loss": 0.8248, "step": 35541 }, { "epoch": 0.9411382379987193, "grad_norm": 0.734375, "learning_rate": 8.670703001430278e-05, "loss": 0.8278, "step": 35542 }, { "epoch": 0.9411647176070137, "grad_norm": 0.7734375, "learning_rate": 8.670246714373187e-05, "loss": 0.9386, "step": 35543 }, { "epoch": 0.9411911972153081, "grad_norm": 0.75, "learning_rate": 8.669790430134428e-05, "loss": 0.7534, "step": 35544 }, { "epoch": 0.9412176768236025, "grad_norm": 0.76171875, "learning_rate": 8.669334148714976e-05, "loss": 0.8349, "step": 35545 }, { "epoch": 0.9412441564318969, "grad_norm": 0.7890625, "learning_rate": 8.668877870115795e-05, "loss": 0.7817, "step": 35546 }, { "epoch": 0.9412706360401912, "grad_norm": 0.71484375, "learning_rate": 8.668421594337851e-05, "loss": 0.6702, "step": 35547 }, { "epoch": 0.9412971156484856, "grad_norm": 0.71875, "learning_rate": 8.667965321382112e-05, "loss": 0.7491, "step": 35548 }, { "epoch": 0.94132359525678, "grad_norm": 0.828125, "learning_rate": 8.667509051249541e-05, "loss": 0.7827, "step": 35549 }, { "epoch": 0.9413500748650744, "grad_norm": 0.74609375, "learning_rate": 8.667052783941112e-05, "loss": 0.647, "step": 35550 }, { "epoch": 0.9413765544733688, "grad_norm": 0.8125, "learning_rate": 8.666596519457787e-05, "loss": 0.756, "step": 35551 }, { "epoch": 0.9414030340816631, "grad_norm": 0.7890625, "learning_rate": 8.666140257800534e-05, "loss": 0.8157, "step": 35552 }, { "epoch": 0.9414295136899575, "grad_norm": 0.73828125, "learning_rate": 8.665683998970322e-05, "loss": 0.7003, "step": 35553 }, { "epoch": 0.9414559932982519, "grad_norm": 0.984375, "learning_rate": 8.665227742968111e-05, "loss": 0.7691, "step": 35554 }, { "epoch": 0.9414824729065462, "grad_norm": 0.765625, "learning_rate": 8.66477148979488e-05, "loss": 0.7157, "step": 35555 }, { "epoch": 0.9415089525148406, "grad_norm": 0.79296875, "learning_rate": 8.664315239451587e-05, "loss": 0.8078, "step": 35556 }, { "epoch": 0.941535432123135, "grad_norm": 0.765625, "learning_rate": 8.663858991939203e-05, "loss": 0.7928, "step": 35557 }, { "epoch": 0.9415619117314293, "grad_norm": 0.74609375, "learning_rate": 8.663402747258693e-05, "loss": 0.6584, "step": 35558 }, { "epoch": 0.9415883913397237, "grad_norm": 0.7265625, "learning_rate": 8.66294650541102e-05, "loss": 0.7452, "step": 35559 }, { "epoch": 0.9416148709480181, "grad_norm": 0.80078125, "learning_rate": 8.662490266397161e-05, "loss": 0.8749, "step": 35560 }, { "epoch": 0.9416413505563125, "grad_norm": 0.79296875, "learning_rate": 8.662034030218076e-05, "loss": 0.8485, "step": 35561 }, { "epoch": 0.9416678301646069, "grad_norm": 0.7265625, "learning_rate": 8.661577796874734e-05, "loss": 0.7054, "step": 35562 }, { "epoch": 0.9416943097729013, "grad_norm": 0.80859375, "learning_rate": 8.6611215663681e-05, "loss": 0.7832, "step": 35563 }, { "epoch": 0.9417207893811956, "grad_norm": 0.9140625, "learning_rate": 8.660665338699145e-05, "loss": 0.7061, "step": 35564 }, { "epoch": 0.94174726898949, "grad_norm": 1.890625, "learning_rate": 8.660209113868827e-05, "loss": 0.776, "step": 35565 }, { "epoch": 0.9417737485977844, "grad_norm": 0.75390625, "learning_rate": 8.659752891878123e-05, "loss": 0.7342, "step": 35566 }, { "epoch": 0.9418002282060788, "grad_norm": 0.8828125, "learning_rate": 8.659296672727997e-05, "loss": 0.7114, "step": 35567 }, { "epoch": 0.9418267078143732, "grad_norm": 0.84765625, "learning_rate": 8.658840456419416e-05, "loss": 0.7995, "step": 35568 }, { "epoch": 0.9418531874226675, "grad_norm": 0.7890625, "learning_rate": 8.658384242953345e-05, "loss": 0.7943, "step": 35569 }, { "epoch": 0.9418796670309619, "grad_norm": 0.8046875, "learning_rate": 8.65792803233075e-05, "loss": 0.7784, "step": 35570 }, { "epoch": 0.9419061466392563, "grad_norm": 0.75390625, "learning_rate": 8.657471824552602e-05, "loss": 0.8696, "step": 35571 }, { "epoch": 0.9419326262475506, "grad_norm": 0.80078125, "learning_rate": 8.657015619619864e-05, "loss": 0.7834, "step": 35572 }, { "epoch": 0.941959105855845, "grad_norm": 0.79296875, "learning_rate": 8.656559417533505e-05, "loss": 0.7926, "step": 35573 }, { "epoch": 0.9419855854641394, "grad_norm": 0.8359375, "learning_rate": 8.656103218294493e-05, "loss": 0.8631, "step": 35574 }, { "epoch": 0.9420120650724337, "grad_norm": 0.796875, "learning_rate": 8.65564702190379e-05, "loss": 0.777, "step": 35575 }, { "epoch": 0.9420385446807281, "grad_norm": 0.8359375, "learning_rate": 8.65519082836237e-05, "loss": 0.8495, "step": 35576 }, { "epoch": 0.9420650242890225, "grad_norm": 0.72265625, "learning_rate": 8.654734637671196e-05, "loss": 0.7563, "step": 35577 }, { "epoch": 0.9420915038973169, "grad_norm": 0.86328125, "learning_rate": 8.654278449831234e-05, "loss": 0.7595, "step": 35578 }, { "epoch": 0.9421179835056113, "grad_norm": 0.83203125, "learning_rate": 8.653822264843454e-05, "loss": 0.7834, "step": 35579 }, { "epoch": 0.9421444631139057, "grad_norm": 0.89453125, "learning_rate": 8.653366082708815e-05, "loss": 0.8059, "step": 35580 }, { "epoch": 0.9421709427222, "grad_norm": 0.6953125, "learning_rate": 8.652909903428293e-05, "loss": 0.7263, "step": 35581 }, { "epoch": 0.9421974223304944, "grad_norm": 0.78125, "learning_rate": 8.652453727002854e-05, "loss": 0.7872, "step": 35582 }, { "epoch": 0.9422239019387888, "grad_norm": 0.80859375, "learning_rate": 8.651997553433459e-05, "loss": 0.7062, "step": 35583 }, { "epoch": 0.9422503815470832, "grad_norm": 0.734375, "learning_rate": 8.651541382721082e-05, "loss": 0.8145, "step": 35584 }, { "epoch": 0.9422768611553776, "grad_norm": 0.86328125, "learning_rate": 8.651085214866679e-05, "loss": 0.8775, "step": 35585 }, { "epoch": 0.942303340763672, "grad_norm": 0.74609375, "learning_rate": 8.650629049871229e-05, "loss": 0.7801, "step": 35586 }, { "epoch": 0.9423298203719663, "grad_norm": 0.7578125, "learning_rate": 8.650172887735693e-05, "loss": 0.6849, "step": 35587 }, { "epoch": 0.9423562999802606, "grad_norm": 0.82421875, "learning_rate": 8.649716728461038e-05, "loss": 0.8074, "step": 35588 }, { "epoch": 0.942382779588555, "grad_norm": 0.78515625, "learning_rate": 8.649260572048233e-05, "loss": 0.6695, "step": 35589 }, { "epoch": 0.9424092591968494, "grad_norm": 0.79296875, "learning_rate": 8.648804418498239e-05, "loss": 0.7172, "step": 35590 }, { "epoch": 0.9424357388051438, "grad_norm": 0.8203125, "learning_rate": 8.648348267812027e-05, "loss": 0.8341, "step": 35591 }, { "epoch": 0.9424622184134381, "grad_norm": 0.796875, "learning_rate": 8.647892119990566e-05, "loss": 0.8239, "step": 35592 }, { "epoch": 0.9424886980217325, "grad_norm": 0.76171875, "learning_rate": 8.647435975034822e-05, "loss": 0.8195, "step": 35593 }, { "epoch": 0.9425151776300269, "grad_norm": 0.8125, "learning_rate": 8.646979832945758e-05, "loss": 0.7708, "step": 35594 }, { "epoch": 0.9425416572383213, "grad_norm": 0.69140625, "learning_rate": 8.64652369372434e-05, "loss": 0.7597, "step": 35595 }, { "epoch": 0.9425681368466157, "grad_norm": 0.75390625, "learning_rate": 8.646067557371543e-05, "loss": 0.8225, "step": 35596 }, { "epoch": 0.94259461645491, "grad_norm": 0.80078125, "learning_rate": 8.645611423888327e-05, "loss": 0.7998, "step": 35597 }, { "epoch": 0.9426210960632044, "grad_norm": 0.75390625, "learning_rate": 8.645155293275659e-05, "loss": 0.7372, "step": 35598 }, { "epoch": 0.9426475756714988, "grad_norm": 0.78515625, "learning_rate": 8.644699165534508e-05, "loss": 0.8403, "step": 35599 }, { "epoch": 0.9426740552797932, "grad_norm": 0.76171875, "learning_rate": 8.644243040665836e-05, "loss": 0.792, "step": 35600 }, { "epoch": 0.9427005348880876, "grad_norm": 0.86328125, "learning_rate": 8.643786918670617e-05, "loss": 0.8314, "step": 35601 }, { "epoch": 0.942727014496382, "grad_norm": 0.90625, "learning_rate": 8.643330799549814e-05, "loss": 0.6899, "step": 35602 }, { "epoch": 0.9427534941046763, "grad_norm": 0.85546875, "learning_rate": 8.642874683304394e-05, "loss": 0.9921, "step": 35603 }, { "epoch": 0.9427799737129706, "grad_norm": 0.8203125, "learning_rate": 8.642418569935323e-05, "loss": 0.8641, "step": 35604 }, { "epoch": 0.942806453321265, "grad_norm": 0.8359375, "learning_rate": 8.641962459443565e-05, "loss": 0.7791, "step": 35605 }, { "epoch": 0.9428329329295594, "grad_norm": 0.7734375, "learning_rate": 8.641506351830094e-05, "loss": 0.7203, "step": 35606 }, { "epoch": 0.9428594125378538, "grad_norm": 0.76171875, "learning_rate": 8.641050247095872e-05, "loss": 0.8752, "step": 35607 }, { "epoch": 0.9428858921461482, "grad_norm": 0.84765625, "learning_rate": 8.640594145241867e-05, "loss": 0.841, "step": 35608 }, { "epoch": 0.9429123717544425, "grad_norm": 0.8125, "learning_rate": 8.640138046269043e-05, "loss": 0.7413, "step": 35609 }, { "epoch": 0.9429388513627369, "grad_norm": 0.7734375, "learning_rate": 8.639681950178367e-05, "loss": 0.7926, "step": 35610 }, { "epoch": 0.9429653309710313, "grad_norm": 0.8828125, "learning_rate": 8.63922585697081e-05, "loss": 0.7542, "step": 35611 }, { "epoch": 0.9429918105793257, "grad_norm": 0.78125, "learning_rate": 8.638769766647336e-05, "loss": 0.8748, "step": 35612 }, { "epoch": 0.9430182901876201, "grad_norm": 0.77734375, "learning_rate": 8.638313679208912e-05, "loss": 0.7075, "step": 35613 }, { "epoch": 0.9430447697959145, "grad_norm": 0.7890625, "learning_rate": 8.637857594656505e-05, "loss": 0.883, "step": 35614 }, { "epoch": 0.9430712494042088, "grad_norm": 0.69921875, "learning_rate": 8.637401512991079e-05, "loss": 0.6577, "step": 35615 }, { "epoch": 0.9430977290125032, "grad_norm": 0.68359375, "learning_rate": 8.636945434213603e-05, "loss": 0.6436, "step": 35616 }, { "epoch": 0.9431242086207976, "grad_norm": 0.72265625, "learning_rate": 8.636489358325046e-05, "loss": 0.7416, "step": 35617 }, { "epoch": 0.943150688229092, "grad_norm": 0.7578125, "learning_rate": 8.63603328532637e-05, "loss": 0.8862, "step": 35618 }, { "epoch": 0.9431771678373864, "grad_norm": 0.82421875, "learning_rate": 8.635577215218545e-05, "loss": 0.8429, "step": 35619 }, { "epoch": 0.9432036474456807, "grad_norm": 0.74609375, "learning_rate": 8.635121148002529e-05, "loss": 0.7801, "step": 35620 }, { "epoch": 0.943230127053975, "grad_norm": 0.76171875, "learning_rate": 8.634665083679302e-05, "loss": 0.831, "step": 35621 }, { "epoch": 0.9432566066622694, "grad_norm": 0.859375, "learning_rate": 8.634209022249824e-05, "loss": 0.7736, "step": 35622 }, { "epoch": 0.9432830862705638, "grad_norm": 0.765625, "learning_rate": 8.633752963715062e-05, "loss": 0.8502, "step": 35623 }, { "epoch": 0.9433095658788582, "grad_norm": 0.81640625, "learning_rate": 8.633296908075982e-05, "loss": 0.786, "step": 35624 }, { "epoch": 0.9433360454871526, "grad_norm": 0.765625, "learning_rate": 8.632840855333547e-05, "loss": 0.7872, "step": 35625 }, { "epoch": 0.9433625250954469, "grad_norm": 0.7734375, "learning_rate": 8.632384805488732e-05, "loss": 0.8067, "step": 35626 }, { "epoch": 0.9433890047037413, "grad_norm": 0.6953125, "learning_rate": 8.6319287585425e-05, "loss": 0.7699, "step": 35627 }, { "epoch": 0.9434154843120357, "grad_norm": 0.70703125, "learning_rate": 8.631472714495817e-05, "loss": 0.8227, "step": 35628 }, { "epoch": 0.9434419639203301, "grad_norm": 0.85546875, "learning_rate": 8.631016673349648e-05, "loss": 0.8129, "step": 35629 }, { "epoch": 0.9434684435286245, "grad_norm": 0.83203125, "learning_rate": 8.630560635104956e-05, "loss": 0.7438, "step": 35630 }, { "epoch": 0.9434949231369189, "grad_norm": 0.7421875, "learning_rate": 8.630104599762719e-05, "loss": 0.6933, "step": 35631 }, { "epoch": 0.9435214027452132, "grad_norm": 0.81640625, "learning_rate": 8.629648567323896e-05, "loss": 0.9004, "step": 35632 }, { "epoch": 0.9435478823535076, "grad_norm": 0.80859375, "learning_rate": 8.629192537789454e-05, "loss": 0.7226, "step": 35633 }, { "epoch": 0.943574361961802, "grad_norm": 0.80078125, "learning_rate": 8.62873651116036e-05, "loss": 0.7509, "step": 35634 }, { "epoch": 0.9436008415700964, "grad_norm": 0.75, "learning_rate": 8.628280487437579e-05, "loss": 0.7535, "step": 35635 }, { "epoch": 0.9436273211783908, "grad_norm": 0.97265625, "learning_rate": 8.627824466622081e-05, "loss": 0.8201, "step": 35636 }, { "epoch": 0.943653800786685, "grad_norm": 0.78515625, "learning_rate": 8.627368448714831e-05, "loss": 0.7987, "step": 35637 }, { "epoch": 0.9436802803949794, "grad_norm": 0.78125, "learning_rate": 8.626912433716795e-05, "loss": 0.7264, "step": 35638 }, { "epoch": 0.9437067600032738, "grad_norm": 0.75, "learning_rate": 8.626456421628938e-05, "loss": 0.6662, "step": 35639 }, { "epoch": 0.9437332396115682, "grad_norm": 0.7890625, "learning_rate": 8.626000412452226e-05, "loss": 0.7558, "step": 35640 }, { "epoch": 0.9437597192198626, "grad_norm": 0.765625, "learning_rate": 8.625544406187632e-05, "loss": 0.7344, "step": 35641 }, { "epoch": 0.943786198828157, "grad_norm": 0.8125, "learning_rate": 8.625088402836118e-05, "loss": 0.7352, "step": 35642 }, { "epoch": 0.9438126784364513, "grad_norm": 0.84375, "learning_rate": 8.624632402398649e-05, "loss": 0.7282, "step": 35643 }, { "epoch": 0.9438391580447457, "grad_norm": 0.8515625, "learning_rate": 8.624176404876194e-05, "loss": 0.6849, "step": 35644 }, { "epoch": 0.9438656376530401, "grad_norm": 0.671875, "learning_rate": 8.623720410269714e-05, "loss": 0.7647, "step": 35645 }, { "epoch": 0.9438921172613345, "grad_norm": 0.7890625, "learning_rate": 8.623264418580185e-05, "loss": 0.7408, "step": 35646 }, { "epoch": 0.9439185968696289, "grad_norm": 0.74609375, "learning_rate": 8.622808429808567e-05, "loss": 0.7736, "step": 35647 }, { "epoch": 0.9439450764779233, "grad_norm": 0.99609375, "learning_rate": 8.622352443955829e-05, "loss": 0.8401, "step": 35648 }, { "epoch": 0.9439715560862176, "grad_norm": 0.74609375, "learning_rate": 8.621896461022936e-05, "loss": 0.7741, "step": 35649 }, { "epoch": 0.943998035694512, "grad_norm": 0.8203125, "learning_rate": 8.62144048101085e-05, "loss": 0.7005, "step": 35650 }, { "epoch": 0.9440245153028064, "grad_norm": 0.75390625, "learning_rate": 8.620984503920547e-05, "loss": 0.755, "step": 35651 }, { "epoch": 0.9440509949111008, "grad_norm": 0.875, "learning_rate": 8.620528529752987e-05, "loss": 0.8112, "step": 35652 }, { "epoch": 0.944077474519395, "grad_norm": 0.7578125, "learning_rate": 8.62007255850914e-05, "loss": 0.6913, "step": 35653 }, { "epoch": 0.9441039541276894, "grad_norm": 0.82421875, "learning_rate": 8.619616590189968e-05, "loss": 0.7723, "step": 35654 }, { "epoch": 0.9441304337359838, "grad_norm": 0.8359375, "learning_rate": 8.619160624796439e-05, "loss": 0.7779, "step": 35655 }, { "epoch": 0.9441569133442782, "grad_norm": 0.8125, "learning_rate": 8.618704662329522e-05, "loss": 0.7425, "step": 35656 }, { "epoch": 0.9441833929525726, "grad_norm": 0.765625, "learning_rate": 8.618248702790181e-05, "loss": 0.8143, "step": 35657 }, { "epoch": 0.944209872560867, "grad_norm": 0.8046875, "learning_rate": 8.617792746179383e-05, "loss": 0.7851, "step": 35658 }, { "epoch": 0.9442363521691614, "grad_norm": 0.7265625, "learning_rate": 8.617336792498094e-05, "loss": 0.7628, "step": 35659 }, { "epoch": 0.9442628317774557, "grad_norm": 0.76953125, "learning_rate": 8.61688084174728e-05, "loss": 0.8271, "step": 35660 }, { "epoch": 0.9442893113857501, "grad_norm": 0.765625, "learning_rate": 8.616424893927909e-05, "loss": 0.7114, "step": 35661 }, { "epoch": 0.9443157909940445, "grad_norm": 0.765625, "learning_rate": 8.615968949040947e-05, "loss": 0.6988, "step": 35662 }, { "epoch": 0.9443422706023389, "grad_norm": 0.83984375, "learning_rate": 8.615513007087359e-05, "loss": 0.7889, "step": 35663 }, { "epoch": 0.9443687502106333, "grad_norm": 0.77734375, "learning_rate": 8.615057068068113e-05, "loss": 0.7106, "step": 35664 }, { "epoch": 0.9443952298189277, "grad_norm": 0.8671875, "learning_rate": 8.61460113198417e-05, "loss": 0.8581, "step": 35665 }, { "epoch": 0.944421709427222, "grad_norm": 0.7734375, "learning_rate": 8.614145198836504e-05, "loss": 0.7194, "step": 35666 }, { "epoch": 0.9444481890355164, "grad_norm": 0.734375, "learning_rate": 8.61368926862608e-05, "loss": 0.6923, "step": 35667 }, { "epoch": 0.9444746686438108, "grad_norm": 0.8515625, "learning_rate": 8.613233341353861e-05, "loss": 0.749, "step": 35668 }, { "epoch": 0.9445011482521052, "grad_norm": 0.80078125, "learning_rate": 8.612777417020815e-05, "loss": 0.7644, "step": 35669 }, { "epoch": 0.9445276278603995, "grad_norm": 0.7578125, "learning_rate": 8.612321495627902e-05, "loss": 0.8014, "step": 35670 }, { "epoch": 0.9445541074686938, "grad_norm": 0.71875, "learning_rate": 8.611865577176101e-05, "loss": 0.6701, "step": 35671 }, { "epoch": 0.9445805870769882, "grad_norm": 0.73828125, "learning_rate": 8.61140966166637e-05, "loss": 0.6801, "step": 35672 }, { "epoch": 0.9446070666852826, "grad_norm": 0.8046875, "learning_rate": 8.610953749099678e-05, "loss": 0.8354, "step": 35673 }, { "epoch": 0.944633546293577, "grad_norm": 0.71484375, "learning_rate": 8.610497839476989e-05, "loss": 0.6962, "step": 35674 }, { "epoch": 0.9446600259018714, "grad_norm": 0.78125, "learning_rate": 8.610041932799269e-05, "loss": 0.7207, "step": 35675 }, { "epoch": 0.9446865055101658, "grad_norm": 0.734375, "learning_rate": 8.609586029067485e-05, "loss": 0.8289, "step": 35676 }, { "epoch": 0.9447129851184601, "grad_norm": 0.76171875, "learning_rate": 8.609130128282607e-05, "loss": 0.7644, "step": 35677 }, { "epoch": 0.9447394647267545, "grad_norm": 0.8125, "learning_rate": 8.608674230445597e-05, "loss": 0.7592, "step": 35678 }, { "epoch": 0.9447659443350489, "grad_norm": 0.75390625, "learning_rate": 8.608218335557423e-05, "loss": 0.8326, "step": 35679 }, { "epoch": 0.9447924239433433, "grad_norm": 0.703125, "learning_rate": 8.60776244361905e-05, "loss": 0.7258, "step": 35680 }, { "epoch": 0.9448189035516377, "grad_norm": 1.125, "learning_rate": 8.607306554631444e-05, "loss": 0.8737, "step": 35681 }, { "epoch": 0.944845383159932, "grad_norm": 0.74609375, "learning_rate": 8.606850668595576e-05, "loss": 0.7702, "step": 35682 }, { "epoch": 0.9448718627682264, "grad_norm": 0.66015625, "learning_rate": 8.606394785512407e-05, "loss": 0.6356, "step": 35683 }, { "epoch": 0.9448983423765208, "grad_norm": 0.8359375, "learning_rate": 8.605938905382903e-05, "loss": 0.8004, "step": 35684 }, { "epoch": 0.9449248219848152, "grad_norm": 0.76953125, "learning_rate": 8.605483028208028e-05, "loss": 0.7596, "step": 35685 }, { "epoch": 0.9449513015931095, "grad_norm": 0.79296875, "learning_rate": 8.605027153988756e-05, "loss": 0.7526, "step": 35686 }, { "epoch": 0.9449777812014039, "grad_norm": 0.78125, "learning_rate": 8.604571282726051e-05, "loss": 0.8088, "step": 35687 }, { "epoch": 0.9450042608096982, "grad_norm": 0.76171875, "learning_rate": 8.604115414420876e-05, "loss": 0.7802, "step": 35688 }, { "epoch": 0.9450307404179926, "grad_norm": 0.7421875, "learning_rate": 8.603659549074199e-05, "loss": 0.8047, "step": 35689 }, { "epoch": 0.945057220026287, "grad_norm": 0.74609375, "learning_rate": 8.603203686686982e-05, "loss": 0.8067, "step": 35690 }, { "epoch": 0.9450836996345814, "grad_norm": 0.80078125, "learning_rate": 8.602747827260198e-05, "loss": 0.7554, "step": 35691 }, { "epoch": 0.9451101792428758, "grad_norm": 0.796875, "learning_rate": 8.602291970794812e-05, "loss": 0.7839, "step": 35692 }, { "epoch": 0.9451366588511702, "grad_norm": 0.75390625, "learning_rate": 8.601836117291786e-05, "loss": 0.7845, "step": 35693 }, { "epoch": 0.9451631384594645, "grad_norm": 0.77734375, "learning_rate": 8.60138026675209e-05, "loss": 0.7974, "step": 35694 }, { "epoch": 0.9451896180677589, "grad_norm": 0.79296875, "learning_rate": 8.600924419176683e-05, "loss": 0.8133, "step": 35695 }, { "epoch": 0.9452160976760533, "grad_norm": 0.78515625, "learning_rate": 8.600468574566542e-05, "loss": 0.6575, "step": 35696 }, { "epoch": 0.9452425772843477, "grad_norm": 0.7734375, "learning_rate": 8.600012732922628e-05, "loss": 0.8644, "step": 35697 }, { "epoch": 0.9452690568926421, "grad_norm": 0.71484375, "learning_rate": 8.599556894245907e-05, "loss": 0.6674, "step": 35698 }, { "epoch": 0.9452955365009365, "grad_norm": 0.7734375, "learning_rate": 8.599101058537345e-05, "loss": 0.826, "step": 35699 }, { "epoch": 0.9453220161092308, "grad_norm": 0.84765625, "learning_rate": 8.598645225797906e-05, "loss": 0.802, "step": 35700 }, { "epoch": 0.9453484957175252, "grad_norm": 0.77734375, "learning_rate": 8.59818939602856e-05, "loss": 0.718, "step": 35701 }, { "epoch": 0.9453749753258195, "grad_norm": 0.8125, "learning_rate": 8.597733569230272e-05, "loss": 0.7508, "step": 35702 }, { "epoch": 0.9454014549341139, "grad_norm": 0.86328125, "learning_rate": 8.597277745404008e-05, "loss": 0.8701, "step": 35703 }, { "epoch": 0.9454279345424083, "grad_norm": 0.9296875, "learning_rate": 8.596821924550733e-05, "loss": 0.6944, "step": 35704 }, { "epoch": 0.9454544141507026, "grad_norm": 0.8046875, "learning_rate": 8.596366106671414e-05, "loss": 0.7614, "step": 35705 }, { "epoch": 0.945480893758997, "grad_norm": 0.7734375, "learning_rate": 8.595910291767012e-05, "loss": 0.9029, "step": 35706 }, { "epoch": 0.9455073733672914, "grad_norm": 0.85546875, "learning_rate": 8.595454479838503e-05, "loss": 0.8625, "step": 35707 }, { "epoch": 0.9455338529755858, "grad_norm": 0.77734375, "learning_rate": 8.594998670886847e-05, "loss": 0.8419, "step": 35708 }, { "epoch": 0.9455603325838802, "grad_norm": 0.71484375, "learning_rate": 8.594542864913011e-05, "loss": 0.819, "step": 35709 }, { "epoch": 0.9455868121921746, "grad_norm": 0.81640625, "learning_rate": 8.594087061917961e-05, "loss": 0.7232, "step": 35710 }, { "epoch": 0.9456132918004689, "grad_norm": 0.765625, "learning_rate": 8.59363126190266e-05, "loss": 0.773, "step": 35711 }, { "epoch": 0.9456397714087633, "grad_norm": 0.765625, "learning_rate": 8.593175464868081e-05, "loss": 0.7263, "step": 35712 }, { "epoch": 0.9456662510170577, "grad_norm": 0.81640625, "learning_rate": 8.592719670815186e-05, "loss": 0.9862, "step": 35713 }, { "epoch": 0.9456927306253521, "grad_norm": 0.73046875, "learning_rate": 8.592263879744941e-05, "loss": 0.7866, "step": 35714 }, { "epoch": 0.9457192102336465, "grad_norm": 0.8125, "learning_rate": 8.591808091658313e-05, "loss": 0.9219, "step": 35715 }, { "epoch": 0.9457456898419409, "grad_norm": 0.7578125, "learning_rate": 8.591352306556261e-05, "loss": 0.6819, "step": 35716 }, { "epoch": 0.9457721694502352, "grad_norm": 1.015625, "learning_rate": 8.590896524439764e-05, "loss": 0.8355, "step": 35717 }, { "epoch": 0.9457986490585296, "grad_norm": 0.7421875, "learning_rate": 8.59044074530978e-05, "loss": 0.6914, "step": 35718 }, { "epoch": 0.9458251286668239, "grad_norm": 0.75390625, "learning_rate": 8.589984969167276e-05, "loss": 0.7713, "step": 35719 }, { "epoch": 0.9458516082751183, "grad_norm": 0.7421875, "learning_rate": 8.58952919601322e-05, "loss": 0.7916, "step": 35720 }, { "epoch": 0.9458780878834127, "grad_norm": 0.80859375, "learning_rate": 8.589073425848572e-05, "loss": 0.7784, "step": 35721 }, { "epoch": 0.945904567491707, "grad_norm": 0.84765625, "learning_rate": 8.588617658674306e-05, "loss": 0.8331, "step": 35722 }, { "epoch": 0.9459310471000014, "grad_norm": 0.69921875, "learning_rate": 8.588161894491384e-05, "loss": 0.7623, "step": 35723 }, { "epoch": 0.9459575267082958, "grad_norm": 0.8046875, "learning_rate": 8.58770613330077e-05, "loss": 0.7348, "step": 35724 }, { "epoch": 0.9459840063165902, "grad_norm": 0.8359375, "learning_rate": 8.587250375103433e-05, "loss": 0.7745, "step": 35725 }, { "epoch": 0.9460104859248846, "grad_norm": 0.796875, "learning_rate": 8.586794619900335e-05, "loss": 0.9393, "step": 35726 }, { "epoch": 0.946036965533179, "grad_norm": 0.85546875, "learning_rate": 8.58633886769245e-05, "loss": 0.7009, "step": 35727 }, { "epoch": 0.9460634451414733, "grad_norm": 0.73828125, "learning_rate": 8.585883118480737e-05, "loss": 0.7925, "step": 35728 }, { "epoch": 0.9460899247497677, "grad_norm": 0.75, "learning_rate": 8.585427372266166e-05, "loss": 0.6733, "step": 35729 }, { "epoch": 0.9461164043580621, "grad_norm": 0.76953125, "learning_rate": 8.584971629049698e-05, "loss": 0.7781, "step": 35730 }, { "epoch": 0.9461428839663565, "grad_norm": 0.71484375, "learning_rate": 8.5845158888323e-05, "loss": 0.7693, "step": 35731 }, { "epoch": 0.9461693635746509, "grad_norm": 0.796875, "learning_rate": 8.584060151614944e-05, "loss": 0.7738, "step": 35732 }, { "epoch": 0.9461958431829453, "grad_norm": 0.828125, "learning_rate": 8.583604417398589e-05, "loss": 0.7679, "step": 35733 }, { "epoch": 0.9462223227912396, "grad_norm": 0.703125, "learning_rate": 8.583148686184206e-05, "loss": 0.6889, "step": 35734 }, { "epoch": 0.9462488023995339, "grad_norm": 0.765625, "learning_rate": 8.582692957972758e-05, "loss": 0.8823, "step": 35735 }, { "epoch": 0.9462752820078283, "grad_norm": 0.828125, "learning_rate": 8.582237232765206e-05, "loss": 0.7621, "step": 35736 }, { "epoch": 0.9463017616161227, "grad_norm": 0.70703125, "learning_rate": 8.581781510562526e-05, "loss": 0.732, "step": 35737 }, { "epoch": 0.946328241224417, "grad_norm": 0.8359375, "learning_rate": 8.581325791365678e-05, "loss": 0.8806, "step": 35738 }, { "epoch": 0.9463547208327114, "grad_norm": 0.79296875, "learning_rate": 8.58087007517563e-05, "loss": 0.8019, "step": 35739 }, { "epoch": 0.9463812004410058, "grad_norm": 0.76171875, "learning_rate": 8.580414361993346e-05, "loss": 0.7883, "step": 35740 }, { "epoch": 0.9464076800493002, "grad_norm": 0.90625, "learning_rate": 8.579958651819791e-05, "loss": 0.8735, "step": 35741 }, { "epoch": 0.9464341596575946, "grad_norm": 0.73046875, "learning_rate": 8.579502944655935e-05, "loss": 0.7033, "step": 35742 }, { "epoch": 0.946460639265889, "grad_norm": 0.68359375, "learning_rate": 8.579047240502739e-05, "loss": 0.8351, "step": 35743 }, { "epoch": 0.9464871188741834, "grad_norm": 0.78515625, "learning_rate": 8.578591539361173e-05, "loss": 0.7552, "step": 35744 }, { "epoch": 0.9465135984824777, "grad_norm": 0.78515625, "learning_rate": 8.578135841232201e-05, "loss": 0.7972, "step": 35745 }, { "epoch": 0.9465400780907721, "grad_norm": 0.75, "learning_rate": 8.577680146116787e-05, "loss": 0.7306, "step": 35746 }, { "epoch": 0.9465665576990665, "grad_norm": 0.87109375, "learning_rate": 8.577224454015901e-05, "loss": 0.8133, "step": 35747 }, { "epoch": 0.9465930373073609, "grad_norm": 0.78125, "learning_rate": 8.576768764930505e-05, "loss": 0.8528, "step": 35748 }, { "epoch": 0.9466195169156553, "grad_norm": 0.76171875, "learning_rate": 8.576313078861568e-05, "loss": 0.8019, "step": 35749 }, { "epoch": 0.9466459965239497, "grad_norm": 0.80078125, "learning_rate": 8.575857395810053e-05, "loss": 0.7759, "step": 35750 }, { "epoch": 0.9466724761322439, "grad_norm": 0.75, "learning_rate": 8.575401715776923e-05, "loss": 0.7842, "step": 35751 }, { "epoch": 0.9466989557405383, "grad_norm": 0.76953125, "learning_rate": 8.574946038763152e-05, "loss": 0.736, "step": 35752 }, { "epoch": 0.9467254353488327, "grad_norm": 0.74609375, "learning_rate": 8.5744903647697e-05, "loss": 0.6704, "step": 35753 }, { "epoch": 0.9467519149571271, "grad_norm": 0.7421875, "learning_rate": 8.574034693797537e-05, "loss": 0.7834, "step": 35754 }, { "epoch": 0.9467783945654215, "grad_norm": 1.15625, "learning_rate": 8.573579025847624e-05, "loss": 0.8038, "step": 35755 }, { "epoch": 0.9468048741737158, "grad_norm": 0.7578125, "learning_rate": 8.573123360920924e-05, "loss": 0.8385, "step": 35756 }, { "epoch": 0.9468313537820102, "grad_norm": 0.75390625, "learning_rate": 8.572667699018413e-05, "loss": 0.7851, "step": 35757 }, { "epoch": 0.9468578333903046, "grad_norm": 0.71484375, "learning_rate": 8.572212040141051e-05, "loss": 0.6526, "step": 35758 }, { "epoch": 0.946884312998599, "grad_norm": 0.92578125, "learning_rate": 8.571756384289804e-05, "loss": 0.8172, "step": 35759 }, { "epoch": 0.9469107926068934, "grad_norm": 0.78125, "learning_rate": 8.571300731465637e-05, "loss": 0.7547, "step": 35760 }, { "epoch": 0.9469372722151878, "grad_norm": 0.7578125, "learning_rate": 8.570845081669515e-05, "loss": 0.6922, "step": 35761 }, { "epoch": 0.9469637518234821, "grad_norm": 0.80859375, "learning_rate": 8.570389434902405e-05, "loss": 0.8297, "step": 35762 }, { "epoch": 0.9469902314317765, "grad_norm": 0.8359375, "learning_rate": 8.569933791165274e-05, "loss": 0.8772, "step": 35763 }, { "epoch": 0.9470167110400709, "grad_norm": 0.84765625, "learning_rate": 8.569478150459088e-05, "loss": 0.8075, "step": 35764 }, { "epoch": 0.9470431906483653, "grad_norm": 0.69140625, "learning_rate": 8.569022512784811e-05, "loss": 0.6676, "step": 35765 }, { "epoch": 0.9470696702566597, "grad_norm": 0.8359375, "learning_rate": 8.568566878143407e-05, "loss": 0.9195, "step": 35766 }, { "epoch": 0.947096149864954, "grad_norm": 0.78515625, "learning_rate": 8.568111246535846e-05, "loss": 0.7476, "step": 35767 }, { "epoch": 0.9471226294732483, "grad_norm": 0.81640625, "learning_rate": 8.567655617963091e-05, "loss": 0.7715, "step": 35768 }, { "epoch": 0.9471491090815427, "grad_norm": 0.71875, "learning_rate": 8.567199992426107e-05, "loss": 0.7304, "step": 35769 }, { "epoch": 0.9471755886898371, "grad_norm": 0.8203125, "learning_rate": 8.566744369925861e-05, "loss": 0.8087, "step": 35770 }, { "epoch": 0.9472020682981315, "grad_norm": 0.75390625, "learning_rate": 8.566288750463316e-05, "loss": 0.8011, "step": 35771 }, { "epoch": 0.9472285479064259, "grad_norm": 0.77734375, "learning_rate": 8.565833134039442e-05, "loss": 0.7521, "step": 35772 }, { "epoch": 0.9472550275147202, "grad_norm": 0.77734375, "learning_rate": 8.565377520655204e-05, "loss": 0.8366, "step": 35773 }, { "epoch": 0.9472815071230146, "grad_norm": 0.79296875, "learning_rate": 8.564921910311568e-05, "loss": 0.7078, "step": 35774 }, { "epoch": 0.947307986731309, "grad_norm": 0.80078125, "learning_rate": 8.564466303009495e-05, "loss": 0.7976, "step": 35775 }, { "epoch": 0.9473344663396034, "grad_norm": 0.859375, "learning_rate": 8.564010698749951e-05, "loss": 0.7788, "step": 35776 }, { "epoch": 0.9473609459478978, "grad_norm": 0.828125, "learning_rate": 8.563555097533907e-05, "loss": 0.8778, "step": 35777 }, { "epoch": 0.9473874255561922, "grad_norm": 0.73046875, "learning_rate": 8.563099499362327e-05, "loss": 0.6685, "step": 35778 }, { "epoch": 0.9474139051644865, "grad_norm": 0.76953125, "learning_rate": 8.562643904236176e-05, "loss": 0.8645, "step": 35779 }, { "epoch": 0.9474403847727809, "grad_norm": 0.859375, "learning_rate": 8.562188312156418e-05, "loss": 0.8947, "step": 35780 }, { "epoch": 0.9474668643810753, "grad_norm": 0.8828125, "learning_rate": 8.561732723124017e-05, "loss": 0.7558, "step": 35781 }, { "epoch": 0.9474933439893697, "grad_norm": 0.7265625, "learning_rate": 8.561277137139943e-05, "loss": 0.7291, "step": 35782 }, { "epoch": 0.9475198235976641, "grad_norm": 0.78515625, "learning_rate": 8.560821554205161e-05, "loss": 0.7779, "step": 35783 }, { "epoch": 0.9475463032059583, "grad_norm": 0.796875, "learning_rate": 8.560365974320637e-05, "loss": 0.7649, "step": 35784 }, { "epoch": 0.9475727828142527, "grad_norm": 0.74609375, "learning_rate": 8.559910397487333e-05, "loss": 0.8016, "step": 35785 }, { "epoch": 0.9475992624225471, "grad_norm": 0.67578125, "learning_rate": 8.559454823706216e-05, "loss": 0.7162, "step": 35786 }, { "epoch": 0.9476257420308415, "grad_norm": 0.76953125, "learning_rate": 8.558999252978252e-05, "loss": 0.7577, "step": 35787 }, { "epoch": 0.9476522216391359, "grad_norm": 0.7265625, "learning_rate": 8.55854368530441e-05, "loss": 0.7225, "step": 35788 }, { "epoch": 0.9476787012474303, "grad_norm": 0.80859375, "learning_rate": 8.55808812068565e-05, "loss": 0.7514, "step": 35789 }, { "epoch": 0.9477051808557246, "grad_norm": 0.7421875, "learning_rate": 8.55763255912294e-05, "loss": 0.7545, "step": 35790 }, { "epoch": 0.947731660464019, "grad_norm": 0.75390625, "learning_rate": 8.557177000617241e-05, "loss": 0.7645, "step": 35791 }, { "epoch": 0.9477581400723134, "grad_norm": 0.7890625, "learning_rate": 8.556721445169528e-05, "loss": 0.7026, "step": 35792 }, { "epoch": 0.9477846196806078, "grad_norm": 0.8125, "learning_rate": 8.556265892780761e-05, "loss": 0.8273, "step": 35793 }, { "epoch": 0.9478110992889022, "grad_norm": 0.78125, "learning_rate": 8.555810343451907e-05, "loss": 0.7838, "step": 35794 }, { "epoch": 0.9478375788971966, "grad_norm": 0.74609375, "learning_rate": 8.555354797183929e-05, "loss": 0.6832, "step": 35795 }, { "epoch": 0.9478640585054909, "grad_norm": 0.83203125, "learning_rate": 8.55489925397779e-05, "loss": 0.7801, "step": 35796 }, { "epoch": 0.9478905381137853, "grad_norm": 0.765625, "learning_rate": 8.554443713834464e-05, "loss": 0.7457, "step": 35797 }, { "epoch": 0.9479170177220797, "grad_norm": 0.80859375, "learning_rate": 8.55398817675491e-05, "loss": 0.8188, "step": 35798 }, { "epoch": 0.9479434973303741, "grad_norm": 0.7578125, "learning_rate": 8.553532642740098e-05, "loss": 0.7162, "step": 35799 }, { "epoch": 0.9479699769386684, "grad_norm": 0.7734375, "learning_rate": 8.553077111790988e-05, "loss": 0.767, "step": 35800 }, { "epoch": 0.9479964565469627, "grad_norm": 0.75390625, "learning_rate": 8.552621583908548e-05, "loss": 0.7668, "step": 35801 }, { "epoch": 0.9480229361552571, "grad_norm": 0.7734375, "learning_rate": 8.552166059093746e-05, "loss": 0.7943, "step": 35802 }, { "epoch": 0.9480494157635515, "grad_norm": 0.765625, "learning_rate": 8.551710537347545e-05, "loss": 0.847, "step": 35803 }, { "epoch": 0.9480758953718459, "grad_norm": 0.8125, "learning_rate": 8.551255018670912e-05, "loss": 0.7273, "step": 35804 }, { "epoch": 0.9481023749801403, "grad_norm": 0.76953125, "learning_rate": 8.550799503064809e-05, "loss": 0.8998, "step": 35805 }, { "epoch": 0.9481288545884347, "grad_norm": 0.7890625, "learning_rate": 8.550343990530203e-05, "loss": 0.7973, "step": 35806 }, { "epoch": 0.948155334196729, "grad_norm": 0.75, "learning_rate": 8.549888481068063e-05, "loss": 0.852, "step": 35807 }, { "epoch": 0.9481818138050234, "grad_norm": 0.81640625, "learning_rate": 8.54943297467935e-05, "loss": 0.717, "step": 35808 }, { "epoch": 0.9482082934133178, "grad_norm": 0.8046875, "learning_rate": 8.548977471365032e-05, "loss": 0.7097, "step": 35809 }, { "epoch": 0.9482347730216122, "grad_norm": 0.7421875, "learning_rate": 8.548521971126069e-05, "loss": 0.6832, "step": 35810 }, { "epoch": 0.9482612526299066, "grad_norm": 0.7421875, "learning_rate": 8.548066473963432e-05, "loss": 0.7666, "step": 35811 }, { "epoch": 0.948287732238201, "grad_norm": 0.76953125, "learning_rate": 8.547610979878087e-05, "loss": 0.7138, "step": 35812 }, { "epoch": 0.9483142118464953, "grad_norm": 0.84765625, "learning_rate": 8.547155488871e-05, "loss": 0.8121, "step": 35813 }, { "epoch": 0.9483406914547897, "grad_norm": 0.765625, "learning_rate": 8.546700000943131e-05, "loss": 0.8202, "step": 35814 }, { "epoch": 0.9483671710630841, "grad_norm": 0.7421875, "learning_rate": 8.54624451609545e-05, "loss": 0.7049, "step": 35815 }, { "epoch": 0.9483936506713785, "grad_norm": 0.79296875, "learning_rate": 8.545789034328914e-05, "loss": 0.8093, "step": 35816 }, { "epoch": 0.9484201302796728, "grad_norm": 0.91015625, "learning_rate": 8.545333555644501e-05, "loss": 0.711, "step": 35817 }, { "epoch": 0.9484466098879671, "grad_norm": 0.87890625, "learning_rate": 8.54487808004317e-05, "loss": 0.9337, "step": 35818 }, { "epoch": 0.9484730894962615, "grad_norm": 0.765625, "learning_rate": 8.544422607525889e-05, "loss": 0.8095, "step": 35819 }, { "epoch": 0.9484995691045559, "grad_norm": 0.71875, "learning_rate": 8.543967138093618e-05, "loss": 0.719, "step": 35820 }, { "epoch": 0.9485260487128503, "grad_norm": 0.76171875, "learning_rate": 8.543511671747322e-05, "loss": 0.8661, "step": 35821 }, { "epoch": 0.9485525283211447, "grad_norm": 0.84375, "learning_rate": 8.543056208487975e-05, "loss": 0.6524, "step": 35822 }, { "epoch": 0.948579007929439, "grad_norm": 0.76171875, "learning_rate": 8.542600748316536e-05, "loss": 0.7675, "step": 35823 }, { "epoch": 0.9486054875377334, "grad_norm": 0.78125, "learning_rate": 8.542145291233972e-05, "loss": 0.7344, "step": 35824 }, { "epoch": 0.9486319671460278, "grad_norm": 0.87109375, "learning_rate": 8.541689837241247e-05, "loss": 0.7254, "step": 35825 }, { "epoch": 0.9486584467543222, "grad_norm": 0.7265625, "learning_rate": 8.541234386339326e-05, "loss": 0.7994, "step": 35826 }, { "epoch": 0.9486849263626166, "grad_norm": 0.83203125, "learning_rate": 8.540778938529177e-05, "loss": 0.7826, "step": 35827 }, { "epoch": 0.948711405970911, "grad_norm": 0.81640625, "learning_rate": 8.540323493811763e-05, "loss": 0.8192, "step": 35828 }, { "epoch": 0.9487378855792054, "grad_norm": 0.7578125, "learning_rate": 8.539868052188049e-05, "loss": 0.7849, "step": 35829 }, { "epoch": 0.9487643651874997, "grad_norm": 0.75390625, "learning_rate": 8.539412613659003e-05, "loss": 0.7856, "step": 35830 }, { "epoch": 0.9487908447957941, "grad_norm": 0.77734375, "learning_rate": 8.538957178225585e-05, "loss": 0.833, "step": 35831 }, { "epoch": 0.9488173244040885, "grad_norm": 0.859375, "learning_rate": 8.538501745888767e-05, "loss": 0.9296, "step": 35832 }, { "epoch": 0.9488438040123828, "grad_norm": 0.7265625, "learning_rate": 8.53804631664951e-05, "loss": 0.7172, "step": 35833 }, { "epoch": 0.9488702836206772, "grad_norm": 0.75390625, "learning_rate": 8.537590890508782e-05, "loss": 0.7592, "step": 35834 }, { "epoch": 0.9488967632289715, "grad_norm": 0.78515625, "learning_rate": 8.537135467467546e-05, "loss": 0.8309, "step": 35835 }, { "epoch": 0.9489232428372659, "grad_norm": 0.82421875, "learning_rate": 8.536680047526762e-05, "loss": 0.7973, "step": 35836 }, { "epoch": 0.9489497224455603, "grad_norm": 0.7421875, "learning_rate": 8.536224630687407e-05, "loss": 0.8655, "step": 35837 }, { "epoch": 0.9489762020538547, "grad_norm": 0.7734375, "learning_rate": 8.53576921695044e-05, "loss": 0.8394, "step": 35838 }, { "epoch": 0.9490026816621491, "grad_norm": 0.87109375, "learning_rate": 8.535313806316827e-05, "loss": 0.8909, "step": 35839 }, { "epoch": 0.9490291612704435, "grad_norm": 0.734375, "learning_rate": 8.534858398787531e-05, "loss": 0.7346, "step": 35840 }, { "epoch": 0.9490556408787378, "grad_norm": 0.875, "learning_rate": 8.534402994363515e-05, "loss": 0.8382, "step": 35841 }, { "epoch": 0.9490821204870322, "grad_norm": 1.203125, "learning_rate": 8.533947593045754e-05, "loss": 0.7426, "step": 35842 }, { "epoch": 0.9491086000953266, "grad_norm": 0.76171875, "learning_rate": 8.533492194835206e-05, "loss": 0.7932, "step": 35843 }, { "epoch": 0.949135079703621, "grad_norm": 0.80078125, "learning_rate": 8.533036799732837e-05, "loss": 0.7021, "step": 35844 }, { "epoch": 0.9491615593119154, "grad_norm": 0.78515625, "learning_rate": 8.532581407739612e-05, "loss": 0.8433, "step": 35845 }, { "epoch": 0.9491880389202098, "grad_norm": 0.734375, "learning_rate": 8.5321260188565e-05, "loss": 0.8287, "step": 35846 }, { "epoch": 0.9492145185285041, "grad_norm": 0.7890625, "learning_rate": 8.531670633084457e-05, "loss": 0.8587, "step": 35847 }, { "epoch": 0.9492409981367985, "grad_norm": 0.79296875, "learning_rate": 8.531215250424457e-05, "loss": 0.7165, "step": 35848 }, { "epoch": 0.9492674777450929, "grad_norm": 0.8125, "learning_rate": 8.530759870877464e-05, "loss": 0.8272, "step": 35849 }, { "epoch": 0.9492939573533872, "grad_norm": 0.703125, "learning_rate": 8.530304494444441e-05, "loss": 0.6918, "step": 35850 }, { "epoch": 0.9493204369616816, "grad_norm": 0.796875, "learning_rate": 8.529849121126353e-05, "loss": 0.8485, "step": 35851 }, { "epoch": 0.9493469165699759, "grad_norm": 0.8046875, "learning_rate": 8.529393750924164e-05, "loss": 0.821, "step": 35852 }, { "epoch": 0.9493733961782703, "grad_norm": 0.79296875, "learning_rate": 8.528938383838843e-05, "loss": 0.8277, "step": 35853 }, { "epoch": 0.9493998757865647, "grad_norm": 0.98046875, "learning_rate": 8.528483019871355e-05, "loss": 0.8642, "step": 35854 }, { "epoch": 0.9494263553948591, "grad_norm": 0.74609375, "learning_rate": 8.52802765902266e-05, "loss": 0.8881, "step": 35855 }, { "epoch": 0.9494528350031535, "grad_norm": 0.77734375, "learning_rate": 8.527572301293728e-05, "loss": 0.8063, "step": 35856 }, { "epoch": 0.9494793146114479, "grad_norm": 0.828125, "learning_rate": 8.527116946685519e-05, "loss": 0.7911, "step": 35857 }, { "epoch": 0.9495057942197422, "grad_norm": 0.74609375, "learning_rate": 8.526661595199004e-05, "loss": 0.703, "step": 35858 }, { "epoch": 0.9495322738280366, "grad_norm": 0.8828125, "learning_rate": 8.526206246835145e-05, "loss": 0.8093, "step": 35859 }, { "epoch": 0.949558753436331, "grad_norm": 0.8359375, "learning_rate": 8.525750901594909e-05, "loss": 0.7995, "step": 35860 }, { "epoch": 0.9495852330446254, "grad_norm": 0.84765625, "learning_rate": 8.525295559479259e-05, "loss": 0.7694, "step": 35861 }, { "epoch": 0.9496117126529198, "grad_norm": 1.203125, "learning_rate": 8.524840220489157e-05, "loss": 0.8271, "step": 35862 }, { "epoch": 0.9496381922612142, "grad_norm": 0.765625, "learning_rate": 8.524384884625576e-05, "loss": 0.7006, "step": 35863 }, { "epoch": 0.9496646718695085, "grad_norm": 0.8125, "learning_rate": 8.523929551889477e-05, "loss": 0.8423, "step": 35864 }, { "epoch": 0.9496911514778029, "grad_norm": 0.7734375, "learning_rate": 8.523474222281824e-05, "loss": 0.826, "step": 35865 }, { "epoch": 0.9497176310860972, "grad_norm": 0.8359375, "learning_rate": 8.523018895803585e-05, "loss": 0.7492, "step": 35866 }, { "epoch": 0.9497441106943916, "grad_norm": 0.78125, "learning_rate": 8.522563572455717e-05, "loss": 0.7902, "step": 35867 }, { "epoch": 0.949770590302686, "grad_norm": 0.85546875, "learning_rate": 8.522108252239196e-05, "loss": 0.9243, "step": 35868 }, { "epoch": 0.9497970699109803, "grad_norm": 0.76953125, "learning_rate": 8.521652935154981e-05, "loss": 0.855, "step": 35869 }, { "epoch": 0.9498235495192747, "grad_norm": 0.8046875, "learning_rate": 8.52119762120404e-05, "loss": 0.8328, "step": 35870 }, { "epoch": 0.9498500291275691, "grad_norm": 0.76953125, "learning_rate": 8.520742310387335e-05, "loss": 0.8399, "step": 35871 }, { "epoch": 0.9498765087358635, "grad_norm": 0.74609375, "learning_rate": 8.520287002705829e-05, "loss": 0.8227, "step": 35872 }, { "epoch": 0.9499029883441579, "grad_norm": 0.75, "learning_rate": 8.519831698160494e-05, "loss": 0.8109, "step": 35873 }, { "epoch": 0.9499294679524523, "grad_norm": 0.8046875, "learning_rate": 8.51937639675229e-05, "loss": 0.7161, "step": 35874 }, { "epoch": 0.9499559475607466, "grad_norm": 0.73046875, "learning_rate": 8.518921098482184e-05, "loss": 0.8445, "step": 35875 }, { "epoch": 0.949982427169041, "grad_norm": 0.7734375, "learning_rate": 8.51846580335114e-05, "loss": 0.8181, "step": 35876 }, { "epoch": 0.9500089067773354, "grad_norm": 0.78125, "learning_rate": 8.518010511360118e-05, "loss": 0.7234, "step": 35877 }, { "epoch": 0.9500353863856298, "grad_norm": 0.70703125, "learning_rate": 8.517555222510093e-05, "loss": 0.7501, "step": 35878 }, { "epoch": 0.9500618659939242, "grad_norm": 0.796875, "learning_rate": 8.517099936802025e-05, "loss": 0.9232, "step": 35879 }, { "epoch": 0.9500883456022186, "grad_norm": 0.80078125, "learning_rate": 8.516644654236879e-05, "loss": 0.7665, "step": 35880 }, { "epoch": 0.9501148252105129, "grad_norm": 0.82421875, "learning_rate": 8.516189374815619e-05, "loss": 0.8155, "step": 35881 }, { "epoch": 0.9501413048188072, "grad_norm": 0.734375, "learning_rate": 8.515734098539207e-05, "loss": 0.8918, "step": 35882 }, { "epoch": 0.9501677844271016, "grad_norm": 0.734375, "learning_rate": 8.515278825408616e-05, "loss": 0.7561, "step": 35883 }, { "epoch": 0.950194264035396, "grad_norm": 0.7578125, "learning_rate": 8.514823555424807e-05, "loss": 0.8606, "step": 35884 }, { "epoch": 0.9502207436436904, "grad_norm": 0.8828125, "learning_rate": 8.514368288588744e-05, "loss": 0.8084, "step": 35885 }, { "epoch": 0.9502472232519847, "grad_norm": 0.70703125, "learning_rate": 8.513913024901393e-05, "loss": 0.7695, "step": 35886 }, { "epoch": 0.9502737028602791, "grad_norm": 6.21875, "learning_rate": 8.513457764363713e-05, "loss": 0.7656, "step": 35887 }, { "epoch": 0.9503001824685735, "grad_norm": 0.83984375, "learning_rate": 8.513002506976681e-05, "loss": 0.8167, "step": 35888 }, { "epoch": 0.9503266620768679, "grad_norm": 0.74609375, "learning_rate": 8.512547252741253e-05, "loss": 0.7926, "step": 35889 }, { "epoch": 0.9503531416851623, "grad_norm": 0.77734375, "learning_rate": 8.512092001658396e-05, "loss": 0.9479, "step": 35890 }, { "epoch": 0.9503796212934567, "grad_norm": 0.76171875, "learning_rate": 8.511636753729077e-05, "loss": 0.7054, "step": 35891 }, { "epoch": 0.950406100901751, "grad_norm": 0.703125, "learning_rate": 8.511181508954255e-05, "loss": 0.7467, "step": 35892 }, { "epoch": 0.9504325805100454, "grad_norm": 0.7421875, "learning_rate": 8.510726267334901e-05, "loss": 0.8303, "step": 35893 }, { "epoch": 0.9504590601183398, "grad_norm": 1.75, "learning_rate": 8.510271028871978e-05, "loss": 0.859, "step": 35894 }, { "epoch": 0.9504855397266342, "grad_norm": 0.765625, "learning_rate": 8.509815793566448e-05, "loss": 0.7557, "step": 35895 }, { "epoch": 0.9505120193349286, "grad_norm": 0.87109375, "learning_rate": 8.509360561419281e-05, "loss": 0.9015, "step": 35896 }, { "epoch": 0.950538498943223, "grad_norm": 0.8671875, "learning_rate": 8.508905332431436e-05, "loss": 0.8421, "step": 35897 }, { "epoch": 0.9505649785515173, "grad_norm": 0.89453125, "learning_rate": 8.508450106603884e-05, "loss": 0.9164, "step": 35898 }, { "epoch": 0.9505914581598116, "grad_norm": 0.74609375, "learning_rate": 8.507994883937586e-05, "loss": 0.706, "step": 35899 }, { "epoch": 0.950617937768106, "grad_norm": 0.72265625, "learning_rate": 8.507539664433509e-05, "loss": 0.8288, "step": 35900 }, { "epoch": 0.9506444173764004, "grad_norm": 0.8203125, "learning_rate": 8.507084448092614e-05, "loss": 0.89, "step": 35901 }, { "epoch": 0.9506708969846948, "grad_norm": 0.76953125, "learning_rate": 8.506629234915865e-05, "loss": 0.6538, "step": 35902 }, { "epoch": 0.9506973765929891, "grad_norm": 0.9140625, "learning_rate": 8.506174024904234e-05, "loss": 0.8346, "step": 35903 }, { "epoch": 0.9507238562012835, "grad_norm": 0.7421875, "learning_rate": 8.505718818058682e-05, "loss": 0.8123, "step": 35904 }, { "epoch": 0.9507503358095779, "grad_norm": 0.71875, "learning_rate": 8.505263614380175e-05, "loss": 0.6982, "step": 35905 }, { "epoch": 0.9507768154178723, "grad_norm": 0.70703125, "learning_rate": 8.504808413869673e-05, "loss": 0.9082, "step": 35906 }, { "epoch": 0.9508032950261667, "grad_norm": 1.4375, "learning_rate": 8.504353216528142e-05, "loss": 0.8188, "step": 35907 }, { "epoch": 0.9508297746344611, "grad_norm": 0.83203125, "learning_rate": 8.503898022356553e-05, "loss": 0.8187, "step": 35908 }, { "epoch": 0.9508562542427554, "grad_norm": 0.875, "learning_rate": 8.503442831355868e-05, "loss": 0.8769, "step": 35909 }, { "epoch": 0.9508827338510498, "grad_norm": 0.859375, "learning_rate": 8.502987643527048e-05, "loss": 0.8201, "step": 35910 }, { "epoch": 0.9509092134593442, "grad_norm": 0.734375, "learning_rate": 8.502532458871063e-05, "loss": 0.83, "step": 35911 }, { "epoch": 0.9509356930676386, "grad_norm": 0.828125, "learning_rate": 8.502077277388869e-05, "loss": 0.7617, "step": 35912 }, { "epoch": 0.950962172675933, "grad_norm": 0.6953125, "learning_rate": 8.501622099081441e-05, "loss": 0.7578, "step": 35913 }, { "epoch": 0.9509886522842274, "grad_norm": 0.7421875, "learning_rate": 8.501166923949737e-05, "loss": 0.8005, "step": 35914 }, { "epoch": 0.9510151318925216, "grad_norm": 0.79296875, "learning_rate": 8.500711751994726e-05, "loss": 0.876, "step": 35915 }, { "epoch": 0.951041611500816, "grad_norm": 0.94921875, "learning_rate": 8.50025658321737e-05, "loss": 0.8914, "step": 35916 }, { "epoch": 0.9510680911091104, "grad_norm": 0.703125, "learning_rate": 8.499801417618634e-05, "loss": 0.8578, "step": 35917 }, { "epoch": 0.9510945707174048, "grad_norm": 0.796875, "learning_rate": 8.499346255199484e-05, "loss": 0.7754, "step": 35918 }, { "epoch": 0.9511210503256992, "grad_norm": 0.8046875, "learning_rate": 8.498891095960883e-05, "loss": 0.7272, "step": 35919 }, { "epoch": 0.9511475299339935, "grad_norm": 0.8125, "learning_rate": 8.498435939903798e-05, "loss": 0.7603, "step": 35920 }, { "epoch": 0.9511740095422879, "grad_norm": 0.7734375, "learning_rate": 8.497980787029192e-05, "loss": 0.8201, "step": 35921 }, { "epoch": 0.9512004891505823, "grad_norm": 0.80859375, "learning_rate": 8.497525637338025e-05, "loss": 0.8659, "step": 35922 }, { "epoch": 0.9512269687588767, "grad_norm": 0.8515625, "learning_rate": 8.497070490831272e-05, "loss": 0.7656, "step": 35923 }, { "epoch": 0.9512534483671711, "grad_norm": 0.88671875, "learning_rate": 8.496615347509891e-05, "loss": 0.9062, "step": 35924 }, { "epoch": 0.9512799279754655, "grad_norm": 0.82421875, "learning_rate": 8.496160207374848e-05, "loss": 0.6853, "step": 35925 }, { "epoch": 0.9513064075837598, "grad_norm": 0.984375, "learning_rate": 8.495705070427108e-05, "loss": 0.9433, "step": 35926 }, { "epoch": 0.9513328871920542, "grad_norm": 0.80078125, "learning_rate": 8.49524993666763e-05, "loss": 0.952, "step": 35927 }, { "epoch": 0.9513593668003486, "grad_norm": 0.84765625, "learning_rate": 8.49479480609739e-05, "loss": 0.9692, "step": 35928 }, { "epoch": 0.951385846408643, "grad_norm": 0.80078125, "learning_rate": 8.494339678717345e-05, "loss": 0.8975, "step": 35929 }, { "epoch": 0.9514123260169374, "grad_norm": 0.83203125, "learning_rate": 8.49388455452846e-05, "loss": 0.8599, "step": 35930 }, { "epoch": 0.9514388056252316, "grad_norm": 0.84375, "learning_rate": 8.493429433531702e-05, "loss": 0.8131, "step": 35931 }, { "epoch": 0.951465285233526, "grad_norm": 0.75, "learning_rate": 8.492974315728032e-05, "loss": 0.7625, "step": 35932 }, { "epoch": 0.9514917648418204, "grad_norm": 0.75390625, "learning_rate": 8.492519201118416e-05, "loss": 0.6621, "step": 35933 }, { "epoch": 0.9515182444501148, "grad_norm": 1.21875, "learning_rate": 8.492064089703822e-05, "loss": 0.8465, "step": 35934 }, { "epoch": 0.9515447240584092, "grad_norm": 0.72265625, "learning_rate": 8.491608981485211e-05, "loss": 0.7274, "step": 35935 }, { "epoch": 0.9515712036667036, "grad_norm": 0.85546875, "learning_rate": 8.491153876463551e-05, "loss": 0.776, "step": 35936 }, { "epoch": 0.9515976832749979, "grad_norm": 0.8828125, "learning_rate": 8.490698774639801e-05, "loss": 0.8205, "step": 35937 }, { "epoch": 0.9516241628832923, "grad_norm": 0.83984375, "learning_rate": 8.49024367601493e-05, "loss": 0.75, "step": 35938 }, { "epoch": 0.9516506424915867, "grad_norm": 0.80078125, "learning_rate": 8.489788580589902e-05, "loss": 0.7976, "step": 35939 }, { "epoch": 0.9516771220998811, "grad_norm": 0.7890625, "learning_rate": 8.489333488365679e-05, "loss": 0.7983, "step": 35940 }, { "epoch": 0.9517036017081755, "grad_norm": 0.765625, "learning_rate": 8.48887839934323e-05, "loss": 0.7428, "step": 35941 }, { "epoch": 0.9517300813164699, "grad_norm": 0.78515625, "learning_rate": 8.48842331352351e-05, "loss": 0.7522, "step": 35942 }, { "epoch": 0.9517565609247642, "grad_norm": 0.7890625, "learning_rate": 8.487968230907496e-05, "loss": 0.7602, "step": 35943 }, { "epoch": 0.9517830405330586, "grad_norm": 0.91796875, "learning_rate": 8.487513151496147e-05, "loss": 0.8875, "step": 35944 }, { "epoch": 0.951809520141353, "grad_norm": 0.83203125, "learning_rate": 8.487058075290427e-05, "loss": 0.8308, "step": 35945 }, { "epoch": 0.9518359997496474, "grad_norm": 0.734375, "learning_rate": 8.486603002291301e-05, "loss": 0.7423, "step": 35946 }, { "epoch": 0.9518624793579418, "grad_norm": 0.8046875, "learning_rate": 8.48614793249973e-05, "loss": 0.8042, "step": 35947 }, { "epoch": 0.951888958966236, "grad_norm": 0.7734375, "learning_rate": 8.485692865916686e-05, "loss": 0.7645, "step": 35948 }, { "epoch": 0.9519154385745304, "grad_norm": 0.76171875, "learning_rate": 8.485237802543128e-05, "loss": 0.8225, "step": 35949 }, { "epoch": 0.9519419181828248, "grad_norm": 0.83984375, "learning_rate": 8.484782742380023e-05, "loss": 0.8368, "step": 35950 }, { "epoch": 0.9519683977911192, "grad_norm": 0.77734375, "learning_rate": 8.484327685428336e-05, "loss": 0.7794, "step": 35951 }, { "epoch": 0.9519948773994136, "grad_norm": 0.80078125, "learning_rate": 8.483872631689022e-05, "loss": 0.7523, "step": 35952 }, { "epoch": 0.952021357007708, "grad_norm": 0.7578125, "learning_rate": 8.48341758116306e-05, "loss": 0.7106, "step": 35953 }, { "epoch": 0.9520478366160023, "grad_norm": 0.89453125, "learning_rate": 8.482962533851407e-05, "loss": 0.6897, "step": 35954 }, { "epoch": 0.9520743162242967, "grad_norm": 0.8203125, "learning_rate": 8.482507489755028e-05, "loss": 0.8339, "step": 35955 }, { "epoch": 0.9521007958325911, "grad_norm": 0.81640625, "learning_rate": 8.482052448874888e-05, "loss": 0.7256, "step": 35956 }, { "epoch": 0.9521272754408855, "grad_norm": 0.7109375, "learning_rate": 8.481597411211949e-05, "loss": 0.7943, "step": 35957 }, { "epoch": 0.9521537550491799, "grad_norm": 0.73828125, "learning_rate": 8.481142376767179e-05, "loss": 0.8078, "step": 35958 }, { "epoch": 0.9521802346574743, "grad_norm": 1.1171875, "learning_rate": 8.480687345541542e-05, "loss": 0.8462, "step": 35959 }, { "epoch": 0.9522067142657686, "grad_norm": 0.80078125, "learning_rate": 8.480232317536001e-05, "loss": 0.7192, "step": 35960 }, { "epoch": 0.952233193874063, "grad_norm": 0.7421875, "learning_rate": 8.479777292751521e-05, "loss": 0.8414, "step": 35961 }, { "epoch": 0.9522596734823574, "grad_norm": 0.734375, "learning_rate": 8.479322271189061e-05, "loss": 0.7184, "step": 35962 }, { "epoch": 0.9522861530906518, "grad_norm": 0.796875, "learning_rate": 8.478867252849595e-05, "loss": 0.8647, "step": 35963 }, { "epoch": 0.9523126326989461, "grad_norm": 0.77734375, "learning_rate": 8.478412237734083e-05, "loss": 0.7785, "step": 35964 }, { "epoch": 0.9523391123072404, "grad_norm": 0.71875, "learning_rate": 8.477957225843489e-05, "loss": 0.7807, "step": 35965 }, { "epoch": 0.9523655919155348, "grad_norm": 0.95703125, "learning_rate": 8.477502217178779e-05, "loss": 0.7325, "step": 35966 }, { "epoch": 0.9523920715238292, "grad_norm": 0.765625, "learning_rate": 8.47704721174091e-05, "loss": 0.7288, "step": 35967 }, { "epoch": 0.9524185511321236, "grad_norm": 0.79296875, "learning_rate": 8.476592209530858e-05, "loss": 0.7672, "step": 35968 }, { "epoch": 0.952445030740418, "grad_norm": 0.8671875, "learning_rate": 8.47613721054958e-05, "loss": 0.8251, "step": 35969 }, { "epoch": 0.9524715103487124, "grad_norm": 0.76171875, "learning_rate": 8.475682214798045e-05, "loss": 0.7054, "step": 35970 }, { "epoch": 0.9524979899570067, "grad_norm": 0.80078125, "learning_rate": 8.47522722227721e-05, "loss": 0.7404, "step": 35971 }, { "epoch": 0.9525244695653011, "grad_norm": 0.73828125, "learning_rate": 8.474772232988045e-05, "loss": 0.6727, "step": 35972 }, { "epoch": 0.9525509491735955, "grad_norm": 0.75390625, "learning_rate": 8.474317246931514e-05, "loss": 0.7584, "step": 35973 }, { "epoch": 0.9525774287818899, "grad_norm": 0.75390625, "learning_rate": 8.473862264108581e-05, "loss": 0.8784, "step": 35974 }, { "epoch": 0.9526039083901843, "grad_norm": 0.859375, "learning_rate": 8.47340728452021e-05, "loss": 0.8592, "step": 35975 }, { "epoch": 0.9526303879984787, "grad_norm": 1.8046875, "learning_rate": 8.472952308167363e-05, "loss": 0.8237, "step": 35976 }, { "epoch": 0.952656867606773, "grad_norm": 0.81640625, "learning_rate": 8.472497335051007e-05, "loss": 0.8437, "step": 35977 }, { "epoch": 0.9526833472150674, "grad_norm": 0.7890625, "learning_rate": 8.472042365172106e-05, "loss": 0.777, "step": 35978 }, { "epoch": 0.9527098268233618, "grad_norm": 0.7109375, "learning_rate": 8.471587398531624e-05, "loss": 0.7014, "step": 35979 }, { "epoch": 0.9527363064316561, "grad_norm": 0.7421875, "learning_rate": 8.471132435130525e-05, "loss": 0.859, "step": 35980 }, { "epoch": 0.9527627860399505, "grad_norm": 0.7578125, "learning_rate": 8.470677474969773e-05, "loss": 0.735, "step": 35981 }, { "epoch": 0.9527892656482448, "grad_norm": 0.7734375, "learning_rate": 8.470222518050332e-05, "loss": 0.7241, "step": 35982 }, { "epoch": 0.9528157452565392, "grad_norm": 0.7578125, "learning_rate": 8.469767564373168e-05, "loss": 0.7079, "step": 35983 }, { "epoch": 0.9528422248648336, "grad_norm": 0.79296875, "learning_rate": 8.469312613939244e-05, "loss": 0.822, "step": 35984 }, { "epoch": 0.952868704473128, "grad_norm": 0.81640625, "learning_rate": 8.468857666749526e-05, "loss": 0.6851, "step": 35985 }, { "epoch": 0.9528951840814224, "grad_norm": 0.80859375, "learning_rate": 8.468402722804976e-05, "loss": 0.894, "step": 35986 }, { "epoch": 0.9529216636897168, "grad_norm": 0.734375, "learning_rate": 8.467947782106557e-05, "loss": 0.8209, "step": 35987 }, { "epoch": 0.9529481432980111, "grad_norm": 0.82421875, "learning_rate": 8.467492844655234e-05, "loss": 0.8343, "step": 35988 }, { "epoch": 0.9529746229063055, "grad_norm": 0.8671875, "learning_rate": 8.467037910451975e-05, "loss": 0.6474, "step": 35989 }, { "epoch": 0.9530011025145999, "grad_norm": 0.73046875, "learning_rate": 8.466582979497742e-05, "loss": 0.7378, "step": 35990 }, { "epoch": 0.9530275821228943, "grad_norm": 0.7734375, "learning_rate": 8.466128051793498e-05, "loss": 0.7921, "step": 35991 }, { "epoch": 0.9530540617311887, "grad_norm": 0.83984375, "learning_rate": 8.465673127340207e-05, "loss": 0.7594, "step": 35992 }, { "epoch": 0.9530805413394831, "grad_norm": 0.74609375, "learning_rate": 8.465218206138832e-05, "loss": 0.7895, "step": 35993 }, { "epoch": 0.9531070209477774, "grad_norm": 0.84375, "learning_rate": 8.464763288190342e-05, "loss": 0.9008, "step": 35994 }, { "epoch": 0.9531335005560718, "grad_norm": 0.734375, "learning_rate": 8.4643083734957e-05, "loss": 0.7352, "step": 35995 }, { "epoch": 0.9531599801643662, "grad_norm": 0.71875, "learning_rate": 8.463853462055869e-05, "loss": 0.7825, "step": 35996 }, { "epoch": 0.9531864597726605, "grad_norm": 0.78125, "learning_rate": 8.463398553871811e-05, "loss": 0.8441, "step": 35997 }, { "epoch": 0.9532129393809549, "grad_norm": 0.84765625, "learning_rate": 8.462943648944491e-05, "loss": 0.7937, "step": 35998 }, { "epoch": 0.9532394189892492, "grad_norm": 0.8046875, "learning_rate": 8.462488747274873e-05, "loss": 0.788, "step": 35999 }, { "epoch": 0.9532658985975436, "grad_norm": 0.80078125, "learning_rate": 8.462033848863924e-05, "loss": 0.7795, "step": 36000 }, { "epoch": 0.9532658985975436, "eval_loss": 0.7839899659156799, "eval_runtime": 281.3245, "eval_samples_per_second": 35.546, "eval_steps_per_second": 0.743, "step": 36000 }, { "epoch": 0.953292378205838, "grad_norm": 0.76171875, "learning_rate": 8.461578953712608e-05, "loss": 0.7456, "step": 36001 }, { "epoch": 0.9533188578141324, "grad_norm": 0.8046875, "learning_rate": 8.461124061821887e-05, "loss": 0.8013, "step": 36002 }, { "epoch": 0.9533453374224268, "grad_norm": 0.8359375, "learning_rate": 8.460669173192723e-05, "loss": 0.7346, "step": 36003 }, { "epoch": 0.9533718170307212, "grad_norm": 0.76953125, "learning_rate": 8.460214287826085e-05, "loss": 0.7343, "step": 36004 }, { "epoch": 0.9533982966390155, "grad_norm": 0.78125, "learning_rate": 8.459759405722934e-05, "loss": 0.8038, "step": 36005 }, { "epoch": 0.9534247762473099, "grad_norm": 0.6953125, "learning_rate": 8.459304526884237e-05, "loss": 0.6792, "step": 36006 }, { "epoch": 0.9534512558556043, "grad_norm": 0.7421875, "learning_rate": 8.458849651310954e-05, "loss": 0.8136, "step": 36007 }, { "epoch": 0.9534777354638987, "grad_norm": 0.94140625, "learning_rate": 8.458394779004047e-05, "loss": 0.7481, "step": 36008 }, { "epoch": 0.9535042150721931, "grad_norm": 0.77734375, "learning_rate": 8.45793990996449e-05, "loss": 0.7701, "step": 36009 }, { "epoch": 0.9535306946804875, "grad_norm": 0.80859375, "learning_rate": 8.457485044193239e-05, "loss": 0.8313, "step": 36010 }, { "epoch": 0.9535571742887818, "grad_norm": 0.83984375, "learning_rate": 8.45703018169126e-05, "loss": 0.7842, "step": 36011 }, { "epoch": 0.9535836538970762, "grad_norm": 0.79296875, "learning_rate": 8.456575322459522e-05, "loss": 0.7933, "step": 36012 }, { "epoch": 0.9536101335053705, "grad_norm": 0.859375, "learning_rate": 8.456120466498975e-05, "loss": 0.8357, "step": 36013 }, { "epoch": 0.9536366131136649, "grad_norm": 0.80859375, "learning_rate": 8.4556656138106e-05, "loss": 0.6872, "step": 36014 }, { "epoch": 0.9536630927219593, "grad_norm": 0.7734375, "learning_rate": 8.455210764395351e-05, "loss": 0.8285, "step": 36015 }, { "epoch": 0.9536895723302536, "grad_norm": 0.78515625, "learning_rate": 8.454755918254196e-05, "loss": 0.7719, "step": 36016 }, { "epoch": 0.953716051938548, "grad_norm": 0.70703125, "learning_rate": 8.454301075388097e-05, "loss": 0.7694, "step": 36017 }, { "epoch": 0.9537425315468424, "grad_norm": 0.7890625, "learning_rate": 8.453846235798015e-05, "loss": 0.6899, "step": 36018 }, { "epoch": 0.9537690111551368, "grad_norm": 0.7578125, "learning_rate": 8.45339139948492e-05, "loss": 0.7909, "step": 36019 }, { "epoch": 0.9537954907634312, "grad_norm": 0.76171875, "learning_rate": 8.452936566449774e-05, "loss": 0.6972, "step": 36020 }, { "epoch": 0.9538219703717256, "grad_norm": 0.80078125, "learning_rate": 8.452481736693541e-05, "loss": 0.7866, "step": 36021 }, { "epoch": 0.9538484499800199, "grad_norm": 0.796875, "learning_rate": 8.452026910217186e-05, "loss": 0.7358, "step": 36022 }, { "epoch": 0.9538749295883143, "grad_norm": 0.8359375, "learning_rate": 8.451572087021667e-05, "loss": 0.9316, "step": 36023 }, { "epoch": 0.9539014091966087, "grad_norm": 0.73046875, "learning_rate": 8.451117267107955e-05, "loss": 0.7682, "step": 36024 }, { "epoch": 0.9539278888049031, "grad_norm": 0.84765625, "learning_rate": 8.450662450477013e-05, "loss": 0.885, "step": 36025 }, { "epoch": 0.9539543684131975, "grad_norm": 0.80078125, "learning_rate": 8.450207637129802e-05, "loss": 0.7253, "step": 36026 }, { "epoch": 0.9539808480214919, "grad_norm": 0.80078125, "learning_rate": 8.449752827067288e-05, "loss": 0.7455, "step": 36027 }, { "epoch": 0.9540073276297862, "grad_norm": 0.7890625, "learning_rate": 8.44929802029043e-05, "loss": 0.8337, "step": 36028 }, { "epoch": 0.9540338072380805, "grad_norm": 0.81640625, "learning_rate": 8.4488432168002e-05, "loss": 0.737, "step": 36029 }, { "epoch": 0.9540602868463749, "grad_norm": 0.859375, "learning_rate": 8.448388416597558e-05, "loss": 0.8974, "step": 36030 }, { "epoch": 0.9540867664546693, "grad_norm": 0.77734375, "learning_rate": 8.447933619683468e-05, "loss": 0.7518, "step": 36031 }, { "epoch": 0.9541132460629637, "grad_norm": 0.91015625, "learning_rate": 8.447478826058894e-05, "loss": 0.8114, "step": 36032 }, { "epoch": 0.954139725671258, "grad_norm": 0.76171875, "learning_rate": 8.447024035724798e-05, "loss": 0.7825, "step": 36033 }, { "epoch": 0.9541662052795524, "grad_norm": 0.81640625, "learning_rate": 8.446569248682146e-05, "loss": 0.8704, "step": 36034 }, { "epoch": 0.9541926848878468, "grad_norm": 0.75390625, "learning_rate": 8.446114464931904e-05, "loss": 0.7013, "step": 36035 }, { "epoch": 0.9542191644961412, "grad_norm": 0.7265625, "learning_rate": 8.445659684475033e-05, "loss": 0.8375, "step": 36036 }, { "epoch": 0.9542456441044356, "grad_norm": 0.81640625, "learning_rate": 8.445204907312498e-05, "loss": 0.7829, "step": 36037 }, { "epoch": 0.95427212371273, "grad_norm": 0.8046875, "learning_rate": 8.444750133445259e-05, "loss": 0.7158, "step": 36038 }, { "epoch": 0.9542986033210243, "grad_norm": 0.85546875, "learning_rate": 8.444295362874286e-05, "loss": 0.7862, "step": 36039 }, { "epoch": 0.9543250829293187, "grad_norm": 0.79296875, "learning_rate": 8.44384059560054e-05, "loss": 0.8626, "step": 36040 }, { "epoch": 0.9543515625376131, "grad_norm": 0.71484375, "learning_rate": 8.443385831624986e-05, "loss": 0.719, "step": 36041 }, { "epoch": 0.9543780421459075, "grad_norm": 0.76171875, "learning_rate": 8.442931070948585e-05, "loss": 0.7307, "step": 36042 }, { "epoch": 0.9544045217542019, "grad_norm": 0.84765625, "learning_rate": 8.442476313572303e-05, "loss": 0.8526, "step": 36043 }, { "epoch": 0.9544310013624963, "grad_norm": 0.84765625, "learning_rate": 8.442021559497103e-05, "loss": 0.7382, "step": 36044 }, { "epoch": 0.9544574809707906, "grad_norm": 0.71875, "learning_rate": 8.441566808723951e-05, "loss": 0.7124, "step": 36045 }, { "epoch": 0.9544839605790849, "grad_norm": 0.75, "learning_rate": 8.441112061253808e-05, "loss": 0.7767, "step": 36046 }, { "epoch": 0.9545104401873793, "grad_norm": 0.70703125, "learning_rate": 8.44065731708764e-05, "loss": 0.7095, "step": 36047 }, { "epoch": 0.9545369197956737, "grad_norm": 0.8125, "learning_rate": 8.440202576226404e-05, "loss": 0.7664, "step": 36048 }, { "epoch": 0.9545633994039681, "grad_norm": 0.80859375, "learning_rate": 8.439747838671075e-05, "loss": 0.705, "step": 36049 }, { "epoch": 0.9545898790122624, "grad_norm": 0.8359375, "learning_rate": 8.439293104422609e-05, "loss": 0.8848, "step": 36050 }, { "epoch": 0.9546163586205568, "grad_norm": 0.80078125, "learning_rate": 8.438838373481975e-05, "loss": 0.7956, "step": 36051 }, { "epoch": 0.9546428382288512, "grad_norm": 1.1015625, "learning_rate": 8.438383645850134e-05, "loss": 0.8482, "step": 36052 }, { "epoch": 0.9546693178371456, "grad_norm": 0.72265625, "learning_rate": 8.437928921528043e-05, "loss": 0.7504, "step": 36053 }, { "epoch": 0.95469579744544, "grad_norm": 1.0390625, "learning_rate": 8.437474200516677e-05, "loss": 0.7688, "step": 36054 }, { "epoch": 0.9547222770537344, "grad_norm": 0.7578125, "learning_rate": 8.437019482816996e-05, "loss": 0.8193, "step": 36055 }, { "epoch": 0.9547487566620287, "grad_norm": 0.76171875, "learning_rate": 8.436564768429964e-05, "loss": 0.7561, "step": 36056 }, { "epoch": 0.9547752362703231, "grad_norm": 0.70703125, "learning_rate": 8.436110057356541e-05, "loss": 0.7515, "step": 36057 }, { "epoch": 0.9548017158786175, "grad_norm": 0.75, "learning_rate": 8.435655349597689e-05, "loss": 0.6805, "step": 36058 }, { "epoch": 0.9548281954869119, "grad_norm": 0.75390625, "learning_rate": 8.435200645154383e-05, "loss": 0.6133, "step": 36059 }, { "epoch": 0.9548546750952063, "grad_norm": 0.80859375, "learning_rate": 8.434745944027578e-05, "loss": 0.6639, "step": 36060 }, { "epoch": 0.9548811547035007, "grad_norm": 0.8203125, "learning_rate": 8.43429124621824e-05, "loss": 0.8387, "step": 36061 }, { "epoch": 0.9549076343117949, "grad_norm": 0.8046875, "learning_rate": 8.433836551727333e-05, "loss": 0.7792, "step": 36062 }, { "epoch": 0.9549341139200893, "grad_norm": 0.6875, "learning_rate": 8.433381860555816e-05, "loss": 0.5624, "step": 36063 }, { "epoch": 0.9549605935283837, "grad_norm": 0.734375, "learning_rate": 8.43292717270466e-05, "loss": 0.6897, "step": 36064 }, { "epoch": 0.9549870731366781, "grad_norm": 0.84375, "learning_rate": 8.432472488174826e-05, "loss": 0.7766, "step": 36065 }, { "epoch": 0.9550135527449725, "grad_norm": 0.71875, "learning_rate": 8.432017806967273e-05, "loss": 0.8052, "step": 36066 }, { "epoch": 0.9550400323532668, "grad_norm": 0.87109375, "learning_rate": 8.43156312908297e-05, "loss": 0.7818, "step": 36067 }, { "epoch": 0.9550665119615612, "grad_norm": 0.7734375, "learning_rate": 8.431108454522881e-05, "loss": 0.9002, "step": 36068 }, { "epoch": 0.9550929915698556, "grad_norm": 0.85546875, "learning_rate": 8.430653783287968e-05, "loss": 0.7342, "step": 36069 }, { "epoch": 0.95511947117815, "grad_norm": 0.7578125, "learning_rate": 8.430199115379194e-05, "loss": 0.7019, "step": 36070 }, { "epoch": 0.9551459507864444, "grad_norm": 0.75390625, "learning_rate": 8.429744450797524e-05, "loss": 0.8337, "step": 36071 }, { "epoch": 0.9551724303947388, "grad_norm": 0.8125, "learning_rate": 8.429289789543919e-05, "loss": 0.9238, "step": 36072 }, { "epoch": 0.9551989100030331, "grad_norm": 0.734375, "learning_rate": 8.428835131619341e-05, "loss": 0.7213, "step": 36073 }, { "epoch": 0.9552253896113275, "grad_norm": 0.73828125, "learning_rate": 8.428380477024763e-05, "loss": 0.7336, "step": 36074 }, { "epoch": 0.9552518692196219, "grad_norm": 0.76171875, "learning_rate": 8.427925825761142e-05, "loss": 0.7773, "step": 36075 }, { "epoch": 0.9552783488279163, "grad_norm": 0.78515625, "learning_rate": 8.427471177829443e-05, "loss": 0.8327, "step": 36076 }, { "epoch": 0.9553048284362107, "grad_norm": 0.75, "learning_rate": 8.427016533230627e-05, "loss": 0.8754, "step": 36077 }, { "epoch": 0.955331308044505, "grad_norm": 0.8359375, "learning_rate": 8.426561891965658e-05, "loss": 0.8728, "step": 36078 }, { "epoch": 0.9553577876527993, "grad_norm": 0.7734375, "learning_rate": 8.426107254035503e-05, "loss": 0.7824, "step": 36079 }, { "epoch": 0.9553842672610937, "grad_norm": 0.828125, "learning_rate": 8.425652619441125e-05, "loss": 0.8514, "step": 36080 }, { "epoch": 0.9554107468693881, "grad_norm": 0.85546875, "learning_rate": 8.425197988183487e-05, "loss": 0.827, "step": 36081 }, { "epoch": 0.9554372264776825, "grad_norm": 0.89453125, "learning_rate": 8.42474336026355e-05, "loss": 0.7823, "step": 36082 }, { "epoch": 0.9554637060859769, "grad_norm": 0.81640625, "learning_rate": 8.424288735682277e-05, "loss": 0.8172, "step": 36083 }, { "epoch": 0.9554901856942712, "grad_norm": 0.75, "learning_rate": 8.423834114440639e-05, "loss": 0.812, "step": 36084 }, { "epoch": 0.9555166653025656, "grad_norm": 0.7578125, "learning_rate": 8.42337949653959e-05, "loss": 0.7438, "step": 36085 }, { "epoch": 0.95554314491086, "grad_norm": 0.796875, "learning_rate": 8.4229248819801e-05, "loss": 0.8432, "step": 36086 }, { "epoch": 0.9555696245191544, "grad_norm": 0.82421875, "learning_rate": 8.422470270763133e-05, "loss": 0.6591, "step": 36087 }, { "epoch": 0.9555961041274488, "grad_norm": 0.8828125, "learning_rate": 8.422015662889645e-05, "loss": 0.7848, "step": 36088 }, { "epoch": 0.9556225837357432, "grad_norm": 0.78125, "learning_rate": 8.421561058360607e-05, "loss": 0.8541, "step": 36089 }, { "epoch": 0.9556490633440375, "grad_norm": 0.69921875, "learning_rate": 8.421106457176982e-05, "loss": 0.7997, "step": 36090 }, { "epoch": 0.9556755429523319, "grad_norm": 0.72265625, "learning_rate": 8.420651859339731e-05, "loss": 0.7851, "step": 36091 }, { "epoch": 0.9557020225606263, "grad_norm": 0.828125, "learning_rate": 8.420197264849818e-05, "loss": 0.7573, "step": 36092 }, { "epoch": 0.9557285021689207, "grad_norm": 0.859375, "learning_rate": 8.419742673708203e-05, "loss": 0.723, "step": 36093 }, { "epoch": 0.9557549817772151, "grad_norm": 0.81640625, "learning_rate": 8.419288085915855e-05, "loss": 0.7489, "step": 36094 }, { "epoch": 0.9557814613855093, "grad_norm": 0.86328125, "learning_rate": 8.418833501473737e-05, "loss": 0.6797, "step": 36095 }, { "epoch": 0.9558079409938037, "grad_norm": 0.83203125, "learning_rate": 8.41837892038281e-05, "loss": 0.8359, "step": 36096 }, { "epoch": 0.9558344206020981, "grad_norm": 0.70703125, "learning_rate": 8.417924342644042e-05, "loss": 0.6679, "step": 36097 }, { "epoch": 0.9558609002103925, "grad_norm": 0.7421875, "learning_rate": 8.417469768258385e-05, "loss": 0.7418, "step": 36098 }, { "epoch": 0.9558873798186869, "grad_norm": 0.71875, "learning_rate": 8.417015197226816e-05, "loss": 0.7641, "step": 36099 }, { "epoch": 0.9559138594269813, "grad_norm": 0.78515625, "learning_rate": 8.416560629550293e-05, "loss": 0.7373, "step": 36100 }, { "epoch": 0.9559403390352756, "grad_norm": 1.65625, "learning_rate": 8.416106065229778e-05, "loss": 0.7558, "step": 36101 }, { "epoch": 0.95596681864357, "grad_norm": 0.7890625, "learning_rate": 8.415651504266238e-05, "loss": 0.7854, "step": 36102 }, { "epoch": 0.9559932982518644, "grad_norm": 0.78125, "learning_rate": 8.415196946660628e-05, "loss": 0.8035, "step": 36103 }, { "epoch": 0.9560197778601588, "grad_norm": 0.7734375, "learning_rate": 8.41474239241392e-05, "loss": 0.7753, "step": 36104 }, { "epoch": 0.9560462574684532, "grad_norm": 0.83984375, "learning_rate": 8.414287841527079e-05, "loss": 0.8659, "step": 36105 }, { "epoch": 0.9560727370767476, "grad_norm": 0.8515625, "learning_rate": 8.413833294001062e-05, "loss": 0.9053, "step": 36106 }, { "epoch": 0.9560992166850419, "grad_norm": 0.69921875, "learning_rate": 8.413378749836836e-05, "loss": 0.6801, "step": 36107 }, { "epoch": 0.9561256962933363, "grad_norm": 0.73046875, "learning_rate": 8.412924209035359e-05, "loss": 0.7038, "step": 36108 }, { "epoch": 0.9561521759016307, "grad_norm": 0.7890625, "learning_rate": 8.412469671597602e-05, "loss": 0.7934, "step": 36109 }, { "epoch": 0.9561786555099251, "grad_norm": 0.78125, "learning_rate": 8.412015137524524e-05, "loss": 0.8182, "step": 36110 }, { "epoch": 0.9562051351182194, "grad_norm": 0.7265625, "learning_rate": 8.41156060681709e-05, "loss": 0.6867, "step": 36111 }, { "epoch": 0.9562316147265137, "grad_norm": 0.8046875, "learning_rate": 8.411106079476261e-05, "loss": 0.8893, "step": 36112 }, { "epoch": 0.9562580943348081, "grad_norm": 0.7578125, "learning_rate": 8.410651555502998e-05, "loss": 0.7384, "step": 36113 }, { "epoch": 0.9562845739431025, "grad_norm": 0.8046875, "learning_rate": 8.410197034898274e-05, "loss": 0.9014, "step": 36114 }, { "epoch": 0.9563110535513969, "grad_norm": 1.25, "learning_rate": 8.409742517663045e-05, "loss": 0.8485, "step": 36115 }, { "epoch": 0.9563375331596913, "grad_norm": 0.8203125, "learning_rate": 8.409288003798277e-05, "loss": 0.9422, "step": 36116 }, { "epoch": 0.9563640127679857, "grad_norm": 0.76171875, "learning_rate": 8.408833493304931e-05, "loss": 0.7552, "step": 36117 }, { "epoch": 0.95639049237628, "grad_norm": 0.71875, "learning_rate": 8.408378986183969e-05, "loss": 0.7424, "step": 36118 }, { "epoch": 0.9564169719845744, "grad_norm": 0.69140625, "learning_rate": 8.407924482436357e-05, "loss": 0.647, "step": 36119 }, { "epoch": 0.9564434515928688, "grad_norm": 0.76171875, "learning_rate": 8.407469982063063e-05, "loss": 0.8773, "step": 36120 }, { "epoch": 0.9564699312011632, "grad_norm": 0.8828125, "learning_rate": 8.407015485065042e-05, "loss": 0.6544, "step": 36121 }, { "epoch": 0.9564964108094576, "grad_norm": 0.99609375, "learning_rate": 8.406560991443261e-05, "loss": 0.7701, "step": 36122 }, { "epoch": 0.956522890417752, "grad_norm": 0.88671875, "learning_rate": 8.406106501198681e-05, "loss": 0.8006, "step": 36123 }, { "epoch": 0.9565493700260463, "grad_norm": 0.7421875, "learning_rate": 8.40565201433227e-05, "loss": 0.7584, "step": 36124 }, { "epoch": 0.9565758496343407, "grad_norm": 0.7421875, "learning_rate": 8.405197530844987e-05, "loss": 0.8291, "step": 36125 }, { "epoch": 0.9566023292426351, "grad_norm": 0.828125, "learning_rate": 8.404743050737797e-05, "loss": 0.856, "step": 36126 }, { "epoch": 0.9566288088509294, "grad_norm": 0.73828125, "learning_rate": 8.404288574011665e-05, "loss": 0.8263, "step": 36127 }, { "epoch": 0.9566552884592238, "grad_norm": 0.77734375, "learning_rate": 8.403834100667549e-05, "loss": 0.6409, "step": 36128 }, { "epoch": 0.9566817680675181, "grad_norm": 0.82421875, "learning_rate": 8.403379630706417e-05, "loss": 0.7076, "step": 36129 }, { "epoch": 0.9567082476758125, "grad_norm": 0.75390625, "learning_rate": 8.402925164129232e-05, "loss": 0.7268, "step": 36130 }, { "epoch": 0.9567347272841069, "grad_norm": 0.703125, "learning_rate": 8.402470700936954e-05, "loss": 0.7763, "step": 36131 }, { "epoch": 0.9567612068924013, "grad_norm": 0.8515625, "learning_rate": 8.402016241130549e-05, "loss": 0.921, "step": 36132 }, { "epoch": 0.9567876865006957, "grad_norm": 0.7109375, "learning_rate": 8.401561784710978e-05, "loss": 0.8449, "step": 36133 }, { "epoch": 0.9568141661089901, "grad_norm": 0.80859375, "learning_rate": 8.401107331679203e-05, "loss": 0.783, "step": 36134 }, { "epoch": 0.9568406457172844, "grad_norm": 0.8203125, "learning_rate": 8.400652882036194e-05, "loss": 0.8066, "step": 36135 }, { "epoch": 0.9568671253255788, "grad_norm": 0.7890625, "learning_rate": 8.400198435782909e-05, "loss": 0.9421, "step": 36136 }, { "epoch": 0.9568936049338732, "grad_norm": 0.7734375, "learning_rate": 8.399743992920311e-05, "loss": 0.7462, "step": 36137 }, { "epoch": 0.9569200845421676, "grad_norm": 0.67578125, "learning_rate": 8.399289553449365e-05, "loss": 0.6853, "step": 36138 }, { "epoch": 0.956946564150462, "grad_norm": 0.7265625, "learning_rate": 8.39883511737103e-05, "loss": 0.6992, "step": 36139 }, { "epoch": 0.9569730437587564, "grad_norm": 0.76953125, "learning_rate": 8.398380684686276e-05, "loss": 0.7597, "step": 36140 }, { "epoch": 0.9569995233670507, "grad_norm": 0.77734375, "learning_rate": 8.397926255396062e-05, "loss": 0.7885, "step": 36141 }, { "epoch": 0.9570260029753451, "grad_norm": 0.79296875, "learning_rate": 8.397471829501352e-05, "loss": 0.8028, "step": 36142 }, { "epoch": 0.9570524825836395, "grad_norm": 0.8515625, "learning_rate": 8.397017407003109e-05, "loss": 0.7462, "step": 36143 }, { "epoch": 0.9570789621919338, "grad_norm": 0.8125, "learning_rate": 8.396562987902292e-05, "loss": 0.6564, "step": 36144 }, { "epoch": 0.9571054418002282, "grad_norm": 0.734375, "learning_rate": 8.396108572199872e-05, "loss": 0.7119, "step": 36145 }, { "epoch": 0.9571319214085225, "grad_norm": 0.77734375, "learning_rate": 8.395654159896809e-05, "loss": 0.9428, "step": 36146 }, { "epoch": 0.9571584010168169, "grad_norm": 0.73046875, "learning_rate": 8.395199750994065e-05, "loss": 0.7337, "step": 36147 }, { "epoch": 0.9571848806251113, "grad_norm": 0.77734375, "learning_rate": 8.394745345492603e-05, "loss": 0.7508, "step": 36148 }, { "epoch": 0.9572113602334057, "grad_norm": 1.03125, "learning_rate": 8.394290943393383e-05, "loss": 0.7594, "step": 36149 }, { "epoch": 0.9572378398417001, "grad_norm": 0.765625, "learning_rate": 8.393836544697376e-05, "loss": 0.7893, "step": 36150 }, { "epoch": 0.9572643194499945, "grad_norm": 0.7421875, "learning_rate": 8.39338214940554e-05, "loss": 0.7551, "step": 36151 }, { "epoch": 0.9572907990582888, "grad_norm": 0.74609375, "learning_rate": 8.392927757518835e-05, "loss": 0.7151, "step": 36152 }, { "epoch": 0.9573172786665832, "grad_norm": 0.80078125, "learning_rate": 8.392473369038231e-05, "loss": 0.8276, "step": 36153 }, { "epoch": 0.9573437582748776, "grad_norm": 0.7421875, "learning_rate": 8.392018983964687e-05, "loss": 0.7498, "step": 36154 }, { "epoch": 0.957370237883172, "grad_norm": 0.72265625, "learning_rate": 8.391564602299168e-05, "loss": 0.7766, "step": 36155 }, { "epoch": 0.9573967174914664, "grad_norm": 0.83203125, "learning_rate": 8.391110224042635e-05, "loss": 0.7886, "step": 36156 }, { "epoch": 0.9574231970997608, "grad_norm": 0.82421875, "learning_rate": 8.390655849196053e-05, "loss": 0.7616, "step": 36157 }, { "epoch": 0.9574496767080551, "grad_norm": 0.8203125, "learning_rate": 8.390201477760385e-05, "loss": 0.8149, "step": 36158 }, { "epoch": 0.9574761563163495, "grad_norm": 0.83984375, "learning_rate": 8.389747109736587e-05, "loss": 0.8821, "step": 36159 }, { "epoch": 0.9575026359246438, "grad_norm": 0.81640625, "learning_rate": 8.389292745125633e-05, "loss": 0.7276, "step": 36160 }, { "epoch": 0.9575291155329382, "grad_norm": 0.83203125, "learning_rate": 8.388838383928479e-05, "loss": 0.7911, "step": 36161 }, { "epoch": 0.9575555951412326, "grad_norm": 0.78125, "learning_rate": 8.388384026146092e-05, "loss": 0.6861, "step": 36162 }, { "epoch": 0.957582074749527, "grad_norm": 0.78125, "learning_rate": 8.387929671779432e-05, "loss": 0.8836, "step": 36163 }, { "epoch": 0.9576085543578213, "grad_norm": 1.1875, "learning_rate": 8.38747532082946e-05, "loss": 0.8129, "step": 36164 }, { "epoch": 0.9576350339661157, "grad_norm": 0.7265625, "learning_rate": 8.387020973297144e-05, "loss": 0.7451, "step": 36165 }, { "epoch": 0.9576615135744101, "grad_norm": 0.82421875, "learning_rate": 8.386566629183446e-05, "loss": 0.7779, "step": 36166 }, { "epoch": 0.9576879931827045, "grad_norm": 0.859375, "learning_rate": 8.386112288489329e-05, "loss": 0.8073, "step": 36167 }, { "epoch": 0.9577144727909989, "grad_norm": 0.83203125, "learning_rate": 8.385657951215753e-05, "loss": 0.7857, "step": 36168 }, { "epoch": 0.9577409523992932, "grad_norm": 0.76953125, "learning_rate": 8.385203617363682e-05, "loss": 0.9691, "step": 36169 }, { "epoch": 0.9577674320075876, "grad_norm": 0.765625, "learning_rate": 8.384749286934078e-05, "loss": 0.7476, "step": 36170 }, { "epoch": 0.957793911615882, "grad_norm": 0.7265625, "learning_rate": 8.384294959927908e-05, "loss": 0.7648, "step": 36171 }, { "epoch": 0.9578203912241764, "grad_norm": 0.7734375, "learning_rate": 8.383840636346135e-05, "loss": 0.7908, "step": 36172 }, { "epoch": 0.9578468708324708, "grad_norm": 0.7578125, "learning_rate": 8.383386316189716e-05, "loss": 0.8067, "step": 36173 }, { "epoch": 0.9578733504407652, "grad_norm": 0.78125, "learning_rate": 8.382931999459617e-05, "loss": 0.7315, "step": 36174 }, { "epoch": 0.9578998300490595, "grad_norm": 0.7734375, "learning_rate": 8.382477686156802e-05, "loss": 0.7618, "step": 36175 }, { "epoch": 0.9579263096573538, "grad_norm": 0.82421875, "learning_rate": 8.382023376282234e-05, "loss": 0.7876, "step": 36176 }, { "epoch": 0.9579527892656482, "grad_norm": 0.7734375, "learning_rate": 8.381569069836875e-05, "loss": 0.6955, "step": 36177 }, { "epoch": 0.9579792688739426, "grad_norm": 0.7734375, "learning_rate": 8.381114766821689e-05, "loss": 0.8207, "step": 36178 }, { "epoch": 0.958005748482237, "grad_norm": 0.7578125, "learning_rate": 8.380660467237631e-05, "loss": 0.7825, "step": 36179 }, { "epoch": 0.9580322280905313, "grad_norm": 0.76171875, "learning_rate": 8.380206171085676e-05, "loss": 0.7449, "step": 36180 }, { "epoch": 0.9580587076988257, "grad_norm": 0.78515625, "learning_rate": 8.379751878366782e-05, "loss": 0.7199, "step": 36181 }, { "epoch": 0.9580851873071201, "grad_norm": 0.7734375, "learning_rate": 8.379297589081911e-05, "loss": 0.8224, "step": 36182 }, { "epoch": 0.9581116669154145, "grad_norm": 0.83203125, "learning_rate": 8.378843303232025e-05, "loss": 0.7906, "step": 36183 }, { "epoch": 0.9581381465237089, "grad_norm": 0.7421875, "learning_rate": 8.378389020818086e-05, "loss": 0.7014, "step": 36184 }, { "epoch": 0.9581646261320033, "grad_norm": 0.84765625, "learning_rate": 8.377934741841061e-05, "loss": 0.7476, "step": 36185 }, { "epoch": 0.9581911057402976, "grad_norm": 0.82421875, "learning_rate": 8.377480466301912e-05, "loss": 0.8635, "step": 36186 }, { "epoch": 0.958217585348592, "grad_norm": 0.76171875, "learning_rate": 8.377026194201599e-05, "loss": 0.7664, "step": 36187 }, { "epoch": 0.9582440649568864, "grad_norm": 0.78125, "learning_rate": 8.376571925541088e-05, "loss": 0.7158, "step": 36188 }, { "epoch": 0.9582705445651808, "grad_norm": 0.85546875, "learning_rate": 8.376117660321335e-05, "loss": 0.7861, "step": 36189 }, { "epoch": 0.9582970241734752, "grad_norm": 0.8671875, "learning_rate": 8.375663398543311e-05, "loss": 0.8262, "step": 36190 }, { "epoch": 0.9583235037817696, "grad_norm": 0.8515625, "learning_rate": 8.375209140207976e-05, "loss": 0.825, "step": 36191 }, { "epoch": 0.9583499833900639, "grad_norm": 0.8359375, "learning_rate": 8.374754885316293e-05, "loss": 0.9309, "step": 36192 }, { "epoch": 0.9583764629983582, "grad_norm": 0.8046875, "learning_rate": 8.374300633869223e-05, "loss": 0.7545, "step": 36193 }, { "epoch": 0.9584029426066526, "grad_norm": 0.8046875, "learning_rate": 8.37384638586773e-05, "loss": 0.865, "step": 36194 }, { "epoch": 0.958429422214947, "grad_norm": 0.83984375, "learning_rate": 8.373392141312777e-05, "loss": 0.9215, "step": 36195 }, { "epoch": 0.9584559018232414, "grad_norm": 0.65625, "learning_rate": 8.372937900205326e-05, "loss": 0.6383, "step": 36196 }, { "epoch": 0.9584823814315357, "grad_norm": 0.77734375, "learning_rate": 8.372483662546342e-05, "loss": 0.8055, "step": 36197 }, { "epoch": 0.9585088610398301, "grad_norm": 0.78515625, "learning_rate": 8.372029428336783e-05, "loss": 0.8129, "step": 36198 }, { "epoch": 0.9585353406481245, "grad_norm": 1.234375, "learning_rate": 8.371575197577613e-05, "loss": 0.7719, "step": 36199 }, { "epoch": 0.9585618202564189, "grad_norm": 0.77734375, "learning_rate": 8.3711209702698e-05, "loss": 0.7804, "step": 36200 }, { "epoch": 0.9585882998647133, "grad_norm": 0.8671875, "learning_rate": 8.370666746414301e-05, "loss": 0.7452, "step": 36201 }, { "epoch": 0.9586147794730077, "grad_norm": 0.80859375, "learning_rate": 8.370212526012083e-05, "loss": 0.815, "step": 36202 }, { "epoch": 0.958641259081302, "grad_norm": 0.765625, "learning_rate": 8.369758309064106e-05, "loss": 0.8536, "step": 36203 }, { "epoch": 0.9586677386895964, "grad_norm": 0.78515625, "learning_rate": 8.369304095571328e-05, "loss": 0.7559, "step": 36204 }, { "epoch": 0.9586942182978908, "grad_norm": 0.76171875, "learning_rate": 8.368849885534722e-05, "loss": 0.849, "step": 36205 }, { "epoch": 0.9587206979061852, "grad_norm": 0.90234375, "learning_rate": 8.368395678955246e-05, "loss": 0.8459, "step": 36206 }, { "epoch": 0.9587471775144796, "grad_norm": 0.82421875, "learning_rate": 8.36794147583386e-05, "loss": 0.8031, "step": 36207 }, { "epoch": 0.958773657122774, "grad_norm": 0.8671875, "learning_rate": 8.36748727617153e-05, "loss": 0.8295, "step": 36208 }, { "epoch": 0.9588001367310682, "grad_norm": 0.79296875, "learning_rate": 8.367033079969213e-05, "loss": 0.7297, "step": 36209 }, { "epoch": 0.9588266163393626, "grad_norm": 0.81640625, "learning_rate": 8.36657888722788e-05, "loss": 0.7621, "step": 36210 }, { "epoch": 0.958853095947657, "grad_norm": 0.78125, "learning_rate": 8.36612469794849e-05, "loss": 0.6782, "step": 36211 }, { "epoch": 0.9588795755559514, "grad_norm": 0.6875, "learning_rate": 8.365670512132006e-05, "loss": 0.6636, "step": 36212 }, { "epoch": 0.9589060551642458, "grad_norm": 0.81640625, "learning_rate": 8.365216329779388e-05, "loss": 0.774, "step": 36213 }, { "epoch": 0.9589325347725401, "grad_norm": 0.7734375, "learning_rate": 8.364762150891601e-05, "loss": 0.8896, "step": 36214 }, { "epoch": 0.9589590143808345, "grad_norm": 0.78515625, "learning_rate": 8.364307975469608e-05, "loss": 0.8534, "step": 36215 }, { "epoch": 0.9589854939891289, "grad_norm": 0.75390625, "learning_rate": 8.36385380351437e-05, "loss": 0.8054, "step": 36216 }, { "epoch": 0.9590119735974233, "grad_norm": 0.7578125, "learning_rate": 8.363399635026852e-05, "loss": 0.7695, "step": 36217 }, { "epoch": 0.9590384532057177, "grad_norm": 0.8671875, "learning_rate": 8.362945470008013e-05, "loss": 0.7562, "step": 36218 }, { "epoch": 0.9590649328140121, "grad_norm": 0.78515625, "learning_rate": 8.362491308458815e-05, "loss": 0.7765, "step": 36219 }, { "epoch": 0.9590914124223064, "grad_norm": 0.75390625, "learning_rate": 8.362037150380227e-05, "loss": 0.7359, "step": 36220 }, { "epoch": 0.9591178920306008, "grad_norm": 0.8359375, "learning_rate": 8.361582995773206e-05, "loss": 0.7114, "step": 36221 }, { "epoch": 0.9591443716388952, "grad_norm": 0.7421875, "learning_rate": 8.361128844638717e-05, "loss": 0.7515, "step": 36222 }, { "epoch": 0.9591708512471896, "grad_norm": 1.4921875, "learning_rate": 8.360674696977722e-05, "loss": 0.7417, "step": 36223 }, { "epoch": 0.959197330855484, "grad_norm": 0.734375, "learning_rate": 8.36022055279118e-05, "loss": 0.6886, "step": 36224 }, { "epoch": 0.9592238104637782, "grad_norm": 0.75390625, "learning_rate": 8.35976641208006e-05, "loss": 0.791, "step": 36225 }, { "epoch": 0.9592502900720726, "grad_norm": 2.015625, "learning_rate": 8.359312274845322e-05, "loss": 0.7781, "step": 36226 }, { "epoch": 0.959276769680367, "grad_norm": 0.74609375, "learning_rate": 8.358858141087928e-05, "loss": 0.761, "step": 36227 }, { "epoch": 0.9593032492886614, "grad_norm": 0.96875, "learning_rate": 8.358404010808839e-05, "loss": 0.7138, "step": 36228 }, { "epoch": 0.9593297288969558, "grad_norm": 0.8046875, "learning_rate": 8.357949884009016e-05, "loss": 0.8339, "step": 36229 }, { "epoch": 0.9593562085052502, "grad_norm": 0.984375, "learning_rate": 8.357495760689426e-05, "loss": 0.891, "step": 36230 }, { "epoch": 0.9593826881135445, "grad_norm": 0.765625, "learning_rate": 8.357041640851034e-05, "loss": 0.8016, "step": 36231 }, { "epoch": 0.9594091677218389, "grad_norm": 0.76171875, "learning_rate": 8.356587524494796e-05, "loss": 0.7877, "step": 36232 }, { "epoch": 0.9594356473301333, "grad_norm": 0.80078125, "learning_rate": 8.356133411621676e-05, "loss": 0.8733, "step": 36233 }, { "epoch": 0.9594621269384277, "grad_norm": 0.796875, "learning_rate": 8.355679302232637e-05, "loss": 0.8489, "step": 36234 }, { "epoch": 0.9594886065467221, "grad_norm": 0.71484375, "learning_rate": 8.355225196328642e-05, "loss": 0.7181, "step": 36235 }, { "epoch": 0.9595150861550165, "grad_norm": 0.796875, "learning_rate": 8.354771093910655e-05, "loss": 0.7626, "step": 36236 }, { "epoch": 0.9595415657633108, "grad_norm": 0.73828125, "learning_rate": 8.354316994979634e-05, "loss": 0.733, "step": 36237 }, { "epoch": 0.9595680453716052, "grad_norm": 0.80859375, "learning_rate": 8.353862899536545e-05, "loss": 0.7455, "step": 36238 }, { "epoch": 0.9595945249798996, "grad_norm": 0.7734375, "learning_rate": 8.353408807582348e-05, "loss": 0.8783, "step": 36239 }, { "epoch": 0.959621004588194, "grad_norm": 0.734375, "learning_rate": 8.35295471911801e-05, "loss": 0.7062, "step": 36240 }, { "epoch": 0.9596474841964884, "grad_norm": 0.77734375, "learning_rate": 8.352500634144489e-05, "loss": 0.7576, "step": 36241 }, { "epoch": 0.9596739638047826, "grad_norm": 0.8125, "learning_rate": 8.35204655266275e-05, "loss": 0.8796, "step": 36242 }, { "epoch": 0.959700443413077, "grad_norm": 0.86328125, "learning_rate": 8.351592474673753e-05, "loss": 0.8506, "step": 36243 }, { "epoch": 0.9597269230213714, "grad_norm": 0.75390625, "learning_rate": 8.351138400178457e-05, "loss": 0.8088, "step": 36244 }, { "epoch": 0.9597534026296658, "grad_norm": 0.796875, "learning_rate": 8.350684329177836e-05, "loss": 0.7515, "step": 36245 }, { "epoch": 0.9597798822379602, "grad_norm": 0.7890625, "learning_rate": 8.350230261672842e-05, "loss": 0.8588, "step": 36246 }, { "epoch": 0.9598063618462546, "grad_norm": 0.8203125, "learning_rate": 8.349776197664442e-05, "loss": 0.7461, "step": 36247 }, { "epoch": 0.959832841454549, "grad_norm": 0.7890625, "learning_rate": 8.349322137153597e-05, "loss": 0.838, "step": 36248 }, { "epoch": 0.9598593210628433, "grad_norm": 0.8203125, "learning_rate": 8.348868080141265e-05, "loss": 0.8082, "step": 36249 }, { "epoch": 0.9598858006711377, "grad_norm": 0.78125, "learning_rate": 8.348414026628417e-05, "loss": 0.901, "step": 36250 }, { "epoch": 0.9599122802794321, "grad_norm": 0.80859375, "learning_rate": 8.347959976616011e-05, "loss": 0.8144, "step": 36251 }, { "epoch": 0.9599387598877265, "grad_norm": 0.890625, "learning_rate": 8.347505930105011e-05, "loss": 0.7835, "step": 36252 }, { "epoch": 0.9599652394960209, "grad_norm": 0.796875, "learning_rate": 8.347051887096376e-05, "loss": 0.767, "step": 36253 }, { "epoch": 0.9599917191043152, "grad_norm": 0.71484375, "learning_rate": 8.346597847591068e-05, "loss": 0.8108, "step": 36254 }, { "epoch": 0.9600181987126096, "grad_norm": 0.796875, "learning_rate": 8.346143811590054e-05, "loss": 0.7152, "step": 36255 }, { "epoch": 0.960044678320904, "grad_norm": 0.82421875, "learning_rate": 8.34568977909429e-05, "loss": 0.8033, "step": 36256 }, { "epoch": 0.9600711579291984, "grad_norm": 0.796875, "learning_rate": 8.345235750104746e-05, "loss": 0.7803, "step": 36257 }, { "epoch": 0.9600976375374927, "grad_norm": 0.85546875, "learning_rate": 8.34478172462238e-05, "loss": 0.871, "step": 36258 }, { "epoch": 0.960124117145787, "grad_norm": 0.765625, "learning_rate": 8.344327702648151e-05, "loss": 0.8793, "step": 36259 }, { "epoch": 0.9601505967540814, "grad_norm": 0.82421875, "learning_rate": 8.343873684183028e-05, "loss": 0.7947, "step": 36260 }, { "epoch": 0.9601770763623758, "grad_norm": 0.9296875, "learning_rate": 8.343419669227969e-05, "loss": 0.8956, "step": 36261 }, { "epoch": 0.9602035559706702, "grad_norm": 0.828125, "learning_rate": 8.342965657783938e-05, "loss": 0.8612, "step": 36262 }, { "epoch": 0.9602300355789646, "grad_norm": 0.72265625, "learning_rate": 8.342511649851898e-05, "loss": 0.7626, "step": 36263 }, { "epoch": 0.960256515187259, "grad_norm": 0.7421875, "learning_rate": 8.342057645432805e-05, "loss": 0.7991, "step": 36264 }, { "epoch": 0.9602829947955533, "grad_norm": 0.79296875, "learning_rate": 8.34160364452763e-05, "loss": 0.7363, "step": 36265 }, { "epoch": 0.9603094744038477, "grad_norm": 0.84375, "learning_rate": 8.341149647137331e-05, "loss": 0.9166, "step": 36266 }, { "epoch": 0.9603359540121421, "grad_norm": 0.94921875, "learning_rate": 8.34069565326287e-05, "loss": 0.7342, "step": 36267 }, { "epoch": 0.9603624336204365, "grad_norm": 0.8515625, "learning_rate": 8.34024166290521e-05, "loss": 0.8456, "step": 36268 }, { "epoch": 0.9603889132287309, "grad_norm": 0.75390625, "learning_rate": 8.33978767606531e-05, "loss": 0.8264, "step": 36269 }, { "epoch": 0.9604153928370253, "grad_norm": 0.75390625, "learning_rate": 8.339333692744138e-05, "loss": 0.7838, "step": 36270 }, { "epoch": 0.9604418724453196, "grad_norm": 0.77734375, "learning_rate": 8.338879712942653e-05, "loss": 0.7502, "step": 36271 }, { "epoch": 0.960468352053614, "grad_norm": 0.76171875, "learning_rate": 8.338425736661818e-05, "loss": 0.7282, "step": 36272 }, { "epoch": 0.9604948316619084, "grad_norm": 0.81640625, "learning_rate": 8.337971763902594e-05, "loss": 0.8566, "step": 36273 }, { "epoch": 0.9605213112702027, "grad_norm": 0.71875, "learning_rate": 8.337517794665945e-05, "loss": 0.7133, "step": 36274 }, { "epoch": 0.9605477908784971, "grad_norm": 0.73828125, "learning_rate": 8.337063828952828e-05, "loss": 0.7049, "step": 36275 }, { "epoch": 0.9605742704867914, "grad_norm": 0.75, "learning_rate": 8.336609866764212e-05, "loss": 0.7846, "step": 36276 }, { "epoch": 0.9606007500950858, "grad_norm": 0.7421875, "learning_rate": 8.336155908101058e-05, "loss": 0.8151, "step": 36277 }, { "epoch": 0.9606272297033802, "grad_norm": 0.796875, "learning_rate": 8.335701952964325e-05, "loss": 0.7159, "step": 36278 }, { "epoch": 0.9606537093116746, "grad_norm": 0.80078125, "learning_rate": 8.335248001354978e-05, "loss": 0.7528, "step": 36279 }, { "epoch": 0.960680188919969, "grad_norm": 0.7734375, "learning_rate": 8.334794053273974e-05, "loss": 0.8619, "step": 36280 }, { "epoch": 0.9607066685282634, "grad_norm": 0.671875, "learning_rate": 8.334340108722282e-05, "loss": 0.7076, "step": 36281 }, { "epoch": 0.9607331481365577, "grad_norm": 0.84375, "learning_rate": 8.33388616770086e-05, "loss": 0.856, "step": 36282 }, { "epoch": 0.9607596277448521, "grad_norm": 0.77734375, "learning_rate": 8.333432230210673e-05, "loss": 0.7692, "step": 36283 }, { "epoch": 0.9607861073531465, "grad_norm": 0.81640625, "learning_rate": 8.332978296252679e-05, "loss": 0.7277, "step": 36284 }, { "epoch": 0.9608125869614409, "grad_norm": 0.7578125, "learning_rate": 8.332524365827839e-05, "loss": 0.7717, "step": 36285 }, { "epoch": 0.9608390665697353, "grad_norm": 0.77734375, "learning_rate": 8.332070438937123e-05, "loss": 0.8136, "step": 36286 }, { "epoch": 0.9608655461780297, "grad_norm": 1.34375, "learning_rate": 8.331616515581487e-05, "loss": 0.8001, "step": 36287 }, { "epoch": 0.960892025786324, "grad_norm": 0.84375, "learning_rate": 8.331162595761895e-05, "loss": 0.7621, "step": 36288 }, { "epoch": 0.9609185053946184, "grad_norm": 1.65625, "learning_rate": 8.33070867947931e-05, "loss": 0.7515, "step": 36289 }, { "epoch": 0.9609449850029128, "grad_norm": 0.859375, "learning_rate": 8.330254766734685e-05, "loss": 0.8069, "step": 36290 }, { "epoch": 0.9609714646112071, "grad_norm": 0.80859375, "learning_rate": 8.329800857528996e-05, "loss": 0.7681, "step": 36291 }, { "epoch": 0.9609979442195015, "grad_norm": 0.84375, "learning_rate": 8.329346951863199e-05, "loss": 0.7838, "step": 36292 }, { "epoch": 0.9610244238277958, "grad_norm": 0.80859375, "learning_rate": 8.328893049738255e-05, "loss": 0.7918, "step": 36293 }, { "epoch": 0.9610509034360902, "grad_norm": 0.75390625, "learning_rate": 8.328439151155126e-05, "loss": 0.6833, "step": 36294 }, { "epoch": 0.9610773830443846, "grad_norm": 0.80078125, "learning_rate": 8.327985256114772e-05, "loss": 0.6994, "step": 36295 }, { "epoch": 0.961103862652679, "grad_norm": 0.81640625, "learning_rate": 8.327531364618161e-05, "loss": 0.8997, "step": 36296 }, { "epoch": 0.9611303422609734, "grad_norm": 0.7109375, "learning_rate": 8.327077476666252e-05, "loss": 0.8317, "step": 36297 }, { "epoch": 0.9611568218692678, "grad_norm": 0.73828125, "learning_rate": 8.326623592260006e-05, "loss": 0.7413, "step": 36298 }, { "epoch": 0.9611833014775621, "grad_norm": 0.81640625, "learning_rate": 8.326169711400385e-05, "loss": 0.7669, "step": 36299 }, { "epoch": 0.9612097810858565, "grad_norm": 0.88671875, "learning_rate": 8.325715834088352e-05, "loss": 0.7881, "step": 36300 }, { "epoch": 0.9612362606941509, "grad_norm": 0.765625, "learning_rate": 8.325261960324868e-05, "loss": 0.7974, "step": 36301 }, { "epoch": 0.9612627403024453, "grad_norm": 0.7890625, "learning_rate": 8.324808090110897e-05, "loss": 0.7262, "step": 36302 }, { "epoch": 0.9612892199107397, "grad_norm": 0.74609375, "learning_rate": 8.3243542234474e-05, "loss": 0.7745, "step": 36303 }, { "epoch": 0.9613156995190341, "grad_norm": 0.7578125, "learning_rate": 8.323900360335335e-05, "loss": 0.7209, "step": 36304 }, { "epoch": 0.9613421791273284, "grad_norm": 0.7109375, "learning_rate": 8.323446500775668e-05, "loss": 0.7166, "step": 36305 }, { "epoch": 0.9613686587356228, "grad_norm": 0.828125, "learning_rate": 8.322992644769362e-05, "loss": 0.7897, "step": 36306 }, { "epoch": 0.9613951383439171, "grad_norm": 0.73828125, "learning_rate": 8.322538792317378e-05, "loss": 0.7003, "step": 36307 }, { "epoch": 0.9614216179522115, "grad_norm": 0.78515625, "learning_rate": 8.322084943420678e-05, "loss": 0.9194, "step": 36308 }, { "epoch": 0.9614480975605059, "grad_norm": 0.7578125, "learning_rate": 8.321631098080221e-05, "loss": 0.708, "step": 36309 }, { "epoch": 0.9614745771688002, "grad_norm": 0.78125, "learning_rate": 8.321177256296968e-05, "loss": 0.743, "step": 36310 }, { "epoch": 0.9615010567770946, "grad_norm": 0.80078125, "learning_rate": 8.320723418071888e-05, "loss": 0.8079, "step": 36311 }, { "epoch": 0.961527536385389, "grad_norm": 0.80078125, "learning_rate": 8.320269583405938e-05, "loss": 0.7195, "step": 36312 }, { "epoch": 0.9615540159936834, "grad_norm": 0.78515625, "learning_rate": 8.319815752300081e-05, "loss": 0.8221, "step": 36313 }, { "epoch": 0.9615804956019778, "grad_norm": 0.7890625, "learning_rate": 8.319361924755277e-05, "loss": 0.8067, "step": 36314 }, { "epoch": 0.9616069752102722, "grad_norm": 0.80078125, "learning_rate": 8.318908100772488e-05, "loss": 0.7652, "step": 36315 }, { "epoch": 0.9616334548185665, "grad_norm": 0.7265625, "learning_rate": 8.31845428035268e-05, "loss": 0.8488, "step": 36316 }, { "epoch": 0.9616599344268609, "grad_norm": 0.75390625, "learning_rate": 8.318000463496812e-05, "loss": 0.7855, "step": 36317 }, { "epoch": 0.9616864140351553, "grad_norm": 0.8515625, "learning_rate": 8.317546650205846e-05, "loss": 0.7801, "step": 36318 }, { "epoch": 0.9617128936434497, "grad_norm": 0.83984375, "learning_rate": 8.317092840480744e-05, "loss": 0.8452, "step": 36319 }, { "epoch": 0.9617393732517441, "grad_norm": 0.79296875, "learning_rate": 8.316639034322464e-05, "loss": 0.7654, "step": 36320 }, { "epoch": 0.9617658528600385, "grad_norm": 0.75390625, "learning_rate": 8.316185231731976e-05, "loss": 0.8033, "step": 36321 }, { "epoch": 0.9617923324683328, "grad_norm": 0.73828125, "learning_rate": 8.315731432710235e-05, "loss": 0.7621, "step": 36322 }, { "epoch": 0.9618188120766271, "grad_norm": 0.69921875, "learning_rate": 8.315277637258205e-05, "loss": 0.7536, "step": 36323 }, { "epoch": 0.9618452916849215, "grad_norm": 0.8515625, "learning_rate": 8.314823845376847e-05, "loss": 0.788, "step": 36324 }, { "epoch": 0.9618717712932159, "grad_norm": 0.859375, "learning_rate": 8.314370057067123e-05, "loss": 0.9108, "step": 36325 }, { "epoch": 0.9618982509015103, "grad_norm": 0.6875, "learning_rate": 8.313916272329998e-05, "loss": 0.6818, "step": 36326 }, { "epoch": 0.9619247305098046, "grad_norm": 0.7890625, "learning_rate": 8.31346249116643e-05, "loss": 0.8229, "step": 36327 }, { "epoch": 0.961951210118099, "grad_norm": 0.796875, "learning_rate": 8.313008713577381e-05, "loss": 0.7554, "step": 36328 }, { "epoch": 0.9619776897263934, "grad_norm": 1.078125, "learning_rate": 8.312554939563816e-05, "loss": 0.7785, "step": 36329 }, { "epoch": 0.9620041693346878, "grad_norm": 0.78515625, "learning_rate": 8.31210116912669e-05, "loss": 0.826, "step": 36330 }, { "epoch": 0.9620306489429822, "grad_norm": 0.90234375, "learning_rate": 8.31164740226697e-05, "loss": 0.7719, "step": 36331 }, { "epoch": 0.9620571285512766, "grad_norm": 0.75, "learning_rate": 8.311193638985621e-05, "loss": 0.7729, "step": 36332 }, { "epoch": 0.962083608159571, "grad_norm": 0.78515625, "learning_rate": 8.310739879283598e-05, "loss": 0.7323, "step": 36333 }, { "epoch": 0.9621100877678653, "grad_norm": 0.85546875, "learning_rate": 8.310286123161866e-05, "loss": 0.8459, "step": 36334 }, { "epoch": 0.9621365673761597, "grad_norm": 0.75390625, "learning_rate": 8.309832370621382e-05, "loss": 0.7627, "step": 36335 }, { "epoch": 0.9621630469844541, "grad_norm": 0.7734375, "learning_rate": 8.309378621663116e-05, "loss": 0.8284, "step": 36336 }, { "epoch": 0.9621895265927485, "grad_norm": 0.84765625, "learning_rate": 8.308924876288024e-05, "loss": 0.6882, "step": 36337 }, { "epoch": 0.9622160062010429, "grad_norm": 0.80859375, "learning_rate": 8.30847113449707e-05, "loss": 0.8387, "step": 36338 }, { "epoch": 0.9622424858093372, "grad_norm": 0.9375, "learning_rate": 8.308017396291216e-05, "loss": 0.9008, "step": 36339 }, { "epoch": 0.9622689654176315, "grad_norm": 0.7578125, "learning_rate": 8.307563661671418e-05, "loss": 0.777, "step": 36340 }, { "epoch": 0.9622954450259259, "grad_norm": 0.75390625, "learning_rate": 8.307109930638643e-05, "loss": 0.7778, "step": 36341 }, { "epoch": 0.9623219246342203, "grad_norm": 0.734375, "learning_rate": 8.306656203193855e-05, "loss": 0.7277, "step": 36342 }, { "epoch": 0.9623484042425147, "grad_norm": 0.734375, "learning_rate": 8.306202479338012e-05, "loss": 0.7982, "step": 36343 }, { "epoch": 0.962374883850809, "grad_norm": 0.796875, "learning_rate": 8.305748759072074e-05, "loss": 0.7501, "step": 36344 }, { "epoch": 0.9624013634591034, "grad_norm": 0.796875, "learning_rate": 8.305295042397004e-05, "loss": 0.871, "step": 36345 }, { "epoch": 0.9624278430673978, "grad_norm": 0.77734375, "learning_rate": 8.304841329313768e-05, "loss": 0.682, "step": 36346 }, { "epoch": 0.9624543226756922, "grad_norm": 0.7578125, "learning_rate": 8.304387619823322e-05, "loss": 0.6906, "step": 36347 }, { "epoch": 0.9624808022839866, "grad_norm": 0.828125, "learning_rate": 8.303933913926629e-05, "loss": 0.8272, "step": 36348 }, { "epoch": 0.962507281892281, "grad_norm": 0.76953125, "learning_rate": 8.303480211624653e-05, "loss": 0.7639, "step": 36349 }, { "epoch": 0.9625337615005753, "grad_norm": 0.74609375, "learning_rate": 8.303026512918348e-05, "loss": 0.6638, "step": 36350 }, { "epoch": 0.9625602411088697, "grad_norm": 0.6640625, "learning_rate": 8.302572817808686e-05, "loss": 0.6484, "step": 36351 }, { "epoch": 0.9625867207171641, "grad_norm": 0.83203125, "learning_rate": 8.302119126296623e-05, "loss": 0.8172, "step": 36352 }, { "epoch": 0.9626132003254585, "grad_norm": 0.80859375, "learning_rate": 8.301665438383124e-05, "loss": 0.783, "step": 36353 }, { "epoch": 0.9626396799337529, "grad_norm": 0.78515625, "learning_rate": 8.301211754069146e-05, "loss": 0.9315, "step": 36354 }, { "epoch": 0.9626661595420473, "grad_norm": 0.7578125, "learning_rate": 8.300758073355647e-05, "loss": 0.7991, "step": 36355 }, { "epoch": 0.9626926391503415, "grad_norm": 0.7421875, "learning_rate": 8.3003043962436e-05, "loss": 0.8024, "step": 36356 }, { "epoch": 0.9627191187586359, "grad_norm": 0.71484375, "learning_rate": 8.299850722733963e-05, "loss": 0.7618, "step": 36357 }, { "epoch": 0.9627455983669303, "grad_norm": 0.796875, "learning_rate": 8.299397052827692e-05, "loss": 0.7872, "step": 36358 }, { "epoch": 0.9627720779752247, "grad_norm": 0.8203125, "learning_rate": 8.298943386525752e-05, "loss": 0.7232, "step": 36359 }, { "epoch": 0.9627985575835191, "grad_norm": 0.734375, "learning_rate": 8.298489723829102e-05, "loss": 0.7957, "step": 36360 }, { "epoch": 0.9628250371918134, "grad_norm": 0.86328125, "learning_rate": 8.298036064738708e-05, "loss": 0.8265, "step": 36361 }, { "epoch": 0.9628515168001078, "grad_norm": 0.73828125, "learning_rate": 8.297582409255532e-05, "loss": 0.8212, "step": 36362 }, { "epoch": 0.9628779964084022, "grad_norm": 0.79296875, "learning_rate": 8.29712875738053e-05, "loss": 0.922, "step": 36363 }, { "epoch": 0.9629044760166966, "grad_norm": 0.87890625, "learning_rate": 8.296675109114667e-05, "loss": 0.6936, "step": 36364 }, { "epoch": 0.962930955624991, "grad_norm": 0.78515625, "learning_rate": 8.296221464458902e-05, "loss": 0.7148, "step": 36365 }, { "epoch": 0.9629574352332854, "grad_norm": 0.81640625, "learning_rate": 8.2957678234142e-05, "loss": 0.7404, "step": 36366 }, { "epoch": 0.9629839148415797, "grad_norm": 0.91015625, "learning_rate": 8.295314185981521e-05, "loss": 0.7318, "step": 36367 }, { "epoch": 0.9630103944498741, "grad_norm": 0.8203125, "learning_rate": 8.294860552161826e-05, "loss": 0.773, "step": 36368 }, { "epoch": 0.9630368740581685, "grad_norm": 0.765625, "learning_rate": 8.294406921956077e-05, "loss": 0.7079, "step": 36369 }, { "epoch": 0.9630633536664629, "grad_norm": 0.7421875, "learning_rate": 8.293953295365229e-05, "loss": 0.7407, "step": 36370 }, { "epoch": 0.9630898332747573, "grad_norm": 0.74609375, "learning_rate": 8.293499672390255e-05, "loss": 0.7221, "step": 36371 }, { "epoch": 0.9631163128830517, "grad_norm": 0.83984375, "learning_rate": 8.293046053032112e-05, "loss": 0.7026, "step": 36372 }, { "epoch": 0.9631427924913459, "grad_norm": 0.88671875, "learning_rate": 8.29259243729176e-05, "loss": 0.7911, "step": 36373 }, { "epoch": 0.9631692720996403, "grad_norm": 0.8359375, "learning_rate": 8.29213882517016e-05, "loss": 0.7668, "step": 36374 }, { "epoch": 0.9631957517079347, "grad_norm": 0.79296875, "learning_rate": 8.29168521666827e-05, "loss": 0.836, "step": 36375 }, { "epoch": 0.9632222313162291, "grad_norm": 0.7890625, "learning_rate": 8.291231611787059e-05, "loss": 0.8296, "step": 36376 }, { "epoch": 0.9632487109245235, "grad_norm": 0.8203125, "learning_rate": 8.290778010527486e-05, "loss": 0.7653, "step": 36377 }, { "epoch": 0.9632751905328178, "grad_norm": 0.82421875, "learning_rate": 8.290324412890511e-05, "loss": 0.8094, "step": 36378 }, { "epoch": 0.9633016701411122, "grad_norm": 0.83984375, "learning_rate": 8.289870818877096e-05, "loss": 0.8988, "step": 36379 }, { "epoch": 0.9633281497494066, "grad_norm": 0.78515625, "learning_rate": 8.289417228488197e-05, "loss": 0.7571, "step": 36380 }, { "epoch": 0.963354629357701, "grad_norm": 0.83984375, "learning_rate": 8.288963641724785e-05, "loss": 0.8646, "step": 36381 }, { "epoch": 0.9633811089659954, "grad_norm": 0.78125, "learning_rate": 8.288510058587817e-05, "loss": 0.7071, "step": 36382 }, { "epoch": 0.9634075885742898, "grad_norm": 0.75, "learning_rate": 8.288056479078255e-05, "loss": 0.7499, "step": 36383 }, { "epoch": 0.9634340681825841, "grad_norm": 0.765625, "learning_rate": 8.287602903197058e-05, "loss": 0.7414, "step": 36384 }, { "epoch": 0.9634605477908785, "grad_norm": 0.80859375, "learning_rate": 8.287149330945187e-05, "loss": 0.7582, "step": 36385 }, { "epoch": 0.9634870273991729, "grad_norm": 0.83984375, "learning_rate": 8.286695762323607e-05, "loss": 0.8904, "step": 36386 }, { "epoch": 0.9635135070074673, "grad_norm": 0.78515625, "learning_rate": 8.286242197333279e-05, "loss": 0.7184, "step": 36387 }, { "epoch": 0.9635399866157617, "grad_norm": 0.8671875, "learning_rate": 8.28578863597516e-05, "loss": 0.7333, "step": 36388 }, { "epoch": 0.963566466224056, "grad_norm": 0.8125, "learning_rate": 8.285335078250215e-05, "loss": 0.8821, "step": 36389 }, { "epoch": 0.9635929458323503, "grad_norm": 0.70703125, "learning_rate": 8.284881524159402e-05, "loss": 0.6864, "step": 36390 }, { "epoch": 0.9636194254406447, "grad_norm": 0.83984375, "learning_rate": 8.284427973703686e-05, "loss": 0.7939, "step": 36391 }, { "epoch": 0.9636459050489391, "grad_norm": 0.81640625, "learning_rate": 8.28397442688403e-05, "loss": 0.7158, "step": 36392 }, { "epoch": 0.9636723846572335, "grad_norm": 0.8359375, "learning_rate": 8.283520883701389e-05, "loss": 0.9584, "step": 36393 }, { "epoch": 0.9636988642655279, "grad_norm": 0.80859375, "learning_rate": 8.283067344156729e-05, "loss": 0.8499, "step": 36394 }, { "epoch": 0.9637253438738222, "grad_norm": 0.83984375, "learning_rate": 8.282613808251005e-05, "loss": 0.7062, "step": 36395 }, { "epoch": 0.9637518234821166, "grad_norm": 0.8125, "learning_rate": 8.282160275985186e-05, "loss": 0.7077, "step": 36396 }, { "epoch": 0.963778303090411, "grad_norm": 0.796875, "learning_rate": 8.281706747360231e-05, "loss": 0.8904, "step": 36397 }, { "epoch": 0.9638047826987054, "grad_norm": 0.82421875, "learning_rate": 8.281253222377103e-05, "loss": 0.8667, "step": 36398 }, { "epoch": 0.9638312623069998, "grad_norm": 0.76171875, "learning_rate": 8.280799701036757e-05, "loss": 0.8107, "step": 36399 }, { "epoch": 0.9638577419152942, "grad_norm": 0.7890625, "learning_rate": 8.280346183340156e-05, "loss": 0.7844, "step": 36400 }, { "epoch": 0.9638842215235885, "grad_norm": 0.7421875, "learning_rate": 8.279892669288266e-05, "loss": 0.848, "step": 36401 }, { "epoch": 0.9639107011318829, "grad_norm": 0.78515625, "learning_rate": 8.279439158882043e-05, "loss": 0.7481, "step": 36402 }, { "epoch": 0.9639371807401773, "grad_norm": 0.78125, "learning_rate": 8.278985652122453e-05, "loss": 0.7611, "step": 36403 }, { "epoch": 0.9639636603484717, "grad_norm": 0.796875, "learning_rate": 8.278532149010454e-05, "loss": 0.8931, "step": 36404 }, { "epoch": 0.963990139956766, "grad_norm": 0.83984375, "learning_rate": 8.278078649547006e-05, "loss": 0.7655, "step": 36405 }, { "epoch": 0.9640166195650604, "grad_norm": 0.8203125, "learning_rate": 8.277625153733073e-05, "loss": 0.8177, "step": 36406 }, { "epoch": 0.9640430991733547, "grad_norm": 0.77734375, "learning_rate": 8.277171661569616e-05, "loss": 0.7126, "step": 36407 }, { "epoch": 0.9640695787816491, "grad_norm": 0.8359375, "learning_rate": 8.276718173057593e-05, "loss": 0.753, "step": 36408 }, { "epoch": 0.9640960583899435, "grad_norm": 0.71484375, "learning_rate": 8.276264688197968e-05, "loss": 0.744, "step": 36409 }, { "epoch": 0.9641225379982379, "grad_norm": 0.79296875, "learning_rate": 8.275811206991701e-05, "loss": 0.7847, "step": 36410 }, { "epoch": 0.9641490176065323, "grad_norm": 0.76953125, "learning_rate": 8.275357729439756e-05, "loss": 0.6929, "step": 36411 }, { "epoch": 0.9641754972148266, "grad_norm": 0.80859375, "learning_rate": 8.27490425554309e-05, "loss": 0.688, "step": 36412 }, { "epoch": 0.964201976823121, "grad_norm": 0.671875, "learning_rate": 8.274450785302667e-05, "loss": 0.6489, "step": 36413 }, { "epoch": 0.9642284564314154, "grad_norm": 0.765625, "learning_rate": 8.273997318719447e-05, "loss": 0.6705, "step": 36414 }, { "epoch": 0.9642549360397098, "grad_norm": 0.80859375, "learning_rate": 8.273543855794391e-05, "loss": 0.828, "step": 36415 }, { "epoch": 0.9642814156480042, "grad_norm": 0.84375, "learning_rate": 8.273090396528456e-05, "loss": 0.9863, "step": 36416 }, { "epoch": 0.9643078952562986, "grad_norm": 0.83984375, "learning_rate": 8.27263694092261e-05, "loss": 0.8783, "step": 36417 }, { "epoch": 0.964334374864593, "grad_norm": 1.015625, "learning_rate": 8.272183488977813e-05, "loss": 0.7684, "step": 36418 }, { "epoch": 0.9643608544728873, "grad_norm": 0.77734375, "learning_rate": 8.271730040695023e-05, "loss": 0.8293, "step": 36419 }, { "epoch": 0.9643873340811817, "grad_norm": 0.77734375, "learning_rate": 8.271276596075204e-05, "loss": 0.8381, "step": 36420 }, { "epoch": 0.9644138136894761, "grad_norm": 0.765625, "learning_rate": 8.270823155119309e-05, "loss": 0.7897, "step": 36421 }, { "epoch": 0.9644402932977704, "grad_norm": 0.796875, "learning_rate": 8.270369717828312e-05, "loss": 0.8848, "step": 36422 }, { "epoch": 0.9644667729060648, "grad_norm": 0.73046875, "learning_rate": 8.269916284203166e-05, "loss": 0.7098, "step": 36423 }, { "epoch": 0.9644932525143591, "grad_norm": 0.71875, "learning_rate": 8.269462854244833e-05, "loss": 0.7173, "step": 36424 }, { "epoch": 0.9645197321226535, "grad_norm": 0.7734375, "learning_rate": 8.269009427954277e-05, "loss": 0.7546, "step": 36425 }, { "epoch": 0.9645462117309479, "grad_norm": 0.8359375, "learning_rate": 8.268556005332453e-05, "loss": 0.7504, "step": 36426 }, { "epoch": 0.9645726913392423, "grad_norm": 0.69921875, "learning_rate": 8.268102586380325e-05, "loss": 0.7552, "step": 36427 }, { "epoch": 0.9645991709475367, "grad_norm": 0.76171875, "learning_rate": 8.267649171098858e-05, "loss": 0.7529, "step": 36428 }, { "epoch": 0.964625650555831, "grad_norm": 0.79296875, "learning_rate": 8.267195759489008e-05, "loss": 0.7549, "step": 36429 }, { "epoch": 0.9646521301641254, "grad_norm": 0.75, "learning_rate": 8.26674235155174e-05, "loss": 0.7234, "step": 36430 }, { "epoch": 0.9646786097724198, "grad_norm": 0.72265625, "learning_rate": 8.266288947288008e-05, "loss": 0.6127, "step": 36431 }, { "epoch": 0.9647050893807142, "grad_norm": 0.734375, "learning_rate": 8.26583554669878e-05, "loss": 0.7668, "step": 36432 }, { "epoch": 0.9647315689890086, "grad_norm": 0.81640625, "learning_rate": 8.265382149785016e-05, "loss": 0.8523, "step": 36433 }, { "epoch": 0.964758048597303, "grad_norm": 0.71484375, "learning_rate": 8.264928756547674e-05, "loss": 0.6607, "step": 36434 }, { "epoch": 0.9647845282055973, "grad_norm": 0.73828125, "learning_rate": 8.264475366987716e-05, "loss": 0.7668, "step": 36435 }, { "epoch": 0.9648110078138917, "grad_norm": 0.7734375, "learning_rate": 8.2640219811061e-05, "loss": 0.8703, "step": 36436 }, { "epoch": 0.9648374874221861, "grad_norm": 0.80859375, "learning_rate": 8.263568598903795e-05, "loss": 0.797, "step": 36437 }, { "epoch": 0.9648639670304804, "grad_norm": 1.59375, "learning_rate": 8.263115220381756e-05, "loss": 0.755, "step": 36438 }, { "epoch": 0.9648904466387748, "grad_norm": 0.8046875, "learning_rate": 8.262661845540945e-05, "loss": 0.764, "step": 36439 }, { "epoch": 0.9649169262470692, "grad_norm": 0.7578125, "learning_rate": 8.262208474382323e-05, "loss": 0.8264, "step": 36440 }, { "epoch": 0.9649434058553635, "grad_norm": 0.7734375, "learning_rate": 8.261755106906847e-05, "loss": 0.717, "step": 36441 }, { "epoch": 0.9649698854636579, "grad_norm": 0.79296875, "learning_rate": 8.261301743115484e-05, "loss": 0.8903, "step": 36442 }, { "epoch": 0.9649963650719523, "grad_norm": 0.80078125, "learning_rate": 8.260848383009195e-05, "loss": 0.8349, "step": 36443 }, { "epoch": 0.9650228446802467, "grad_norm": 0.81640625, "learning_rate": 8.260395026588938e-05, "loss": 0.8391, "step": 36444 }, { "epoch": 0.9650493242885411, "grad_norm": 0.86328125, "learning_rate": 8.259941673855674e-05, "loss": 0.7912, "step": 36445 }, { "epoch": 0.9650758038968354, "grad_norm": 0.71484375, "learning_rate": 8.259488324810359e-05, "loss": 0.6654, "step": 36446 }, { "epoch": 0.9651022835051298, "grad_norm": 0.9453125, "learning_rate": 8.259034979453964e-05, "loss": 0.7916, "step": 36447 }, { "epoch": 0.9651287631134242, "grad_norm": 0.70703125, "learning_rate": 8.258581637787446e-05, "loss": 0.8259, "step": 36448 }, { "epoch": 0.9651552427217186, "grad_norm": 0.90234375, "learning_rate": 8.258128299811764e-05, "loss": 0.9429, "step": 36449 }, { "epoch": 0.965181722330013, "grad_norm": 0.7890625, "learning_rate": 8.257674965527879e-05, "loss": 0.7729, "step": 36450 }, { "epoch": 0.9652082019383074, "grad_norm": 0.7578125, "learning_rate": 8.25722163493675e-05, "loss": 0.7102, "step": 36451 }, { "epoch": 0.9652346815466017, "grad_norm": 0.82421875, "learning_rate": 8.256768308039342e-05, "loss": 0.729, "step": 36452 }, { "epoch": 0.9652611611548961, "grad_norm": 0.76171875, "learning_rate": 8.256314984836615e-05, "loss": 0.7842, "step": 36453 }, { "epoch": 0.9652876407631904, "grad_norm": 0.78125, "learning_rate": 8.255861665329529e-05, "loss": 0.7884, "step": 36454 }, { "epoch": 0.9653141203714848, "grad_norm": 0.80859375, "learning_rate": 8.255408349519044e-05, "loss": 0.8011, "step": 36455 }, { "epoch": 0.9653405999797792, "grad_norm": 0.7578125, "learning_rate": 8.254955037406116e-05, "loss": 0.8093, "step": 36456 }, { "epoch": 0.9653670795880736, "grad_norm": 0.74609375, "learning_rate": 8.254501728991715e-05, "loss": 0.9383, "step": 36457 }, { "epoch": 0.9653935591963679, "grad_norm": 0.78515625, "learning_rate": 8.2540484242768e-05, "loss": 0.7538, "step": 36458 }, { "epoch": 0.9654200388046623, "grad_norm": 0.7734375, "learning_rate": 8.253595123262329e-05, "loss": 0.7739, "step": 36459 }, { "epoch": 0.9654465184129567, "grad_norm": 0.7578125, "learning_rate": 8.253141825949262e-05, "loss": 0.7653, "step": 36460 }, { "epoch": 0.9654729980212511, "grad_norm": 0.70703125, "learning_rate": 8.252688532338558e-05, "loss": 0.7385, "step": 36461 }, { "epoch": 0.9654994776295455, "grad_norm": 0.79296875, "learning_rate": 8.252235242431185e-05, "loss": 0.8061, "step": 36462 }, { "epoch": 0.9655259572378398, "grad_norm": 0.7734375, "learning_rate": 8.251781956228099e-05, "loss": 0.7575, "step": 36463 }, { "epoch": 0.9655524368461342, "grad_norm": 0.76171875, "learning_rate": 8.251328673730262e-05, "loss": 0.7981, "step": 36464 }, { "epoch": 0.9655789164544286, "grad_norm": 0.75390625, "learning_rate": 8.250875394938632e-05, "loss": 0.8717, "step": 36465 }, { "epoch": 0.965605396062723, "grad_norm": 0.75, "learning_rate": 8.25042211985417e-05, "loss": 0.7718, "step": 36466 }, { "epoch": 0.9656318756710174, "grad_norm": 0.72265625, "learning_rate": 8.249968848477841e-05, "loss": 0.6699, "step": 36467 }, { "epoch": 0.9656583552793118, "grad_norm": 0.74609375, "learning_rate": 8.249515580810603e-05, "loss": 0.8308, "step": 36468 }, { "epoch": 0.9656848348876061, "grad_norm": 0.75390625, "learning_rate": 8.249062316853418e-05, "loss": 0.7069, "step": 36469 }, { "epoch": 0.9657113144959005, "grad_norm": 0.68359375, "learning_rate": 8.248609056607243e-05, "loss": 0.779, "step": 36470 }, { "epoch": 0.9657377941041948, "grad_norm": 0.80078125, "learning_rate": 8.24815580007304e-05, "loss": 0.8123, "step": 36471 }, { "epoch": 0.9657642737124892, "grad_norm": 0.84765625, "learning_rate": 8.247702547251773e-05, "loss": 0.7997, "step": 36472 }, { "epoch": 0.9657907533207836, "grad_norm": 0.78125, "learning_rate": 8.247249298144402e-05, "loss": 0.6871, "step": 36473 }, { "epoch": 0.965817232929078, "grad_norm": 0.84375, "learning_rate": 8.246796052751883e-05, "loss": 0.84, "step": 36474 }, { "epoch": 0.9658437125373723, "grad_norm": 0.7265625, "learning_rate": 8.246342811075178e-05, "loss": 0.7352, "step": 36475 }, { "epoch": 0.9658701921456667, "grad_norm": 0.83984375, "learning_rate": 8.24588957311525e-05, "loss": 0.8702, "step": 36476 }, { "epoch": 0.9658966717539611, "grad_norm": 0.83984375, "learning_rate": 8.245436338873061e-05, "loss": 0.8025, "step": 36477 }, { "epoch": 0.9659231513622555, "grad_norm": 0.83984375, "learning_rate": 8.244983108349569e-05, "loss": 0.7442, "step": 36478 }, { "epoch": 0.9659496309705499, "grad_norm": 0.75390625, "learning_rate": 8.244529881545735e-05, "loss": 0.8013, "step": 36479 }, { "epoch": 0.9659761105788442, "grad_norm": 0.7421875, "learning_rate": 8.244076658462521e-05, "loss": 0.675, "step": 36480 }, { "epoch": 0.9660025901871386, "grad_norm": 0.78515625, "learning_rate": 8.24362343910088e-05, "loss": 0.7834, "step": 36481 }, { "epoch": 0.966029069795433, "grad_norm": 0.7890625, "learning_rate": 8.243170223461782e-05, "loss": 0.8797, "step": 36482 }, { "epoch": 0.9660555494037274, "grad_norm": 0.73046875, "learning_rate": 8.242717011546187e-05, "loss": 0.7363, "step": 36483 }, { "epoch": 0.9660820290120218, "grad_norm": 0.77734375, "learning_rate": 8.242263803355052e-05, "loss": 0.7521, "step": 36484 }, { "epoch": 0.9661085086203162, "grad_norm": 0.8125, "learning_rate": 8.241810598889338e-05, "loss": 0.8308, "step": 36485 }, { "epoch": 0.9661349882286105, "grad_norm": 0.75, "learning_rate": 8.241357398150001e-05, "loss": 0.7418, "step": 36486 }, { "epoch": 0.9661614678369048, "grad_norm": 0.76171875, "learning_rate": 8.240904201138012e-05, "loss": 0.6975, "step": 36487 }, { "epoch": 0.9661879474451992, "grad_norm": 0.80859375, "learning_rate": 8.240451007854327e-05, "loss": 0.9387, "step": 36488 }, { "epoch": 0.9662144270534936, "grad_norm": 0.85546875, "learning_rate": 8.239997818299903e-05, "loss": 0.9544, "step": 36489 }, { "epoch": 0.966240906661788, "grad_norm": 0.8359375, "learning_rate": 8.239544632475705e-05, "loss": 0.7631, "step": 36490 }, { "epoch": 0.9662673862700824, "grad_norm": 0.796875, "learning_rate": 8.239091450382688e-05, "loss": 0.8072, "step": 36491 }, { "epoch": 0.9662938658783767, "grad_norm": 0.7265625, "learning_rate": 8.238638272021818e-05, "loss": 0.762, "step": 36492 }, { "epoch": 0.9663203454866711, "grad_norm": 0.80078125, "learning_rate": 8.238185097394055e-05, "loss": 0.7384, "step": 36493 }, { "epoch": 0.9663468250949655, "grad_norm": 0.7734375, "learning_rate": 8.237731926500354e-05, "loss": 0.7915, "step": 36494 }, { "epoch": 0.9663733047032599, "grad_norm": 0.875, "learning_rate": 8.237278759341683e-05, "loss": 0.9618, "step": 36495 }, { "epoch": 0.9663997843115543, "grad_norm": 0.7421875, "learning_rate": 8.236825595918995e-05, "loss": 0.8799, "step": 36496 }, { "epoch": 0.9664262639198486, "grad_norm": 0.76171875, "learning_rate": 8.236372436233258e-05, "loss": 0.8054, "step": 36497 }, { "epoch": 0.966452743528143, "grad_norm": 0.8359375, "learning_rate": 8.235919280285429e-05, "loss": 0.7635, "step": 36498 }, { "epoch": 0.9664792231364374, "grad_norm": 0.78515625, "learning_rate": 8.235466128076468e-05, "loss": 0.7983, "step": 36499 }, { "epoch": 0.9665057027447318, "grad_norm": 0.80078125, "learning_rate": 8.235012979607337e-05, "loss": 0.8247, "step": 36500 }, { "epoch": 0.9665321823530262, "grad_norm": 0.73046875, "learning_rate": 8.234559834878989e-05, "loss": 0.7343, "step": 36501 }, { "epoch": 0.9665586619613206, "grad_norm": 0.91796875, "learning_rate": 8.234106693892394e-05, "loss": 0.8949, "step": 36502 }, { "epoch": 0.9665851415696148, "grad_norm": 0.75390625, "learning_rate": 8.23365355664851e-05, "loss": 0.9719, "step": 36503 }, { "epoch": 0.9666116211779092, "grad_norm": 0.69140625, "learning_rate": 8.233200423148297e-05, "loss": 0.7153, "step": 36504 }, { "epoch": 0.9666381007862036, "grad_norm": 0.79296875, "learning_rate": 8.232747293392715e-05, "loss": 0.7686, "step": 36505 }, { "epoch": 0.966664580394498, "grad_norm": 0.7890625, "learning_rate": 8.232294167382719e-05, "loss": 0.9335, "step": 36506 }, { "epoch": 0.9666910600027924, "grad_norm": 0.75390625, "learning_rate": 8.231841045119279e-05, "loss": 0.8762, "step": 36507 }, { "epoch": 0.9667175396110868, "grad_norm": 0.765625, "learning_rate": 8.23138792660335e-05, "loss": 0.8111, "step": 36508 }, { "epoch": 0.9667440192193811, "grad_norm": 0.79296875, "learning_rate": 8.230934811835894e-05, "loss": 0.8005, "step": 36509 }, { "epoch": 0.9667704988276755, "grad_norm": 0.875, "learning_rate": 8.23048170081787e-05, "loss": 0.8281, "step": 36510 }, { "epoch": 0.9667969784359699, "grad_norm": 0.76953125, "learning_rate": 8.230028593550238e-05, "loss": 0.8061, "step": 36511 }, { "epoch": 0.9668234580442643, "grad_norm": 0.734375, "learning_rate": 8.229575490033958e-05, "loss": 0.7237, "step": 36512 }, { "epoch": 0.9668499376525587, "grad_norm": 0.79296875, "learning_rate": 8.229122390269994e-05, "loss": 0.8147, "step": 36513 }, { "epoch": 0.966876417260853, "grad_norm": 0.8203125, "learning_rate": 8.228669294259304e-05, "loss": 0.7801, "step": 36514 }, { "epoch": 0.9669028968691474, "grad_norm": 0.77734375, "learning_rate": 8.228216202002848e-05, "loss": 0.74, "step": 36515 }, { "epoch": 0.9669293764774418, "grad_norm": 0.796875, "learning_rate": 8.227763113501584e-05, "loss": 0.7837, "step": 36516 }, { "epoch": 0.9669558560857362, "grad_norm": 0.7265625, "learning_rate": 8.227310028756478e-05, "loss": 0.8155, "step": 36517 }, { "epoch": 0.9669823356940306, "grad_norm": 0.7734375, "learning_rate": 8.226856947768486e-05, "loss": 0.7774, "step": 36518 }, { "epoch": 0.967008815302325, "grad_norm": 0.7578125, "learning_rate": 8.226403870538571e-05, "loss": 0.8248, "step": 36519 }, { "epoch": 0.9670352949106192, "grad_norm": 0.8203125, "learning_rate": 8.22595079706769e-05, "loss": 0.8479, "step": 36520 }, { "epoch": 0.9670617745189136, "grad_norm": 0.73828125, "learning_rate": 8.225497727356801e-05, "loss": 0.7669, "step": 36521 }, { "epoch": 0.967088254127208, "grad_norm": 0.79296875, "learning_rate": 8.225044661406872e-05, "loss": 0.7716, "step": 36522 }, { "epoch": 0.9671147337355024, "grad_norm": 0.7578125, "learning_rate": 8.22459159921886e-05, "loss": 0.8008, "step": 36523 }, { "epoch": 0.9671412133437968, "grad_norm": 0.86328125, "learning_rate": 8.224138540793724e-05, "loss": 0.7829, "step": 36524 }, { "epoch": 0.9671676929520912, "grad_norm": 0.87890625, "learning_rate": 8.223685486132427e-05, "loss": 0.7891, "step": 36525 }, { "epoch": 0.9671941725603855, "grad_norm": 0.7734375, "learning_rate": 8.22323243523592e-05, "loss": 0.7367, "step": 36526 }, { "epoch": 0.9672206521686799, "grad_norm": 0.859375, "learning_rate": 8.222779388105177e-05, "loss": 0.9234, "step": 36527 }, { "epoch": 0.9672471317769743, "grad_norm": 0.85546875, "learning_rate": 8.22232634474115e-05, "loss": 0.8409, "step": 36528 }, { "epoch": 0.9672736113852687, "grad_norm": 0.76953125, "learning_rate": 8.2218733051448e-05, "loss": 0.7908, "step": 36529 }, { "epoch": 0.9673000909935631, "grad_norm": 0.94140625, "learning_rate": 8.221420269317088e-05, "loss": 0.8587, "step": 36530 }, { "epoch": 0.9673265706018574, "grad_norm": 1.0546875, "learning_rate": 8.220967237258972e-05, "loss": 0.7281, "step": 36531 }, { "epoch": 0.9673530502101518, "grad_norm": 0.79296875, "learning_rate": 8.220514208971417e-05, "loss": 0.7966, "step": 36532 }, { "epoch": 0.9673795298184462, "grad_norm": 0.765625, "learning_rate": 8.220061184455381e-05, "loss": 0.7659, "step": 36533 }, { "epoch": 0.9674060094267406, "grad_norm": 0.77734375, "learning_rate": 8.219608163711823e-05, "loss": 0.7611, "step": 36534 }, { "epoch": 0.967432489035035, "grad_norm": 0.71875, "learning_rate": 8.219155146741703e-05, "loss": 0.7793, "step": 36535 }, { "epoch": 0.9674589686433293, "grad_norm": 0.88671875, "learning_rate": 8.218702133545981e-05, "loss": 0.819, "step": 36536 }, { "epoch": 0.9674854482516236, "grad_norm": 0.73046875, "learning_rate": 8.21824912412562e-05, "loss": 0.7529, "step": 36537 }, { "epoch": 0.967511927859918, "grad_norm": 0.83984375, "learning_rate": 8.217796118481578e-05, "loss": 0.8868, "step": 36538 }, { "epoch": 0.9675384074682124, "grad_norm": 0.765625, "learning_rate": 8.217343116614816e-05, "loss": 0.8471, "step": 36539 }, { "epoch": 0.9675648870765068, "grad_norm": 0.7421875, "learning_rate": 8.216890118526292e-05, "loss": 0.6661, "step": 36540 }, { "epoch": 0.9675913666848012, "grad_norm": 0.78125, "learning_rate": 8.216437124216964e-05, "loss": 0.7233, "step": 36541 }, { "epoch": 0.9676178462930956, "grad_norm": 0.76171875, "learning_rate": 8.2159841336878e-05, "loss": 0.7279, "step": 36542 }, { "epoch": 0.9676443259013899, "grad_norm": 0.75390625, "learning_rate": 8.215531146939754e-05, "loss": 0.8053, "step": 36543 }, { "epoch": 0.9676708055096843, "grad_norm": 0.890625, "learning_rate": 8.21507816397379e-05, "loss": 0.8029, "step": 36544 }, { "epoch": 0.9676972851179787, "grad_norm": 0.7578125, "learning_rate": 8.214625184790865e-05, "loss": 0.7078, "step": 36545 }, { "epoch": 0.9677237647262731, "grad_norm": 0.71875, "learning_rate": 8.214172209391935e-05, "loss": 0.7124, "step": 36546 }, { "epoch": 0.9677502443345675, "grad_norm": 0.8125, "learning_rate": 8.21371923777797e-05, "loss": 0.8178, "step": 36547 }, { "epoch": 0.9677767239428618, "grad_norm": 0.7890625, "learning_rate": 8.213266269949925e-05, "loss": 0.7284, "step": 36548 }, { "epoch": 0.9678032035511562, "grad_norm": 0.78515625, "learning_rate": 8.212813305908759e-05, "loss": 0.844, "step": 36549 }, { "epoch": 0.9678296831594506, "grad_norm": 0.81640625, "learning_rate": 8.212360345655434e-05, "loss": 0.7963, "step": 36550 }, { "epoch": 0.967856162767745, "grad_norm": 0.80859375, "learning_rate": 8.211907389190903e-05, "loss": 0.7208, "step": 36551 }, { "epoch": 0.9678826423760393, "grad_norm": 0.8203125, "learning_rate": 8.21145443651614e-05, "loss": 0.8659, "step": 36552 }, { "epoch": 0.9679091219843337, "grad_norm": 0.890625, "learning_rate": 8.211001487632094e-05, "loss": 0.8791, "step": 36553 }, { "epoch": 0.967935601592628, "grad_norm": 0.86328125, "learning_rate": 8.21054854253973e-05, "loss": 0.7267, "step": 36554 }, { "epoch": 0.9679620812009224, "grad_norm": 0.734375, "learning_rate": 8.210095601240004e-05, "loss": 0.6642, "step": 36555 }, { "epoch": 0.9679885608092168, "grad_norm": 0.80078125, "learning_rate": 8.209642663733879e-05, "loss": 0.8736, "step": 36556 }, { "epoch": 0.9680150404175112, "grad_norm": 0.76171875, "learning_rate": 8.209189730022312e-05, "loss": 0.7474, "step": 36557 }, { "epoch": 0.9680415200258056, "grad_norm": 0.796875, "learning_rate": 8.208736800106268e-05, "loss": 0.8353, "step": 36558 }, { "epoch": 0.9680679996341, "grad_norm": 0.91796875, "learning_rate": 8.208283873986703e-05, "loss": 0.7156, "step": 36559 }, { "epoch": 0.9680944792423943, "grad_norm": 0.75390625, "learning_rate": 8.207830951664576e-05, "loss": 0.8185, "step": 36560 }, { "epoch": 0.9681209588506887, "grad_norm": 0.73828125, "learning_rate": 8.207378033140851e-05, "loss": 0.7862, "step": 36561 }, { "epoch": 0.9681474384589831, "grad_norm": 0.8984375, "learning_rate": 8.206925118416483e-05, "loss": 0.8811, "step": 36562 }, { "epoch": 0.9681739180672775, "grad_norm": 0.73046875, "learning_rate": 8.206472207492437e-05, "loss": 0.7216, "step": 36563 }, { "epoch": 0.9682003976755719, "grad_norm": 0.71875, "learning_rate": 8.206019300369672e-05, "loss": 0.6698, "step": 36564 }, { "epoch": 0.9682268772838662, "grad_norm": 0.75, "learning_rate": 8.205566397049146e-05, "loss": 0.7731, "step": 36565 }, { "epoch": 0.9682533568921606, "grad_norm": 0.796875, "learning_rate": 8.205113497531819e-05, "loss": 0.7592, "step": 36566 }, { "epoch": 0.968279836500455, "grad_norm": 1.25, "learning_rate": 8.204660601818647e-05, "loss": 0.813, "step": 36567 }, { "epoch": 0.9683063161087494, "grad_norm": 0.76953125, "learning_rate": 8.204207709910599e-05, "loss": 0.7665, "step": 36568 }, { "epoch": 0.9683327957170437, "grad_norm": 0.80078125, "learning_rate": 8.203754821808628e-05, "loss": 0.8358, "step": 36569 }, { "epoch": 0.968359275325338, "grad_norm": 0.78125, "learning_rate": 8.2033019375137e-05, "loss": 0.7857, "step": 36570 }, { "epoch": 0.9683857549336324, "grad_norm": 0.671875, "learning_rate": 8.202849057026768e-05, "loss": 0.6436, "step": 36571 }, { "epoch": 0.9684122345419268, "grad_norm": 0.70703125, "learning_rate": 8.202396180348791e-05, "loss": 0.7678, "step": 36572 }, { "epoch": 0.9684387141502212, "grad_norm": 0.75, "learning_rate": 8.201943307480737e-05, "loss": 0.8124, "step": 36573 }, { "epoch": 0.9684651937585156, "grad_norm": 0.94140625, "learning_rate": 8.201490438423562e-05, "loss": 0.8026, "step": 36574 }, { "epoch": 0.96849167336681, "grad_norm": 0.92578125, "learning_rate": 8.201037573178224e-05, "loss": 0.7768, "step": 36575 }, { "epoch": 0.9685181529751044, "grad_norm": 0.67578125, "learning_rate": 8.200584711745684e-05, "loss": 0.6879, "step": 36576 }, { "epoch": 0.9685446325833987, "grad_norm": 0.81640625, "learning_rate": 8.200131854126901e-05, "loss": 0.8447, "step": 36577 }, { "epoch": 0.9685711121916931, "grad_norm": 0.78125, "learning_rate": 8.199679000322837e-05, "loss": 0.7483, "step": 36578 }, { "epoch": 0.9685975917999875, "grad_norm": 0.78125, "learning_rate": 8.199226150334449e-05, "loss": 0.6764, "step": 36579 }, { "epoch": 0.9686240714082819, "grad_norm": 0.8046875, "learning_rate": 8.198773304162699e-05, "loss": 0.6802, "step": 36580 }, { "epoch": 0.9686505510165763, "grad_norm": 0.7265625, "learning_rate": 8.198320461808547e-05, "loss": 0.774, "step": 36581 }, { "epoch": 0.9686770306248706, "grad_norm": 0.7109375, "learning_rate": 8.19786762327295e-05, "loss": 0.8196, "step": 36582 }, { "epoch": 0.968703510233165, "grad_norm": 0.79296875, "learning_rate": 8.197414788556873e-05, "loss": 0.7442, "step": 36583 }, { "epoch": 0.9687299898414594, "grad_norm": 0.78515625, "learning_rate": 8.196961957661269e-05, "loss": 0.6801, "step": 36584 }, { "epoch": 0.9687564694497537, "grad_norm": 0.8125, "learning_rate": 8.196509130587102e-05, "loss": 0.9157, "step": 36585 }, { "epoch": 0.9687829490580481, "grad_norm": 0.765625, "learning_rate": 8.196056307335332e-05, "loss": 0.7661, "step": 36586 }, { "epoch": 0.9688094286663425, "grad_norm": 0.80859375, "learning_rate": 8.195603487906913e-05, "loss": 0.7663, "step": 36587 }, { "epoch": 0.9688359082746368, "grad_norm": 0.8203125, "learning_rate": 8.195150672302812e-05, "loss": 0.7398, "step": 36588 }, { "epoch": 0.9688623878829312, "grad_norm": 0.77734375, "learning_rate": 8.194697860523987e-05, "loss": 0.8634, "step": 36589 }, { "epoch": 0.9688888674912256, "grad_norm": 0.734375, "learning_rate": 8.1942450525714e-05, "loss": 0.8147, "step": 36590 }, { "epoch": 0.96891534709952, "grad_norm": 0.640625, "learning_rate": 8.193792248446003e-05, "loss": 0.6871, "step": 36591 }, { "epoch": 0.9689418267078144, "grad_norm": 0.8125, "learning_rate": 8.193339448148757e-05, "loss": 0.797, "step": 36592 }, { "epoch": 0.9689683063161088, "grad_norm": 0.75, "learning_rate": 8.192886651680628e-05, "loss": 0.833, "step": 36593 }, { "epoch": 0.9689947859244031, "grad_norm": 0.734375, "learning_rate": 8.192433859042574e-05, "loss": 0.7892, "step": 36594 }, { "epoch": 0.9690212655326975, "grad_norm": 0.8125, "learning_rate": 8.191981070235552e-05, "loss": 0.7597, "step": 36595 }, { "epoch": 0.9690477451409919, "grad_norm": 0.75, "learning_rate": 8.191528285260525e-05, "loss": 0.759, "step": 36596 }, { "epoch": 0.9690742247492863, "grad_norm": 0.74609375, "learning_rate": 8.191075504118446e-05, "loss": 0.8251, "step": 36597 }, { "epoch": 0.9691007043575807, "grad_norm": 0.71484375, "learning_rate": 8.190622726810281e-05, "loss": 0.6376, "step": 36598 }, { "epoch": 0.969127183965875, "grad_norm": 0.87109375, "learning_rate": 8.190169953336988e-05, "loss": 0.9124, "step": 36599 }, { "epoch": 0.9691536635741694, "grad_norm": 0.796875, "learning_rate": 8.189717183699528e-05, "loss": 0.7647, "step": 36600 }, { "epoch": 0.9691801431824637, "grad_norm": 0.8359375, "learning_rate": 8.189264417898857e-05, "loss": 0.7506, "step": 36601 }, { "epoch": 0.9692066227907581, "grad_norm": 0.76171875, "learning_rate": 8.188811655935936e-05, "loss": 0.7759, "step": 36602 }, { "epoch": 0.9692331023990525, "grad_norm": 0.7578125, "learning_rate": 8.188358897811727e-05, "loss": 0.8151, "step": 36603 }, { "epoch": 0.9692595820073469, "grad_norm": 0.828125, "learning_rate": 8.187906143527187e-05, "loss": 0.8372, "step": 36604 }, { "epoch": 0.9692860616156412, "grad_norm": 0.76953125, "learning_rate": 8.187453393083277e-05, "loss": 0.7191, "step": 36605 }, { "epoch": 0.9693125412239356, "grad_norm": 0.72265625, "learning_rate": 8.187000646480956e-05, "loss": 0.7383, "step": 36606 }, { "epoch": 0.96933902083223, "grad_norm": 0.796875, "learning_rate": 8.18654790372118e-05, "loss": 0.8016, "step": 36607 }, { "epoch": 0.9693655004405244, "grad_norm": 0.82421875, "learning_rate": 8.186095164804915e-05, "loss": 0.8334, "step": 36608 }, { "epoch": 0.9693919800488188, "grad_norm": 0.7890625, "learning_rate": 8.185642429733118e-05, "loss": 0.7929, "step": 36609 }, { "epoch": 0.9694184596571132, "grad_norm": 0.8203125, "learning_rate": 8.185189698506748e-05, "loss": 0.7604, "step": 36610 }, { "epoch": 0.9694449392654075, "grad_norm": 0.79296875, "learning_rate": 8.184736971126765e-05, "loss": 0.7488, "step": 36611 }, { "epoch": 0.9694714188737019, "grad_norm": 0.7109375, "learning_rate": 8.184284247594124e-05, "loss": 0.6857, "step": 36612 }, { "epoch": 0.9694978984819963, "grad_norm": 0.67578125, "learning_rate": 8.183831527909794e-05, "loss": 0.7655, "step": 36613 }, { "epoch": 0.9695243780902907, "grad_norm": 0.765625, "learning_rate": 8.183378812074727e-05, "loss": 0.8114, "step": 36614 }, { "epoch": 0.9695508576985851, "grad_norm": 0.87890625, "learning_rate": 8.182926100089886e-05, "loss": 0.8746, "step": 36615 }, { "epoch": 0.9695773373068794, "grad_norm": 0.73828125, "learning_rate": 8.182473391956228e-05, "loss": 0.678, "step": 36616 }, { "epoch": 0.9696038169151738, "grad_norm": 0.796875, "learning_rate": 8.182020687674712e-05, "loss": 0.8066, "step": 36617 }, { "epoch": 0.9696302965234681, "grad_norm": 0.75390625, "learning_rate": 8.181567987246303e-05, "loss": 0.6989, "step": 36618 }, { "epoch": 0.9696567761317625, "grad_norm": 0.78515625, "learning_rate": 8.181115290671956e-05, "loss": 0.7372, "step": 36619 }, { "epoch": 0.9696832557400569, "grad_norm": 0.75390625, "learning_rate": 8.180662597952631e-05, "loss": 0.8374, "step": 36620 }, { "epoch": 0.9697097353483513, "grad_norm": 0.8125, "learning_rate": 8.180209909089287e-05, "loss": 0.8527, "step": 36621 }, { "epoch": 0.9697362149566456, "grad_norm": 0.796875, "learning_rate": 8.179757224082884e-05, "loss": 0.808, "step": 36622 }, { "epoch": 0.96976269456494, "grad_norm": 0.84375, "learning_rate": 8.179304542934382e-05, "loss": 0.7662, "step": 36623 }, { "epoch": 0.9697891741732344, "grad_norm": 0.81640625, "learning_rate": 8.17885186564474e-05, "loss": 0.6976, "step": 36624 }, { "epoch": 0.9698156537815288, "grad_norm": 0.77734375, "learning_rate": 8.178399192214917e-05, "loss": 0.7003, "step": 36625 }, { "epoch": 0.9698421333898232, "grad_norm": 0.7578125, "learning_rate": 8.177946522645873e-05, "loss": 0.7842, "step": 36626 }, { "epoch": 0.9698686129981176, "grad_norm": 0.7578125, "learning_rate": 8.177493856938566e-05, "loss": 0.763, "step": 36627 }, { "epoch": 0.9698950926064119, "grad_norm": 0.7734375, "learning_rate": 8.177041195093957e-05, "loss": 0.7521, "step": 36628 }, { "epoch": 0.9699215722147063, "grad_norm": 0.87890625, "learning_rate": 8.176588537113006e-05, "loss": 0.8775, "step": 36629 }, { "epoch": 0.9699480518230007, "grad_norm": 0.86328125, "learning_rate": 8.176135882996672e-05, "loss": 0.8362, "step": 36630 }, { "epoch": 0.9699745314312951, "grad_norm": 0.76171875, "learning_rate": 8.175683232745913e-05, "loss": 0.7381, "step": 36631 }, { "epoch": 0.9700010110395895, "grad_norm": 0.8515625, "learning_rate": 8.175230586361685e-05, "loss": 0.9037, "step": 36632 }, { "epoch": 0.9700274906478838, "grad_norm": 0.7890625, "learning_rate": 8.174777943844956e-05, "loss": 0.775, "step": 36633 }, { "epoch": 0.9700539702561781, "grad_norm": 1.3828125, "learning_rate": 8.174325305196681e-05, "loss": 0.7184, "step": 36634 }, { "epoch": 0.9700804498644725, "grad_norm": 0.7421875, "learning_rate": 8.173872670417819e-05, "loss": 0.8918, "step": 36635 }, { "epoch": 0.9701069294727669, "grad_norm": 0.78515625, "learning_rate": 8.17342003950933e-05, "loss": 0.7433, "step": 36636 }, { "epoch": 0.9701334090810613, "grad_norm": 0.765625, "learning_rate": 8.172967412472168e-05, "loss": 0.8219, "step": 36637 }, { "epoch": 0.9701598886893557, "grad_norm": 0.765625, "learning_rate": 8.172514789307301e-05, "loss": 0.9802, "step": 36638 }, { "epoch": 0.97018636829765, "grad_norm": 0.7890625, "learning_rate": 8.172062170015685e-05, "loss": 0.7188, "step": 36639 }, { "epoch": 0.9702128479059444, "grad_norm": 0.77734375, "learning_rate": 8.171609554598279e-05, "loss": 0.8869, "step": 36640 }, { "epoch": 0.9702393275142388, "grad_norm": 0.828125, "learning_rate": 8.171156943056042e-05, "loss": 0.7376, "step": 36641 }, { "epoch": 0.9702658071225332, "grad_norm": 0.84765625, "learning_rate": 8.170704335389931e-05, "loss": 0.8481, "step": 36642 }, { "epoch": 0.9702922867308276, "grad_norm": 0.8359375, "learning_rate": 8.170251731600909e-05, "loss": 0.7278, "step": 36643 }, { "epoch": 0.970318766339122, "grad_norm": 0.75, "learning_rate": 8.169799131689935e-05, "loss": 0.7916, "step": 36644 }, { "epoch": 0.9703452459474163, "grad_norm": 0.8515625, "learning_rate": 8.169346535657967e-05, "loss": 0.831, "step": 36645 }, { "epoch": 0.9703717255557107, "grad_norm": 0.79296875, "learning_rate": 8.168893943505962e-05, "loss": 0.7388, "step": 36646 }, { "epoch": 0.9703982051640051, "grad_norm": 0.76953125, "learning_rate": 8.168441355234882e-05, "loss": 0.7275, "step": 36647 }, { "epoch": 0.9704246847722995, "grad_norm": 0.8046875, "learning_rate": 8.167988770845687e-05, "loss": 0.7643, "step": 36648 }, { "epoch": 0.9704511643805939, "grad_norm": 0.8515625, "learning_rate": 8.167536190339335e-05, "loss": 0.748, "step": 36649 }, { "epoch": 0.9704776439888881, "grad_norm": 0.82421875, "learning_rate": 8.167083613716787e-05, "loss": 0.6797, "step": 36650 }, { "epoch": 0.9705041235971825, "grad_norm": 0.80859375, "learning_rate": 8.166631040978998e-05, "loss": 0.8407, "step": 36651 }, { "epoch": 0.9705306032054769, "grad_norm": 0.77734375, "learning_rate": 8.166178472126928e-05, "loss": 0.7373, "step": 36652 }, { "epoch": 0.9705570828137713, "grad_norm": 0.8359375, "learning_rate": 8.16572590716154e-05, "loss": 0.8258, "step": 36653 }, { "epoch": 0.9705835624220657, "grad_norm": 0.83203125, "learning_rate": 8.165273346083793e-05, "loss": 0.8016, "step": 36654 }, { "epoch": 0.97061004203036, "grad_norm": 0.76953125, "learning_rate": 8.164820788894643e-05, "loss": 0.7491, "step": 36655 }, { "epoch": 0.9706365216386544, "grad_norm": 0.7578125, "learning_rate": 8.164368235595052e-05, "loss": 0.7549, "step": 36656 }, { "epoch": 0.9706630012469488, "grad_norm": 0.80859375, "learning_rate": 8.16391568618597e-05, "loss": 0.7923, "step": 36657 }, { "epoch": 0.9706894808552432, "grad_norm": 0.7734375, "learning_rate": 8.163463140668372e-05, "loss": 0.7743, "step": 36658 }, { "epoch": 0.9707159604635376, "grad_norm": 0.7890625, "learning_rate": 8.163010599043206e-05, "loss": 0.7064, "step": 36659 }, { "epoch": 0.970742440071832, "grad_norm": 0.796875, "learning_rate": 8.162558061311436e-05, "loss": 0.7583, "step": 36660 }, { "epoch": 0.9707689196801264, "grad_norm": 0.8125, "learning_rate": 8.162105527474017e-05, "loss": 0.7971, "step": 36661 }, { "epoch": 0.9707953992884207, "grad_norm": 0.796875, "learning_rate": 8.16165299753191e-05, "loss": 0.6931, "step": 36662 }, { "epoch": 0.9708218788967151, "grad_norm": 0.80078125, "learning_rate": 8.161200471486076e-05, "loss": 0.6656, "step": 36663 }, { "epoch": 0.9708483585050095, "grad_norm": 0.7578125, "learning_rate": 8.16074794933747e-05, "loss": 0.744, "step": 36664 }, { "epoch": 0.9708748381133039, "grad_norm": 0.734375, "learning_rate": 8.160295431087057e-05, "loss": 0.8073, "step": 36665 }, { "epoch": 0.9709013177215983, "grad_norm": 0.75390625, "learning_rate": 8.159842916735792e-05, "loss": 0.7379, "step": 36666 }, { "epoch": 0.9709277973298925, "grad_norm": 0.88671875, "learning_rate": 8.159390406284633e-05, "loss": 0.7711, "step": 36667 }, { "epoch": 0.9709542769381869, "grad_norm": 0.77734375, "learning_rate": 8.158937899734542e-05, "loss": 0.7589, "step": 36668 }, { "epoch": 0.9709807565464813, "grad_norm": 0.828125, "learning_rate": 8.158485397086478e-05, "loss": 0.8413, "step": 36669 }, { "epoch": 0.9710072361547757, "grad_norm": 0.78515625, "learning_rate": 8.158032898341397e-05, "loss": 0.8033, "step": 36670 }, { "epoch": 0.9710337157630701, "grad_norm": 0.796875, "learning_rate": 8.157580403500262e-05, "loss": 0.7912, "step": 36671 }, { "epoch": 0.9710601953713645, "grad_norm": 0.8203125, "learning_rate": 8.157127912564024e-05, "loss": 0.6775, "step": 36672 }, { "epoch": 0.9710866749796588, "grad_norm": 0.796875, "learning_rate": 8.156675425533654e-05, "loss": 0.7228, "step": 36673 }, { "epoch": 0.9711131545879532, "grad_norm": 0.71484375, "learning_rate": 8.156222942410104e-05, "loss": 0.8574, "step": 36674 }, { "epoch": 0.9711396341962476, "grad_norm": 0.76171875, "learning_rate": 8.155770463194335e-05, "loss": 0.8235, "step": 36675 }, { "epoch": 0.971166113804542, "grad_norm": 0.77734375, "learning_rate": 8.155317987887304e-05, "loss": 0.8609, "step": 36676 }, { "epoch": 0.9711925934128364, "grad_norm": 0.83984375, "learning_rate": 8.154865516489968e-05, "loss": 0.9349, "step": 36677 }, { "epoch": 0.9712190730211308, "grad_norm": 0.80078125, "learning_rate": 8.154413049003293e-05, "loss": 0.8021, "step": 36678 }, { "epoch": 0.9712455526294251, "grad_norm": 0.80859375, "learning_rate": 8.153960585428233e-05, "loss": 0.778, "step": 36679 }, { "epoch": 0.9712720322377195, "grad_norm": 0.81640625, "learning_rate": 8.15350812576575e-05, "loss": 0.851, "step": 36680 }, { "epoch": 0.9712985118460139, "grad_norm": 0.796875, "learning_rate": 8.153055670016799e-05, "loss": 0.7867, "step": 36681 }, { "epoch": 0.9713249914543083, "grad_norm": 0.77734375, "learning_rate": 8.15260321818234e-05, "loss": 0.808, "step": 36682 }, { "epoch": 0.9713514710626026, "grad_norm": 0.85546875, "learning_rate": 8.152150770263332e-05, "loss": 0.8016, "step": 36683 }, { "epoch": 0.9713779506708969, "grad_norm": 0.8125, "learning_rate": 8.151698326260737e-05, "loss": 0.7899, "step": 36684 }, { "epoch": 0.9714044302791913, "grad_norm": 0.76171875, "learning_rate": 8.151245886175513e-05, "loss": 0.7745, "step": 36685 }, { "epoch": 0.9714309098874857, "grad_norm": 0.78125, "learning_rate": 8.150793450008616e-05, "loss": 0.6893, "step": 36686 }, { "epoch": 0.9714573894957801, "grad_norm": 0.8671875, "learning_rate": 8.150341017761005e-05, "loss": 0.8168, "step": 36687 }, { "epoch": 0.9714838691040745, "grad_norm": 0.7421875, "learning_rate": 8.149888589433644e-05, "loss": 0.7424, "step": 36688 }, { "epoch": 0.9715103487123689, "grad_norm": 0.734375, "learning_rate": 8.149436165027486e-05, "loss": 0.7084, "step": 36689 }, { "epoch": 0.9715368283206632, "grad_norm": 0.83984375, "learning_rate": 8.148983744543494e-05, "loss": 0.7678, "step": 36690 }, { "epoch": 0.9715633079289576, "grad_norm": 0.80078125, "learning_rate": 8.148531327982626e-05, "loss": 0.8632, "step": 36691 }, { "epoch": 0.971589787537252, "grad_norm": 0.80078125, "learning_rate": 8.148078915345833e-05, "loss": 0.6968, "step": 36692 }, { "epoch": 0.9716162671455464, "grad_norm": 0.79296875, "learning_rate": 8.147626506634087e-05, "loss": 0.7679, "step": 36693 }, { "epoch": 0.9716427467538408, "grad_norm": 0.80859375, "learning_rate": 8.147174101848341e-05, "loss": 0.773, "step": 36694 }, { "epoch": 0.9716692263621352, "grad_norm": 0.8046875, "learning_rate": 8.146721700989554e-05, "loss": 0.7155, "step": 36695 }, { "epoch": 0.9716957059704295, "grad_norm": 0.7265625, "learning_rate": 8.146269304058682e-05, "loss": 0.8113, "step": 36696 }, { "epoch": 0.9717221855787239, "grad_norm": 0.82421875, "learning_rate": 8.145816911056684e-05, "loss": 0.7138, "step": 36697 }, { "epoch": 0.9717486651870183, "grad_norm": 0.83203125, "learning_rate": 8.145364521984524e-05, "loss": 0.8021, "step": 36698 }, { "epoch": 0.9717751447953126, "grad_norm": 0.77734375, "learning_rate": 8.14491213684316e-05, "loss": 0.7712, "step": 36699 }, { "epoch": 0.971801624403607, "grad_norm": 0.71875, "learning_rate": 8.144459755633547e-05, "loss": 0.7903, "step": 36700 }, { "epoch": 0.9718281040119013, "grad_norm": 1.3828125, "learning_rate": 8.144007378356646e-05, "loss": 0.7924, "step": 36701 }, { "epoch": 0.9718545836201957, "grad_norm": 0.8359375, "learning_rate": 8.143555005013415e-05, "loss": 0.8888, "step": 36702 }, { "epoch": 0.9718810632284901, "grad_norm": 0.76171875, "learning_rate": 8.14310263560481e-05, "loss": 0.7137, "step": 36703 }, { "epoch": 0.9719075428367845, "grad_norm": 0.73046875, "learning_rate": 8.142650270131795e-05, "loss": 0.6633, "step": 36704 }, { "epoch": 0.9719340224450789, "grad_norm": 0.78515625, "learning_rate": 8.142197908595327e-05, "loss": 0.7869, "step": 36705 }, { "epoch": 0.9719605020533733, "grad_norm": 0.796875, "learning_rate": 8.141745550996365e-05, "loss": 0.8275, "step": 36706 }, { "epoch": 0.9719869816616676, "grad_norm": 0.7890625, "learning_rate": 8.141293197335867e-05, "loss": 0.6859, "step": 36707 }, { "epoch": 0.972013461269962, "grad_norm": 0.76171875, "learning_rate": 8.14084084761479e-05, "loss": 0.7612, "step": 36708 }, { "epoch": 0.9720399408782564, "grad_norm": 0.73046875, "learning_rate": 8.140388501834097e-05, "loss": 0.7345, "step": 36709 }, { "epoch": 0.9720664204865508, "grad_norm": 0.703125, "learning_rate": 8.139936159994744e-05, "loss": 0.6848, "step": 36710 }, { "epoch": 0.9720929000948452, "grad_norm": 0.7265625, "learning_rate": 8.139483822097688e-05, "loss": 0.7009, "step": 36711 }, { "epoch": 0.9721193797031396, "grad_norm": 0.77734375, "learning_rate": 8.139031488143892e-05, "loss": 0.8063, "step": 36712 }, { "epoch": 0.9721458593114339, "grad_norm": 0.8046875, "learning_rate": 8.138579158134307e-05, "loss": 0.9303, "step": 36713 }, { "epoch": 0.9721723389197283, "grad_norm": 0.92578125, "learning_rate": 8.138126832069903e-05, "loss": 0.7706, "step": 36714 }, { "epoch": 0.9721988185280227, "grad_norm": 0.83203125, "learning_rate": 8.137674509951632e-05, "loss": 0.7308, "step": 36715 }, { "epoch": 0.972225298136317, "grad_norm": 0.796875, "learning_rate": 8.137222191780452e-05, "loss": 0.7395, "step": 36716 }, { "epoch": 0.9722517777446114, "grad_norm": 0.75390625, "learning_rate": 8.136769877557325e-05, "loss": 0.7359, "step": 36717 }, { "epoch": 0.9722782573529057, "grad_norm": 0.77734375, "learning_rate": 8.136317567283202e-05, "loss": 0.8216, "step": 36718 }, { "epoch": 0.9723047369612001, "grad_norm": 0.78125, "learning_rate": 8.135865260959053e-05, "loss": 0.7896, "step": 36719 }, { "epoch": 0.9723312165694945, "grad_norm": 0.79296875, "learning_rate": 8.13541295858583e-05, "loss": 0.8207, "step": 36720 }, { "epoch": 0.9723576961777889, "grad_norm": 0.71875, "learning_rate": 8.134960660164493e-05, "loss": 0.8572, "step": 36721 }, { "epoch": 0.9723841757860833, "grad_norm": 0.7890625, "learning_rate": 8.134508365696001e-05, "loss": 0.7814, "step": 36722 }, { "epoch": 0.9724106553943777, "grad_norm": 0.73046875, "learning_rate": 8.134056075181307e-05, "loss": 0.6372, "step": 36723 }, { "epoch": 0.972437135002672, "grad_norm": 0.703125, "learning_rate": 8.133603788621377e-05, "loss": 0.7285, "step": 36724 }, { "epoch": 0.9724636146109664, "grad_norm": 0.78515625, "learning_rate": 8.133151506017169e-05, "loss": 0.8838, "step": 36725 }, { "epoch": 0.9724900942192608, "grad_norm": 0.6640625, "learning_rate": 8.13269922736964e-05, "loss": 0.6685, "step": 36726 }, { "epoch": 0.9725165738275552, "grad_norm": 0.7421875, "learning_rate": 8.132246952679748e-05, "loss": 0.7654, "step": 36727 }, { "epoch": 0.9725430534358496, "grad_norm": 0.8359375, "learning_rate": 8.131794681948449e-05, "loss": 0.8673, "step": 36728 }, { "epoch": 0.972569533044144, "grad_norm": 0.73828125, "learning_rate": 8.131342415176707e-05, "loss": 0.7301, "step": 36729 }, { "epoch": 0.9725960126524383, "grad_norm": 0.7734375, "learning_rate": 8.130890152365477e-05, "loss": 0.904, "step": 36730 }, { "epoch": 0.9726224922607327, "grad_norm": 0.76953125, "learning_rate": 8.130437893515717e-05, "loss": 0.7391, "step": 36731 }, { "epoch": 0.972648971869027, "grad_norm": 0.76953125, "learning_rate": 8.12998563862839e-05, "loss": 0.7863, "step": 36732 }, { "epoch": 0.9726754514773214, "grad_norm": 0.76171875, "learning_rate": 8.129533387704449e-05, "loss": 0.7309, "step": 36733 }, { "epoch": 0.9727019310856158, "grad_norm": 0.8203125, "learning_rate": 8.129081140744857e-05, "loss": 0.8202, "step": 36734 }, { "epoch": 0.9727284106939101, "grad_norm": 0.76953125, "learning_rate": 8.128628897750571e-05, "loss": 0.8135, "step": 36735 }, { "epoch": 0.9727548903022045, "grad_norm": 0.83203125, "learning_rate": 8.128176658722548e-05, "loss": 0.7632, "step": 36736 }, { "epoch": 0.9727813699104989, "grad_norm": 0.734375, "learning_rate": 8.127724423661749e-05, "loss": 0.6223, "step": 36737 }, { "epoch": 0.9728078495187933, "grad_norm": 0.890625, "learning_rate": 8.127272192569126e-05, "loss": 0.86, "step": 36738 }, { "epoch": 0.9728343291270877, "grad_norm": 0.7734375, "learning_rate": 8.126819965445647e-05, "loss": 0.7911, "step": 36739 }, { "epoch": 0.972860808735382, "grad_norm": 0.84765625, "learning_rate": 8.126367742292266e-05, "loss": 0.7625, "step": 36740 }, { "epoch": 0.9728872883436764, "grad_norm": 0.828125, "learning_rate": 8.125915523109942e-05, "loss": 0.8244, "step": 36741 }, { "epoch": 0.9729137679519708, "grad_norm": 0.73046875, "learning_rate": 8.125463307899631e-05, "loss": 0.7395, "step": 36742 }, { "epoch": 0.9729402475602652, "grad_norm": 0.76171875, "learning_rate": 8.12501109666229e-05, "loss": 0.625, "step": 36743 }, { "epoch": 0.9729667271685596, "grad_norm": 0.72265625, "learning_rate": 8.124558889398886e-05, "loss": 0.7368, "step": 36744 }, { "epoch": 0.972993206776854, "grad_norm": 0.73828125, "learning_rate": 8.124106686110372e-05, "loss": 0.762, "step": 36745 }, { "epoch": 0.9730196863851484, "grad_norm": 0.796875, "learning_rate": 8.123654486797706e-05, "loss": 0.7777, "step": 36746 }, { "epoch": 0.9730461659934427, "grad_norm": 0.84765625, "learning_rate": 8.123202291461848e-05, "loss": 0.9708, "step": 36747 }, { "epoch": 0.973072645601737, "grad_norm": 0.703125, "learning_rate": 8.122750100103752e-05, "loss": 0.7628, "step": 36748 }, { "epoch": 0.9730991252100314, "grad_norm": 0.96484375, "learning_rate": 8.122297912724383e-05, "loss": 0.7848, "step": 36749 }, { "epoch": 0.9731256048183258, "grad_norm": 0.75, "learning_rate": 8.121845729324694e-05, "loss": 0.7835, "step": 36750 }, { "epoch": 0.9731520844266202, "grad_norm": 0.74609375, "learning_rate": 8.121393549905649e-05, "loss": 0.8254, "step": 36751 }, { "epoch": 0.9731785640349145, "grad_norm": 0.8359375, "learning_rate": 8.1209413744682e-05, "loss": 0.8701, "step": 36752 }, { "epoch": 0.9732050436432089, "grad_norm": 0.69140625, "learning_rate": 8.120489203013309e-05, "loss": 0.6923, "step": 36753 }, { "epoch": 0.9732315232515033, "grad_norm": 0.828125, "learning_rate": 8.120037035541934e-05, "loss": 0.7484, "step": 36754 }, { "epoch": 0.9732580028597977, "grad_norm": 0.8203125, "learning_rate": 8.119584872055034e-05, "loss": 0.7604, "step": 36755 }, { "epoch": 0.9732844824680921, "grad_norm": 0.80078125, "learning_rate": 8.119132712553566e-05, "loss": 0.9112, "step": 36756 }, { "epoch": 0.9733109620763865, "grad_norm": 0.7109375, "learning_rate": 8.11868055703849e-05, "loss": 0.8685, "step": 36757 }, { "epoch": 0.9733374416846808, "grad_norm": 1.078125, "learning_rate": 8.118228405510756e-05, "loss": 0.8005, "step": 36758 }, { "epoch": 0.9733639212929752, "grad_norm": 0.77734375, "learning_rate": 8.117776257971335e-05, "loss": 0.7427, "step": 36759 }, { "epoch": 0.9733904009012696, "grad_norm": 0.79296875, "learning_rate": 8.117324114421181e-05, "loss": 0.8151, "step": 36760 }, { "epoch": 0.973416880509564, "grad_norm": 0.8359375, "learning_rate": 8.116871974861248e-05, "loss": 0.8502, "step": 36761 }, { "epoch": 0.9734433601178584, "grad_norm": 0.90234375, "learning_rate": 8.116419839292501e-05, "loss": 0.8332, "step": 36762 }, { "epoch": 0.9734698397261528, "grad_norm": 0.79296875, "learning_rate": 8.115967707715888e-05, "loss": 0.8097, "step": 36763 }, { "epoch": 0.9734963193344471, "grad_norm": 0.80078125, "learning_rate": 8.115515580132378e-05, "loss": 0.8205, "step": 36764 }, { "epoch": 0.9735227989427414, "grad_norm": 0.90625, "learning_rate": 8.115063456542925e-05, "loss": 0.9028, "step": 36765 }, { "epoch": 0.9735492785510358, "grad_norm": 0.8125, "learning_rate": 8.114611336948487e-05, "loss": 0.9323, "step": 36766 }, { "epoch": 0.9735757581593302, "grad_norm": 0.7421875, "learning_rate": 8.114159221350025e-05, "loss": 0.7624, "step": 36767 }, { "epoch": 0.9736022377676246, "grad_norm": 0.7265625, "learning_rate": 8.113707109748488e-05, "loss": 0.8079, "step": 36768 }, { "epoch": 0.9736287173759189, "grad_norm": 0.76171875, "learning_rate": 8.113255002144845e-05, "loss": 0.8248, "step": 36769 }, { "epoch": 0.9736551969842133, "grad_norm": 0.7734375, "learning_rate": 8.11280289854005e-05, "loss": 0.7191, "step": 36770 }, { "epoch": 0.9736816765925077, "grad_norm": 0.7578125, "learning_rate": 8.112350798935063e-05, "loss": 0.7222, "step": 36771 }, { "epoch": 0.9737081562008021, "grad_norm": 0.83984375, "learning_rate": 8.11189870333084e-05, "loss": 0.6617, "step": 36772 }, { "epoch": 0.9737346358090965, "grad_norm": 0.71875, "learning_rate": 8.111446611728337e-05, "loss": 0.6649, "step": 36773 }, { "epoch": 0.9737611154173909, "grad_norm": 0.703125, "learning_rate": 8.110994524128519e-05, "loss": 0.7724, "step": 36774 }, { "epoch": 0.9737875950256852, "grad_norm": 0.734375, "learning_rate": 8.110542440532339e-05, "loss": 0.8487, "step": 36775 }, { "epoch": 0.9738140746339796, "grad_norm": 0.8125, "learning_rate": 8.110090360940755e-05, "loss": 0.7364, "step": 36776 }, { "epoch": 0.973840554242274, "grad_norm": 0.7734375, "learning_rate": 8.109638285354728e-05, "loss": 0.7506, "step": 36777 }, { "epoch": 0.9738670338505684, "grad_norm": 0.96875, "learning_rate": 8.10918621377521e-05, "loss": 0.8681, "step": 36778 }, { "epoch": 0.9738935134588628, "grad_norm": 0.7421875, "learning_rate": 8.108734146203167e-05, "loss": 0.6947, "step": 36779 }, { "epoch": 0.9739199930671572, "grad_norm": 0.921875, "learning_rate": 8.108282082639556e-05, "loss": 0.9459, "step": 36780 }, { "epoch": 0.9739464726754514, "grad_norm": 0.7734375, "learning_rate": 8.10783002308533e-05, "loss": 0.6844, "step": 36781 }, { "epoch": 0.9739729522837458, "grad_norm": 0.79296875, "learning_rate": 8.107377967541452e-05, "loss": 0.8636, "step": 36782 }, { "epoch": 0.9739994318920402, "grad_norm": 0.7265625, "learning_rate": 8.106925916008875e-05, "loss": 0.8261, "step": 36783 }, { "epoch": 0.9740259115003346, "grad_norm": 0.83984375, "learning_rate": 8.106473868488563e-05, "loss": 0.737, "step": 36784 }, { "epoch": 0.974052391108629, "grad_norm": 0.796875, "learning_rate": 8.106021824981472e-05, "loss": 0.8528, "step": 36785 }, { "epoch": 0.9740788707169233, "grad_norm": 0.83984375, "learning_rate": 8.10556978548856e-05, "loss": 0.6989, "step": 36786 }, { "epoch": 0.9741053503252177, "grad_norm": 0.77734375, "learning_rate": 8.105117750010783e-05, "loss": 0.8343, "step": 36787 }, { "epoch": 0.9741318299335121, "grad_norm": 0.78515625, "learning_rate": 8.104665718549099e-05, "loss": 0.7166, "step": 36788 }, { "epoch": 0.9741583095418065, "grad_norm": 0.75, "learning_rate": 8.104213691104469e-05, "loss": 0.7815, "step": 36789 }, { "epoch": 0.9741847891501009, "grad_norm": 0.8046875, "learning_rate": 8.103761667677852e-05, "loss": 0.7238, "step": 36790 }, { "epoch": 0.9742112687583953, "grad_norm": 0.7421875, "learning_rate": 8.103309648270202e-05, "loss": 0.829, "step": 36791 }, { "epoch": 0.9742377483666896, "grad_norm": 0.82421875, "learning_rate": 8.102857632882479e-05, "loss": 0.7033, "step": 36792 }, { "epoch": 0.974264227974984, "grad_norm": 0.87890625, "learning_rate": 8.102405621515639e-05, "loss": 0.7971, "step": 36793 }, { "epoch": 0.9742907075832784, "grad_norm": 0.78515625, "learning_rate": 8.101953614170645e-05, "loss": 0.8012, "step": 36794 }, { "epoch": 0.9743171871915728, "grad_norm": 0.78515625, "learning_rate": 8.10150161084845e-05, "loss": 0.8832, "step": 36795 }, { "epoch": 0.9743436667998672, "grad_norm": 0.79296875, "learning_rate": 8.101049611550016e-05, "loss": 0.7178, "step": 36796 }, { "epoch": 0.9743701464081614, "grad_norm": 0.70703125, "learning_rate": 8.100597616276297e-05, "loss": 0.7662, "step": 36797 }, { "epoch": 0.9743966260164558, "grad_norm": 0.76171875, "learning_rate": 8.100145625028249e-05, "loss": 0.8434, "step": 36798 }, { "epoch": 0.9744231056247502, "grad_norm": 0.76953125, "learning_rate": 8.099693637806838e-05, "loss": 0.8316, "step": 36799 }, { "epoch": 0.9744495852330446, "grad_norm": 0.6953125, "learning_rate": 8.099241654613018e-05, "loss": 0.571, "step": 36800 }, { "epoch": 0.974476064841339, "grad_norm": 0.79296875, "learning_rate": 8.098789675447747e-05, "loss": 0.8692, "step": 36801 }, { "epoch": 0.9745025444496334, "grad_norm": 0.8203125, "learning_rate": 8.098337700311982e-05, "loss": 0.6822, "step": 36802 }, { "epoch": 0.9745290240579277, "grad_norm": 0.77734375, "learning_rate": 8.097885729206677e-05, "loss": 0.7259, "step": 36803 }, { "epoch": 0.9745555036662221, "grad_norm": 0.859375, "learning_rate": 8.097433762132799e-05, "loss": 0.8048, "step": 36804 }, { "epoch": 0.9745819832745165, "grad_norm": 0.78515625, "learning_rate": 8.0969817990913e-05, "loss": 0.7825, "step": 36805 }, { "epoch": 0.9746084628828109, "grad_norm": 0.7890625, "learning_rate": 8.096529840083142e-05, "loss": 0.7624, "step": 36806 }, { "epoch": 0.9746349424911053, "grad_norm": 0.80078125, "learning_rate": 8.096077885109279e-05, "loss": 0.8524, "step": 36807 }, { "epoch": 0.9746614220993997, "grad_norm": 0.80859375, "learning_rate": 8.095625934170667e-05, "loss": 0.8978, "step": 36808 }, { "epoch": 0.974687901707694, "grad_norm": 1.390625, "learning_rate": 8.09517398726827e-05, "loss": 0.7864, "step": 36809 }, { "epoch": 0.9747143813159884, "grad_norm": 0.84375, "learning_rate": 8.094722044403043e-05, "loss": 1.0688, "step": 36810 }, { "epoch": 0.9747408609242828, "grad_norm": 0.69921875, "learning_rate": 8.094270105575942e-05, "loss": 0.6999, "step": 36811 }, { "epoch": 0.9747673405325772, "grad_norm": 0.765625, "learning_rate": 8.09381817078793e-05, "loss": 0.6736, "step": 36812 }, { "epoch": 0.9747938201408716, "grad_norm": 0.8671875, "learning_rate": 8.093366240039957e-05, "loss": 0.9078, "step": 36813 }, { "epoch": 0.9748202997491658, "grad_norm": 0.703125, "learning_rate": 8.092914313332988e-05, "loss": 0.7105, "step": 36814 }, { "epoch": 0.9748467793574602, "grad_norm": 0.76171875, "learning_rate": 8.092462390667979e-05, "loss": 0.8297, "step": 36815 }, { "epoch": 0.9748732589657546, "grad_norm": 0.87109375, "learning_rate": 8.092010472045886e-05, "loss": 0.7792, "step": 36816 }, { "epoch": 0.974899738574049, "grad_norm": 0.77734375, "learning_rate": 8.091558557467665e-05, "loss": 0.7607, "step": 36817 }, { "epoch": 0.9749262181823434, "grad_norm": 0.7421875, "learning_rate": 8.091106646934276e-05, "loss": 0.8726, "step": 36818 }, { "epoch": 0.9749526977906378, "grad_norm": 0.80859375, "learning_rate": 8.090654740446681e-05, "loss": 0.8092, "step": 36819 }, { "epoch": 0.9749791773989321, "grad_norm": 0.74609375, "learning_rate": 8.090202838005835e-05, "loss": 0.7204, "step": 36820 }, { "epoch": 0.9750056570072265, "grad_norm": 0.75390625, "learning_rate": 8.089750939612693e-05, "loss": 0.6198, "step": 36821 }, { "epoch": 0.9750321366155209, "grad_norm": 0.77734375, "learning_rate": 8.089299045268216e-05, "loss": 0.7392, "step": 36822 }, { "epoch": 0.9750586162238153, "grad_norm": 0.7265625, "learning_rate": 8.088847154973356e-05, "loss": 0.7724, "step": 36823 }, { "epoch": 0.9750850958321097, "grad_norm": 0.7421875, "learning_rate": 8.088395268729078e-05, "loss": 0.7959, "step": 36824 }, { "epoch": 0.975111575440404, "grad_norm": 0.87890625, "learning_rate": 8.087943386536339e-05, "loss": 0.837, "step": 36825 }, { "epoch": 0.9751380550486984, "grad_norm": 0.7734375, "learning_rate": 8.087491508396094e-05, "loss": 0.656, "step": 36826 }, { "epoch": 0.9751645346569928, "grad_norm": 0.765625, "learning_rate": 8.087039634309302e-05, "loss": 0.8508, "step": 36827 }, { "epoch": 0.9751910142652872, "grad_norm": 0.859375, "learning_rate": 8.086587764276913e-05, "loss": 0.8347, "step": 36828 }, { "epoch": 0.9752174938735816, "grad_norm": 0.75, "learning_rate": 8.0861358982999e-05, "loss": 0.7528, "step": 36829 }, { "epoch": 0.9752439734818759, "grad_norm": 0.75, "learning_rate": 8.08568403637921e-05, "loss": 0.7736, "step": 36830 }, { "epoch": 0.9752704530901702, "grad_norm": 0.7890625, "learning_rate": 8.085232178515805e-05, "loss": 0.8319, "step": 36831 }, { "epoch": 0.9752969326984646, "grad_norm": 0.79296875, "learning_rate": 8.08478032471064e-05, "loss": 0.7788, "step": 36832 }, { "epoch": 0.975323412306759, "grad_norm": 0.859375, "learning_rate": 8.084328474964671e-05, "loss": 0.8125, "step": 36833 }, { "epoch": 0.9753498919150534, "grad_norm": 0.83203125, "learning_rate": 8.083876629278861e-05, "loss": 0.8324, "step": 36834 }, { "epoch": 0.9753763715233478, "grad_norm": 0.84765625, "learning_rate": 8.083424787654163e-05, "loss": 0.7631, "step": 36835 }, { "epoch": 0.9754028511316422, "grad_norm": 0.7109375, "learning_rate": 8.082972950091539e-05, "loss": 0.7569, "step": 36836 }, { "epoch": 0.9754293307399365, "grad_norm": 0.7890625, "learning_rate": 8.082521116591944e-05, "loss": 0.7229, "step": 36837 }, { "epoch": 0.9754558103482309, "grad_norm": 0.77734375, "learning_rate": 8.082069287156333e-05, "loss": 0.7025, "step": 36838 }, { "epoch": 0.9754822899565253, "grad_norm": 0.77734375, "learning_rate": 8.08161746178567e-05, "loss": 0.8182, "step": 36839 }, { "epoch": 0.9755087695648197, "grad_norm": 0.80859375, "learning_rate": 8.081165640480908e-05, "loss": 0.8158, "step": 36840 }, { "epoch": 0.9755352491731141, "grad_norm": 0.80078125, "learning_rate": 8.080713823243007e-05, "loss": 0.734, "step": 36841 }, { "epoch": 0.9755617287814085, "grad_norm": 0.90625, "learning_rate": 8.080262010072922e-05, "loss": 0.7382, "step": 36842 }, { "epoch": 0.9755882083897028, "grad_norm": 0.76171875, "learning_rate": 8.079810200971613e-05, "loss": 0.7179, "step": 36843 }, { "epoch": 0.9756146879979972, "grad_norm": 0.796875, "learning_rate": 8.079358395940031e-05, "loss": 0.7107, "step": 36844 }, { "epoch": 0.9756411676062916, "grad_norm": 0.734375, "learning_rate": 8.078906594979145e-05, "loss": 0.7377, "step": 36845 }, { "epoch": 0.9756676472145859, "grad_norm": 0.7734375, "learning_rate": 8.078454798089905e-05, "loss": 0.7007, "step": 36846 }, { "epoch": 0.9756941268228803, "grad_norm": 0.8125, "learning_rate": 8.078003005273272e-05, "loss": 0.8251, "step": 36847 }, { "epoch": 0.9757206064311746, "grad_norm": 0.7421875, "learning_rate": 8.077551216530199e-05, "loss": 0.7136, "step": 36848 }, { "epoch": 0.975747086039469, "grad_norm": 0.77734375, "learning_rate": 8.077099431861644e-05, "loss": 0.6973, "step": 36849 }, { "epoch": 0.9757735656477634, "grad_norm": 0.82421875, "learning_rate": 8.076647651268572e-05, "loss": 0.6843, "step": 36850 }, { "epoch": 0.9758000452560578, "grad_norm": 0.76171875, "learning_rate": 8.076195874751934e-05, "loss": 0.876, "step": 36851 }, { "epoch": 0.9758265248643522, "grad_norm": 0.83203125, "learning_rate": 8.075744102312689e-05, "loss": 0.9243, "step": 36852 }, { "epoch": 0.9758530044726466, "grad_norm": 0.796875, "learning_rate": 8.075292333951795e-05, "loss": 0.7392, "step": 36853 }, { "epoch": 0.9758794840809409, "grad_norm": 0.91796875, "learning_rate": 8.074840569670203e-05, "loss": 0.776, "step": 36854 }, { "epoch": 0.9759059636892353, "grad_norm": 0.78515625, "learning_rate": 8.074388809468882e-05, "loss": 0.7004, "step": 36855 }, { "epoch": 0.9759324432975297, "grad_norm": 0.80078125, "learning_rate": 8.073937053348784e-05, "loss": 0.8449, "step": 36856 }, { "epoch": 0.9759589229058241, "grad_norm": 0.859375, "learning_rate": 8.073485301310865e-05, "loss": 0.8403, "step": 36857 }, { "epoch": 0.9759854025141185, "grad_norm": 0.8359375, "learning_rate": 8.073033553356086e-05, "loss": 0.9243, "step": 36858 }, { "epoch": 0.9760118821224129, "grad_norm": 0.80859375, "learning_rate": 8.072581809485398e-05, "loss": 0.7405, "step": 36859 }, { "epoch": 0.9760383617307072, "grad_norm": 0.796875, "learning_rate": 8.072130069699767e-05, "loss": 0.8274, "step": 36860 }, { "epoch": 0.9760648413390016, "grad_norm": 0.80078125, "learning_rate": 8.071678334000144e-05, "loss": 0.9327, "step": 36861 }, { "epoch": 0.976091320947296, "grad_norm": 0.7578125, "learning_rate": 8.071226602387491e-05, "loss": 0.8154, "step": 36862 }, { "epoch": 0.9761178005555903, "grad_norm": 0.8515625, "learning_rate": 8.07077487486276e-05, "loss": 0.7886, "step": 36863 }, { "epoch": 0.9761442801638847, "grad_norm": 0.84765625, "learning_rate": 8.070323151426909e-05, "loss": 0.8355, "step": 36864 }, { "epoch": 0.976170759772179, "grad_norm": 0.79296875, "learning_rate": 8.069871432080902e-05, "loss": 0.7591, "step": 36865 }, { "epoch": 0.9761972393804734, "grad_norm": 0.7734375, "learning_rate": 8.069419716825691e-05, "loss": 0.8271, "step": 36866 }, { "epoch": 0.9762237189887678, "grad_norm": 0.7578125, "learning_rate": 8.068968005662237e-05, "loss": 0.7179, "step": 36867 }, { "epoch": 0.9762501985970622, "grad_norm": 0.8125, "learning_rate": 8.068516298591492e-05, "loss": 0.7589, "step": 36868 }, { "epoch": 0.9762766782053566, "grad_norm": 0.77734375, "learning_rate": 8.068064595614413e-05, "loss": 0.7984, "step": 36869 }, { "epoch": 0.976303157813651, "grad_norm": 0.828125, "learning_rate": 8.067612896731965e-05, "loss": 0.7329, "step": 36870 }, { "epoch": 0.9763296374219453, "grad_norm": 0.7734375, "learning_rate": 8.067161201945102e-05, "loss": 0.8685, "step": 36871 }, { "epoch": 0.9763561170302397, "grad_norm": 0.828125, "learning_rate": 8.06670951125478e-05, "loss": 0.702, "step": 36872 }, { "epoch": 0.9763825966385341, "grad_norm": 0.7734375, "learning_rate": 8.066257824661955e-05, "loss": 0.7299, "step": 36873 }, { "epoch": 0.9764090762468285, "grad_norm": 0.78515625, "learning_rate": 8.065806142167584e-05, "loss": 0.7861, "step": 36874 }, { "epoch": 0.9764355558551229, "grad_norm": 0.87890625, "learning_rate": 8.065354463772629e-05, "loss": 0.8365, "step": 36875 }, { "epoch": 0.9764620354634173, "grad_norm": 0.7109375, "learning_rate": 8.064902789478046e-05, "loss": 0.8379, "step": 36876 }, { "epoch": 0.9764885150717116, "grad_norm": 0.74609375, "learning_rate": 8.06445111928479e-05, "loss": 0.6954, "step": 36877 }, { "epoch": 0.976514994680006, "grad_norm": 0.8046875, "learning_rate": 8.063999453193819e-05, "loss": 0.8656, "step": 36878 }, { "epoch": 0.9765414742883003, "grad_norm": 1.546875, "learning_rate": 8.063547791206088e-05, "loss": 0.6871, "step": 36879 }, { "epoch": 0.9765679538965947, "grad_norm": 0.796875, "learning_rate": 8.063096133322561e-05, "loss": 0.771, "step": 36880 }, { "epoch": 0.9765944335048891, "grad_norm": 0.7421875, "learning_rate": 8.062644479544189e-05, "loss": 0.7901, "step": 36881 }, { "epoch": 0.9766209131131834, "grad_norm": 0.7734375, "learning_rate": 8.062192829871933e-05, "loss": 0.8721, "step": 36882 }, { "epoch": 0.9766473927214778, "grad_norm": 0.828125, "learning_rate": 8.061741184306747e-05, "loss": 0.9451, "step": 36883 }, { "epoch": 0.9766738723297722, "grad_norm": 0.85546875, "learning_rate": 8.061289542849586e-05, "loss": 0.6739, "step": 36884 }, { "epoch": 0.9767003519380666, "grad_norm": 0.76953125, "learning_rate": 8.060837905501415e-05, "loss": 0.7765, "step": 36885 }, { "epoch": 0.976726831546361, "grad_norm": 0.68359375, "learning_rate": 8.060386272263187e-05, "loss": 0.8033, "step": 36886 }, { "epoch": 0.9767533111546554, "grad_norm": 0.81640625, "learning_rate": 8.059934643135861e-05, "loss": 0.8605, "step": 36887 }, { "epoch": 0.9767797907629497, "grad_norm": 0.80078125, "learning_rate": 8.059483018120391e-05, "loss": 0.7421, "step": 36888 }, { "epoch": 0.9768062703712441, "grad_norm": 0.75, "learning_rate": 8.059031397217732e-05, "loss": 0.7326, "step": 36889 }, { "epoch": 0.9768327499795385, "grad_norm": 0.8203125, "learning_rate": 8.05857978042885e-05, "loss": 0.7422, "step": 36890 }, { "epoch": 0.9768592295878329, "grad_norm": 0.85546875, "learning_rate": 8.058128167754698e-05, "loss": 0.7699, "step": 36891 }, { "epoch": 0.9768857091961273, "grad_norm": 0.80078125, "learning_rate": 8.05767655919623e-05, "loss": 0.6911, "step": 36892 }, { "epoch": 0.9769121888044217, "grad_norm": 0.796875, "learning_rate": 8.057224954754406e-05, "loss": 0.8131, "step": 36893 }, { "epoch": 0.976938668412716, "grad_norm": 0.79296875, "learning_rate": 8.056773354430179e-05, "loss": 0.7889, "step": 36894 }, { "epoch": 0.9769651480210103, "grad_norm": 0.8125, "learning_rate": 8.056321758224514e-05, "loss": 0.748, "step": 36895 }, { "epoch": 0.9769916276293047, "grad_norm": 0.80078125, "learning_rate": 8.055870166138364e-05, "loss": 0.7852, "step": 36896 }, { "epoch": 0.9770181072375991, "grad_norm": 0.7890625, "learning_rate": 8.055418578172686e-05, "loss": 0.7674, "step": 36897 }, { "epoch": 0.9770445868458935, "grad_norm": 0.77734375, "learning_rate": 8.054966994328436e-05, "loss": 0.7992, "step": 36898 }, { "epoch": 0.9770710664541878, "grad_norm": 0.71875, "learning_rate": 8.054515414606573e-05, "loss": 0.6932, "step": 36899 }, { "epoch": 0.9770975460624822, "grad_norm": 0.72265625, "learning_rate": 8.054063839008054e-05, "loss": 0.7768, "step": 36900 }, { "epoch": 0.9771240256707766, "grad_norm": 0.76171875, "learning_rate": 8.053612267533834e-05, "loss": 0.8192, "step": 36901 }, { "epoch": 0.977150505279071, "grad_norm": 1.140625, "learning_rate": 8.053160700184872e-05, "loss": 0.7947, "step": 36902 }, { "epoch": 0.9771769848873654, "grad_norm": 0.8203125, "learning_rate": 8.052709136962124e-05, "loss": 0.748, "step": 36903 }, { "epoch": 0.9772034644956598, "grad_norm": 0.7578125, "learning_rate": 8.052257577866548e-05, "loss": 0.8544, "step": 36904 }, { "epoch": 0.9772299441039541, "grad_norm": 0.73828125, "learning_rate": 8.051806022899102e-05, "loss": 0.7392, "step": 36905 }, { "epoch": 0.9772564237122485, "grad_norm": 0.796875, "learning_rate": 8.051354472060741e-05, "loss": 0.7198, "step": 36906 }, { "epoch": 0.9772829033205429, "grad_norm": 0.76171875, "learning_rate": 8.050902925352424e-05, "loss": 0.6941, "step": 36907 }, { "epoch": 0.9773093829288373, "grad_norm": 0.75, "learning_rate": 8.050451382775106e-05, "loss": 0.8632, "step": 36908 }, { "epoch": 0.9773358625371317, "grad_norm": 0.703125, "learning_rate": 8.04999984432974e-05, "loss": 0.6695, "step": 36909 }, { "epoch": 0.977362342145426, "grad_norm": 0.90625, "learning_rate": 8.049548310017294e-05, "loss": 0.8647, "step": 36910 }, { "epoch": 0.9773888217537204, "grad_norm": 0.8203125, "learning_rate": 8.049096779838719e-05, "loss": 0.8748, "step": 36911 }, { "epoch": 0.9774153013620147, "grad_norm": 0.9296875, "learning_rate": 8.048645253794971e-05, "loss": 0.7622, "step": 36912 }, { "epoch": 0.9774417809703091, "grad_norm": 0.7578125, "learning_rate": 8.048193731887007e-05, "loss": 0.7174, "step": 36913 }, { "epoch": 0.9774682605786035, "grad_norm": 0.8203125, "learning_rate": 8.047742214115781e-05, "loss": 0.7308, "step": 36914 }, { "epoch": 0.9774947401868979, "grad_norm": 0.796875, "learning_rate": 8.047290700482258e-05, "loss": 0.9016, "step": 36915 }, { "epoch": 0.9775212197951922, "grad_norm": 0.8203125, "learning_rate": 8.046839190987391e-05, "loss": 0.768, "step": 36916 }, { "epoch": 0.9775476994034866, "grad_norm": 0.79296875, "learning_rate": 8.046387685632138e-05, "loss": 0.8045, "step": 36917 }, { "epoch": 0.977574179011781, "grad_norm": 0.74609375, "learning_rate": 8.045936184417453e-05, "loss": 0.7563, "step": 36918 }, { "epoch": 0.9776006586200754, "grad_norm": 0.84765625, "learning_rate": 8.045484687344293e-05, "loss": 0.8175, "step": 36919 }, { "epoch": 0.9776271382283698, "grad_norm": 0.7578125, "learning_rate": 8.045033194413618e-05, "loss": 0.825, "step": 36920 }, { "epoch": 0.9776536178366642, "grad_norm": 0.83203125, "learning_rate": 8.044581705626381e-05, "loss": 0.709, "step": 36921 }, { "epoch": 0.9776800974449585, "grad_norm": 0.8359375, "learning_rate": 8.044130220983544e-05, "loss": 0.8472, "step": 36922 }, { "epoch": 0.9777065770532529, "grad_norm": 0.8125, "learning_rate": 8.043678740486062e-05, "loss": 0.7979, "step": 36923 }, { "epoch": 0.9777330566615473, "grad_norm": 0.8046875, "learning_rate": 8.043227264134889e-05, "loss": 0.8773, "step": 36924 }, { "epoch": 0.9777595362698417, "grad_norm": 0.8046875, "learning_rate": 8.042775791930984e-05, "loss": 0.7806, "step": 36925 }, { "epoch": 0.9777860158781361, "grad_norm": 0.76953125, "learning_rate": 8.042324323875306e-05, "loss": 0.8073, "step": 36926 }, { "epoch": 0.9778124954864305, "grad_norm": 0.75390625, "learning_rate": 8.041872859968808e-05, "loss": 0.7325, "step": 36927 }, { "epoch": 0.9778389750947247, "grad_norm": 0.7734375, "learning_rate": 8.04142140021245e-05, "loss": 0.709, "step": 36928 }, { "epoch": 0.9778654547030191, "grad_norm": 0.78125, "learning_rate": 8.040969944607183e-05, "loss": 0.7776, "step": 36929 }, { "epoch": 0.9778919343113135, "grad_norm": 0.8671875, "learning_rate": 8.040518493153973e-05, "loss": 0.8543, "step": 36930 }, { "epoch": 0.9779184139196079, "grad_norm": 0.75, "learning_rate": 8.04006704585377e-05, "loss": 0.7364, "step": 36931 }, { "epoch": 0.9779448935279023, "grad_norm": 0.77734375, "learning_rate": 8.039615602707534e-05, "loss": 0.7306, "step": 36932 }, { "epoch": 0.9779713731361966, "grad_norm": 0.80078125, "learning_rate": 8.039164163716221e-05, "loss": 0.8009, "step": 36933 }, { "epoch": 0.977997852744491, "grad_norm": 0.83203125, "learning_rate": 8.038712728880783e-05, "loss": 0.7607, "step": 36934 }, { "epoch": 0.9780243323527854, "grad_norm": 0.73046875, "learning_rate": 8.038261298202185e-05, "loss": 0.686, "step": 36935 }, { "epoch": 0.9780508119610798, "grad_norm": 0.78515625, "learning_rate": 8.037809871681381e-05, "loss": 0.8944, "step": 36936 }, { "epoch": 0.9780772915693742, "grad_norm": 0.8359375, "learning_rate": 8.037358449319326e-05, "loss": 0.9358, "step": 36937 }, { "epoch": 0.9781037711776686, "grad_norm": 0.76171875, "learning_rate": 8.036907031116978e-05, "loss": 0.698, "step": 36938 }, { "epoch": 0.9781302507859629, "grad_norm": 0.74609375, "learning_rate": 8.036455617075289e-05, "loss": 0.8, "step": 36939 }, { "epoch": 0.9781567303942573, "grad_norm": 0.828125, "learning_rate": 8.036004207195224e-05, "loss": 0.776, "step": 36940 }, { "epoch": 0.9781832100025517, "grad_norm": 0.890625, "learning_rate": 8.035552801477737e-05, "loss": 0.9424, "step": 36941 }, { "epoch": 0.9782096896108461, "grad_norm": 0.80078125, "learning_rate": 8.035101399923782e-05, "loss": 0.7523, "step": 36942 }, { "epoch": 0.9782361692191405, "grad_norm": 0.765625, "learning_rate": 8.034650002534319e-05, "loss": 0.7411, "step": 36943 }, { "epoch": 0.9782626488274349, "grad_norm": 0.82421875, "learning_rate": 8.034198609310299e-05, "loss": 0.8988, "step": 36944 }, { "epoch": 0.9782891284357291, "grad_norm": 0.8203125, "learning_rate": 8.033747220252686e-05, "loss": 0.731, "step": 36945 }, { "epoch": 0.9783156080440235, "grad_norm": 0.7890625, "learning_rate": 8.033295835362433e-05, "loss": 0.7669, "step": 36946 }, { "epoch": 0.9783420876523179, "grad_norm": 0.81640625, "learning_rate": 8.032844454640498e-05, "loss": 0.7004, "step": 36947 }, { "epoch": 0.9783685672606123, "grad_norm": 0.84375, "learning_rate": 8.032393078087835e-05, "loss": 0.8073, "step": 36948 }, { "epoch": 0.9783950468689067, "grad_norm": 0.7265625, "learning_rate": 8.031941705705399e-05, "loss": 0.6866, "step": 36949 }, { "epoch": 0.978421526477201, "grad_norm": 0.79296875, "learning_rate": 8.031490337494155e-05, "loss": 0.8612, "step": 36950 }, { "epoch": 0.9784480060854954, "grad_norm": 0.75390625, "learning_rate": 8.031038973455054e-05, "loss": 0.8109, "step": 36951 }, { "epoch": 0.9784744856937898, "grad_norm": 0.83984375, "learning_rate": 8.030587613589053e-05, "loss": 0.7485, "step": 36952 }, { "epoch": 0.9785009653020842, "grad_norm": 0.78125, "learning_rate": 8.03013625789711e-05, "loss": 0.8507, "step": 36953 }, { "epoch": 0.9785274449103786, "grad_norm": 0.73046875, "learning_rate": 8.029684906380175e-05, "loss": 0.7196, "step": 36954 }, { "epoch": 0.978553924518673, "grad_norm": 0.7578125, "learning_rate": 8.029233559039216e-05, "loss": 0.782, "step": 36955 }, { "epoch": 0.9785804041269673, "grad_norm": 0.78125, "learning_rate": 8.028782215875183e-05, "loss": 0.8256, "step": 36956 }, { "epoch": 0.9786068837352617, "grad_norm": 0.82421875, "learning_rate": 8.028330876889033e-05, "loss": 0.7244, "step": 36957 }, { "epoch": 0.9786333633435561, "grad_norm": 0.78125, "learning_rate": 8.027879542081724e-05, "loss": 0.8189, "step": 36958 }, { "epoch": 0.9786598429518505, "grad_norm": 0.8359375, "learning_rate": 8.027428211454207e-05, "loss": 0.8099, "step": 36959 }, { "epoch": 0.9786863225601449, "grad_norm": 0.78125, "learning_rate": 8.026976885007447e-05, "loss": 0.6897, "step": 36960 }, { "epoch": 0.9787128021684391, "grad_norm": 0.7578125, "learning_rate": 8.026525562742398e-05, "loss": 0.7686, "step": 36961 }, { "epoch": 0.9787392817767335, "grad_norm": 0.70703125, "learning_rate": 8.026074244660014e-05, "loss": 0.7055, "step": 36962 }, { "epoch": 0.9787657613850279, "grad_norm": 0.7734375, "learning_rate": 8.025622930761252e-05, "loss": 0.8619, "step": 36963 }, { "epoch": 0.9787922409933223, "grad_norm": 0.84375, "learning_rate": 8.025171621047068e-05, "loss": 0.7776, "step": 36964 }, { "epoch": 0.9788187206016167, "grad_norm": 0.8046875, "learning_rate": 8.024720315518422e-05, "loss": 0.868, "step": 36965 }, { "epoch": 0.9788452002099111, "grad_norm": 0.8046875, "learning_rate": 8.024269014176269e-05, "loss": 0.8919, "step": 36966 }, { "epoch": 0.9788716798182054, "grad_norm": 0.86328125, "learning_rate": 8.023817717021565e-05, "loss": 0.8223, "step": 36967 }, { "epoch": 0.9788981594264998, "grad_norm": 0.71875, "learning_rate": 8.023366424055266e-05, "loss": 0.8038, "step": 36968 }, { "epoch": 0.9789246390347942, "grad_norm": 0.73828125, "learning_rate": 8.022915135278325e-05, "loss": 0.6862, "step": 36969 }, { "epoch": 0.9789511186430886, "grad_norm": 0.828125, "learning_rate": 8.022463850691705e-05, "loss": 0.7944, "step": 36970 }, { "epoch": 0.978977598251383, "grad_norm": 0.78125, "learning_rate": 8.022012570296362e-05, "loss": 0.7947, "step": 36971 }, { "epoch": 0.9790040778596774, "grad_norm": 0.8203125, "learning_rate": 8.02156129409325e-05, "loss": 0.9012, "step": 36972 }, { "epoch": 0.9790305574679717, "grad_norm": 0.76953125, "learning_rate": 8.021110022083324e-05, "loss": 0.7634, "step": 36973 }, { "epoch": 0.9790570370762661, "grad_norm": 0.74609375, "learning_rate": 8.02065875426754e-05, "loss": 0.8781, "step": 36974 }, { "epoch": 0.9790835166845605, "grad_norm": 0.7265625, "learning_rate": 8.020207490646859e-05, "loss": 0.6901, "step": 36975 }, { "epoch": 0.9791099962928549, "grad_norm": 0.7421875, "learning_rate": 8.019756231222236e-05, "loss": 0.8666, "step": 36976 }, { "epoch": 0.9791364759011492, "grad_norm": 0.83203125, "learning_rate": 8.019304975994627e-05, "loss": 0.7164, "step": 36977 }, { "epoch": 0.9791629555094435, "grad_norm": 0.7578125, "learning_rate": 8.018853724964988e-05, "loss": 0.8115, "step": 36978 }, { "epoch": 0.9791894351177379, "grad_norm": 0.80078125, "learning_rate": 8.01840247813427e-05, "loss": 0.7615, "step": 36979 }, { "epoch": 0.9792159147260323, "grad_norm": 0.78515625, "learning_rate": 8.01795123550344e-05, "loss": 0.7826, "step": 36980 }, { "epoch": 0.9792423943343267, "grad_norm": 0.74609375, "learning_rate": 8.017499997073448e-05, "loss": 0.7655, "step": 36981 }, { "epoch": 0.9792688739426211, "grad_norm": 0.79296875, "learning_rate": 8.017048762845252e-05, "loss": 0.71, "step": 36982 }, { "epoch": 0.9792953535509155, "grad_norm": 0.77734375, "learning_rate": 8.016597532819808e-05, "loss": 0.8344, "step": 36983 }, { "epoch": 0.9793218331592098, "grad_norm": 0.74609375, "learning_rate": 8.016146306998071e-05, "loss": 0.6568, "step": 36984 }, { "epoch": 0.9793483127675042, "grad_norm": 0.828125, "learning_rate": 8.015695085380998e-05, "loss": 0.8858, "step": 36985 }, { "epoch": 0.9793747923757986, "grad_norm": 0.8203125, "learning_rate": 8.015243867969546e-05, "loss": 0.8526, "step": 36986 }, { "epoch": 0.979401271984093, "grad_norm": 0.828125, "learning_rate": 8.014792654764673e-05, "loss": 0.885, "step": 36987 }, { "epoch": 0.9794277515923874, "grad_norm": 0.7890625, "learning_rate": 8.014341445767331e-05, "loss": 0.7877, "step": 36988 }, { "epoch": 0.9794542312006818, "grad_norm": 0.79296875, "learning_rate": 8.013890240978481e-05, "loss": 0.8476, "step": 36989 }, { "epoch": 0.9794807108089761, "grad_norm": 0.7421875, "learning_rate": 8.013439040399074e-05, "loss": 0.7275, "step": 36990 }, { "epoch": 0.9795071904172705, "grad_norm": 0.7421875, "learning_rate": 8.012987844030072e-05, "loss": 0.7574, "step": 36991 }, { "epoch": 0.9795336700255649, "grad_norm": 0.82421875, "learning_rate": 8.012536651872429e-05, "loss": 0.7813, "step": 36992 }, { "epoch": 0.9795601496338593, "grad_norm": 0.8046875, "learning_rate": 8.012085463927101e-05, "loss": 0.7644, "step": 36993 }, { "epoch": 0.9795866292421536, "grad_norm": 0.734375, "learning_rate": 8.011634280195044e-05, "loss": 0.7076, "step": 36994 }, { "epoch": 0.979613108850448, "grad_norm": 0.83984375, "learning_rate": 8.011183100677211e-05, "loss": 0.8135, "step": 36995 }, { "epoch": 0.9796395884587423, "grad_norm": 0.71875, "learning_rate": 8.010731925374564e-05, "loss": 0.7606, "step": 36996 }, { "epoch": 0.9796660680670367, "grad_norm": 0.76171875, "learning_rate": 8.01028075428806e-05, "loss": 0.8408, "step": 36997 }, { "epoch": 0.9796925476753311, "grad_norm": 0.7265625, "learning_rate": 8.00982958741865e-05, "loss": 0.7485, "step": 36998 }, { "epoch": 0.9797190272836255, "grad_norm": 0.76953125, "learning_rate": 8.009378424767293e-05, "loss": 0.8173, "step": 36999 }, { "epoch": 0.9797455068919199, "grad_norm": 0.76171875, "learning_rate": 8.00892726633494e-05, "loss": 0.7883, "step": 37000 }, { "epoch": 0.9797455068919199, "eval_loss": 0.7816874980926514, "eval_runtime": 280.8839, "eval_samples_per_second": 35.602, "eval_steps_per_second": 0.744, "step": 37000 }, { "epoch": 0.9797719865002142, "grad_norm": 0.69921875, "learning_rate": 8.008476112122558e-05, "loss": 0.7045, "step": 37001 }, { "epoch": 0.9797984661085086, "grad_norm": 0.859375, "learning_rate": 8.008024962131095e-05, "loss": 0.819, "step": 37002 }, { "epoch": 0.979824945716803, "grad_norm": 0.74609375, "learning_rate": 8.00757381636151e-05, "loss": 0.7293, "step": 37003 }, { "epoch": 0.9798514253250974, "grad_norm": 0.8359375, "learning_rate": 8.007122674814757e-05, "loss": 0.8056, "step": 37004 }, { "epoch": 0.9798779049333918, "grad_norm": 0.76171875, "learning_rate": 8.006671537491793e-05, "loss": 0.8215, "step": 37005 }, { "epoch": 0.9799043845416862, "grad_norm": 0.80078125, "learning_rate": 8.006220404393574e-05, "loss": 0.8893, "step": 37006 }, { "epoch": 0.9799308641499805, "grad_norm": 0.71875, "learning_rate": 8.00576927552106e-05, "loss": 0.7647, "step": 37007 }, { "epoch": 0.9799573437582749, "grad_norm": 0.7734375, "learning_rate": 8.005318150875203e-05, "loss": 0.7585, "step": 37008 }, { "epoch": 0.9799838233665693, "grad_norm": 0.796875, "learning_rate": 8.004867030456961e-05, "loss": 0.7356, "step": 37009 }, { "epoch": 0.9800103029748636, "grad_norm": 0.69921875, "learning_rate": 8.004415914267287e-05, "loss": 0.6443, "step": 37010 }, { "epoch": 0.980036782583158, "grad_norm": 0.8359375, "learning_rate": 8.003964802307141e-05, "loss": 0.8185, "step": 37011 }, { "epoch": 0.9800632621914523, "grad_norm": 0.8046875, "learning_rate": 8.003513694577478e-05, "loss": 0.8209, "step": 37012 }, { "epoch": 0.9800897417997467, "grad_norm": 0.8046875, "learning_rate": 8.003062591079253e-05, "loss": 0.8585, "step": 37013 }, { "epoch": 0.9801162214080411, "grad_norm": 0.796875, "learning_rate": 8.002611491813424e-05, "loss": 0.7129, "step": 37014 }, { "epoch": 0.9801427010163355, "grad_norm": 0.78125, "learning_rate": 8.00216039678094e-05, "loss": 0.7461, "step": 37015 }, { "epoch": 0.9801691806246299, "grad_norm": 0.7421875, "learning_rate": 8.001709305982767e-05, "loss": 0.6702, "step": 37016 }, { "epoch": 0.9801956602329243, "grad_norm": 0.7109375, "learning_rate": 8.001258219419858e-05, "loss": 0.7889, "step": 37017 }, { "epoch": 0.9802221398412186, "grad_norm": 0.71875, "learning_rate": 8.000807137093168e-05, "loss": 0.8085, "step": 37018 }, { "epoch": 0.980248619449513, "grad_norm": 0.77734375, "learning_rate": 8.000356059003654e-05, "loss": 0.6943, "step": 37019 }, { "epoch": 0.9802750990578074, "grad_norm": 0.8515625, "learning_rate": 7.999904985152264e-05, "loss": 0.8448, "step": 37020 }, { "epoch": 0.9803015786661018, "grad_norm": 0.83984375, "learning_rate": 7.999453915539967e-05, "loss": 0.7905, "step": 37021 }, { "epoch": 0.9803280582743962, "grad_norm": 0.82421875, "learning_rate": 7.999002850167713e-05, "loss": 0.8636, "step": 37022 }, { "epoch": 0.9803545378826906, "grad_norm": 0.76953125, "learning_rate": 7.998551789036459e-05, "loss": 0.7672, "step": 37023 }, { "epoch": 0.9803810174909849, "grad_norm": 0.8515625, "learning_rate": 7.998100732147157e-05, "loss": 0.8006, "step": 37024 }, { "epoch": 0.9804074970992793, "grad_norm": 0.78515625, "learning_rate": 7.997649679500765e-05, "loss": 0.7395, "step": 37025 }, { "epoch": 0.9804339767075736, "grad_norm": 0.69140625, "learning_rate": 7.997198631098243e-05, "loss": 0.5975, "step": 37026 }, { "epoch": 0.980460456315868, "grad_norm": 0.796875, "learning_rate": 7.996747586940543e-05, "loss": 0.8392, "step": 37027 }, { "epoch": 0.9804869359241624, "grad_norm": 0.734375, "learning_rate": 7.996296547028624e-05, "loss": 0.7497, "step": 37028 }, { "epoch": 0.9805134155324567, "grad_norm": 0.7890625, "learning_rate": 7.995845511363438e-05, "loss": 0.7992, "step": 37029 }, { "epoch": 0.9805398951407511, "grad_norm": 0.765625, "learning_rate": 7.995394479945944e-05, "loss": 0.7501, "step": 37030 }, { "epoch": 0.9805663747490455, "grad_norm": 0.73046875, "learning_rate": 7.994943452777096e-05, "loss": 0.8105, "step": 37031 }, { "epoch": 0.9805928543573399, "grad_norm": 0.7578125, "learning_rate": 7.994492429857852e-05, "loss": 0.7298, "step": 37032 }, { "epoch": 0.9806193339656343, "grad_norm": 0.73828125, "learning_rate": 7.994041411189167e-05, "loss": 0.7013, "step": 37033 }, { "epoch": 0.9806458135739287, "grad_norm": 0.8203125, "learning_rate": 7.993590396771995e-05, "loss": 0.7937, "step": 37034 }, { "epoch": 0.980672293182223, "grad_norm": 0.796875, "learning_rate": 7.993139386607289e-05, "loss": 0.7432, "step": 37035 }, { "epoch": 0.9806987727905174, "grad_norm": 0.74609375, "learning_rate": 7.992688380696015e-05, "loss": 0.7344, "step": 37036 }, { "epoch": 0.9807252523988118, "grad_norm": 0.91796875, "learning_rate": 7.992237379039123e-05, "loss": 0.8844, "step": 37037 }, { "epoch": 0.9807517320071062, "grad_norm": 0.75, "learning_rate": 7.991786381637569e-05, "loss": 0.7443, "step": 37038 }, { "epoch": 0.9807782116154006, "grad_norm": 0.7578125, "learning_rate": 7.99133538849231e-05, "loss": 0.8208, "step": 37039 }, { "epoch": 0.980804691223695, "grad_norm": 0.7734375, "learning_rate": 7.990884399604294e-05, "loss": 0.7828, "step": 37040 }, { "epoch": 0.9808311708319893, "grad_norm": 0.7734375, "learning_rate": 7.99043341497449e-05, "loss": 0.731, "step": 37041 }, { "epoch": 0.9808576504402837, "grad_norm": 0.83203125, "learning_rate": 7.989982434603847e-05, "loss": 0.9046, "step": 37042 }, { "epoch": 0.980884130048578, "grad_norm": 0.72265625, "learning_rate": 7.989531458493321e-05, "loss": 0.6739, "step": 37043 }, { "epoch": 0.9809106096568724, "grad_norm": 0.83984375, "learning_rate": 7.98908048664387e-05, "loss": 0.7912, "step": 37044 }, { "epoch": 0.9809370892651668, "grad_norm": 0.72265625, "learning_rate": 7.98862951905644e-05, "loss": 0.7986, "step": 37045 }, { "epoch": 0.9809635688734611, "grad_norm": 0.828125, "learning_rate": 7.988178555732002e-05, "loss": 0.8014, "step": 37046 }, { "epoch": 0.9809900484817555, "grad_norm": 0.80078125, "learning_rate": 7.987727596671504e-05, "loss": 0.8434, "step": 37047 }, { "epoch": 0.9810165280900499, "grad_norm": 0.7109375, "learning_rate": 7.987276641875902e-05, "loss": 0.7688, "step": 37048 }, { "epoch": 0.9810430076983443, "grad_norm": 0.81640625, "learning_rate": 7.986825691346152e-05, "loss": 0.8244, "step": 37049 }, { "epoch": 0.9810694873066387, "grad_norm": 0.79296875, "learning_rate": 7.986374745083208e-05, "loss": 0.8849, "step": 37050 }, { "epoch": 0.9810959669149331, "grad_norm": 0.7890625, "learning_rate": 7.985923803088031e-05, "loss": 0.7763, "step": 37051 }, { "epoch": 0.9811224465232274, "grad_norm": 0.86328125, "learning_rate": 7.985472865361571e-05, "loss": 0.8073, "step": 37052 }, { "epoch": 0.9811489261315218, "grad_norm": 0.7734375, "learning_rate": 7.985021931904788e-05, "loss": 0.7835, "step": 37053 }, { "epoch": 0.9811754057398162, "grad_norm": 0.7890625, "learning_rate": 7.984571002718634e-05, "loss": 0.7169, "step": 37054 }, { "epoch": 0.9812018853481106, "grad_norm": 0.875, "learning_rate": 7.984120077804065e-05, "loss": 0.9122, "step": 37055 }, { "epoch": 0.981228364956405, "grad_norm": 0.734375, "learning_rate": 7.98366915716204e-05, "loss": 0.7668, "step": 37056 }, { "epoch": 0.9812548445646994, "grad_norm": 0.76953125, "learning_rate": 7.983218240793514e-05, "loss": 0.7302, "step": 37057 }, { "epoch": 0.9812813241729937, "grad_norm": 0.7265625, "learning_rate": 7.982767328699441e-05, "loss": 0.7291, "step": 37058 }, { "epoch": 0.981307803781288, "grad_norm": 0.73828125, "learning_rate": 7.982316420880779e-05, "loss": 0.6469, "step": 37059 }, { "epoch": 0.9813342833895824, "grad_norm": 0.80859375, "learning_rate": 7.981865517338476e-05, "loss": 0.7948, "step": 37060 }, { "epoch": 0.9813607629978768, "grad_norm": 0.7734375, "learning_rate": 7.981414618073499e-05, "loss": 0.806, "step": 37061 }, { "epoch": 0.9813872426061712, "grad_norm": 0.8515625, "learning_rate": 7.980963723086799e-05, "loss": 0.8978, "step": 37062 }, { "epoch": 0.9814137222144655, "grad_norm": 0.78125, "learning_rate": 7.980512832379328e-05, "loss": 0.8657, "step": 37063 }, { "epoch": 0.9814402018227599, "grad_norm": 0.81640625, "learning_rate": 7.980061945952048e-05, "loss": 0.7889, "step": 37064 }, { "epoch": 0.9814666814310543, "grad_norm": 0.75390625, "learning_rate": 7.979611063805905e-05, "loss": 0.8137, "step": 37065 }, { "epoch": 0.9814931610393487, "grad_norm": 0.83203125, "learning_rate": 7.979160185941866e-05, "loss": 0.8226, "step": 37066 }, { "epoch": 0.9815196406476431, "grad_norm": 0.72265625, "learning_rate": 7.978709312360881e-05, "loss": 0.7399, "step": 37067 }, { "epoch": 0.9815461202559375, "grad_norm": 0.828125, "learning_rate": 7.978258443063906e-05, "loss": 0.8756, "step": 37068 }, { "epoch": 0.9815725998642318, "grad_norm": 0.83984375, "learning_rate": 7.977807578051897e-05, "loss": 0.7709, "step": 37069 }, { "epoch": 0.9815990794725262, "grad_norm": 0.78125, "learning_rate": 7.977356717325808e-05, "loss": 0.7235, "step": 37070 }, { "epoch": 0.9816255590808206, "grad_norm": 0.9296875, "learning_rate": 7.976905860886597e-05, "loss": 0.8517, "step": 37071 }, { "epoch": 0.981652038689115, "grad_norm": 0.81640625, "learning_rate": 7.976455008735218e-05, "loss": 0.8119, "step": 37072 }, { "epoch": 0.9816785182974094, "grad_norm": 0.77734375, "learning_rate": 7.976004160872625e-05, "loss": 0.7297, "step": 37073 }, { "epoch": 0.9817049979057038, "grad_norm": 0.81640625, "learning_rate": 7.975553317299777e-05, "loss": 0.7166, "step": 37074 }, { "epoch": 0.981731477513998, "grad_norm": 0.8359375, "learning_rate": 7.975102478017627e-05, "loss": 0.8268, "step": 37075 }, { "epoch": 0.9817579571222924, "grad_norm": 0.75, "learning_rate": 7.974651643027134e-05, "loss": 0.7433, "step": 37076 }, { "epoch": 0.9817844367305868, "grad_norm": 0.8125, "learning_rate": 7.974200812329251e-05, "loss": 0.7099, "step": 37077 }, { "epoch": 0.9818109163388812, "grad_norm": 0.796875, "learning_rate": 7.973749985924933e-05, "loss": 0.7037, "step": 37078 }, { "epoch": 0.9818373959471756, "grad_norm": 0.76171875, "learning_rate": 7.973299163815137e-05, "loss": 0.7034, "step": 37079 }, { "epoch": 0.98186387555547, "grad_norm": 0.78515625, "learning_rate": 7.972848346000812e-05, "loss": 0.8052, "step": 37080 }, { "epoch": 0.9818903551637643, "grad_norm": 0.79296875, "learning_rate": 7.972397532482924e-05, "loss": 0.6763, "step": 37081 }, { "epoch": 0.9819168347720587, "grad_norm": 0.76953125, "learning_rate": 7.971946723262424e-05, "loss": 0.6533, "step": 37082 }, { "epoch": 0.9819433143803531, "grad_norm": 0.76953125, "learning_rate": 7.971495918340266e-05, "loss": 0.7355, "step": 37083 }, { "epoch": 0.9819697939886475, "grad_norm": 0.8515625, "learning_rate": 7.971045117717407e-05, "loss": 0.8559, "step": 37084 }, { "epoch": 0.9819962735969419, "grad_norm": 0.75, "learning_rate": 7.970594321394797e-05, "loss": 0.6582, "step": 37085 }, { "epoch": 0.9820227532052362, "grad_norm": 0.83203125, "learning_rate": 7.970143529373402e-05, "loss": 0.7472, "step": 37086 }, { "epoch": 0.9820492328135306, "grad_norm": 0.765625, "learning_rate": 7.96969274165417e-05, "loss": 0.7868, "step": 37087 }, { "epoch": 0.982075712421825, "grad_norm": 0.828125, "learning_rate": 7.96924195823806e-05, "loss": 0.7918, "step": 37088 }, { "epoch": 0.9821021920301194, "grad_norm": 0.8359375, "learning_rate": 7.968791179126024e-05, "loss": 0.8814, "step": 37089 }, { "epoch": 0.9821286716384138, "grad_norm": 0.76953125, "learning_rate": 7.968340404319017e-05, "loss": 0.6651, "step": 37090 }, { "epoch": 0.9821551512467082, "grad_norm": 0.89453125, "learning_rate": 7.967889633818e-05, "loss": 0.8336, "step": 37091 }, { "epoch": 0.9821816308550024, "grad_norm": 0.79296875, "learning_rate": 7.967438867623921e-05, "loss": 0.7511, "step": 37092 }, { "epoch": 0.9822081104632968, "grad_norm": 0.734375, "learning_rate": 7.966988105737743e-05, "loss": 0.5527, "step": 37093 }, { "epoch": 0.9822345900715912, "grad_norm": 0.8203125, "learning_rate": 7.966537348160415e-05, "loss": 0.7182, "step": 37094 }, { "epoch": 0.9822610696798856, "grad_norm": 0.70703125, "learning_rate": 7.966086594892894e-05, "loss": 0.7128, "step": 37095 }, { "epoch": 0.98228754928818, "grad_norm": 0.8125, "learning_rate": 7.96563584593614e-05, "loss": 0.8658, "step": 37096 }, { "epoch": 0.9823140288964743, "grad_norm": 0.79296875, "learning_rate": 7.965185101291103e-05, "loss": 0.6338, "step": 37097 }, { "epoch": 0.9823405085047687, "grad_norm": 0.7734375, "learning_rate": 7.964734360958739e-05, "loss": 0.7789, "step": 37098 }, { "epoch": 0.9823669881130631, "grad_norm": 0.80078125, "learning_rate": 7.964283624940006e-05, "loss": 0.8225, "step": 37099 }, { "epoch": 0.9823934677213575, "grad_norm": 0.8125, "learning_rate": 7.963832893235852e-05, "loss": 0.6875, "step": 37100 }, { "epoch": 0.9824199473296519, "grad_norm": 0.77734375, "learning_rate": 7.963382165847242e-05, "loss": 0.7181, "step": 37101 }, { "epoch": 0.9824464269379463, "grad_norm": 0.73828125, "learning_rate": 7.962931442775128e-05, "loss": 0.6923, "step": 37102 }, { "epoch": 0.9824729065462406, "grad_norm": 0.73046875, "learning_rate": 7.962480724020465e-05, "loss": 0.7814, "step": 37103 }, { "epoch": 0.982499386154535, "grad_norm": 0.8515625, "learning_rate": 7.962030009584204e-05, "loss": 0.8263, "step": 37104 }, { "epoch": 0.9825258657628294, "grad_norm": 0.82421875, "learning_rate": 7.961579299467304e-05, "loss": 0.8102, "step": 37105 }, { "epoch": 0.9825523453711238, "grad_norm": 0.7734375, "learning_rate": 7.961128593670722e-05, "loss": 0.776, "step": 37106 }, { "epoch": 0.9825788249794182, "grad_norm": 0.70703125, "learning_rate": 7.960677892195413e-05, "loss": 0.6339, "step": 37107 }, { "epoch": 0.9826053045877124, "grad_norm": 0.7578125, "learning_rate": 7.96022719504233e-05, "loss": 0.7345, "step": 37108 }, { "epoch": 0.9826317841960068, "grad_norm": 0.84375, "learning_rate": 7.959776502212428e-05, "loss": 0.8788, "step": 37109 }, { "epoch": 0.9826582638043012, "grad_norm": 0.796875, "learning_rate": 7.95932581370666e-05, "loss": 0.8022, "step": 37110 }, { "epoch": 0.9826847434125956, "grad_norm": 2.34375, "learning_rate": 7.958875129525986e-05, "loss": 0.6148, "step": 37111 }, { "epoch": 0.98271122302089, "grad_norm": 0.81640625, "learning_rate": 7.958424449671362e-05, "loss": 0.7441, "step": 37112 }, { "epoch": 0.9827377026291844, "grad_norm": 0.7890625, "learning_rate": 7.957973774143742e-05, "loss": 0.715, "step": 37113 }, { "epoch": 0.9827641822374787, "grad_norm": 0.796875, "learning_rate": 7.957523102944077e-05, "loss": 0.9448, "step": 37114 }, { "epoch": 0.9827906618457731, "grad_norm": 0.84375, "learning_rate": 7.957072436073325e-05, "loss": 0.823, "step": 37115 }, { "epoch": 0.9828171414540675, "grad_norm": 0.76953125, "learning_rate": 7.956621773532442e-05, "loss": 0.7406, "step": 37116 }, { "epoch": 0.9828436210623619, "grad_norm": 0.69921875, "learning_rate": 7.956171115322381e-05, "loss": 0.7285, "step": 37117 }, { "epoch": 0.9828701006706563, "grad_norm": 0.859375, "learning_rate": 7.9557204614441e-05, "loss": 0.9091, "step": 37118 }, { "epoch": 0.9828965802789507, "grad_norm": 0.77734375, "learning_rate": 7.955269811898554e-05, "loss": 0.8597, "step": 37119 }, { "epoch": 0.982923059887245, "grad_norm": 0.77734375, "learning_rate": 7.954819166686691e-05, "loss": 0.8308, "step": 37120 }, { "epoch": 0.9829495394955394, "grad_norm": 0.6953125, "learning_rate": 7.954368525809477e-05, "loss": 0.8112, "step": 37121 }, { "epoch": 0.9829760191038338, "grad_norm": 0.81640625, "learning_rate": 7.953917889267861e-05, "loss": 0.9193, "step": 37122 }, { "epoch": 0.9830024987121282, "grad_norm": 0.765625, "learning_rate": 7.953467257062799e-05, "loss": 0.7558, "step": 37123 }, { "epoch": 0.9830289783204225, "grad_norm": 0.79296875, "learning_rate": 7.953016629195246e-05, "loss": 0.6789, "step": 37124 }, { "epoch": 0.9830554579287168, "grad_norm": 0.72265625, "learning_rate": 7.952566005666159e-05, "loss": 0.7292, "step": 37125 }, { "epoch": 0.9830819375370112, "grad_norm": 0.75, "learning_rate": 7.952115386476486e-05, "loss": 0.6784, "step": 37126 }, { "epoch": 0.9831084171453056, "grad_norm": 0.796875, "learning_rate": 7.951664771627192e-05, "loss": 0.7426, "step": 37127 }, { "epoch": 0.9831348967536, "grad_norm": 0.73046875, "learning_rate": 7.951214161119228e-05, "loss": 0.6629, "step": 37128 }, { "epoch": 0.9831613763618944, "grad_norm": 0.828125, "learning_rate": 7.950763554953547e-05, "loss": 0.733, "step": 37129 }, { "epoch": 0.9831878559701888, "grad_norm": 0.78125, "learning_rate": 7.950312953131105e-05, "loss": 0.6583, "step": 37130 }, { "epoch": 0.9832143355784831, "grad_norm": 0.80078125, "learning_rate": 7.949862355652855e-05, "loss": 0.8077, "step": 37131 }, { "epoch": 0.9832408151867775, "grad_norm": 0.7734375, "learning_rate": 7.949411762519757e-05, "loss": 0.8642, "step": 37132 }, { "epoch": 0.9832672947950719, "grad_norm": 0.84765625, "learning_rate": 7.948961173732765e-05, "loss": 0.7279, "step": 37133 }, { "epoch": 0.9832937744033663, "grad_norm": 0.8046875, "learning_rate": 7.948510589292833e-05, "loss": 0.8468, "step": 37134 }, { "epoch": 0.9833202540116607, "grad_norm": 0.765625, "learning_rate": 7.948060009200916e-05, "loss": 0.8768, "step": 37135 }, { "epoch": 0.9833467336199551, "grad_norm": 0.76171875, "learning_rate": 7.947609433457963e-05, "loss": 0.7573, "step": 37136 }, { "epoch": 0.9833732132282494, "grad_norm": 0.74609375, "learning_rate": 7.94715886206494e-05, "loss": 0.6833, "step": 37137 }, { "epoch": 0.9833996928365438, "grad_norm": 0.765625, "learning_rate": 7.946708295022795e-05, "loss": 0.8403, "step": 37138 }, { "epoch": 0.9834261724448382, "grad_norm": 0.86328125, "learning_rate": 7.946257732332485e-05, "loss": 0.7931, "step": 37139 }, { "epoch": 0.9834526520531326, "grad_norm": 0.8359375, "learning_rate": 7.945807173994962e-05, "loss": 0.7579, "step": 37140 }, { "epoch": 0.9834791316614269, "grad_norm": 0.76953125, "learning_rate": 7.945356620011183e-05, "loss": 0.8058, "step": 37141 }, { "epoch": 0.9835056112697212, "grad_norm": 0.80859375, "learning_rate": 7.944906070382106e-05, "loss": 0.8084, "step": 37142 }, { "epoch": 0.9835320908780156, "grad_norm": 0.8203125, "learning_rate": 7.944455525108682e-05, "loss": 0.7017, "step": 37143 }, { "epoch": 0.98355857048631, "grad_norm": 0.85546875, "learning_rate": 7.944004984191868e-05, "loss": 0.8081, "step": 37144 }, { "epoch": 0.9835850500946044, "grad_norm": 0.85546875, "learning_rate": 7.943554447632617e-05, "loss": 0.8434, "step": 37145 }, { "epoch": 0.9836115297028988, "grad_norm": 0.80078125, "learning_rate": 7.943103915431882e-05, "loss": 0.6887, "step": 37146 }, { "epoch": 0.9836380093111932, "grad_norm": 0.76953125, "learning_rate": 7.942653387590623e-05, "loss": 0.8784, "step": 37147 }, { "epoch": 0.9836644889194875, "grad_norm": 0.73828125, "learning_rate": 7.942202864109794e-05, "loss": 0.7909, "step": 37148 }, { "epoch": 0.9836909685277819, "grad_norm": 0.77734375, "learning_rate": 7.941752344990349e-05, "loss": 0.7716, "step": 37149 }, { "epoch": 0.9837174481360763, "grad_norm": 0.75, "learning_rate": 7.941301830233241e-05, "loss": 0.7263, "step": 37150 }, { "epoch": 0.9837439277443707, "grad_norm": 0.7890625, "learning_rate": 7.940851319839422e-05, "loss": 0.7106, "step": 37151 }, { "epoch": 0.9837704073526651, "grad_norm": 0.921875, "learning_rate": 7.940400813809855e-05, "loss": 0.7081, "step": 37152 }, { "epoch": 0.9837968869609595, "grad_norm": 0.98046875, "learning_rate": 7.939950312145492e-05, "loss": 0.8301, "step": 37153 }, { "epoch": 0.9838233665692538, "grad_norm": 0.67578125, "learning_rate": 7.939499814847286e-05, "loss": 0.686, "step": 37154 }, { "epoch": 0.9838498461775482, "grad_norm": 0.7890625, "learning_rate": 7.939049321916191e-05, "loss": 0.7766, "step": 37155 }, { "epoch": 0.9838763257858426, "grad_norm": 0.9140625, "learning_rate": 7.938598833353162e-05, "loss": 0.893, "step": 37156 }, { "epoch": 0.9839028053941369, "grad_norm": 0.796875, "learning_rate": 7.938148349159158e-05, "loss": 0.7905, "step": 37157 }, { "epoch": 0.9839292850024313, "grad_norm": 0.75, "learning_rate": 7.937697869335129e-05, "loss": 0.6793, "step": 37158 }, { "epoch": 0.9839557646107256, "grad_norm": 0.84375, "learning_rate": 7.93724739388203e-05, "loss": 0.7965, "step": 37159 }, { "epoch": 0.98398224421902, "grad_norm": 0.80078125, "learning_rate": 7.93679692280082e-05, "loss": 0.7921, "step": 37160 }, { "epoch": 0.9840087238273144, "grad_norm": 0.78515625, "learning_rate": 7.936346456092448e-05, "loss": 0.8276, "step": 37161 }, { "epoch": 0.9840352034356088, "grad_norm": 0.78515625, "learning_rate": 7.935895993757875e-05, "loss": 0.7366, "step": 37162 }, { "epoch": 0.9840616830439032, "grad_norm": 0.77734375, "learning_rate": 7.935445535798052e-05, "loss": 0.7722, "step": 37163 }, { "epoch": 0.9840881626521976, "grad_norm": 1.1875, "learning_rate": 7.934995082213934e-05, "loss": 0.6926, "step": 37164 }, { "epoch": 0.984114642260492, "grad_norm": 0.70703125, "learning_rate": 7.934544633006475e-05, "loss": 0.632, "step": 37165 }, { "epoch": 0.9841411218687863, "grad_norm": 0.81640625, "learning_rate": 7.934094188176629e-05, "loss": 0.7598, "step": 37166 }, { "epoch": 0.9841676014770807, "grad_norm": 0.8359375, "learning_rate": 7.933643747725355e-05, "loss": 0.8248, "step": 37167 }, { "epoch": 0.9841940810853751, "grad_norm": 0.73828125, "learning_rate": 7.933193311653604e-05, "loss": 0.7043, "step": 37168 }, { "epoch": 0.9842205606936695, "grad_norm": 0.8515625, "learning_rate": 7.932742879962331e-05, "loss": 0.7874, "step": 37169 }, { "epoch": 0.9842470403019639, "grad_norm": 0.79296875, "learning_rate": 7.932292452652494e-05, "loss": 0.8504, "step": 37170 }, { "epoch": 0.9842735199102582, "grad_norm": 0.984375, "learning_rate": 7.93184202972504e-05, "loss": 0.9006, "step": 37171 }, { "epoch": 0.9842999995185526, "grad_norm": 0.82421875, "learning_rate": 7.931391611180931e-05, "loss": 0.7129, "step": 37172 }, { "epoch": 0.9843264791268469, "grad_norm": 0.8203125, "learning_rate": 7.930941197021121e-05, "loss": 0.6754, "step": 37173 }, { "epoch": 0.9843529587351413, "grad_norm": 0.875, "learning_rate": 7.930490787246563e-05, "loss": 0.8153, "step": 37174 }, { "epoch": 0.9843794383434357, "grad_norm": 0.7265625, "learning_rate": 7.93004038185821e-05, "loss": 0.8134, "step": 37175 }, { "epoch": 0.98440591795173, "grad_norm": 0.828125, "learning_rate": 7.929589980857018e-05, "loss": 0.6777, "step": 37176 }, { "epoch": 0.9844323975600244, "grad_norm": 0.76953125, "learning_rate": 7.92913958424394e-05, "loss": 0.8001, "step": 37177 }, { "epoch": 0.9844588771683188, "grad_norm": 0.79296875, "learning_rate": 7.928689192019935e-05, "loss": 0.8523, "step": 37178 }, { "epoch": 0.9844853567766132, "grad_norm": 0.75390625, "learning_rate": 7.928238804185954e-05, "loss": 0.7843, "step": 37179 }, { "epoch": 0.9845118363849076, "grad_norm": 0.8046875, "learning_rate": 7.927788420742954e-05, "loss": 0.9015, "step": 37180 }, { "epoch": 0.984538315993202, "grad_norm": 0.7578125, "learning_rate": 7.927338041691886e-05, "loss": 0.7911, "step": 37181 }, { "epoch": 0.9845647956014963, "grad_norm": 0.7109375, "learning_rate": 7.926887667033706e-05, "loss": 0.7286, "step": 37182 }, { "epoch": 0.9845912752097907, "grad_norm": 0.765625, "learning_rate": 7.926437296769371e-05, "loss": 0.7395, "step": 37183 }, { "epoch": 0.9846177548180851, "grad_norm": 0.70703125, "learning_rate": 7.925986930899834e-05, "loss": 0.771, "step": 37184 }, { "epoch": 0.9846442344263795, "grad_norm": 0.91015625, "learning_rate": 7.925536569426049e-05, "loss": 0.731, "step": 37185 }, { "epoch": 0.9846707140346739, "grad_norm": 0.78515625, "learning_rate": 7.925086212348967e-05, "loss": 0.8381, "step": 37186 }, { "epoch": 0.9846971936429683, "grad_norm": 0.87109375, "learning_rate": 7.924635859669549e-05, "loss": 0.7635, "step": 37187 }, { "epoch": 0.9847236732512626, "grad_norm": 0.8046875, "learning_rate": 7.924185511388746e-05, "loss": 0.8257, "step": 37188 }, { "epoch": 0.984750152859557, "grad_norm": 0.73828125, "learning_rate": 7.923735167507516e-05, "loss": 0.7513, "step": 37189 }, { "epoch": 0.9847766324678513, "grad_norm": 0.765625, "learning_rate": 7.92328482802681e-05, "loss": 0.8313, "step": 37190 }, { "epoch": 0.9848031120761457, "grad_norm": 0.84375, "learning_rate": 7.922834492947576e-05, "loss": 0.9197, "step": 37191 }, { "epoch": 0.9848295916844401, "grad_norm": 0.79296875, "learning_rate": 7.922384162270782e-05, "loss": 0.7742, "step": 37192 }, { "epoch": 0.9848560712927344, "grad_norm": 0.77734375, "learning_rate": 7.921933835997376e-05, "loss": 0.8859, "step": 37193 }, { "epoch": 0.9848825509010288, "grad_norm": 0.74609375, "learning_rate": 7.921483514128312e-05, "loss": 0.7324, "step": 37194 }, { "epoch": 0.9849090305093232, "grad_norm": 0.74609375, "learning_rate": 7.921033196664545e-05, "loss": 0.7646, "step": 37195 }, { "epoch": 0.9849355101176176, "grad_norm": 0.80078125, "learning_rate": 7.920582883607025e-05, "loss": 0.6652, "step": 37196 }, { "epoch": 0.984961989725912, "grad_norm": 0.9296875, "learning_rate": 7.920132574956716e-05, "loss": 0.8126, "step": 37197 }, { "epoch": 0.9849884693342064, "grad_norm": 0.8046875, "learning_rate": 7.919682270714566e-05, "loss": 0.9109, "step": 37198 }, { "epoch": 0.9850149489425007, "grad_norm": 0.953125, "learning_rate": 7.91923197088153e-05, "loss": 0.8215, "step": 37199 }, { "epoch": 0.9850414285507951, "grad_norm": 0.75, "learning_rate": 7.918781675458565e-05, "loss": 0.72, "step": 37200 }, { "epoch": 0.9850679081590895, "grad_norm": 0.8125, "learning_rate": 7.91833138444662e-05, "loss": 0.7408, "step": 37201 }, { "epoch": 0.9850943877673839, "grad_norm": 0.76953125, "learning_rate": 7.917881097846653e-05, "loss": 0.7836, "step": 37202 }, { "epoch": 0.9851208673756783, "grad_norm": 0.7265625, "learning_rate": 7.91743081565962e-05, "loss": 0.7871, "step": 37203 }, { "epoch": 0.9851473469839727, "grad_norm": 0.80859375, "learning_rate": 7.916980537886473e-05, "loss": 0.736, "step": 37204 }, { "epoch": 0.985173826592267, "grad_norm": 0.765625, "learning_rate": 7.916530264528166e-05, "loss": 0.8755, "step": 37205 }, { "epoch": 0.9852003062005613, "grad_norm": 0.78125, "learning_rate": 7.91607999558565e-05, "loss": 0.5911, "step": 37206 }, { "epoch": 0.9852267858088557, "grad_norm": 0.79296875, "learning_rate": 7.915629731059888e-05, "loss": 0.7854, "step": 37207 }, { "epoch": 0.9852532654171501, "grad_norm": 0.80078125, "learning_rate": 7.91517947095183e-05, "loss": 0.8084, "step": 37208 }, { "epoch": 0.9852797450254445, "grad_norm": 0.7578125, "learning_rate": 7.91472921526243e-05, "loss": 0.6612, "step": 37209 }, { "epoch": 0.9853062246337388, "grad_norm": 0.8359375, "learning_rate": 7.914278963992641e-05, "loss": 0.7109, "step": 37210 }, { "epoch": 0.9853327042420332, "grad_norm": 0.8046875, "learning_rate": 7.913828717143416e-05, "loss": 0.6974, "step": 37211 }, { "epoch": 0.9853591838503276, "grad_norm": 0.79296875, "learning_rate": 7.913378474715715e-05, "loss": 0.7863, "step": 37212 }, { "epoch": 0.985385663458622, "grad_norm": 0.79296875, "learning_rate": 7.91292823671049e-05, "loss": 0.6629, "step": 37213 }, { "epoch": 0.9854121430669164, "grad_norm": 0.95703125, "learning_rate": 7.912478003128693e-05, "loss": 0.9347, "step": 37214 }, { "epoch": 0.9854386226752108, "grad_norm": 0.88671875, "learning_rate": 7.91202777397128e-05, "loss": 0.8927, "step": 37215 }, { "epoch": 0.9854651022835051, "grad_norm": 0.91015625, "learning_rate": 7.911577549239201e-05, "loss": 0.6598, "step": 37216 }, { "epoch": 0.9854915818917995, "grad_norm": 0.81640625, "learning_rate": 7.911127328933418e-05, "loss": 0.917, "step": 37217 }, { "epoch": 0.9855180615000939, "grad_norm": 0.88671875, "learning_rate": 7.91067711305488e-05, "loss": 0.8067, "step": 37218 }, { "epoch": 0.9855445411083883, "grad_norm": 0.7421875, "learning_rate": 7.910226901604543e-05, "loss": 0.7993, "step": 37219 }, { "epoch": 0.9855710207166827, "grad_norm": 0.78515625, "learning_rate": 7.909776694583362e-05, "loss": 0.7646, "step": 37220 }, { "epoch": 0.9855975003249771, "grad_norm": 0.75390625, "learning_rate": 7.909326491992287e-05, "loss": 0.6937, "step": 37221 }, { "epoch": 0.9856239799332713, "grad_norm": 0.75390625, "learning_rate": 7.908876293832277e-05, "loss": 0.8402, "step": 37222 }, { "epoch": 0.9856504595415657, "grad_norm": 0.79296875, "learning_rate": 7.908426100104284e-05, "loss": 0.7956, "step": 37223 }, { "epoch": 0.9856769391498601, "grad_norm": 0.73828125, "learning_rate": 7.907975910809263e-05, "loss": 0.7733, "step": 37224 }, { "epoch": 0.9857034187581545, "grad_norm": 0.7578125, "learning_rate": 7.907525725948165e-05, "loss": 0.7206, "step": 37225 }, { "epoch": 0.9857298983664489, "grad_norm": 0.734375, "learning_rate": 7.907075545521947e-05, "loss": 0.7595, "step": 37226 }, { "epoch": 0.9857563779747432, "grad_norm": 0.83203125, "learning_rate": 7.906625369531564e-05, "loss": 0.8227, "step": 37227 }, { "epoch": 0.9857828575830376, "grad_norm": 0.84375, "learning_rate": 7.90617519797797e-05, "loss": 0.7678, "step": 37228 }, { "epoch": 0.985809337191332, "grad_norm": 0.765625, "learning_rate": 7.905725030862117e-05, "loss": 0.7578, "step": 37229 }, { "epoch": 0.9858358167996264, "grad_norm": 0.77734375, "learning_rate": 7.90527486818496e-05, "loss": 0.793, "step": 37230 }, { "epoch": 0.9858622964079208, "grad_norm": 0.7890625, "learning_rate": 7.904824709947449e-05, "loss": 0.8281, "step": 37231 }, { "epoch": 0.9858887760162152, "grad_norm": 0.76171875, "learning_rate": 7.904374556150547e-05, "loss": 0.8604, "step": 37232 }, { "epoch": 0.9859152556245095, "grad_norm": 0.8515625, "learning_rate": 7.903924406795202e-05, "loss": 0.7719, "step": 37233 }, { "epoch": 0.9859417352328039, "grad_norm": 0.7734375, "learning_rate": 7.903474261882371e-05, "loss": 0.8126, "step": 37234 }, { "epoch": 0.9859682148410983, "grad_norm": 0.74609375, "learning_rate": 7.903024121413006e-05, "loss": 0.7178, "step": 37235 }, { "epoch": 0.9859946944493927, "grad_norm": 0.78515625, "learning_rate": 7.902573985388058e-05, "loss": 0.8169, "step": 37236 }, { "epoch": 0.9860211740576871, "grad_norm": 0.81640625, "learning_rate": 7.902123853808487e-05, "loss": 0.7385, "step": 37237 }, { "epoch": 0.9860476536659815, "grad_norm": 0.83984375, "learning_rate": 7.901673726675247e-05, "loss": 0.8092, "step": 37238 }, { "epoch": 0.9860741332742757, "grad_norm": 0.76953125, "learning_rate": 7.901223603989287e-05, "loss": 0.7826, "step": 37239 }, { "epoch": 0.9861006128825701, "grad_norm": 0.7890625, "learning_rate": 7.900773485751566e-05, "loss": 0.8103, "step": 37240 }, { "epoch": 0.9861270924908645, "grad_norm": 0.78125, "learning_rate": 7.900323371963032e-05, "loss": 0.7108, "step": 37241 }, { "epoch": 0.9861535720991589, "grad_norm": 0.77734375, "learning_rate": 7.899873262624646e-05, "loss": 0.7277, "step": 37242 }, { "epoch": 0.9861800517074533, "grad_norm": 0.85546875, "learning_rate": 7.899423157737357e-05, "loss": 0.7117, "step": 37243 }, { "epoch": 0.9862065313157476, "grad_norm": 0.80859375, "learning_rate": 7.898973057302119e-05, "loss": 0.784, "step": 37244 }, { "epoch": 0.986233010924042, "grad_norm": 0.71875, "learning_rate": 7.89852296131989e-05, "loss": 0.7246, "step": 37245 }, { "epoch": 0.9862594905323364, "grad_norm": 0.8046875, "learning_rate": 7.89807286979162e-05, "loss": 0.9269, "step": 37246 }, { "epoch": 0.9862859701406308, "grad_norm": 0.78515625, "learning_rate": 7.897622782718265e-05, "loss": 0.8048, "step": 37247 }, { "epoch": 0.9863124497489252, "grad_norm": 0.80078125, "learning_rate": 7.897172700100779e-05, "loss": 0.8411, "step": 37248 }, { "epoch": 0.9863389293572196, "grad_norm": 0.78125, "learning_rate": 7.896722621940115e-05, "loss": 0.7507, "step": 37249 }, { "epoch": 0.986365408965514, "grad_norm": 0.7578125, "learning_rate": 7.896272548237226e-05, "loss": 0.8153, "step": 37250 }, { "epoch": 0.9863918885738083, "grad_norm": 0.796875, "learning_rate": 7.895822478993066e-05, "loss": 0.7727, "step": 37251 }, { "epoch": 0.9864183681821027, "grad_norm": 0.84765625, "learning_rate": 7.895372414208592e-05, "loss": 0.8363, "step": 37252 }, { "epoch": 0.9864448477903971, "grad_norm": 0.8203125, "learning_rate": 7.894922353884757e-05, "loss": 0.7447, "step": 37253 }, { "epoch": 0.9864713273986915, "grad_norm": 0.90625, "learning_rate": 7.894472298022512e-05, "loss": 0.8122, "step": 37254 }, { "epoch": 0.9864978070069857, "grad_norm": 0.77734375, "learning_rate": 7.894022246622814e-05, "loss": 0.8741, "step": 37255 }, { "epoch": 0.9865242866152801, "grad_norm": 0.796875, "learning_rate": 7.893572199686612e-05, "loss": 0.7199, "step": 37256 }, { "epoch": 0.9865507662235745, "grad_norm": 0.78125, "learning_rate": 7.893122157214866e-05, "loss": 0.8093, "step": 37257 }, { "epoch": 0.9865772458318689, "grad_norm": 0.7265625, "learning_rate": 7.892672119208528e-05, "loss": 0.7625, "step": 37258 }, { "epoch": 0.9866037254401633, "grad_norm": 0.76953125, "learning_rate": 7.892222085668551e-05, "loss": 0.7041, "step": 37259 }, { "epoch": 0.9866302050484577, "grad_norm": 0.796875, "learning_rate": 7.891772056595889e-05, "loss": 0.7271, "step": 37260 }, { "epoch": 0.986656684656752, "grad_norm": 0.74609375, "learning_rate": 7.891322031991492e-05, "loss": 0.7488, "step": 37261 }, { "epoch": 0.9866831642650464, "grad_norm": 0.7734375, "learning_rate": 7.89087201185632e-05, "loss": 0.8329, "step": 37262 }, { "epoch": 0.9867096438733408, "grad_norm": 0.8046875, "learning_rate": 7.890421996191324e-05, "loss": 0.8083, "step": 37263 }, { "epoch": 0.9867361234816352, "grad_norm": 0.875, "learning_rate": 7.889971984997458e-05, "loss": 0.7663, "step": 37264 }, { "epoch": 0.9867626030899296, "grad_norm": 0.75390625, "learning_rate": 7.889521978275677e-05, "loss": 0.7568, "step": 37265 }, { "epoch": 0.986789082698224, "grad_norm": 0.80078125, "learning_rate": 7.889071976026931e-05, "loss": 0.8798, "step": 37266 }, { "epoch": 0.9868155623065183, "grad_norm": 0.8125, "learning_rate": 7.888621978252179e-05, "loss": 0.8083, "step": 37267 }, { "epoch": 0.9868420419148127, "grad_norm": 0.76171875, "learning_rate": 7.88817198495237e-05, "loss": 0.7549, "step": 37268 }, { "epoch": 0.9868685215231071, "grad_norm": 0.765625, "learning_rate": 7.887721996128462e-05, "loss": 0.8315, "step": 37269 }, { "epoch": 0.9868950011314015, "grad_norm": 0.76171875, "learning_rate": 7.887272011781405e-05, "loss": 0.785, "step": 37270 }, { "epoch": 0.9869214807396958, "grad_norm": 0.7421875, "learning_rate": 7.886822031912155e-05, "loss": 0.7191, "step": 37271 }, { "epoch": 0.9869479603479901, "grad_norm": 0.79296875, "learning_rate": 7.886372056521662e-05, "loss": 0.7944, "step": 37272 }, { "epoch": 0.9869744399562845, "grad_norm": 0.75, "learning_rate": 7.885922085610884e-05, "loss": 0.734, "step": 37273 }, { "epoch": 0.9870009195645789, "grad_norm": 0.875, "learning_rate": 7.885472119180776e-05, "loss": 0.7459, "step": 37274 }, { "epoch": 0.9870273991728733, "grad_norm": 0.83203125, "learning_rate": 7.885022157232287e-05, "loss": 0.8654, "step": 37275 }, { "epoch": 0.9870538787811677, "grad_norm": 0.84375, "learning_rate": 7.884572199766373e-05, "loss": 0.8877, "step": 37276 }, { "epoch": 0.9870803583894621, "grad_norm": 0.8046875, "learning_rate": 7.884122246783984e-05, "loss": 0.8711, "step": 37277 }, { "epoch": 0.9871068379977564, "grad_norm": 0.7109375, "learning_rate": 7.883672298286081e-05, "loss": 0.7432, "step": 37278 }, { "epoch": 0.9871333176060508, "grad_norm": 0.7578125, "learning_rate": 7.883222354273614e-05, "loss": 0.9213, "step": 37279 }, { "epoch": 0.9871597972143452, "grad_norm": 0.7265625, "learning_rate": 7.882772414747535e-05, "loss": 0.8372, "step": 37280 }, { "epoch": 0.9871862768226396, "grad_norm": 0.8515625, "learning_rate": 7.8823224797088e-05, "loss": 0.7706, "step": 37281 }, { "epoch": 0.987212756430934, "grad_norm": 0.87109375, "learning_rate": 7.881872549158356e-05, "loss": 0.8132, "step": 37282 }, { "epoch": 0.9872392360392284, "grad_norm": 0.76171875, "learning_rate": 7.881422623097167e-05, "loss": 0.8483, "step": 37283 }, { "epoch": 0.9872657156475227, "grad_norm": 0.75, "learning_rate": 7.880972701526182e-05, "loss": 0.821, "step": 37284 }, { "epoch": 0.9872921952558171, "grad_norm": 0.8203125, "learning_rate": 7.880522784446354e-05, "loss": 0.8037, "step": 37285 }, { "epoch": 0.9873186748641115, "grad_norm": 0.76953125, "learning_rate": 7.880072871858636e-05, "loss": 0.8375, "step": 37286 }, { "epoch": 0.9873451544724059, "grad_norm": 0.7890625, "learning_rate": 7.87962296376398e-05, "loss": 0.7609, "step": 37287 }, { "epoch": 0.9873716340807002, "grad_norm": 0.8046875, "learning_rate": 7.879173060163346e-05, "loss": 0.6633, "step": 37288 }, { "epoch": 0.9873981136889945, "grad_norm": 0.765625, "learning_rate": 7.878723161057682e-05, "loss": 0.792, "step": 37289 }, { "epoch": 0.9874245932972889, "grad_norm": 0.77734375, "learning_rate": 7.878273266447944e-05, "loss": 0.7658, "step": 37290 }, { "epoch": 0.9874510729055833, "grad_norm": 0.79296875, "learning_rate": 7.877823376335084e-05, "loss": 0.8647, "step": 37291 }, { "epoch": 0.9874775525138777, "grad_norm": 0.78515625, "learning_rate": 7.877373490720051e-05, "loss": 0.7639, "step": 37292 }, { "epoch": 0.9875040321221721, "grad_norm": 0.85546875, "learning_rate": 7.87692360960381e-05, "loss": 0.9981, "step": 37293 }, { "epoch": 0.9875305117304665, "grad_norm": 0.8125, "learning_rate": 7.876473732987307e-05, "loss": 0.7087, "step": 37294 }, { "epoch": 0.9875569913387608, "grad_norm": 0.765625, "learning_rate": 7.876023860871495e-05, "loss": 0.7499, "step": 37295 }, { "epoch": 0.9875834709470552, "grad_norm": 0.87890625, "learning_rate": 7.87557399325733e-05, "loss": 0.7993, "step": 37296 }, { "epoch": 0.9876099505553496, "grad_norm": 0.81640625, "learning_rate": 7.875124130145761e-05, "loss": 0.7643, "step": 37297 }, { "epoch": 0.987636430163644, "grad_norm": 0.73046875, "learning_rate": 7.874674271537748e-05, "loss": 0.7813, "step": 37298 }, { "epoch": 0.9876629097719384, "grad_norm": 0.89453125, "learning_rate": 7.874224417434242e-05, "loss": 0.8987, "step": 37299 }, { "epoch": 0.9876893893802328, "grad_norm": 0.71484375, "learning_rate": 7.873774567836197e-05, "loss": 0.7339, "step": 37300 }, { "epoch": 0.9877158689885271, "grad_norm": 0.76953125, "learning_rate": 7.873324722744563e-05, "loss": 0.7616, "step": 37301 }, { "epoch": 0.9877423485968215, "grad_norm": 0.8359375, "learning_rate": 7.872874882160293e-05, "loss": 0.7573, "step": 37302 }, { "epoch": 0.9877688282051159, "grad_norm": 0.71875, "learning_rate": 7.872425046084346e-05, "loss": 0.688, "step": 37303 }, { "epoch": 0.9877953078134102, "grad_norm": 0.8046875, "learning_rate": 7.871975214517675e-05, "loss": 0.825, "step": 37304 }, { "epoch": 0.9878217874217046, "grad_norm": 0.828125, "learning_rate": 7.87152538746123e-05, "loss": 0.8277, "step": 37305 }, { "epoch": 0.987848267029999, "grad_norm": 0.79296875, "learning_rate": 7.871075564915964e-05, "loss": 0.7515, "step": 37306 }, { "epoch": 0.9878747466382933, "grad_norm": 0.8046875, "learning_rate": 7.87062574688283e-05, "loss": 0.6799, "step": 37307 }, { "epoch": 0.9879012262465877, "grad_norm": 0.72265625, "learning_rate": 7.870175933362784e-05, "loss": 0.7358, "step": 37308 }, { "epoch": 0.9879277058548821, "grad_norm": 0.8046875, "learning_rate": 7.869726124356782e-05, "loss": 0.7956, "step": 37309 }, { "epoch": 0.9879541854631765, "grad_norm": 0.76953125, "learning_rate": 7.86927631986577e-05, "loss": 0.6724, "step": 37310 }, { "epoch": 0.9879806650714709, "grad_norm": 0.76171875, "learning_rate": 7.868826519890704e-05, "loss": 0.7958, "step": 37311 }, { "epoch": 0.9880071446797652, "grad_norm": 0.75, "learning_rate": 7.86837672443254e-05, "loss": 0.7531, "step": 37312 }, { "epoch": 0.9880336242880596, "grad_norm": 0.87890625, "learning_rate": 7.86792693349223e-05, "loss": 0.7016, "step": 37313 }, { "epoch": 0.988060103896354, "grad_norm": 0.75390625, "learning_rate": 7.867477147070728e-05, "loss": 0.8106, "step": 37314 }, { "epoch": 0.9880865835046484, "grad_norm": 0.87109375, "learning_rate": 7.867027365168985e-05, "loss": 0.7591, "step": 37315 }, { "epoch": 0.9881130631129428, "grad_norm": 0.82421875, "learning_rate": 7.866577587787957e-05, "loss": 0.8795, "step": 37316 }, { "epoch": 0.9881395427212372, "grad_norm": 0.76171875, "learning_rate": 7.866127814928591e-05, "loss": 0.8317, "step": 37317 }, { "epoch": 0.9881660223295315, "grad_norm": 0.81640625, "learning_rate": 7.865678046591848e-05, "loss": 0.7185, "step": 37318 }, { "epoch": 0.9881925019378259, "grad_norm": 0.921875, "learning_rate": 7.865228282778682e-05, "loss": 1.0652, "step": 37319 }, { "epoch": 0.9882189815461202, "grad_norm": 0.70703125, "learning_rate": 7.86477852349004e-05, "loss": 0.6485, "step": 37320 }, { "epoch": 0.9882454611544146, "grad_norm": 0.76953125, "learning_rate": 7.864328768726878e-05, "loss": 0.8163, "step": 37321 }, { "epoch": 0.988271940762709, "grad_norm": 0.765625, "learning_rate": 7.863879018490145e-05, "loss": 0.7047, "step": 37322 }, { "epoch": 0.9882984203710033, "grad_norm": 0.75, "learning_rate": 7.863429272780804e-05, "loss": 0.7724, "step": 37323 }, { "epoch": 0.9883248999792977, "grad_norm": 0.71875, "learning_rate": 7.862979531599802e-05, "loss": 0.7244, "step": 37324 }, { "epoch": 0.9883513795875921, "grad_norm": 0.80078125, "learning_rate": 7.86252979494809e-05, "loss": 0.9501, "step": 37325 }, { "epoch": 0.9883778591958865, "grad_norm": 0.78515625, "learning_rate": 7.862080062826627e-05, "loss": 0.7889, "step": 37326 }, { "epoch": 0.9884043388041809, "grad_norm": 0.80859375, "learning_rate": 7.86163033523636e-05, "loss": 0.7263, "step": 37327 }, { "epoch": 0.9884308184124753, "grad_norm": 0.796875, "learning_rate": 7.861180612178247e-05, "loss": 0.7708, "step": 37328 }, { "epoch": 0.9884572980207696, "grad_norm": 0.76953125, "learning_rate": 7.860730893653238e-05, "loss": 0.787, "step": 37329 }, { "epoch": 0.988483777629064, "grad_norm": 0.80859375, "learning_rate": 7.860281179662289e-05, "loss": 0.8682, "step": 37330 }, { "epoch": 0.9885102572373584, "grad_norm": 0.8125, "learning_rate": 7.859831470206353e-05, "loss": 0.7567, "step": 37331 }, { "epoch": 0.9885367368456528, "grad_norm": 0.7890625, "learning_rate": 7.859381765286378e-05, "loss": 0.8087, "step": 37332 }, { "epoch": 0.9885632164539472, "grad_norm": 0.7734375, "learning_rate": 7.858932064903323e-05, "loss": 0.7391, "step": 37333 }, { "epoch": 0.9885896960622416, "grad_norm": 0.78515625, "learning_rate": 7.85848236905814e-05, "loss": 0.7848, "step": 37334 }, { "epoch": 0.988616175670536, "grad_norm": 0.7734375, "learning_rate": 7.858032677751783e-05, "loss": 0.7431, "step": 37335 }, { "epoch": 0.9886426552788303, "grad_norm": 0.7109375, "learning_rate": 7.857582990985202e-05, "loss": 0.6949, "step": 37336 }, { "epoch": 0.9886691348871246, "grad_norm": 0.6953125, "learning_rate": 7.857133308759345e-05, "loss": 0.7675, "step": 37337 }, { "epoch": 0.988695614495419, "grad_norm": 0.81640625, "learning_rate": 7.856683631075179e-05, "loss": 0.7404, "step": 37338 }, { "epoch": 0.9887220941037134, "grad_norm": 0.80078125, "learning_rate": 7.856233957933648e-05, "loss": 0.832, "step": 37339 }, { "epoch": 0.9887485737120078, "grad_norm": 0.859375, "learning_rate": 7.855784289335705e-05, "loss": 0.767, "step": 37340 }, { "epoch": 0.9887750533203021, "grad_norm": 0.796875, "learning_rate": 7.855334625282308e-05, "loss": 0.8204, "step": 37341 }, { "epoch": 0.9888015329285965, "grad_norm": 0.84375, "learning_rate": 7.8548849657744e-05, "loss": 0.7697, "step": 37342 }, { "epoch": 0.9888280125368909, "grad_norm": 0.8046875, "learning_rate": 7.854435310812946e-05, "loss": 0.7453, "step": 37343 }, { "epoch": 0.9888544921451853, "grad_norm": 0.76953125, "learning_rate": 7.853985660398894e-05, "loss": 0.735, "step": 37344 }, { "epoch": 0.9888809717534797, "grad_norm": 0.85546875, "learning_rate": 7.853536014533197e-05, "loss": 0.9167, "step": 37345 }, { "epoch": 0.988907451361774, "grad_norm": 0.7890625, "learning_rate": 7.853086373216806e-05, "loss": 0.8174, "step": 37346 }, { "epoch": 0.9889339309700684, "grad_norm": 0.79296875, "learning_rate": 7.852636736450673e-05, "loss": 0.9566, "step": 37347 }, { "epoch": 0.9889604105783628, "grad_norm": 0.80859375, "learning_rate": 7.852187104235756e-05, "loss": 0.8185, "step": 37348 }, { "epoch": 0.9889868901866572, "grad_norm": 0.80859375, "learning_rate": 7.851737476573007e-05, "loss": 0.9501, "step": 37349 }, { "epoch": 0.9890133697949516, "grad_norm": 0.71875, "learning_rate": 7.851287853463378e-05, "loss": 0.786, "step": 37350 }, { "epoch": 0.989039849403246, "grad_norm": 0.71875, "learning_rate": 7.850838234907821e-05, "loss": 0.8279, "step": 37351 }, { "epoch": 0.9890663290115403, "grad_norm": 0.73828125, "learning_rate": 7.850388620907287e-05, "loss": 0.8141, "step": 37352 }, { "epoch": 0.9890928086198346, "grad_norm": 0.7734375, "learning_rate": 7.849939011462735e-05, "loss": 0.8207, "step": 37353 }, { "epoch": 0.989119288228129, "grad_norm": 0.79296875, "learning_rate": 7.849489406575113e-05, "loss": 0.8878, "step": 37354 }, { "epoch": 0.9891457678364234, "grad_norm": 0.83203125, "learning_rate": 7.849039806245376e-05, "loss": 0.7553, "step": 37355 }, { "epoch": 0.9891722474447178, "grad_norm": 0.9140625, "learning_rate": 7.848590210474476e-05, "loss": 0.97, "step": 37356 }, { "epoch": 0.9891987270530122, "grad_norm": 0.8203125, "learning_rate": 7.848140619263362e-05, "loss": 0.7697, "step": 37357 }, { "epoch": 0.9892252066613065, "grad_norm": 0.765625, "learning_rate": 7.847691032612995e-05, "loss": 0.754, "step": 37358 }, { "epoch": 0.9892516862696009, "grad_norm": 0.75, "learning_rate": 7.847241450524324e-05, "loss": 0.7224, "step": 37359 }, { "epoch": 0.9892781658778953, "grad_norm": 0.828125, "learning_rate": 7.846791872998302e-05, "loss": 0.8129, "step": 37360 }, { "epoch": 0.9893046454861897, "grad_norm": 0.7734375, "learning_rate": 7.84634230003588e-05, "loss": 0.6441, "step": 37361 }, { "epoch": 0.9893311250944841, "grad_norm": 0.72265625, "learning_rate": 7.845892731638011e-05, "loss": 0.6913, "step": 37362 }, { "epoch": 0.9893576047027784, "grad_norm": 0.75, "learning_rate": 7.84544316780565e-05, "loss": 0.7645, "step": 37363 }, { "epoch": 0.9893840843110728, "grad_norm": 0.74609375, "learning_rate": 7.844993608539752e-05, "loss": 0.8001, "step": 37364 }, { "epoch": 0.9894105639193672, "grad_norm": 0.75, "learning_rate": 7.844544053841265e-05, "loss": 0.7641, "step": 37365 }, { "epoch": 0.9894370435276616, "grad_norm": 0.8515625, "learning_rate": 7.844094503711145e-05, "loss": 0.8121, "step": 37366 }, { "epoch": 0.989463523135956, "grad_norm": 0.75, "learning_rate": 7.843644958150338e-05, "loss": 0.8459, "step": 37367 }, { "epoch": 0.9894900027442504, "grad_norm": 0.7421875, "learning_rate": 7.843195417159808e-05, "loss": 0.8056, "step": 37368 }, { "epoch": 0.9895164823525446, "grad_norm": 0.765625, "learning_rate": 7.8427458807405e-05, "loss": 0.8123, "step": 37369 }, { "epoch": 0.989542961960839, "grad_norm": 0.76953125, "learning_rate": 7.842296348893371e-05, "loss": 0.7156, "step": 37370 }, { "epoch": 0.9895694415691334, "grad_norm": 0.7578125, "learning_rate": 7.84184682161937e-05, "loss": 0.7588, "step": 37371 }, { "epoch": 0.9895959211774278, "grad_norm": 0.78125, "learning_rate": 7.84139729891945e-05, "loss": 0.6658, "step": 37372 }, { "epoch": 0.9896224007857222, "grad_norm": 0.72265625, "learning_rate": 7.840947780794566e-05, "loss": 0.7589, "step": 37373 }, { "epoch": 0.9896488803940166, "grad_norm": 0.82421875, "learning_rate": 7.840498267245672e-05, "loss": 0.8112, "step": 37374 }, { "epoch": 0.9896753600023109, "grad_norm": 0.73828125, "learning_rate": 7.840048758273717e-05, "loss": 0.6768, "step": 37375 }, { "epoch": 0.9897018396106053, "grad_norm": 0.7734375, "learning_rate": 7.839599253879655e-05, "loss": 0.6659, "step": 37376 }, { "epoch": 0.9897283192188997, "grad_norm": 0.86328125, "learning_rate": 7.839149754064435e-05, "loss": 0.8182, "step": 37377 }, { "epoch": 0.9897547988271941, "grad_norm": 0.7578125, "learning_rate": 7.838700258829017e-05, "loss": 0.8208, "step": 37378 }, { "epoch": 0.9897812784354885, "grad_norm": 0.80859375, "learning_rate": 7.838250768174351e-05, "loss": 0.8557, "step": 37379 }, { "epoch": 0.9898077580437828, "grad_norm": 0.765625, "learning_rate": 7.837801282101389e-05, "loss": 0.7112, "step": 37380 }, { "epoch": 0.9898342376520772, "grad_norm": 0.80078125, "learning_rate": 7.837351800611084e-05, "loss": 0.7807, "step": 37381 }, { "epoch": 0.9898607172603716, "grad_norm": 0.95703125, "learning_rate": 7.836902323704383e-05, "loss": 0.6774, "step": 37382 }, { "epoch": 0.989887196868666, "grad_norm": 0.76953125, "learning_rate": 7.836452851382249e-05, "loss": 0.6814, "step": 37383 }, { "epoch": 0.9899136764769604, "grad_norm": 0.8203125, "learning_rate": 7.836003383645629e-05, "loss": 0.8369, "step": 37384 }, { "epoch": 0.9899401560852548, "grad_norm": 0.80078125, "learning_rate": 7.835553920495478e-05, "loss": 0.8158, "step": 37385 }, { "epoch": 0.989966635693549, "grad_norm": 0.8046875, "learning_rate": 7.835104461932745e-05, "loss": 0.8015, "step": 37386 }, { "epoch": 0.9899931153018434, "grad_norm": 0.75, "learning_rate": 7.83465500795838e-05, "loss": 0.7502, "step": 37387 }, { "epoch": 0.9900195949101378, "grad_norm": 0.8203125, "learning_rate": 7.834205558573345e-05, "loss": 0.927, "step": 37388 }, { "epoch": 0.9900460745184322, "grad_norm": 0.73046875, "learning_rate": 7.833756113778586e-05, "loss": 0.7297, "step": 37389 }, { "epoch": 0.9900725541267266, "grad_norm": 0.76171875, "learning_rate": 7.833306673575058e-05, "loss": 0.7433, "step": 37390 }, { "epoch": 0.990099033735021, "grad_norm": 0.84375, "learning_rate": 7.832857237963714e-05, "loss": 0.8504, "step": 37391 }, { "epoch": 0.9901255133433153, "grad_norm": 0.82421875, "learning_rate": 7.832407806945501e-05, "loss": 0.8354, "step": 37392 }, { "epoch": 0.9901519929516097, "grad_norm": 0.76953125, "learning_rate": 7.831958380521379e-05, "loss": 0.7354, "step": 37393 }, { "epoch": 0.9901784725599041, "grad_norm": 0.75, "learning_rate": 7.831508958692297e-05, "loss": 0.7462, "step": 37394 }, { "epoch": 0.9902049521681985, "grad_norm": 0.80859375, "learning_rate": 7.831059541459208e-05, "loss": 0.8837, "step": 37395 }, { "epoch": 0.9902314317764929, "grad_norm": 0.8046875, "learning_rate": 7.830610128823062e-05, "loss": 0.8658, "step": 37396 }, { "epoch": 0.9902579113847872, "grad_norm": 0.73828125, "learning_rate": 7.830160720784814e-05, "loss": 0.6587, "step": 37397 }, { "epoch": 0.9902843909930816, "grad_norm": 0.73828125, "learning_rate": 7.829711317345418e-05, "loss": 0.6744, "step": 37398 }, { "epoch": 0.990310870601376, "grad_norm": 0.796875, "learning_rate": 7.829261918505825e-05, "loss": 0.7331, "step": 37399 }, { "epoch": 0.9903373502096704, "grad_norm": 0.7890625, "learning_rate": 7.828812524266988e-05, "loss": 0.7587, "step": 37400 }, { "epoch": 0.9903638298179648, "grad_norm": 0.828125, "learning_rate": 7.828363134629859e-05, "loss": 0.7428, "step": 37401 }, { "epoch": 0.990390309426259, "grad_norm": 0.78125, "learning_rate": 7.827913749595383e-05, "loss": 0.7669, "step": 37402 }, { "epoch": 0.9904167890345534, "grad_norm": 0.8125, "learning_rate": 7.827464369164526e-05, "loss": 0.7487, "step": 37403 }, { "epoch": 0.9904432686428478, "grad_norm": 0.7421875, "learning_rate": 7.827014993338235e-05, "loss": 0.7696, "step": 37404 }, { "epoch": 0.9904697482511422, "grad_norm": 0.76171875, "learning_rate": 7.82656562211746e-05, "loss": 0.7224, "step": 37405 }, { "epoch": 0.9904962278594366, "grad_norm": 0.75390625, "learning_rate": 7.826116255503157e-05, "loss": 0.7761, "step": 37406 }, { "epoch": 0.990522707467731, "grad_norm": 0.859375, "learning_rate": 7.825666893496271e-05, "loss": 0.8584, "step": 37407 }, { "epoch": 0.9905491870760254, "grad_norm": 0.78515625, "learning_rate": 7.825217536097763e-05, "loss": 0.7746, "step": 37408 }, { "epoch": 0.9905756666843197, "grad_norm": 0.82421875, "learning_rate": 7.824768183308583e-05, "loss": 0.8808, "step": 37409 }, { "epoch": 0.9906021462926141, "grad_norm": 0.71484375, "learning_rate": 7.824318835129683e-05, "loss": 0.7395, "step": 37410 }, { "epoch": 0.9906286259009085, "grad_norm": 0.81640625, "learning_rate": 7.823869491562013e-05, "loss": 0.788, "step": 37411 }, { "epoch": 0.9906551055092029, "grad_norm": 0.79296875, "learning_rate": 7.823420152606528e-05, "loss": 0.7953, "step": 37412 }, { "epoch": 0.9906815851174973, "grad_norm": 0.80078125, "learning_rate": 7.822970818264179e-05, "loss": 0.8676, "step": 37413 }, { "epoch": 0.9907080647257916, "grad_norm": 0.76171875, "learning_rate": 7.822521488535919e-05, "loss": 0.819, "step": 37414 }, { "epoch": 0.990734544334086, "grad_norm": 0.74609375, "learning_rate": 7.8220721634227e-05, "loss": 0.881, "step": 37415 }, { "epoch": 0.9907610239423804, "grad_norm": 0.79296875, "learning_rate": 7.821622842925476e-05, "loss": 0.7208, "step": 37416 }, { "epoch": 0.9907875035506748, "grad_norm": 0.8046875, "learning_rate": 7.821173527045197e-05, "loss": 0.8064, "step": 37417 }, { "epoch": 0.9908139831589691, "grad_norm": 0.76171875, "learning_rate": 7.820724215782814e-05, "loss": 0.9241, "step": 37418 }, { "epoch": 0.9908404627672635, "grad_norm": 0.79296875, "learning_rate": 7.820274909139285e-05, "loss": 0.8077, "step": 37419 }, { "epoch": 0.9908669423755578, "grad_norm": 0.72265625, "learning_rate": 7.819825607115558e-05, "loss": 0.7657, "step": 37420 }, { "epoch": 0.9908934219838522, "grad_norm": 0.82421875, "learning_rate": 7.819376309712585e-05, "loss": 0.7877, "step": 37421 }, { "epoch": 0.9909199015921466, "grad_norm": 0.74609375, "learning_rate": 7.818927016931321e-05, "loss": 0.847, "step": 37422 }, { "epoch": 0.990946381200441, "grad_norm": 0.70703125, "learning_rate": 7.818477728772711e-05, "loss": 0.7823, "step": 37423 }, { "epoch": 0.9909728608087354, "grad_norm": 0.75390625, "learning_rate": 7.818028445237718e-05, "loss": 0.7633, "step": 37424 }, { "epoch": 0.9909993404170298, "grad_norm": 1.0625, "learning_rate": 7.817579166327289e-05, "loss": 0.7094, "step": 37425 }, { "epoch": 0.9910258200253241, "grad_norm": 0.80859375, "learning_rate": 7.817129892042377e-05, "loss": 0.7734, "step": 37426 }, { "epoch": 0.9910522996336185, "grad_norm": 0.7578125, "learning_rate": 7.81668062238393e-05, "loss": 0.6656, "step": 37427 }, { "epoch": 0.9910787792419129, "grad_norm": 0.83203125, "learning_rate": 7.816231357352903e-05, "loss": 0.8249, "step": 37428 }, { "epoch": 0.9911052588502073, "grad_norm": 0.734375, "learning_rate": 7.815782096950251e-05, "loss": 0.706, "step": 37429 }, { "epoch": 0.9911317384585017, "grad_norm": 0.765625, "learning_rate": 7.815332841176925e-05, "loss": 0.7211, "step": 37430 }, { "epoch": 0.991158218066796, "grad_norm": 1.046875, "learning_rate": 7.814883590033876e-05, "loss": 0.7573, "step": 37431 }, { "epoch": 0.9911846976750904, "grad_norm": 0.77734375, "learning_rate": 7.814434343522056e-05, "loss": 0.7657, "step": 37432 }, { "epoch": 0.9912111772833848, "grad_norm": 0.78515625, "learning_rate": 7.813985101642414e-05, "loss": 0.8846, "step": 37433 }, { "epoch": 0.9912376568916792, "grad_norm": 0.85546875, "learning_rate": 7.813535864395909e-05, "loss": 0.7189, "step": 37434 }, { "epoch": 0.9912641364999735, "grad_norm": 0.74609375, "learning_rate": 7.813086631783489e-05, "loss": 0.8868, "step": 37435 }, { "epoch": 0.9912906161082679, "grad_norm": 0.765625, "learning_rate": 7.81263740380611e-05, "loss": 0.7439, "step": 37436 }, { "epoch": 0.9913170957165622, "grad_norm": 0.8203125, "learning_rate": 7.812188180464717e-05, "loss": 0.6603, "step": 37437 }, { "epoch": 0.9913435753248566, "grad_norm": 0.8515625, "learning_rate": 7.811738961760266e-05, "loss": 0.8966, "step": 37438 }, { "epoch": 0.991370054933151, "grad_norm": 0.71484375, "learning_rate": 7.811289747693711e-05, "loss": 0.9026, "step": 37439 }, { "epoch": 0.9913965345414454, "grad_norm": 0.75, "learning_rate": 7.810840538266004e-05, "loss": 0.7341, "step": 37440 }, { "epoch": 0.9914230141497398, "grad_norm": 0.78515625, "learning_rate": 7.810391333478093e-05, "loss": 0.7323, "step": 37441 }, { "epoch": 0.9914494937580342, "grad_norm": 0.82421875, "learning_rate": 7.809942133330933e-05, "loss": 0.8188, "step": 37442 }, { "epoch": 0.9914759733663285, "grad_norm": 0.796875, "learning_rate": 7.809492937825472e-05, "loss": 0.7499, "step": 37443 }, { "epoch": 0.9915024529746229, "grad_norm": 0.828125, "learning_rate": 7.809043746962669e-05, "loss": 0.8338, "step": 37444 }, { "epoch": 0.9915289325829173, "grad_norm": 0.8203125, "learning_rate": 7.808594560743473e-05, "loss": 0.8275, "step": 37445 }, { "epoch": 0.9915554121912117, "grad_norm": 0.7734375, "learning_rate": 7.808145379168835e-05, "loss": 0.8799, "step": 37446 }, { "epoch": 0.9915818917995061, "grad_norm": 0.76953125, "learning_rate": 7.807696202239708e-05, "loss": 0.8212, "step": 37447 }, { "epoch": 0.9916083714078004, "grad_norm": 0.75, "learning_rate": 7.80724702995704e-05, "loss": 0.7351, "step": 37448 }, { "epoch": 0.9916348510160948, "grad_norm": 0.76953125, "learning_rate": 7.80679786232179e-05, "loss": 0.7685, "step": 37449 }, { "epoch": 0.9916613306243892, "grad_norm": 0.828125, "learning_rate": 7.806348699334906e-05, "loss": 0.8333, "step": 37450 }, { "epoch": 0.9916878102326835, "grad_norm": 0.734375, "learning_rate": 7.805899540997342e-05, "loss": 0.8241, "step": 37451 }, { "epoch": 0.9917142898409779, "grad_norm": 0.75390625, "learning_rate": 7.805450387310047e-05, "loss": 0.7896, "step": 37452 }, { "epoch": 0.9917407694492723, "grad_norm": 0.78125, "learning_rate": 7.805001238273972e-05, "loss": 0.9228, "step": 37453 }, { "epoch": 0.9917672490575666, "grad_norm": 0.828125, "learning_rate": 7.804552093890074e-05, "loss": 0.8625, "step": 37454 }, { "epoch": 0.991793728665861, "grad_norm": 0.82421875, "learning_rate": 7.804102954159304e-05, "loss": 0.7303, "step": 37455 }, { "epoch": 0.9918202082741554, "grad_norm": 0.82421875, "learning_rate": 7.80365381908261e-05, "loss": 0.6602, "step": 37456 }, { "epoch": 0.9918466878824498, "grad_norm": 0.84765625, "learning_rate": 7.803204688660948e-05, "loss": 0.9361, "step": 37457 }, { "epoch": 0.9918731674907442, "grad_norm": 0.81640625, "learning_rate": 7.802755562895265e-05, "loss": 0.7114, "step": 37458 }, { "epoch": 0.9918996470990386, "grad_norm": 0.8125, "learning_rate": 7.80230644178652e-05, "loss": 0.8074, "step": 37459 }, { "epoch": 0.9919261267073329, "grad_norm": 0.71484375, "learning_rate": 7.801857325335659e-05, "loss": 0.7028, "step": 37460 }, { "epoch": 0.9919526063156273, "grad_norm": 0.9453125, "learning_rate": 7.801408213543637e-05, "loss": 0.806, "step": 37461 }, { "epoch": 0.9919790859239217, "grad_norm": 0.7734375, "learning_rate": 7.800959106411403e-05, "loss": 0.8169, "step": 37462 }, { "epoch": 0.9920055655322161, "grad_norm": 1.0078125, "learning_rate": 7.800510003939907e-05, "loss": 0.83, "step": 37463 }, { "epoch": 0.9920320451405105, "grad_norm": 0.81640625, "learning_rate": 7.80006090613011e-05, "loss": 0.8218, "step": 37464 }, { "epoch": 0.9920585247488048, "grad_norm": 0.765625, "learning_rate": 7.799611812982956e-05, "loss": 0.7978, "step": 37465 }, { "epoch": 0.9920850043570992, "grad_norm": 0.77734375, "learning_rate": 7.799162724499399e-05, "loss": 0.8479, "step": 37466 }, { "epoch": 0.9921114839653936, "grad_norm": 0.7421875, "learning_rate": 7.798713640680391e-05, "loss": 0.8048, "step": 37467 }, { "epoch": 0.9921379635736879, "grad_norm": 0.81640625, "learning_rate": 7.79826456152688e-05, "loss": 0.8652, "step": 37468 }, { "epoch": 0.9921644431819823, "grad_norm": 0.75390625, "learning_rate": 7.797815487039826e-05, "loss": 0.7945, "step": 37469 }, { "epoch": 0.9921909227902767, "grad_norm": 0.79296875, "learning_rate": 7.797366417220177e-05, "loss": 0.786, "step": 37470 }, { "epoch": 0.992217402398571, "grad_norm": 0.75, "learning_rate": 7.796917352068882e-05, "loss": 0.8126, "step": 37471 }, { "epoch": 0.9922438820068654, "grad_norm": 0.84765625, "learning_rate": 7.796468291586896e-05, "loss": 0.7043, "step": 37472 }, { "epoch": 0.9922703616151598, "grad_norm": 0.796875, "learning_rate": 7.796019235775165e-05, "loss": 0.7434, "step": 37473 }, { "epoch": 0.9922968412234542, "grad_norm": 0.75390625, "learning_rate": 7.79557018463465e-05, "loss": 0.7643, "step": 37474 }, { "epoch": 0.9923233208317486, "grad_norm": 0.82421875, "learning_rate": 7.795121138166297e-05, "loss": 0.727, "step": 37475 }, { "epoch": 0.992349800440043, "grad_norm": 0.7265625, "learning_rate": 7.794672096371058e-05, "loss": 0.6291, "step": 37476 }, { "epoch": 0.9923762800483373, "grad_norm": 0.81640625, "learning_rate": 7.794223059249888e-05, "loss": 0.6667, "step": 37477 }, { "epoch": 0.9924027596566317, "grad_norm": 0.78515625, "learning_rate": 7.793774026803734e-05, "loss": 0.7231, "step": 37478 }, { "epoch": 0.9924292392649261, "grad_norm": 0.78515625, "learning_rate": 7.79332499903355e-05, "loss": 0.8344, "step": 37479 }, { "epoch": 0.9924557188732205, "grad_norm": 0.84375, "learning_rate": 7.792875975940287e-05, "loss": 0.8016, "step": 37480 }, { "epoch": 0.9924821984815149, "grad_norm": 0.90625, "learning_rate": 7.792426957524899e-05, "loss": 0.7848, "step": 37481 }, { "epoch": 0.9925086780898092, "grad_norm": 0.88671875, "learning_rate": 7.791977943788332e-05, "loss": 0.7963, "step": 37482 }, { "epoch": 0.9925351576981036, "grad_norm": 0.859375, "learning_rate": 7.791528934731542e-05, "loss": 0.7859, "step": 37483 }, { "epoch": 0.9925616373063979, "grad_norm": 0.76953125, "learning_rate": 7.791079930355484e-05, "loss": 0.8118, "step": 37484 }, { "epoch": 0.9925881169146923, "grad_norm": 0.7421875, "learning_rate": 7.790630930661104e-05, "loss": 0.7293, "step": 37485 }, { "epoch": 0.9926145965229867, "grad_norm": 0.83984375, "learning_rate": 7.790181935649357e-05, "loss": 0.7634, "step": 37486 }, { "epoch": 0.992641076131281, "grad_norm": 0.80859375, "learning_rate": 7.789732945321192e-05, "loss": 0.7817, "step": 37487 }, { "epoch": 0.9926675557395754, "grad_norm": 0.7265625, "learning_rate": 7.789283959677559e-05, "loss": 0.784, "step": 37488 }, { "epoch": 0.9926940353478698, "grad_norm": 0.7578125, "learning_rate": 7.788834978719415e-05, "loss": 0.8548, "step": 37489 }, { "epoch": 0.9927205149561642, "grad_norm": 0.85546875, "learning_rate": 7.788386002447709e-05, "loss": 0.7914, "step": 37490 }, { "epoch": 0.9927469945644586, "grad_norm": 0.828125, "learning_rate": 7.787937030863393e-05, "loss": 0.8735, "step": 37491 }, { "epoch": 0.992773474172753, "grad_norm": 0.984375, "learning_rate": 7.787488063967416e-05, "loss": 0.8217, "step": 37492 }, { "epoch": 0.9927999537810474, "grad_norm": 0.79296875, "learning_rate": 7.787039101760729e-05, "loss": 0.8389, "step": 37493 }, { "epoch": 0.9928264333893417, "grad_norm": 0.8515625, "learning_rate": 7.786590144244291e-05, "loss": 0.8029, "step": 37494 }, { "epoch": 0.9928529129976361, "grad_norm": 0.76953125, "learning_rate": 7.786141191419048e-05, "loss": 0.6965, "step": 37495 }, { "epoch": 0.9928793926059305, "grad_norm": 0.8359375, "learning_rate": 7.785692243285953e-05, "loss": 0.7969, "step": 37496 }, { "epoch": 0.9929058722142249, "grad_norm": 0.81640625, "learning_rate": 7.785243299845954e-05, "loss": 0.8762, "step": 37497 }, { "epoch": 0.9929323518225193, "grad_norm": 0.7890625, "learning_rate": 7.784794361100006e-05, "loss": 0.86, "step": 37498 }, { "epoch": 0.9929588314308136, "grad_norm": 0.76953125, "learning_rate": 7.784345427049062e-05, "loss": 0.6533, "step": 37499 }, { "epoch": 0.9929853110391079, "grad_norm": 0.765625, "learning_rate": 7.783896497694068e-05, "loss": 0.8047, "step": 37500 }, { "epoch": 0.9930117906474023, "grad_norm": 0.76171875, "learning_rate": 7.78344757303598e-05, "loss": 0.7002, "step": 37501 }, { "epoch": 0.9930382702556967, "grad_norm": 0.6953125, "learning_rate": 7.782998653075749e-05, "loss": 0.6674, "step": 37502 }, { "epoch": 0.9930647498639911, "grad_norm": 0.77734375, "learning_rate": 7.782549737814324e-05, "loss": 0.7874, "step": 37503 }, { "epoch": 0.9930912294722855, "grad_norm": 0.796875, "learning_rate": 7.78210082725266e-05, "loss": 0.906, "step": 37504 }, { "epoch": 0.9931177090805798, "grad_norm": 0.83203125, "learning_rate": 7.781651921391706e-05, "loss": 0.7696, "step": 37505 }, { "epoch": 0.9931441886888742, "grad_norm": 0.79296875, "learning_rate": 7.781203020232416e-05, "loss": 0.9235, "step": 37506 }, { "epoch": 0.9931706682971686, "grad_norm": 0.90625, "learning_rate": 7.780754123775735e-05, "loss": 0.7887, "step": 37507 }, { "epoch": 0.993197147905463, "grad_norm": 0.74609375, "learning_rate": 7.780305232022618e-05, "loss": 0.7946, "step": 37508 }, { "epoch": 0.9932236275137574, "grad_norm": 0.80078125, "learning_rate": 7.779856344974021e-05, "loss": 0.7708, "step": 37509 }, { "epoch": 0.9932501071220518, "grad_norm": 0.765625, "learning_rate": 7.779407462630891e-05, "loss": 0.7985, "step": 37510 }, { "epoch": 0.9932765867303461, "grad_norm": 0.80078125, "learning_rate": 7.77895858499418e-05, "loss": 0.7838, "step": 37511 }, { "epoch": 0.9933030663386405, "grad_norm": 0.7578125, "learning_rate": 7.77850971206484e-05, "loss": 0.6761, "step": 37512 }, { "epoch": 0.9933295459469349, "grad_norm": 0.78515625, "learning_rate": 7.778060843843816e-05, "loss": 0.7212, "step": 37513 }, { "epoch": 0.9933560255552293, "grad_norm": 0.9140625, "learning_rate": 7.777611980332072e-05, "loss": 0.7535, "step": 37514 }, { "epoch": 0.9933825051635237, "grad_norm": 0.71484375, "learning_rate": 7.77716312153055e-05, "loss": 0.7618, "step": 37515 }, { "epoch": 0.993408984771818, "grad_norm": 0.8125, "learning_rate": 7.776714267440204e-05, "loss": 0.7294, "step": 37516 }, { "epoch": 0.9934354643801123, "grad_norm": 0.6953125, "learning_rate": 7.776265418061985e-05, "loss": 0.8137, "step": 37517 }, { "epoch": 0.9934619439884067, "grad_norm": 0.79296875, "learning_rate": 7.775816573396843e-05, "loss": 0.7795, "step": 37518 }, { "epoch": 0.9934884235967011, "grad_norm": 0.82421875, "learning_rate": 7.775367733445729e-05, "loss": 0.8554, "step": 37519 }, { "epoch": 0.9935149032049955, "grad_norm": 0.7578125, "learning_rate": 7.7749188982096e-05, "loss": 0.7122, "step": 37520 }, { "epoch": 0.9935413828132899, "grad_norm": 0.75390625, "learning_rate": 7.774470067689403e-05, "loss": 0.8633, "step": 37521 }, { "epoch": 0.9935678624215842, "grad_norm": 0.7578125, "learning_rate": 7.774021241886089e-05, "loss": 0.7309, "step": 37522 }, { "epoch": 0.9935943420298786, "grad_norm": 0.8359375, "learning_rate": 7.773572420800608e-05, "loss": 0.7434, "step": 37523 }, { "epoch": 0.993620821638173, "grad_norm": 0.82421875, "learning_rate": 7.773123604433914e-05, "loss": 0.7782, "step": 37524 }, { "epoch": 0.9936473012464674, "grad_norm": 0.75, "learning_rate": 7.772674792786957e-05, "loss": 0.7975, "step": 37525 }, { "epoch": 0.9936737808547618, "grad_norm": 0.83984375, "learning_rate": 7.772225985860691e-05, "loss": 0.8678, "step": 37526 }, { "epoch": 0.9937002604630562, "grad_norm": 0.8125, "learning_rate": 7.771777183656062e-05, "loss": 0.8538, "step": 37527 }, { "epoch": 0.9937267400713505, "grad_norm": 0.796875, "learning_rate": 7.771328386174022e-05, "loss": 0.7443, "step": 37528 }, { "epoch": 0.9937532196796449, "grad_norm": 0.69921875, "learning_rate": 7.770879593415526e-05, "loss": 0.694, "step": 37529 }, { "epoch": 0.9937796992879393, "grad_norm": 0.86328125, "learning_rate": 7.770430805381526e-05, "loss": 0.7311, "step": 37530 }, { "epoch": 0.9938061788962337, "grad_norm": 0.796875, "learning_rate": 7.769982022072968e-05, "loss": 0.7463, "step": 37531 }, { "epoch": 0.9938326585045281, "grad_norm": 0.73828125, "learning_rate": 7.769533243490806e-05, "loss": 0.7454, "step": 37532 }, { "epoch": 0.9938591381128223, "grad_norm": 0.734375, "learning_rate": 7.769084469635988e-05, "loss": 0.7296, "step": 37533 }, { "epoch": 0.9938856177211167, "grad_norm": 0.71484375, "learning_rate": 7.768635700509472e-05, "loss": 0.7563, "step": 37534 }, { "epoch": 0.9939120973294111, "grad_norm": 0.78125, "learning_rate": 7.768186936112202e-05, "loss": 0.8075, "step": 37535 }, { "epoch": 0.9939385769377055, "grad_norm": 0.78125, "learning_rate": 7.767738176445136e-05, "loss": 0.7074, "step": 37536 }, { "epoch": 0.9939650565459999, "grad_norm": 0.71484375, "learning_rate": 7.76728942150922e-05, "loss": 0.7347, "step": 37537 }, { "epoch": 0.9939915361542943, "grad_norm": 0.70703125, "learning_rate": 7.766840671305403e-05, "loss": 0.617, "step": 37538 }, { "epoch": 0.9940180157625886, "grad_norm": 0.89453125, "learning_rate": 7.766391925834642e-05, "loss": 0.8272, "step": 37539 }, { "epoch": 0.994044495370883, "grad_norm": 0.87109375, "learning_rate": 7.765943185097886e-05, "loss": 0.7222, "step": 37540 }, { "epoch": 0.9940709749791774, "grad_norm": 0.7578125, "learning_rate": 7.765494449096086e-05, "loss": 0.7807, "step": 37541 }, { "epoch": 0.9940974545874718, "grad_norm": 0.79296875, "learning_rate": 7.765045717830193e-05, "loss": 0.6803, "step": 37542 }, { "epoch": 0.9941239341957662, "grad_norm": 0.84765625, "learning_rate": 7.764596991301157e-05, "loss": 0.681, "step": 37543 }, { "epoch": 0.9941504138040606, "grad_norm": 0.77734375, "learning_rate": 7.764148269509932e-05, "loss": 0.7923, "step": 37544 }, { "epoch": 0.9941768934123549, "grad_norm": 0.7578125, "learning_rate": 7.763699552457464e-05, "loss": 0.6822, "step": 37545 }, { "epoch": 0.9942033730206493, "grad_norm": 0.8203125, "learning_rate": 7.763250840144709e-05, "loss": 0.8646, "step": 37546 }, { "epoch": 0.9942298526289437, "grad_norm": 0.8125, "learning_rate": 7.762802132572616e-05, "loss": 0.7805, "step": 37547 }, { "epoch": 0.9942563322372381, "grad_norm": 0.6640625, "learning_rate": 7.762353429742131e-05, "loss": 0.741, "step": 37548 }, { "epoch": 0.9942828118455324, "grad_norm": 0.83984375, "learning_rate": 7.761904731654215e-05, "loss": 0.8557, "step": 37549 }, { "epoch": 0.9943092914538267, "grad_norm": 1.3125, "learning_rate": 7.761456038309814e-05, "loss": 0.7435, "step": 37550 }, { "epoch": 0.9943357710621211, "grad_norm": 0.80078125, "learning_rate": 7.761007349709879e-05, "loss": 0.6388, "step": 37551 }, { "epoch": 0.9943622506704155, "grad_norm": 0.82421875, "learning_rate": 7.760558665855362e-05, "loss": 0.7125, "step": 37552 }, { "epoch": 0.9943887302787099, "grad_norm": 0.7421875, "learning_rate": 7.76010998674721e-05, "loss": 0.7288, "step": 37553 }, { "epoch": 0.9944152098870043, "grad_norm": 0.796875, "learning_rate": 7.759661312386375e-05, "loss": 0.8346, "step": 37554 }, { "epoch": 0.9944416894952987, "grad_norm": 0.8203125, "learning_rate": 7.759212642773814e-05, "loss": 0.7073, "step": 37555 }, { "epoch": 0.994468169103593, "grad_norm": 0.796875, "learning_rate": 7.758763977910472e-05, "loss": 0.8524, "step": 37556 }, { "epoch": 0.9944946487118874, "grad_norm": 0.7890625, "learning_rate": 7.758315317797304e-05, "loss": 0.8567, "step": 37557 }, { "epoch": 0.9945211283201818, "grad_norm": 0.9453125, "learning_rate": 7.757866662435257e-05, "loss": 0.7505, "step": 37558 }, { "epoch": 0.9945476079284762, "grad_norm": 0.78515625, "learning_rate": 7.757418011825279e-05, "loss": 0.7273, "step": 37559 }, { "epoch": 0.9945740875367706, "grad_norm": 0.76171875, "learning_rate": 7.75696936596833e-05, "loss": 0.8456, "step": 37560 }, { "epoch": 0.994600567145065, "grad_norm": 0.8125, "learning_rate": 7.756520724865356e-05, "loss": 0.8743, "step": 37561 }, { "epoch": 0.9946270467533593, "grad_norm": 0.8125, "learning_rate": 7.756072088517308e-05, "loss": 0.8295, "step": 37562 }, { "epoch": 0.9946535263616537, "grad_norm": 0.82421875, "learning_rate": 7.755623456925138e-05, "loss": 0.7908, "step": 37563 }, { "epoch": 0.9946800059699481, "grad_norm": 0.78515625, "learning_rate": 7.755174830089793e-05, "loss": 0.7492, "step": 37564 }, { "epoch": 0.9947064855782425, "grad_norm": 0.76171875, "learning_rate": 7.754726208012229e-05, "loss": 0.7698, "step": 37565 }, { "epoch": 0.9947329651865368, "grad_norm": 0.82421875, "learning_rate": 7.754277590693394e-05, "loss": 0.8022, "step": 37566 }, { "epoch": 0.9947594447948311, "grad_norm": 0.76953125, "learning_rate": 7.753828978134238e-05, "loss": 0.8658, "step": 37567 }, { "epoch": 0.9947859244031255, "grad_norm": 0.7890625, "learning_rate": 7.753380370335714e-05, "loss": 0.8678, "step": 37568 }, { "epoch": 0.9948124040114199, "grad_norm": 0.76171875, "learning_rate": 7.75293176729877e-05, "loss": 0.7137, "step": 37569 }, { "epoch": 0.9948388836197143, "grad_norm": 0.7421875, "learning_rate": 7.752483169024361e-05, "loss": 0.6987, "step": 37570 }, { "epoch": 0.9948653632280087, "grad_norm": 0.71484375, "learning_rate": 7.752034575513437e-05, "loss": 0.7274, "step": 37571 }, { "epoch": 0.994891842836303, "grad_norm": 0.7890625, "learning_rate": 7.751585986766945e-05, "loss": 0.7259, "step": 37572 }, { "epoch": 0.9949183224445974, "grad_norm": 0.88671875, "learning_rate": 7.751137402785839e-05, "loss": 0.7161, "step": 37573 }, { "epoch": 0.9949448020528918, "grad_norm": 0.85546875, "learning_rate": 7.750688823571064e-05, "loss": 0.8738, "step": 37574 }, { "epoch": 0.9949712816611862, "grad_norm": 0.765625, "learning_rate": 7.75024024912358e-05, "loss": 0.8663, "step": 37575 }, { "epoch": 0.9949977612694806, "grad_norm": 0.765625, "learning_rate": 7.749791679444333e-05, "loss": 0.7717, "step": 37576 }, { "epoch": 0.995024240877775, "grad_norm": 0.78125, "learning_rate": 7.749343114534276e-05, "loss": 0.7312, "step": 37577 }, { "epoch": 0.9950507204860694, "grad_norm": 0.79296875, "learning_rate": 7.748894554394354e-05, "loss": 0.7998, "step": 37578 }, { "epoch": 0.9950772000943637, "grad_norm": 0.88671875, "learning_rate": 7.74844599902552e-05, "loss": 0.729, "step": 37579 }, { "epoch": 0.9951036797026581, "grad_norm": 0.8515625, "learning_rate": 7.747997448428729e-05, "loss": 0.8104, "step": 37580 }, { "epoch": 0.9951301593109525, "grad_norm": 0.78515625, "learning_rate": 7.747548902604928e-05, "loss": 0.724, "step": 37581 }, { "epoch": 0.9951566389192468, "grad_norm": 0.74609375, "learning_rate": 7.74710036155507e-05, "loss": 0.7383, "step": 37582 }, { "epoch": 0.9951831185275412, "grad_norm": 0.78125, "learning_rate": 7.746651825280103e-05, "loss": 0.7666, "step": 37583 }, { "epoch": 0.9952095981358355, "grad_norm": 0.8671875, "learning_rate": 7.746203293780977e-05, "loss": 0.7894, "step": 37584 }, { "epoch": 0.9952360777441299, "grad_norm": 0.69140625, "learning_rate": 7.745754767058647e-05, "loss": 0.7331, "step": 37585 }, { "epoch": 0.9952625573524243, "grad_norm": 0.76171875, "learning_rate": 7.745306245114056e-05, "loss": 0.8687, "step": 37586 }, { "epoch": 0.9952890369607187, "grad_norm": 0.78515625, "learning_rate": 7.744857727948164e-05, "loss": 0.7798, "step": 37587 }, { "epoch": 0.9953155165690131, "grad_norm": 0.76953125, "learning_rate": 7.744409215561918e-05, "loss": 0.8701, "step": 37588 }, { "epoch": 0.9953419961773075, "grad_norm": 0.7109375, "learning_rate": 7.743960707956263e-05, "loss": 0.758, "step": 37589 }, { "epoch": 0.9953684757856018, "grad_norm": 0.7890625, "learning_rate": 7.74351220513216e-05, "loss": 0.6697, "step": 37590 }, { "epoch": 0.9953949553938962, "grad_norm": 0.87109375, "learning_rate": 7.74306370709055e-05, "loss": 0.7535, "step": 37591 }, { "epoch": 0.9954214350021906, "grad_norm": 0.765625, "learning_rate": 7.74261521383239e-05, "loss": 0.8001, "step": 37592 }, { "epoch": 0.995447914610485, "grad_norm": 0.7265625, "learning_rate": 7.742166725358628e-05, "loss": 0.8601, "step": 37593 }, { "epoch": 0.9954743942187794, "grad_norm": 0.8359375, "learning_rate": 7.741718241670209e-05, "loss": 0.9143, "step": 37594 }, { "epoch": 0.9955008738270738, "grad_norm": 0.73828125, "learning_rate": 7.741269762768093e-05, "loss": 0.7674, "step": 37595 }, { "epoch": 0.9955273534353681, "grad_norm": 0.88671875, "learning_rate": 7.740821288653228e-05, "loss": 0.8546, "step": 37596 }, { "epoch": 0.9955538330436625, "grad_norm": 0.7421875, "learning_rate": 7.740372819326563e-05, "loss": 0.6777, "step": 37597 }, { "epoch": 0.9955803126519568, "grad_norm": 0.8046875, "learning_rate": 7.739924354789048e-05, "loss": 0.914, "step": 37598 }, { "epoch": 0.9956067922602512, "grad_norm": 0.71484375, "learning_rate": 7.739475895041632e-05, "loss": 0.7801, "step": 37599 }, { "epoch": 0.9956332718685456, "grad_norm": 0.75390625, "learning_rate": 7.73902744008527e-05, "loss": 0.8, "step": 37600 }, { "epoch": 0.9956597514768399, "grad_norm": 0.83203125, "learning_rate": 7.738578989920909e-05, "loss": 0.791, "step": 37601 }, { "epoch": 0.9956862310851343, "grad_norm": 0.85546875, "learning_rate": 7.738130544549502e-05, "loss": 0.8068, "step": 37602 }, { "epoch": 0.9957127106934287, "grad_norm": 0.75, "learning_rate": 7.737682103971999e-05, "loss": 0.7226, "step": 37603 }, { "epoch": 0.9957391903017231, "grad_norm": 0.73828125, "learning_rate": 7.737233668189343e-05, "loss": 0.7496, "step": 37604 }, { "epoch": 0.9957656699100175, "grad_norm": 0.7421875, "learning_rate": 7.736785237202498e-05, "loss": 0.8071, "step": 37605 }, { "epoch": 0.9957921495183119, "grad_norm": 0.80078125, "learning_rate": 7.736336811012405e-05, "loss": 0.8213, "step": 37606 }, { "epoch": 0.9958186291266062, "grad_norm": 0.8046875, "learning_rate": 7.735888389620016e-05, "loss": 0.7343, "step": 37607 }, { "epoch": 0.9958451087349006, "grad_norm": 0.8984375, "learning_rate": 7.735439973026284e-05, "loss": 0.8687, "step": 37608 }, { "epoch": 0.995871588343195, "grad_norm": 0.74609375, "learning_rate": 7.734991561232155e-05, "loss": 0.8252, "step": 37609 }, { "epoch": 0.9958980679514894, "grad_norm": 0.81640625, "learning_rate": 7.734543154238585e-05, "loss": 0.7609, "step": 37610 }, { "epoch": 0.9959245475597838, "grad_norm": 0.828125, "learning_rate": 7.73409475204652e-05, "loss": 0.8701, "step": 37611 }, { "epoch": 0.9959510271680782, "grad_norm": 0.76953125, "learning_rate": 7.733646354656911e-05, "loss": 0.7608, "step": 37612 }, { "epoch": 0.9959775067763725, "grad_norm": 0.7734375, "learning_rate": 7.733197962070711e-05, "loss": 0.8397, "step": 37613 }, { "epoch": 0.9960039863846669, "grad_norm": 0.765625, "learning_rate": 7.732749574288862e-05, "loss": 0.7306, "step": 37614 }, { "epoch": 0.9960304659929612, "grad_norm": 0.796875, "learning_rate": 7.732301191312327e-05, "loss": 0.7318, "step": 37615 }, { "epoch": 0.9960569456012556, "grad_norm": 0.84765625, "learning_rate": 7.731852813142048e-05, "loss": 0.9463, "step": 37616 }, { "epoch": 0.99608342520955, "grad_norm": 0.77734375, "learning_rate": 7.731404439778979e-05, "loss": 0.7661, "step": 37617 }, { "epoch": 0.9961099048178443, "grad_norm": 0.7578125, "learning_rate": 7.730956071224068e-05, "loss": 0.705, "step": 37618 }, { "epoch": 0.9961363844261387, "grad_norm": 0.9609375, "learning_rate": 7.730507707478261e-05, "loss": 0.719, "step": 37619 }, { "epoch": 0.9961628640344331, "grad_norm": 0.78515625, "learning_rate": 7.730059348542519e-05, "loss": 0.7985, "step": 37620 }, { "epoch": 0.9961893436427275, "grad_norm": 0.88671875, "learning_rate": 7.729610994417785e-05, "loss": 0.8381, "step": 37621 }, { "epoch": 0.9962158232510219, "grad_norm": 0.75390625, "learning_rate": 7.729162645105011e-05, "loss": 0.791, "step": 37622 }, { "epoch": 0.9962423028593163, "grad_norm": 1.0078125, "learning_rate": 7.728714300605148e-05, "loss": 0.7872, "step": 37623 }, { "epoch": 0.9962687824676106, "grad_norm": 0.82421875, "learning_rate": 7.72826596091914e-05, "loss": 0.7139, "step": 37624 }, { "epoch": 0.996295262075905, "grad_norm": 0.77734375, "learning_rate": 7.727817626047947e-05, "loss": 0.7615, "step": 37625 }, { "epoch": 0.9963217416841994, "grad_norm": 0.83203125, "learning_rate": 7.727369295992515e-05, "loss": 0.8466, "step": 37626 }, { "epoch": 0.9963482212924938, "grad_norm": 0.83203125, "learning_rate": 7.726920970753792e-05, "loss": 0.8772, "step": 37627 }, { "epoch": 0.9963747009007882, "grad_norm": 0.828125, "learning_rate": 7.726472650332732e-05, "loss": 0.7708, "step": 37628 }, { "epoch": 0.9964011805090826, "grad_norm": 0.7890625, "learning_rate": 7.726024334730281e-05, "loss": 0.7187, "step": 37629 }, { "epoch": 0.9964276601173769, "grad_norm": 0.8515625, "learning_rate": 7.725576023947393e-05, "loss": 0.9438, "step": 37630 }, { "epoch": 0.9964541397256712, "grad_norm": 0.78125, "learning_rate": 7.725127717985017e-05, "loss": 0.8697, "step": 37631 }, { "epoch": 0.9964806193339656, "grad_norm": 0.77734375, "learning_rate": 7.724679416844102e-05, "loss": 0.8013, "step": 37632 }, { "epoch": 0.99650709894226, "grad_norm": 0.76171875, "learning_rate": 7.7242311205256e-05, "loss": 0.8202, "step": 37633 }, { "epoch": 0.9965335785505544, "grad_norm": 0.7890625, "learning_rate": 7.723782829030456e-05, "loss": 0.8537, "step": 37634 }, { "epoch": 0.9965600581588487, "grad_norm": 0.80078125, "learning_rate": 7.723334542359627e-05, "loss": 0.8326, "step": 37635 }, { "epoch": 0.9965865377671431, "grad_norm": 0.7890625, "learning_rate": 7.722886260514062e-05, "loss": 0.8258, "step": 37636 }, { "epoch": 0.9966130173754375, "grad_norm": 0.82421875, "learning_rate": 7.722437983494708e-05, "loss": 0.7711, "step": 37637 }, { "epoch": 0.9966394969837319, "grad_norm": 0.80859375, "learning_rate": 7.721989711302518e-05, "loss": 0.7714, "step": 37638 }, { "epoch": 0.9966659765920263, "grad_norm": 0.85546875, "learning_rate": 7.721541443938435e-05, "loss": 0.7901, "step": 37639 }, { "epoch": 0.9966924562003207, "grad_norm": 0.76953125, "learning_rate": 7.721093181403418e-05, "loss": 0.8906, "step": 37640 }, { "epoch": 0.996718935808615, "grad_norm": 0.73828125, "learning_rate": 7.720644923698415e-05, "loss": 0.7662, "step": 37641 }, { "epoch": 0.9967454154169094, "grad_norm": 0.7578125, "learning_rate": 7.720196670824375e-05, "loss": 0.6873, "step": 37642 }, { "epoch": 0.9967718950252038, "grad_norm": 0.78515625, "learning_rate": 7.71974842278225e-05, "loss": 0.7639, "step": 37643 }, { "epoch": 0.9967983746334982, "grad_norm": 0.7890625, "learning_rate": 7.71930017957298e-05, "loss": 0.8121, "step": 37644 }, { "epoch": 0.9968248542417926, "grad_norm": 0.80078125, "learning_rate": 7.71885194119753e-05, "loss": 0.8095, "step": 37645 }, { "epoch": 0.996851333850087, "grad_norm": 0.87109375, "learning_rate": 7.71840370765684e-05, "loss": 0.7619, "step": 37646 }, { "epoch": 0.9968778134583812, "grad_norm": 0.7734375, "learning_rate": 7.717955478951863e-05, "loss": 0.7381, "step": 37647 }, { "epoch": 0.9969042930666756, "grad_norm": 0.70703125, "learning_rate": 7.717507255083552e-05, "loss": 0.6976, "step": 37648 }, { "epoch": 0.99693077267497, "grad_norm": 0.765625, "learning_rate": 7.71705903605285e-05, "loss": 0.7346, "step": 37649 }, { "epoch": 0.9969572522832644, "grad_norm": 0.80078125, "learning_rate": 7.716610821860713e-05, "loss": 0.8884, "step": 37650 }, { "epoch": 0.9969837318915588, "grad_norm": 0.84765625, "learning_rate": 7.716162612508089e-05, "loss": 0.7013, "step": 37651 }, { "epoch": 0.9970102114998531, "grad_norm": 0.78515625, "learning_rate": 7.715714407995927e-05, "loss": 0.7706, "step": 37652 }, { "epoch": 0.9970366911081475, "grad_norm": 0.79296875, "learning_rate": 7.715266208325177e-05, "loss": 0.7916, "step": 37653 }, { "epoch": 0.9970631707164419, "grad_norm": 0.76171875, "learning_rate": 7.71481801349679e-05, "loss": 0.794, "step": 37654 }, { "epoch": 0.9970896503247363, "grad_norm": 0.74609375, "learning_rate": 7.714369823511716e-05, "loss": 0.7532, "step": 37655 }, { "epoch": 0.9971161299330307, "grad_norm": 0.80078125, "learning_rate": 7.713921638370905e-05, "loss": 0.9179, "step": 37656 }, { "epoch": 0.997142609541325, "grad_norm": 0.84375, "learning_rate": 7.713473458075306e-05, "loss": 0.8384, "step": 37657 }, { "epoch": 0.9971690891496194, "grad_norm": 0.7421875, "learning_rate": 7.713025282625872e-05, "loss": 0.7392, "step": 37658 }, { "epoch": 0.9971955687579138, "grad_norm": 0.81640625, "learning_rate": 7.712577112023543e-05, "loss": 0.8002, "step": 37659 }, { "epoch": 0.9972220483662082, "grad_norm": 0.78515625, "learning_rate": 7.712128946269281e-05, "loss": 0.7411, "step": 37660 }, { "epoch": 0.9972485279745026, "grad_norm": 0.80859375, "learning_rate": 7.711680785364032e-05, "loss": 0.8139, "step": 37661 }, { "epoch": 0.997275007582797, "grad_norm": 0.8046875, "learning_rate": 7.711232629308744e-05, "loss": 0.8859, "step": 37662 }, { "epoch": 0.9973014871910914, "grad_norm": 0.78125, "learning_rate": 7.710784478104367e-05, "loss": 0.7968, "step": 37663 }, { "epoch": 0.9973279667993856, "grad_norm": 0.73828125, "learning_rate": 7.710336331751848e-05, "loss": 0.8646, "step": 37664 }, { "epoch": 0.99735444640768, "grad_norm": 0.7421875, "learning_rate": 7.709888190252144e-05, "loss": 0.6562, "step": 37665 }, { "epoch": 0.9973809260159744, "grad_norm": 0.7578125, "learning_rate": 7.709440053606203e-05, "loss": 0.8248, "step": 37666 }, { "epoch": 0.9974074056242688, "grad_norm": 0.8828125, "learning_rate": 7.708991921814971e-05, "loss": 0.7795, "step": 37667 }, { "epoch": 0.9974338852325632, "grad_norm": 0.82421875, "learning_rate": 7.7085437948794e-05, "loss": 0.7571, "step": 37668 }, { "epoch": 0.9974603648408575, "grad_norm": 0.77734375, "learning_rate": 7.708095672800437e-05, "loss": 0.7826, "step": 37669 }, { "epoch": 0.9974868444491519, "grad_norm": 0.734375, "learning_rate": 7.707647555579038e-05, "loss": 0.6782, "step": 37670 }, { "epoch": 0.9975133240574463, "grad_norm": 0.74609375, "learning_rate": 7.707199443216145e-05, "loss": 0.8293, "step": 37671 }, { "epoch": 0.9975398036657407, "grad_norm": 0.78125, "learning_rate": 7.706751335712714e-05, "loss": 0.8664, "step": 37672 }, { "epoch": 0.9975662832740351, "grad_norm": 0.77734375, "learning_rate": 7.706303233069692e-05, "loss": 0.6835, "step": 37673 }, { "epoch": 0.9975927628823295, "grad_norm": 0.7421875, "learning_rate": 7.70585513528803e-05, "loss": 0.7025, "step": 37674 }, { "epoch": 0.9976192424906238, "grad_norm": 6.03125, "learning_rate": 7.705407042368675e-05, "loss": 0.7934, "step": 37675 }, { "epoch": 0.9976457220989182, "grad_norm": 0.80078125, "learning_rate": 7.704958954312581e-05, "loss": 0.7656, "step": 37676 }, { "epoch": 0.9976722017072126, "grad_norm": 0.84765625, "learning_rate": 7.704510871120695e-05, "loss": 0.8597, "step": 37677 }, { "epoch": 0.997698681315507, "grad_norm": 0.81640625, "learning_rate": 7.704062792793967e-05, "loss": 0.6929, "step": 37678 }, { "epoch": 0.9977251609238014, "grad_norm": 0.84375, "learning_rate": 7.703614719333344e-05, "loss": 0.7905, "step": 37679 }, { "epoch": 0.9977516405320956, "grad_norm": 0.83984375, "learning_rate": 7.703166650739779e-05, "loss": 0.6665, "step": 37680 }, { "epoch": 0.99777812014039, "grad_norm": 0.6953125, "learning_rate": 7.702718587014224e-05, "loss": 0.7891, "step": 37681 }, { "epoch": 0.9978045997486844, "grad_norm": 0.80859375, "learning_rate": 7.702270528157625e-05, "loss": 0.7359, "step": 37682 }, { "epoch": 0.9978310793569788, "grad_norm": 0.7734375, "learning_rate": 7.70182247417093e-05, "loss": 0.7335, "step": 37683 }, { "epoch": 0.9978575589652732, "grad_norm": 0.7890625, "learning_rate": 7.701374425055091e-05, "loss": 0.8848, "step": 37684 }, { "epoch": 0.9978840385735676, "grad_norm": 0.8359375, "learning_rate": 7.700926380811058e-05, "loss": 0.7827, "step": 37685 }, { "epoch": 0.9979105181818619, "grad_norm": 0.81640625, "learning_rate": 7.700478341439782e-05, "loss": 0.7397, "step": 37686 }, { "epoch": 0.9979369977901563, "grad_norm": 1.1015625, "learning_rate": 7.70003030694221e-05, "loss": 0.8682, "step": 37687 }, { "epoch": 0.9979634773984507, "grad_norm": 1.3828125, "learning_rate": 7.699582277319292e-05, "loss": 0.8779, "step": 37688 }, { "epoch": 0.9979899570067451, "grad_norm": 0.8828125, "learning_rate": 7.699134252571976e-05, "loss": 0.8014, "step": 37689 }, { "epoch": 0.9980164366150395, "grad_norm": 0.81640625, "learning_rate": 7.698686232701215e-05, "loss": 0.8, "step": 37690 }, { "epoch": 0.9980429162233339, "grad_norm": 0.8359375, "learning_rate": 7.698238217707958e-05, "loss": 0.9196, "step": 37691 }, { "epoch": 0.9980693958316282, "grad_norm": 0.77734375, "learning_rate": 7.697790207593152e-05, "loss": 0.7764, "step": 37692 }, { "epoch": 0.9980958754399226, "grad_norm": 0.72265625, "learning_rate": 7.69734220235775e-05, "loss": 0.6807, "step": 37693 }, { "epoch": 0.998122355048217, "grad_norm": 0.8515625, "learning_rate": 7.696894202002699e-05, "loss": 0.8906, "step": 37694 }, { "epoch": 0.9981488346565114, "grad_norm": 0.81640625, "learning_rate": 7.696446206528946e-05, "loss": 0.8696, "step": 37695 }, { "epoch": 0.9981753142648057, "grad_norm": 0.82421875, "learning_rate": 7.695998215937448e-05, "loss": 0.921, "step": 37696 }, { "epoch": 0.9982017938731, "grad_norm": 0.8203125, "learning_rate": 7.695550230229149e-05, "loss": 0.8944, "step": 37697 }, { "epoch": 0.9982282734813944, "grad_norm": 0.82421875, "learning_rate": 7.695102249404999e-05, "loss": 0.7531, "step": 37698 }, { "epoch": 0.9982547530896888, "grad_norm": 0.75, "learning_rate": 7.694654273465949e-05, "loss": 0.7071, "step": 37699 }, { "epoch": 0.9982812326979832, "grad_norm": 0.7890625, "learning_rate": 7.694206302412942e-05, "loss": 0.7739, "step": 37700 }, { "epoch": 0.9983077123062776, "grad_norm": 0.73046875, "learning_rate": 7.693758336246937e-05, "loss": 0.7412, "step": 37701 }, { "epoch": 0.998334191914572, "grad_norm": 0.765625, "learning_rate": 7.69331037496888e-05, "loss": 0.6809, "step": 37702 }, { "epoch": 0.9983606715228663, "grad_norm": 0.8046875, "learning_rate": 7.69286241857972e-05, "loss": 0.7564, "step": 37703 }, { "epoch": 0.9983871511311607, "grad_norm": 0.78515625, "learning_rate": 7.692414467080407e-05, "loss": 0.7197, "step": 37704 }, { "epoch": 0.9984136307394551, "grad_norm": 0.84765625, "learning_rate": 7.691966520471885e-05, "loss": 0.8655, "step": 37705 }, { "epoch": 0.9984401103477495, "grad_norm": 0.828125, "learning_rate": 7.691518578755112e-05, "loss": 0.8258, "step": 37706 }, { "epoch": 0.9984665899560439, "grad_norm": 1.453125, "learning_rate": 7.691070641931033e-05, "loss": 0.7362, "step": 37707 }, { "epoch": 0.9984930695643383, "grad_norm": 0.75, "learning_rate": 7.6906227100006e-05, "loss": 0.7852, "step": 37708 }, { "epoch": 0.9985195491726326, "grad_norm": 0.7734375, "learning_rate": 7.690174782964756e-05, "loss": 0.7265, "step": 37709 }, { "epoch": 0.998546028780927, "grad_norm": 0.85546875, "learning_rate": 7.689726860824453e-05, "loss": 0.862, "step": 37710 }, { "epoch": 0.9985725083892214, "grad_norm": 0.70703125, "learning_rate": 7.689278943580646e-05, "loss": 0.8022, "step": 37711 }, { "epoch": 0.9985989879975158, "grad_norm": 0.79296875, "learning_rate": 7.68883103123428e-05, "loss": 0.7964, "step": 37712 }, { "epoch": 0.9986254676058101, "grad_norm": 0.80859375, "learning_rate": 7.688383123786305e-05, "loss": 0.7423, "step": 37713 }, { "epoch": 0.9986519472141044, "grad_norm": 0.72265625, "learning_rate": 7.68793522123767e-05, "loss": 0.7045, "step": 37714 }, { "epoch": 0.9986784268223988, "grad_norm": 0.7734375, "learning_rate": 7.68748732358932e-05, "loss": 0.8098, "step": 37715 }, { "epoch": 0.9987049064306932, "grad_norm": 0.859375, "learning_rate": 7.687039430842212e-05, "loss": 0.7934, "step": 37716 }, { "epoch": 0.9987313860389876, "grad_norm": 0.7265625, "learning_rate": 7.686591542997292e-05, "loss": 0.7439, "step": 37717 }, { "epoch": 0.998757865647282, "grad_norm": 0.76953125, "learning_rate": 7.686143660055509e-05, "loss": 0.6693, "step": 37718 }, { "epoch": 0.9987843452555764, "grad_norm": 0.828125, "learning_rate": 7.685695782017812e-05, "loss": 0.7707, "step": 37719 }, { "epoch": 0.9988108248638707, "grad_norm": 0.8671875, "learning_rate": 7.685247908885146e-05, "loss": 0.8705, "step": 37720 }, { "epoch": 0.9988373044721651, "grad_norm": 0.75, "learning_rate": 7.684800040658468e-05, "loss": 0.6799, "step": 37721 }, { "epoch": 0.9988637840804595, "grad_norm": 0.82421875, "learning_rate": 7.684352177338726e-05, "loss": 0.773, "step": 37722 }, { "epoch": 0.9988902636887539, "grad_norm": 0.7578125, "learning_rate": 7.683904318926866e-05, "loss": 0.7833, "step": 37723 }, { "epoch": 0.9989167432970483, "grad_norm": 0.8046875, "learning_rate": 7.68345646542384e-05, "loss": 0.8052, "step": 37724 }, { "epoch": 0.9989432229053427, "grad_norm": 0.73828125, "learning_rate": 7.68300861683059e-05, "loss": 0.771, "step": 37725 }, { "epoch": 0.998969702513637, "grad_norm": 0.78515625, "learning_rate": 7.682560773148075e-05, "loss": 0.8149, "step": 37726 }, { "epoch": 0.9989961821219314, "grad_norm": 0.72265625, "learning_rate": 7.68211293437724e-05, "loss": 0.6675, "step": 37727 }, { "epoch": 0.9990226617302258, "grad_norm": 0.7890625, "learning_rate": 7.681665100519034e-05, "loss": 0.7651, "step": 37728 }, { "epoch": 0.9990491413385201, "grad_norm": 0.703125, "learning_rate": 7.681217271574407e-05, "loss": 0.8266, "step": 37729 }, { "epoch": 0.9990756209468145, "grad_norm": 0.796875, "learning_rate": 7.680769447544303e-05, "loss": 0.7295, "step": 37730 }, { "epoch": 0.9991021005551088, "grad_norm": 0.765625, "learning_rate": 7.680321628429679e-05, "loss": 0.8975, "step": 37731 }, { "epoch": 0.9991285801634032, "grad_norm": 0.77734375, "learning_rate": 7.679873814231481e-05, "loss": 0.809, "step": 37732 }, { "epoch": 0.9991550597716976, "grad_norm": 0.765625, "learning_rate": 7.67942600495066e-05, "loss": 0.7446, "step": 37733 }, { "epoch": 0.999181539379992, "grad_norm": 0.8203125, "learning_rate": 7.67897820058816e-05, "loss": 0.7862, "step": 37734 }, { "epoch": 0.9992080189882864, "grad_norm": 0.75390625, "learning_rate": 7.678530401144932e-05, "loss": 0.8964, "step": 37735 }, { "epoch": 0.9992344985965808, "grad_norm": 0.9453125, "learning_rate": 7.678082606621929e-05, "loss": 0.7678, "step": 37736 }, { "epoch": 0.9992609782048751, "grad_norm": 0.7890625, "learning_rate": 7.677634817020096e-05, "loss": 0.7941, "step": 37737 }, { "epoch": 0.9992874578131695, "grad_norm": 0.70703125, "learning_rate": 7.677187032340381e-05, "loss": 0.7564, "step": 37738 }, { "epoch": 0.9993139374214639, "grad_norm": 0.71484375, "learning_rate": 7.676739252583739e-05, "loss": 0.695, "step": 37739 }, { "epoch": 0.9993404170297583, "grad_norm": 0.79296875, "learning_rate": 7.676291477751111e-05, "loss": 0.708, "step": 37740 }, { "epoch": 0.9993668966380527, "grad_norm": 0.76171875, "learning_rate": 7.675843707843455e-05, "loss": 0.7906, "step": 37741 }, { "epoch": 0.999393376246347, "grad_norm": 0.82421875, "learning_rate": 7.675395942861715e-05, "loss": 0.7871, "step": 37742 }, { "epoch": 0.9994198558546414, "grad_norm": 0.7421875, "learning_rate": 7.674948182806839e-05, "loss": 0.6801, "step": 37743 }, { "epoch": 0.9994463354629358, "grad_norm": 0.80859375, "learning_rate": 7.674500427679779e-05, "loss": 0.8638, "step": 37744 }, { "epoch": 0.9994728150712301, "grad_norm": 0.71484375, "learning_rate": 7.674052677481477e-05, "loss": 0.7462, "step": 37745 }, { "epoch": 0.9994992946795245, "grad_norm": 0.84375, "learning_rate": 7.673604932212892e-05, "loss": 0.9259, "step": 37746 }, { "epoch": 0.9995257742878189, "grad_norm": 0.74609375, "learning_rate": 7.673157191874968e-05, "loss": 0.8082, "step": 37747 }, { "epoch": 0.9995522538961132, "grad_norm": 0.80078125, "learning_rate": 7.672709456468655e-05, "loss": 0.763, "step": 37748 }, { "epoch": 0.9995787335044076, "grad_norm": 0.828125, "learning_rate": 7.672261725994902e-05, "loss": 0.8387, "step": 37749 }, { "epoch": 0.999605213112702, "grad_norm": 0.8359375, "learning_rate": 7.671814000454651e-05, "loss": 0.8578, "step": 37750 }, { "epoch": 0.9996316927209964, "grad_norm": 0.90625, "learning_rate": 7.671366279848863e-05, "loss": 0.867, "step": 37751 }, { "epoch": 0.9996581723292908, "grad_norm": 0.78125, "learning_rate": 7.67091856417848e-05, "loss": 0.6541, "step": 37752 }, { "epoch": 0.9996846519375852, "grad_norm": 0.82421875, "learning_rate": 7.670470853444452e-05, "loss": 0.7566, "step": 37753 }, { "epoch": 0.9997111315458795, "grad_norm": 0.79296875, "learning_rate": 7.670023147647729e-05, "loss": 0.7997, "step": 37754 }, { "epoch": 0.9997376111541739, "grad_norm": 0.8515625, "learning_rate": 7.669575446789255e-05, "loss": 0.7718, "step": 37755 }, { "epoch": 0.9997640907624683, "grad_norm": 0.796875, "learning_rate": 7.669127750869987e-05, "loss": 0.8023, "step": 37756 }, { "epoch": 0.9997905703707627, "grad_norm": 0.81640625, "learning_rate": 7.668680059890866e-05, "loss": 0.7616, "step": 37757 }, { "epoch": 0.9998170499790571, "grad_norm": 0.78515625, "learning_rate": 7.668232373852847e-05, "loss": 0.6713, "step": 37758 }, { "epoch": 0.9998435295873515, "grad_norm": 0.8828125, "learning_rate": 7.667784692756875e-05, "loss": 0.7796, "step": 37759 }, { "epoch": 0.9998700091956458, "grad_norm": 0.9140625, "learning_rate": 7.667337016603899e-05, "loss": 0.8406, "step": 37760 }, { "epoch": 0.9998964888039402, "grad_norm": 0.86328125, "learning_rate": 7.666889345394872e-05, "loss": 0.7383, "step": 37761 }, { "epoch": 0.9999229684122345, "grad_norm": 0.7265625, "learning_rate": 7.666441679130737e-05, "loss": 0.7733, "step": 37762 }, { "epoch": 0.9999494480205289, "grad_norm": 0.765625, "learning_rate": 7.665994017812447e-05, "loss": 0.8468, "step": 37763 }, { "epoch": 0.9999759276288233, "grad_norm": 0.80078125, "learning_rate": 7.66554636144095e-05, "loss": 0.7439, "step": 37764 }, { "epoch": 0.9999759276288233, "step": 37764, "total_flos": 1.0820185937641524e+20, "train_loss": 0.07852082892805701, "train_runtime": 12569.6366, "train_samples_per_second": 396.589, "train_steps_per_second": 3.004 }, { "epoch": 0.00015765184739301172, "grad_norm": 1.5234375, "learning_rate": 7.733197962070711e-05, "loss": 2.0038, "step": 1 }, { "epoch": 0.00031530369478602343, "grad_norm": 7.21875, "learning_rate": 7.732749574288862e-05, "loss": 2.1873, "step": 2 }, { "epoch": 0.0004729555421790352, "grad_norm": 2.109375, "learning_rate": 7.732301191312327e-05, "loss": 1.8105, "step": 3 }, { "epoch": 0.0006306073895720469, "grad_norm": 1.7578125, "learning_rate": 7.731852813142048e-05, "loss": 1.6517, "step": 4 }, { "epoch": 0.0007882592369650586, "grad_norm": 2.265625, "learning_rate": 7.731404439778979e-05, "loss": 1.6581, "step": 5 }, { "epoch": 0.0009459110843580704, "grad_norm": 1.4453125, "learning_rate": 7.730956071224068e-05, "loss": 1.586, "step": 6 }, { "epoch": 0.001103562931751082, "grad_norm": 1.4453125, "learning_rate": 7.730507707478261e-05, "loss": 1.6468, "step": 7 }, { "epoch": 0.0012612147791440937, "grad_norm": 1.109375, "learning_rate": 7.730059348542519e-05, "loss": 1.4984, "step": 8 }, { "epoch": 0.0014188666265371056, "grad_norm": 1.3203125, "learning_rate": 7.729610994417785e-05, "loss": 1.7313, "step": 9 }, { "epoch": 0.0015765184739301172, "grad_norm": 1.140625, "learning_rate": 7.729162645105011e-05, "loss": 1.8015, "step": 10 }, { "epoch": 0.001734170321323129, "grad_norm": 1.4453125, "learning_rate": 7.728714300605148e-05, "loss": 1.6603, "step": 11 }, { "epoch": 0.0018918221687161407, "grad_norm": 1.15625, "learning_rate": 7.72826596091914e-05, "loss": 1.4449, "step": 12 }, { "epoch": 0.0020494740161091526, "grad_norm": 1.1875, "learning_rate": 7.727817626047947e-05, "loss": 1.5075, "step": 13 }, { "epoch": 0.002207125863502164, "grad_norm": 1.2890625, "learning_rate": 7.727369295992515e-05, "loss": 1.6191, "step": 14 }, { "epoch": 0.002364777710895176, "grad_norm": 1.2578125, "learning_rate": 7.726920970753792e-05, "loss": 1.8938, "step": 15 }, { "epoch": 0.0025224295582881875, "grad_norm": 1.2109375, "learning_rate": 7.726472650332732e-05, "loss": 1.4453, "step": 16 }, { "epoch": 0.002680081405681199, "grad_norm": 1.2265625, "learning_rate": 7.726024334730281e-05, "loss": 1.805, "step": 17 }, { "epoch": 0.002837733253074211, "grad_norm": 1.2578125, "learning_rate": 7.725576023947393e-05, "loss": 1.5799, "step": 18 }, { "epoch": 0.002995385100467223, "grad_norm": 1.1015625, "learning_rate": 7.725127717985017e-05, "loss": 1.5906, "step": 19 }, { "epoch": 0.0031530369478602344, "grad_norm": 1.2265625, "learning_rate": 7.724679416844102e-05, "loss": 1.7925, "step": 20 }, { "epoch": 0.003310688795253246, "grad_norm": 1.1640625, "learning_rate": 7.7242311205256e-05, "loss": 1.7829, "step": 21 }, { "epoch": 0.003468340642646258, "grad_norm": 1.125, "learning_rate": 7.723782829030456e-05, "loss": 1.4992, "step": 22 }, { "epoch": 0.0036259924900392698, "grad_norm": 1.171875, "learning_rate": 7.723334542359627e-05, "loss": 1.5473, "step": 23 }, { "epoch": 0.0037836443374322814, "grad_norm": 1.21875, "learning_rate": 7.722886260514062e-05, "loss": 1.5765, "step": 24 }, { "epoch": 0.0039412961848252935, "grad_norm": 1.1015625, "learning_rate": 7.722437983494708e-05, "loss": 1.484, "step": 25 }, { "epoch": 0.004098948032218305, "grad_norm": 1.2265625, "learning_rate": 7.721989711302518e-05, "loss": 1.5618, "step": 26 }, { "epoch": 0.004256599879611317, "grad_norm": 1.1484375, "learning_rate": 7.721541443938435e-05, "loss": 1.4351, "step": 27 }, { "epoch": 0.004414251727004328, "grad_norm": 1.109375, "learning_rate": 7.721093181403418e-05, "loss": 1.4263, "step": 28 }, { "epoch": 0.00457190357439734, "grad_norm": 1.0625, "learning_rate": 7.720644923698415e-05, "loss": 1.2921, "step": 29 }, { "epoch": 0.004729555421790352, "grad_norm": 1.1875, "learning_rate": 7.720196670824375e-05, "loss": 1.6779, "step": 30 }, { "epoch": 0.004887207269183363, "grad_norm": 1.0078125, "learning_rate": 7.71974842278225e-05, "loss": 1.4489, "step": 31 }, { "epoch": 0.005044859116576375, "grad_norm": 1.03125, "learning_rate": 7.71930017957298e-05, "loss": 1.4959, "step": 32 }, { "epoch": 0.0052025109639693866, "grad_norm": 0.96875, "learning_rate": 7.71885194119753e-05, "loss": 1.3712, "step": 33 }, { "epoch": 0.005360162811362398, "grad_norm": 1.1171875, "learning_rate": 7.71840370765684e-05, "loss": 1.6498, "step": 34 }, { "epoch": 0.005517814658755411, "grad_norm": 1.0546875, "learning_rate": 7.717955478951863e-05, "loss": 1.6476, "step": 35 }, { "epoch": 0.005675466506148422, "grad_norm": 1.15625, "learning_rate": 7.717507255083552e-05, "loss": 1.5398, "step": 36 }, { "epoch": 0.005833118353541434, "grad_norm": 1.078125, "learning_rate": 7.71705903605285e-05, "loss": 1.5979, "step": 37 }, { "epoch": 0.005990770200934446, "grad_norm": 1.328125, "learning_rate": 7.716610821860713e-05, "loss": 1.5245, "step": 38 }, { "epoch": 0.006148422048327457, "grad_norm": 1.0078125, "learning_rate": 7.716162612508089e-05, "loss": 1.2426, "step": 39 }, { "epoch": 0.006306073895720469, "grad_norm": 1.0625, "learning_rate": 7.715714407995927e-05, "loss": 1.3583, "step": 40 }, { "epoch": 0.0064637257431134805, "grad_norm": 1.0390625, "learning_rate": 7.715266208325177e-05, "loss": 1.6029, "step": 41 }, { "epoch": 0.006621377590506492, "grad_norm": 1.1328125, "learning_rate": 7.71481801349679e-05, "loss": 1.7693, "step": 42 }, { "epoch": 0.006779029437899504, "grad_norm": 1.140625, "learning_rate": 7.714369823511716e-05, "loss": 1.6219, "step": 43 }, { "epoch": 0.006936681285292516, "grad_norm": 0.9765625, "learning_rate": 7.713921638370905e-05, "loss": 1.255, "step": 44 }, { "epoch": 0.007094333132685528, "grad_norm": 0.98828125, "learning_rate": 7.713473458075306e-05, "loss": 1.646, "step": 45 }, { "epoch": 0.0072519849800785395, "grad_norm": 1.1015625, "learning_rate": 7.713025282625872e-05, "loss": 1.4255, "step": 46 }, { "epoch": 0.007409636827471551, "grad_norm": 1.0234375, "learning_rate": 7.712577112023543e-05, "loss": 1.4606, "step": 47 }, { "epoch": 0.007567288674864563, "grad_norm": 1.046875, "learning_rate": 7.712128946269281e-05, "loss": 1.3715, "step": 48 }, { "epoch": 0.0077249405222575744, "grad_norm": 1.03125, "learning_rate": 7.711680785364032e-05, "loss": 1.3441, "step": 49 }, { "epoch": 0.007882592369650587, "grad_norm": 1.078125, "learning_rate": 7.711232629308744e-05, "loss": 1.4596, "step": 50 }, { "epoch": 0.008040244217043599, "grad_norm": 1.0859375, "learning_rate": 7.710784478104367e-05, "loss": 1.3161, "step": 51 }, { "epoch": 0.00819789606443661, "grad_norm": 2.59375, "learning_rate": 7.710336331751848e-05, "loss": 1.5608, "step": 52 }, { "epoch": 0.008355547911829622, "grad_norm": 0.96484375, "learning_rate": 7.709888190252144e-05, "loss": 1.3919, "step": 53 }, { "epoch": 0.008513199759222633, "grad_norm": 1.171875, "learning_rate": 7.709440053606203e-05, "loss": 1.6252, "step": 54 }, { "epoch": 0.008670851606615645, "grad_norm": 1.1796875, "learning_rate": 7.708991921814971e-05, "loss": 1.5166, "step": 55 }, { "epoch": 0.008828503454008657, "grad_norm": 1.0625, "learning_rate": 7.7085437948794e-05, "loss": 1.3341, "step": 56 }, { "epoch": 0.008986155301401668, "grad_norm": 1.046875, "learning_rate": 7.708095672800437e-05, "loss": 1.6018, "step": 57 }, { "epoch": 0.00914380714879468, "grad_norm": 1.125, "learning_rate": 7.707647555579038e-05, "loss": 1.4213, "step": 58 }, { "epoch": 0.009301458996187692, "grad_norm": 1.2265625, "learning_rate": 7.707199443216145e-05, "loss": 1.5771, "step": 59 }, { "epoch": 0.009459110843580703, "grad_norm": 1.0546875, "learning_rate": 7.706751335712714e-05, "loss": 1.3836, "step": 60 }, { "epoch": 0.009616762690973715, "grad_norm": 0.9453125, "learning_rate": 7.706303233069692e-05, "loss": 1.4195, "step": 61 }, { "epoch": 0.009774414538366727, "grad_norm": 1.1171875, "learning_rate": 7.70585513528803e-05, "loss": 1.4836, "step": 62 }, { "epoch": 0.009932066385759738, "grad_norm": 1.0390625, "learning_rate": 7.705407042368675e-05, "loss": 1.5355, "step": 63 }, { "epoch": 0.01008971823315275, "grad_norm": 1.1328125, "learning_rate": 7.704958954312581e-05, "loss": 1.5794, "step": 64 }, { "epoch": 0.010247370080545761, "grad_norm": 0.96484375, "learning_rate": 7.704510871120695e-05, "loss": 1.3337, "step": 65 }, { "epoch": 0.010405021927938773, "grad_norm": 1.1953125, "learning_rate": 7.704062792793967e-05, "loss": 1.7244, "step": 66 }, { "epoch": 0.010562673775331785, "grad_norm": 1.0234375, "learning_rate": 7.703614719333344e-05, "loss": 1.3335, "step": 67 }, { "epoch": 0.010720325622724796, "grad_norm": 1.0625, "learning_rate": 7.703166650739779e-05, "loss": 1.3979, "step": 68 }, { "epoch": 0.01087797747011781, "grad_norm": 1.1875, "learning_rate": 7.702718587014224e-05, "loss": 1.3189, "step": 69 }, { "epoch": 0.011035629317510821, "grad_norm": 1.140625, "learning_rate": 7.702270528157625e-05, "loss": 1.323, "step": 70 }, { "epoch": 0.011193281164903833, "grad_norm": 1.0234375, "learning_rate": 7.70182247417093e-05, "loss": 1.2675, "step": 71 }, { "epoch": 0.011350933012296845, "grad_norm": 1.09375, "learning_rate": 7.701374425055091e-05, "loss": 1.4025, "step": 72 }, { "epoch": 0.011508584859689856, "grad_norm": 1.0390625, "learning_rate": 7.700926380811058e-05, "loss": 1.4356, "step": 73 }, { "epoch": 0.011666236707082868, "grad_norm": 1.03125, "learning_rate": 7.700478341439782e-05, "loss": 1.2854, "step": 74 }, { "epoch": 0.01182388855447588, "grad_norm": 1.0546875, "learning_rate": 7.70003030694221e-05, "loss": 1.4212, "step": 75 }, { "epoch": 0.011981540401868891, "grad_norm": 1.1796875, "learning_rate": 7.699582277319292e-05, "loss": 1.6788, "step": 76 }, { "epoch": 0.012139192249261903, "grad_norm": 1.03125, "learning_rate": 7.699134252571976e-05, "loss": 1.5051, "step": 77 }, { "epoch": 0.012296844096654914, "grad_norm": 1.109375, "learning_rate": 7.698686232701215e-05, "loss": 1.4897, "step": 78 }, { "epoch": 0.012454495944047926, "grad_norm": 1.140625, "learning_rate": 7.698238217707958e-05, "loss": 1.6293, "step": 79 }, { "epoch": 0.012612147791440938, "grad_norm": 1.171875, "learning_rate": 7.697790207593152e-05, "loss": 1.6356, "step": 80 }, { "epoch": 0.01276979963883395, "grad_norm": 1.0859375, "learning_rate": 7.69734220235775e-05, "loss": 1.6491, "step": 81 }, { "epoch": 0.012927451486226961, "grad_norm": 1.0234375, "learning_rate": 7.696894202002699e-05, "loss": 1.4218, "step": 82 }, { "epoch": 0.013085103333619973, "grad_norm": 1.1484375, "learning_rate": 7.696446206528946e-05, "loss": 1.5862, "step": 83 }, { "epoch": 0.013242755181012984, "grad_norm": 1.1875, "learning_rate": 7.695998215937448e-05, "loss": 1.5699, "step": 84 }, { "epoch": 0.013400407028405996, "grad_norm": 1.1171875, "learning_rate": 7.695550230229149e-05, "loss": 1.2374, "step": 85 }, { "epoch": 0.013558058875799008, "grad_norm": 1.1875, "learning_rate": 7.695102249404999e-05, "loss": 1.4611, "step": 86 }, { "epoch": 0.01371571072319202, "grad_norm": 0.98046875, "learning_rate": 7.694654273465949e-05, "loss": 1.215, "step": 87 }, { "epoch": 0.013873362570585033, "grad_norm": 1.0625, "learning_rate": 7.694206302412942e-05, "loss": 1.5837, "step": 88 }, { "epoch": 0.014031014417978044, "grad_norm": 1.0859375, "learning_rate": 7.693758336246937e-05, "loss": 1.4436, "step": 89 }, { "epoch": 0.014188666265371056, "grad_norm": 1.15625, "learning_rate": 7.69331037496888e-05, "loss": 1.6648, "step": 90 }, { "epoch": 0.014346318112764067, "grad_norm": 1.046875, "learning_rate": 7.69286241857972e-05, "loss": 1.3123, "step": 91 }, { "epoch": 0.014503969960157079, "grad_norm": 0.96875, "learning_rate": 7.692414467080407e-05, "loss": 1.3214, "step": 92 }, { "epoch": 0.01466162180755009, "grad_norm": 1.046875, "learning_rate": 7.691966520471885e-05, "loss": 1.4412, "step": 93 }, { "epoch": 0.014819273654943102, "grad_norm": 1.1015625, "learning_rate": 7.691518578755112e-05, "loss": 1.4515, "step": 94 }, { "epoch": 0.014976925502336114, "grad_norm": 1.0859375, "learning_rate": 7.691070641931033e-05, "loss": 1.5151, "step": 95 }, { "epoch": 0.015134577349729126, "grad_norm": 1.15625, "learning_rate": 7.6906227100006e-05, "loss": 1.5282, "step": 96 }, { "epoch": 0.015292229197122137, "grad_norm": 1.1015625, "learning_rate": 7.690174782964756e-05, "loss": 1.3737, "step": 97 }, { "epoch": 0.015449881044515149, "grad_norm": 1.046875, "learning_rate": 7.689726860824453e-05, "loss": 1.4524, "step": 98 }, { "epoch": 0.01560753289190816, "grad_norm": 0.98046875, "learning_rate": 7.689278943580646e-05, "loss": 1.5046, "step": 99 }, { "epoch": 0.015765184739301174, "grad_norm": 1.15625, "learning_rate": 7.68883103123428e-05, "loss": 1.5331, "step": 100 }, { "epoch": 0.015922836586694186, "grad_norm": 0.94140625, "learning_rate": 7.688383123786305e-05, "loss": 1.2469, "step": 101 }, { "epoch": 0.016080488434087197, "grad_norm": 1.1484375, "learning_rate": 7.68793522123767e-05, "loss": 1.5894, "step": 102 }, { "epoch": 0.01623814028148021, "grad_norm": 1.0078125, "learning_rate": 7.68748732358932e-05, "loss": 1.1792, "step": 103 }, { "epoch": 0.01639579212887322, "grad_norm": 1.1328125, "learning_rate": 7.687039430842212e-05, "loss": 1.4037, "step": 104 }, { "epoch": 0.016553443976266232, "grad_norm": 1.046875, "learning_rate": 7.686591542997292e-05, "loss": 1.5406, "step": 105 }, { "epoch": 0.016711095823659244, "grad_norm": 0.9921875, "learning_rate": 7.686143660055509e-05, "loss": 1.3978, "step": 106 }, { "epoch": 0.016868747671052255, "grad_norm": 1.125, "learning_rate": 7.685695782017812e-05, "loss": 1.3442, "step": 107 }, { "epoch": 0.017026399518445267, "grad_norm": 1.0234375, "learning_rate": 7.685247908885146e-05, "loss": 1.3122, "step": 108 }, { "epoch": 0.01718405136583828, "grad_norm": 2.109375, "learning_rate": 7.684800040658468e-05, "loss": 1.1921, "step": 109 }, { "epoch": 0.01734170321323129, "grad_norm": 1.0703125, "learning_rate": 7.684352177338726e-05, "loss": 1.3773, "step": 110 }, { "epoch": 0.017499355060624302, "grad_norm": 1.0546875, "learning_rate": 7.683904318926866e-05, "loss": 1.1859, "step": 111 }, { "epoch": 0.017657006908017314, "grad_norm": 1.0625, "learning_rate": 7.68345646542384e-05, "loss": 1.4292, "step": 112 }, { "epoch": 0.017814658755410325, "grad_norm": 1.0625, "learning_rate": 7.68300861683059e-05, "loss": 1.2496, "step": 113 }, { "epoch": 0.017972310602803337, "grad_norm": 0.96875, "learning_rate": 7.682560773148075e-05, "loss": 1.3713, "step": 114 }, { "epoch": 0.01812996245019635, "grad_norm": 0.94921875, "learning_rate": 7.68211293437724e-05, "loss": 1.2039, "step": 115 }, { "epoch": 0.01828761429758936, "grad_norm": 1.1015625, "learning_rate": 7.681665100519034e-05, "loss": 1.5079, "step": 116 }, { "epoch": 0.01844526614498237, "grad_norm": 1.1640625, "learning_rate": 7.681217271574407e-05, "loss": 1.5297, "step": 117 }, { "epoch": 0.018602917992375383, "grad_norm": 1.1328125, "learning_rate": 7.680769447544303e-05, "loss": 1.2977, "step": 118 }, { "epoch": 0.018760569839768395, "grad_norm": 1.125, "learning_rate": 7.680321628429679e-05, "loss": 1.3847, "step": 119 }, { "epoch": 0.018918221687161407, "grad_norm": 1.0859375, "learning_rate": 7.679873814231481e-05, "loss": 1.7289, "step": 120 }, { "epoch": 0.019075873534554418, "grad_norm": 1.015625, "learning_rate": 7.67942600495066e-05, "loss": 1.3275, "step": 121 }, { "epoch": 0.01923352538194743, "grad_norm": 1.03125, "learning_rate": 7.67897820058816e-05, "loss": 1.2296, "step": 122 }, { "epoch": 0.01939117722934044, "grad_norm": 0.87109375, "learning_rate": 7.678530401144932e-05, "loss": 1.0845, "step": 123 }, { "epoch": 0.019548829076733453, "grad_norm": 0.9921875, "learning_rate": 7.678082606621929e-05, "loss": 1.2004, "step": 124 }, { "epoch": 0.019706480924126465, "grad_norm": 1.28125, "learning_rate": 7.677634817020096e-05, "loss": 1.2791, "step": 125 }, { "epoch": 0.019864132771519476, "grad_norm": 0.9921875, "learning_rate": 7.677187032340381e-05, "loss": 1.2597, "step": 126 }, { "epoch": 0.020021784618912488, "grad_norm": 0.9765625, "learning_rate": 7.676739252583739e-05, "loss": 1.1538, "step": 127 }, { "epoch": 0.0201794364663055, "grad_norm": 0.92578125, "learning_rate": 7.676291477751111e-05, "loss": 1.2538, "step": 128 }, { "epoch": 0.02033708831369851, "grad_norm": 1.1328125, "learning_rate": 7.675843707843455e-05, "loss": 1.2804, "step": 129 }, { "epoch": 0.020494740161091523, "grad_norm": 1.0546875, "learning_rate": 7.675395942861715e-05, "loss": 1.1411, "step": 130 }, { "epoch": 0.020652392008484535, "grad_norm": 1.0703125, "learning_rate": 7.674948182806839e-05, "loss": 1.1425, "step": 131 }, { "epoch": 0.020810043855877546, "grad_norm": 1.0234375, "learning_rate": 7.674500427679779e-05, "loss": 1.595, "step": 132 }, { "epoch": 0.020967695703270558, "grad_norm": 1.1171875, "learning_rate": 7.674052677481477e-05, "loss": 1.6207, "step": 133 }, { "epoch": 0.02112534755066357, "grad_norm": 0.99609375, "learning_rate": 7.673604932212892e-05, "loss": 1.1974, "step": 134 }, { "epoch": 0.02128299939805658, "grad_norm": 1.046875, "learning_rate": 7.673157191874968e-05, "loss": 1.4377, "step": 135 }, { "epoch": 0.021440651245449593, "grad_norm": 1.0703125, "learning_rate": 7.672709456468655e-05, "loss": 1.4708, "step": 136 }, { "epoch": 0.021598303092842608, "grad_norm": 1.1015625, "learning_rate": 7.672261725994902e-05, "loss": 1.2266, "step": 137 }, { "epoch": 0.02175595494023562, "grad_norm": 1.015625, "learning_rate": 7.671814000454651e-05, "loss": 1.2505, "step": 138 }, { "epoch": 0.02191360678762863, "grad_norm": 1.1015625, "learning_rate": 7.671366279848863e-05, "loss": 1.3418, "step": 139 }, { "epoch": 0.022071258635021643, "grad_norm": 1.0078125, "learning_rate": 7.67091856417848e-05, "loss": 1.2083, "step": 140 }, { "epoch": 0.022228910482414654, "grad_norm": 1.109375, "learning_rate": 7.670470853444452e-05, "loss": 1.4812, "step": 141 }, { "epoch": 0.022386562329807666, "grad_norm": 1.0546875, "learning_rate": 7.670023147647729e-05, "loss": 1.4736, "step": 142 }, { "epoch": 0.022544214177200678, "grad_norm": 0.97265625, "learning_rate": 7.669575446789255e-05, "loss": 1.2786, "step": 143 }, { "epoch": 0.02270186602459369, "grad_norm": 1.109375, "learning_rate": 7.669127750869987e-05, "loss": 1.3641, "step": 144 }, { "epoch": 0.0228595178719867, "grad_norm": 1.1015625, "learning_rate": 7.668680059890866e-05, "loss": 1.2386, "step": 145 }, { "epoch": 0.023017169719379713, "grad_norm": 1.0859375, "learning_rate": 7.668232373852847e-05, "loss": 1.5634, "step": 146 }, { "epoch": 0.023174821566772724, "grad_norm": 0.94921875, "learning_rate": 7.667784692756875e-05, "loss": 1.3462, "step": 147 }, { "epoch": 0.023332473414165736, "grad_norm": 1.03125, "learning_rate": 7.667337016603899e-05, "loss": 1.2795, "step": 148 }, { "epoch": 0.023490125261558747, "grad_norm": 1.046875, "learning_rate": 7.666889345394872e-05, "loss": 1.3205, "step": 149 }, { "epoch": 0.02364777710895176, "grad_norm": 0.9765625, "learning_rate": 7.666441679130737e-05, "loss": 1.2658, "step": 150 }, { "epoch": 0.02380542895634477, "grad_norm": 1.0625, "learning_rate": 7.665994017812447e-05, "loss": 1.3257, "step": 151 }, { "epoch": 0.023963080803737782, "grad_norm": 1.25, "learning_rate": 7.66554636144095e-05, "loss": 1.5763, "step": 152 }, { "epoch": 0.024120732651130794, "grad_norm": 0.875, "learning_rate": 7.665098710017188e-05, "loss": 1.1683, "step": 153 }, { "epoch": 0.024278384498523806, "grad_norm": 0.9375, "learning_rate": 7.66465106354212e-05, "loss": 1.3102, "step": 154 }, { "epoch": 0.024436036345916817, "grad_norm": 1.046875, "learning_rate": 7.664203422016693e-05, "loss": 1.3299, "step": 155 }, { "epoch": 0.02459368819330983, "grad_norm": 1.0546875, "learning_rate": 7.663755785441852e-05, "loss": 1.4491, "step": 156 }, { "epoch": 0.02475134004070284, "grad_norm": 1.1953125, "learning_rate": 7.663308153818547e-05, "loss": 1.4689, "step": 157 }, { "epoch": 0.024908991888095852, "grad_norm": 1.1953125, "learning_rate": 7.662860527147721e-05, "loss": 1.4654, "step": 158 }, { "epoch": 0.025066643735488864, "grad_norm": 1.1796875, "learning_rate": 7.662412905430337e-05, "loss": 1.4283, "step": 159 }, { "epoch": 0.025224295582881875, "grad_norm": 1.0, "learning_rate": 7.66196528866733e-05, "loss": 1.2221, "step": 160 }, { "epoch": 0.025381947430274887, "grad_norm": 1.0625, "learning_rate": 7.661517676859657e-05, "loss": 1.3822, "step": 161 }, { "epoch": 0.0255395992776679, "grad_norm": 1.1328125, "learning_rate": 7.661070070008263e-05, "loss": 1.3225, "step": 162 }, { "epoch": 0.02569725112506091, "grad_norm": 0.98828125, "learning_rate": 7.660622468114094e-05, "loss": 1.1407, "step": 163 }, { "epoch": 0.025854902972453922, "grad_norm": 1.0234375, "learning_rate": 7.660174871178106e-05, "loss": 1.5181, "step": 164 }, { "epoch": 0.026012554819846934, "grad_norm": 0.96484375, "learning_rate": 7.659727279201243e-05, "loss": 1.2308, "step": 165 }, { "epoch": 0.026170206667239945, "grad_norm": 1.1171875, "learning_rate": 7.659279692184453e-05, "loss": 1.3064, "step": 166 }, { "epoch": 0.026327858514632957, "grad_norm": 1.1484375, "learning_rate": 7.658832110128688e-05, "loss": 1.4267, "step": 167 }, { "epoch": 0.02648551036202597, "grad_norm": 1.0, "learning_rate": 7.658384533034893e-05, "loss": 1.2501, "step": 168 }, { "epoch": 0.02664316220941898, "grad_norm": 1.0625, "learning_rate": 7.657936960904018e-05, "loss": 1.2482, "step": 169 }, { "epoch": 0.026800814056811992, "grad_norm": 0.96484375, "learning_rate": 7.657489393737012e-05, "loss": 1.3088, "step": 170 }, { "epoch": 0.026958465904205003, "grad_norm": 1.015625, "learning_rate": 7.657041831534824e-05, "loss": 1.3537, "step": 171 }, { "epoch": 0.027116117751598015, "grad_norm": 0.98046875, "learning_rate": 7.656594274298402e-05, "loss": 1.287, "step": 172 }, { "epoch": 0.027273769598991027, "grad_norm": 0.99609375, "learning_rate": 7.65614672202869e-05, "loss": 1.3102, "step": 173 }, { "epoch": 0.02743142144638404, "grad_norm": 0.984375, "learning_rate": 7.655699174726645e-05, "loss": 1.2733, "step": 174 }, { "epoch": 0.02758907329377705, "grad_norm": 1.078125, "learning_rate": 7.655251632393213e-05, "loss": 1.4304, "step": 175 }, { "epoch": 0.027746725141170065, "grad_norm": 1.0546875, "learning_rate": 7.65480409502934e-05, "loss": 1.276, "step": 176 }, { "epoch": 0.027904376988563077, "grad_norm": 1.0625, "learning_rate": 7.654356562635976e-05, "loss": 1.3163, "step": 177 }, { "epoch": 0.02806202883595609, "grad_norm": 0.953125, "learning_rate": 7.653909035214065e-05, "loss": 1.4239, "step": 178 }, { "epoch": 0.0282196806833491, "grad_norm": 1.0546875, "learning_rate": 7.653461512764563e-05, "loss": 1.3669, "step": 179 }, { "epoch": 0.02837733253074211, "grad_norm": 1.1640625, "learning_rate": 7.653013995288416e-05, "loss": 1.263, "step": 180 }, { "epoch": 0.028534984378135123, "grad_norm": 0.96484375, "learning_rate": 7.652566482786572e-05, "loss": 1.249, "step": 181 }, { "epoch": 0.028692636225528135, "grad_norm": 0.9375, "learning_rate": 7.652118975259979e-05, "loss": 1.2723, "step": 182 }, { "epoch": 0.028850288072921147, "grad_norm": 1.0078125, "learning_rate": 7.651671472709581e-05, "loss": 1.3792, "step": 183 }, { "epoch": 0.029007939920314158, "grad_norm": 1.0390625, "learning_rate": 7.651223975136336e-05, "loss": 1.6424, "step": 184 }, { "epoch": 0.02916559176770717, "grad_norm": 1.25, "learning_rate": 7.650776482541187e-05, "loss": 1.2534, "step": 185 }, { "epoch": 0.02932324361510018, "grad_norm": 1.046875, "learning_rate": 7.650328994925083e-05, "loss": 1.211, "step": 186 }, { "epoch": 0.029480895462493193, "grad_norm": 1.984375, "learning_rate": 7.649881512288972e-05, "loss": 1.4544, "step": 187 }, { "epoch": 0.029638547309886205, "grad_norm": 0.98046875, "learning_rate": 7.649434034633802e-05, "loss": 1.3, "step": 188 }, { "epoch": 0.029796199157279216, "grad_norm": 1.09375, "learning_rate": 7.648986561960523e-05, "loss": 1.4345, "step": 189 }, { "epoch": 0.029953851004672228, "grad_norm": 0.94921875, "learning_rate": 7.648539094270083e-05, "loss": 1.0856, "step": 190 }, { "epoch": 0.03011150285206524, "grad_norm": 0.97265625, "learning_rate": 7.64809163156343e-05, "loss": 1.2131, "step": 191 }, { "epoch": 0.03026915469945825, "grad_norm": 0.9921875, "learning_rate": 7.647644173841512e-05, "loss": 1.6161, "step": 192 }, { "epoch": 0.030426806546851263, "grad_norm": 1.015625, "learning_rate": 7.647196721105274e-05, "loss": 1.4315, "step": 193 }, { "epoch": 0.030584458394244275, "grad_norm": 0.95703125, "learning_rate": 7.646749273355674e-05, "loss": 1.4361, "step": 194 }, { "epoch": 0.030742110241637286, "grad_norm": 1.0859375, "learning_rate": 7.646301830593652e-05, "loss": 1.3294, "step": 195 }, { "epoch": 0.030899762089030298, "grad_norm": 0.96875, "learning_rate": 7.645854392820158e-05, "loss": 1.4119, "step": 196 }, { "epoch": 0.03105741393642331, "grad_norm": 1.09375, "learning_rate": 7.645406960036143e-05, "loss": 1.2016, "step": 197 }, { "epoch": 0.03121506578381632, "grad_norm": 0.98828125, "learning_rate": 7.644959532242548e-05, "loss": 1.0864, "step": 198 }, { "epoch": 0.031372717631209336, "grad_norm": 1.171875, "learning_rate": 7.644512109440332e-05, "loss": 1.4124, "step": 199 }, { "epoch": 0.03153036947860235, "grad_norm": 1.0546875, "learning_rate": 7.644064691630437e-05, "loss": 1.1773, "step": 200 }, { "epoch": 0.03168802132599536, "grad_norm": 1.9296875, "learning_rate": 7.643617278813812e-05, "loss": 1.2825, "step": 201 }, { "epoch": 0.03184567317338837, "grad_norm": 1.0078125, "learning_rate": 7.643169870991408e-05, "loss": 1.398, "step": 202 }, { "epoch": 0.03200332502078138, "grad_norm": 0.98828125, "learning_rate": 7.642722468164164e-05, "loss": 1.2979, "step": 203 }, { "epoch": 0.032160976868174394, "grad_norm": 1.0546875, "learning_rate": 7.64227507033304e-05, "loss": 1.3942, "step": 204 }, { "epoch": 0.032318628715567406, "grad_norm": 0.921875, "learning_rate": 7.64182767749898e-05, "loss": 1.2042, "step": 205 }, { "epoch": 0.03247628056296042, "grad_norm": 0.9765625, "learning_rate": 7.64138028966293e-05, "loss": 1.519, "step": 206 }, { "epoch": 0.03263393241035343, "grad_norm": 1.140625, "learning_rate": 7.64093290682584e-05, "loss": 1.4504, "step": 207 }, { "epoch": 0.03279158425774644, "grad_norm": 1.1015625, "learning_rate": 7.640485528988656e-05, "loss": 1.6083, "step": 208 }, { "epoch": 0.03294923610513945, "grad_norm": 1.0703125, "learning_rate": 7.640038156152329e-05, "loss": 1.4028, "step": 209 }, { "epoch": 0.033106887952532464, "grad_norm": 1.0, "learning_rate": 7.639590788317809e-05, "loss": 1.1125, "step": 210 }, { "epoch": 0.033264539799925476, "grad_norm": 0.97265625, "learning_rate": 7.639143425486039e-05, "loss": 1.1355, "step": 211 }, { "epoch": 0.03342219164731849, "grad_norm": 1.046875, "learning_rate": 7.638696067657969e-05, "loss": 1.2931, "step": 212 }, { "epoch": 0.0335798434947115, "grad_norm": 0.94921875, "learning_rate": 7.638248714834545e-05, "loss": 1.2178, "step": 213 }, { "epoch": 0.03373749534210451, "grad_norm": 1.0625, "learning_rate": 7.637801367016723e-05, "loss": 1.6365, "step": 214 }, { "epoch": 0.03389514718949752, "grad_norm": 0.90625, "learning_rate": 7.637354024205446e-05, "loss": 1.1059, "step": 215 }, { "epoch": 0.034052799036890534, "grad_norm": 1.046875, "learning_rate": 7.63690668640166e-05, "loss": 1.2119, "step": 216 }, { "epoch": 0.034210450884283546, "grad_norm": 0.98046875, "learning_rate": 7.636459353606317e-05, "loss": 1.4586, "step": 217 }, { "epoch": 0.03436810273167656, "grad_norm": 1.015625, "learning_rate": 7.636012025820358e-05, "loss": 1.0742, "step": 218 }, { "epoch": 0.03452575457906957, "grad_norm": 1.1875, "learning_rate": 7.63556470304474e-05, "loss": 1.3508, "step": 219 }, { "epoch": 0.03468340642646258, "grad_norm": 1.140625, "learning_rate": 7.635117385280409e-05, "loss": 1.2963, "step": 220 }, { "epoch": 0.03484105827385559, "grad_norm": 1.0703125, "learning_rate": 7.634670072528311e-05, "loss": 1.323, "step": 221 }, { "epoch": 0.034998710121248604, "grad_norm": 1.0703125, "learning_rate": 7.634222764789394e-05, "loss": 1.1162, "step": 222 }, { "epoch": 0.035156361968641615, "grad_norm": 1.1953125, "learning_rate": 7.633775462064602e-05, "loss": 1.2893, "step": 223 }, { "epoch": 0.03531401381603463, "grad_norm": 0.9453125, "learning_rate": 7.633328164354894e-05, "loss": 1.2837, "step": 224 }, { "epoch": 0.03547166566342764, "grad_norm": 1.0625, "learning_rate": 7.632880871661212e-05, "loss": 1.3051, "step": 225 }, { "epoch": 0.03562931751082065, "grad_norm": 1.1328125, "learning_rate": 7.632433583984501e-05, "loss": 1.6281, "step": 226 }, { "epoch": 0.03578696935821366, "grad_norm": 1.0625, "learning_rate": 7.631986301325713e-05, "loss": 1.2134, "step": 227 }, { "epoch": 0.035944621205606674, "grad_norm": 1.015625, "learning_rate": 7.631539023685794e-05, "loss": 1.3279, "step": 228 }, { "epoch": 0.036102273052999685, "grad_norm": 1.0234375, "learning_rate": 7.631091751065691e-05, "loss": 1.1386, "step": 229 }, { "epoch": 0.0362599249003927, "grad_norm": 1.09375, "learning_rate": 7.630644483466354e-05, "loss": 1.2387, "step": 230 }, { "epoch": 0.03641757674778571, "grad_norm": 1.078125, "learning_rate": 7.630197220888733e-05, "loss": 1.2119, "step": 231 }, { "epoch": 0.03657522859517872, "grad_norm": 1.1953125, "learning_rate": 7.629749963333772e-05, "loss": 1.4202, "step": 232 }, { "epoch": 0.03673288044257173, "grad_norm": 0.97265625, "learning_rate": 7.629302710802422e-05, "loss": 1.2874, "step": 233 }, { "epoch": 0.03689053228996474, "grad_norm": 0.9765625, "learning_rate": 7.628855463295626e-05, "loss": 1.1393, "step": 234 }, { "epoch": 0.037048184137357755, "grad_norm": 1.1484375, "learning_rate": 7.628408220814339e-05, "loss": 1.5517, "step": 235 }, { "epoch": 0.03720583598475077, "grad_norm": 1.0703125, "learning_rate": 7.627960983359505e-05, "loss": 1.2827, "step": 236 }, { "epoch": 0.03736348783214378, "grad_norm": 1.453125, "learning_rate": 7.627513750932071e-05, "loss": 1.3977, "step": 237 }, { "epoch": 0.03752113967953679, "grad_norm": 0.953125, "learning_rate": 7.627066523532986e-05, "loss": 1.0914, "step": 238 }, { "epoch": 0.0376787915269298, "grad_norm": 0.90625, "learning_rate": 7.626619301163195e-05, "loss": 0.9954, "step": 239 }, { "epoch": 0.03783644337432281, "grad_norm": 1.0390625, "learning_rate": 7.626172083823652e-05, "loss": 1.1874, "step": 240 }, { "epoch": 0.037994095221715825, "grad_norm": 1.015625, "learning_rate": 7.625724871515302e-05, "loss": 1.1796, "step": 241 }, { "epoch": 0.038151747069108836, "grad_norm": 1.1328125, "learning_rate": 7.625277664239092e-05, "loss": 1.4228, "step": 242 }, { "epoch": 0.03830939891650185, "grad_norm": 1.1796875, "learning_rate": 7.62483046199597e-05, "loss": 1.4132, "step": 243 }, { "epoch": 0.03846705076389486, "grad_norm": 0.9296875, "learning_rate": 7.624383264786881e-05, "loss": 1.1635, "step": 244 }, { "epoch": 0.03862470261128787, "grad_norm": 1.046875, "learning_rate": 7.623936072612779e-05, "loss": 1.4281, "step": 245 }, { "epoch": 0.03878235445868088, "grad_norm": 1.078125, "learning_rate": 7.623488885474609e-05, "loss": 1.3828, "step": 246 }, { "epoch": 0.038940006306073895, "grad_norm": 1.2578125, "learning_rate": 7.623041703373318e-05, "loss": 1.1873, "step": 247 }, { "epoch": 0.039097658153466906, "grad_norm": 1.109375, "learning_rate": 7.622594526309856e-05, "loss": 1.4153, "step": 248 }, { "epoch": 0.03925531000085992, "grad_norm": 0.98828125, "learning_rate": 7.622147354285163e-05, "loss": 1.2454, "step": 249 }, { "epoch": 0.03941296184825293, "grad_norm": 1.21875, "learning_rate": 7.621700187300198e-05, "loss": 1.4777, "step": 250 }, { "epoch": 0.03957061369564594, "grad_norm": 1.0703125, "learning_rate": 7.621253025355902e-05, "loss": 1.2219, "step": 251 }, { "epoch": 0.03972826554303895, "grad_norm": 1.0234375, "learning_rate": 7.620805868453226e-05, "loss": 1.4007, "step": 252 }, { "epoch": 0.039885917390431964, "grad_norm": 1.109375, "learning_rate": 7.620358716593114e-05, "loss": 1.4307, "step": 253 }, { "epoch": 0.040043569237824976, "grad_norm": 1.0, "learning_rate": 7.619911569776515e-05, "loss": 1.233, "step": 254 }, { "epoch": 0.04020122108521799, "grad_norm": 1.328125, "learning_rate": 7.619464428004381e-05, "loss": 1.6911, "step": 255 }, { "epoch": 0.040358872932611, "grad_norm": 0.97265625, "learning_rate": 7.619017291277653e-05, "loss": 1.237, "step": 256 }, { "epoch": 0.04051652478000401, "grad_norm": 1.21875, "learning_rate": 7.618570159597284e-05, "loss": 1.4241, "step": 257 }, { "epoch": 0.04067417662739702, "grad_norm": 1.0, "learning_rate": 7.618123032964218e-05, "loss": 1.3114, "step": 258 }, { "epoch": 0.040831828474790034, "grad_norm": 0.984375, "learning_rate": 7.617675911379401e-05, "loss": 1.4388, "step": 259 }, { "epoch": 0.040989480322183046, "grad_norm": 1.203125, "learning_rate": 7.617228794843787e-05, "loss": 1.4383, "step": 260 }, { "epoch": 0.04114713216957606, "grad_norm": 0.87109375, "learning_rate": 7.616781683358321e-05, "loss": 0.8977, "step": 261 }, { "epoch": 0.04130478401696907, "grad_norm": 1.0234375, "learning_rate": 7.61633457692395e-05, "loss": 1.1832, "step": 262 }, { "epoch": 0.04146243586436208, "grad_norm": 1.1171875, "learning_rate": 7.615887475541623e-05, "loss": 1.437, "step": 263 }, { "epoch": 0.04162008771175509, "grad_norm": 0.953125, "learning_rate": 7.61544037921228e-05, "loss": 1.2289, "step": 264 }, { "epoch": 0.041777739559148104, "grad_norm": 1.0625, "learning_rate": 7.614993287936878e-05, "loss": 1.1745, "step": 265 }, { "epoch": 0.041935391406541116, "grad_norm": 1.03125, "learning_rate": 7.614546201716363e-05, "loss": 1.2957, "step": 266 }, { "epoch": 0.04209304325393413, "grad_norm": 0.9609375, "learning_rate": 7.614099120551681e-05, "loss": 1.3913, "step": 267 }, { "epoch": 0.04225069510132714, "grad_norm": 1.0703125, "learning_rate": 7.61365204444378e-05, "loss": 1.3907, "step": 268 }, { "epoch": 0.04240834694872015, "grad_norm": 1.1171875, "learning_rate": 7.613204973393601e-05, "loss": 1.2667, "step": 269 }, { "epoch": 0.04256599879611316, "grad_norm": 0.99609375, "learning_rate": 7.612757907402103e-05, "loss": 1.2855, "step": 270 }, { "epoch": 0.042723650643506174, "grad_norm": 1.0234375, "learning_rate": 7.61231084647023e-05, "loss": 1.26, "step": 271 }, { "epoch": 0.042881302490899185, "grad_norm": 1.1484375, "learning_rate": 7.611863790598925e-05, "loss": 1.4559, "step": 272 }, { "epoch": 0.0430389543382922, "grad_norm": 1.0234375, "learning_rate": 7.61141673978914e-05, "loss": 1.2738, "step": 273 }, { "epoch": 0.043196606185685216, "grad_norm": 0.9609375, "learning_rate": 7.610969694041819e-05, "loss": 1.1966, "step": 274 }, { "epoch": 0.04335425803307823, "grad_norm": 1.0078125, "learning_rate": 7.610522653357912e-05, "loss": 1.2606, "step": 275 }, { "epoch": 0.04351190988047124, "grad_norm": 1.0859375, "learning_rate": 7.610075617738364e-05, "loss": 1.3679, "step": 276 }, { "epoch": 0.04366956172786425, "grad_norm": 1.0859375, "learning_rate": 7.609628587184127e-05, "loss": 1.2724, "step": 277 }, { "epoch": 0.04382721357525726, "grad_norm": 0.98828125, "learning_rate": 7.609181561696142e-05, "loss": 1.3065, "step": 278 }, { "epoch": 0.043984865422650274, "grad_norm": 1.1015625, "learning_rate": 7.608734541275361e-05, "loss": 1.4963, "step": 279 }, { "epoch": 0.044142517270043286, "grad_norm": 1.03125, "learning_rate": 7.608287525922731e-05, "loss": 1.2244, "step": 280 }, { "epoch": 0.0443001691174363, "grad_norm": 0.9609375, "learning_rate": 7.607840515639201e-05, "loss": 1.0684, "step": 281 }, { "epoch": 0.04445782096482931, "grad_norm": 1.1171875, "learning_rate": 7.607393510425714e-05, "loss": 1.2171, "step": 282 }, { "epoch": 0.04461547281222232, "grad_norm": 1.078125, "learning_rate": 7.606946510283222e-05, "loss": 1.4809, "step": 283 }, { "epoch": 0.04477312465961533, "grad_norm": 0.9375, "learning_rate": 7.606499515212663e-05, "loss": 1.138, "step": 284 }, { "epoch": 0.044930776507008344, "grad_norm": 1.03125, "learning_rate": 7.606052525214999e-05, "loss": 1.2274, "step": 285 }, { "epoch": 0.045088428354401355, "grad_norm": 0.98046875, "learning_rate": 7.605605540291167e-05, "loss": 1.2865, "step": 286 }, { "epoch": 0.04524608020179437, "grad_norm": 0.9921875, "learning_rate": 7.605158560442119e-05, "loss": 1.0963, "step": 287 }, { "epoch": 0.04540373204918738, "grad_norm": 1.03125, "learning_rate": 7.6047115856688e-05, "loss": 1.4318, "step": 288 }, { "epoch": 0.04556138389658039, "grad_norm": 0.96484375, "learning_rate": 7.604264615972154e-05, "loss": 1.1505, "step": 289 }, { "epoch": 0.0457190357439734, "grad_norm": 1.1484375, "learning_rate": 7.603817651353135e-05, "loss": 1.2147, "step": 290 }, { "epoch": 0.045876687591366413, "grad_norm": 1.0, "learning_rate": 7.60337069181269e-05, "loss": 1.2802, "step": 291 }, { "epoch": 0.046034339438759425, "grad_norm": 1.03125, "learning_rate": 7.602923737351762e-05, "loss": 1.2362, "step": 292 }, { "epoch": 0.04619199128615244, "grad_norm": 1.03125, "learning_rate": 7.602476787971301e-05, "loss": 1.1614, "step": 293 }, { "epoch": 0.04634964313354545, "grad_norm": 1.0546875, "learning_rate": 7.60202984367225e-05, "loss": 1.1971, "step": 294 }, { "epoch": 0.04650729498093846, "grad_norm": 1.0234375, "learning_rate": 7.601582904455563e-05, "loss": 1.3712, "step": 295 }, { "epoch": 0.04666494682833147, "grad_norm": 1.046875, "learning_rate": 7.601135970322184e-05, "loss": 1.251, "step": 296 }, { "epoch": 0.04682259867572448, "grad_norm": 1.046875, "learning_rate": 7.600689041273058e-05, "loss": 1.249, "step": 297 }, { "epoch": 0.046980250523117495, "grad_norm": 1.2109375, "learning_rate": 7.600242117309135e-05, "loss": 1.4933, "step": 298 }, { "epoch": 0.04713790237051051, "grad_norm": 1.015625, "learning_rate": 7.599795198431362e-05, "loss": 1.4416, "step": 299 }, { "epoch": 0.04729555421790352, "grad_norm": 1.078125, "learning_rate": 7.599348284640686e-05, "loss": 1.3171, "step": 300 }, { "epoch": 0.04745320606529653, "grad_norm": 2.9375, "learning_rate": 7.598901375938055e-05, "loss": 1.4638, "step": 301 }, { "epoch": 0.04761085791268954, "grad_norm": 1.1171875, "learning_rate": 7.598454472324416e-05, "loss": 1.3142, "step": 302 }, { "epoch": 0.04776850976008255, "grad_norm": 0.984375, "learning_rate": 7.598007573800713e-05, "loss": 1.4089, "step": 303 }, { "epoch": 0.047926161607475565, "grad_norm": 0.98828125, "learning_rate": 7.597560680367895e-05, "loss": 1.4437, "step": 304 }, { "epoch": 0.048083813454868576, "grad_norm": 1.0390625, "learning_rate": 7.597113792026913e-05, "loss": 1.2965, "step": 305 }, { "epoch": 0.04824146530226159, "grad_norm": 1.015625, "learning_rate": 7.596666908778709e-05, "loss": 1.4765, "step": 306 }, { "epoch": 0.0483991171496546, "grad_norm": 1.1484375, "learning_rate": 7.596220030624234e-05, "loss": 1.4994, "step": 307 }, { "epoch": 0.04855676899704761, "grad_norm": 1.03125, "learning_rate": 7.595773157564432e-05, "loss": 1.3982, "step": 308 }, { "epoch": 0.04871442084444062, "grad_norm": 0.93359375, "learning_rate": 7.595326289600248e-05, "loss": 1.1308, "step": 309 }, { "epoch": 0.048872072691833635, "grad_norm": 1.15625, "learning_rate": 7.594879426732636e-05, "loss": 1.3513, "step": 310 }, { "epoch": 0.049029724539226646, "grad_norm": 1.0390625, "learning_rate": 7.594432568962539e-05, "loss": 1.3763, "step": 311 }, { "epoch": 0.04918737638661966, "grad_norm": 1.140625, "learning_rate": 7.593985716290907e-05, "loss": 1.3615, "step": 312 }, { "epoch": 0.04934502823401267, "grad_norm": 0.99609375, "learning_rate": 7.593538868718683e-05, "loss": 1.2998, "step": 313 }, { "epoch": 0.04950268008140568, "grad_norm": 1.0078125, "learning_rate": 7.593092026246814e-05, "loss": 1.2916, "step": 314 }, { "epoch": 0.04966033192879869, "grad_norm": 1.15625, "learning_rate": 7.592645188876251e-05, "loss": 1.3156, "step": 315 }, { "epoch": 0.049817983776191704, "grad_norm": 1.0546875, "learning_rate": 7.592198356607937e-05, "loss": 1.2252, "step": 316 }, { "epoch": 0.049975635623584716, "grad_norm": 1.0078125, "learning_rate": 7.591751529442823e-05, "loss": 1.392, "step": 317 }, { "epoch": 0.05013328747097773, "grad_norm": 0.97265625, "learning_rate": 7.591304707381855e-05, "loss": 0.9968, "step": 318 }, { "epoch": 0.05029093931837074, "grad_norm": 1.078125, "learning_rate": 7.590857890425975e-05, "loss": 1.2946, "step": 319 }, { "epoch": 0.05044859116576375, "grad_norm": 0.9765625, "learning_rate": 7.590411078576137e-05, "loss": 1.1921, "step": 320 }, { "epoch": 0.05060624301315676, "grad_norm": 1.140625, "learning_rate": 7.589964271833286e-05, "loss": 1.2742, "step": 321 }, { "epoch": 0.050763894860549774, "grad_norm": 1.1640625, "learning_rate": 7.589517470198366e-05, "loss": 1.4172, "step": 322 }, { "epoch": 0.050921546707942786, "grad_norm": 1.0078125, "learning_rate": 7.589070673672327e-05, "loss": 1.405, "step": 323 }, { "epoch": 0.0510791985553358, "grad_norm": 1.046875, "learning_rate": 7.58862388225611e-05, "loss": 1.2666, "step": 324 }, { "epoch": 0.05123685040272881, "grad_norm": 1.0078125, "learning_rate": 7.588177095950673e-05, "loss": 1.4633, "step": 325 }, { "epoch": 0.05139450225012182, "grad_norm": 1.09375, "learning_rate": 7.587730314756954e-05, "loss": 1.2416, "step": 326 }, { "epoch": 0.05155215409751483, "grad_norm": 1.0, "learning_rate": 7.587283538675904e-05, "loss": 1.4502, "step": 327 }, { "epoch": 0.051709805944907844, "grad_norm": 1.046875, "learning_rate": 7.58683676770847e-05, "loss": 1.3921, "step": 328 }, { "epoch": 0.051867457792300856, "grad_norm": 1.3046875, "learning_rate": 7.586390001855591e-05, "loss": 1.236, "step": 329 }, { "epoch": 0.05202510963969387, "grad_norm": 0.9140625, "learning_rate": 7.585943241118227e-05, "loss": 1.1263, "step": 330 }, { "epoch": 0.05218276148708688, "grad_norm": 0.9140625, "learning_rate": 7.585496485497316e-05, "loss": 1.1593, "step": 331 }, { "epoch": 0.05234041333447989, "grad_norm": 1.0234375, "learning_rate": 7.585049734993809e-05, "loss": 1.3364, "step": 332 }, { "epoch": 0.0524980651818729, "grad_norm": 1.046875, "learning_rate": 7.584602989608651e-05, "loss": 1.1631, "step": 333 }, { "epoch": 0.052655717029265914, "grad_norm": 0.95703125, "learning_rate": 7.584156249342783e-05, "loss": 1.1316, "step": 334 }, { "epoch": 0.052813368876658925, "grad_norm": 1.3046875, "learning_rate": 7.583709514197162e-05, "loss": 1.2093, "step": 335 }, { "epoch": 0.05297102072405194, "grad_norm": 0.921875, "learning_rate": 7.583262784172733e-05, "loss": 1.2034, "step": 336 }, { "epoch": 0.05312867257144495, "grad_norm": 0.94921875, "learning_rate": 7.582816059270438e-05, "loss": 1.0787, "step": 337 }, { "epoch": 0.05328632441883796, "grad_norm": 0.984375, "learning_rate": 7.582369339491227e-05, "loss": 1.3247, "step": 338 }, { "epoch": 0.05344397626623097, "grad_norm": 1.1328125, "learning_rate": 7.581922624836045e-05, "loss": 1.4561, "step": 339 }, { "epoch": 0.053601628113623984, "grad_norm": 0.9375, "learning_rate": 7.58147591530584e-05, "loss": 1.226, "step": 340 }, { "epoch": 0.053759279961016995, "grad_norm": 1.046875, "learning_rate": 7.58102921090156e-05, "loss": 1.3249, "step": 341 }, { "epoch": 0.05391693180841001, "grad_norm": 0.9375, "learning_rate": 7.58058251162415e-05, "loss": 1.2785, "step": 342 }, { "epoch": 0.05407458365580302, "grad_norm": 1.078125, "learning_rate": 7.580135817474557e-05, "loss": 1.3761, "step": 343 }, { "epoch": 0.05423223550319603, "grad_norm": 1.125, "learning_rate": 7.579689128453723e-05, "loss": 1.3079, "step": 344 }, { "epoch": 0.05438988735058904, "grad_norm": 1.1875, "learning_rate": 7.579242444562605e-05, "loss": 1.3772, "step": 345 }, { "epoch": 0.05454753919798205, "grad_norm": 0.953125, "learning_rate": 7.578795765802143e-05, "loss": 1.0293, "step": 346 }, { "epoch": 0.054705191045375065, "grad_norm": 0.9453125, "learning_rate": 7.578349092173286e-05, "loss": 0.9691, "step": 347 }, { "epoch": 0.05486284289276808, "grad_norm": 1.0, "learning_rate": 7.577902423676978e-05, "loss": 1.2734, "step": 348 }, { "epoch": 0.05502049474016109, "grad_norm": 0.96484375, "learning_rate": 7.577455760314165e-05, "loss": 1.3084, "step": 349 }, { "epoch": 0.0551781465875541, "grad_norm": 1.0625, "learning_rate": 7.577009102085801e-05, "loss": 1.2957, "step": 350 }, { "epoch": 0.05533579843494711, "grad_norm": 1.015625, "learning_rate": 7.576562448992825e-05, "loss": 1.1018, "step": 351 }, { "epoch": 0.05549345028234013, "grad_norm": 1.0078125, "learning_rate": 7.576115801036187e-05, "loss": 1.1478, "step": 352 }, { "epoch": 0.05565110212973314, "grad_norm": 0.93359375, "learning_rate": 7.575669158216835e-05, "loss": 1.2483, "step": 353 }, { "epoch": 0.05580875397712615, "grad_norm": 1.5703125, "learning_rate": 7.575222520535709e-05, "loss": 1.1609, "step": 354 }, { "epoch": 0.055966405824519165, "grad_norm": 1.046875, "learning_rate": 7.574775887993764e-05, "loss": 1.1153, "step": 355 }, { "epoch": 0.05612405767191218, "grad_norm": 0.96875, "learning_rate": 7.574329260591942e-05, "loss": 1.2138, "step": 356 }, { "epoch": 0.05628170951930519, "grad_norm": 0.91015625, "learning_rate": 7.573882638331192e-05, "loss": 1.2048, "step": 357 }, { "epoch": 0.0564393613666982, "grad_norm": 0.8671875, "learning_rate": 7.573436021212456e-05, "loss": 1.2139, "step": 358 }, { "epoch": 0.05659701321409121, "grad_norm": 1.1171875, "learning_rate": 7.572989409236684e-05, "loss": 1.3339, "step": 359 }, { "epoch": 0.05675466506148422, "grad_norm": 0.94140625, "learning_rate": 7.572542802404825e-05, "loss": 1.181, "step": 360 }, { "epoch": 0.056912316908877235, "grad_norm": 1.1875, "learning_rate": 7.572096200717821e-05, "loss": 1.365, "step": 361 }, { "epoch": 0.057069968756270247, "grad_norm": 1.078125, "learning_rate": 7.571649604176622e-05, "loss": 1.3342, "step": 362 }, { "epoch": 0.05722762060366326, "grad_norm": 1.0, "learning_rate": 7.571203012782172e-05, "loss": 1.1962, "step": 363 }, { "epoch": 0.05738527245105627, "grad_norm": 0.8984375, "learning_rate": 7.570756426535414e-05, "loss": 1.0502, "step": 364 }, { "epoch": 0.05754292429844928, "grad_norm": 2.125, "learning_rate": 7.570309845437302e-05, "loss": 1.2771, "step": 365 }, { "epoch": 0.05770057614584229, "grad_norm": 1.0234375, "learning_rate": 7.569863269488782e-05, "loss": 1.2067, "step": 366 }, { "epoch": 0.057858227993235305, "grad_norm": 0.92578125, "learning_rate": 7.569416698690797e-05, "loss": 1.224, "step": 367 }, { "epoch": 0.058015879840628316, "grad_norm": 0.97265625, "learning_rate": 7.568970133044293e-05, "loss": 1.3076, "step": 368 }, { "epoch": 0.05817353168802133, "grad_norm": 1.0078125, "learning_rate": 7.568523572550219e-05, "loss": 1.2475, "step": 369 }, { "epoch": 0.05833118353541434, "grad_norm": 1.1484375, "learning_rate": 7.568077017209516e-05, "loss": 1.4433, "step": 370 }, { "epoch": 0.05848883538280735, "grad_norm": 1.03125, "learning_rate": 7.567630467023138e-05, "loss": 1.4507, "step": 371 }, { "epoch": 0.05864648723020036, "grad_norm": 0.9921875, "learning_rate": 7.567183921992031e-05, "loss": 1.2626, "step": 372 }, { "epoch": 0.058804139077593374, "grad_norm": 1.328125, "learning_rate": 7.566737382117136e-05, "loss": 1.2898, "step": 373 }, { "epoch": 0.058961790924986386, "grad_norm": 1.125, "learning_rate": 7.566290847399403e-05, "loss": 1.3465, "step": 374 }, { "epoch": 0.0591194427723794, "grad_norm": 2.15625, "learning_rate": 7.565844317839772e-05, "loss": 1.4528, "step": 375 }, { "epoch": 0.05927709461977241, "grad_norm": 1.0234375, "learning_rate": 7.565397793439201e-05, "loss": 1.2723, "step": 376 }, { "epoch": 0.05943474646716542, "grad_norm": 0.9921875, "learning_rate": 7.564951274198629e-05, "loss": 1.2087, "step": 377 }, { "epoch": 0.05959239831455843, "grad_norm": 0.94921875, "learning_rate": 7.564504760119005e-05, "loss": 1.1051, "step": 378 }, { "epoch": 0.059750050161951444, "grad_norm": 0.953125, "learning_rate": 7.564058251201272e-05, "loss": 1.3148, "step": 379 }, { "epoch": 0.059907702009344456, "grad_norm": 1.0, "learning_rate": 7.563611747446377e-05, "loss": 1.2887, "step": 380 }, { "epoch": 0.06006535385673747, "grad_norm": 0.8984375, "learning_rate": 7.56316524885527e-05, "loss": 1.0629, "step": 381 }, { "epoch": 0.06022300570413048, "grad_norm": 0.8984375, "learning_rate": 7.562718755428893e-05, "loss": 1.3577, "step": 382 }, { "epoch": 0.06038065755152349, "grad_norm": 1.0859375, "learning_rate": 7.562272267168195e-05, "loss": 1.1758, "step": 383 }, { "epoch": 0.0605383093989165, "grad_norm": 1.078125, "learning_rate": 7.561825784074122e-05, "loss": 1.1331, "step": 384 }, { "epoch": 0.060695961246309514, "grad_norm": 1.15625, "learning_rate": 7.561379306147619e-05, "loss": 1.6068, "step": 385 }, { "epoch": 0.060853613093702526, "grad_norm": 1.1328125, "learning_rate": 7.560932833389634e-05, "loss": 1.2707, "step": 386 }, { "epoch": 0.06101126494109554, "grad_norm": 1.0703125, "learning_rate": 7.560486365801113e-05, "loss": 1.3707, "step": 387 }, { "epoch": 0.06116891678848855, "grad_norm": 1.125, "learning_rate": 7.560039903383002e-05, "loss": 1.0712, "step": 388 }, { "epoch": 0.06132656863588156, "grad_norm": 0.95703125, "learning_rate": 7.559593446136248e-05, "loss": 1.1705, "step": 389 }, { "epoch": 0.06148422048327457, "grad_norm": 0.96875, "learning_rate": 7.559146994061792e-05, "loss": 1.2098, "step": 390 }, { "epoch": 0.061641872330667584, "grad_norm": 2.265625, "learning_rate": 7.558700547160587e-05, "loss": 1.0231, "step": 391 }, { "epoch": 0.061799524178060596, "grad_norm": 0.9921875, "learning_rate": 7.558254105433577e-05, "loss": 1.2373, "step": 392 }, { "epoch": 0.06195717602545361, "grad_norm": 1.0703125, "learning_rate": 7.557807668881707e-05, "loss": 1.2103, "step": 393 }, { "epoch": 0.06211482787284662, "grad_norm": 1.109375, "learning_rate": 7.557361237505926e-05, "loss": 1.5356, "step": 394 }, { "epoch": 0.06227247972023963, "grad_norm": 0.8984375, "learning_rate": 7.556914811307171e-05, "loss": 1.2645, "step": 395 }, { "epoch": 0.06243013156763264, "grad_norm": 1.1953125, "learning_rate": 7.556468390286402e-05, "loss": 1.5393, "step": 396 }, { "epoch": 0.06258778341502566, "grad_norm": 1.03125, "learning_rate": 7.55602197444456e-05, "loss": 1.3203, "step": 397 }, { "epoch": 0.06274543526241867, "grad_norm": 1.0, "learning_rate": 7.555575563782587e-05, "loss": 1.1366, "step": 398 }, { "epoch": 0.06290308710981168, "grad_norm": 1.1640625, "learning_rate": 7.555129158301432e-05, "loss": 1.2643, "step": 399 }, { "epoch": 0.0630607389572047, "grad_norm": 1.0078125, "learning_rate": 7.554682758002041e-05, "loss": 1.3032, "step": 400 }, { "epoch": 0.06321839080459771, "grad_norm": 1.15625, "learning_rate": 7.554236362885358e-05, "loss": 1.3687, "step": 401 }, { "epoch": 0.06337604265199072, "grad_norm": 1.0234375, "learning_rate": 7.553789972952335e-05, "loss": 1.2373, "step": 402 }, { "epoch": 0.06353369449938373, "grad_norm": 0.98828125, "learning_rate": 7.553343588203915e-05, "loss": 1.301, "step": 403 }, { "epoch": 0.06369134634677674, "grad_norm": 0.94921875, "learning_rate": 7.552897208641042e-05, "loss": 1.0514, "step": 404 }, { "epoch": 0.06384899819416975, "grad_norm": 0.95703125, "learning_rate": 7.55245083426466e-05, "loss": 1.2411, "step": 405 }, { "epoch": 0.06400665004156277, "grad_norm": 1.0546875, "learning_rate": 7.552004465075722e-05, "loss": 1.1168, "step": 406 }, { "epoch": 0.06416430188895578, "grad_norm": 1.0, "learning_rate": 7.551558101075171e-05, "loss": 1.1903, "step": 407 }, { "epoch": 0.06432195373634879, "grad_norm": 1.015625, "learning_rate": 7.551111742263953e-05, "loss": 1.3756, "step": 408 }, { "epoch": 0.0644796055837418, "grad_norm": 1.0625, "learning_rate": 7.550665388643013e-05, "loss": 1.1984, "step": 409 }, { "epoch": 0.06463725743113481, "grad_norm": 1.0546875, "learning_rate": 7.550219040213293e-05, "loss": 1.1656, "step": 410 }, { "epoch": 0.06479490927852782, "grad_norm": 1.0859375, "learning_rate": 7.549772696975749e-05, "loss": 1.2355, "step": 411 }, { "epoch": 0.06495256112592084, "grad_norm": 1.0625, "learning_rate": 7.549326358931321e-05, "loss": 1.3744, "step": 412 }, { "epoch": 0.06511021297331385, "grad_norm": 1.03125, "learning_rate": 7.548880026080956e-05, "loss": 1.1551, "step": 413 }, { "epoch": 0.06526786482070686, "grad_norm": 0.96484375, "learning_rate": 7.548433698425598e-05, "loss": 0.9827, "step": 414 }, { "epoch": 0.06542551666809987, "grad_norm": 1.03125, "learning_rate": 7.547987375966194e-05, "loss": 1.2771, "step": 415 }, { "epoch": 0.06558316851549288, "grad_norm": 0.97265625, "learning_rate": 7.547541058703692e-05, "loss": 1.0584, "step": 416 }, { "epoch": 0.0657408203628859, "grad_norm": 0.94140625, "learning_rate": 7.547094746639037e-05, "loss": 1.2938, "step": 417 }, { "epoch": 0.0658984722102789, "grad_norm": 0.9453125, "learning_rate": 7.546648439773175e-05, "loss": 1.1955, "step": 418 }, { "epoch": 0.06605612405767192, "grad_norm": 1.03125, "learning_rate": 7.546202138107052e-05, "loss": 1.2908, "step": 419 }, { "epoch": 0.06621377590506493, "grad_norm": 1.1015625, "learning_rate": 7.545755841641607e-05, "loss": 1.3614, "step": 420 }, { "epoch": 0.06637142775245794, "grad_norm": 1.078125, "learning_rate": 7.545309550377799e-05, "loss": 1.1311, "step": 421 }, { "epoch": 0.06652907959985095, "grad_norm": 1.0625, "learning_rate": 7.544863264316566e-05, "loss": 1.3877, "step": 422 }, { "epoch": 0.06668673144724396, "grad_norm": 0.99609375, "learning_rate": 7.544416983458855e-05, "loss": 1.2271, "step": 423 }, { "epoch": 0.06684438329463697, "grad_norm": 1.0234375, "learning_rate": 7.54397070780561e-05, "loss": 1.3025, "step": 424 }, { "epoch": 0.06700203514202999, "grad_norm": 0.890625, "learning_rate": 7.54352443735778e-05, "loss": 0.9175, "step": 425 }, { "epoch": 0.067159686989423, "grad_norm": 0.9609375, "learning_rate": 7.543078172116309e-05, "loss": 1.3899, "step": 426 }, { "epoch": 0.06731733883681601, "grad_norm": 0.98828125, "learning_rate": 7.542631912082145e-05, "loss": 1.2967, "step": 427 }, { "epoch": 0.06747499068420902, "grad_norm": 0.9453125, "learning_rate": 7.54218565725623e-05, "loss": 1.2848, "step": 428 }, { "epoch": 0.06763264253160203, "grad_norm": 1.125, "learning_rate": 7.541739407639516e-05, "loss": 1.3587, "step": 429 }, { "epoch": 0.06779029437899504, "grad_norm": 1.125, "learning_rate": 7.541293163232938e-05, "loss": 1.3439, "step": 430 }, { "epoch": 0.06794794622638806, "grad_norm": 1.015625, "learning_rate": 7.540846924037452e-05, "loss": 1.2732, "step": 431 }, { "epoch": 0.06810559807378107, "grad_norm": 0.87109375, "learning_rate": 7.540400690054002e-05, "loss": 1.112, "step": 432 }, { "epoch": 0.06826324992117408, "grad_norm": 1.0078125, "learning_rate": 7.539954461283531e-05, "loss": 1.2712, "step": 433 }, { "epoch": 0.06842090176856709, "grad_norm": 1.125, "learning_rate": 7.539508237726986e-05, "loss": 1.3191, "step": 434 }, { "epoch": 0.0685785536159601, "grad_norm": 1.0234375, "learning_rate": 7.53906201938531e-05, "loss": 1.14, "step": 435 }, { "epoch": 0.06873620546335311, "grad_norm": 0.84765625, "learning_rate": 7.538615806259455e-05, "loss": 0.9618, "step": 436 }, { "epoch": 0.06889385731074613, "grad_norm": 1.28125, "learning_rate": 7.538169598350362e-05, "loss": 1.4627, "step": 437 }, { "epoch": 0.06905150915813914, "grad_norm": 1.109375, "learning_rate": 7.537723395658979e-05, "loss": 1.1721, "step": 438 }, { "epoch": 0.06920916100553215, "grad_norm": 1.015625, "learning_rate": 7.53727719818625e-05, "loss": 1.2041, "step": 439 }, { "epoch": 0.06936681285292516, "grad_norm": 1.0546875, "learning_rate": 7.536831005933116e-05, "loss": 1.144, "step": 440 }, { "epoch": 0.06952446470031817, "grad_norm": 1.03125, "learning_rate": 7.536384818900534e-05, "loss": 1.3357, "step": 441 }, { "epoch": 0.06968211654771118, "grad_norm": 0.96875, "learning_rate": 7.535938637089442e-05, "loss": 1.1101, "step": 442 }, { "epoch": 0.0698397683951042, "grad_norm": 0.9453125, "learning_rate": 7.535492460500788e-05, "loss": 1.4905, "step": 443 }, { "epoch": 0.06999742024249721, "grad_norm": 0.9921875, "learning_rate": 7.535046289135517e-05, "loss": 1.2318, "step": 444 }, { "epoch": 0.07015507208989022, "grad_norm": 1.015625, "learning_rate": 7.534600122994572e-05, "loss": 1.258, "step": 445 }, { "epoch": 0.07031272393728323, "grad_norm": 1.0078125, "learning_rate": 7.534153962078903e-05, "loss": 1.2017, "step": 446 }, { "epoch": 0.07047037578467624, "grad_norm": 0.94921875, "learning_rate": 7.533707806389456e-05, "loss": 1.1496, "step": 447 }, { "epoch": 0.07062802763206925, "grad_norm": 0.92578125, "learning_rate": 7.533261655927171e-05, "loss": 1.1001, "step": 448 }, { "epoch": 0.07078567947946227, "grad_norm": 1.0234375, "learning_rate": 7.532815510692997e-05, "loss": 1.3017, "step": 449 }, { "epoch": 0.07094333132685528, "grad_norm": 1.3671875, "learning_rate": 7.532369370687879e-05, "loss": 1.4135, "step": 450 }, { "epoch": 0.07110098317424829, "grad_norm": 1.078125, "learning_rate": 7.531923235912764e-05, "loss": 1.1465, "step": 451 }, { "epoch": 0.0712586350216413, "grad_norm": 1.1328125, "learning_rate": 7.531477106368597e-05, "loss": 1.1826, "step": 452 }, { "epoch": 0.07141628686903431, "grad_norm": 1.078125, "learning_rate": 7.531030982056324e-05, "loss": 1.2396, "step": 453 }, { "epoch": 0.07157393871642732, "grad_norm": 0.9296875, "learning_rate": 7.53058486297689e-05, "loss": 1.2488, "step": 454 }, { "epoch": 0.07173159056382034, "grad_norm": 1.0390625, "learning_rate": 7.530138749131236e-05, "loss": 1.3044, "step": 455 }, { "epoch": 0.07188924241121335, "grad_norm": 0.98828125, "learning_rate": 7.529692640520315e-05, "loss": 1.3514, "step": 456 }, { "epoch": 0.07204689425860636, "grad_norm": 0.984375, "learning_rate": 7.52924653714507e-05, "loss": 1.2322, "step": 457 }, { "epoch": 0.07220454610599937, "grad_norm": 0.91796875, "learning_rate": 7.528800439006444e-05, "loss": 1.0827, "step": 458 }, { "epoch": 0.07236219795339238, "grad_norm": 1.015625, "learning_rate": 7.528354346105387e-05, "loss": 1.2809, "step": 459 }, { "epoch": 0.0725198498007854, "grad_norm": 1.0234375, "learning_rate": 7.527908258442836e-05, "loss": 1.2996, "step": 460 }, { "epoch": 0.0726775016481784, "grad_norm": 0.96875, "learning_rate": 7.527462176019746e-05, "loss": 1.1359, "step": 461 }, { "epoch": 0.07283515349557142, "grad_norm": 0.9609375, "learning_rate": 7.52701609883706e-05, "loss": 1.3207, "step": 462 }, { "epoch": 0.07299280534296443, "grad_norm": 1.765625, "learning_rate": 7.52657002689572e-05, "loss": 1.4157, "step": 463 }, { "epoch": 0.07315045719035744, "grad_norm": 1.046875, "learning_rate": 7.526123960196676e-05, "loss": 1.1813, "step": 464 }, { "epoch": 0.07330810903775045, "grad_norm": 0.890625, "learning_rate": 7.525677898740868e-05, "loss": 1.3201, "step": 465 }, { "epoch": 0.07346576088514346, "grad_norm": 0.87109375, "learning_rate": 7.525231842529244e-05, "loss": 1.2264, "step": 466 }, { "epoch": 0.07362341273253648, "grad_norm": 1.0, "learning_rate": 7.524785791562752e-05, "loss": 1.181, "step": 467 }, { "epoch": 0.07378106457992949, "grad_norm": 1.046875, "learning_rate": 7.524339745842332e-05, "loss": 1.3327, "step": 468 }, { "epoch": 0.0739387164273225, "grad_norm": 0.9765625, "learning_rate": 7.523893705368935e-05, "loss": 1.3032, "step": 469 }, { "epoch": 0.07409636827471551, "grad_norm": 0.98828125, "learning_rate": 7.523447670143502e-05, "loss": 1.1823, "step": 470 }, { "epoch": 0.07425402012210852, "grad_norm": 0.94921875, "learning_rate": 7.523001640166983e-05, "loss": 1.234, "step": 471 }, { "epoch": 0.07441167196950153, "grad_norm": 0.95703125, "learning_rate": 7.522555615440318e-05, "loss": 1.256, "step": 472 }, { "epoch": 0.07456932381689454, "grad_norm": 1.078125, "learning_rate": 7.522109595964456e-05, "loss": 1.3037, "step": 473 }, { "epoch": 0.07472697566428756, "grad_norm": 0.9453125, "learning_rate": 7.521663581740341e-05, "loss": 1.2517, "step": 474 }, { "epoch": 0.07488462751168057, "grad_norm": 0.93359375, "learning_rate": 7.521217572768914e-05, "loss": 1.2364, "step": 475 }, { "epoch": 0.07504227935907358, "grad_norm": 1.0625, "learning_rate": 7.520771569051129e-05, "loss": 1.325, "step": 476 }, { "epoch": 0.07519993120646659, "grad_norm": 1.015625, "learning_rate": 7.520325570587927e-05, "loss": 1.1998, "step": 477 }, { "epoch": 0.0753575830538596, "grad_norm": 1.0390625, "learning_rate": 7.519879577380253e-05, "loss": 1.2116, "step": 478 }, { "epoch": 0.07551523490125261, "grad_norm": 0.98046875, "learning_rate": 7.519433589429053e-05, "loss": 1.2443, "step": 479 }, { "epoch": 0.07567288674864563, "grad_norm": 1.0234375, "learning_rate": 7.518987606735267e-05, "loss": 1.1512, "step": 480 }, { "epoch": 0.07583053859603864, "grad_norm": 0.88671875, "learning_rate": 7.51854162929985e-05, "loss": 1.0509, "step": 481 }, { "epoch": 0.07598819044343165, "grad_norm": 1.0859375, "learning_rate": 7.518095657123742e-05, "loss": 1.4288, "step": 482 }, { "epoch": 0.07614584229082466, "grad_norm": 1.1171875, "learning_rate": 7.517649690207888e-05, "loss": 1.6021, "step": 483 }, { "epoch": 0.07630349413821767, "grad_norm": 0.98828125, "learning_rate": 7.517203728553233e-05, "loss": 1.1761, "step": 484 }, { "epoch": 0.07646114598561068, "grad_norm": 1.0234375, "learning_rate": 7.51675777216072e-05, "loss": 1.043, "step": 485 }, { "epoch": 0.0766187978330037, "grad_norm": 1.0078125, "learning_rate": 7.516311821031298e-05, "loss": 1.1508, "step": 486 }, { "epoch": 0.07677644968039671, "grad_norm": 1.0625, "learning_rate": 7.515865875165915e-05, "loss": 1.1291, "step": 487 }, { "epoch": 0.07693410152778972, "grad_norm": 1.0234375, "learning_rate": 7.51541993456551e-05, "loss": 1.2817, "step": 488 }, { "epoch": 0.07709175337518273, "grad_norm": 0.94140625, "learning_rate": 7.514973999231032e-05, "loss": 1.1202, "step": 489 }, { "epoch": 0.07724940522257574, "grad_norm": 1.03125, "learning_rate": 7.514528069163423e-05, "loss": 1.319, "step": 490 }, { "epoch": 0.07740705706996875, "grad_norm": 1.0390625, "learning_rate": 7.514082144363629e-05, "loss": 1.3601, "step": 491 }, { "epoch": 0.07756470891736177, "grad_norm": 0.98828125, "learning_rate": 7.513636224832597e-05, "loss": 1.2541, "step": 492 }, { "epoch": 0.07772236076475478, "grad_norm": 1.078125, "learning_rate": 7.513190310571271e-05, "loss": 1.4006, "step": 493 }, { "epoch": 0.07788001261214779, "grad_norm": 0.87890625, "learning_rate": 7.512744401580598e-05, "loss": 1.0685, "step": 494 }, { "epoch": 0.0780376644595408, "grad_norm": 0.94921875, "learning_rate": 7.512298497861514e-05, "loss": 1.4167, "step": 495 }, { "epoch": 0.07819531630693381, "grad_norm": 0.984375, "learning_rate": 7.511852599414977e-05, "loss": 1.0967, "step": 496 }, { "epoch": 0.07835296815432682, "grad_norm": 1.1015625, "learning_rate": 7.511406706241924e-05, "loss": 1.4225, "step": 497 }, { "epoch": 0.07851062000171984, "grad_norm": 1.40625, "learning_rate": 7.510960818343305e-05, "loss": 1.3862, "step": 498 }, { "epoch": 0.07866827184911285, "grad_norm": 1.0859375, "learning_rate": 7.51051493572006e-05, "loss": 1.119, "step": 499 }, { "epoch": 0.07882592369650586, "grad_norm": 1.140625, "learning_rate": 7.510069058373135e-05, "loss": 1.2952, "step": 500 }, { "epoch": 0.07898357554389887, "grad_norm": 1.078125, "learning_rate": 7.509623186303478e-05, "loss": 1.5034, "step": 501 }, { "epoch": 0.07914122739129188, "grad_norm": 0.94921875, "learning_rate": 7.509177319512033e-05, "loss": 1.0635, "step": 502 }, { "epoch": 0.0792988792386849, "grad_norm": 0.99609375, "learning_rate": 7.508731457999745e-05, "loss": 1.1524, "step": 503 }, { "epoch": 0.0794565310860779, "grad_norm": 0.97265625, "learning_rate": 7.508285601767558e-05, "loss": 1.2952, "step": 504 }, { "epoch": 0.07961418293347092, "grad_norm": 0.97265625, "learning_rate": 7.507839750816412e-05, "loss": 1.374, "step": 505 }, { "epoch": 0.07977183478086393, "grad_norm": 1.1015625, "learning_rate": 7.507393905147261e-05, "loss": 1.2584, "step": 506 }, { "epoch": 0.07992948662825694, "grad_norm": 0.97265625, "learning_rate": 7.506948064761048e-05, "loss": 1.0858, "step": 507 }, { "epoch": 0.08008713847564995, "grad_norm": 1.03125, "learning_rate": 7.506502229658716e-05, "loss": 1.4086, "step": 508 }, { "epoch": 0.08024479032304296, "grad_norm": 1.0703125, "learning_rate": 7.506056399841209e-05, "loss": 1.4715, "step": 509 }, { "epoch": 0.08040244217043598, "grad_norm": 1.03125, "learning_rate": 7.505610575309472e-05, "loss": 1.3469, "step": 510 }, { "epoch": 0.08056009401782899, "grad_norm": 0.984375, "learning_rate": 7.505164756064451e-05, "loss": 1.1889, "step": 511 }, { "epoch": 0.080717745865222, "grad_norm": 1.078125, "learning_rate": 7.504718942107091e-05, "loss": 1.5048, "step": 512 }, { "epoch": 0.08087539771261501, "grad_norm": 0.890625, "learning_rate": 7.504273133438337e-05, "loss": 1.322, "step": 513 }, { "epoch": 0.08103304956000802, "grad_norm": 1.0234375, "learning_rate": 7.503827330059134e-05, "loss": 1.2944, "step": 514 }, { "epoch": 0.08119070140740103, "grad_norm": 1.078125, "learning_rate": 7.503381531970427e-05, "loss": 1.2741, "step": 515 }, { "epoch": 0.08134835325479405, "grad_norm": 1.015625, "learning_rate": 7.502935739173154e-05, "loss": 1.4, "step": 516 }, { "epoch": 0.08150600510218706, "grad_norm": 1.015625, "learning_rate": 7.502489951668272e-05, "loss": 1.3554, "step": 517 }, { "epoch": 0.08166365694958007, "grad_norm": 1.0078125, "learning_rate": 7.502044169456719e-05, "loss": 1.2887, "step": 518 }, { "epoch": 0.08182130879697308, "grad_norm": 1.03125, "learning_rate": 7.50159839253944e-05, "loss": 1.1278, "step": 519 }, { "epoch": 0.08197896064436609, "grad_norm": 1.03125, "learning_rate": 7.501152620917383e-05, "loss": 1.2579, "step": 520 }, { "epoch": 0.0821366124917591, "grad_norm": 1.0078125, "learning_rate": 7.500706854591483e-05, "loss": 1.3041, "step": 521 }, { "epoch": 0.08229426433915212, "grad_norm": 0.82421875, "learning_rate": 7.500261093562697e-05, "loss": 0.9188, "step": 522 }, { "epoch": 0.08245191618654513, "grad_norm": 1.0078125, "learning_rate": 7.499815337831966e-05, "loss": 1.1688, "step": 523 }, { "epoch": 0.08260956803393814, "grad_norm": 0.9921875, "learning_rate": 7.49936958740023e-05, "loss": 1.103, "step": 524 }, { "epoch": 0.08276721988133115, "grad_norm": 1.046875, "learning_rate": 7.498923842268441e-05, "loss": 1.1761, "step": 525 }, { "epoch": 0.08292487172872416, "grad_norm": 1.0078125, "learning_rate": 7.498478102437534e-05, "loss": 1.1771, "step": 526 }, { "epoch": 0.08308252357611717, "grad_norm": 0.9375, "learning_rate": 7.498032367908465e-05, "loss": 1.1592, "step": 527 }, { "epoch": 0.08324017542351018, "grad_norm": 1.0078125, "learning_rate": 7.497586638682172e-05, "loss": 1.4756, "step": 528 }, { "epoch": 0.0833978272709032, "grad_norm": 0.9921875, "learning_rate": 7.497140914759602e-05, "loss": 1.3079, "step": 529 }, { "epoch": 0.08355547911829621, "grad_norm": 1.0703125, "learning_rate": 7.496695196141699e-05, "loss": 1.2843, "step": 530 }, { "epoch": 0.08371313096568922, "grad_norm": 0.921875, "learning_rate": 7.496249482829405e-05, "loss": 1.2504, "step": 531 }, { "epoch": 0.08387078281308223, "grad_norm": 1.046875, "learning_rate": 7.495803774823669e-05, "loss": 1.2643, "step": 532 }, { "epoch": 0.08402843466047524, "grad_norm": 1.0078125, "learning_rate": 7.495358072125434e-05, "loss": 0.9953, "step": 533 }, { "epoch": 0.08418608650786825, "grad_norm": 0.984375, "learning_rate": 7.494912374735643e-05, "loss": 0.9915, "step": 534 }, { "epoch": 0.08434373835526127, "grad_norm": 0.859375, "learning_rate": 7.494466682655241e-05, "loss": 1.065, "step": 535 }, { "epoch": 0.08450139020265428, "grad_norm": 0.86328125, "learning_rate": 7.494020995885175e-05, "loss": 1.1784, "step": 536 }, { "epoch": 0.08465904205004729, "grad_norm": 1.0078125, "learning_rate": 7.493575314426389e-05, "loss": 1.0711, "step": 537 }, { "epoch": 0.0848166938974403, "grad_norm": 1.0546875, "learning_rate": 7.493129638279827e-05, "loss": 1.2725, "step": 538 }, { "epoch": 0.08497434574483331, "grad_norm": 0.9375, "learning_rate": 7.492683967446434e-05, "loss": 1.114, "step": 539 }, { "epoch": 0.08513199759222632, "grad_norm": 0.98046875, "learning_rate": 7.492238301927153e-05, "loss": 1.2806, "step": 540 }, { "epoch": 0.08528964943961934, "grad_norm": 1.125, "learning_rate": 7.491792641722925e-05, "loss": 1.1968, "step": 541 }, { "epoch": 0.08544730128701235, "grad_norm": 0.984375, "learning_rate": 7.491346986834705e-05, "loss": 1.2989, "step": 542 }, { "epoch": 0.08560495313440536, "grad_norm": 1.0390625, "learning_rate": 7.490901337263432e-05, "loss": 1.4634, "step": 543 }, { "epoch": 0.08576260498179837, "grad_norm": 1.09375, "learning_rate": 7.490455693010048e-05, "loss": 1.2581, "step": 544 }, { "epoch": 0.08592025682919138, "grad_norm": 0.99609375, "learning_rate": 7.490010054075502e-05, "loss": 1.2082, "step": 545 }, { "epoch": 0.0860779086765844, "grad_norm": 1.0859375, "learning_rate": 7.489564420460731e-05, "loss": 1.4202, "step": 546 }, { "epoch": 0.0862355605239774, "grad_norm": 0.98828125, "learning_rate": 7.489118792166687e-05, "loss": 1.2088, "step": 547 }, { "epoch": 0.08639321237137043, "grad_norm": 1.0234375, "learning_rate": 7.488673169194316e-05, "loss": 1.2706, "step": 548 }, { "epoch": 0.08655086421876344, "grad_norm": 1.03125, "learning_rate": 7.488227551544556e-05, "loss": 1.1202, "step": 549 }, { "epoch": 0.08670851606615645, "grad_norm": 1.0546875, "learning_rate": 7.487781939218355e-05, "loss": 1.5892, "step": 550 }, { "epoch": 0.08686616791354947, "grad_norm": 0.9921875, "learning_rate": 7.487336332216654e-05, "loss": 1.1621, "step": 551 }, { "epoch": 0.08702381976094248, "grad_norm": 1.078125, "learning_rate": 7.486890730540403e-05, "loss": 1.1864, "step": 552 }, { "epoch": 0.08718147160833549, "grad_norm": 0.890625, "learning_rate": 7.486445134190541e-05, "loss": 1.156, "step": 553 }, { "epoch": 0.0873391234557285, "grad_norm": 0.88671875, "learning_rate": 7.485999543168017e-05, "loss": 1.0731, "step": 554 }, { "epoch": 0.08749677530312151, "grad_norm": 1.140625, "learning_rate": 7.485553957473771e-05, "loss": 1.4862, "step": 555 }, { "epoch": 0.08765442715051452, "grad_norm": 1.015625, "learning_rate": 7.48510837710875e-05, "loss": 1.1578, "step": 556 }, { "epoch": 0.08781207899790754, "grad_norm": 0.99609375, "learning_rate": 7.4846628020739e-05, "loss": 1.3176, "step": 557 }, { "epoch": 0.08796973084530055, "grad_norm": 0.90625, "learning_rate": 7.484217232370163e-05, "loss": 1.1076, "step": 558 }, { "epoch": 0.08812738269269356, "grad_norm": 0.9140625, "learning_rate": 7.483771667998485e-05, "loss": 1.0937, "step": 559 }, { "epoch": 0.08828503454008657, "grad_norm": 1.03125, "learning_rate": 7.483326108959807e-05, "loss": 1.1822, "step": 560 }, { "epoch": 0.08844268638747958, "grad_norm": 1.078125, "learning_rate": 7.482880555255073e-05, "loss": 1.2227, "step": 561 }, { "epoch": 0.0886003382348726, "grad_norm": 1.0234375, "learning_rate": 7.482435006885234e-05, "loss": 1.2702, "step": 562 }, { "epoch": 0.0887579900822656, "grad_norm": 0.98828125, "learning_rate": 7.481989463851228e-05, "loss": 1.1449, "step": 563 }, { "epoch": 0.08891564192965862, "grad_norm": 1.0390625, "learning_rate": 7.481543926154005e-05, "loss": 1.3391, "step": 564 }, { "epoch": 0.08907329377705163, "grad_norm": 0.9296875, "learning_rate": 7.481098393794502e-05, "loss": 1.2093, "step": 565 }, { "epoch": 0.08923094562444464, "grad_norm": 0.99609375, "learning_rate": 7.480652866773665e-05, "loss": 1.3015, "step": 566 }, { "epoch": 0.08938859747183765, "grad_norm": 0.9453125, "learning_rate": 7.480207345092443e-05, "loss": 1.1732, "step": 567 }, { "epoch": 0.08954624931923066, "grad_norm": 0.9921875, "learning_rate": 7.479761828751779e-05, "loss": 1.1943, "step": 568 }, { "epoch": 0.08970390116662368, "grad_norm": 0.92578125, "learning_rate": 7.479316317752615e-05, "loss": 1.0889, "step": 569 }, { "epoch": 0.08986155301401669, "grad_norm": 0.9296875, "learning_rate": 7.478870812095895e-05, "loss": 1.3222, "step": 570 }, { "epoch": 0.0900192048614097, "grad_norm": 1.0234375, "learning_rate": 7.478425311782564e-05, "loss": 1.2801, "step": 571 }, { "epoch": 0.09017685670880271, "grad_norm": 0.87890625, "learning_rate": 7.477979816813565e-05, "loss": 0.9339, "step": 572 }, { "epoch": 0.09033450855619572, "grad_norm": 0.94921875, "learning_rate": 7.477534327189847e-05, "loss": 1.2102, "step": 573 }, { "epoch": 0.09049216040358873, "grad_norm": 0.8515625, "learning_rate": 7.47708884291235e-05, "loss": 1.1488, "step": 574 }, { "epoch": 0.09064981225098175, "grad_norm": 1.0078125, "learning_rate": 7.476643363982019e-05, "loss": 1.1672, "step": 575 }, { "epoch": 0.09080746409837476, "grad_norm": 1.0, "learning_rate": 7.476197890399797e-05, "loss": 1.1126, "step": 576 }, { "epoch": 0.09096511594576777, "grad_norm": 0.9921875, "learning_rate": 7.47575242216663e-05, "loss": 1.2595, "step": 577 }, { "epoch": 0.09112276779316078, "grad_norm": 1.015625, "learning_rate": 7.475306959283461e-05, "loss": 1.4458, "step": 578 }, { "epoch": 0.09128041964055379, "grad_norm": 0.96484375, "learning_rate": 7.474861501751236e-05, "loss": 1.3733, "step": 579 }, { "epoch": 0.0914380714879468, "grad_norm": 0.98828125, "learning_rate": 7.474416049570898e-05, "loss": 1.2634, "step": 580 }, { "epoch": 0.09159572333533982, "grad_norm": 0.91015625, "learning_rate": 7.473970602743385e-05, "loss": 1.1116, "step": 581 }, { "epoch": 0.09175337518273283, "grad_norm": 0.9140625, "learning_rate": 7.473525161269653e-05, "loss": 1.2457, "step": 582 }, { "epoch": 0.09191102703012584, "grad_norm": 1.09375, "learning_rate": 7.47307972515064e-05, "loss": 1.0115, "step": 583 }, { "epoch": 0.09206867887751885, "grad_norm": 1.109375, "learning_rate": 7.472634294387289e-05, "loss": 1.1516, "step": 584 }, { "epoch": 0.09222633072491186, "grad_norm": 1.046875, "learning_rate": 7.472188868980545e-05, "loss": 1.3839, "step": 585 }, { "epoch": 0.09238398257230487, "grad_norm": 0.96484375, "learning_rate": 7.471743448931347e-05, "loss": 1.1989, "step": 586 }, { "epoch": 0.09254163441969789, "grad_norm": 1.1015625, "learning_rate": 7.471298034240651e-05, "loss": 1.3703, "step": 587 }, { "epoch": 0.0926992862670909, "grad_norm": 1.0546875, "learning_rate": 7.470852624909393e-05, "loss": 1.2874, "step": 588 }, { "epoch": 0.09285693811448391, "grad_norm": 0.94921875, "learning_rate": 7.47040722093852e-05, "loss": 1.1757, "step": 589 }, { "epoch": 0.09301458996187692, "grad_norm": 0.8046875, "learning_rate": 7.469961822328972e-05, "loss": 0.9003, "step": 590 }, { "epoch": 0.09317224180926993, "grad_norm": 1.0703125, "learning_rate": 7.469516429081692e-05, "loss": 1.4065, "step": 591 }, { "epoch": 0.09332989365666294, "grad_norm": 0.9375, "learning_rate": 7.469071041197631e-05, "loss": 1.1492, "step": 592 }, { "epoch": 0.09348754550405595, "grad_norm": 0.91015625, "learning_rate": 7.46862565867773e-05, "loss": 1.1621, "step": 593 }, { "epoch": 0.09364519735144897, "grad_norm": 0.9921875, "learning_rate": 7.468180281522932e-05, "loss": 1.1522, "step": 594 }, { "epoch": 0.09380284919884198, "grad_norm": 0.92578125, "learning_rate": 7.46773490973418e-05, "loss": 1.2551, "step": 595 }, { "epoch": 0.09396050104623499, "grad_norm": 1.1484375, "learning_rate": 7.467289543312419e-05, "loss": 1.2196, "step": 596 }, { "epoch": 0.094118152893628, "grad_norm": 1.1015625, "learning_rate": 7.466844182258595e-05, "loss": 1.2214, "step": 597 }, { "epoch": 0.09427580474102101, "grad_norm": 0.96484375, "learning_rate": 7.466398826573648e-05, "loss": 1.3309, "step": 598 }, { "epoch": 0.09443345658841402, "grad_norm": 0.96484375, "learning_rate": 7.465953476258525e-05, "loss": 1.1182, "step": 599 }, { "epoch": 0.09459110843580704, "grad_norm": 0.98828125, "learning_rate": 7.46550813131417e-05, "loss": 1.2693, "step": 600 }, { "epoch": 0.09474876028320005, "grad_norm": 0.93359375, "learning_rate": 7.465062791741519e-05, "loss": 1.038, "step": 601 }, { "epoch": 0.09490641213059306, "grad_norm": 0.95703125, "learning_rate": 7.464617457541527e-05, "loss": 1.0602, "step": 602 }, { "epoch": 0.09506406397798607, "grad_norm": 1.015625, "learning_rate": 7.464172128715135e-05, "loss": 1.126, "step": 603 }, { "epoch": 0.09522171582537908, "grad_norm": 0.984375, "learning_rate": 7.463726805263285e-05, "loss": 1.2795, "step": 604 }, { "epoch": 0.0953793676727721, "grad_norm": 1.0703125, "learning_rate": 7.46328148718692e-05, "loss": 1.4452, "step": 605 }, { "epoch": 0.0955370195201651, "grad_norm": 1.0078125, "learning_rate": 7.462836174486982e-05, "loss": 1.1347, "step": 606 }, { "epoch": 0.09569467136755812, "grad_norm": 1.0546875, "learning_rate": 7.462390867164422e-05, "loss": 1.3051, "step": 607 }, { "epoch": 0.09585232321495113, "grad_norm": 1.1015625, "learning_rate": 7.461945565220179e-05, "loss": 1.339, "step": 608 }, { "epoch": 0.09600997506234414, "grad_norm": 1.0, "learning_rate": 7.461500268655197e-05, "loss": 1.2509, "step": 609 }, { "epoch": 0.09616762690973715, "grad_norm": 1.0625, "learning_rate": 7.461054977470419e-05, "loss": 1.4698, "step": 610 }, { "epoch": 0.09632527875713016, "grad_norm": 0.859375, "learning_rate": 7.460609691666787e-05, "loss": 1.1182, "step": 611 }, { "epoch": 0.09648293060452318, "grad_norm": 0.9453125, "learning_rate": 7.460164411245252e-05, "loss": 1.0848, "step": 612 }, { "epoch": 0.09664058245191619, "grad_norm": 1.0625, "learning_rate": 7.459719136206753e-05, "loss": 1.1935, "step": 613 }, { "epoch": 0.0967982342993092, "grad_norm": 0.98828125, "learning_rate": 7.459273866552234e-05, "loss": 1.1229, "step": 614 }, { "epoch": 0.09695588614670221, "grad_norm": 0.93359375, "learning_rate": 7.458828602282639e-05, "loss": 1.2689, "step": 615 }, { "epoch": 0.09711353799409522, "grad_norm": 0.984375, "learning_rate": 7.458383343398909e-05, "loss": 1.3101, "step": 616 }, { "epoch": 0.09727118984148823, "grad_norm": 1.09375, "learning_rate": 7.457938089901992e-05, "loss": 1.3334, "step": 617 }, { "epoch": 0.09742884168888125, "grad_norm": 1.140625, "learning_rate": 7.457492841792832e-05, "loss": 1.3037, "step": 618 }, { "epoch": 0.09758649353627426, "grad_norm": 0.96875, "learning_rate": 7.457047599072368e-05, "loss": 1.1142, "step": 619 }, { "epoch": 0.09774414538366727, "grad_norm": 0.99609375, "learning_rate": 7.456602361741547e-05, "loss": 1.1003, "step": 620 }, { "epoch": 0.09790179723106028, "grad_norm": 1.015625, "learning_rate": 7.45615712980131e-05, "loss": 1.4077, "step": 621 }, { "epoch": 0.09805944907845329, "grad_norm": 0.85546875, "learning_rate": 7.455711903252605e-05, "loss": 0.9765, "step": 622 }, { "epoch": 0.0982171009258463, "grad_norm": 1.0234375, "learning_rate": 7.455266682096374e-05, "loss": 1.2055, "step": 623 }, { "epoch": 0.09837475277323932, "grad_norm": 1.1328125, "learning_rate": 7.454821466333557e-05, "loss": 1.1113, "step": 624 }, { "epoch": 0.09853240462063233, "grad_norm": 0.9296875, "learning_rate": 7.454376255965102e-05, "loss": 1.0605, "step": 625 }, { "epoch": 0.09869005646802534, "grad_norm": 1.0234375, "learning_rate": 7.453931050991948e-05, "loss": 1.1806, "step": 626 }, { "epoch": 0.09884770831541835, "grad_norm": 0.98046875, "learning_rate": 7.453485851415045e-05, "loss": 1.1077, "step": 627 }, { "epoch": 0.09900536016281136, "grad_norm": 0.96484375, "learning_rate": 7.453040657235333e-05, "loss": 1.4821, "step": 628 }, { "epoch": 0.09916301201020437, "grad_norm": 0.859375, "learning_rate": 7.452595468453756e-05, "loss": 1.0574, "step": 629 }, { "epoch": 0.09932066385759739, "grad_norm": 0.9296875, "learning_rate": 7.452150285071258e-05, "loss": 1.1567, "step": 630 }, { "epoch": 0.0994783157049904, "grad_norm": 0.97265625, "learning_rate": 7.451705107088777e-05, "loss": 1.3568, "step": 631 }, { "epoch": 0.09963596755238341, "grad_norm": 0.88671875, "learning_rate": 7.451259934507266e-05, "loss": 1.2442, "step": 632 }, { "epoch": 0.09979361939977642, "grad_norm": 0.96875, "learning_rate": 7.450814767327663e-05, "loss": 1.0539, "step": 633 }, { "epoch": 0.09995127124716943, "grad_norm": 0.88671875, "learning_rate": 7.450369605550913e-05, "loss": 1.1088, "step": 634 }, { "epoch": 0.10010892309456244, "grad_norm": 1.0078125, "learning_rate": 7.449924449177958e-05, "loss": 1.0329, "step": 635 }, { "epoch": 0.10026657494195546, "grad_norm": 1.171875, "learning_rate": 7.449479298209741e-05, "loss": 1.3813, "step": 636 }, { "epoch": 0.10042422678934847, "grad_norm": 0.98046875, "learning_rate": 7.44903415264721e-05, "loss": 1.2364, "step": 637 }, { "epoch": 0.10058187863674148, "grad_norm": 1.0, "learning_rate": 7.448589012491303e-05, "loss": 1.3175, "step": 638 }, { "epoch": 0.10073953048413449, "grad_norm": 0.9609375, "learning_rate": 7.448143877742965e-05, "loss": 1.0257, "step": 639 }, { "epoch": 0.1008971823315275, "grad_norm": 1.0, "learning_rate": 7.447698748403142e-05, "loss": 1.3068, "step": 640 }, { "epoch": 0.10105483417892051, "grad_norm": 0.9609375, "learning_rate": 7.447253624472774e-05, "loss": 1.3199, "step": 641 }, { "epoch": 0.10121248602631353, "grad_norm": 0.87890625, "learning_rate": 7.446808505952807e-05, "loss": 1.1259, "step": 642 }, { "epoch": 0.10137013787370654, "grad_norm": 0.90625, "learning_rate": 7.446363392844184e-05, "loss": 1.1717, "step": 643 }, { "epoch": 0.10152778972109955, "grad_norm": 1.0, "learning_rate": 7.445918285147849e-05, "loss": 1.4615, "step": 644 }, { "epoch": 0.10168544156849256, "grad_norm": 1.03125, "learning_rate": 7.445473182864744e-05, "loss": 1.403, "step": 645 }, { "epoch": 0.10184309341588557, "grad_norm": 1.0703125, "learning_rate": 7.445028085995806e-05, "loss": 1.1854, "step": 646 }, { "epoch": 0.10200074526327858, "grad_norm": 0.91015625, "learning_rate": 7.444582994541992e-05, "loss": 1.0989, "step": 647 }, { "epoch": 0.1021583971106716, "grad_norm": 1.0390625, "learning_rate": 7.444137908504236e-05, "loss": 1.4107, "step": 648 }, { "epoch": 0.1023160489580646, "grad_norm": 0.99609375, "learning_rate": 7.443692827883487e-05, "loss": 1.2837, "step": 649 }, { "epoch": 0.10247370080545762, "grad_norm": 0.9296875, "learning_rate": 7.443247752680682e-05, "loss": 1.1837, "step": 650 }, { "epoch": 0.10263135265285063, "grad_norm": 0.95703125, "learning_rate": 7.442802682896768e-05, "loss": 1.2619, "step": 651 }, { "epoch": 0.10278900450024364, "grad_norm": 0.9453125, "learning_rate": 7.442357618532683e-05, "loss": 1.335, "step": 652 }, { "epoch": 0.10294665634763665, "grad_norm": 1.046875, "learning_rate": 7.441912559589379e-05, "loss": 1.0828, "step": 653 }, { "epoch": 0.10310430819502966, "grad_norm": 0.99609375, "learning_rate": 7.441467506067795e-05, "loss": 1.2063, "step": 654 }, { "epoch": 0.10326196004242268, "grad_norm": 1.0234375, "learning_rate": 7.441022457968875e-05, "loss": 1.2698, "step": 655 }, { "epoch": 0.10341961188981569, "grad_norm": 0.97265625, "learning_rate": 7.440577415293562e-05, "loss": 1.1495, "step": 656 }, { "epoch": 0.1035772637372087, "grad_norm": 0.90234375, "learning_rate": 7.440132378042793e-05, "loss": 1.1802, "step": 657 }, { "epoch": 0.10373491558460171, "grad_norm": 1.03125, "learning_rate": 7.439687346217523e-05, "loss": 1.2465, "step": 658 }, { "epoch": 0.10389256743199472, "grad_norm": 1.0, "learning_rate": 7.439242319818689e-05, "loss": 1.1662, "step": 659 }, { "epoch": 0.10405021927938773, "grad_norm": 0.984375, "learning_rate": 7.438797298847234e-05, "loss": 1.1692, "step": 660 }, { "epoch": 0.10420787112678075, "grad_norm": 0.91015625, "learning_rate": 7.438352283304102e-05, "loss": 1.0785, "step": 661 }, { "epoch": 0.10436552297417376, "grad_norm": 0.92578125, "learning_rate": 7.437907273190234e-05, "loss": 0.9894, "step": 662 }, { "epoch": 0.10452317482156677, "grad_norm": 1.0, "learning_rate": 7.437462268506576e-05, "loss": 1.2955, "step": 663 }, { "epoch": 0.10468082666895978, "grad_norm": 1.0390625, "learning_rate": 7.437017269254072e-05, "loss": 1.5138, "step": 664 }, { "epoch": 0.10483847851635279, "grad_norm": 0.96875, "learning_rate": 7.436572275433661e-05, "loss": 1.2694, "step": 665 }, { "epoch": 0.1049961303637458, "grad_norm": 0.90234375, "learning_rate": 7.436127287046293e-05, "loss": 1.1391, "step": 666 }, { "epoch": 0.10515378221113882, "grad_norm": 0.97265625, "learning_rate": 7.4356823040929e-05, "loss": 0.9918, "step": 667 }, { "epoch": 0.10531143405853183, "grad_norm": 0.89453125, "learning_rate": 7.435237326574435e-05, "loss": 0.9228, "step": 668 }, { "epoch": 0.10546908590592484, "grad_norm": 0.96484375, "learning_rate": 7.434792354491837e-05, "loss": 1.1432, "step": 669 }, { "epoch": 0.10562673775331785, "grad_norm": 0.87109375, "learning_rate": 7.434347387846052e-05, "loss": 1.178, "step": 670 }, { "epoch": 0.10578438960071086, "grad_norm": 0.98828125, "learning_rate": 7.433902426638021e-05, "loss": 1.3491, "step": 671 }, { "epoch": 0.10594204144810387, "grad_norm": 0.97265625, "learning_rate": 7.433457470868683e-05, "loss": 1.0879, "step": 672 }, { "epoch": 0.10609969329549689, "grad_norm": 0.95703125, "learning_rate": 7.43301252053899e-05, "loss": 1.1116, "step": 673 }, { "epoch": 0.1062573451428899, "grad_norm": 0.953125, "learning_rate": 7.432567575649878e-05, "loss": 1.2212, "step": 674 }, { "epoch": 0.10641499699028291, "grad_norm": 0.9765625, "learning_rate": 7.432122636202294e-05, "loss": 1.0424, "step": 675 }, { "epoch": 0.10657264883767592, "grad_norm": 0.9375, "learning_rate": 7.431677702197179e-05, "loss": 1.3942, "step": 676 }, { "epoch": 0.10673030068506893, "grad_norm": 1.15625, "learning_rate": 7.43123277363547e-05, "loss": 1.2196, "step": 677 }, { "epoch": 0.10688795253246194, "grad_norm": 1.0390625, "learning_rate": 7.430787850518124e-05, "loss": 1.3524, "step": 678 }, { "epoch": 0.10704560437985496, "grad_norm": 1.0078125, "learning_rate": 7.430342932846073e-05, "loss": 1.1995, "step": 679 }, { "epoch": 0.10720325622724797, "grad_norm": 0.96484375, "learning_rate": 7.429898020620266e-05, "loss": 1.2108, "step": 680 }, { "epoch": 0.10736090807464098, "grad_norm": 0.9453125, "learning_rate": 7.429453113841643e-05, "loss": 1.3323, "step": 681 }, { "epoch": 0.10751855992203399, "grad_norm": 0.9609375, "learning_rate": 7.429008212511143e-05, "loss": 1.1557, "step": 682 }, { "epoch": 0.107676211769427, "grad_norm": 0.88671875, "learning_rate": 7.428563316629718e-05, "loss": 1.1836, "step": 683 }, { "epoch": 0.10783386361682001, "grad_norm": 0.9296875, "learning_rate": 7.428118426198303e-05, "loss": 1.2797, "step": 684 }, { "epoch": 0.10799151546421303, "grad_norm": 0.85546875, "learning_rate": 7.427673541217847e-05, "loss": 0.9795, "step": 685 }, { "epoch": 0.10814916731160604, "grad_norm": 0.93359375, "learning_rate": 7.427228661689288e-05, "loss": 1.197, "step": 686 }, { "epoch": 0.10830681915899905, "grad_norm": 0.99609375, "learning_rate": 7.426783787613567e-05, "loss": 1.2451, "step": 687 }, { "epoch": 0.10846447100639206, "grad_norm": 0.90234375, "learning_rate": 7.426338918991635e-05, "loss": 1.1404, "step": 688 }, { "epoch": 0.10862212285378507, "grad_norm": 0.9375, "learning_rate": 7.425894055824432e-05, "loss": 1.2287, "step": 689 }, { "epoch": 0.10877977470117808, "grad_norm": 0.96484375, "learning_rate": 7.425449198112897e-05, "loss": 1.201, "step": 690 }, { "epoch": 0.1089374265485711, "grad_norm": 1.046875, "learning_rate": 7.425004345857975e-05, "loss": 1.6085, "step": 691 }, { "epoch": 0.1090950783959641, "grad_norm": 0.9921875, "learning_rate": 7.424559499060606e-05, "loss": 1.1462, "step": 692 }, { "epoch": 0.10925273024335712, "grad_norm": 1.046875, "learning_rate": 7.424114657721741e-05, "loss": 1.2834, "step": 693 }, { "epoch": 0.10941038209075013, "grad_norm": 1.15625, "learning_rate": 7.423669821842318e-05, "loss": 1.1607, "step": 694 }, { "epoch": 0.10956803393814314, "grad_norm": 0.98828125, "learning_rate": 7.423224991423279e-05, "loss": 1.134, "step": 695 }, { "epoch": 0.10972568578553615, "grad_norm": 1.0078125, "learning_rate": 7.422780166465566e-05, "loss": 1.1405, "step": 696 }, { "epoch": 0.10988333763292917, "grad_norm": 0.94921875, "learning_rate": 7.422335346970119e-05, "loss": 1.1037, "step": 697 }, { "epoch": 0.11004098948032218, "grad_norm": 1.1328125, "learning_rate": 7.421890532937891e-05, "loss": 1.3617, "step": 698 }, { "epoch": 0.11019864132771519, "grad_norm": 1.03125, "learning_rate": 7.421445724369818e-05, "loss": 1.3546, "step": 699 }, { "epoch": 0.1103562931751082, "grad_norm": 0.94140625, "learning_rate": 7.421000921266843e-05, "loss": 1.1407, "step": 700 }, { "epoch": 0.11051394502250121, "grad_norm": 1.015625, "learning_rate": 7.420556123629909e-05, "loss": 1.129, "step": 701 }, { "epoch": 0.11067159686989422, "grad_norm": 1.5078125, "learning_rate": 7.420111331459957e-05, "loss": 1.1317, "step": 702 }, { "epoch": 0.11082924871728723, "grad_norm": 1.0390625, "learning_rate": 7.419666544757934e-05, "loss": 1.2639, "step": 703 }, { "epoch": 0.11098690056468026, "grad_norm": 0.97265625, "learning_rate": 7.419221763524779e-05, "loss": 1.1172, "step": 704 }, { "epoch": 0.11114455241207327, "grad_norm": 1.0, "learning_rate": 7.418776987761438e-05, "loss": 1.1648, "step": 705 }, { "epoch": 0.11130220425946628, "grad_norm": 0.93359375, "learning_rate": 7.418332217468849e-05, "loss": 1.209, "step": 706 }, { "epoch": 0.1114598561068593, "grad_norm": 0.9296875, "learning_rate": 7.417887452647956e-05, "loss": 1.0799, "step": 707 }, { "epoch": 0.1116175079542523, "grad_norm": 0.95703125, "learning_rate": 7.417442693299705e-05, "loss": 1.0243, "step": 708 }, { "epoch": 0.11177515980164532, "grad_norm": 0.95703125, "learning_rate": 7.416997939425038e-05, "loss": 1.3149, "step": 709 }, { "epoch": 0.11193281164903833, "grad_norm": 0.828125, "learning_rate": 7.416553191024895e-05, "loss": 0.962, "step": 710 }, { "epoch": 0.11209046349643134, "grad_norm": 0.9140625, "learning_rate": 7.41610844810022e-05, "loss": 1.1477, "step": 711 }, { "epoch": 0.11224811534382435, "grad_norm": 0.984375, "learning_rate": 7.415663710651952e-05, "loss": 1.1915, "step": 712 }, { "epoch": 0.11240576719121737, "grad_norm": 0.9375, "learning_rate": 7.415218978681042e-05, "loss": 0.9805, "step": 713 }, { "epoch": 0.11256341903861038, "grad_norm": 0.9765625, "learning_rate": 7.414774252188425e-05, "loss": 1.1969, "step": 714 }, { "epoch": 0.11272107088600339, "grad_norm": 1.015625, "learning_rate": 7.414329531175049e-05, "loss": 1.3074, "step": 715 }, { "epoch": 0.1128787227333964, "grad_norm": 0.96484375, "learning_rate": 7.413884815641853e-05, "loss": 1.3352, "step": 716 }, { "epoch": 0.11303637458078941, "grad_norm": 0.8984375, "learning_rate": 7.413440105589775e-05, "loss": 1.1258, "step": 717 }, { "epoch": 0.11319402642818242, "grad_norm": 1.0234375, "learning_rate": 7.412995401019768e-05, "loss": 1.1097, "step": 718 }, { "epoch": 0.11335167827557543, "grad_norm": 1.09375, "learning_rate": 7.41255070193277e-05, "loss": 1.358, "step": 719 }, { "epoch": 0.11350933012296845, "grad_norm": 0.83984375, "learning_rate": 7.412106008329721e-05, "loss": 0.8733, "step": 720 }, { "epoch": 0.11366698197036146, "grad_norm": 1.0, "learning_rate": 7.411661320211566e-05, "loss": 1.1374, "step": 721 }, { "epoch": 0.11382463381775447, "grad_norm": 1.0078125, "learning_rate": 7.411216637579245e-05, "loss": 1.2428, "step": 722 }, { "epoch": 0.11398228566514748, "grad_norm": 1.0625, "learning_rate": 7.410771960433706e-05, "loss": 1.0459, "step": 723 }, { "epoch": 0.11413993751254049, "grad_norm": 0.96484375, "learning_rate": 7.410327288775885e-05, "loss": 1.2101, "step": 724 }, { "epoch": 0.1142975893599335, "grad_norm": 0.93359375, "learning_rate": 7.409882622606728e-05, "loss": 1.3412, "step": 725 }, { "epoch": 0.11445524120732652, "grad_norm": 0.94921875, "learning_rate": 7.409437961927177e-05, "loss": 1.1829, "step": 726 }, { "epoch": 0.11461289305471953, "grad_norm": 1.0625, "learning_rate": 7.408993306738173e-05, "loss": 1.3572, "step": 727 }, { "epoch": 0.11477054490211254, "grad_norm": 0.96484375, "learning_rate": 7.408548657040661e-05, "loss": 1.1652, "step": 728 }, { "epoch": 0.11492819674950555, "grad_norm": 0.99609375, "learning_rate": 7.408104012835582e-05, "loss": 1.3395, "step": 729 }, { "epoch": 0.11508584859689856, "grad_norm": 0.9140625, "learning_rate": 7.407659374123879e-05, "loss": 1.0933, "step": 730 }, { "epoch": 0.11524350044429157, "grad_norm": 0.98046875, "learning_rate": 7.407214740906494e-05, "loss": 1.1906, "step": 731 }, { "epoch": 0.11540115229168459, "grad_norm": 0.92578125, "learning_rate": 7.406770113184364e-05, "loss": 0.9344, "step": 732 }, { "epoch": 0.1155588041390776, "grad_norm": 0.8828125, "learning_rate": 7.40632549095844e-05, "loss": 1.1063, "step": 733 }, { "epoch": 0.11571645598647061, "grad_norm": 0.96875, "learning_rate": 7.405880874229663e-05, "loss": 1.3994, "step": 734 }, { "epoch": 0.11587410783386362, "grad_norm": 0.9453125, "learning_rate": 7.405436262998972e-05, "loss": 1.1545, "step": 735 }, { "epoch": 0.11603175968125663, "grad_norm": 1.125, "learning_rate": 7.404991657267312e-05, "loss": 1.2821, "step": 736 }, { "epoch": 0.11618941152864964, "grad_norm": 0.95703125, "learning_rate": 7.404547057035617e-05, "loss": 1.1387, "step": 737 }, { "epoch": 0.11634706337604266, "grad_norm": 0.9140625, "learning_rate": 7.404102462304842e-05, "loss": 1.0894, "step": 738 }, { "epoch": 0.11650471522343567, "grad_norm": 0.9375, "learning_rate": 7.403657873075925e-05, "loss": 1.0531, "step": 739 }, { "epoch": 0.11666236707082868, "grad_norm": 0.9375, "learning_rate": 7.403213289349804e-05, "loss": 1.2914, "step": 740 }, { "epoch": 0.11682001891822169, "grad_norm": 1.0390625, "learning_rate": 7.402768711127425e-05, "loss": 1.0987, "step": 741 }, { "epoch": 0.1169776707656147, "grad_norm": 0.921875, "learning_rate": 7.402324138409727e-05, "loss": 1.2507, "step": 742 }, { "epoch": 0.11713532261300771, "grad_norm": 0.99609375, "learning_rate": 7.401879571197656e-05, "loss": 1.2409, "step": 743 }, { "epoch": 0.11729297446040073, "grad_norm": 0.94140625, "learning_rate": 7.401435009492153e-05, "loss": 1.0694, "step": 744 }, { "epoch": 0.11745062630779374, "grad_norm": 0.921875, "learning_rate": 7.40099045329416e-05, "loss": 1.0904, "step": 745 }, { "epoch": 0.11760827815518675, "grad_norm": 1.40625, "learning_rate": 7.400545902604621e-05, "loss": 1.3173, "step": 746 }, { "epoch": 0.11776593000257976, "grad_norm": 0.8828125, "learning_rate": 7.400101357424471e-05, "loss": 1.0317, "step": 747 }, { "epoch": 0.11792358184997277, "grad_norm": 0.95703125, "learning_rate": 7.399656817754664e-05, "loss": 1.2205, "step": 748 }, { "epoch": 0.11808123369736578, "grad_norm": 0.90234375, "learning_rate": 7.399212283596132e-05, "loss": 1.5768, "step": 749 }, { "epoch": 0.1182388855447588, "grad_norm": 1.0, "learning_rate": 7.398767754949822e-05, "loss": 1.1895, "step": 750 }, { "epoch": 0.11839653739215181, "grad_norm": 0.94921875, "learning_rate": 7.398323231816677e-05, "loss": 1.3032, "step": 751 }, { "epoch": 0.11855418923954482, "grad_norm": 0.90234375, "learning_rate": 7.39787871419763e-05, "loss": 1.2666, "step": 752 }, { "epoch": 0.11871184108693783, "grad_norm": 0.953125, "learning_rate": 7.397434202093636e-05, "loss": 1.1103, "step": 753 }, { "epoch": 0.11886949293433084, "grad_norm": 0.875, "learning_rate": 7.396989695505631e-05, "loss": 1.1344, "step": 754 }, { "epoch": 0.11902714478172385, "grad_norm": 0.859375, "learning_rate": 7.396545194434558e-05, "loss": 1.2312, "step": 755 }, { "epoch": 0.11918479662911687, "grad_norm": 1.015625, "learning_rate": 7.39610069888136e-05, "loss": 1.3046, "step": 756 }, { "epoch": 0.11934244847650988, "grad_norm": 0.90625, "learning_rate": 7.395656208846972e-05, "loss": 1.077, "step": 757 }, { "epoch": 0.11950010032390289, "grad_norm": 1.0078125, "learning_rate": 7.395211724332345e-05, "loss": 1.3239, "step": 758 }, { "epoch": 0.1196577521712959, "grad_norm": 0.9296875, "learning_rate": 7.39476724533842e-05, "loss": 1.1966, "step": 759 }, { "epoch": 0.11981540401868891, "grad_norm": 0.8984375, "learning_rate": 7.394322771866136e-05, "loss": 1.1077, "step": 760 }, { "epoch": 0.11997305586608192, "grad_norm": 0.9375, "learning_rate": 7.393878303916435e-05, "loss": 1.2681, "step": 761 }, { "epoch": 0.12013070771347494, "grad_norm": 0.99609375, "learning_rate": 7.393433841490257e-05, "loss": 1.3597, "step": 762 }, { "epoch": 0.12028835956086795, "grad_norm": 0.98828125, "learning_rate": 7.392989384588551e-05, "loss": 1.0681, "step": 763 }, { "epoch": 0.12044601140826096, "grad_norm": 1.125, "learning_rate": 7.392544933212256e-05, "loss": 1.2165, "step": 764 }, { "epoch": 0.12060366325565397, "grad_norm": 0.9375, "learning_rate": 7.39210048736231e-05, "loss": 1.1721, "step": 765 }, { "epoch": 0.12076131510304698, "grad_norm": 0.9453125, "learning_rate": 7.391656047039662e-05, "loss": 1.3108, "step": 766 }, { "epoch": 0.12091896695044, "grad_norm": 0.93359375, "learning_rate": 7.391211612245244e-05, "loss": 1.0176, "step": 767 }, { "epoch": 0.121076618797833, "grad_norm": 0.9453125, "learning_rate": 7.390767182980008e-05, "loss": 1.2099, "step": 768 }, { "epoch": 0.12123427064522602, "grad_norm": 0.96875, "learning_rate": 7.390322759244892e-05, "loss": 1.0398, "step": 769 }, { "epoch": 0.12139192249261903, "grad_norm": 1.046875, "learning_rate": 7.389878341040837e-05, "loss": 1.5509, "step": 770 }, { "epoch": 0.12154957434001204, "grad_norm": 0.97265625, "learning_rate": 7.389433928368785e-05, "loss": 1.2606, "step": 771 }, { "epoch": 0.12170722618740505, "grad_norm": 0.95703125, "learning_rate": 7.388989521229674e-05, "loss": 1.1484, "step": 772 }, { "epoch": 0.12186487803479806, "grad_norm": 1.0078125, "learning_rate": 7.388545119624457e-05, "loss": 1.3385, "step": 773 }, { "epoch": 0.12202252988219107, "grad_norm": 1.0625, "learning_rate": 7.388100723554068e-05, "loss": 1.0916, "step": 774 }, { "epoch": 0.12218018172958409, "grad_norm": 0.9453125, "learning_rate": 7.38765633301945e-05, "loss": 1.1116, "step": 775 }, { "epoch": 0.1223378335769771, "grad_norm": 1.0234375, "learning_rate": 7.387211948021546e-05, "loss": 1.1095, "step": 776 }, { "epoch": 0.12249548542437011, "grad_norm": 1.109375, "learning_rate": 7.386767568561292e-05, "loss": 1.4139, "step": 777 }, { "epoch": 0.12265313727176312, "grad_norm": 1.1015625, "learning_rate": 7.38632319463964e-05, "loss": 1.3321, "step": 778 }, { "epoch": 0.12281078911915613, "grad_norm": 0.984375, "learning_rate": 7.385878826257526e-05, "loss": 1.2532, "step": 779 }, { "epoch": 0.12296844096654914, "grad_norm": 1.0, "learning_rate": 7.385434463415893e-05, "loss": 1.0896, "step": 780 }, { "epoch": 0.12312609281394216, "grad_norm": 0.9765625, "learning_rate": 7.384990106115682e-05, "loss": 1.1889, "step": 781 }, { "epoch": 0.12328374466133517, "grad_norm": 1.03125, "learning_rate": 7.384545754357828e-05, "loss": 1.2576, "step": 782 }, { "epoch": 0.12344139650872818, "grad_norm": 0.89453125, "learning_rate": 7.384101408143286e-05, "loss": 1.027, "step": 783 }, { "epoch": 0.12359904835612119, "grad_norm": 0.921875, "learning_rate": 7.383657067472993e-05, "loss": 1.2874, "step": 784 }, { "epoch": 0.1237567002035142, "grad_norm": 0.86328125, "learning_rate": 7.383212732347889e-05, "loss": 0.9958, "step": 785 }, { "epoch": 0.12391435205090721, "grad_norm": 0.94921875, "learning_rate": 7.382768402768914e-05, "loss": 1.23, "step": 786 }, { "epoch": 0.12407200389830023, "grad_norm": 0.94140625, "learning_rate": 7.38232407873701e-05, "loss": 1.2853, "step": 787 }, { "epoch": 0.12422965574569324, "grad_norm": 1.03125, "learning_rate": 7.381879760253124e-05, "loss": 1.2576, "step": 788 }, { "epoch": 0.12438730759308625, "grad_norm": 0.97265625, "learning_rate": 7.381435447318195e-05, "loss": 1.0389, "step": 789 }, { "epoch": 0.12454495944047926, "grad_norm": 1.0234375, "learning_rate": 7.380991139933163e-05, "loss": 1.0695, "step": 790 }, { "epoch": 0.12470261128787227, "grad_norm": 1.0234375, "learning_rate": 7.380546838098966e-05, "loss": 1.1153, "step": 791 }, { "epoch": 0.12486026313526528, "grad_norm": 1.0, "learning_rate": 7.380102541816554e-05, "loss": 1.258, "step": 792 }, { "epoch": 0.1250179149826583, "grad_norm": 0.96875, "learning_rate": 7.379658251086864e-05, "loss": 1.1759, "step": 793 }, { "epoch": 0.12517556683005132, "grad_norm": 1.0390625, "learning_rate": 7.37921396591084e-05, "loss": 1.2502, "step": 794 }, { "epoch": 0.12533321867744432, "grad_norm": 0.90234375, "learning_rate": 7.378769686289422e-05, "loss": 1.1113, "step": 795 }, { "epoch": 0.12549087052483734, "grad_norm": 0.9140625, "learning_rate": 7.378325412223552e-05, "loss": 1.1515, "step": 796 }, { "epoch": 0.12564852237223034, "grad_norm": 0.98828125, "learning_rate": 7.377881143714172e-05, "loss": 1.1773, "step": 797 }, { "epoch": 0.12580617421962337, "grad_norm": 0.9140625, "learning_rate": 7.377436880762218e-05, "loss": 1.32, "step": 798 }, { "epoch": 0.12596382606701637, "grad_norm": 1.03125, "learning_rate": 7.37699262336864e-05, "loss": 1.3543, "step": 799 }, { "epoch": 0.1261214779144094, "grad_norm": 1.015625, "learning_rate": 7.376548371534378e-05, "loss": 1.2574, "step": 800 }, { "epoch": 0.1262791297618024, "grad_norm": 0.9296875, "learning_rate": 7.37610412526037e-05, "loss": 1.3045, "step": 801 }, { "epoch": 0.12643678160919541, "grad_norm": 1.0078125, "learning_rate": 7.375659884547561e-05, "loss": 1.2639, "step": 802 }, { "epoch": 0.1265944334565884, "grad_norm": 0.9296875, "learning_rate": 7.375215649396885e-05, "loss": 0.9907, "step": 803 }, { "epoch": 0.12675208530398144, "grad_norm": 0.94921875, "learning_rate": 7.374771419809296e-05, "loss": 1.1223, "step": 804 }, { "epoch": 0.12690973715137444, "grad_norm": 0.953125, "learning_rate": 7.374327195785726e-05, "loss": 1.1565, "step": 805 }, { "epoch": 0.12706738899876746, "grad_norm": 0.9453125, "learning_rate": 7.373882977327122e-05, "loss": 1.1606, "step": 806 }, { "epoch": 0.12722504084616046, "grad_norm": 0.921875, "learning_rate": 7.373438764434422e-05, "loss": 1.1745, "step": 807 }, { "epoch": 0.12738269269355348, "grad_norm": 0.9609375, "learning_rate": 7.372994557108566e-05, "loss": 1.3641, "step": 808 }, { "epoch": 0.12754034454094648, "grad_norm": 0.90625, "learning_rate": 7.372550355350501e-05, "loss": 1.1421, "step": 809 }, { "epoch": 0.1276979963883395, "grad_norm": 1.0, "learning_rate": 7.37210615916116e-05, "loss": 1.1201, "step": 810 }, { "epoch": 0.1278556482357325, "grad_norm": 0.98828125, "learning_rate": 7.371661968541496e-05, "loss": 1.1053, "step": 811 }, { "epoch": 0.12801330008312553, "grad_norm": 0.98828125, "learning_rate": 7.371217783492443e-05, "loss": 1.2309, "step": 812 }, { "epoch": 0.12817095193051853, "grad_norm": 0.953125, "learning_rate": 7.37077360401494e-05, "loss": 1.1134, "step": 813 }, { "epoch": 0.12832860377791155, "grad_norm": 0.8828125, "learning_rate": 7.370329430109936e-05, "loss": 1.1089, "step": 814 }, { "epoch": 0.12848625562530455, "grad_norm": 1.1328125, "learning_rate": 7.369885261778368e-05, "loss": 1.2232, "step": 815 }, { "epoch": 0.12864390747269758, "grad_norm": 0.859375, "learning_rate": 7.369441099021177e-05, "loss": 0.9038, "step": 816 }, { "epoch": 0.12880155932009058, "grad_norm": 0.9453125, "learning_rate": 7.368996941839306e-05, "loss": 1.2109, "step": 817 }, { "epoch": 0.1289592111674836, "grad_norm": 0.890625, "learning_rate": 7.368552790233691e-05, "loss": 1.0869, "step": 818 }, { "epoch": 0.1291168630148766, "grad_norm": 0.8984375, "learning_rate": 7.368108644205283e-05, "loss": 1.0288, "step": 819 }, { "epoch": 0.12927451486226962, "grad_norm": 0.796875, "learning_rate": 7.367664503755018e-05, "loss": 0.9274, "step": 820 }, { "epoch": 0.12943216670966262, "grad_norm": 0.9296875, "learning_rate": 7.367220368883836e-05, "loss": 1.0057, "step": 821 }, { "epoch": 0.12958981855705565, "grad_norm": 0.921875, "learning_rate": 7.366776239592681e-05, "loss": 1.2045, "step": 822 }, { "epoch": 0.12974747040444864, "grad_norm": 0.93359375, "learning_rate": 7.36633211588249e-05, "loss": 0.8991, "step": 823 }, { "epoch": 0.12990512225184167, "grad_norm": 1.078125, "learning_rate": 7.36588799775421e-05, "loss": 1.0336, "step": 824 }, { "epoch": 0.13006277409923467, "grad_norm": 0.984375, "learning_rate": 7.365443885208783e-05, "loss": 1.1097, "step": 825 }, { "epoch": 0.1302204259466277, "grad_norm": 0.93359375, "learning_rate": 7.364999778247143e-05, "loss": 0.9801, "step": 826 }, { "epoch": 0.1303780777940207, "grad_norm": 0.99609375, "learning_rate": 7.364555676870238e-05, "loss": 1.2915, "step": 827 }, { "epoch": 0.13053572964141372, "grad_norm": 0.95703125, "learning_rate": 7.364111581079002e-05, "loss": 1.1857, "step": 828 }, { "epoch": 0.13069338148880671, "grad_norm": 0.921875, "learning_rate": 7.363667490874384e-05, "loss": 1.0863, "step": 829 }, { "epoch": 0.13085103333619974, "grad_norm": 1.015625, "learning_rate": 7.363223406257324e-05, "loss": 1.261, "step": 830 }, { "epoch": 0.13100868518359274, "grad_norm": 1.0234375, "learning_rate": 7.36277932722876e-05, "loss": 1.2139, "step": 831 }, { "epoch": 0.13116633703098576, "grad_norm": 0.9609375, "learning_rate": 7.362335253789635e-05, "loss": 1.1666, "step": 832 }, { "epoch": 0.13132398887837876, "grad_norm": 0.984375, "learning_rate": 7.361891185940886e-05, "loss": 1.4789, "step": 833 }, { "epoch": 0.1314816407257718, "grad_norm": 1.0625, "learning_rate": 7.361447123683463e-05, "loss": 1.2802, "step": 834 }, { "epoch": 0.13163929257316478, "grad_norm": 0.97265625, "learning_rate": 7.3610030670183e-05, "loss": 1.2085, "step": 835 }, { "epoch": 0.1317969444205578, "grad_norm": 0.9375, "learning_rate": 7.36055901594634e-05, "loss": 1.1116, "step": 836 }, { "epoch": 0.1319545962679508, "grad_norm": 1.046875, "learning_rate": 7.360114970468523e-05, "loss": 1.1272, "step": 837 }, { "epoch": 0.13211224811534383, "grad_norm": 0.93359375, "learning_rate": 7.35967093058579e-05, "loss": 1.2472, "step": 838 }, { "epoch": 0.13226989996273683, "grad_norm": 0.8671875, "learning_rate": 7.359226896299086e-05, "loss": 1.2304, "step": 839 }, { "epoch": 0.13242755181012986, "grad_norm": 0.94921875, "learning_rate": 7.358782867609351e-05, "loss": 1.0428, "step": 840 }, { "epoch": 0.13258520365752285, "grad_norm": 0.953125, "learning_rate": 7.358338844517523e-05, "loss": 1.1529, "step": 841 }, { "epoch": 0.13274285550491588, "grad_norm": 0.97265625, "learning_rate": 7.357894827024547e-05, "loss": 1.1142, "step": 842 }, { "epoch": 0.13290050735230888, "grad_norm": 0.9296875, "learning_rate": 7.357450815131356e-05, "loss": 1.1024, "step": 843 }, { "epoch": 0.1330581591997019, "grad_norm": 0.93359375, "learning_rate": 7.357006808838903e-05, "loss": 0.9091, "step": 844 }, { "epoch": 0.1332158110470949, "grad_norm": 1.0703125, "learning_rate": 7.35656280814812e-05, "loss": 1.1265, "step": 845 }, { "epoch": 0.13337346289448793, "grad_norm": 0.9609375, "learning_rate": 7.356118813059953e-05, "loss": 1.1058, "step": 846 }, { "epoch": 0.13353111474188092, "grad_norm": 1.0859375, "learning_rate": 7.35567482357534e-05, "loss": 1.2292, "step": 847 }, { "epoch": 0.13368876658927395, "grad_norm": 0.94140625, "learning_rate": 7.35523083969522e-05, "loss": 1.1149, "step": 848 }, { "epoch": 0.13384641843666695, "grad_norm": 0.9609375, "learning_rate": 7.354786861420542e-05, "loss": 1.2834, "step": 849 }, { "epoch": 0.13400407028405997, "grad_norm": 0.92578125, "learning_rate": 7.35434288875224e-05, "loss": 1.0439, "step": 850 }, { "epoch": 0.13416172213145297, "grad_norm": 0.859375, "learning_rate": 7.353898921691258e-05, "loss": 1.0132, "step": 851 }, { "epoch": 0.134319373978846, "grad_norm": 0.95703125, "learning_rate": 7.353454960238536e-05, "loss": 1.2369, "step": 852 }, { "epoch": 0.134477025826239, "grad_norm": 0.87109375, "learning_rate": 7.353011004395013e-05, "loss": 1.0607, "step": 853 }, { "epoch": 0.13463467767363202, "grad_norm": 0.96484375, "learning_rate": 7.352567054161634e-05, "loss": 1.3504, "step": 854 }, { "epoch": 0.13479232952102502, "grad_norm": 0.94140625, "learning_rate": 7.352123109539337e-05, "loss": 1.2534, "step": 855 }, { "epoch": 0.13494998136841804, "grad_norm": 0.984375, "learning_rate": 7.351679170529065e-05, "loss": 0.9253, "step": 856 }, { "epoch": 0.13510763321581104, "grad_norm": 1.0078125, "learning_rate": 7.351235237131757e-05, "loss": 1.2869, "step": 857 }, { "epoch": 0.13526528506320407, "grad_norm": 1.0234375, "learning_rate": 7.35079130934835e-05, "loss": 1.0661, "step": 858 }, { "epoch": 0.13542293691059706, "grad_norm": 1.0234375, "learning_rate": 7.350347387179794e-05, "loss": 1.1316, "step": 859 }, { "epoch": 0.1355805887579901, "grad_norm": 1.046875, "learning_rate": 7.349903470627026e-05, "loss": 1.3311, "step": 860 }, { "epoch": 0.1357382406053831, "grad_norm": 1.0546875, "learning_rate": 7.349459559690984e-05, "loss": 1.224, "step": 861 }, { "epoch": 0.1358958924527761, "grad_norm": 1.0234375, "learning_rate": 7.349015654372613e-05, "loss": 1.1074, "step": 862 }, { "epoch": 0.1360535443001691, "grad_norm": 0.89453125, "learning_rate": 7.348571754672846e-05, "loss": 1.2115, "step": 863 }, { "epoch": 0.13621119614756214, "grad_norm": 0.76171875, "learning_rate": 7.348127860592636e-05, "loss": 0.926, "step": 864 }, { "epoch": 0.13636884799495513, "grad_norm": 0.91796875, "learning_rate": 7.347683972132915e-05, "loss": 1.2034, "step": 865 }, { "epoch": 0.13652649984234816, "grad_norm": 1.0234375, "learning_rate": 7.347240089294627e-05, "loss": 1.2762, "step": 866 }, { "epoch": 0.13668415168974116, "grad_norm": 1.03125, "learning_rate": 7.346796212078713e-05, "loss": 1.0057, "step": 867 }, { "epoch": 0.13684180353713418, "grad_norm": 0.953125, "learning_rate": 7.346352340486108e-05, "loss": 0.9425, "step": 868 }, { "epoch": 0.13699945538452718, "grad_norm": 1.0625, "learning_rate": 7.345908474517762e-05, "loss": 1.2193, "step": 869 }, { "epoch": 0.1371571072319202, "grad_norm": 0.9453125, "learning_rate": 7.345464614174611e-05, "loss": 1.0303, "step": 870 }, { "epoch": 0.1373147590793132, "grad_norm": 0.9765625, "learning_rate": 7.345020759457595e-05, "loss": 1.1257, "step": 871 }, { "epoch": 0.13747241092670623, "grad_norm": 0.9765625, "learning_rate": 7.344576910367658e-05, "loss": 1.1779, "step": 872 }, { "epoch": 0.13763006277409923, "grad_norm": 0.9375, "learning_rate": 7.344133066905734e-05, "loss": 1.1765, "step": 873 }, { "epoch": 0.13778771462149225, "grad_norm": 0.8828125, "learning_rate": 7.343689229072771e-05, "loss": 1.1481, "step": 874 }, { "epoch": 0.13794536646888525, "grad_norm": 0.90625, "learning_rate": 7.343245396869706e-05, "loss": 1.1828, "step": 875 }, { "epoch": 0.13810301831627828, "grad_norm": 0.98828125, "learning_rate": 7.342801570297483e-05, "loss": 1.3029, "step": 876 }, { "epoch": 0.13826067016367127, "grad_norm": 1.0234375, "learning_rate": 7.342357749357036e-05, "loss": 1.2341, "step": 877 }, { "epoch": 0.1384183220110643, "grad_norm": 0.89453125, "learning_rate": 7.341913934049312e-05, "loss": 1.1462, "step": 878 }, { "epoch": 0.1385759738584573, "grad_norm": 1.015625, "learning_rate": 7.341470124375249e-05, "loss": 1.0431, "step": 879 }, { "epoch": 0.13873362570585032, "grad_norm": 1.0625, "learning_rate": 7.34102632033579e-05, "loss": 1.2274, "step": 880 }, { "epoch": 0.13889127755324332, "grad_norm": 0.96875, "learning_rate": 7.340582521931873e-05, "loss": 1.2855, "step": 881 }, { "epoch": 0.13904892940063635, "grad_norm": 1.0234375, "learning_rate": 7.340138729164439e-05, "loss": 1.2785, "step": 882 }, { "epoch": 0.13920658124802934, "grad_norm": 0.921875, "learning_rate": 7.339694942034425e-05, "loss": 1.0836, "step": 883 }, { "epoch": 0.13936423309542237, "grad_norm": 0.99609375, "learning_rate": 7.33925116054278e-05, "loss": 1.2375, "step": 884 }, { "epoch": 0.13952188494281537, "grad_norm": 1.0, "learning_rate": 7.33880738469044e-05, "loss": 1.3075, "step": 885 }, { "epoch": 0.1396795367902084, "grad_norm": 0.91796875, "learning_rate": 7.338363614478347e-05, "loss": 1.3019, "step": 886 }, { "epoch": 0.1398371886376014, "grad_norm": 0.9375, "learning_rate": 7.337919849907437e-05, "loss": 1.0888, "step": 887 }, { "epoch": 0.13999484048499442, "grad_norm": 0.8671875, "learning_rate": 7.337476090978652e-05, "loss": 1.1154, "step": 888 }, { "epoch": 0.1401524923323874, "grad_norm": 1.0, "learning_rate": 7.337032337692937e-05, "loss": 1.1271, "step": 889 }, { "epoch": 0.14031014417978044, "grad_norm": 0.9453125, "learning_rate": 7.336588590051232e-05, "loss": 1.3659, "step": 890 }, { "epoch": 0.14046779602717344, "grad_norm": 1.0078125, "learning_rate": 7.336144848054474e-05, "loss": 1.2111, "step": 891 }, { "epoch": 0.14062544787456646, "grad_norm": 1.03125, "learning_rate": 7.335701111703604e-05, "loss": 1.2921, "step": 892 }, { "epoch": 0.14078309972195946, "grad_norm": 1.0234375, "learning_rate": 7.335257380999561e-05, "loss": 1.4295, "step": 893 }, { "epoch": 0.14094075156935248, "grad_norm": 0.98828125, "learning_rate": 7.334813655943291e-05, "loss": 1.2903, "step": 894 }, { "epoch": 0.14109840341674548, "grad_norm": 0.9609375, "learning_rate": 7.334369936535727e-05, "loss": 1.099, "step": 895 }, { "epoch": 0.1412560552641385, "grad_norm": 0.98828125, "learning_rate": 7.333926222777818e-05, "loss": 1.3001, "step": 896 }, { "epoch": 0.1414137071115315, "grad_norm": 0.90234375, "learning_rate": 7.333482514670499e-05, "loss": 1.0216, "step": 897 }, { "epoch": 0.14157135895892453, "grad_norm": 0.95703125, "learning_rate": 7.33303881221471e-05, "loss": 1.1811, "step": 898 }, { "epoch": 0.14172901080631753, "grad_norm": 1.0, "learning_rate": 7.332595115411393e-05, "loss": 1.1414, "step": 899 }, { "epoch": 0.14188666265371055, "grad_norm": 0.91015625, "learning_rate": 7.33215142426149e-05, "loss": 1.1882, "step": 900 }, { "epoch": 0.14204431450110355, "grad_norm": 0.9453125, "learning_rate": 7.331707738765938e-05, "loss": 1.1021, "step": 901 }, { "epoch": 0.14220196634849658, "grad_norm": 0.85546875, "learning_rate": 7.33126405892568e-05, "loss": 1.0459, "step": 902 }, { "epoch": 0.14235961819588958, "grad_norm": 0.9375, "learning_rate": 7.330820384741652e-05, "loss": 1.2087, "step": 903 }, { "epoch": 0.1425172700432826, "grad_norm": 0.91796875, "learning_rate": 7.3303767162148e-05, "loss": 1.037, "step": 904 }, { "epoch": 0.1426749218906756, "grad_norm": 0.96875, "learning_rate": 7.329933053346063e-05, "loss": 1.1291, "step": 905 }, { "epoch": 0.14283257373806862, "grad_norm": 0.9609375, "learning_rate": 7.32948939613638e-05, "loss": 1.446, "step": 906 }, { "epoch": 0.14299022558546162, "grad_norm": 1.0, "learning_rate": 7.329045744586692e-05, "loss": 1.2247, "step": 907 }, { "epoch": 0.14314787743285465, "grad_norm": 1.1015625, "learning_rate": 7.328602098697934e-05, "loss": 1.2402, "step": 908 }, { "epoch": 0.14330552928024765, "grad_norm": 1.1640625, "learning_rate": 7.328158458471057e-05, "loss": 1.1348, "step": 909 }, { "epoch": 0.14346318112764067, "grad_norm": 0.984375, "learning_rate": 7.327714823906994e-05, "loss": 1.3082, "step": 910 }, { "epoch": 0.14362083297503367, "grad_norm": 0.94921875, "learning_rate": 7.327271195006687e-05, "loss": 1.0162, "step": 911 }, { "epoch": 0.1437784848224267, "grad_norm": 1.1015625, "learning_rate": 7.326827571771075e-05, "loss": 1.0617, "step": 912 }, { "epoch": 0.1439361366698197, "grad_norm": 1.0546875, "learning_rate": 7.326383954201098e-05, "loss": 1.2287, "step": 913 }, { "epoch": 0.14409378851721272, "grad_norm": 0.97265625, "learning_rate": 7.325940342297697e-05, "loss": 0.976, "step": 914 }, { "epoch": 0.14425144036460572, "grad_norm": 0.875, "learning_rate": 7.325496736061815e-05, "loss": 0.9393, "step": 915 }, { "epoch": 0.14440909221199874, "grad_norm": 1.0234375, "learning_rate": 7.32505313549439e-05, "loss": 1.1421, "step": 916 }, { "epoch": 0.14456674405939174, "grad_norm": 1.0, "learning_rate": 7.324609540596362e-05, "loss": 1.1644, "step": 917 }, { "epoch": 0.14472439590678476, "grad_norm": 1.015625, "learning_rate": 7.324165951368669e-05, "loss": 1.1705, "step": 918 }, { "epoch": 0.14488204775417776, "grad_norm": 0.921875, "learning_rate": 7.323722367812255e-05, "loss": 1.1147, "step": 919 }, { "epoch": 0.1450396996015708, "grad_norm": 1.0, "learning_rate": 7.323278789928059e-05, "loss": 1.2157, "step": 920 }, { "epoch": 0.14519735144896379, "grad_norm": 0.89453125, "learning_rate": 7.32283521771702e-05, "loss": 1.0341, "step": 921 }, { "epoch": 0.1453550032963568, "grad_norm": 0.95703125, "learning_rate": 7.322391651180078e-05, "loss": 1.148, "step": 922 }, { "epoch": 0.1455126551437498, "grad_norm": 1.0390625, "learning_rate": 7.321948090318172e-05, "loss": 1.003, "step": 923 }, { "epoch": 0.14567030699114283, "grad_norm": 1.015625, "learning_rate": 7.321504535132247e-05, "loss": 1.148, "step": 924 }, { "epoch": 0.14582795883853583, "grad_norm": 0.92578125, "learning_rate": 7.32106098562324e-05, "loss": 1.0988, "step": 925 }, { "epoch": 0.14598561068592886, "grad_norm": 0.87890625, "learning_rate": 7.32061744179209e-05, "loss": 0.9523, "step": 926 }, { "epoch": 0.14614326253332185, "grad_norm": 0.90625, "learning_rate": 7.320173903639739e-05, "loss": 1.4244, "step": 927 }, { "epoch": 0.14630091438071488, "grad_norm": 1.0, "learning_rate": 7.319730371167122e-05, "loss": 1.2422, "step": 928 }, { "epoch": 0.14645856622810788, "grad_norm": 0.86328125, "learning_rate": 7.319286844375189e-05, "loss": 1.1872, "step": 929 }, { "epoch": 0.1466162180755009, "grad_norm": 0.953125, "learning_rate": 7.318843323264872e-05, "loss": 1.2095, "step": 930 }, { "epoch": 0.1467738699228939, "grad_norm": 1.09375, "learning_rate": 7.318399807837115e-05, "loss": 1.2441, "step": 931 }, { "epoch": 0.14693152177028693, "grad_norm": 1.046875, "learning_rate": 7.317956298092856e-05, "loss": 1.4817, "step": 932 }, { "epoch": 0.14708917361767992, "grad_norm": 0.97265625, "learning_rate": 7.31751279403303e-05, "loss": 1.0741, "step": 933 }, { "epoch": 0.14724682546507295, "grad_norm": 1.125, "learning_rate": 7.317069295658588e-05, "loss": 1.2691, "step": 934 }, { "epoch": 0.14740447731246595, "grad_norm": 0.94921875, "learning_rate": 7.316625802970463e-05, "loss": 1.2311, "step": 935 }, { "epoch": 0.14756212915985897, "grad_norm": 0.95703125, "learning_rate": 7.316182315969598e-05, "loss": 1.0153, "step": 936 }, { "epoch": 0.14771978100725197, "grad_norm": 0.984375, "learning_rate": 7.315738834656929e-05, "loss": 1.0934, "step": 937 }, { "epoch": 0.147877432854645, "grad_norm": 0.90625, "learning_rate": 7.315295359033398e-05, "loss": 1.0864, "step": 938 }, { "epoch": 0.14803508470203802, "grad_norm": 0.96875, "learning_rate": 7.314851889099945e-05, "loss": 1.1927, "step": 939 }, { "epoch": 0.14819273654943102, "grad_norm": 0.96484375, "learning_rate": 7.31440842485751e-05, "loss": 1.1883, "step": 940 }, { "epoch": 0.14835038839682405, "grad_norm": 0.97265625, "learning_rate": 7.313964966307035e-05, "loss": 1.2068, "step": 941 }, { "epoch": 0.14850804024421704, "grad_norm": 0.99609375, "learning_rate": 7.313521513449457e-05, "loss": 1.1585, "step": 942 }, { "epoch": 0.14866569209161007, "grad_norm": 1.078125, "learning_rate": 7.313078066285715e-05, "loss": 1.3315, "step": 943 }, { "epoch": 0.14882334393900307, "grad_norm": 0.96875, "learning_rate": 7.312634624816748e-05, "loss": 1.1245, "step": 944 }, { "epoch": 0.1489809957863961, "grad_norm": 0.89453125, "learning_rate": 7.312191189043502e-05, "loss": 1.1142, "step": 945 }, { "epoch": 0.1491386476337891, "grad_norm": 1.03125, "learning_rate": 7.311747758966913e-05, "loss": 1.2184, "step": 946 }, { "epoch": 0.14929629948118212, "grad_norm": 0.90234375, "learning_rate": 7.311304334587923e-05, "loss": 1.0056, "step": 947 }, { "epoch": 0.1494539513285751, "grad_norm": 0.83984375, "learning_rate": 7.310860915907467e-05, "loss": 1.0754, "step": 948 }, { "epoch": 0.14961160317596814, "grad_norm": 0.94140625, "learning_rate": 7.310417502926484e-05, "loss": 1.1499, "step": 949 }, { "epoch": 0.14976925502336114, "grad_norm": 0.87109375, "learning_rate": 7.309974095645922e-05, "loss": 1.1762, "step": 950 }, { "epoch": 0.14992690687075416, "grad_norm": 0.96875, "learning_rate": 7.309530694066717e-05, "loss": 1.1175, "step": 951 }, { "epoch": 0.15008455871814716, "grad_norm": 0.875, "learning_rate": 7.309087298189807e-05, "loss": 1.0166, "step": 952 }, { "epoch": 0.15024221056554019, "grad_norm": 0.91015625, "learning_rate": 7.308643908016132e-05, "loss": 0.9621, "step": 953 }, { "epoch": 0.15039986241293318, "grad_norm": 0.95703125, "learning_rate": 7.308200523546629e-05, "loss": 1.2469, "step": 954 }, { "epoch": 0.1505575142603262, "grad_norm": 0.94140625, "learning_rate": 7.307757144782246e-05, "loss": 1.0857, "step": 955 }, { "epoch": 0.1507151661077192, "grad_norm": 0.91796875, "learning_rate": 7.307313771723917e-05, "loss": 1.0383, "step": 956 }, { "epoch": 0.15087281795511223, "grad_norm": 1.078125, "learning_rate": 7.306870404372581e-05, "loss": 1.2949, "step": 957 }, { "epoch": 0.15103046980250523, "grad_norm": 1.015625, "learning_rate": 7.306427042729181e-05, "loss": 1.3026, "step": 958 }, { "epoch": 0.15118812164989825, "grad_norm": 0.9453125, "learning_rate": 7.305983686794653e-05, "loss": 1.0829, "step": 959 }, { "epoch": 0.15134577349729125, "grad_norm": 0.93359375, "learning_rate": 7.30554033656994e-05, "loss": 1.3281, "step": 960 }, { "epoch": 0.15150342534468428, "grad_norm": 0.96875, "learning_rate": 7.30509699205598e-05, "loss": 1.2363, "step": 961 }, { "epoch": 0.15166107719207728, "grad_norm": 0.890625, "learning_rate": 7.304653653253713e-05, "loss": 0.9763, "step": 962 }, { "epoch": 0.1518187290394703, "grad_norm": 0.94921875, "learning_rate": 7.304210320164078e-05, "loss": 1.1151, "step": 963 }, { "epoch": 0.1519763808868633, "grad_norm": 0.90625, "learning_rate": 7.303766992788015e-05, "loss": 1.1778, "step": 964 }, { "epoch": 0.15213403273425632, "grad_norm": 1.203125, "learning_rate": 7.303323671126465e-05, "loss": 1.4283, "step": 965 }, { "epoch": 0.15229168458164932, "grad_norm": 0.921875, "learning_rate": 7.302880355180366e-05, "loss": 1.1114, "step": 966 }, { "epoch": 0.15244933642904235, "grad_norm": 0.984375, "learning_rate": 7.302437044950658e-05, "loss": 1.2378, "step": 967 }, { "epoch": 0.15260698827643535, "grad_norm": 0.90234375, "learning_rate": 7.30199374043828e-05, "loss": 1.0008, "step": 968 }, { "epoch": 0.15276464012382837, "grad_norm": 0.93359375, "learning_rate": 7.30155044164417e-05, "loss": 1.2024, "step": 969 }, { "epoch": 0.15292229197122137, "grad_norm": 1.03125, "learning_rate": 7.301107148569271e-05, "loss": 1.2336, "step": 970 }, { "epoch": 0.1530799438186144, "grad_norm": 0.86328125, "learning_rate": 7.300663861214523e-05, "loss": 1.0731, "step": 971 }, { "epoch": 0.1532375956660074, "grad_norm": 0.8984375, "learning_rate": 7.300220579580863e-05, "loss": 0.9677, "step": 972 }, { "epoch": 0.15339524751340042, "grad_norm": 0.90234375, "learning_rate": 7.299777303669231e-05, "loss": 0.9836, "step": 973 }, { "epoch": 0.15355289936079342, "grad_norm": 0.99609375, "learning_rate": 7.299334033480562e-05, "loss": 1.0679, "step": 974 }, { "epoch": 0.15371055120818644, "grad_norm": 0.90234375, "learning_rate": 7.298890769015808e-05, "loss": 1.0393, "step": 975 }, { "epoch": 0.15386820305557944, "grad_norm": 1.234375, "learning_rate": 7.298447510275896e-05, "loss": 1.0612, "step": 976 }, { "epoch": 0.15402585490297246, "grad_norm": 0.98828125, "learning_rate": 7.298004257261772e-05, "loss": 1.0934, "step": 977 }, { "epoch": 0.15418350675036546, "grad_norm": 0.8125, "learning_rate": 7.297561009974374e-05, "loss": 0.937, "step": 978 }, { "epoch": 0.1543411585977585, "grad_norm": 0.94921875, "learning_rate": 7.297117768414637e-05, "loss": 1.1493, "step": 979 }, { "epoch": 0.15449881044515149, "grad_norm": 0.8515625, "learning_rate": 7.296674532583509e-05, "loss": 1.1116, "step": 980 }, { "epoch": 0.1546564622925445, "grad_norm": 0.96484375, "learning_rate": 7.296231302481921e-05, "loss": 1.3317, "step": 981 }, { "epoch": 0.1548141141399375, "grad_norm": 0.90234375, "learning_rate": 7.295788078110819e-05, "loss": 1.1275, "step": 982 }, { "epoch": 0.15497176598733053, "grad_norm": 0.8515625, "learning_rate": 7.295344859471138e-05, "loss": 0.7711, "step": 983 }, { "epoch": 0.15512941783472353, "grad_norm": 1.0859375, "learning_rate": 7.294901646563819e-05, "loss": 1.3431, "step": 984 }, { "epoch": 0.15528706968211656, "grad_norm": 0.953125, "learning_rate": 7.294458439389803e-05, "loss": 1.1839, "step": 985 }, { "epoch": 0.15544472152950956, "grad_norm": 1.0390625, "learning_rate": 7.294015237950027e-05, "loss": 1.4353, "step": 986 }, { "epoch": 0.15560237337690258, "grad_norm": 0.96484375, "learning_rate": 7.293572042245431e-05, "loss": 1.0651, "step": 987 }, { "epoch": 0.15576002522429558, "grad_norm": 0.90625, "learning_rate": 7.293128852276956e-05, "loss": 1.0434, "step": 988 }, { "epoch": 0.1559176770716886, "grad_norm": 1.046875, "learning_rate": 7.292685668045533e-05, "loss": 1.1297, "step": 989 }, { "epoch": 0.1560753289190816, "grad_norm": 1.03125, "learning_rate": 7.292242489552114e-05, "loss": 1.1229, "step": 990 }, { "epoch": 0.15623298076647463, "grad_norm": 0.9140625, "learning_rate": 7.291799316797632e-05, "loss": 1.1803, "step": 991 }, { "epoch": 0.15639063261386763, "grad_norm": 0.921875, "learning_rate": 7.291356149783026e-05, "loss": 1.0939, "step": 992 }, { "epoch": 0.15654828446126065, "grad_norm": 0.87109375, "learning_rate": 7.290912988509236e-05, "loss": 0.9719, "step": 993 }, { "epoch": 0.15670593630865365, "grad_norm": 0.92578125, "learning_rate": 7.290469832977198e-05, "loss": 1.0, "step": 994 }, { "epoch": 0.15686358815604667, "grad_norm": 0.90234375, "learning_rate": 7.290026683187857e-05, "loss": 0.9822, "step": 995 }, { "epoch": 0.15702124000343967, "grad_norm": 0.984375, "learning_rate": 7.289583539142151e-05, "loss": 1.1368, "step": 996 }, { "epoch": 0.1571788918508327, "grad_norm": 0.953125, "learning_rate": 7.289140400841017e-05, "loss": 1.2877, "step": 997 }, { "epoch": 0.1573365436982257, "grad_norm": 1.0078125, "learning_rate": 7.288697268285393e-05, "loss": 1.3098, "step": 998 }, { "epoch": 0.15749419554561872, "grad_norm": 0.90234375, "learning_rate": 7.288254141476218e-05, "loss": 1.2059, "step": 999 }, { "epoch": 0.15765184739301172, "grad_norm": 1.015625, "learning_rate": 7.287811020414438e-05, "loss": 1.0577, "step": 1000 }, { "epoch": 0.15765184739301172, "eval_loss": 1.137479543685913, "eval_runtime": 296.6003, "eval_samples_per_second": 33.715, "eval_steps_per_second": 0.705, "step": 1000 }, { "epoch": 0.15780949924040474, "grad_norm": 0.8125, "learning_rate": 7.287367905100987e-05, "loss": 1.0921, "step": 1001 }, { "epoch": 0.15796715108779774, "grad_norm": 1.015625, "learning_rate": 7.286924795536805e-05, "loss": 1.0942, "step": 1002 }, { "epoch": 0.15812480293519077, "grad_norm": 0.85546875, "learning_rate": 7.286481691722831e-05, "loss": 1.0385, "step": 1003 }, { "epoch": 0.15828245478258376, "grad_norm": 0.95703125, "learning_rate": 7.286038593660001e-05, "loss": 0.9699, "step": 1004 }, { "epoch": 0.1584401066299768, "grad_norm": 1.09375, "learning_rate": 7.285595501349258e-05, "loss": 1.0774, "step": 1005 }, { "epoch": 0.1585977584773698, "grad_norm": 0.97265625, "learning_rate": 7.285152414791543e-05, "loss": 1.3203, "step": 1006 }, { "epoch": 0.1587554103247628, "grad_norm": 0.9921875, "learning_rate": 7.284709333987789e-05, "loss": 1.1586, "step": 1007 }, { "epoch": 0.1589130621721558, "grad_norm": 0.9765625, "learning_rate": 7.28426625893894e-05, "loss": 1.1286, "step": 1008 }, { "epoch": 0.15907071401954884, "grad_norm": 1.0, "learning_rate": 7.28382318964593e-05, "loss": 1.1665, "step": 1009 }, { "epoch": 0.15922836586694183, "grad_norm": 1.0546875, "learning_rate": 7.283380126109706e-05, "loss": 1.1352, "step": 1010 }, { "epoch": 0.15938601771433486, "grad_norm": 1.015625, "learning_rate": 7.282937068331201e-05, "loss": 1.214, "step": 1011 }, { "epoch": 0.15954366956172786, "grad_norm": 0.91015625, "learning_rate": 7.282494016311356e-05, "loss": 1.226, "step": 1012 }, { "epoch": 0.15970132140912088, "grad_norm": 1.078125, "learning_rate": 7.282050970051111e-05, "loss": 1.2391, "step": 1013 }, { "epoch": 0.15985897325651388, "grad_norm": 1.03125, "learning_rate": 7.281607929551398e-05, "loss": 1.3136, "step": 1014 }, { "epoch": 0.1600166251039069, "grad_norm": 0.8984375, "learning_rate": 7.281164894813166e-05, "loss": 0.9134, "step": 1015 }, { "epoch": 0.1601742769512999, "grad_norm": 1.09375, "learning_rate": 7.28072186583735e-05, "loss": 1.2413, "step": 1016 }, { "epoch": 0.16033192879869293, "grad_norm": 1.0546875, "learning_rate": 7.280278842624887e-05, "loss": 1.3154, "step": 1017 }, { "epoch": 0.16048958064608593, "grad_norm": 1.0234375, "learning_rate": 7.279835825176719e-05, "loss": 1.1872, "step": 1018 }, { "epoch": 0.16064723249347895, "grad_norm": 1.0234375, "learning_rate": 7.279392813493781e-05, "loss": 1.0962, "step": 1019 }, { "epoch": 0.16080488434087195, "grad_norm": 1.015625, "learning_rate": 7.278949807577016e-05, "loss": 1.0495, "step": 1020 }, { "epoch": 0.16096253618826498, "grad_norm": 1.0234375, "learning_rate": 7.278506807427364e-05, "loss": 1.1386, "step": 1021 }, { "epoch": 0.16112018803565797, "grad_norm": 0.8984375, "learning_rate": 7.278063813045758e-05, "loss": 1.3057, "step": 1022 }, { "epoch": 0.161277839883051, "grad_norm": 0.89453125, "learning_rate": 7.277620824433143e-05, "loss": 1.5924, "step": 1023 }, { "epoch": 0.161435491730444, "grad_norm": 1.015625, "learning_rate": 7.27717784159045e-05, "loss": 1.1775, "step": 1024 }, { "epoch": 0.16159314357783702, "grad_norm": 0.86328125, "learning_rate": 7.276734864518626e-05, "loss": 1.2048, "step": 1025 }, { "epoch": 0.16175079542523002, "grad_norm": 0.9765625, "learning_rate": 7.276291893218607e-05, "loss": 1.0168, "step": 1026 }, { "epoch": 0.16190844727262305, "grad_norm": 1.015625, "learning_rate": 7.275848927691333e-05, "loss": 1.4225, "step": 1027 }, { "epoch": 0.16206609912001604, "grad_norm": 0.90625, "learning_rate": 7.275405967937741e-05, "loss": 1.051, "step": 1028 }, { "epoch": 0.16222375096740907, "grad_norm": 0.89453125, "learning_rate": 7.274963013958765e-05, "loss": 1.2224, "step": 1029 }, { "epoch": 0.16238140281480207, "grad_norm": 0.94140625, "learning_rate": 7.274520065755354e-05, "loss": 1.0428, "step": 1030 }, { "epoch": 0.1625390546621951, "grad_norm": 1.0625, "learning_rate": 7.274077123328443e-05, "loss": 1.3585, "step": 1031 }, { "epoch": 0.1626967065095881, "grad_norm": 0.96484375, "learning_rate": 7.273634186678968e-05, "loss": 1.1712, "step": 1032 }, { "epoch": 0.16285435835698112, "grad_norm": 0.95703125, "learning_rate": 7.27319125580787e-05, "loss": 1.1267, "step": 1033 }, { "epoch": 0.1630120102043741, "grad_norm": 0.94140625, "learning_rate": 7.272748330716084e-05, "loss": 1.265, "step": 1034 }, { "epoch": 0.16316966205176714, "grad_norm": 1.0, "learning_rate": 7.272305411404554e-05, "loss": 1.1168, "step": 1035 }, { "epoch": 0.16332731389916014, "grad_norm": 0.875, "learning_rate": 7.271862497874218e-05, "loss": 1.0935, "step": 1036 }, { "epoch": 0.16348496574655316, "grad_norm": 0.97265625, "learning_rate": 7.271419590126015e-05, "loss": 1.4416, "step": 1037 }, { "epoch": 0.16364261759394616, "grad_norm": 0.90625, "learning_rate": 7.27097668816088e-05, "loss": 0.9698, "step": 1038 }, { "epoch": 0.16380026944133919, "grad_norm": 1.0390625, "learning_rate": 7.270533791979752e-05, "loss": 1.1774, "step": 1039 }, { "epoch": 0.16395792128873218, "grad_norm": 0.8828125, "learning_rate": 7.270090901583574e-05, "loss": 1.0788, "step": 1040 }, { "epoch": 0.1641155731361252, "grad_norm": 0.9453125, "learning_rate": 7.269648016973283e-05, "loss": 1.1205, "step": 1041 }, { "epoch": 0.1642732249835182, "grad_norm": 0.99609375, "learning_rate": 7.269205138149818e-05, "loss": 1.097, "step": 1042 }, { "epoch": 0.16443087683091123, "grad_norm": 0.92578125, "learning_rate": 7.268762265114114e-05, "loss": 1.0903, "step": 1043 }, { "epoch": 0.16458852867830423, "grad_norm": 0.96875, "learning_rate": 7.268319397867112e-05, "loss": 1.2038, "step": 1044 }, { "epoch": 0.16474618052569726, "grad_norm": 0.95703125, "learning_rate": 7.267876536409752e-05, "loss": 1.2062, "step": 1045 }, { "epoch": 0.16490383237309025, "grad_norm": 1.0, "learning_rate": 7.267433680742973e-05, "loss": 1.156, "step": 1046 }, { "epoch": 0.16506148422048328, "grad_norm": 1.046875, "learning_rate": 7.266990830867708e-05, "loss": 1.3046, "step": 1047 }, { "epoch": 0.16521913606787628, "grad_norm": 0.9296875, "learning_rate": 7.266547986784904e-05, "loss": 1.0482, "step": 1048 }, { "epoch": 0.1653767879152693, "grad_norm": 0.90234375, "learning_rate": 7.266105148495492e-05, "loss": 1.1674, "step": 1049 }, { "epoch": 0.1655344397626623, "grad_norm": 0.95703125, "learning_rate": 7.265662316000415e-05, "loss": 1.2059, "step": 1050 }, { "epoch": 0.16569209161005533, "grad_norm": 0.96484375, "learning_rate": 7.265219489300612e-05, "loss": 1.4553, "step": 1051 }, { "epoch": 0.16584974345744832, "grad_norm": 0.87109375, "learning_rate": 7.26477666839702e-05, "loss": 1.0725, "step": 1052 }, { "epoch": 0.16600739530484135, "grad_norm": 1.0390625, "learning_rate": 7.264333853290577e-05, "loss": 1.2494, "step": 1053 }, { "epoch": 0.16616504715223435, "grad_norm": 0.98046875, "learning_rate": 7.263891043982218e-05, "loss": 1.1896, "step": 1054 }, { "epoch": 0.16632269899962737, "grad_norm": 0.96484375, "learning_rate": 7.26344824047289e-05, "loss": 1.047, "step": 1055 }, { "epoch": 0.16648035084702037, "grad_norm": 0.9921875, "learning_rate": 7.263005442763526e-05, "loss": 1.2455, "step": 1056 }, { "epoch": 0.1666380026944134, "grad_norm": 1.0078125, "learning_rate": 7.262562650855068e-05, "loss": 1.0922, "step": 1057 }, { "epoch": 0.1667956545418064, "grad_norm": 0.9765625, "learning_rate": 7.262119864748448e-05, "loss": 0.9815, "step": 1058 }, { "epoch": 0.16695330638919942, "grad_norm": 0.95703125, "learning_rate": 7.261677084444606e-05, "loss": 1.1978, "step": 1059 }, { "epoch": 0.16711095823659242, "grad_norm": 0.9765625, "learning_rate": 7.26123430994449e-05, "loss": 1.1045, "step": 1060 }, { "epoch": 0.16726861008398544, "grad_norm": 0.87109375, "learning_rate": 7.260791541249028e-05, "loss": 1.0743, "step": 1061 }, { "epoch": 0.16742626193137844, "grad_norm": 1.046875, "learning_rate": 7.260348778359163e-05, "loss": 1.1904, "step": 1062 }, { "epoch": 0.16758391377877146, "grad_norm": 0.92578125, "learning_rate": 7.259906021275831e-05, "loss": 1.0195, "step": 1063 }, { "epoch": 0.16774156562616446, "grad_norm": 1.125, "learning_rate": 7.259463269999971e-05, "loss": 1.0266, "step": 1064 }, { "epoch": 0.1678992174735575, "grad_norm": 0.953125, "learning_rate": 7.259020524532523e-05, "loss": 1.3445, "step": 1065 }, { "epoch": 0.16805686932095049, "grad_norm": 0.97265625, "learning_rate": 7.258577784874423e-05, "loss": 1.3835, "step": 1066 }, { "epoch": 0.1682145211683435, "grad_norm": 0.96484375, "learning_rate": 7.258135051026612e-05, "loss": 1.1523, "step": 1067 }, { "epoch": 0.1683721730157365, "grad_norm": 1.0859375, "learning_rate": 7.257692322990028e-05, "loss": 1.2187, "step": 1068 }, { "epoch": 0.16852982486312953, "grad_norm": 1.03125, "learning_rate": 7.257249600765606e-05, "loss": 1.096, "step": 1069 }, { "epoch": 0.16868747671052253, "grad_norm": 0.98046875, "learning_rate": 7.256806884354289e-05, "loss": 1.1944, "step": 1070 }, { "epoch": 0.16884512855791556, "grad_norm": 0.8515625, "learning_rate": 7.256364173757011e-05, "loss": 1.1216, "step": 1071 }, { "epoch": 0.16900278040530856, "grad_norm": 0.90625, "learning_rate": 7.255921468974713e-05, "loss": 1.1106, "step": 1072 }, { "epoch": 0.16916043225270158, "grad_norm": 0.953125, "learning_rate": 7.255478770008335e-05, "loss": 1.2976, "step": 1073 }, { "epoch": 0.16931808410009458, "grad_norm": 1.0234375, "learning_rate": 7.255036076858806e-05, "loss": 1.067, "step": 1074 }, { "epoch": 0.1694757359474876, "grad_norm": 1.0078125, "learning_rate": 7.254593389527079e-05, "loss": 1.1403, "step": 1075 }, { "epoch": 0.1696333877948806, "grad_norm": 0.88671875, "learning_rate": 7.25415070801408e-05, "loss": 0.9739, "step": 1076 }, { "epoch": 0.16979103964227363, "grad_norm": 1.984375, "learning_rate": 7.253708032320754e-05, "loss": 1.2047, "step": 1077 }, { "epoch": 0.16994869148966663, "grad_norm": 0.99609375, "learning_rate": 7.253265362448036e-05, "loss": 1.307, "step": 1078 }, { "epoch": 0.17010634333705965, "grad_norm": 0.91015625, "learning_rate": 7.252822698396866e-05, "loss": 1.0295, "step": 1079 }, { "epoch": 0.17026399518445265, "grad_norm": 0.89453125, "learning_rate": 7.252380040168177e-05, "loss": 1.2777, "step": 1080 }, { "epoch": 0.17042164703184567, "grad_norm": 0.94921875, "learning_rate": 7.251937387762913e-05, "loss": 1.3088, "step": 1081 }, { "epoch": 0.17057929887923867, "grad_norm": 0.9296875, "learning_rate": 7.251494741182014e-05, "loss": 1.3712, "step": 1082 }, { "epoch": 0.1707369507266317, "grad_norm": 1.0625, "learning_rate": 7.251052100426413e-05, "loss": 1.1478, "step": 1083 }, { "epoch": 0.1708946025740247, "grad_norm": 0.98046875, "learning_rate": 7.250609465497051e-05, "loss": 1.2346, "step": 1084 }, { "epoch": 0.17105225442141772, "grad_norm": 0.9375, "learning_rate": 7.250166836394861e-05, "loss": 0.996, "step": 1085 }, { "epoch": 0.17120990626881072, "grad_norm": 0.9921875, "learning_rate": 7.249724213120787e-05, "loss": 1.2197, "step": 1086 }, { "epoch": 0.17136755811620374, "grad_norm": 1.0703125, "learning_rate": 7.249281595675768e-05, "loss": 1.2911, "step": 1087 }, { "epoch": 0.17152520996359674, "grad_norm": 1.0078125, "learning_rate": 7.24883898406074e-05, "loss": 1.1625, "step": 1088 }, { "epoch": 0.17168286181098977, "grad_norm": 0.95703125, "learning_rate": 7.248396378276638e-05, "loss": 1.1839, "step": 1089 }, { "epoch": 0.17184051365838277, "grad_norm": 0.9765625, "learning_rate": 7.247953778324401e-05, "loss": 1.0299, "step": 1090 }, { "epoch": 0.1719981655057758, "grad_norm": 0.80859375, "learning_rate": 7.247511184204971e-05, "loss": 0.9532, "step": 1091 }, { "epoch": 0.1721558173531688, "grad_norm": 0.953125, "learning_rate": 7.247068595919285e-05, "loss": 1.1673, "step": 1092 }, { "epoch": 0.17231346920056181, "grad_norm": 1.0078125, "learning_rate": 7.246626013468279e-05, "loss": 1.292, "step": 1093 }, { "epoch": 0.1724711210479548, "grad_norm": 0.98828125, "learning_rate": 7.246183436852891e-05, "loss": 1.3648, "step": 1094 }, { "epoch": 0.17262877289534784, "grad_norm": 0.86328125, "learning_rate": 7.245740866074058e-05, "loss": 1.0402, "step": 1095 }, { "epoch": 0.17278642474274086, "grad_norm": 0.9921875, "learning_rate": 7.245298301132721e-05, "loss": 1.4757, "step": 1096 }, { "epoch": 0.17294407659013386, "grad_norm": 0.90234375, "learning_rate": 7.244855742029819e-05, "loss": 1.3013, "step": 1097 }, { "epoch": 0.17310172843752689, "grad_norm": 0.91015625, "learning_rate": 7.244413188766286e-05, "loss": 1.0135, "step": 1098 }, { "epoch": 0.17325938028491988, "grad_norm": 1.09375, "learning_rate": 7.243970641343063e-05, "loss": 1.1352, "step": 1099 }, { "epoch": 0.1734170321323129, "grad_norm": 0.87109375, "learning_rate": 7.243528099761082e-05, "loss": 1.1853, "step": 1100 }, { "epoch": 0.1735746839797059, "grad_norm": 0.9609375, "learning_rate": 7.243085564021291e-05, "loss": 0.8484, "step": 1101 }, { "epoch": 0.17373233582709893, "grad_norm": 1.046875, "learning_rate": 7.242643034124621e-05, "loss": 1.1056, "step": 1102 }, { "epoch": 0.17388998767449193, "grad_norm": 1.1171875, "learning_rate": 7.242200510072012e-05, "loss": 1.217, "step": 1103 }, { "epoch": 0.17404763952188496, "grad_norm": 0.98828125, "learning_rate": 7.241757991864401e-05, "loss": 1.4531, "step": 1104 }, { "epoch": 0.17420529136927795, "grad_norm": 0.9453125, "learning_rate": 7.241315479502722e-05, "loss": 0.9688, "step": 1105 }, { "epoch": 0.17436294321667098, "grad_norm": 0.9765625, "learning_rate": 7.240872972987923e-05, "loss": 1.0284, "step": 1106 }, { "epoch": 0.17452059506406398, "grad_norm": 0.984375, "learning_rate": 7.240430472320933e-05, "loss": 1.0915, "step": 1107 }, { "epoch": 0.174678246911457, "grad_norm": 0.96875, "learning_rate": 7.239987977502695e-05, "loss": 1.1627, "step": 1108 }, { "epoch": 0.17483589875885, "grad_norm": 0.97265625, "learning_rate": 7.239545488534146e-05, "loss": 1.0793, "step": 1109 }, { "epoch": 0.17499355060624303, "grad_norm": 0.8984375, "learning_rate": 7.23910300541622e-05, "loss": 1.1779, "step": 1110 }, { "epoch": 0.17515120245363602, "grad_norm": 0.83203125, "learning_rate": 7.238660528149857e-05, "loss": 0.9639, "step": 1111 }, { "epoch": 0.17530885430102905, "grad_norm": 0.921875, "learning_rate": 7.238218056735997e-05, "loss": 1.2617, "step": 1112 }, { "epoch": 0.17546650614842205, "grad_norm": 0.921875, "learning_rate": 7.237775591175574e-05, "loss": 1.0418, "step": 1113 }, { "epoch": 0.17562415799581507, "grad_norm": 0.796875, "learning_rate": 7.237333131469528e-05, "loss": 1.0377, "step": 1114 }, { "epoch": 0.17578180984320807, "grad_norm": 0.91015625, "learning_rate": 7.236890677618796e-05, "loss": 1.144, "step": 1115 }, { "epoch": 0.1759394616906011, "grad_norm": 1.0859375, "learning_rate": 7.236448229624317e-05, "loss": 1.1276, "step": 1116 }, { "epoch": 0.1760971135379941, "grad_norm": 0.94921875, "learning_rate": 7.23600578748703e-05, "loss": 0.9916, "step": 1117 }, { "epoch": 0.17625476538538712, "grad_norm": 0.94140625, "learning_rate": 7.23556335120787e-05, "loss": 1.2311, "step": 1118 }, { "epoch": 0.17641241723278012, "grad_norm": 0.94921875, "learning_rate": 7.235120920787776e-05, "loss": 1.0904, "step": 1119 }, { "epoch": 0.17657006908017314, "grad_norm": 1.046875, "learning_rate": 7.234678496227681e-05, "loss": 1.1721, "step": 1120 }, { "epoch": 0.17672772092756614, "grad_norm": 1.015625, "learning_rate": 7.234236077528529e-05, "loss": 1.1207, "step": 1121 }, { "epoch": 0.17688537277495917, "grad_norm": 0.99609375, "learning_rate": 7.233793664691257e-05, "loss": 1.1241, "step": 1122 }, { "epoch": 0.17704302462235216, "grad_norm": 0.91015625, "learning_rate": 7.233351257716801e-05, "loss": 1.1628, "step": 1123 }, { "epoch": 0.1772006764697452, "grad_norm": 0.90234375, "learning_rate": 7.2329088566061e-05, "loss": 1.0504, "step": 1124 }, { "epoch": 0.1773583283171382, "grad_norm": 1.0703125, "learning_rate": 7.232466461360084e-05, "loss": 1.1426, "step": 1125 }, { "epoch": 0.1775159801645312, "grad_norm": 0.97265625, "learning_rate": 7.232024071979704e-05, "loss": 1.2198, "step": 1126 }, { "epoch": 0.1776736320119242, "grad_norm": 0.97265625, "learning_rate": 7.231581688465889e-05, "loss": 1.3697, "step": 1127 }, { "epoch": 0.17783128385931724, "grad_norm": 0.98828125, "learning_rate": 7.23113931081958e-05, "loss": 1.145, "step": 1128 }, { "epoch": 0.17798893570671023, "grad_norm": 0.92578125, "learning_rate": 7.23069693904171e-05, "loss": 1.058, "step": 1129 }, { "epoch": 0.17814658755410326, "grad_norm": 0.9765625, "learning_rate": 7.23025457313322e-05, "loss": 1.108, "step": 1130 }, { "epoch": 0.17830423940149626, "grad_norm": 0.95703125, "learning_rate": 7.229812213095049e-05, "loss": 1.0761, "step": 1131 }, { "epoch": 0.17846189124888928, "grad_norm": 1.0390625, "learning_rate": 7.229369858928132e-05, "loss": 1.1769, "step": 1132 }, { "epoch": 0.17861954309628228, "grad_norm": 1.015625, "learning_rate": 7.228927510633405e-05, "loss": 1.1977, "step": 1133 }, { "epoch": 0.1787771949436753, "grad_norm": 0.93359375, "learning_rate": 7.22848516821181e-05, "loss": 0.9682, "step": 1134 }, { "epoch": 0.1789348467910683, "grad_norm": 1.140625, "learning_rate": 7.22804283166428e-05, "loss": 1.321, "step": 1135 }, { "epoch": 0.17909249863846133, "grad_norm": 0.86328125, "learning_rate": 7.227600500991758e-05, "loss": 0.9646, "step": 1136 }, { "epoch": 0.17925015048585433, "grad_norm": 0.8671875, "learning_rate": 7.227158176195176e-05, "loss": 1.1262, "step": 1137 }, { "epoch": 0.17940780233324735, "grad_norm": 0.9765625, "learning_rate": 7.226715857275477e-05, "loss": 1.053, "step": 1138 }, { "epoch": 0.17956545418064035, "grad_norm": 1.125, "learning_rate": 7.226273544233593e-05, "loss": 1.2967, "step": 1139 }, { "epoch": 0.17972310602803337, "grad_norm": 0.890625, "learning_rate": 7.225831237070459e-05, "loss": 0.987, "step": 1140 }, { "epoch": 0.17988075787542637, "grad_norm": 0.890625, "learning_rate": 7.225388935787023e-05, "loss": 0.947, "step": 1141 }, { "epoch": 0.1800384097228194, "grad_norm": 0.9140625, "learning_rate": 7.224946640384216e-05, "loss": 1.0412, "step": 1142 }, { "epoch": 0.1801960615702124, "grad_norm": 0.9453125, "learning_rate": 7.224504350862976e-05, "loss": 1.1626, "step": 1143 }, { "epoch": 0.18035371341760542, "grad_norm": 1.0078125, "learning_rate": 7.22406206722424e-05, "loss": 1.1337, "step": 1144 }, { "epoch": 0.18051136526499842, "grad_norm": 0.9296875, "learning_rate": 7.223619789468942e-05, "loss": 1.0724, "step": 1145 }, { "epoch": 0.18066901711239144, "grad_norm": 0.93359375, "learning_rate": 7.223177517598027e-05, "loss": 1.1174, "step": 1146 }, { "epoch": 0.18082666895978444, "grad_norm": 0.96875, "learning_rate": 7.22273525161243e-05, "loss": 1.1967, "step": 1147 }, { "epoch": 0.18098432080717747, "grad_norm": 1.09375, "learning_rate": 7.222292991513085e-05, "loss": 1.2996, "step": 1148 }, { "epoch": 0.18114197265457047, "grad_norm": 1.046875, "learning_rate": 7.221850737300932e-05, "loss": 1.1696, "step": 1149 }, { "epoch": 0.1812996245019635, "grad_norm": 1.03125, "learning_rate": 7.221408488976904e-05, "loss": 1.1977, "step": 1150 }, { "epoch": 0.1814572763493565, "grad_norm": 0.9140625, "learning_rate": 7.220966246541946e-05, "loss": 0.9373, "step": 1151 }, { "epoch": 0.18161492819674951, "grad_norm": 0.97265625, "learning_rate": 7.220524009996989e-05, "loss": 1.2118, "step": 1152 }, { "epoch": 0.1817725800441425, "grad_norm": 0.96484375, "learning_rate": 7.220081779342973e-05, "loss": 1.1216, "step": 1153 }, { "epoch": 0.18193023189153554, "grad_norm": 0.9453125, "learning_rate": 7.219639554580836e-05, "loss": 1.1091, "step": 1154 }, { "epoch": 0.18208788373892854, "grad_norm": 0.91015625, "learning_rate": 7.219197335711512e-05, "loss": 1.0109, "step": 1155 }, { "epoch": 0.18224553558632156, "grad_norm": 0.87890625, "learning_rate": 7.218755122735943e-05, "loss": 1.0602, "step": 1156 }, { "epoch": 0.18240318743371456, "grad_norm": 0.8046875, "learning_rate": 7.21831291565506e-05, "loss": 0.9741, "step": 1157 }, { "epoch": 0.18256083928110758, "grad_norm": 0.9453125, "learning_rate": 7.217870714469808e-05, "loss": 1.1194, "step": 1158 }, { "epoch": 0.18271849112850058, "grad_norm": 1.0234375, "learning_rate": 7.217428519181118e-05, "loss": 1.416, "step": 1159 }, { "epoch": 0.1828761429758936, "grad_norm": 0.94921875, "learning_rate": 7.216986329789925e-05, "loss": 1.0331, "step": 1160 }, { "epoch": 0.1830337948232866, "grad_norm": 0.8125, "learning_rate": 7.216544146297177e-05, "loss": 1.0492, "step": 1161 }, { "epoch": 0.18319144667067963, "grad_norm": 0.859375, "learning_rate": 7.216101968703801e-05, "loss": 1.0186, "step": 1162 }, { "epoch": 0.18334909851807263, "grad_norm": 1.015625, "learning_rate": 7.215659797010741e-05, "loss": 1.1807, "step": 1163 }, { "epoch": 0.18350675036546565, "grad_norm": 0.97265625, "learning_rate": 7.215217631218929e-05, "loss": 1.1883, "step": 1164 }, { "epoch": 0.18366440221285865, "grad_norm": 0.9140625, "learning_rate": 7.2147754713293e-05, "loss": 1.0969, "step": 1165 }, { "epoch": 0.18382205406025168, "grad_norm": 0.9609375, "learning_rate": 7.214333317342799e-05, "loss": 1.0287, "step": 1166 }, { "epoch": 0.18397970590764468, "grad_norm": 0.98828125, "learning_rate": 7.21389116926036e-05, "loss": 1.0993, "step": 1167 }, { "epoch": 0.1841373577550377, "grad_norm": 1.0625, "learning_rate": 7.21344902708292e-05, "loss": 1.1584, "step": 1168 }, { "epoch": 0.1842950096024307, "grad_norm": 0.9296875, "learning_rate": 7.213006890811415e-05, "loss": 1.0319, "step": 1169 }, { "epoch": 0.18445266144982372, "grad_norm": 0.85546875, "learning_rate": 7.212564760446778e-05, "loss": 0.9684, "step": 1170 }, { "epoch": 0.18461031329721672, "grad_norm": 0.984375, "learning_rate": 7.212122635989956e-05, "loss": 1.1257, "step": 1171 }, { "epoch": 0.18476796514460975, "grad_norm": 0.8984375, "learning_rate": 7.211680517441878e-05, "loss": 1.0532, "step": 1172 }, { "epoch": 0.18492561699200274, "grad_norm": 0.94140625, "learning_rate": 7.211238404803486e-05, "loss": 1.1364, "step": 1173 }, { "epoch": 0.18508326883939577, "grad_norm": 1.015625, "learning_rate": 7.210796298075715e-05, "loss": 1.1933, "step": 1174 }, { "epoch": 0.18524092068678877, "grad_norm": 0.8203125, "learning_rate": 7.210354197259499e-05, "loss": 1.0786, "step": 1175 }, { "epoch": 0.1853985725341818, "grad_norm": 0.92578125, "learning_rate": 7.209912102355781e-05, "loss": 1.167, "step": 1176 }, { "epoch": 0.1855562243815748, "grad_norm": 1.0078125, "learning_rate": 7.209470013365494e-05, "loss": 1.2949, "step": 1177 }, { "epoch": 0.18571387622896782, "grad_norm": 0.9296875, "learning_rate": 7.209027930289575e-05, "loss": 1.1454, "step": 1178 }, { "epoch": 0.18587152807636081, "grad_norm": 0.9609375, "learning_rate": 7.208585853128962e-05, "loss": 1.192, "step": 1179 }, { "epoch": 0.18602917992375384, "grad_norm": 0.95703125, "learning_rate": 7.208143781884589e-05, "loss": 1.1754, "step": 1180 }, { "epoch": 0.18618683177114684, "grad_norm": 0.921875, "learning_rate": 7.207701716557398e-05, "loss": 0.8474, "step": 1181 }, { "epoch": 0.18634448361853986, "grad_norm": 0.85546875, "learning_rate": 7.207259657148324e-05, "loss": 0.9793, "step": 1182 }, { "epoch": 0.18650213546593286, "grad_norm": 1.2578125, "learning_rate": 7.206817603658304e-05, "loss": 1.1666, "step": 1183 }, { "epoch": 0.1866597873133259, "grad_norm": 0.890625, "learning_rate": 7.206375556088276e-05, "loss": 1.1172, "step": 1184 }, { "epoch": 0.18681743916071888, "grad_norm": 1.015625, "learning_rate": 7.205933514439169e-05, "loss": 1.0309, "step": 1185 }, { "epoch": 0.1869750910081119, "grad_norm": 0.9453125, "learning_rate": 7.20549147871193e-05, "loss": 0.9983, "step": 1186 }, { "epoch": 0.1871327428555049, "grad_norm": 0.9453125, "learning_rate": 7.205049448907492e-05, "loss": 1.1514, "step": 1187 }, { "epoch": 0.18729039470289793, "grad_norm": 0.8203125, "learning_rate": 7.204607425026794e-05, "loss": 0.9441, "step": 1188 }, { "epoch": 0.18744804655029093, "grad_norm": 0.9375, "learning_rate": 7.204165407070771e-05, "loss": 1.0981, "step": 1189 }, { "epoch": 0.18760569839768396, "grad_norm": 0.90234375, "learning_rate": 7.203723395040354e-05, "loss": 1.0558, "step": 1190 }, { "epoch": 0.18776335024507695, "grad_norm": 1.0625, "learning_rate": 7.20328138893649e-05, "loss": 1.4519, "step": 1191 }, { "epoch": 0.18792100209246998, "grad_norm": 1.0234375, "learning_rate": 7.202839388760109e-05, "loss": 1.4529, "step": 1192 }, { "epoch": 0.18807865393986298, "grad_norm": 0.90625, "learning_rate": 7.202397394512153e-05, "loss": 0.9502, "step": 1193 }, { "epoch": 0.188236305787256, "grad_norm": 0.8828125, "learning_rate": 7.201955406193556e-05, "loss": 0.9474, "step": 1194 }, { "epoch": 0.188393957634649, "grad_norm": 1.046875, "learning_rate": 7.201513423805251e-05, "loss": 1.2527, "step": 1195 }, { "epoch": 0.18855160948204203, "grad_norm": 0.93359375, "learning_rate": 7.201071447348181e-05, "loss": 1.0572, "step": 1196 }, { "epoch": 0.18870926132943502, "grad_norm": 1.0078125, "learning_rate": 7.20062947682328e-05, "loss": 1.154, "step": 1197 }, { "epoch": 0.18886691317682805, "grad_norm": 0.84375, "learning_rate": 7.200187512231484e-05, "loss": 0.9587, "step": 1198 }, { "epoch": 0.18902456502422105, "grad_norm": 0.98828125, "learning_rate": 7.199745553573733e-05, "loss": 1.0023, "step": 1199 }, { "epoch": 0.18918221687161407, "grad_norm": 0.97265625, "learning_rate": 7.199303600850956e-05, "loss": 1.2248, "step": 1200 }, { "epoch": 0.18933986871900707, "grad_norm": 0.98828125, "learning_rate": 7.198861654064097e-05, "loss": 1.1428, "step": 1201 }, { "epoch": 0.1894975205664001, "grad_norm": 0.9296875, "learning_rate": 7.198419713214093e-05, "loss": 1.2118, "step": 1202 }, { "epoch": 0.1896551724137931, "grad_norm": 0.9375, "learning_rate": 7.197977778301876e-05, "loss": 1.0747, "step": 1203 }, { "epoch": 0.18981282426118612, "grad_norm": 0.90234375, "learning_rate": 7.197535849328387e-05, "loss": 1.013, "step": 1204 }, { "epoch": 0.18997047610857912, "grad_norm": 0.9296875, "learning_rate": 7.197093926294555e-05, "loss": 1.0271, "step": 1205 }, { "epoch": 0.19012812795597214, "grad_norm": 0.92578125, "learning_rate": 7.196652009201327e-05, "loss": 1.0971, "step": 1206 }, { "epoch": 0.19028577980336514, "grad_norm": 0.86328125, "learning_rate": 7.196210098049636e-05, "loss": 1.1681, "step": 1207 }, { "epoch": 0.19044343165075817, "grad_norm": 0.9296875, "learning_rate": 7.195768192840417e-05, "loss": 1.1135, "step": 1208 }, { "epoch": 0.19060108349815116, "grad_norm": 1.0078125, "learning_rate": 7.195326293574607e-05, "loss": 1.1038, "step": 1209 }, { "epoch": 0.1907587353455442, "grad_norm": 1.09375, "learning_rate": 7.194884400253137e-05, "loss": 1.2848, "step": 1210 }, { "epoch": 0.1909163871929372, "grad_norm": 1.0390625, "learning_rate": 7.194442512876954e-05, "loss": 1.205, "step": 1211 }, { "epoch": 0.1910740390403302, "grad_norm": 0.90234375, "learning_rate": 7.194000631446991e-05, "loss": 1.0155, "step": 1212 }, { "epoch": 0.1912316908877232, "grad_norm": 0.8828125, "learning_rate": 7.193558755964183e-05, "loss": 1.2083, "step": 1213 }, { "epoch": 0.19138934273511624, "grad_norm": 0.9453125, "learning_rate": 7.193116886429466e-05, "loss": 1.2266, "step": 1214 }, { "epoch": 0.19154699458250923, "grad_norm": 0.921875, "learning_rate": 7.192675022843774e-05, "loss": 0.9782, "step": 1215 }, { "epoch": 0.19170464642990226, "grad_norm": 0.875, "learning_rate": 7.192233165208051e-05, "loss": 1.2995, "step": 1216 }, { "epoch": 0.19186229827729526, "grad_norm": 1.0234375, "learning_rate": 7.191791313523229e-05, "loss": 1.3943, "step": 1217 }, { "epoch": 0.19201995012468828, "grad_norm": 0.92578125, "learning_rate": 7.191349467790241e-05, "loss": 1.1526, "step": 1218 }, { "epoch": 0.19217760197208128, "grad_norm": 0.94140625, "learning_rate": 7.190907628010031e-05, "loss": 0.9935, "step": 1219 }, { "epoch": 0.1923352538194743, "grad_norm": 1.0390625, "learning_rate": 7.190465794183531e-05, "loss": 1.1115, "step": 1220 }, { "epoch": 0.1924929056668673, "grad_norm": 0.9453125, "learning_rate": 7.190023966311676e-05, "loss": 1.176, "step": 1221 }, { "epoch": 0.19265055751426033, "grad_norm": 1.078125, "learning_rate": 7.189582144395408e-05, "loss": 1.0479, "step": 1222 }, { "epoch": 0.19280820936165333, "grad_norm": 1.0078125, "learning_rate": 7.18914032843566e-05, "loss": 1.1806, "step": 1223 }, { "epoch": 0.19296586120904635, "grad_norm": 0.93359375, "learning_rate": 7.188698518433367e-05, "loss": 0.9646, "step": 1224 }, { "epoch": 0.19312351305643935, "grad_norm": 0.94140625, "learning_rate": 7.188256714389467e-05, "loss": 1.0893, "step": 1225 }, { "epoch": 0.19328116490383238, "grad_norm": 0.8515625, "learning_rate": 7.187814916304893e-05, "loss": 0.9054, "step": 1226 }, { "epoch": 0.19343881675122537, "grad_norm": 0.828125, "learning_rate": 7.187373124180587e-05, "loss": 0.9952, "step": 1227 }, { "epoch": 0.1935964685986184, "grad_norm": 0.9296875, "learning_rate": 7.186931338017484e-05, "loss": 1.1739, "step": 1228 }, { "epoch": 0.1937541204460114, "grad_norm": 0.98828125, "learning_rate": 7.18648955781652e-05, "loss": 1.3445, "step": 1229 }, { "epoch": 0.19391177229340442, "grad_norm": 0.99609375, "learning_rate": 7.18604778357863e-05, "loss": 1.1383, "step": 1230 }, { "epoch": 0.19406942414079742, "grad_norm": 0.84375, "learning_rate": 7.185606015304747e-05, "loss": 0.8533, "step": 1231 }, { "epoch": 0.19422707598819045, "grad_norm": 1.03125, "learning_rate": 7.185164252995814e-05, "loss": 1.1167, "step": 1232 }, { "epoch": 0.19438472783558344, "grad_norm": 0.97265625, "learning_rate": 7.184722496652764e-05, "loss": 1.1353, "step": 1233 }, { "epoch": 0.19454237968297647, "grad_norm": 0.9140625, "learning_rate": 7.184280746276537e-05, "loss": 1.1203, "step": 1234 }, { "epoch": 0.19470003153036947, "grad_norm": 0.90625, "learning_rate": 7.183839001868064e-05, "loss": 0.9602, "step": 1235 }, { "epoch": 0.1948576833777625, "grad_norm": 0.83984375, "learning_rate": 7.183397263428281e-05, "loss": 1.0512, "step": 1236 }, { "epoch": 0.1950153352251555, "grad_norm": 0.9609375, "learning_rate": 7.182955530958127e-05, "loss": 1.2018, "step": 1237 }, { "epoch": 0.19517298707254851, "grad_norm": 0.91015625, "learning_rate": 7.182513804458539e-05, "loss": 1.0852, "step": 1238 }, { "epoch": 0.1953306389199415, "grad_norm": 1.0, "learning_rate": 7.182072083930453e-05, "loss": 1.1473, "step": 1239 }, { "epoch": 0.19548829076733454, "grad_norm": 1.0390625, "learning_rate": 7.181630369374804e-05, "loss": 1.0743, "step": 1240 }, { "epoch": 0.19564594261472754, "grad_norm": 1.015625, "learning_rate": 7.181188660792525e-05, "loss": 1.13, "step": 1241 }, { "epoch": 0.19580359446212056, "grad_norm": 0.8515625, "learning_rate": 7.180746958184559e-05, "loss": 0.9628, "step": 1242 }, { "epoch": 0.19596124630951356, "grad_norm": 0.97265625, "learning_rate": 7.180305261551839e-05, "loss": 1.1122, "step": 1243 }, { "epoch": 0.19611889815690658, "grad_norm": 0.87890625, "learning_rate": 7.1798635708953e-05, "loss": 0.9681, "step": 1244 }, { "epoch": 0.19627655000429958, "grad_norm": 1.0078125, "learning_rate": 7.17942188621588e-05, "loss": 1.0186, "step": 1245 }, { "epoch": 0.1964342018516926, "grad_norm": 0.90625, "learning_rate": 7.178980207514507e-05, "loss": 1.2402, "step": 1246 }, { "epoch": 0.1965918536990856, "grad_norm": 0.98046875, "learning_rate": 7.178538534792132e-05, "loss": 1.0631, "step": 1247 }, { "epoch": 0.19674950554647863, "grad_norm": 0.90625, "learning_rate": 7.178096868049681e-05, "loss": 1.0162, "step": 1248 }, { "epoch": 0.19690715739387163, "grad_norm": 0.96875, "learning_rate": 7.177655207288094e-05, "loss": 1.3358, "step": 1249 }, { "epoch": 0.19706480924126465, "grad_norm": 0.91015625, "learning_rate": 7.177213552508303e-05, "loss": 1.1617, "step": 1250 }, { "epoch": 0.19722246108865765, "grad_norm": 0.8671875, "learning_rate": 7.176771903711246e-05, "loss": 0.9839, "step": 1251 }, { "epoch": 0.19738011293605068, "grad_norm": 1.0234375, "learning_rate": 7.176330260897862e-05, "loss": 1.2375, "step": 1252 }, { "epoch": 0.1975377647834437, "grad_norm": 0.95703125, "learning_rate": 7.175888624069083e-05, "loss": 1.1342, "step": 1253 }, { "epoch": 0.1976954166308367, "grad_norm": 0.8359375, "learning_rate": 7.175446993225848e-05, "loss": 1.0215, "step": 1254 }, { "epoch": 0.19785306847822973, "grad_norm": 0.88671875, "learning_rate": 7.175005368369092e-05, "loss": 1.0518, "step": 1255 }, { "epoch": 0.19801072032562272, "grad_norm": 0.953125, "learning_rate": 7.174563749499745e-05, "loss": 1.0539, "step": 1256 }, { "epoch": 0.19816837217301575, "grad_norm": 0.9453125, "learning_rate": 7.174122136618755e-05, "loss": 1.1794, "step": 1257 }, { "epoch": 0.19832602402040875, "grad_norm": 1.03125, "learning_rate": 7.17368052972705e-05, "loss": 0.9212, "step": 1258 }, { "epoch": 0.19848367586780177, "grad_norm": 0.94140625, "learning_rate": 7.173238928825568e-05, "loss": 1.1611, "step": 1259 }, { "epoch": 0.19864132771519477, "grad_norm": 1.2109375, "learning_rate": 7.172797333915244e-05, "loss": 1.164, "step": 1260 }, { "epoch": 0.1987989795625878, "grad_norm": 0.9296875, "learning_rate": 7.172355744997014e-05, "loss": 1.2847, "step": 1261 }, { "epoch": 0.1989566314099808, "grad_norm": 1.0234375, "learning_rate": 7.171914162071816e-05, "loss": 1.2166, "step": 1262 }, { "epoch": 0.19911428325737382, "grad_norm": 0.90625, "learning_rate": 7.171472585140583e-05, "loss": 1.0452, "step": 1263 }, { "epoch": 0.19927193510476682, "grad_norm": 0.88671875, "learning_rate": 7.171031014204253e-05, "loss": 1.0085, "step": 1264 }, { "epoch": 0.19942958695215984, "grad_norm": 0.9609375, "learning_rate": 7.17058944926376e-05, "loss": 1.3308, "step": 1265 }, { "epoch": 0.19958723879955284, "grad_norm": 0.85546875, "learning_rate": 7.170147890320038e-05, "loss": 1.0107, "step": 1266 }, { "epoch": 0.19974489064694587, "grad_norm": 0.984375, "learning_rate": 7.16970633737403e-05, "loss": 1.1441, "step": 1267 }, { "epoch": 0.19990254249433886, "grad_norm": 0.92578125, "learning_rate": 7.169264790426669e-05, "loss": 1.1446, "step": 1268 }, { "epoch": 0.2000601943417319, "grad_norm": 0.890625, "learning_rate": 7.168823249478886e-05, "loss": 1.1628, "step": 1269 }, { "epoch": 0.2002178461891249, "grad_norm": 0.96484375, "learning_rate": 7.168381714531623e-05, "loss": 1.1789, "step": 1270 }, { "epoch": 0.2003754980365179, "grad_norm": 1.015625, "learning_rate": 7.167940185585808e-05, "loss": 1.3069, "step": 1271 }, { "epoch": 0.2005331498839109, "grad_norm": 0.9296875, "learning_rate": 7.167498662642386e-05, "loss": 1.2018, "step": 1272 }, { "epoch": 0.20069080173130394, "grad_norm": 0.85546875, "learning_rate": 7.16705714570229e-05, "loss": 1.0384, "step": 1273 }, { "epoch": 0.20084845357869693, "grad_norm": 0.91015625, "learning_rate": 7.166615634766451e-05, "loss": 1.0935, "step": 1274 }, { "epoch": 0.20100610542608996, "grad_norm": 0.90234375, "learning_rate": 7.166174129835812e-05, "loss": 1.073, "step": 1275 }, { "epoch": 0.20116375727348296, "grad_norm": 0.8984375, "learning_rate": 7.1657326309113e-05, "loss": 1.1069, "step": 1276 }, { "epoch": 0.20132140912087598, "grad_norm": 0.921875, "learning_rate": 7.165291137993858e-05, "loss": 1.2457, "step": 1277 }, { "epoch": 0.20147906096826898, "grad_norm": 0.85546875, "learning_rate": 7.164849651084421e-05, "loss": 1.1289, "step": 1278 }, { "epoch": 0.201636712815662, "grad_norm": 0.8515625, "learning_rate": 7.164408170183923e-05, "loss": 0.9065, "step": 1279 }, { "epoch": 0.201794364663055, "grad_norm": 0.9375, "learning_rate": 7.1639666952933e-05, "loss": 1.2445, "step": 1280 }, { "epoch": 0.20195201651044803, "grad_norm": 0.85546875, "learning_rate": 7.163525226413483e-05, "loss": 0.938, "step": 1281 }, { "epoch": 0.20210966835784103, "grad_norm": 0.9296875, "learning_rate": 7.163083763545418e-05, "loss": 1.0402, "step": 1282 }, { "epoch": 0.20226732020523405, "grad_norm": 1.015625, "learning_rate": 7.162642306690033e-05, "loss": 1.1654, "step": 1283 }, { "epoch": 0.20242497205262705, "grad_norm": 0.96484375, "learning_rate": 7.162200855848264e-05, "loss": 1.2361, "step": 1284 }, { "epoch": 0.20258262390002008, "grad_norm": 0.9765625, "learning_rate": 7.161759411021048e-05, "loss": 1.1056, "step": 1285 }, { "epoch": 0.20274027574741307, "grad_norm": 1.0078125, "learning_rate": 7.16131797220932e-05, "loss": 0.9872, "step": 1286 }, { "epoch": 0.2028979275948061, "grad_norm": 0.9765625, "learning_rate": 7.160876539414018e-05, "loss": 1.2429, "step": 1287 }, { "epoch": 0.2030555794421991, "grad_norm": 0.94140625, "learning_rate": 7.160435112636078e-05, "loss": 0.9785, "step": 1288 }, { "epoch": 0.20321323128959212, "grad_norm": 1.3828125, "learning_rate": 7.159993691876431e-05, "loss": 1.296, "step": 1289 }, { "epoch": 0.20337088313698512, "grad_norm": 0.9375, "learning_rate": 7.159552277136016e-05, "loss": 1.0469, "step": 1290 }, { "epoch": 0.20352853498437815, "grad_norm": 0.9609375, "learning_rate": 7.159110868415763e-05, "loss": 1.2138, "step": 1291 }, { "epoch": 0.20368618683177114, "grad_norm": 0.875, "learning_rate": 7.158669465716617e-05, "loss": 1.1343, "step": 1292 }, { "epoch": 0.20384383867916417, "grad_norm": 1.0546875, "learning_rate": 7.158228069039507e-05, "loss": 1.0967, "step": 1293 }, { "epoch": 0.20400149052655717, "grad_norm": 1.109375, "learning_rate": 7.157786678385371e-05, "loss": 1.2791, "step": 1294 }, { "epoch": 0.2041591423739502, "grad_norm": 1.03125, "learning_rate": 7.157345293755147e-05, "loss": 1.3085, "step": 1295 }, { "epoch": 0.2043167942213432, "grad_norm": 0.86328125, "learning_rate": 7.15690391514976e-05, "loss": 0.8861, "step": 1296 }, { "epoch": 0.20447444606873622, "grad_norm": 0.9609375, "learning_rate": 7.156462542570157e-05, "loss": 1.1473, "step": 1297 }, { "epoch": 0.2046320979161292, "grad_norm": 0.93359375, "learning_rate": 7.15602117601727e-05, "loss": 1.0772, "step": 1298 }, { "epoch": 0.20478974976352224, "grad_norm": 0.89453125, "learning_rate": 7.15557981549203e-05, "loss": 1.0796, "step": 1299 }, { "epoch": 0.20494740161091524, "grad_norm": 0.98828125, "learning_rate": 7.15513846099538e-05, "loss": 1.2913, "step": 1300 }, { "epoch": 0.20510505345830826, "grad_norm": 0.95703125, "learning_rate": 7.154697112528248e-05, "loss": 1.536, "step": 1301 }, { "epoch": 0.20526270530570126, "grad_norm": 0.89453125, "learning_rate": 7.154255770091575e-05, "loss": 0.998, "step": 1302 }, { "epoch": 0.20542035715309429, "grad_norm": 0.875, "learning_rate": 7.153814433686294e-05, "loss": 1.1016, "step": 1303 }, { "epoch": 0.20557800900048728, "grad_norm": 0.98046875, "learning_rate": 7.15337310331334e-05, "loss": 1.2312, "step": 1304 }, { "epoch": 0.2057356608478803, "grad_norm": 1.0703125, "learning_rate": 7.152931778973649e-05, "loss": 1.0117, "step": 1305 }, { "epoch": 0.2058933126952733, "grad_norm": 0.96484375, "learning_rate": 7.152490460668156e-05, "loss": 0.9469, "step": 1306 }, { "epoch": 0.20605096454266633, "grad_norm": 0.91796875, "learning_rate": 7.152049148397797e-05, "loss": 1.236, "step": 1307 }, { "epoch": 0.20620861639005933, "grad_norm": 0.87890625, "learning_rate": 7.151607842163508e-05, "loss": 1.1139, "step": 1308 }, { "epoch": 0.20636626823745235, "grad_norm": 1.0859375, "learning_rate": 7.151166541966223e-05, "loss": 1.22, "step": 1309 }, { "epoch": 0.20652392008484535, "grad_norm": 0.83203125, "learning_rate": 7.150725247806877e-05, "loss": 1.1642, "step": 1310 }, { "epoch": 0.20668157193223838, "grad_norm": 1.046875, "learning_rate": 7.150283959686403e-05, "loss": 1.1524, "step": 1311 }, { "epoch": 0.20683922377963138, "grad_norm": 1.03125, "learning_rate": 7.149842677605743e-05, "loss": 1.2956, "step": 1312 }, { "epoch": 0.2069968756270244, "grad_norm": 0.96484375, "learning_rate": 7.14940140156583e-05, "loss": 0.9648, "step": 1313 }, { "epoch": 0.2071545274744174, "grad_norm": 0.93359375, "learning_rate": 7.148960131567597e-05, "loss": 1.0749, "step": 1314 }, { "epoch": 0.20731217932181042, "grad_norm": 1.0, "learning_rate": 7.14851886761198e-05, "loss": 1.2087, "step": 1315 }, { "epoch": 0.20746983116920342, "grad_norm": 1.0, "learning_rate": 7.148077609699909e-05, "loss": 1.1315, "step": 1316 }, { "epoch": 0.20762748301659645, "grad_norm": 0.87109375, "learning_rate": 7.14763635783233e-05, "loss": 0.9368, "step": 1317 }, { "epoch": 0.20778513486398945, "grad_norm": 1.015625, "learning_rate": 7.147195112010171e-05, "loss": 1.1761, "step": 1318 }, { "epoch": 0.20794278671138247, "grad_norm": 0.984375, "learning_rate": 7.146753872234371e-05, "loss": 1.1666, "step": 1319 }, { "epoch": 0.20810043855877547, "grad_norm": 1.0078125, "learning_rate": 7.14631263850586e-05, "loss": 1.1527, "step": 1320 }, { "epoch": 0.2082580904061685, "grad_norm": 0.84375, "learning_rate": 7.145871410825577e-05, "loss": 0.9459, "step": 1321 }, { "epoch": 0.2084157422535615, "grad_norm": 0.99609375, "learning_rate": 7.145430189194456e-05, "loss": 1.2383, "step": 1322 }, { "epoch": 0.20857339410095452, "grad_norm": 1.046875, "learning_rate": 7.144988973613435e-05, "loss": 1.3612, "step": 1323 }, { "epoch": 0.20873104594834752, "grad_norm": 1.015625, "learning_rate": 7.144547764083445e-05, "loss": 1.2297, "step": 1324 }, { "epoch": 0.20888869779574054, "grad_norm": 1.203125, "learning_rate": 7.144106560605425e-05, "loss": 1.3405, "step": 1325 }, { "epoch": 0.20904634964313354, "grad_norm": 0.83984375, "learning_rate": 7.143665363180305e-05, "loss": 1.0089, "step": 1326 }, { "epoch": 0.20920400149052656, "grad_norm": 0.90234375, "learning_rate": 7.143224171809025e-05, "loss": 0.9975, "step": 1327 }, { "epoch": 0.20936165333791956, "grad_norm": 1.0078125, "learning_rate": 7.142782986492517e-05, "loss": 1.0678, "step": 1328 }, { "epoch": 0.2095193051853126, "grad_norm": 1.40625, "learning_rate": 7.142341807231719e-05, "loss": 1.2242, "step": 1329 }, { "epoch": 0.20967695703270559, "grad_norm": 0.92578125, "learning_rate": 7.141900634027562e-05, "loss": 1.1118, "step": 1330 }, { "epoch": 0.2098346088800986, "grad_norm": 1.015625, "learning_rate": 7.14145946688098e-05, "loss": 1.1714, "step": 1331 }, { "epoch": 0.2099922607274916, "grad_norm": 0.953125, "learning_rate": 7.141018305792916e-05, "loss": 1.093, "step": 1332 }, { "epoch": 0.21014991257488463, "grad_norm": 0.91015625, "learning_rate": 7.140577150764301e-05, "loss": 1.0177, "step": 1333 }, { "epoch": 0.21030756442227763, "grad_norm": 0.89453125, "learning_rate": 7.140136001796068e-05, "loss": 1.0075, "step": 1334 }, { "epoch": 0.21046521626967066, "grad_norm": 0.9140625, "learning_rate": 7.139694858889152e-05, "loss": 1.0699, "step": 1335 }, { "epoch": 0.21062286811706366, "grad_norm": 0.93359375, "learning_rate": 7.139253722044487e-05, "loss": 1.0777, "step": 1336 }, { "epoch": 0.21078051996445668, "grad_norm": 1.0390625, "learning_rate": 7.138812591263015e-05, "loss": 1.2028, "step": 1337 }, { "epoch": 0.21093817181184968, "grad_norm": 1.015625, "learning_rate": 7.138371466545665e-05, "loss": 1.0713, "step": 1338 }, { "epoch": 0.2110958236592427, "grad_norm": 0.9375, "learning_rate": 7.137930347893374e-05, "loss": 1.0506, "step": 1339 }, { "epoch": 0.2112534755066357, "grad_norm": 0.984375, "learning_rate": 7.137489235307075e-05, "loss": 1.0686, "step": 1340 }, { "epoch": 0.21141112735402873, "grad_norm": 0.921875, "learning_rate": 7.1370481287877e-05, "loss": 1.1614, "step": 1341 }, { "epoch": 0.21156877920142173, "grad_norm": 0.953125, "learning_rate": 7.13660702833619e-05, "loss": 1.1643, "step": 1342 }, { "epoch": 0.21172643104881475, "grad_norm": 0.8984375, "learning_rate": 7.136165933953482e-05, "loss": 1.0648, "step": 1343 }, { "epoch": 0.21188408289620775, "grad_norm": 0.81640625, "learning_rate": 7.135724845640503e-05, "loss": 0.9797, "step": 1344 }, { "epoch": 0.21204173474360077, "grad_norm": 0.97265625, "learning_rate": 7.135283763398194e-05, "loss": 1.2917, "step": 1345 }, { "epoch": 0.21219938659099377, "grad_norm": 0.9609375, "learning_rate": 7.134842687227484e-05, "loss": 1.0777, "step": 1346 }, { "epoch": 0.2123570384383868, "grad_norm": 0.9140625, "learning_rate": 7.134401617129313e-05, "loss": 1.1141, "step": 1347 }, { "epoch": 0.2125146902857798, "grad_norm": 1.0390625, "learning_rate": 7.133960553104615e-05, "loss": 1.2634, "step": 1348 }, { "epoch": 0.21267234213317282, "grad_norm": 0.92578125, "learning_rate": 7.133519495154324e-05, "loss": 1.1475, "step": 1349 }, { "epoch": 0.21282999398056582, "grad_norm": 0.81640625, "learning_rate": 7.133078443279373e-05, "loss": 1.137, "step": 1350 }, { "epoch": 0.21298764582795884, "grad_norm": 1.0546875, "learning_rate": 7.132637397480695e-05, "loss": 1.1164, "step": 1351 }, { "epoch": 0.21314529767535184, "grad_norm": 1.1015625, "learning_rate": 7.132196357759233e-05, "loss": 1.1759, "step": 1352 }, { "epoch": 0.21330294952274487, "grad_norm": 0.9609375, "learning_rate": 7.131755324115916e-05, "loss": 1.0756, "step": 1353 }, { "epoch": 0.21346060137013786, "grad_norm": 0.9921875, "learning_rate": 7.13131429655168e-05, "loss": 1.1905, "step": 1354 }, { "epoch": 0.2136182532175309, "grad_norm": 0.97265625, "learning_rate": 7.130873275067459e-05, "loss": 1.2061, "step": 1355 }, { "epoch": 0.2137759050649239, "grad_norm": 0.8515625, "learning_rate": 7.130432259664184e-05, "loss": 0.9919, "step": 1356 }, { "epoch": 0.2139335569123169, "grad_norm": 0.953125, "learning_rate": 7.129991250342799e-05, "loss": 1.1732, "step": 1357 }, { "epoch": 0.2140912087597099, "grad_norm": 0.953125, "learning_rate": 7.129550247104233e-05, "loss": 1.0984, "step": 1358 }, { "epoch": 0.21424886060710294, "grad_norm": 1.75, "learning_rate": 7.12910924994942e-05, "loss": 0.9716, "step": 1359 }, { "epoch": 0.21440651245449593, "grad_norm": 0.93359375, "learning_rate": 7.128668258879296e-05, "loss": 1.0291, "step": 1360 }, { "epoch": 0.21456416430188896, "grad_norm": 1.0390625, "learning_rate": 7.128227273894798e-05, "loss": 1.2808, "step": 1361 }, { "epoch": 0.21472181614928196, "grad_norm": 0.98828125, "learning_rate": 7.127786294996852e-05, "loss": 1.4473, "step": 1362 }, { "epoch": 0.21487946799667498, "grad_norm": 0.984375, "learning_rate": 7.127345322186403e-05, "loss": 1.139, "step": 1363 }, { "epoch": 0.21503711984406798, "grad_norm": 0.85546875, "learning_rate": 7.126904355464382e-05, "loss": 1.0423, "step": 1364 }, { "epoch": 0.215194771691461, "grad_norm": 0.88671875, "learning_rate": 7.126463394831722e-05, "loss": 1.0168, "step": 1365 }, { "epoch": 0.215352423538854, "grad_norm": 0.9140625, "learning_rate": 7.12602244028936e-05, "loss": 1.052, "step": 1366 }, { "epoch": 0.21551007538624703, "grad_norm": 1.0078125, "learning_rate": 7.125581491838224e-05, "loss": 1.2174, "step": 1367 }, { "epoch": 0.21566772723364003, "grad_norm": 0.859375, "learning_rate": 7.125140549479258e-05, "loss": 0.9669, "step": 1368 }, { "epoch": 0.21582537908103305, "grad_norm": 1.171875, "learning_rate": 7.124699613213393e-05, "loss": 1.1151, "step": 1369 }, { "epoch": 0.21598303092842605, "grad_norm": 0.9609375, "learning_rate": 7.124258683041562e-05, "loss": 1.1197, "step": 1370 }, { "epoch": 0.21614068277581908, "grad_norm": 0.9453125, "learning_rate": 7.123817758964699e-05, "loss": 1.0519, "step": 1371 }, { "epoch": 0.21629833462321207, "grad_norm": 0.93359375, "learning_rate": 7.12337684098374e-05, "loss": 1.1533, "step": 1372 }, { "epoch": 0.2164559864706051, "grad_norm": 1.0078125, "learning_rate": 7.122935929099621e-05, "loss": 1.1204, "step": 1373 }, { "epoch": 0.2166136383179981, "grad_norm": 0.85546875, "learning_rate": 7.122495023313273e-05, "loss": 0.8712, "step": 1374 }, { "epoch": 0.21677129016539112, "grad_norm": 0.9140625, "learning_rate": 7.122054123625635e-05, "loss": 1.0061, "step": 1375 }, { "epoch": 0.21692894201278412, "grad_norm": 1.1796875, "learning_rate": 7.121613230037639e-05, "loss": 1.438, "step": 1376 }, { "epoch": 0.21708659386017715, "grad_norm": 0.9453125, "learning_rate": 7.121172342550213e-05, "loss": 1.2149, "step": 1377 }, { "epoch": 0.21724424570757014, "grad_norm": 0.97265625, "learning_rate": 7.120731461164302e-05, "loss": 1.1075, "step": 1378 }, { "epoch": 0.21740189755496317, "grad_norm": 0.9296875, "learning_rate": 7.120290585880837e-05, "loss": 1.4147, "step": 1379 }, { "epoch": 0.21755954940235617, "grad_norm": 0.98046875, "learning_rate": 7.119849716700752e-05, "loss": 1.0446, "step": 1380 }, { "epoch": 0.2177172012497492, "grad_norm": 0.890625, "learning_rate": 7.119408853624981e-05, "loss": 0.8742, "step": 1381 }, { "epoch": 0.2178748530971422, "grad_norm": 1.109375, "learning_rate": 7.118967996654452e-05, "loss": 1.2054, "step": 1382 }, { "epoch": 0.21803250494453522, "grad_norm": 0.78125, "learning_rate": 7.118527145790111e-05, "loss": 0.9694, "step": 1383 }, { "epoch": 0.2181901567919282, "grad_norm": 0.94140625, "learning_rate": 7.118086301032887e-05, "loss": 1.0086, "step": 1384 }, { "epoch": 0.21834780863932124, "grad_norm": 0.9375, "learning_rate": 7.117645462383715e-05, "loss": 1.3487, "step": 1385 }, { "epoch": 0.21850546048671424, "grad_norm": 0.97265625, "learning_rate": 7.117204629843528e-05, "loss": 0.8584, "step": 1386 }, { "epoch": 0.21866311233410726, "grad_norm": 1.0625, "learning_rate": 7.116763803413259e-05, "loss": 1.0109, "step": 1387 }, { "epoch": 0.21882076418150026, "grad_norm": 1.078125, "learning_rate": 7.116322983093846e-05, "loss": 1.1321, "step": 1388 }, { "epoch": 0.21897841602889329, "grad_norm": 0.9453125, "learning_rate": 7.115882168886221e-05, "loss": 1.1851, "step": 1389 }, { "epoch": 0.21913606787628628, "grad_norm": 0.921875, "learning_rate": 7.11544136079132e-05, "loss": 1.0456, "step": 1390 }, { "epoch": 0.2192937197236793, "grad_norm": 1.015625, "learning_rate": 7.115000558810076e-05, "loss": 1.1628, "step": 1391 }, { "epoch": 0.2194513715710723, "grad_norm": 1.1640625, "learning_rate": 7.114559762943422e-05, "loss": 1.1918, "step": 1392 }, { "epoch": 0.21960902341846533, "grad_norm": 1.0, "learning_rate": 7.114118973192294e-05, "loss": 1.1242, "step": 1393 }, { "epoch": 0.21976667526585833, "grad_norm": 0.91796875, "learning_rate": 7.113678189557627e-05, "loss": 1.1061, "step": 1394 }, { "epoch": 0.21992432711325136, "grad_norm": 1.03125, "learning_rate": 7.113237412040354e-05, "loss": 1.1322, "step": 1395 }, { "epoch": 0.22008197896064435, "grad_norm": 0.98828125, "learning_rate": 7.112796640641411e-05, "loss": 1.0374, "step": 1396 }, { "epoch": 0.22023963080803738, "grad_norm": 1.0546875, "learning_rate": 7.112355875361724e-05, "loss": 1.098, "step": 1397 }, { "epoch": 0.22039728265543038, "grad_norm": 0.9921875, "learning_rate": 7.111915116202239e-05, "loss": 1.013, "step": 1398 }, { "epoch": 0.2205549345028234, "grad_norm": 0.96875, "learning_rate": 7.111474363163885e-05, "loss": 1.2095, "step": 1399 }, { "epoch": 0.2207125863502164, "grad_norm": 1.0390625, "learning_rate": 7.111033616247595e-05, "loss": 1.2058, "step": 1400 }, { "epoch": 0.22087023819760943, "grad_norm": 0.9609375, "learning_rate": 7.110592875454304e-05, "loss": 1.361, "step": 1401 }, { "epoch": 0.22102789004500242, "grad_norm": 0.8828125, "learning_rate": 7.110152140784944e-05, "loss": 1.3022, "step": 1402 }, { "epoch": 0.22118554189239545, "grad_norm": 0.87109375, "learning_rate": 7.109711412240453e-05, "loss": 0.9766, "step": 1403 }, { "epoch": 0.22134319373978845, "grad_norm": 1.03125, "learning_rate": 7.109270689821766e-05, "loss": 1.1768, "step": 1404 }, { "epoch": 0.22150084558718147, "grad_norm": 1.140625, "learning_rate": 7.108829973529814e-05, "loss": 1.2395, "step": 1405 }, { "epoch": 0.22165849743457447, "grad_norm": 0.94140625, "learning_rate": 7.10838926336553e-05, "loss": 0.9981, "step": 1406 }, { "epoch": 0.2218161492819675, "grad_norm": 0.8671875, "learning_rate": 7.107948559329848e-05, "loss": 1.196, "step": 1407 }, { "epoch": 0.22197380112936052, "grad_norm": 1.109375, "learning_rate": 7.107507861423706e-05, "loss": 1.3497, "step": 1408 }, { "epoch": 0.22213145297675352, "grad_norm": 1.1015625, "learning_rate": 7.107067169648035e-05, "loss": 1.2475, "step": 1409 }, { "epoch": 0.22228910482414654, "grad_norm": 1.0390625, "learning_rate": 7.10662648400377e-05, "loss": 1.0338, "step": 1410 }, { "epoch": 0.22244675667153954, "grad_norm": 1.09375, "learning_rate": 7.106185804491847e-05, "loss": 1.2372, "step": 1411 }, { "epoch": 0.22260440851893257, "grad_norm": 1.046875, "learning_rate": 7.105745131113193e-05, "loss": 1.4149, "step": 1412 }, { "epoch": 0.22276206036632556, "grad_norm": 0.86328125, "learning_rate": 7.105304463868752e-05, "loss": 1.0651, "step": 1413 }, { "epoch": 0.2229197122137186, "grad_norm": 0.90234375, "learning_rate": 7.104863802759449e-05, "loss": 0.9611, "step": 1414 }, { "epoch": 0.2230773640611116, "grad_norm": 1.0234375, "learning_rate": 7.104423147786226e-05, "loss": 1.0465, "step": 1415 }, { "epoch": 0.2232350159085046, "grad_norm": 1.0078125, "learning_rate": 7.103982498950009e-05, "loss": 1.0557, "step": 1416 }, { "epoch": 0.2233926677558976, "grad_norm": 0.92578125, "learning_rate": 7.103541856251731e-05, "loss": 1.1902, "step": 1417 }, { "epoch": 0.22355031960329064, "grad_norm": 0.9375, "learning_rate": 7.103101219692338e-05, "loss": 0.9738, "step": 1418 }, { "epoch": 0.22370797145068363, "grad_norm": 0.8984375, "learning_rate": 7.102660589272754e-05, "loss": 1.0793, "step": 1419 }, { "epoch": 0.22386562329807666, "grad_norm": 0.96484375, "learning_rate": 7.102219964993917e-05, "loss": 1.1476, "step": 1420 }, { "epoch": 0.22402327514546966, "grad_norm": 0.86328125, "learning_rate": 7.101779346856758e-05, "loss": 0.9305, "step": 1421 }, { "epoch": 0.22418092699286268, "grad_norm": 1.0390625, "learning_rate": 7.101338734862208e-05, "loss": 1.1201, "step": 1422 }, { "epoch": 0.22433857884025568, "grad_norm": 0.90625, "learning_rate": 7.100898129011208e-05, "loss": 1.3119, "step": 1423 }, { "epoch": 0.2244962306876487, "grad_norm": 1.015625, "learning_rate": 7.100457529304691e-05, "loss": 1.1416, "step": 1424 }, { "epoch": 0.2246538825350417, "grad_norm": 0.9296875, "learning_rate": 7.100016935743588e-05, "loss": 1.1263, "step": 1425 }, { "epoch": 0.22481153438243473, "grad_norm": 1.0078125, "learning_rate": 7.099576348328832e-05, "loss": 1.0232, "step": 1426 }, { "epoch": 0.22496918622982773, "grad_norm": 1.015625, "learning_rate": 7.099135767061354e-05, "loss": 1.3169, "step": 1427 }, { "epoch": 0.22512683807722075, "grad_norm": 1.0078125, "learning_rate": 7.098695191942097e-05, "loss": 1.2153, "step": 1428 }, { "epoch": 0.22528448992461375, "grad_norm": 1.046875, "learning_rate": 7.09825462297199e-05, "loss": 1.1726, "step": 1429 }, { "epoch": 0.22544214177200678, "grad_norm": 0.97265625, "learning_rate": 7.097814060151965e-05, "loss": 1.1591, "step": 1430 }, { "epoch": 0.22559979361939977, "grad_norm": 0.9140625, "learning_rate": 7.097373503482958e-05, "loss": 1.1141, "step": 1431 }, { "epoch": 0.2257574454667928, "grad_norm": 1.015625, "learning_rate": 7.0969329529659e-05, "loss": 1.1505, "step": 1432 }, { "epoch": 0.2259150973141858, "grad_norm": 0.98828125, "learning_rate": 7.09649240860173e-05, "loss": 1.296, "step": 1433 }, { "epoch": 0.22607274916157882, "grad_norm": 0.96484375, "learning_rate": 7.096051870391376e-05, "loss": 1.0001, "step": 1434 }, { "epoch": 0.22623040100897182, "grad_norm": 0.95703125, "learning_rate": 7.095611338335776e-05, "loss": 1.1247, "step": 1435 }, { "epoch": 0.22638805285636485, "grad_norm": 0.90234375, "learning_rate": 7.095170812435861e-05, "loss": 0.9733, "step": 1436 }, { "epoch": 0.22654570470375784, "grad_norm": 0.95703125, "learning_rate": 7.09473029269256e-05, "loss": 1.0558, "step": 1437 }, { "epoch": 0.22670335655115087, "grad_norm": 0.890625, "learning_rate": 7.094289779106819e-05, "loss": 1.037, "step": 1438 }, { "epoch": 0.22686100839854387, "grad_norm": 1.0390625, "learning_rate": 7.093849271679562e-05, "loss": 1.0728, "step": 1439 }, { "epoch": 0.2270186602459369, "grad_norm": 0.94140625, "learning_rate": 7.093408770411727e-05, "loss": 1.103, "step": 1440 }, { "epoch": 0.2271763120933299, "grad_norm": 0.94140625, "learning_rate": 7.092968275304245e-05, "loss": 1.1255, "step": 1441 }, { "epoch": 0.22733396394072292, "grad_norm": 1.0546875, "learning_rate": 7.092527786358047e-05, "loss": 1.0659, "step": 1442 }, { "epoch": 0.22749161578811591, "grad_norm": 0.96875, "learning_rate": 7.092087303574075e-05, "loss": 1.1646, "step": 1443 }, { "epoch": 0.22764926763550894, "grad_norm": 0.9609375, "learning_rate": 7.091646826953258e-05, "loss": 1.2669, "step": 1444 }, { "epoch": 0.22780691948290194, "grad_norm": 0.93359375, "learning_rate": 7.091206356496526e-05, "loss": 1.1622, "step": 1445 }, { "epoch": 0.22796457133029496, "grad_norm": 0.90625, "learning_rate": 7.090765892204821e-05, "loss": 0.9643, "step": 1446 }, { "epoch": 0.22812222317768796, "grad_norm": 0.96875, "learning_rate": 7.090325434079064e-05, "loss": 1.1371, "step": 1447 }, { "epoch": 0.22827987502508099, "grad_norm": 0.8671875, "learning_rate": 7.0898849821202e-05, "loss": 1.0543, "step": 1448 }, { "epoch": 0.22843752687247398, "grad_norm": 1.109375, "learning_rate": 7.089444536329159e-05, "loss": 1.2672, "step": 1449 }, { "epoch": 0.228595178719867, "grad_norm": 0.8359375, "learning_rate": 7.089004096706873e-05, "loss": 1.0028, "step": 1450 }, { "epoch": 0.22875283056726, "grad_norm": 1.0078125, "learning_rate": 7.088563663254278e-05, "loss": 1.1057, "step": 1451 }, { "epoch": 0.22891048241465303, "grad_norm": 1.015625, "learning_rate": 7.088123235972304e-05, "loss": 1.1907, "step": 1452 }, { "epoch": 0.22906813426204603, "grad_norm": 0.91796875, "learning_rate": 7.087682814861887e-05, "loss": 1.253, "step": 1453 }, { "epoch": 0.22922578610943906, "grad_norm": 0.96875, "learning_rate": 7.087242399923962e-05, "loss": 1.0156, "step": 1454 }, { "epoch": 0.22938343795683205, "grad_norm": 0.90625, "learning_rate": 7.086801991159458e-05, "loss": 0.9941, "step": 1455 }, { "epoch": 0.22954108980422508, "grad_norm": 0.96484375, "learning_rate": 7.086361588569309e-05, "loss": 1.1429, "step": 1456 }, { "epoch": 0.22969874165161808, "grad_norm": 1.3359375, "learning_rate": 7.08592119215445e-05, "loss": 1.159, "step": 1457 }, { "epoch": 0.2298563934990111, "grad_norm": 1.1796875, "learning_rate": 7.085480801915818e-05, "loss": 1.1011, "step": 1458 }, { "epoch": 0.2300140453464041, "grad_norm": 1.0546875, "learning_rate": 7.08504041785434e-05, "loss": 1.2951, "step": 1459 }, { "epoch": 0.23017169719379713, "grad_norm": 0.8828125, "learning_rate": 7.084600039970955e-05, "loss": 1.028, "step": 1460 }, { "epoch": 0.23032934904119012, "grad_norm": 0.92578125, "learning_rate": 7.084159668266592e-05, "loss": 0.9699, "step": 1461 }, { "epoch": 0.23048700088858315, "grad_norm": 0.9921875, "learning_rate": 7.083719302742183e-05, "loss": 1.0949, "step": 1462 }, { "epoch": 0.23064465273597615, "grad_norm": 1.5859375, "learning_rate": 7.083278943398668e-05, "loss": 1.2474, "step": 1463 }, { "epoch": 0.23080230458336917, "grad_norm": 0.9375, "learning_rate": 7.082838590236976e-05, "loss": 0.9996, "step": 1464 }, { "epoch": 0.23095995643076217, "grad_norm": 0.96484375, "learning_rate": 7.082398243258042e-05, "loss": 1.127, "step": 1465 }, { "epoch": 0.2311176082781552, "grad_norm": 1.0546875, "learning_rate": 7.081957902462797e-05, "loss": 1.2925, "step": 1466 }, { "epoch": 0.2312752601255482, "grad_norm": 0.98046875, "learning_rate": 7.081517567852172e-05, "loss": 1.0787, "step": 1467 }, { "epoch": 0.23143291197294122, "grad_norm": 1.0, "learning_rate": 7.081077239427107e-05, "loss": 0.9864, "step": 1468 }, { "epoch": 0.23159056382033422, "grad_norm": 1.03125, "learning_rate": 7.080636917188532e-05, "loss": 1.1679, "step": 1469 }, { "epoch": 0.23174821566772724, "grad_norm": 0.953125, "learning_rate": 7.080196601137381e-05, "loss": 0.9883, "step": 1470 }, { "epoch": 0.23190586751512024, "grad_norm": 0.93359375, "learning_rate": 7.079756291274587e-05, "loss": 1.1307, "step": 1471 }, { "epoch": 0.23206351936251327, "grad_norm": 0.98046875, "learning_rate": 7.07931598760108e-05, "loss": 1.2378, "step": 1472 }, { "epoch": 0.23222117120990626, "grad_norm": 1.0078125, "learning_rate": 7.078875690117797e-05, "loss": 1.2546, "step": 1473 }, { "epoch": 0.2323788230572993, "grad_norm": 0.9921875, "learning_rate": 7.078435398825671e-05, "loss": 1.1365, "step": 1474 }, { "epoch": 0.2325364749046923, "grad_norm": 0.9765625, "learning_rate": 7.077995113725631e-05, "loss": 1.1301, "step": 1475 }, { "epoch": 0.2326941267520853, "grad_norm": 0.9375, "learning_rate": 7.077554834818617e-05, "loss": 0.978, "step": 1476 }, { "epoch": 0.2328517785994783, "grad_norm": 1.0078125, "learning_rate": 7.077114562105556e-05, "loss": 1.2358, "step": 1477 }, { "epoch": 0.23300943044687134, "grad_norm": 0.8125, "learning_rate": 7.076674295587384e-05, "loss": 0.9415, "step": 1478 }, { "epoch": 0.23316708229426433, "grad_norm": 0.9296875, "learning_rate": 7.076234035265034e-05, "loss": 1.0254, "step": 1479 }, { "epoch": 0.23332473414165736, "grad_norm": 0.98828125, "learning_rate": 7.075793781139442e-05, "loss": 1.31, "step": 1480 }, { "epoch": 0.23348238598905036, "grad_norm": 1.0078125, "learning_rate": 7.075353533211535e-05, "loss": 1.305, "step": 1481 }, { "epoch": 0.23364003783644338, "grad_norm": 0.95703125, "learning_rate": 7.074913291482246e-05, "loss": 0.9372, "step": 1482 }, { "epoch": 0.23379768968383638, "grad_norm": 0.96875, "learning_rate": 7.074473055952515e-05, "loss": 1.1031, "step": 1483 }, { "epoch": 0.2339553415312294, "grad_norm": 1.03125, "learning_rate": 7.074032826623271e-05, "loss": 1.4406, "step": 1484 }, { "epoch": 0.2341129933786224, "grad_norm": 0.97265625, "learning_rate": 7.073592603495447e-05, "loss": 1.1805, "step": 1485 }, { "epoch": 0.23427064522601543, "grad_norm": 0.921875, "learning_rate": 7.073152386569976e-05, "loss": 0.9847, "step": 1486 }, { "epoch": 0.23442829707340843, "grad_norm": 0.984375, "learning_rate": 7.072712175847787e-05, "loss": 0.9604, "step": 1487 }, { "epoch": 0.23458594892080145, "grad_norm": 1.1640625, "learning_rate": 7.072271971329823e-05, "loss": 1.0034, "step": 1488 }, { "epoch": 0.23474360076819445, "grad_norm": 0.78125, "learning_rate": 7.071831773017009e-05, "loss": 0.9594, "step": 1489 }, { "epoch": 0.23490125261558747, "grad_norm": 0.921875, "learning_rate": 7.071391580910281e-05, "loss": 1.1016, "step": 1490 }, { "epoch": 0.23505890446298047, "grad_norm": 0.96484375, "learning_rate": 7.070951395010572e-05, "loss": 1.108, "step": 1491 }, { "epoch": 0.2352165563103735, "grad_norm": 0.99609375, "learning_rate": 7.070511215318811e-05, "loss": 1.004, "step": 1492 }, { "epoch": 0.2353742081577665, "grad_norm": 0.89453125, "learning_rate": 7.070071041835935e-05, "loss": 1.1389, "step": 1493 }, { "epoch": 0.23553186000515952, "grad_norm": 0.93359375, "learning_rate": 7.069630874562877e-05, "loss": 1.0214, "step": 1494 }, { "epoch": 0.23568951185255252, "grad_norm": 0.9140625, "learning_rate": 7.06919071350057e-05, "loss": 1.138, "step": 1495 }, { "epoch": 0.23584716369994554, "grad_norm": 0.98828125, "learning_rate": 7.068750558649945e-05, "loss": 1.3268, "step": 1496 }, { "epoch": 0.23600481554733854, "grad_norm": 0.86328125, "learning_rate": 7.068310410011934e-05, "loss": 0.9273, "step": 1497 }, { "epoch": 0.23616246739473157, "grad_norm": 1.7421875, "learning_rate": 7.067870267587472e-05, "loss": 1.3118, "step": 1498 }, { "epoch": 0.23632011924212457, "grad_norm": 0.91015625, "learning_rate": 7.067430131377494e-05, "loss": 1.1032, "step": 1499 }, { "epoch": 0.2364777710895176, "grad_norm": 0.953125, "learning_rate": 7.066990001382928e-05, "loss": 1.144, "step": 1500 }, { "epoch": 0.2366354229369106, "grad_norm": 1.0, "learning_rate": 7.066549877604712e-05, "loss": 1.2402, "step": 1501 }, { "epoch": 0.23679307478430361, "grad_norm": 1.265625, "learning_rate": 7.066109760043768e-05, "loss": 0.9757, "step": 1502 }, { "epoch": 0.2369507266316966, "grad_norm": 0.80078125, "learning_rate": 7.065669648701044e-05, "loss": 0.9279, "step": 1503 }, { "epoch": 0.23710837847908964, "grad_norm": 1.0546875, "learning_rate": 7.065229543577463e-05, "loss": 1.1724, "step": 1504 }, { "epoch": 0.23726603032648264, "grad_norm": 1.0234375, "learning_rate": 7.06478944467396e-05, "loss": 1.3237, "step": 1505 }, { "epoch": 0.23742368217387566, "grad_norm": 1.046875, "learning_rate": 7.064349351991471e-05, "loss": 1.1968, "step": 1506 }, { "epoch": 0.23758133402126866, "grad_norm": 0.8984375, "learning_rate": 7.063909265530923e-05, "loss": 1.1441, "step": 1507 }, { "epoch": 0.23773898586866168, "grad_norm": 1.0703125, "learning_rate": 7.063469185293248e-05, "loss": 1.1328, "step": 1508 }, { "epoch": 0.23789663771605468, "grad_norm": 0.94921875, "learning_rate": 7.063029111279387e-05, "loss": 1.0879, "step": 1509 }, { "epoch": 0.2380542895634477, "grad_norm": 0.7890625, "learning_rate": 7.062589043490267e-05, "loss": 0.8795, "step": 1510 }, { "epoch": 0.2382119414108407, "grad_norm": 0.98828125, "learning_rate": 7.062148981926822e-05, "loss": 1.3077, "step": 1511 }, { "epoch": 0.23836959325823373, "grad_norm": 1.7421875, "learning_rate": 7.061708926589985e-05, "loss": 1.1431, "step": 1512 }, { "epoch": 0.23852724510562673, "grad_norm": 1.0859375, "learning_rate": 7.061268877480684e-05, "loss": 1.0349, "step": 1513 }, { "epoch": 0.23868489695301975, "grad_norm": 0.9453125, "learning_rate": 7.060828834599858e-05, "loss": 1.0356, "step": 1514 }, { "epoch": 0.23884254880041275, "grad_norm": 1.0234375, "learning_rate": 7.060388797948438e-05, "loss": 1.0894, "step": 1515 }, { "epoch": 0.23900020064780578, "grad_norm": 0.89453125, "learning_rate": 7.059948767527358e-05, "loss": 0.9824, "step": 1516 }, { "epoch": 0.23915785249519877, "grad_norm": 1.3515625, "learning_rate": 7.059508743337547e-05, "loss": 0.9462, "step": 1517 }, { "epoch": 0.2393155043425918, "grad_norm": 1.0390625, "learning_rate": 7.059068725379936e-05, "loss": 1.3671, "step": 1518 }, { "epoch": 0.2394731561899848, "grad_norm": 0.98828125, "learning_rate": 7.058628713655464e-05, "loss": 1.109, "step": 1519 }, { "epoch": 0.23963080803737782, "grad_norm": 0.9375, "learning_rate": 7.058188708165062e-05, "loss": 1.0372, "step": 1520 }, { "epoch": 0.23978845988477082, "grad_norm": 0.98046875, "learning_rate": 7.057748708909657e-05, "loss": 1.2781, "step": 1521 }, { "epoch": 0.23994611173216385, "grad_norm": 1.1484375, "learning_rate": 7.057308715890187e-05, "loss": 1.0074, "step": 1522 }, { "epoch": 0.24010376357955684, "grad_norm": 0.90625, "learning_rate": 7.05686872910758e-05, "loss": 0.9642, "step": 1523 }, { "epoch": 0.24026141542694987, "grad_norm": 1.046875, "learning_rate": 7.056428748562776e-05, "loss": 1.1751, "step": 1524 }, { "epoch": 0.24041906727434287, "grad_norm": 0.97265625, "learning_rate": 7.055988774256701e-05, "loss": 1.114, "step": 1525 }, { "epoch": 0.2405767191217359, "grad_norm": 0.9296875, "learning_rate": 7.055548806190291e-05, "loss": 1.2158, "step": 1526 }, { "epoch": 0.2407343709691289, "grad_norm": 0.98828125, "learning_rate": 7.055108844364476e-05, "loss": 1.1238, "step": 1527 }, { "epoch": 0.24089202281652192, "grad_norm": 0.890625, "learning_rate": 7.054668888780186e-05, "loss": 1.1261, "step": 1528 }, { "epoch": 0.24104967466391491, "grad_norm": 0.95703125, "learning_rate": 7.054228939438361e-05, "loss": 1.0729, "step": 1529 }, { "epoch": 0.24120732651130794, "grad_norm": 0.9921875, "learning_rate": 7.05378899633993e-05, "loss": 1.1375, "step": 1530 }, { "epoch": 0.24136497835870094, "grad_norm": 1.0234375, "learning_rate": 7.053349059485823e-05, "loss": 1.2235, "step": 1531 }, { "epoch": 0.24152263020609396, "grad_norm": 0.92578125, "learning_rate": 7.052909128876976e-05, "loss": 1.2142, "step": 1532 }, { "epoch": 0.24168028205348696, "grad_norm": 0.8671875, "learning_rate": 7.052469204514315e-05, "loss": 1.1479, "step": 1533 }, { "epoch": 0.24183793390088, "grad_norm": 0.98046875, "learning_rate": 7.05202928639878e-05, "loss": 1.0628, "step": 1534 }, { "epoch": 0.24199558574827298, "grad_norm": 1.0703125, "learning_rate": 7.051589374531303e-05, "loss": 1.2245, "step": 1535 }, { "epoch": 0.242153237595666, "grad_norm": 1.03125, "learning_rate": 7.051149468912812e-05, "loss": 1.1376, "step": 1536 }, { "epoch": 0.242310889443059, "grad_norm": 0.8828125, "learning_rate": 7.050709569544241e-05, "loss": 0.8926, "step": 1537 }, { "epoch": 0.24246854129045203, "grad_norm": 1.0703125, "learning_rate": 7.05026967642652e-05, "loss": 1.2994, "step": 1538 }, { "epoch": 0.24262619313784503, "grad_norm": 1.046875, "learning_rate": 7.049829789560586e-05, "loss": 1.3028, "step": 1539 }, { "epoch": 0.24278384498523806, "grad_norm": 1.0546875, "learning_rate": 7.049389908947372e-05, "loss": 1.1088, "step": 1540 }, { "epoch": 0.24294149683263105, "grad_norm": 0.91015625, "learning_rate": 7.048950034587805e-05, "loss": 1.1674, "step": 1541 }, { "epoch": 0.24309914868002408, "grad_norm": 0.81640625, "learning_rate": 7.048510166482818e-05, "loss": 0.9752, "step": 1542 }, { "epoch": 0.24325680052741708, "grad_norm": 1.0703125, "learning_rate": 7.048070304633345e-05, "loss": 1.2117, "step": 1543 }, { "epoch": 0.2434144523748101, "grad_norm": 0.8203125, "learning_rate": 7.047630449040321e-05, "loss": 0.9562, "step": 1544 }, { "epoch": 0.2435721042222031, "grad_norm": 0.91015625, "learning_rate": 7.047190599704674e-05, "loss": 1.0712, "step": 1545 }, { "epoch": 0.24372975606959613, "grad_norm": 0.99609375, "learning_rate": 7.046750756627338e-05, "loss": 1.1116, "step": 1546 }, { "epoch": 0.24388740791698912, "grad_norm": 0.96875, "learning_rate": 7.046310919809247e-05, "loss": 1.0757, "step": 1547 }, { "epoch": 0.24404505976438215, "grad_norm": 1.0078125, "learning_rate": 7.045871089251325e-05, "loss": 1.0926, "step": 1548 }, { "epoch": 0.24420271161177515, "grad_norm": 1.046875, "learning_rate": 7.045431264954516e-05, "loss": 1.0518, "step": 1549 }, { "epoch": 0.24436036345916817, "grad_norm": 0.91796875, "learning_rate": 7.044991446919744e-05, "loss": 1.1166, "step": 1550 }, { "epoch": 0.24451801530656117, "grad_norm": 0.98828125, "learning_rate": 7.044551635147947e-05, "loss": 1.2052, "step": 1551 }, { "epoch": 0.2446756671539542, "grad_norm": 1.0703125, "learning_rate": 7.044111829640052e-05, "loss": 1.1918, "step": 1552 }, { "epoch": 0.2448333190013472, "grad_norm": 0.8359375, "learning_rate": 7.043672030396989e-05, "loss": 0.8933, "step": 1553 }, { "epoch": 0.24499097084874022, "grad_norm": 0.9296875, "learning_rate": 7.043232237419699e-05, "loss": 1.0459, "step": 1554 }, { "epoch": 0.24514862269613322, "grad_norm": 1.015625, "learning_rate": 7.042792450709108e-05, "loss": 1.2032, "step": 1555 }, { "epoch": 0.24530627454352624, "grad_norm": 0.9296875, "learning_rate": 7.04235267026615e-05, "loss": 1.0806, "step": 1556 }, { "epoch": 0.24546392639091924, "grad_norm": 0.95703125, "learning_rate": 7.041912896091757e-05, "loss": 1.0679, "step": 1557 }, { "epoch": 0.24562157823831227, "grad_norm": 1.0390625, "learning_rate": 7.041473128186858e-05, "loss": 1.0776, "step": 1558 }, { "epoch": 0.24577923008570526, "grad_norm": 1.1171875, "learning_rate": 7.041033366552389e-05, "loss": 1.0946, "step": 1559 }, { "epoch": 0.2459368819330983, "grad_norm": 0.88671875, "learning_rate": 7.04059361118928e-05, "loss": 0.9819, "step": 1560 }, { "epoch": 0.2460945337804913, "grad_norm": 0.8359375, "learning_rate": 7.040153862098465e-05, "loss": 1.0056, "step": 1561 }, { "epoch": 0.2462521856278843, "grad_norm": 0.94140625, "learning_rate": 7.039714119280876e-05, "loss": 1.0712, "step": 1562 }, { "epoch": 0.2464098374752773, "grad_norm": 1.0390625, "learning_rate": 7.03927438273744e-05, "loss": 1.0964, "step": 1563 }, { "epoch": 0.24656748932267034, "grad_norm": 1.03125, "learning_rate": 7.038834652469094e-05, "loss": 1.0572, "step": 1564 }, { "epoch": 0.24672514117006336, "grad_norm": 0.9453125, "learning_rate": 7.03839492847677e-05, "loss": 1.1886, "step": 1565 }, { "epoch": 0.24688279301745636, "grad_norm": 0.9296875, "learning_rate": 7.0379552107614e-05, "loss": 1.0563, "step": 1566 }, { "epoch": 0.24704044486484938, "grad_norm": 1.015625, "learning_rate": 7.037515499323913e-05, "loss": 1.186, "step": 1567 }, { "epoch": 0.24719809671224238, "grad_norm": 0.90625, "learning_rate": 7.037075794165237e-05, "loss": 1.0678, "step": 1568 }, { "epoch": 0.2473557485596354, "grad_norm": 1.0, "learning_rate": 7.036636095286316e-05, "loss": 1.117, "step": 1569 }, { "epoch": 0.2475134004070284, "grad_norm": 0.90625, "learning_rate": 7.036196402688076e-05, "loss": 1.1243, "step": 1570 }, { "epoch": 0.24767105225442143, "grad_norm": 1.09375, "learning_rate": 7.035756716371446e-05, "loss": 1.042, "step": 1571 }, { "epoch": 0.24782870410181443, "grad_norm": 1.375, "learning_rate": 7.03531703633736e-05, "loss": 1.0608, "step": 1572 }, { "epoch": 0.24798635594920745, "grad_norm": 0.8984375, "learning_rate": 7.03487736258675e-05, "loss": 1.1136, "step": 1573 }, { "epoch": 0.24814400779660045, "grad_norm": 0.9609375, "learning_rate": 7.034437695120548e-05, "loss": 1.0519, "step": 1574 }, { "epoch": 0.24830165964399348, "grad_norm": 0.9375, "learning_rate": 7.033998033939687e-05, "loss": 1.17, "step": 1575 }, { "epoch": 0.24845931149138648, "grad_norm": 0.93359375, "learning_rate": 7.033558379045098e-05, "loss": 0.9513, "step": 1576 }, { "epoch": 0.2486169633387795, "grad_norm": 0.96484375, "learning_rate": 7.033118730437713e-05, "loss": 0.9956, "step": 1577 }, { "epoch": 0.2487746151861725, "grad_norm": 0.96484375, "learning_rate": 7.03267908811846e-05, "loss": 1.1155, "step": 1578 }, { "epoch": 0.24893226703356552, "grad_norm": 0.80859375, "learning_rate": 7.032239452088274e-05, "loss": 0.9468, "step": 1579 }, { "epoch": 0.24908991888095852, "grad_norm": 0.90234375, "learning_rate": 7.03179982234809e-05, "loss": 0.9682, "step": 1580 }, { "epoch": 0.24924757072835155, "grad_norm": 0.921875, "learning_rate": 7.031360198898835e-05, "loss": 1.0741, "step": 1581 }, { "epoch": 0.24940522257574455, "grad_norm": 0.98046875, "learning_rate": 7.030920581741444e-05, "loss": 1.07, "step": 1582 }, { "epoch": 0.24956287442313757, "grad_norm": 0.97265625, "learning_rate": 7.030480970876846e-05, "loss": 0.909, "step": 1583 }, { "epoch": 0.24972052627053057, "grad_norm": 0.9921875, "learning_rate": 7.030041366305973e-05, "loss": 1.2718, "step": 1584 }, { "epoch": 0.2498781781179236, "grad_norm": 1.5234375, "learning_rate": 7.02960176802976e-05, "loss": 0.9924, "step": 1585 }, { "epoch": 0.2500358299653166, "grad_norm": 0.84375, "learning_rate": 7.029162176049134e-05, "loss": 1.1488, "step": 1586 }, { "epoch": 0.2501934818127096, "grad_norm": 0.92578125, "learning_rate": 7.02872259036503e-05, "loss": 1.1985, "step": 1587 }, { "epoch": 0.25035113366010264, "grad_norm": 1.0546875, "learning_rate": 7.028283010978376e-05, "loss": 1.1041, "step": 1588 }, { "epoch": 0.2505087855074956, "grad_norm": 1.0234375, "learning_rate": 7.027843437890109e-05, "loss": 0.9797, "step": 1589 }, { "epoch": 0.25066643735488864, "grad_norm": 0.96875, "learning_rate": 7.027403871101157e-05, "loss": 1.0096, "step": 1590 }, { "epoch": 0.25082408920228166, "grad_norm": 0.89453125, "learning_rate": 7.026964310612453e-05, "loss": 1.1291, "step": 1591 }, { "epoch": 0.2509817410496747, "grad_norm": 0.84765625, "learning_rate": 7.02652475642493e-05, "loss": 0.9851, "step": 1592 }, { "epoch": 0.25113939289706766, "grad_norm": 1.0390625, "learning_rate": 7.026085208539513e-05, "loss": 1.0247, "step": 1593 }, { "epoch": 0.2512970447444607, "grad_norm": 0.91015625, "learning_rate": 7.025645666957141e-05, "loss": 1.1735, "step": 1594 }, { "epoch": 0.2514546965918537, "grad_norm": 0.95703125, "learning_rate": 7.025206131678745e-05, "loss": 1.1215, "step": 1595 }, { "epoch": 0.25161234843924674, "grad_norm": 0.9609375, "learning_rate": 7.024766602705254e-05, "loss": 1.0568, "step": 1596 }, { "epoch": 0.2517700002866397, "grad_norm": 1.078125, "learning_rate": 7.024327080037599e-05, "loss": 1.3967, "step": 1597 }, { "epoch": 0.25192765213403273, "grad_norm": 0.97265625, "learning_rate": 7.02388756367671e-05, "loss": 1.0681, "step": 1598 }, { "epoch": 0.25208530398142576, "grad_norm": 0.99609375, "learning_rate": 7.023448053623525e-05, "loss": 1.0843, "step": 1599 }, { "epoch": 0.2522429558288188, "grad_norm": 0.921875, "learning_rate": 7.023008549878971e-05, "loss": 1.0127, "step": 1600 }, { "epoch": 0.25240060767621175, "grad_norm": 1.1171875, "learning_rate": 7.022569052443982e-05, "loss": 1.1032, "step": 1601 }, { "epoch": 0.2525582595236048, "grad_norm": 0.94921875, "learning_rate": 7.022129561319486e-05, "loss": 1.2235, "step": 1602 }, { "epoch": 0.2527159113709978, "grad_norm": 1.96875, "learning_rate": 7.021690076506413e-05, "loss": 1.1295, "step": 1603 }, { "epoch": 0.25287356321839083, "grad_norm": 0.8984375, "learning_rate": 7.021250598005702e-05, "loss": 1.021, "step": 1604 }, { "epoch": 0.2530312150657838, "grad_norm": 0.99609375, "learning_rate": 7.020811125818279e-05, "loss": 1.2344, "step": 1605 }, { "epoch": 0.2531888669131768, "grad_norm": 0.9375, "learning_rate": 7.020371659945078e-05, "loss": 1.1612, "step": 1606 }, { "epoch": 0.25334651876056985, "grad_norm": 1.03125, "learning_rate": 7.019932200387027e-05, "loss": 1.1417, "step": 1607 }, { "epoch": 0.2535041706079629, "grad_norm": 1.0625, "learning_rate": 7.019492747145055e-05, "loss": 1.2071, "step": 1608 }, { "epoch": 0.25366182245535585, "grad_norm": 0.921875, "learning_rate": 7.019053300220104e-05, "loss": 1.0778, "step": 1609 }, { "epoch": 0.25381947430274887, "grad_norm": 0.921875, "learning_rate": 7.018613859613097e-05, "loss": 1.1426, "step": 1610 }, { "epoch": 0.2539771261501419, "grad_norm": 0.890625, "learning_rate": 7.01817442532497e-05, "loss": 1.1963, "step": 1611 }, { "epoch": 0.2541347779975349, "grad_norm": 0.94921875, "learning_rate": 7.017734997356651e-05, "loss": 1.1617, "step": 1612 }, { "epoch": 0.2542924298449279, "grad_norm": 1.0078125, "learning_rate": 7.017295575709066e-05, "loss": 1.0453, "step": 1613 }, { "epoch": 0.2544500816923209, "grad_norm": 0.85546875, "learning_rate": 7.016856160383158e-05, "loss": 1.0537, "step": 1614 }, { "epoch": 0.25460773353971394, "grad_norm": 0.8359375, "learning_rate": 7.016416751379854e-05, "loss": 0.8849, "step": 1615 }, { "epoch": 0.25476538538710697, "grad_norm": 0.90234375, "learning_rate": 7.015977348700084e-05, "loss": 1.0803, "step": 1616 }, { "epoch": 0.25492303723449994, "grad_norm": 1.015625, "learning_rate": 7.015537952344778e-05, "loss": 1.0938, "step": 1617 }, { "epoch": 0.25508068908189296, "grad_norm": 0.88671875, "learning_rate": 7.015098562314866e-05, "loss": 0.8956, "step": 1618 }, { "epoch": 0.255238340929286, "grad_norm": 0.96875, "learning_rate": 7.014659178611285e-05, "loss": 0.9498, "step": 1619 }, { "epoch": 0.255395992776679, "grad_norm": 1.0078125, "learning_rate": 7.014219801234963e-05, "loss": 1.3111, "step": 1620 }, { "epoch": 0.255553644624072, "grad_norm": 0.92578125, "learning_rate": 7.013780430186832e-05, "loss": 1.4495, "step": 1621 }, { "epoch": 0.255711296471465, "grad_norm": 0.96484375, "learning_rate": 7.013341065467823e-05, "loss": 1.1225, "step": 1622 }, { "epoch": 0.25586894831885804, "grad_norm": 1.921875, "learning_rate": 7.012901707078865e-05, "loss": 1.1353, "step": 1623 }, { "epoch": 0.25602660016625106, "grad_norm": 0.8984375, "learning_rate": 7.012462355020893e-05, "loss": 0.9615, "step": 1624 }, { "epoch": 0.25618425201364403, "grad_norm": 1.171875, "learning_rate": 7.012023009294836e-05, "loss": 1.2999, "step": 1625 }, { "epoch": 0.25634190386103706, "grad_norm": 0.921875, "learning_rate": 7.011583669901625e-05, "loss": 1.1103, "step": 1626 }, { "epoch": 0.2564995557084301, "grad_norm": 0.9453125, "learning_rate": 7.01114433684219e-05, "loss": 1.0375, "step": 1627 }, { "epoch": 0.2566572075558231, "grad_norm": 0.92578125, "learning_rate": 7.010705010117464e-05, "loss": 1.0802, "step": 1628 }, { "epoch": 0.2568148594032161, "grad_norm": 0.859375, "learning_rate": 7.01026568972838e-05, "loss": 0.9149, "step": 1629 }, { "epoch": 0.2569725112506091, "grad_norm": 0.89453125, "learning_rate": 7.009826375675868e-05, "loss": 1.1372, "step": 1630 }, { "epoch": 0.25713016309800213, "grad_norm": 0.98046875, "learning_rate": 7.009387067960855e-05, "loss": 1.3063, "step": 1631 }, { "epoch": 0.25728781494539515, "grad_norm": 0.9453125, "learning_rate": 7.008947766584278e-05, "loss": 1.2457, "step": 1632 }, { "epoch": 0.2574454667927881, "grad_norm": 0.96484375, "learning_rate": 7.008508471547059e-05, "loss": 1.0863, "step": 1633 }, { "epoch": 0.25760311864018115, "grad_norm": 0.984375, "learning_rate": 7.008069182850141e-05, "loss": 1.3042, "step": 1634 }, { "epoch": 0.2577607704875742, "grad_norm": 0.8828125, "learning_rate": 7.00762990049445e-05, "loss": 0.8884, "step": 1635 }, { "epoch": 0.2579184223349672, "grad_norm": 0.94921875, "learning_rate": 7.007190624480915e-05, "loss": 1.2682, "step": 1636 }, { "epoch": 0.25807607418236017, "grad_norm": 1.0078125, "learning_rate": 7.006751354810468e-05, "loss": 1.0564, "step": 1637 }, { "epoch": 0.2582337260297532, "grad_norm": 1.015625, "learning_rate": 7.006312091484038e-05, "loss": 1.2266, "step": 1638 }, { "epoch": 0.2583913778771462, "grad_norm": 0.94921875, "learning_rate": 7.005872834502562e-05, "loss": 1.0456, "step": 1639 }, { "epoch": 0.25854902972453925, "grad_norm": 0.9765625, "learning_rate": 7.005433583866966e-05, "loss": 1.0957, "step": 1640 }, { "epoch": 0.2587066815719322, "grad_norm": 0.9609375, "learning_rate": 7.004994339578184e-05, "loss": 0.8837, "step": 1641 }, { "epoch": 0.25886433341932524, "grad_norm": 0.8125, "learning_rate": 7.004555101637144e-05, "loss": 0.964, "step": 1642 }, { "epoch": 0.25902198526671827, "grad_norm": 1.0078125, "learning_rate": 7.004115870044776e-05, "loss": 1.0471, "step": 1643 }, { "epoch": 0.2591796371141113, "grad_norm": 0.9921875, "learning_rate": 7.003676644802017e-05, "loss": 1.0464, "step": 1644 }, { "epoch": 0.25933728896150426, "grad_norm": 1.0234375, "learning_rate": 7.003237425909794e-05, "loss": 1.153, "step": 1645 }, { "epoch": 0.2594949408088973, "grad_norm": 0.9140625, "learning_rate": 7.002798213369035e-05, "loss": 0.9293, "step": 1646 }, { "epoch": 0.2596525926562903, "grad_norm": 0.8671875, "learning_rate": 7.002359007180675e-05, "loss": 1.0631, "step": 1647 }, { "epoch": 0.25981024450368334, "grad_norm": 1.0234375, "learning_rate": 7.001919807345645e-05, "loss": 1.1788, "step": 1648 }, { "epoch": 0.2599678963510763, "grad_norm": 1.0625, "learning_rate": 7.001480613864872e-05, "loss": 1.2348, "step": 1649 }, { "epoch": 0.26012554819846934, "grad_norm": 0.9453125, "learning_rate": 7.001041426739292e-05, "loss": 1.2481, "step": 1650 }, { "epoch": 0.26028320004586236, "grad_norm": 0.84765625, "learning_rate": 7.000602245969833e-05, "loss": 0.9925, "step": 1651 }, { "epoch": 0.2604408518932554, "grad_norm": 0.90625, "learning_rate": 7.000163071557426e-05, "loss": 0.9351, "step": 1652 }, { "epoch": 0.26059850374064836, "grad_norm": 2.28125, "learning_rate": 6.999723903503002e-05, "loss": 1.1705, "step": 1653 }, { "epoch": 0.2607561555880414, "grad_norm": 1.015625, "learning_rate": 6.999284741807488e-05, "loss": 1.2346, "step": 1654 }, { "epoch": 0.2609138074354344, "grad_norm": 0.94140625, "learning_rate": 6.998845586471823e-05, "loss": 1.1277, "step": 1655 }, { "epoch": 0.26107145928282743, "grad_norm": 1.046875, "learning_rate": 6.998406437496932e-05, "loss": 1.3113, "step": 1656 }, { "epoch": 0.2612291111302204, "grad_norm": 0.92578125, "learning_rate": 6.997967294883747e-05, "loss": 1.0489, "step": 1657 }, { "epoch": 0.26138676297761343, "grad_norm": 1.078125, "learning_rate": 6.9975281586332e-05, "loss": 0.9751, "step": 1658 }, { "epoch": 0.26154441482500645, "grad_norm": 0.9375, "learning_rate": 6.997089028746216e-05, "loss": 0.9875, "step": 1659 }, { "epoch": 0.2617020666723995, "grad_norm": 0.80859375, "learning_rate": 6.996649905223733e-05, "loss": 0.7756, "step": 1660 }, { "epoch": 0.26185971851979245, "grad_norm": 1.84375, "learning_rate": 6.99621078806668e-05, "loss": 1.2189, "step": 1661 }, { "epoch": 0.2620173703671855, "grad_norm": 0.93359375, "learning_rate": 6.995771677275986e-05, "loss": 1.0652, "step": 1662 }, { "epoch": 0.2621750222145785, "grad_norm": 0.87890625, "learning_rate": 6.995332572852583e-05, "loss": 1.1876, "step": 1663 }, { "epoch": 0.2623326740619715, "grad_norm": 0.83203125, "learning_rate": 6.994893474797396e-05, "loss": 0.9399, "step": 1664 }, { "epoch": 0.2624903259093645, "grad_norm": 0.875, "learning_rate": 6.994454383111365e-05, "loss": 1.1369, "step": 1665 }, { "epoch": 0.2626479777567575, "grad_norm": 0.9296875, "learning_rate": 6.994015297795415e-05, "loss": 0.9936, "step": 1666 }, { "epoch": 0.26280562960415055, "grad_norm": 0.96875, "learning_rate": 6.993576218850479e-05, "loss": 1.0441, "step": 1667 }, { "epoch": 0.2629632814515436, "grad_norm": 1.0, "learning_rate": 6.993137146277487e-05, "loss": 1.0963, "step": 1668 }, { "epoch": 0.26312093329893654, "grad_norm": 1.7421875, "learning_rate": 6.992698080077367e-05, "loss": 1.2195, "step": 1669 }, { "epoch": 0.26327858514632957, "grad_norm": 0.84375, "learning_rate": 6.992259020251052e-05, "loss": 0.9091, "step": 1670 }, { "epoch": 0.2634362369937226, "grad_norm": 0.85546875, "learning_rate": 6.991819966799473e-05, "loss": 0.8029, "step": 1671 }, { "epoch": 0.2635938888411156, "grad_norm": 0.9140625, "learning_rate": 6.991380919723559e-05, "loss": 0.9312, "step": 1672 }, { "epoch": 0.2637515406885086, "grad_norm": 1.0078125, "learning_rate": 6.990941879024242e-05, "loss": 1.1331, "step": 1673 }, { "epoch": 0.2639091925359016, "grad_norm": 0.94140625, "learning_rate": 6.990502844702447e-05, "loss": 1.0675, "step": 1674 }, { "epoch": 0.26406684438329464, "grad_norm": 1.0078125, "learning_rate": 6.990063816759114e-05, "loss": 1.2706, "step": 1675 }, { "epoch": 0.26422449623068767, "grad_norm": 0.95703125, "learning_rate": 6.989624795195166e-05, "loss": 1.063, "step": 1676 }, { "epoch": 0.26438214807808064, "grad_norm": 0.91796875, "learning_rate": 6.98918578001154e-05, "loss": 1.0199, "step": 1677 }, { "epoch": 0.26453979992547366, "grad_norm": 0.87109375, "learning_rate": 6.988746771209161e-05, "loss": 0.9699, "step": 1678 }, { "epoch": 0.2646974517728667, "grad_norm": 0.88671875, "learning_rate": 6.988307768788956e-05, "loss": 1.2557, "step": 1679 }, { "epoch": 0.2648551036202597, "grad_norm": 0.97265625, "learning_rate": 6.987868772751866e-05, "loss": 1.5269, "step": 1680 }, { "epoch": 0.2650127554676527, "grad_norm": 0.94140625, "learning_rate": 6.987429783098816e-05, "loss": 0.8982, "step": 1681 }, { "epoch": 0.2651704073150457, "grad_norm": 0.99609375, "learning_rate": 6.986990799830737e-05, "loss": 1.0767, "step": 1682 }, { "epoch": 0.26532805916243873, "grad_norm": 0.90234375, "learning_rate": 6.986551822948557e-05, "loss": 0.8804, "step": 1683 }, { "epoch": 0.26548571100983176, "grad_norm": 0.81640625, "learning_rate": 6.986112852453204e-05, "loss": 0.9225, "step": 1684 }, { "epoch": 0.26564336285722473, "grad_norm": 1.03125, "learning_rate": 6.985673888345619e-05, "loss": 1.1509, "step": 1685 }, { "epoch": 0.26580101470461776, "grad_norm": 0.921875, "learning_rate": 6.985234930626723e-05, "loss": 1.1187, "step": 1686 }, { "epoch": 0.2659586665520108, "grad_norm": 0.8359375, "learning_rate": 6.984795979297452e-05, "loss": 0.9949, "step": 1687 }, { "epoch": 0.2661163183994038, "grad_norm": 0.98828125, "learning_rate": 6.98435703435873e-05, "loss": 1.1532, "step": 1688 }, { "epoch": 0.2662739702467968, "grad_norm": 0.84375, "learning_rate": 6.983918095811493e-05, "loss": 0.9482, "step": 1689 }, { "epoch": 0.2664316220941898, "grad_norm": 0.8515625, "learning_rate": 6.983479163656669e-05, "loss": 1.1119, "step": 1690 }, { "epoch": 0.2665892739415828, "grad_norm": 1.2265625, "learning_rate": 6.983040237895188e-05, "loss": 1.2842, "step": 1691 }, { "epoch": 0.26674692578897585, "grad_norm": 0.875, "learning_rate": 6.982601318527982e-05, "loss": 0.9848, "step": 1692 }, { "epoch": 0.2669045776363688, "grad_norm": 0.8515625, "learning_rate": 6.98216240555598e-05, "loss": 1.0954, "step": 1693 }, { "epoch": 0.26706222948376185, "grad_norm": 0.91015625, "learning_rate": 6.981723498980107e-05, "loss": 1.1908, "step": 1694 }, { "epoch": 0.2672198813311549, "grad_norm": 0.9609375, "learning_rate": 6.981284598801303e-05, "loss": 1.0665, "step": 1695 }, { "epoch": 0.2673775331785479, "grad_norm": 0.95703125, "learning_rate": 6.980845705020495e-05, "loss": 1.1982, "step": 1696 }, { "epoch": 0.26753518502594087, "grad_norm": 0.90234375, "learning_rate": 6.980406817638611e-05, "loss": 1.3166, "step": 1697 }, { "epoch": 0.2676928368733339, "grad_norm": 0.8984375, "learning_rate": 6.979967936656582e-05, "loss": 0.9869, "step": 1698 }, { "epoch": 0.2678504887207269, "grad_norm": 0.96484375, "learning_rate": 6.979529062075335e-05, "loss": 1.0196, "step": 1699 }, { "epoch": 0.26800814056811995, "grad_norm": 0.97265625, "learning_rate": 6.979090193895807e-05, "loss": 1.1166, "step": 1700 }, { "epoch": 0.2681657924155129, "grad_norm": 0.87109375, "learning_rate": 6.978651332118925e-05, "loss": 0.9091, "step": 1701 }, { "epoch": 0.26832344426290594, "grad_norm": 1.0234375, "learning_rate": 6.978212476745619e-05, "loss": 1.1728, "step": 1702 }, { "epoch": 0.26848109611029897, "grad_norm": 0.9921875, "learning_rate": 6.977773627776818e-05, "loss": 1.1748, "step": 1703 }, { "epoch": 0.268638747957692, "grad_norm": 0.89453125, "learning_rate": 6.977334785213449e-05, "loss": 1.1023, "step": 1704 }, { "epoch": 0.26879639980508496, "grad_norm": 1.015625, "learning_rate": 6.976895949056453e-05, "loss": 1.1224, "step": 1705 }, { "epoch": 0.268954051652478, "grad_norm": 0.90234375, "learning_rate": 6.97645711930675e-05, "loss": 0.9684, "step": 1706 }, { "epoch": 0.269111703499871, "grad_norm": 0.96875, "learning_rate": 6.976018295965274e-05, "loss": 1.0414, "step": 1707 }, { "epoch": 0.26926935534726404, "grad_norm": 0.84765625, "learning_rate": 6.975579479032955e-05, "loss": 0.8566, "step": 1708 }, { "epoch": 0.269427007194657, "grad_norm": 0.95703125, "learning_rate": 6.97514066851072e-05, "loss": 1.1728, "step": 1709 }, { "epoch": 0.26958465904205003, "grad_norm": 0.92578125, "learning_rate": 6.974701864399505e-05, "loss": 1.0108, "step": 1710 }, { "epoch": 0.26974231088944306, "grad_norm": 1.0625, "learning_rate": 6.974263066700234e-05, "loss": 1.156, "step": 1711 }, { "epoch": 0.2698999627368361, "grad_norm": 0.92578125, "learning_rate": 6.973824275413838e-05, "loss": 1.1015, "step": 1712 }, { "epoch": 0.27005761458422906, "grad_norm": 0.796875, "learning_rate": 6.973385490541251e-05, "loss": 0.9523, "step": 1713 }, { "epoch": 0.2702152664316221, "grad_norm": 0.82421875, "learning_rate": 6.9729467120834e-05, "loss": 0.9399, "step": 1714 }, { "epoch": 0.2703729182790151, "grad_norm": 0.921875, "learning_rate": 6.972507940041215e-05, "loss": 1.2716, "step": 1715 }, { "epoch": 0.27053057012640813, "grad_norm": 0.921875, "learning_rate": 6.972069174415628e-05, "loss": 1.1105, "step": 1716 }, { "epoch": 0.2706882219738011, "grad_norm": 0.984375, "learning_rate": 6.971630415207566e-05, "loss": 1.2103, "step": 1717 }, { "epoch": 0.2708458738211941, "grad_norm": 1.0234375, "learning_rate": 6.971191662417962e-05, "loss": 1.243, "step": 1718 }, { "epoch": 0.27100352566858715, "grad_norm": 0.8984375, "learning_rate": 6.970752916047739e-05, "loss": 1.0349, "step": 1719 }, { "epoch": 0.2711611775159802, "grad_norm": 0.96875, "learning_rate": 6.970314176097836e-05, "loss": 1.0999, "step": 1720 }, { "epoch": 0.2713188293633732, "grad_norm": 0.91796875, "learning_rate": 6.969875442569178e-05, "loss": 0.9182, "step": 1721 }, { "epoch": 0.2714764812107662, "grad_norm": 1.3671875, "learning_rate": 6.969436715462697e-05, "loss": 1.1229, "step": 1722 }, { "epoch": 0.2716341330581592, "grad_norm": 0.9765625, "learning_rate": 6.968997994779322e-05, "loss": 1.2143, "step": 1723 }, { "epoch": 0.2717917849055522, "grad_norm": 0.94140625, "learning_rate": 6.968559280519978e-05, "loss": 1.1386, "step": 1724 }, { "epoch": 0.27194943675294525, "grad_norm": 1.0390625, "learning_rate": 6.968120572685604e-05, "loss": 1.0527, "step": 1725 }, { "epoch": 0.2721070886003382, "grad_norm": 1.125, "learning_rate": 6.967681871277123e-05, "loss": 1.3038, "step": 1726 }, { "epoch": 0.27226474044773125, "grad_norm": 0.90234375, "learning_rate": 6.967243176295469e-05, "loss": 1.0945, "step": 1727 }, { "epoch": 0.27242239229512427, "grad_norm": 1.0546875, "learning_rate": 6.966804487741569e-05, "loss": 1.1197, "step": 1728 }, { "epoch": 0.2725800441425173, "grad_norm": 0.99609375, "learning_rate": 6.966365805616352e-05, "loss": 1.0122, "step": 1729 }, { "epoch": 0.27273769598991027, "grad_norm": 1.0, "learning_rate": 6.965927129920751e-05, "loss": 1.1578, "step": 1730 }, { "epoch": 0.2728953478373033, "grad_norm": 0.94921875, "learning_rate": 6.965488460655692e-05, "loss": 1.0093, "step": 1731 }, { "epoch": 0.2730529996846963, "grad_norm": 0.94921875, "learning_rate": 6.965049797822109e-05, "loss": 1.1155, "step": 1732 }, { "epoch": 0.27321065153208934, "grad_norm": 0.9296875, "learning_rate": 6.96461114142093e-05, "loss": 1.2728, "step": 1733 }, { "epoch": 0.2733683033794823, "grad_norm": 0.87890625, "learning_rate": 6.964172491453081e-05, "loss": 1.176, "step": 1734 }, { "epoch": 0.27352595522687534, "grad_norm": 0.9609375, "learning_rate": 6.963733847919496e-05, "loss": 1.1942, "step": 1735 }, { "epoch": 0.27368360707426836, "grad_norm": 0.9140625, "learning_rate": 6.963295210821105e-05, "loss": 0.9473, "step": 1736 }, { "epoch": 0.2738412589216614, "grad_norm": 1.0546875, "learning_rate": 6.962856580158837e-05, "loss": 1.1784, "step": 1737 }, { "epoch": 0.27399891076905436, "grad_norm": 0.9453125, "learning_rate": 6.96241795593362e-05, "loss": 1.0329, "step": 1738 }, { "epoch": 0.2741565626164474, "grad_norm": 1.0234375, "learning_rate": 6.961979338146381e-05, "loss": 1.215, "step": 1739 }, { "epoch": 0.2743142144638404, "grad_norm": 0.90234375, "learning_rate": 6.961540726798056e-05, "loss": 0.8672, "step": 1740 }, { "epoch": 0.27447186631123344, "grad_norm": 1.0078125, "learning_rate": 6.961102121889572e-05, "loss": 1.1347, "step": 1741 }, { "epoch": 0.2746295181586264, "grad_norm": 0.9453125, "learning_rate": 6.96066352342186e-05, "loss": 1.1355, "step": 1742 }, { "epoch": 0.27478717000601943, "grad_norm": 0.83203125, "learning_rate": 6.960224931395846e-05, "loss": 0.9219, "step": 1743 }, { "epoch": 0.27494482185341246, "grad_norm": 1.0078125, "learning_rate": 6.959786345812459e-05, "loss": 1.187, "step": 1744 }, { "epoch": 0.2751024737008055, "grad_norm": 0.94140625, "learning_rate": 6.959347766672633e-05, "loss": 1.3087, "step": 1745 }, { "epoch": 0.27526012554819845, "grad_norm": 0.90625, "learning_rate": 6.958909193977297e-05, "loss": 1.0897, "step": 1746 }, { "epoch": 0.2754177773955915, "grad_norm": 1.0625, "learning_rate": 6.95847062772738e-05, "loss": 0.9982, "step": 1747 }, { "epoch": 0.2755754292429845, "grad_norm": 0.83203125, "learning_rate": 6.95803206792381e-05, "loss": 1.1167, "step": 1748 }, { "epoch": 0.27573308109037753, "grad_norm": 0.91015625, "learning_rate": 6.957593514567514e-05, "loss": 0.9965, "step": 1749 }, { "epoch": 0.2758907329377705, "grad_norm": 0.99609375, "learning_rate": 6.957154967659426e-05, "loss": 1.2468, "step": 1750 }, { "epoch": 0.2760483847851635, "grad_norm": 0.9765625, "learning_rate": 6.956716427200476e-05, "loss": 0.9735, "step": 1751 }, { "epoch": 0.27620603663255655, "grad_norm": 0.9453125, "learning_rate": 6.956277893191591e-05, "loss": 0.9906, "step": 1752 }, { "epoch": 0.2763636884799496, "grad_norm": 0.921875, "learning_rate": 6.955839365633701e-05, "loss": 1.0699, "step": 1753 }, { "epoch": 0.27652134032734255, "grad_norm": 0.9765625, "learning_rate": 6.955400844527735e-05, "loss": 1.1844, "step": 1754 }, { "epoch": 0.27667899217473557, "grad_norm": 0.94140625, "learning_rate": 6.954962329874623e-05, "loss": 1.3394, "step": 1755 }, { "epoch": 0.2768366440221286, "grad_norm": 0.89453125, "learning_rate": 6.954523821675294e-05, "loss": 1.0751, "step": 1756 }, { "epoch": 0.2769942958695216, "grad_norm": 0.921875, "learning_rate": 6.954085319930679e-05, "loss": 1.0276, "step": 1757 }, { "epoch": 0.2771519477169146, "grad_norm": 0.9453125, "learning_rate": 6.953646824641707e-05, "loss": 1.215, "step": 1758 }, { "epoch": 0.2773095995643076, "grad_norm": 0.92578125, "learning_rate": 6.953208335809301e-05, "loss": 1.047, "step": 1759 }, { "epoch": 0.27746725141170064, "grad_norm": 0.81640625, "learning_rate": 6.9527698534344e-05, "loss": 0.881, "step": 1760 }, { "epoch": 0.27762490325909367, "grad_norm": 1.015625, "learning_rate": 6.952331377517929e-05, "loss": 1.2199, "step": 1761 }, { "epoch": 0.27778255510648664, "grad_norm": 0.9375, "learning_rate": 6.951892908060818e-05, "loss": 1.1025, "step": 1762 }, { "epoch": 0.27794020695387966, "grad_norm": 0.9453125, "learning_rate": 6.951454445063994e-05, "loss": 1.0837, "step": 1763 }, { "epoch": 0.2780978588012727, "grad_norm": 0.93359375, "learning_rate": 6.951015988528385e-05, "loss": 1.1537, "step": 1764 }, { "epoch": 0.2782555106486657, "grad_norm": 0.9765625, "learning_rate": 6.950577538454927e-05, "loss": 1.1484, "step": 1765 }, { "epoch": 0.2784131624960587, "grad_norm": 0.8828125, "learning_rate": 6.950139094844548e-05, "loss": 1.0682, "step": 1766 }, { "epoch": 0.2785708143434517, "grad_norm": 0.8671875, "learning_rate": 6.949700657698173e-05, "loss": 1.1406, "step": 1767 }, { "epoch": 0.27872846619084474, "grad_norm": 1.046875, "learning_rate": 6.949262227016732e-05, "loss": 1.0977, "step": 1768 }, { "epoch": 0.27888611803823776, "grad_norm": 1.0, "learning_rate": 6.948823802801154e-05, "loss": 1.2812, "step": 1769 }, { "epoch": 0.27904376988563073, "grad_norm": 0.98046875, "learning_rate": 6.948385385052372e-05, "loss": 1.2174, "step": 1770 }, { "epoch": 0.27920142173302376, "grad_norm": 0.9765625, "learning_rate": 6.947946973771313e-05, "loss": 1.0728, "step": 1771 }, { "epoch": 0.2793590735804168, "grad_norm": 0.89453125, "learning_rate": 6.947508568958905e-05, "loss": 1.0217, "step": 1772 }, { "epoch": 0.2795167254278098, "grad_norm": 0.890625, "learning_rate": 6.947070170616079e-05, "loss": 1.0975, "step": 1773 }, { "epoch": 0.2796743772752028, "grad_norm": 1.171875, "learning_rate": 6.946631778743762e-05, "loss": 1.2402, "step": 1774 }, { "epoch": 0.2798320291225958, "grad_norm": 1.0546875, "learning_rate": 6.946193393342886e-05, "loss": 1.1615, "step": 1775 }, { "epoch": 0.27998968096998883, "grad_norm": 0.9609375, "learning_rate": 6.945755014414376e-05, "loss": 1.0538, "step": 1776 }, { "epoch": 0.28014733281738186, "grad_norm": 0.8671875, "learning_rate": 6.945316641959168e-05, "loss": 0.8767, "step": 1777 }, { "epoch": 0.2803049846647748, "grad_norm": 1.0, "learning_rate": 6.944878275978184e-05, "loss": 1.2505, "step": 1778 }, { "epoch": 0.28046263651216785, "grad_norm": 0.96875, "learning_rate": 6.944439916472351e-05, "loss": 0.9423, "step": 1779 }, { "epoch": 0.2806202883595609, "grad_norm": 0.9375, "learning_rate": 6.94400156344261e-05, "loss": 0.9866, "step": 1780 }, { "epoch": 0.2807779402069539, "grad_norm": 0.83984375, "learning_rate": 6.943563216889881e-05, "loss": 1.1577, "step": 1781 }, { "epoch": 0.28093559205434687, "grad_norm": 0.87109375, "learning_rate": 6.943124876815097e-05, "loss": 0.9237, "step": 1782 }, { "epoch": 0.2810932439017399, "grad_norm": 0.95703125, "learning_rate": 6.942686543219183e-05, "loss": 1.0231, "step": 1783 }, { "epoch": 0.2812508957491329, "grad_norm": 0.91015625, "learning_rate": 6.942248216103067e-05, "loss": 0.9635, "step": 1784 }, { "epoch": 0.28140854759652595, "grad_norm": 0.96484375, "learning_rate": 6.941809895467684e-05, "loss": 1.0322, "step": 1785 }, { "epoch": 0.2815661994439189, "grad_norm": 0.8984375, "learning_rate": 6.941371581313962e-05, "loss": 0.9097, "step": 1786 }, { "epoch": 0.28172385129131194, "grad_norm": 0.97265625, "learning_rate": 6.940933273642827e-05, "loss": 0.8335, "step": 1787 }, { "epoch": 0.28188150313870497, "grad_norm": 0.99609375, "learning_rate": 6.94049497245521e-05, "loss": 1.1426, "step": 1788 }, { "epoch": 0.282039154986098, "grad_norm": 0.9921875, "learning_rate": 6.940056677752038e-05, "loss": 1.3404, "step": 1789 }, { "epoch": 0.28219680683349097, "grad_norm": 0.95703125, "learning_rate": 6.939618389534237e-05, "loss": 1.2091, "step": 1790 }, { "epoch": 0.282354458680884, "grad_norm": 0.97265625, "learning_rate": 6.939180107802743e-05, "loss": 1.1574, "step": 1791 }, { "epoch": 0.282512110528277, "grad_norm": 0.9765625, "learning_rate": 6.938741832558484e-05, "loss": 1.2753, "step": 1792 }, { "epoch": 0.28266976237567004, "grad_norm": 1.0546875, "learning_rate": 6.938303563802386e-05, "loss": 0.9984, "step": 1793 }, { "epoch": 0.282827414223063, "grad_norm": 1.0859375, "learning_rate": 6.937865301535377e-05, "loss": 1.0268, "step": 1794 }, { "epoch": 0.28298506607045604, "grad_norm": 0.9296875, "learning_rate": 6.937427045758386e-05, "loss": 1.0478, "step": 1795 }, { "epoch": 0.28314271791784906, "grad_norm": 0.94921875, "learning_rate": 6.936988796472349e-05, "loss": 1.223, "step": 1796 }, { "epoch": 0.2833003697652421, "grad_norm": 0.95703125, "learning_rate": 6.936550553678185e-05, "loss": 1.0839, "step": 1797 }, { "epoch": 0.28345802161263506, "grad_norm": 0.9609375, "learning_rate": 6.936112317376827e-05, "loss": 0.911, "step": 1798 }, { "epoch": 0.2836156734600281, "grad_norm": 0.83203125, "learning_rate": 6.935674087569205e-05, "loss": 1.1088, "step": 1799 }, { "epoch": 0.2837733253074211, "grad_norm": 1.03125, "learning_rate": 6.935235864256245e-05, "loss": 0.9608, "step": 1800 }, { "epoch": 0.28393097715481413, "grad_norm": 0.8984375, "learning_rate": 6.934797647438877e-05, "loss": 1.0391, "step": 1801 }, { "epoch": 0.2840886290022071, "grad_norm": 0.94140625, "learning_rate": 6.934359437118034e-05, "loss": 1.0048, "step": 1802 }, { "epoch": 0.28424628084960013, "grad_norm": 0.98046875, "learning_rate": 6.933921233294639e-05, "loss": 1.0308, "step": 1803 }, { "epoch": 0.28440393269699316, "grad_norm": 0.90234375, "learning_rate": 6.933483035969623e-05, "loss": 1.079, "step": 1804 }, { "epoch": 0.2845615845443862, "grad_norm": 0.90625, "learning_rate": 6.93304484514391e-05, "loss": 1.0794, "step": 1805 }, { "epoch": 0.28471923639177915, "grad_norm": 0.9140625, "learning_rate": 6.932606660818437e-05, "loss": 1.1625, "step": 1806 }, { "epoch": 0.2848768882391722, "grad_norm": 0.828125, "learning_rate": 6.93216848299413e-05, "loss": 0.8867, "step": 1807 }, { "epoch": 0.2850345400865652, "grad_norm": 0.93359375, "learning_rate": 6.931730311671916e-05, "loss": 0.863, "step": 1808 }, { "epoch": 0.28519219193395823, "grad_norm": 1.0, "learning_rate": 6.931292146852723e-05, "loss": 1.119, "step": 1809 }, { "epoch": 0.2853498437813512, "grad_norm": 0.890625, "learning_rate": 6.930853988537479e-05, "loss": 1.2126, "step": 1810 }, { "epoch": 0.2855074956287442, "grad_norm": 0.9453125, "learning_rate": 6.930415836727117e-05, "loss": 1.1326, "step": 1811 }, { "epoch": 0.28566514747613725, "grad_norm": 0.95703125, "learning_rate": 6.929977691422565e-05, "loss": 1.0235, "step": 1812 }, { "epoch": 0.2858227993235303, "grad_norm": 0.94140625, "learning_rate": 6.929539552624749e-05, "loss": 1.1517, "step": 1813 }, { "epoch": 0.28598045117092324, "grad_norm": 0.94921875, "learning_rate": 6.929101420334598e-05, "loss": 1.034, "step": 1814 }, { "epoch": 0.28613810301831627, "grad_norm": 0.90625, "learning_rate": 6.928663294553038e-05, "loss": 1.1265, "step": 1815 }, { "epoch": 0.2862957548657093, "grad_norm": 0.921875, "learning_rate": 6.928225175281005e-05, "loss": 1.1049, "step": 1816 }, { "epoch": 0.2864534067131023, "grad_norm": 0.890625, "learning_rate": 6.927787062519418e-05, "loss": 1.1225, "step": 1817 }, { "epoch": 0.2866110585604953, "grad_norm": 0.96484375, "learning_rate": 6.927348956269216e-05, "loss": 1.1945, "step": 1818 }, { "epoch": 0.2867687104078883, "grad_norm": 1.0, "learning_rate": 6.92691085653132e-05, "loss": 1.0273, "step": 1819 }, { "epoch": 0.28692636225528134, "grad_norm": 1.0625, "learning_rate": 6.92647276330666e-05, "loss": 1.1564, "step": 1820 }, { "epoch": 0.28708401410267437, "grad_norm": 0.93359375, "learning_rate": 6.926034676596167e-05, "loss": 1.2002, "step": 1821 }, { "epoch": 0.28724166595006734, "grad_norm": 0.85546875, "learning_rate": 6.925596596400768e-05, "loss": 0.9643, "step": 1822 }, { "epoch": 0.28739931779746036, "grad_norm": 0.828125, "learning_rate": 6.925158522721392e-05, "loss": 0.7791, "step": 1823 }, { "epoch": 0.2875569696448534, "grad_norm": 0.94921875, "learning_rate": 6.924720455558964e-05, "loss": 1.0785, "step": 1824 }, { "epoch": 0.2877146214922464, "grad_norm": 1.046875, "learning_rate": 6.924282394914413e-05, "loss": 1.1811, "step": 1825 }, { "epoch": 0.2878722733396394, "grad_norm": 0.9296875, "learning_rate": 6.923844340788675e-05, "loss": 1.073, "step": 1826 }, { "epoch": 0.2880299251870324, "grad_norm": 0.84375, "learning_rate": 6.923406293182671e-05, "loss": 0.9031, "step": 1827 }, { "epoch": 0.28818757703442544, "grad_norm": 0.9375, "learning_rate": 6.922968252097332e-05, "loss": 1.13, "step": 1828 }, { "epoch": 0.28834522888181846, "grad_norm": 0.9375, "learning_rate": 6.922530217533586e-05, "loss": 1.2399, "step": 1829 }, { "epoch": 0.28850288072921143, "grad_norm": 1.078125, "learning_rate": 6.922092189492358e-05, "loss": 1.1195, "step": 1830 }, { "epoch": 0.28866053257660446, "grad_norm": 0.921875, "learning_rate": 6.921654167974583e-05, "loss": 1.0458, "step": 1831 }, { "epoch": 0.2888181844239975, "grad_norm": 0.95703125, "learning_rate": 6.921216152981185e-05, "loss": 1.2888, "step": 1832 }, { "epoch": 0.2889758362713905, "grad_norm": 0.96875, "learning_rate": 6.920778144513097e-05, "loss": 1.1291, "step": 1833 }, { "epoch": 0.2891334881187835, "grad_norm": 0.96875, "learning_rate": 6.92034014257124e-05, "loss": 1.1549, "step": 1834 }, { "epoch": 0.2892911399661765, "grad_norm": 0.984375, "learning_rate": 6.919902147156542e-05, "loss": 1.0321, "step": 1835 }, { "epoch": 0.28944879181356953, "grad_norm": 0.98828125, "learning_rate": 6.919464158269942e-05, "loss": 1.0902, "step": 1836 }, { "epoch": 0.28960644366096255, "grad_norm": 0.9296875, "learning_rate": 6.919026175912359e-05, "loss": 1.0786, "step": 1837 }, { "epoch": 0.2897640955083555, "grad_norm": 0.89453125, "learning_rate": 6.918588200084726e-05, "loss": 0.9172, "step": 1838 }, { "epoch": 0.28992174735574855, "grad_norm": 1.0, "learning_rate": 6.918150230787969e-05, "loss": 1.0693, "step": 1839 }, { "epoch": 0.2900793992031416, "grad_norm": 0.94921875, "learning_rate": 6.917712268023013e-05, "loss": 0.9375, "step": 1840 }, { "epoch": 0.2902370510505346, "grad_norm": 0.93359375, "learning_rate": 6.917274311790793e-05, "loss": 1.3096, "step": 1841 }, { "epoch": 0.29039470289792757, "grad_norm": 0.88671875, "learning_rate": 6.916836362092234e-05, "loss": 1.0281, "step": 1842 }, { "epoch": 0.2905523547453206, "grad_norm": 0.9296875, "learning_rate": 6.916398418928263e-05, "loss": 1.1903, "step": 1843 }, { "epoch": 0.2907100065927136, "grad_norm": 0.76953125, "learning_rate": 6.91596048229981e-05, "loss": 0.8723, "step": 1844 }, { "epoch": 0.29086765844010665, "grad_norm": 0.83203125, "learning_rate": 6.915522552207798e-05, "loss": 0.9742, "step": 1845 }, { "epoch": 0.2910253102874996, "grad_norm": 0.87109375, "learning_rate": 6.915084628653163e-05, "loss": 1.0318, "step": 1846 }, { "epoch": 0.29118296213489264, "grad_norm": 1.03125, "learning_rate": 6.914646711636832e-05, "loss": 1.2363, "step": 1847 }, { "epoch": 0.29134061398228567, "grad_norm": 0.921875, "learning_rate": 6.91420880115973e-05, "loss": 1.2061, "step": 1848 }, { "epoch": 0.2914982658296787, "grad_norm": 0.8984375, "learning_rate": 6.913770897222786e-05, "loss": 1.3845, "step": 1849 }, { "epoch": 0.29165591767707166, "grad_norm": 1.0546875, "learning_rate": 6.913332999826922e-05, "loss": 1.1576, "step": 1850 }, { "epoch": 0.2918135695244647, "grad_norm": 0.875, "learning_rate": 6.91289510897308e-05, "loss": 1.0151, "step": 1851 }, { "epoch": 0.2919712213718577, "grad_norm": 0.98828125, "learning_rate": 6.912457224662179e-05, "loss": 1.235, "step": 1852 }, { "epoch": 0.29212887321925074, "grad_norm": 0.921875, "learning_rate": 6.912019346895146e-05, "loss": 1.0265, "step": 1853 }, { "epoch": 0.2922865250666437, "grad_norm": 0.94140625, "learning_rate": 6.911581475672914e-05, "loss": 1.0102, "step": 1854 }, { "epoch": 0.29244417691403674, "grad_norm": 0.890625, "learning_rate": 6.911143610996404e-05, "loss": 1.1134, "step": 1855 }, { "epoch": 0.29260182876142976, "grad_norm": 0.8515625, "learning_rate": 6.910705752866553e-05, "loss": 1.2059, "step": 1856 }, { "epoch": 0.2927594806088228, "grad_norm": 0.90234375, "learning_rate": 6.910267901284284e-05, "loss": 1.0385, "step": 1857 }, { "epoch": 0.29291713245621576, "grad_norm": 0.91015625, "learning_rate": 6.909830056250527e-05, "loss": 1.0966, "step": 1858 }, { "epoch": 0.2930747843036088, "grad_norm": 0.9375, "learning_rate": 6.909392217766207e-05, "loss": 1.1934, "step": 1859 }, { "epoch": 0.2932324361510018, "grad_norm": 0.89453125, "learning_rate": 6.908954385832251e-05, "loss": 1.0348, "step": 1860 }, { "epoch": 0.29339008799839483, "grad_norm": 0.91796875, "learning_rate": 6.908516560449594e-05, "loss": 1.1135, "step": 1861 }, { "epoch": 0.2935477398457878, "grad_norm": 0.95703125, "learning_rate": 6.908078741619157e-05, "loss": 1.128, "step": 1862 }, { "epoch": 0.29370539169318083, "grad_norm": 0.86328125, "learning_rate": 6.907640929341872e-05, "loss": 1.0865, "step": 1863 }, { "epoch": 0.29386304354057385, "grad_norm": 0.94140625, "learning_rate": 6.907203123618664e-05, "loss": 1.1875, "step": 1864 }, { "epoch": 0.2940206953879669, "grad_norm": 0.921875, "learning_rate": 6.90676532445046e-05, "loss": 1.278, "step": 1865 }, { "epoch": 0.29417834723535985, "grad_norm": 0.9140625, "learning_rate": 6.906327531838193e-05, "loss": 1.0806, "step": 1866 }, { "epoch": 0.2943359990827529, "grad_norm": 0.8984375, "learning_rate": 6.905889745782788e-05, "loss": 1.2227, "step": 1867 }, { "epoch": 0.2944936509301459, "grad_norm": 0.84765625, "learning_rate": 6.905451966285171e-05, "loss": 0.9036, "step": 1868 }, { "epoch": 0.2946513027775389, "grad_norm": 0.9296875, "learning_rate": 6.905014193346274e-05, "loss": 1.0541, "step": 1869 }, { "epoch": 0.2948089546249319, "grad_norm": 0.94921875, "learning_rate": 6.904576426967017e-05, "loss": 1.2212, "step": 1870 }, { "epoch": 0.2949666064723249, "grad_norm": 0.890625, "learning_rate": 6.904138667148338e-05, "loss": 0.9853, "step": 1871 }, { "epoch": 0.29512425831971795, "grad_norm": 0.921875, "learning_rate": 6.903700913891162e-05, "loss": 1.1984, "step": 1872 }, { "epoch": 0.295281910167111, "grad_norm": 0.9140625, "learning_rate": 6.903263167196412e-05, "loss": 1.0842, "step": 1873 }, { "epoch": 0.29543956201450394, "grad_norm": 0.984375, "learning_rate": 6.902825427065021e-05, "loss": 1.1143, "step": 1874 }, { "epoch": 0.29559721386189697, "grad_norm": 0.97265625, "learning_rate": 6.90238769349791e-05, "loss": 1.2871, "step": 1875 }, { "epoch": 0.29575486570929, "grad_norm": 1.0390625, "learning_rate": 6.901949966496013e-05, "loss": 0.898, "step": 1876 }, { "epoch": 0.295912517556683, "grad_norm": 0.859375, "learning_rate": 6.901512246060259e-05, "loss": 0.8928, "step": 1877 }, { "epoch": 0.29607016940407604, "grad_norm": 1.046875, "learning_rate": 6.901074532191572e-05, "loss": 1.037, "step": 1878 }, { "epoch": 0.296227821251469, "grad_norm": 1.0703125, "learning_rate": 6.900636824890878e-05, "loss": 1.2106, "step": 1879 }, { "epoch": 0.29638547309886204, "grad_norm": 0.8828125, "learning_rate": 6.900199124159109e-05, "loss": 0.9175, "step": 1880 }, { "epoch": 0.29654312494625507, "grad_norm": 0.92578125, "learning_rate": 6.89976142999719e-05, "loss": 1.0531, "step": 1881 }, { "epoch": 0.2967007767936481, "grad_norm": 0.94921875, "learning_rate": 6.89932374240605e-05, "loss": 0.9883, "step": 1882 }, { "epoch": 0.29685842864104106, "grad_norm": 0.921875, "learning_rate": 6.898886061386614e-05, "loss": 1.1274, "step": 1883 }, { "epoch": 0.2970160804884341, "grad_norm": 1.0234375, "learning_rate": 6.898448386939814e-05, "loss": 1.3654, "step": 1884 }, { "epoch": 0.2971737323358271, "grad_norm": 0.890625, "learning_rate": 6.898010719066572e-05, "loss": 1.0427, "step": 1885 }, { "epoch": 0.29733138418322014, "grad_norm": 1.0390625, "learning_rate": 6.897573057767824e-05, "loss": 1.1164, "step": 1886 }, { "epoch": 0.2974890360306131, "grad_norm": 0.9609375, "learning_rate": 6.897135403044491e-05, "loss": 1.1587, "step": 1887 }, { "epoch": 0.29764668787800613, "grad_norm": 0.96875, "learning_rate": 6.896697754897501e-05, "loss": 1.0289, "step": 1888 }, { "epoch": 0.29780433972539916, "grad_norm": 1.0078125, "learning_rate": 6.896260113327783e-05, "loss": 1.2689, "step": 1889 }, { "epoch": 0.2979619915727922, "grad_norm": 0.91015625, "learning_rate": 6.895822478336262e-05, "loss": 1.0849, "step": 1890 }, { "epoch": 0.29811964342018515, "grad_norm": 0.984375, "learning_rate": 6.895384849923871e-05, "loss": 1.1242, "step": 1891 }, { "epoch": 0.2982772952675782, "grad_norm": 1.015625, "learning_rate": 6.894947228091535e-05, "loss": 0.9605, "step": 1892 }, { "epoch": 0.2984349471149712, "grad_norm": 1.0078125, "learning_rate": 6.894509612840179e-05, "loss": 0.9764, "step": 1893 }, { "epoch": 0.29859259896236423, "grad_norm": 0.91015625, "learning_rate": 6.894072004170734e-05, "loss": 1.1986, "step": 1894 }, { "epoch": 0.2987502508097572, "grad_norm": 1.0078125, "learning_rate": 6.893634402084121e-05, "loss": 1.2609, "step": 1895 }, { "epoch": 0.2989079026571502, "grad_norm": 1.0234375, "learning_rate": 6.893196806581277e-05, "loss": 1.1271, "step": 1896 }, { "epoch": 0.29906555450454325, "grad_norm": 0.95703125, "learning_rate": 6.892759217663124e-05, "loss": 0.9491, "step": 1897 }, { "epoch": 0.2992232063519363, "grad_norm": 1.0546875, "learning_rate": 6.892321635330592e-05, "loss": 1.2664, "step": 1898 }, { "epoch": 0.29938085819932925, "grad_norm": 0.93359375, "learning_rate": 6.891884059584604e-05, "loss": 1.0487, "step": 1899 }, { "epoch": 0.2995385100467223, "grad_norm": 0.921875, "learning_rate": 6.89144649042609e-05, "loss": 1.2452, "step": 1900 }, { "epoch": 0.2996961618941153, "grad_norm": 0.91796875, "learning_rate": 6.89100892785598e-05, "loss": 1.1048, "step": 1901 }, { "epoch": 0.2998538137415083, "grad_norm": 0.953125, "learning_rate": 6.890571371875194e-05, "loss": 0.8733, "step": 1902 }, { "epoch": 0.3000114655889013, "grad_norm": 1.046875, "learning_rate": 6.890133822484669e-05, "loss": 1.0949, "step": 1903 }, { "epoch": 0.3001691174362943, "grad_norm": 0.90625, "learning_rate": 6.889696279685327e-05, "loss": 0.9121, "step": 1904 }, { "epoch": 0.30032676928368734, "grad_norm": 0.9453125, "learning_rate": 6.889258743478093e-05, "loss": 1.0087, "step": 1905 }, { "epoch": 0.30048442113108037, "grad_norm": 0.953125, "learning_rate": 6.888821213863901e-05, "loss": 0.9906, "step": 1906 }, { "epoch": 0.30064207297847334, "grad_norm": 0.94140625, "learning_rate": 6.888383690843673e-05, "loss": 1.1579, "step": 1907 }, { "epoch": 0.30079972482586637, "grad_norm": 0.85546875, "learning_rate": 6.887946174418338e-05, "loss": 0.8905, "step": 1908 }, { "epoch": 0.3009573766732594, "grad_norm": 0.9140625, "learning_rate": 6.887508664588824e-05, "loss": 1.1476, "step": 1909 }, { "epoch": 0.3011150285206524, "grad_norm": 0.84765625, "learning_rate": 6.887071161356054e-05, "loss": 1.203, "step": 1910 }, { "epoch": 0.3012726803680454, "grad_norm": 0.94921875, "learning_rate": 6.886633664720961e-05, "loss": 0.8998, "step": 1911 }, { "epoch": 0.3014303322154384, "grad_norm": 1.015625, "learning_rate": 6.886196174684471e-05, "loss": 1.2258, "step": 1912 }, { "epoch": 0.30158798406283144, "grad_norm": 0.9765625, "learning_rate": 6.88575869124751e-05, "loss": 1.158, "step": 1913 }, { "epoch": 0.30174563591022446, "grad_norm": 0.91015625, "learning_rate": 6.885321214411007e-05, "loss": 0.9877, "step": 1914 }, { "epoch": 0.30190328775761743, "grad_norm": 0.9609375, "learning_rate": 6.88488374417588e-05, "loss": 1.1588, "step": 1915 }, { "epoch": 0.30206093960501046, "grad_norm": 0.89453125, "learning_rate": 6.88444628054307e-05, "loss": 1.0643, "step": 1916 }, { "epoch": 0.3022185914524035, "grad_norm": 0.875, "learning_rate": 6.884008823513499e-05, "loss": 0.9943, "step": 1917 }, { "epoch": 0.3023762432997965, "grad_norm": 0.96484375, "learning_rate": 6.883571373088093e-05, "loss": 1.3065, "step": 1918 }, { "epoch": 0.3025338951471895, "grad_norm": 1.03125, "learning_rate": 6.88313392926778e-05, "loss": 1.0518, "step": 1919 }, { "epoch": 0.3026915469945825, "grad_norm": 1.4296875, "learning_rate": 6.882696492053483e-05, "loss": 1.1562, "step": 1920 }, { "epoch": 0.30284919884197553, "grad_norm": 1.015625, "learning_rate": 6.882259061446131e-05, "loss": 1.2035, "step": 1921 }, { "epoch": 0.30300685068936856, "grad_norm": 1.1015625, "learning_rate": 6.881821637446657e-05, "loss": 1.0504, "step": 1922 }, { "epoch": 0.3031645025367615, "grad_norm": 0.95703125, "learning_rate": 6.881384220055984e-05, "loss": 1.3212, "step": 1923 }, { "epoch": 0.30332215438415455, "grad_norm": 1.015625, "learning_rate": 6.880946809275038e-05, "loss": 1.0566, "step": 1924 }, { "epoch": 0.3034798062315476, "grad_norm": 1.046875, "learning_rate": 6.880509405104744e-05, "loss": 1.1852, "step": 1925 }, { "epoch": 0.3036374580789406, "grad_norm": 1.0078125, "learning_rate": 6.880072007546036e-05, "loss": 1.4554, "step": 1926 }, { "epoch": 0.3037951099263336, "grad_norm": 0.96484375, "learning_rate": 6.879634616599837e-05, "loss": 1.0309, "step": 1927 }, { "epoch": 0.3039527617737266, "grad_norm": 0.9375, "learning_rate": 6.879197232267073e-05, "loss": 0.8777, "step": 1928 }, { "epoch": 0.3041104136211196, "grad_norm": 0.89453125, "learning_rate": 6.878759854548672e-05, "loss": 1.0103, "step": 1929 }, { "epoch": 0.30426806546851265, "grad_norm": 0.9765625, "learning_rate": 6.878322483445561e-05, "loss": 1.096, "step": 1930 }, { "epoch": 0.3044257173159056, "grad_norm": 0.92578125, "learning_rate": 6.877885118958664e-05, "loss": 1.0015, "step": 1931 }, { "epoch": 0.30458336916329865, "grad_norm": 0.96484375, "learning_rate": 6.877447761088915e-05, "loss": 1.0426, "step": 1932 }, { "epoch": 0.30474102101069167, "grad_norm": 0.86328125, "learning_rate": 6.877010409837237e-05, "loss": 0.9628, "step": 1933 }, { "epoch": 0.3048986728580847, "grad_norm": 1.0234375, "learning_rate": 6.876573065204556e-05, "loss": 0.9831, "step": 1934 }, { "epoch": 0.30505632470547767, "grad_norm": 1.03125, "learning_rate": 6.8761357271918e-05, "loss": 1.0138, "step": 1935 }, { "epoch": 0.3052139765528707, "grad_norm": 0.87890625, "learning_rate": 6.875698395799892e-05, "loss": 0.7691, "step": 1936 }, { "epoch": 0.3053716284002637, "grad_norm": 1.09375, "learning_rate": 6.875261071029768e-05, "loss": 1.0916, "step": 1937 }, { "epoch": 0.30552928024765674, "grad_norm": 0.8984375, "learning_rate": 6.874823752882347e-05, "loss": 1.0076, "step": 1938 }, { "epoch": 0.3056869320950497, "grad_norm": 0.83984375, "learning_rate": 6.874386441358561e-05, "loss": 1.0452, "step": 1939 }, { "epoch": 0.30584458394244274, "grad_norm": 0.93359375, "learning_rate": 6.873949136459332e-05, "loss": 1.068, "step": 1940 }, { "epoch": 0.30600223578983576, "grad_norm": 0.98046875, "learning_rate": 6.873511838185587e-05, "loss": 1.2288, "step": 1941 }, { "epoch": 0.3061598876372288, "grad_norm": 1.046875, "learning_rate": 6.873074546538258e-05, "loss": 0.9895, "step": 1942 }, { "epoch": 0.30631753948462176, "grad_norm": 0.875, "learning_rate": 6.872637261518269e-05, "loss": 1.0229, "step": 1943 }, { "epoch": 0.3064751913320148, "grad_norm": 0.93359375, "learning_rate": 6.872199983126546e-05, "loss": 1.0553, "step": 1944 }, { "epoch": 0.3066328431794078, "grad_norm": 0.83203125, "learning_rate": 6.871762711364018e-05, "loss": 0.9682, "step": 1945 }, { "epoch": 0.30679049502680084, "grad_norm": 0.8984375, "learning_rate": 6.871325446231606e-05, "loss": 0.958, "step": 1946 }, { "epoch": 0.3069481468741938, "grad_norm": 0.98046875, "learning_rate": 6.870888187730245e-05, "loss": 1.0723, "step": 1947 }, { "epoch": 0.30710579872158683, "grad_norm": 0.88671875, "learning_rate": 6.870450935860857e-05, "loss": 1.0502, "step": 1948 }, { "epoch": 0.30726345056897986, "grad_norm": 1.046875, "learning_rate": 6.870013690624368e-05, "loss": 1.2192, "step": 1949 }, { "epoch": 0.3074211024163729, "grad_norm": 0.9296875, "learning_rate": 6.869576452021705e-05, "loss": 1.1482, "step": 1950 }, { "epoch": 0.30757875426376585, "grad_norm": 0.9375, "learning_rate": 6.869139220053795e-05, "loss": 1.1024, "step": 1951 }, { "epoch": 0.3077364061111589, "grad_norm": 0.80859375, "learning_rate": 6.868701994721569e-05, "loss": 0.7835, "step": 1952 }, { "epoch": 0.3078940579585519, "grad_norm": 0.85546875, "learning_rate": 6.86826477602595e-05, "loss": 1.0139, "step": 1953 }, { "epoch": 0.30805170980594493, "grad_norm": 1.0234375, "learning_rate": 6.867827563967864e-05, "loss": 1.332, "step": 1954 }, { "epoch": 0.3082093616533379, "grad_norm": 0.9375, "learning_rate": 6.867390358548238e-05, "loss": 1.1438, "step": 1955 }, { "epoch": 0.3083670135007309, "grad_norm": 0.87890625, "learning_rate": 6.866953159767997e-05, "loss": 1.1559, "step": 1956 }, { "epoch": 0.30852466534812395, "grad_norm": 0.9765625, "learning_rate": 6.866515967628071e-05, "loss": 1.098, "step": 1957 }, { "epoch": 0.308682317195517, "grad_norm": 0.91015625, "learning_rate": 6.866078782129388e-05, "loss": 0.8567, "step": 1958 }, { "epoch": 0.30883996904290995, "grad_norm": 0.9296875, "learning_rate": 6.86564160327287e-05, "loss": 0.9513, "step": 1959 }, { "epoch": 0.30899762089030297, "grad_norm": 0.8359375, "learning_rate": 6.865204431059447e-05, "loss": 0.9079, "step": 1960 }, { "epoch": 0.309155272737696, "grad_norm": 0.9921875, "learning_rate": 6.864767265490039e-05, "loss": 1.0906, "step": 1961 }, { "epoch": 0.309312924585089, "grad_norm": 0.953125, "learning_rate": 6.864330106565582e-05, "loss": 0.9395, "step": 1962 }, { "epoch": 0.309470576432482, "grad_norm": 0.8515625, "learning_rate": 6.863892954286997e-05, "loss": 0.8805, "step": 1963 }, { "epoch": 0.309628228279875, "grad_norm": 0.83984375, "learning_rate": 6.863455808655213e-05, "loss": 0.8618, "step": 1964 }, { "epoch": 0.30978588012726804, "grad_norm": 1.0703125, "learning_rate": 6.863018669671156e-05, "loss": 1.3156, "step": 1965 }, { "epoch": 0.30994353197466107, "grad_norm": 0.88671875, "learning_rate": 6.862581537335746e-05, "loss": 0.8891, "step": 1966 }, { "epoch": 0.31010118382205404, "grad_norm": 0.90625, "learning_rate": 6.86214441164992e-05, "loss": 1.0067, "step": 1967 }, { "epoch": 0.31025883566944706, "grad_norm": 0.9296875, "learning_rate": 6.861707292614598e-05, "loss": 1.046, "step": 1968 }, { "epoch": 0.3104164875168401, "grad_norm": 0.9765625, "learning_rate": 6.861270180230708e-05, "loss": 0.9657, "step": 1969 }, { "epoch": 0.3105741393642331, "grad_norm": 1.0078125, "learning_rate": 6.860833074499178e-05, "loss": 1.1181, "step": 1970 }, { "epoch": 0.3107317912116261, "grad_norm": 0.92578125, "learning_rate": 6.86039597542093e-05, "loss": 1.0731, "step": 1971 }, { "epoch": 0.3108894430590191, "grad_norm": 0.93359375, "learning_rate": 6.859958882996895e-05, "loss": 1.0175, "step": 1972 }, { "epoch": 0.31104709490641214, "grad_norm": 0.8828125, "learning_rate": 6.859521797227999e-05, "loss": 1.0549, "step": 1973 }, { "epoch": 0.31120474675380516, "grad_norm": 0.87890625, "learning_rate": 6.859084718115165e-05, "loss": 1.081, "step": 1974 }, { "epoch": 0.31136239860119813, "grad_norm": 0.9921875, "learning_rate": 6.858647645659324e-05, "loss": 1.1961, "step": 1975 }, { "epoch": 0.31152005044859116, "grad_norm": 0.98046875, "learning_rate": 6.858210579861394e-05, "loss": 1.077, "step": 1976 }, { "epoch": 0.3116777022959842, "grad_norm": 0.9296875, "learning_rate": 6.857773520722311e-05, "loss": 1.0883, "step": 1977 }, { "epoch": 0.3118353541433772, "grad_norm": 0.9453125, "learning_rate": 6.857336468242999e-05, "loss": 1.0424, "step": 1978 }, { "epoch": 0.3119930059907702, "grad_norm": 0.90625, "learning_rate": 6.856899422424381e-05, "loss": 1.0863, "step": 1979 }, { "epoch": 0.3121506578381632, "grad_norm": 0.9921875, "learning_rate": 6.856462383267386e-05, "loss": 1.1165, "step": 1980 }, { "epoch": 0.31230830968555623, "grad_norm": 0.84765625, "learning_rate": 6.856025350772934e-05, "loss": 1.0884, "step": 1981 }, { "epoch": 0.31246596153294925, "grad_norm": 0.90625, "learning_rate": 6.855588324941962e-05, "loss": 1.0194, "step": 1982 }, { "epoch": 0.3126236133803422, "grad_norm": 1.015625, "learning_rate": 6.855151305775392e-05, "loss": 1.3551, "step": 1983 }, { "epoch": 0.31278126522773525, "grad_norm": 0.9609375, "learning_rate": 6.854714293274147e-05, "loss": 1.1895, "step": 1984 }, { "epoch": 0.3129389170751283, "grad_norm": 0.94921875, "learning_rate": 6.854277287439154e-05, "loss": 0.9744, "step": 1985 }, { "epoch": 0.3130965689225213, "grad_norm": 0.94921875, "learning_rate": 6.853840288271341e-05, "loss": 1.0628, "step": 1986 }, { "epoch": 0.31325422076991427, "grad_norm": 1.2265625, "learning_rate": 6.853403295771633e-05, "loss": 1.2318, "step": 1987 }, { "epoch": 0.3134118726173073, "grad_norm": 0.859375, "learning_rate": 6.852966309940959e-05, "loss": 0.9688, "step": 1988 }, { "epoch": 0.3135695244647003, "grad_norm": 0.90625, "learning_rate": 6.852529330780243e-05, "loss": 1.0642, "step": 1989 }, { "epoch": 0.31372717631209335, "grad_norm": 0.94140625, "learning_rate": 6.852092358290411e-05, "loss": 1.1573, "step": 1990 }, { "epoch": 0.3138848281594863, "grad_norm": 1.03125, "learning_rate": 6.851655392472387e-05, "loss": 1.1883, "step": 1991 }, { "epoch": 0.31404248000687934, "grad_norm": 0.92578125, "learning_rate": 6.851218433327103e-05, "loss": 1.1077, "step": 1992 }, { "epoch": 0.31420013185427237, "grad_norm": 0.91796875, "learning_rate": 6.850781480855479e-05, "loss": 0.9263, "step": 1993 }, { "epoch": 0.3143577837016654, "grad_norm": 0.9765625, "learning_rate": 6.850344535058446e-05, "loss": 1.0613, "step": 1994 }, { "epoch": 0.31451543554905836, "grad_norm": 1.203125, "learning_rate": 6.849907595936927e-05, "loss": 1.0678, "step": 1995 }, { "epoch": 0.3146730873964514, "grad_norm": 0.87109375, "learning_rate": 6.849470663491844e-05, "loss": 1.1007, "step": 1996 }, { "epoch": 0.3148307392438444, "grad_norm": 0.890625, "learning_rate": 6.849033737724131e-05, "loss": 1.1009, "step": 1997 }, { "epoch": 0.31498839109123744, "grad_norm": 0.9765625, "learning_rate": 6.848596818634714e-05, "loss": 1.1363, "step": 1998 }, { "epoch": 0.3151460429386304, "grad_norm": 0.8671875, "learning_rate": 6.848159906224513e-05, "loss": 0.9082, "step": 1999 }, { "epoch": 0.31530369478602344, "grad_norm": 0.8671875, "learning_rate": 6.847723000494459e-05, "loss": 0.8839, "step": 2000 }, { "epoch": 0.31530369478602344, "eval_loss": 1.0622248649597168, "eval_runtime": 306.9382, "eval_samples_per_second": 32.58, "eval_steps_per_second": 0.681, "step": 2000 }, { "epoch": 0.31546134663341646, "grad_norm": 0.91015625, "learning_rate": 6.84728610144547e-05, "loss": 1.0609, "step": 2001 }, { "epoch": 0.3156189984808095, "grad_norm": 1.015625, "learning_rate": 6.846849209078484e-05, "loss": 1.095, "step": 2002 }, { "epoch": 0.31577665032820246, "grad_norm": 1.0078125, "learning_rate": 6.84641232339442e-05, "loss": 1.0071, "step": 2003 }, { "epoch": 0.3159343021755955, "grad_norm": 0.9453125, "learning_rate": 6.845975444394204e-05, "loss": 0.9972, "step": 2004 }, { "epoch": 0.3160919540229885, "grad_norm": 0.9375, "learning_rate": 6.845538572078763e-05, "loss": 1.2231, "step": 2005 }, { "epoch": 0.31624960587038153, "grad_norm": 0.921875, "learning_rate": 6.84510170644902e-05, "loss": 1.046, "step": 2006 }, { "epoch": 0.3164072577177745, "grad_norm": 0.921875, "learning_rate": 6.844664847505907e-05, "loss": 1.0046, "step": 2007 }, { "epoch": 0.31656490956516753, "grad_norm": 0.87109375, "learning_rate": 6.844227995250345e-05, "loss": 1.3371, "step": 2008 }, { "epoch": 0.31672256141256055, "grad_norm": 0.9765625, "learning_rate": 6.843791149683262e-05, "loss": 1.0634, "step": 2009 }, { "epoch": 0.3168802132599536, "grad_norm": 0.89453125, "learning_rate": 6.843354310805586e-05, "loss": 0.8842, "step": 2010 }, { "epoch": 0.31703786510734655, "grad_norm": 0.98828125, "learning_rate": 6.842917478618235e-05, "loss": 1.149, "step": 2011 }, { "epoch": 0.3171955169547396, "grad_norm": 0.94921875, "learning_rate": 6.842480653122143e-05, "loss": 0.9179, "step": 2012 }, { "epoch": 0.3173531688021326, "grad_norm": 0.98828125, "learning_rate": 6.842043834318232e-05, "loss": 1.0081, "step": 2013 }, { "epoch": 0.3175108206495256, "grad_norm": 1.0390625, "learning_rate": 6.841607022207431e-05, "loss": 1.212, "step": 2014 }, { "epoch": 0.3176684724969186, "grad_norm": 0.84375, "learning_rate": 6.84117021679066e-05, "loss": 0.861, "step": 2015 }, { "epoch": 0.3178261243443116, "grad_norm": 1.0234375, "learning_rate": 6.840733418068845e-05, "loss": 0.9907, "step": 2016 }, { "epoch": 0.31798377619170465, "grad_norm": 0.9375, "learning_rate": 6.840296626042921e-05, "loss": 0.9483, "step": 2017 }, { "epoch": 0.3181414280390977, "grad_norm": 0.9609375, "learning_rate": 6.839859840713807e-05, "loss": 1.0886, "step": 2018 }, { "epoch": 0.31829907988649064, "grad_norm": 0.890625, "learning_rate": 6.839423062082429e-05, "loss": 0.9851, "step": 2019 }, { "epoch": 0.31845673173388367, "grad_norm": 1.03125, "learning_rate": 6.838986290149712e-05, "loss": 1.2503, "step": 2020 }, { "epoch": 0.3186143835812767, "grad_norm": 1.0078125, "learning_rate": 6.83854952491658e-05, "loss": 1.3227, "step": 2021 }, { "epoch": 0.3187720354286697, "grad_norm": 0.94921875, "learning_rate": 6.838112766383966e-05, "loss": 1.0298, "step": 2022 }, { "epoch": 0.3189296872760627, "grad_norm": 0.9140625, "learning_rate": 6.837676014552791e-05, "loss": 1.1044, "step": 2023 }, { "epoch": 0.3190873391234557, "grad_norm": 0.9921875, "learning_rate": 6.837239269423981e-05, "loss": 1.1684, "step": 2024 }, { "epoch": 0.31924499097084874, "grad_norm": 1.0625, "learning_rate": 6.836802530998461e-05, "loss": 1.1514, "step": 2025 }, { "epoch": 0.31940264281824177, "grad_norm": 0.87890625, "learning_rate": 6.836365799277154e-05, "loss": 0.9073, "step": 2026 }, { "epoch": 0.31956029466563474, "grad_norm": 0.98828125, "learning_rate": 6.835929074260993e-05, "loss": 1.2934, "step": 2027 }, { "epoch": 0.31971794651302776, "grad_norm": 1.03125, "learning_rate": 6.835492355950898e-05, "loss": 1.0152, "step": 2028 }, { "epoch": 0.3198755983604208, "grad_norm": 0.91015625, "learning_rate": 6.835055644347797e-05, "loss": 1.1495, "step": 2029 }, { "epoch": 0.3200332502078138, "grad_norm": 0.87890625, "learning_rate": 6.834618939452614e-05, "loss": 0.8969, "step": 2030 }, { "epoch": 0.3201909020552068, "grad_norm": 0.90625, "learning_rate": 6.834182241266275e-05, "loss": 1.1123, "step": 2031 }, { "epoch": 0.3203485539025998, "grad_norm": 0.94140625, "learning_rate": 6.833745549789707e-05, "loss": 0.9982, "step": 2032 }, { "epoch": 0.32050620574999283, "grad_norm": 0.99609375, "learning_rate": 6.833308865023834e-05, "loss": 1.314, "step": 2033 }, { "epoch": 0.32066385759738586, "grad_norm": 0.86328125, "learning_rate": 6.832872186969583e-05, "loss": 1.0377, "step": 2034 }, { "epoch": 0.3208215094447789, "grad_norm": 0.890625, "learning_rate": 6.832435515627877e-05, "loss": 1.0519, "step": 2035 }, { "epoch": 0.32097916129217186, "grad_norm": 0.921875, "learning_rate": 6.831998850999639e-05, "loss": 1.1053, "step": 2036 }, { "epoch": 0.3211368131395649, "grad_norm": 1.03125, "learning_rate": 6.831562193085802e-05, "loss": 1.1427, "step": 2037 }, { "epoch": 0.3212944649869579, "grad_norm": 0.98046875, "learning_rate": 6.831125541887289e-05, "loss": 1.1727, "step": 2038 }, { "epoch": 0.32145211683435093, "grad_norm": 1.015625, "learning_rate": 6.830688897405024e-05, "loss": 1.328, "step": 2039 }, { "epoch": 0.3216097686817439, "grad_norm": 0.86328125, "learning_rate": 6.830252259639931e-05, "loss": 1.0804, "step": 2040 }, { "epoch": 0.3217674205291369, "grad_norm": 0.9375, "learning_rate": 6.829815628592935e-05, "loss": 0.9526, "step": 2041 }, { "epoch": 0.32192507237652995, "grad_norm": 0.91015625, "learning_rate": 6.829379004264966e-05, "loss": 1.0284, "step": 2042 }, { "epoch": 0.322082724223923, "grad_norm": 1.046875, "learning_rate": 6.828942386656948e-05, "loss": 1.1929, "step": 2043 }, { "epoch": 0.32224037607131595, "grad_norm": 0.921875, "learning_rate": 6.828505775769807e-05, "loss": 1.2235, "step": 2044 }, { "epoch": 0.322398027918709, "grad_norm": 1.015625, "learning_rate": 6.828069171604465e-05, "loss": 1.0808, "step": 2045 }, { "epoch": 0.322555679766102, "grad_norm": 1.0390625, "learning_rate": 6.827632574161845e-05, "loss": 1.2171, "step": 2046 }, { "epoch": 0.322713331613495, "grad_norm": 0.90234375, "learning_rate": 6.827195983442881e-05, "loss": 1.0112, "step": 2047 }, { "epoch": 0.322870983460888, "grad_norm": 0.91796875, "learning_rate": 6.826759399448493e-05, "loss": 0.9924, "step": 2048 }, { "epoch": 0.323028635308281, "grad_norm": 0.89453125, "learning_rate": 6.826322822179608e-05, "loss": 1.1004, "step": 2049 }, { "epoch": 0.32318628715567405, "grad_norm": 0.93359375, "learning_rate": 6.82588625163715e-05, "loss": 1.2539, "step": 2050 }, { "epoch": 0.32334393900306707, "grad_norm": 0.984375, "learning_rate": 6.825449687822043e-05, "loss": 0.8724, "step": 2051 }, { "epoch": 0.32350159085046004, "grad_norm": 0.98828125, "learning_rate": 6.825013130735216e-05, "loss": 1.0367, "step": 2052 }, { "epoch": 0.32365924269785307, "grad_norm": 0.87890625, "learning_rate": 6.824576580377591e-05, "loss": 1.0989, "step": 2053 }, { "epoch": 0.3238168945452461, "grad_norm": 0.83203125, "learning_rate": 6.824140036750092e-05, "loss": 0.8809, "step": 2054 }, { "epoch": 0.3239745463926391, "grad_norm": 0.953125, "learning_rate": 6.82370349985365e-05, "loss": 0.9756, "step": 2055 }, { "epoch": 0.3241321982400321, "grad_norm": 0.9765625, "learning_rate": 6.823266969689186e-05, "loss": 0.9237, "step": 2056 }, { "epoch": 0.3242898500874251, "grad_norm": 1.0390625, "learning_rate": 6.822830446257627e-05, "loss": 0.9121, "step": 2057 }, { "epoch": 0.32444750193481814, "grad_norm": 1.3671875, "learning_rate": 6.822393929559897e-05, "loss": 0.9538, "step": 2058 }, { "epoch": 0.32460515378221116, "grad_norm": 0.98046875, "learning_rate": 6.821957419596922e-05, "loss": 1.1098, "step": 2059 }, { "epoch": 0.32476280562960413, "grad_norm": 0.9296875, "learning_rate": 6.821520916369626e-05, "loss": 0.9749, "step": 2060 }, { "epoch": 0.32492045747699716, "grad_norm": 0.9140625, "learning_rate": 6.821084419878931e-05, "loss": 1.1818, "step": 2061 }, { "epoch": 0.3250781093243902, "grad_norm": 0.8984375, "learning_rate": 6.82064793012577e-05, "loss": 0.9469, "step": 2062 }, { "epoch": 0.3252357611717832, "grad_norm": 0.95703125, "learning_rate": 6.820211447111062e-05, "loss": 1.0894, "step": 2063 }, { "epoch": 0.3253934130191762, "grad_norm": 0.98828125, "learning_rate": 6.819774970835738e-05, "loss": 1.1263, "step": 2064 }, { "epoch": 0.3255510648665692, "grad_norm": 1.6953125, "learning_rate": 6.819338501300716e-05, "loss": 1.1258, "step": 2065 }, { "epoch": 0.32570871671396223, "grad_norm": 0.9921875, "learning_rate": 6.818902038506921e-05, "loss": 1.016, "step": 2066 }, { "epoch": 0.32586636856135526, "grad_norm": 1.046875, "learning_rate": 6.818465582455285e-05, "loss": 1.0847, "step": 2067 }, { "epoch": 0.3260240204087482, "grad_norm": 0.94140625, "learning_rate": 6.81802913314673e-05, "loss": 1.001, "step": 2068 }, { "epoch": 0.32618167225614125, "grad_norm": 0.93359375, "learning_rate": 6.817592690582182e-05, "loss": 1.2238, "step": 2069 }, { "epoch": 0.3263393241035343, "grad_norm": 0.94140625, "learning_rate": 6.817156254762562e-05, "loss": 1.1811, "step": 2070 }, { "epoch": 0.3264969759509273, "grad_norm": 0.89453125, "learning_rate": 6.816719825688796e-05, "loss": 1.0741, "step": 2071 }, { "epoch": 0.3266546277983203, "grad_norm": 0.97265625, "learning_rate": 6.816283403361812e-05, "loss": 1.0666, "step": 2072 }, { "epoch": 0.3268122796457133, "grad_norm": 0.9765625, "learning_rate": 6.815846987782532e-05, "loss": 1.073, "step": 2073 }, { "epoch": 0.3269699314931063, "grad_norm": 0.9453125, "learning_rate": 6.815410578951884e-05, "loss": 1.0709, "step": 2074 }, { "epoch": 0.32712758334049935, "grad_norm": 0.8671875, "learning_rate": 6.814974176870791e-05, "loss": 0.9503, "step": 2075 }, { "epoch": 0.3272852351878923, "grad_norm": 1.125, "learning_rate": 6.81453778154018e-05, "loss": 1.2879, "step": 2076 }, { "epoch": 0.32744288703528535, "grad_norm": 0.86328125, "learning_rate": 6.81410139296097e-05, "loss": 1.1819, "step": 2077 }, { "epoch": 0.32760053888267837, "grad_norm": 0.9453125, "learning_rate": 6.813665011134093e-05, "loss": 1.0551, "step": 2078 }, { "epoch": 0.3277581907300714, "grad_norm": 0.80078125, "learning_rate": 6.813228636060471e-05, "loss": 0.9619, "step": 2079 }, { "epoch": 0.32791584257746437, "grad_norm": 1.0546875, "learning_rate": 6.812792267741029e-05, "loss": 1.0038, "step": 2080 }, { "epoch": 0.3280734944248574, "grad_norm": 0.8671875, "learning_rate": 6.81235590617669e-05, "loss": 0.916, "step": 2081 }, { "epoch": 0.3282311462722504, "grad_norm": 1.0234375, "learning_rate": 6.811919551368378e-05, "loss": 1.184, "step": 2082 }, { "epoch": 0.32838879811964344, "grad_norm": 0.90625, "learning_rate": 6.811483203317023e-05, "loss": 0.9072, "step": 2083 }, { "epoch": 0.3285464499670364, "grad_norm": 0.98828125, "learning_rate": 6.811046862023548e-05, "loss": 1.1323, "step": 2084 }, { "epoch": 0.32870410181442944, "grad_norm": 0.875, "learning_rate": 6.810610527488877e-05, "loss": 1.0646, "step": 2085 }, { "epoch": 0.32886175366182246, "grad_norm": 0.734375, "learning_rate": 6.810174199713934e-05, "loss": 0.8824, "step": 2086 }, { "epoch": 0.3290194055092155, "grad_norm": 0.9765625, "learning_rate": 6.80973787869964e-05, "loss": 1.1224, "step": 2087 }, { "epoch": 0.32917705735660846, "grad_norm": 1.0703125, "learning_rate": 6.809301564446931e-05, "loss": 1.2173, "step": 2088 }, { "epoch": 0.3293347092040015, "grad_norm": 0.90234375, "learning_rate": 6.808865256956722e-05, "loss": 1.1498, "step": 2089 }, { "epoch": 0.3294923610513945, "grad_norm": 0.95703125, "learning_rate": 6.808428956229942e-05, "loss": 1.0883, "step": 2090 }, { "epoch": 0.32965001289878754, "grad_norm": 1.0703125, "learning_rate": 6.807992662267514e-05, "loss": 1.2694, "step": 2091 }, { "epoch": 0.3298076647461805, "grad_norm": 0.9140625, "learning_rate": 6.807556375070359e-05, "loss": 1.1311, "step": 2092 }, { "epoch": 0.32996531659357353, "grad_norm": 1.0546875, "learning_rate": 6.807120094639409e-05, "loss": 1.2841, "step": 2093 }, { "epoch": 0.33012296844096656, "grad_norm": 0.8828125, "learning_rate": 6.806683820975587e-05, "loss": 1.0515, "step": 2094 }, { "epoch": 0.3302806202883596, "grad_norm": 0.984375, "learning_rate": 6.806247554079816e-05, "loss": 1.1433, "step": 2095 }, { "epoch": 0.33043827213575255, "grad_norm": 0.953125, "learning_rate": 6.805811293953021e-05, "loss": 1.0956, "step": 2096 }, { "epoch": 0.3305959239831456, "grad_norm": 0.97265625, "learning_rate": 6.805375040596123e-05, "loss": 1.0345, "step": 2097 }, { "epoch": 0.3307535758305386, "grad_norm": 1.1015625, "learning_rate": 6.804938794010053e-05, "loss": 1.0658, "step": 2098 }, { "epoch": 0.33091122767793163, "grad_norm": 0.89453125, "learning_rate": 6.804502554195733e-05, "loss": 0.926, "step": 2099 }, { "epoch": 0.3310688795253246, "grad_norm": 0.90234375, "learning_rate": 6.804066321154085e-05, "loss": 0.9631, "step": 2100 }, { "epoch": 0.3312265313727176, "grad_norm": 0.890625, "learning_rate": 6.803630094886039e-05, "loss": 1.0872, "step": 2101 }, { "epoch": 0.33138418322011065, "grad_norm": 0.9453125, "learning_rate": 6.803193875392511e-05, "loss": 1.2202, "step": 2102 }, { "epoch": 0.3315418350675037, "grad_norm": 0.9921875, "learning_rate": 6.802757662674434e-05, "loss": 1.2617, "step": 2103 }, { "epoch": 0.33169948691489665, "grad_norm": 0.96875, "learning_rate": 6.80232145673273e-05, "loss": 0.868, "step": 2104 }, { "epoch": 0.33185713876228967, "grad_norm": 0.890625, "learning_rate": 6.801885257568323e-05, "loss": 0.9644, "step": 2105 }, { "epoch": 0.3320147906096827, "grad_norm": 1.0859375, "learning_rate": 6.801449065182137e-05, "loss": 0.9373, "step": 2106 }, { "epoch": 0.3321724424570757, "grad_norm": 1.0, "learning_rate": 6.801012879575093e-05, "loss": 1.266, "step": 2107 }, { "epoch": 0.3323300943044687, "grad_norm": 0.9765625, "learning_rate": 6.800576700748122e-05, "loss": 1.1639, "step": 2108 }, { "epoch": 0.3324877461518617, "grad_norm": 0.921875, "learning_rate": 6.800140528702147e-05, "loss": 1.0931, "step": 2109 }, { "epoch": 0.33264539799925474, "grad_norm": 0.8515625, "learning_rate": 6.799704363438093e-05, "loss": 1.0479, "step": 2110 }, { "epoch": 0.33280304984664777, "grad_norm": 1.0, "learning_rate": 6.799268204956881e-05, "loss": 1.1818, "step": 2111 }, { "epoch": 0.33296070169404074, "grad_norm": 0.89453125, "learning_rate": 6.798832053259434e-05, "loss": 1.1938, "step": 2112 }, { "epoch": 0.33311835354143376, "grad_norm": 0.984375, "learning_rate": 6.798395908346682e-05, "loss": 1.1217, "step": 2113 }, { "epoch": 0.3332760053888268, "grad_norm": 0.97265625, "learning_rate": 6.797959770219548e-05, "loss": 1.0753, "step": 2114 }, { "epoch": 0.3334336572362198, "grad_norm": 0.92578125, "learning_rate": 6.797523638878955e-05, "loss": 1.256, "step": 2115 }, { "epoch": 0.3335913090836128, "grad_norm": 0.8984375, "learning_rate": 6.797087514325828e-05, "loss": 1.0689, "step": 2116 }, { "epoch": 0.3337489609310058, "grad_norm": 0.98046875, "learning_rate": 6.796651396561088e-05, "loss": 1.145, "step": 2117 }, { "epoch": 0.33390661277839884, "grad_norm": 1.0, "learning_rate": 6.796215285585666e-05, "loss": 1.0995, "step": 2118 }, { "epoch": 0.33406426462579186, "grad_norm": 0.91015625, "learning_rate": 6.79577918140048e-05, "loss": 0.9946, "step": 2119 }, { "epoch": 0.33422191647318483, "grad_norm": 1.03125, "learning_rate": 6.795343084006458e-05, "loss": 1.0797, "step": 2120 }, { "epoch": 0.33437956832057786, "grad_norm": 0.953125, "learning_rate": 6.794906993404522e-05, "loss": 1.0831, "step": 2121 }, { "epoch": 0.3345372201679709, "grad_norm": 1.078125, "learning_rate": 6.794470909595596e-05, "loss": 1.303, "step": 2122 }, { "epoch": 0.3346948720153639, "grad_norm": 0.94140625, "learning_rate": 6.79403483258061e-05, "loss": 1.174, "step": 2123 }, { "epoch": 0.3348525238627569, "grad_norm": 1.1171875, "learning_rate": 6.793598762360481e-05, "loss": 1.1146, "step": 2124 }, { "epoch": 0.3350101757101499, "grad_norm": 0.99609375, "learning_rate": 6.793162698936137e-05, "loss": 1.005, "step": 2125 }, { "epoch": 0.33516782755754293, "grad_norm": 0.97265625, "learning_rate": 6.792726642308503e-05, "loss": 1.1307, "step": 2126 }, { "epoch": 0.33532547940493596, "grad_norm": 0.890625, "learning_rate": 6.792290592478497e-05, "loss": 0.9617, "step": 2127 }, { "epoch": 0.3354831312523289, "grad_norm": 0.90625, "learning_rate": 6.79185454944705e-05, "loss": 0.9597, "step": 2128 }, { "epoch": 0.33564078309972195, "grad_norm": 0.87109375, "learning_rate": 6.791418513215086e-05, "loss": 0.9049, "step": 2129 }, { "epoch": 0.335798434947115, "grad_norm": 1.046875, "learning_rate": 6.790982483783526e-05, "loss": 1.1412, "step": 2130 }, { "epoch": 0.335956086794508, "grad_norm": 0.84375, "learning_rate": 6.790546461153296e-05, "loss": 1.0486, "step": 2131 }, { "epoch": 0.33611373864190097, "grad_norm": 1.0078125, "learning_rate": 6.790110445325313e-05, "loss": 0.9885, "step": 2132 }, { "epoch": 0.336271390489294, "grad_norm": 0.87109375, "learning_rate": 6.789674436300516e-05, "loss": 1.0136, "step": 2133 }, { "epoch": 0.336429042336687, "grad_norm": 0.90234375, "learning_rate": 6.789238434079817e-05, "loss": 0.8374, "step": 2134 }, { "epoch": 0.33658669418408005, "grad_norm": 0.93359375, "learning_rate": 6.788802438664144e-05, "loss": 0.9635, "step": 2135 }, { "epoch": 0.336744346031473, "grad_norm": 0.8203125, "learning_rate": 6.78836645005442e-05, "loss": 0.9721, "step": 2136 }, { "epoch": 0.33690199787886604, "grad_norm": 1.0078125, "learning_rate": 6.787930468251569e-05, "loss": 1.1027, "step": 2137 }, { "epoch": 0.33705964972625907, "grad_norm": 1.0078125, "learning_rate": 6.787494493256519e-05, "loss": 1.1275, "step": 2138 }, { "epoch": 0.3372173015736521, "grad_norm": 0.91796875, "learning_rate": 6.787058525070189e-05, "loss": 0.9446, "step": 2139 }, { "epoch": 0.33737495342104507, "grad_norm": 0.9140625, "learning_rate": 6.786622563693503e-05, "loss": 1.0358, "step": 2140 }, { "epoch": 0.3375326052684381, "grad_norm": 0.94921875, "learning_rate": 6.786186609127389e-05, "loss": 1.1396, "step": 2141 }, { "epoch": 0.3376902571158311, "grad_norm": 0.8828125, "learning_rate": 6.785750661372766e-05, "loss": 1.0144, "step": 2142 }, { "epoch": 0.33784790896322414, "grad_norm": 0.94921875, "learning_rate": 6.785314720430565e-05, "loss": 1.1476, "step": 2143 }, { "epoch": 0.3380055608106171, "grad_norm": 0.94140625, "learning_rate": 6.784878786301703e-05, "loss": 1.0698, "step": 2144 }, { "epoch": 0.33816321265801014, "grad_norm": 0.953125, "learning_rate": 6.784442858987108e-05, "loss": 1.1043, "step": 2145 }, { "epoch": 0.33832086450540316, "grad_norm": 0.9609375, "learning_rate": 6.784006938487702e-05, "loss": 1.008, "step": 2146 }, { "epoch": 0.3384785163527962, "grad_norm": 0.85546875, "learning_rate": 6.783571024804407e-05, "loss": 0.8262, "step": 2147 }, { "epoch": 0.33863616820018916, "grad_norm": 0.95703125, "learning_rate": 6.783135117938152e-05, "loss": 1.201, "step": 2148 }, { "epoch": 0.3387938200475822, "grad_norm": 1.015625, "learning_rate": 6.782699217889858e-05, "loss": 1.0254, "step": 2149 }, { "epoch": 0.3389514718949752, "grad_norm": 0.96484375, "learning_rate": 6.782263324660449e-05, "loss": 1.1882, "step": 2150 }, { "epoch": 0.33910912374236823, "grad_norm": 1.078125, "learning_rate": 6.78182743825085e-05, "loss": 1.0499, "step": 2151 }, { "epoch": 0.3392667755897612, "grad_norm": 1.0078125, "learning_rate": 6.781391558661981e-05, "loss": 1.0338, "step": 2152 }, { "epoch": 0.33942442743715423, "grad_norm": 0.91015625, "learning_rate": 6.78095568589477e-05, "loss": 1.0153, "step": 2153 }, { "epoch": 0.33958207928454726, "grad_norm": 1.0625, "learning_rate": 6.780519819950141e-05, "loss": 1.0766, "step": 2154 }, { "epoch": 0.3397397311319403, "grad_norm": 0.9765625, "learning_rate": 6.780083960829015e-05, "loss": 1.1782, "step": 2155 }, { "epoch": 0.33989738297933325, "grad_norm": 1.0390625, "learning_rate": 6.779648108532319e-05, "loss": 1.3827, "step": 2156 }, { "epoch": 0.3400550348267263, "grad_norm": 1.0546875, "learning_rate": 6.77921226306097e-05, "loss": 1.3931, "step": 2157 }, { "epoch": 0.3402126866741193, "grad_norm": 0.921875, "learning_rate": 6.778776424415899e-05, "loss": 1.1675, "step": 2158 }, { "epoch": 0.34037033852151233, "grad_norm": 0.93359375, "learning_rate": 6.778340592598029e-05, "loss": 1.1134, "step": 2159 }, { "epoch": 0.3405279903689053, "grad_norm": 0.828125, "learning_rate": 6.777904767608281e-05, "loss": 0.8249, "step": 2160 }, { "epoch": 0.3406856422162983, "grad_norm": 0.95703125, "learning_rate": 6.777468949447579e-05, "loss": 1.0454, "step": 2161 }, { "epoch": 0.34084329406369135, "grad_norm": 0.93359375, "learning_rate": 6.777033138116846e-05, "loss": 1.249, "step": 2162 }, { "epoch": 0.3410009459110844, "grad_norm": 0.8359375, "learning_rate": 6.77659733361701e-05, "loss": 0.9667, "step": 2163 }, { "epoch": 0.34115859775847734, "grad_norm": 0.96484375, "learning_rate": 6.77616153594899e-05, "loss": 1.1271, "step": 2164 }, { "epoch": 0.34131624960587037, "grad_norm": 1.0703125, "learning_rate": 6.775725745113713e-05, "loss": 1.1888, "step": 2165 }, { "epoch": 0.3414739014532634, "grad_norm": 0.9609375, "learning_rate": 6.775289961112101e-05, "loss": 0.9442, "step": 2166 }, { "epoch": 0.3416315533006564, "grad_norm": 0.984375, "learning_rate": 6.774854183945072e-05, "loss": 1.026, "step": 2167 }, { "epoch": 0.3417892051480494, "grad_norm": 1.046875, "learning_rate": 6.774418413613561e-05, "loss": 1.2214, "step": 2168 }, { "epoch": 0.3419468569954424, "grad_norm": 1.0390625, "learning_rate": 6.773982650118484e-05, "loss": 1.1054, "step": 2169 }, { "epoch": 0.34210450884283544, "grad_norm": 0.92578125, "learning_rate": 6.773546893460769e-05, "loss": 1.2413, "step": 2170 }, { "epoch": 0.34226216069022847, "grad_norm": 0.86328125, "learning_rate": 6.773111143641335e-05, "loss": 1.0844, "step": 2171 }, { "epoch": 0.34241981253762144, "grad_norm": 0.8984375, "learning_rate": 6.772675400661104e-05, "loss": 0.8381, "step": 2172 }, { "epoch": 0.34257746438501446, "grad_norm": 0.8359375, "learning_rate": 6.772239664521007e-05, "loss": 1.0863, "step": 2173 }, { "epoch": 0.3427351162324075, "grad_norm": 0.94921875, "learning_rate": 6.771803935221963e-05, "loss": 0.9231, "step": 2174 }, { "epoch": 0.3428927680798005, "grad_norm": 1.0, "learning_rate": 6.771368212764897e-05, "loss": 0.9976, "step": 2175 }, { "epoch": 0.3430504199271935, "grad_norm": 0.890625, "learning_rate": 6.77093249715073e-05, "loss": 1.0573, "step": 2176 }, { "epoch": 0.3432080717745865, "grad_norm": 0.90234375, "learning_rate": 6.770496788380385e-05, "loss": 1.1379, "step": 2177 }, { "epoch": 0.34336572362197954, "grad_norm": 0.91015625, "learning_rate": 6.77006108645479e-05, "loss": 1.0191, "step": 2178 }, { "epoch": 0.34352337546937256, "grad_norm": 0.92578125, "learning_rate": 6.769625391374867e-05, "loss": 0.9361, "step": 2179 }, { "epoch": 0.34368102731676553, "grad_norm": 0.8515625, "learning_rate": 6.769189703141536e-05, "loss": 1.1088, "step": 2180 }, { "epoch": 0.34383867916415856, "grad_norm": 1.1015625, "learning_rate": 6.768754021755726e-05, "loss": 1.3022, "step": 2181 }, { "epoch": 0.3439963310115516, "grad_norm": 0.953125, "learning_rate": 6.768318347218353e-05, "loss": 1.0419, "step": 2182 }, { "epoch": 0.3441539828589446, "grad_norm": 1.03125, "learning_rate": 6.767882679530347e-05, "loss": 0.9761, "step": 2183 }, { "epoch": 0.3443116347063376, "grad_norm": 0.99609375, "learning_rate": 6.76744701869263e-05, "loss": 1.0723, "step": 2184 }, { "epoch": 0.3444692865537306, "grad_norm": 1.03125, "learning_rate": 6.767011364706122e-05, "loss": 1.1259, "step": 2185 }, { "epoch": 0.34462693840112363, "grad_norm": 0.97265625, "learning_rate": 6.766575717571751e-05, "loss": 1.0985, "step": 2186 }, { "epoch": 0.34478459024851665, "grad_norm": 0.97265625, "learning_rate": 6.766140077290433e-05, "loss": 1.1436, "step": 2187 }, { "epoch": 0.3449422420959096, "grad_norm": 1.0078125, "learning_rate": 6.7657044438631e-05, "loss": 1.1508, "step": 2188 }, { "epoch": 0.34509989394330265, "grad_norm": 0.9765625, "learning_rate": 6.765268817290672e-05, "loss": 1.0807, "step": 2189 }, { "epoch": 0.3452575457906957, "grad_norm": 0.8203125, "learning_rate": 6.764833197574072e-05, "loss": 1.0054, "step": 2190 }, { "epoch": 0.3454151976380887, "grad_norm": 0.91796875, "learning_rate": 6.764397584714223e-05, "loss": 1.0618, "step": 2191 }, { "epoch": 0.3455728494854817, "grad_norm": 1.09375, "learning_rate": 6.763961978712044e-05, "loss": 1.1288, "step": 2192 }, { "epoch": 0.3457305013328747, "grad_norm": 0.9453125, "learning_rate": 6.763526379568466e-05, "loss": 1.0887, "step": 2193 }, { "epoch": 0.3458881531802677, "grad_norm": 1.0546875, "learning_rate": 6.76309078728441e-05, "loss": 1.3282, "step": 2194 }, { "epoch": 0.34604580502766075, "grad_norm": 0.95703125, "learning_rate": 6.762655201860798e-05, "loss": 1.1897, "step": 2195 }, { "epoch": 0.34620345687505377, "grad_norm": 0.96484375, "learning_rate": 6.762219623298553e-05, "loss": 1.3365, "step": 2196 }, { "epoch": 0.34636110872244674, "grad_norm": 0.91015625, "learning_rate": 6.761784051598594e-05, "loss": 1.0132, "step": 2197 }, { "epoch": 0.34651876056983977, "grad_norm": 0.82421875, "learning_rate": 6.761348486761854e-05, "loss": 1.0249, "step": 2198 }, { "epoch": 0.3466764124172328, "grad_norm": 0.9453125, "learning_rate": 6.76091292878925e-05, "loss": 1.1909, "step": 2199 }, { "epoch": 0.3468340642646258, "grad_norm": 0.97265625, "learning_rate": 6.760477377681705e-05, "loss": 1.0123, "step": 2200 }, { "epoch": 0.3469917161120188, "grad_norm": 0.84765625, "learning_rate": 6.760041833440144e-05, "loss": 1.1197, "step": 2201 }, { "epoch": 0.3471493679594118, "grad_norm": 0.8046875, "learning_rate": 6.759606296065486e-05, "loss": 0.9041, "step": 2202 }, { "epoch": 0.34730701980680484, "grad_norm": 0.96484375, "learning_rate": 6.75917076555866e-05, "loss": 1.0531, "step": 2203 }, { "epoch": 0.34746467165419787, "grad_norm": 0.83203125, "learning_rate": 6.758735241920588e-05, "loss": 0.7879, "step": 2204 }, { "epoch": 0.34762232350159084, "grad_norm": 0.94140625, "learning_rate": 6.758299725152189e-05, "loss": 1.0953, "step": 2205 }, { "epoch": 0.34777997534898386, "grad_norm": 0.98046875, "learning_rate": 6.75786421525439e-05, "loss": 1.0906, "step": 2206 }, { "epoch": 0.3479376271963769, "grad_norm": 0.9296875, "learning_rate": 6.757428712228107e-05, "loss": 0.9932, "step": 2207 }, { "epoch": 0.3480952790437699, "grad_norm": 0.890625, "learning_rate": 6.756993216074275e-05, "loss": 0.9871, "step": 2208 }, { "epoch": 0.3482529308911629, "grad_norm": 0.9296875, "learning_rate": 6.756557726793808e-05, "loss": 1.07, "step": 2209 }, { "epoch": 0.3484105827385559, "grad_norm": 1.03125, "learning_rate": 6.756122244387632e-05, "loss": 0.9949, "step": 2210 }, { "epoch": 0.34856823458594893, "grad_norm": 1.09375, "learning_rate": 6.75568676885667e-05, "loss": 1.1843, "step": 2211 }, { "epoch": 0.34872588643334196, "grad_norm": 1.0078125, "learning_rate": 6.75525130020184e-05, "loss": 1.0812, "step": 2212 }, { "epoch": 0.34888353828073493, "grad_norm": 0.94140625, "learning_rate": 6.754815838424074e-05, "loss": 1.1916, "step": 2213 }, { "epoch": 0.34904119012812795, "grad_norm": 1.015625, "learning_rate": 6.75438038352429e-05, "loss": 1.0848, "step": 2214 }, { "epoch": 0.349198841975521, "grad_norm": 0.98046875, "learning_rate": 6.753944935503412e-05, "loss": 1.2739, "step": 2215 }, { "epoch": 0.349356493822914, "grad_norm": 0.890625, "learning_rate": 6.75350949436236e-05, "loss": 1.1133, "step": 2216 }, { "epoch": 0.349514145670307, "grad_norm": 0.92578125, "learning_rate": 6.75307406010206e-05, "loss": 1.0498, "step": 2217 }, { "epoch": 0.3496717975177, "grad_norm": 0.98828125, "learning_rate": 6.752638632723431e-05, "loss": 0.9045, "step": 2218 }, { "epoch": 0.349829449365093, "grad_norm": 0.93359375, "learning_rate": 6.752203212227402e-05, "loss": 1.0925, "step": 2219 }, { "epoch": 0.34998710121248605, "grad_norm": 0.89453125, "learning_rate": 6.751767798614894e-05, "loss": 0.9421, "step": 2220 }, { "epoch": 0.350144753059879, "grad_norm": 0.9921875, "learning_rate": 6.751332391886827e-05, "loss": 1.1928, "step": 2221 }, { "epoch": 0.35030240490727205, "grad_norm": 0.96484375, "learning_rate": 6.750896992044126e-05, "loss": 1.1387, "step": 2222 }, { "epoch": 0.3504600567546651, "grad_norm": 1.0390625, "learning_rate": 6.75046159908771e-05, "loss": 1.0339, "step": 2223 }, { "epoch": 0.3506177086020581, "grad_norm": 0.8671875, "learning_rate": 6.750026213018509e-05, "loss": 0.9718, "step": 2224 }, { "epoch": 0.35077536044945107, "grad_norm": 0.96875, "learning_rate": 6.749590833837438e-05, "loss": 1.1879, "step": 2225 }, { "epoch": 0.3509330122968441, "grad_norm": 0.8359375, "learning_rate": 6.749155461545427e-05, "loss": 0.9013, "step": 2226 }, { "epoch": 0.3510906641442371, "grad_norm": 0.90625, "learning_rate": 6.748720096143392e-05, "loss": 1.1077, "step": 2227 }, { "epoch": 0.35124831599163014, "grad_norm": 0.91015625, "learning_rate": 6.74828473763226e-05, "loss": 0.9664, "step": 2228 }, { "epoch": 0.3514059678390231, "grad_norm": 1.0390625, "learning_rate": 6.747849386012954e-05, "loss": 1.1624, "step": 2229 }, { "epoch": 0.35156361968641614, "grad_norm": 0.98828125, "learning_rate": 6.747414041286395e-05, "loss": 1.1076, "step": 2230 }, { "epoch": 0.35172127153380917, "grad_norm": 1.03125, "learning_rate": 6.746978703453506e-05, "loss": 0.9155, "step": 2231 }, { "epoch": 0.3518789233812022, "grad_norm": 0.92578125, "learning_rate": 6.74654337251521e-05, "loss": 1.1402, "step": 2232 }, { "epoch": 0.35203657522859516, "grad_norm": 0.90625, "learning_rate": 6.746108048472424e-05, "loss": 1.0794, "step": 2233 }, { "epoch": 0.3521942270759882, "grad_norm": 0.9296875, "learning_rate": 6.745672731326082e-05, "loss": 0.8526, "step": 2234 }, { "epoch": 0.3523518789233812, "grad_norm": 0.99609375, "learning_rate": 6.7452374210771e-05, "loss": 1.03, "step": 2235 }, { "epoch": 0.35250953077077424, "grad_norm": 0.86328125, "learning_rate": 6.744802117726401e-05, "loss": 0.9623, "step": 2236 }, { "epoch": 0.3526671826181672, "grad_norm": 1.3828125, "learning_rate": 6.744366821274907e-05, "loss": 1.0031, "step": 2237 }, { "epoch": 0.35282483446556023, "grad_norm": 0.89453125, "learning_rate": 6.743931531723539e-05, "loss": 0.9254, "step": 2238 }, { "epoch": 0.35298248631295326, "grad_norm": 0.921875, "learning_rate": 6.743496249073225e-05, "loss": 1.2715, "step": 2239 }, { "epoch": 0.3531401381603463, "grad_norm": 0.93359375, "learning_rate": 6.743060973324885e-05, "loss": 1.109, "step": 2240 }, { "epoch": 0.35329779000773925, "grad_norm": 1.0078125, "learning_rate": 6.74262570447944e-05, "loss": 1.1213, "step": 2241 }, { "epoch": 0.3534554418551323, "grad_norm": 0.859375, "learning_rate": 6.742190442537814e-05, "loss": 0.9054, "step": 2242 }, { "epoch": 0.3536130937025253, "grad_norm": 0.9296875, "learning_rate": 6.741755187500928e-05, "loss": 1.0821, "step": 2243 }, { "epoch": 0.35377074554991833, "grad_norm": 0.97265625, "learning_rate": 6.741319939369706e-05, "loss": 1.0868, "step": 2244 }, { "epoch": 0.3539283973973113, "grad_norm": 0.99609375, "learning_rate": 6.740884698145072e-05, "loss": 0.8927, "step": 2245 }, { "epoch": 0.3540860492447043, "grad_norm": 1.0703125, "learning_rate": 6.740449463827947e-05, "loss": 1.2288, "step": 2246 }, { "epoch": 0.35424370109209735, "grad_norm": 0.85546875, "learning_rate": 6.740014236419252e-05, "loss": 1.0025, "step": 2247 }, { "epoch": 0.3544013529394904, "grad_norm": 0.8828125, "learning_rate": 6.739579015919909e-05, "loss": 0.9829, "step": 2248 }, { "epoch": 0.35455900478688335, "grad_norm": 0.8125, "learning_rate": 6.739143802330843e-05, "loss": 0.8915, "step": 2249 }, { "epoch": 0.3547166566342764, "grad_norm": 0.9765625, "learning_rate": 6.738708595652976e-05, "loss": 1.0, "step": 2250 }, { "epoch": 0.3548743084816694, "grad_norm": 0.88671875, "learning_rate": 6.738273395887229e-05, "loss": 1.0099, "step": 2251 }, { "epoch": 0.3550319603290624, "grad_norm": 0.828125, "learning_rate": 6.737838203034526e-05, "loss": 1.0821, "step": 2252 }, { "epoch": 0.3551896121764554, "grad_norm": 0.97265625, "learning_rate": 6.737403017095785e-05, "loss": 1.0546, "step": 2253 }, { "epoch": 0.3553472640238484, "grad_norm": 0.921875, "learning_rate": 6.736967838071935e-05, "loss": 1.2094, "step": 2254 }, { "epoch": 0.35550491587124144, "grad_norm": 1.1015625, "learning_rate": 6.736532665963894e-05, "loss": 0.9994, "step": 2255 }, { "epoch": 0.35566256771863447, "grad_norm": 1.0546875, "learning_rate": 6.736097500772587e-05, "loss": 1.0833, "step": 2256 }, { "epoch": 0.35582021956602744, "grad_norm": 1.03125, "learning_rate": 6.735662342498935e-05, "loss": 1.1034, "step": 2257 }, { "epoch": 0.35597787141342047, "grad_norm": 0.9453125, "learning_rate": 6.735227191143856e-05, "loss": 1.0425, "step": 2258 }, { "epoch": 0.3561355232608135, "grad_norm": 0.94921875, "learning_rate": 6.73479204670828e-05, "loss": 1.065, "step": 2259 }, { "epoch": 0.3562931751082065, "grad_norm": 1.0859375, "learning_rate": 6.734356909193125e-05, "loss": 1.4038, "step": 2260 }, { "epoch": 0.3564508269555995, "grad_norm": 0.99609375, "learning_rate": 6.733921778599315e-05, "loss": 1.0652, "step": 2261 }, { "epoch": 0.3566084788029925, "grad_norm": 0.96875, "learning_rate": 6.73348665492777e-05, "loss": 1.0583, "step": 2262 }, { "epoch": 0.35676613065038554, "grad_norm": 0.91015625, "learning_rate": 6.73305153817941e-05, "loss": 1.0196, "step": 2263 }, { "epoch": 0.35692378249777856, "grad_norm": 0.91796875, "learning_rate": 6.732616428355165e-05, "loss": 0.9423, "step": 2264 }, { "epoch": 0.35708143434517153, "grad_norm": 0.87890625, "learning_rate": 6.732181325455951e-05, "loss": 1.0701, "step": 2265 }, { "epoch": 0.35723908619256456, "grad_norm": 0.9375, "learning_rate": 6.731746229482694e-05, "loss": 0.9669, "step": 2266 }, { "epoch": 0.3573967380399576, "grad_norm": 0.86328125, "learning_rate": 6.731311140436312e-05, "loss": 1.0199, "step": 2267 }, { "epoch": 0.3575543898873506, "grad_norm": 0.83984375, "learning_rate": 6.730876058317729e-05, "loss": 0.9333, "step": 2268 }, { "epoch": 0.3577120417347436, "grad_norm": 0.90625, "learning_rate": 6.730440983127869e-05, "loss": 0.8572, "step": 2269 }, { "epoch": 0.3578696935821366, "grad_norm": 0.8828125, "learning_rate": 6.730005914867653e-05, "loss": 0.8859, "step": 2270 }, { "epoch": 0.35802734542952963, "grad_norm": 0.828125, "learning_rate": 6.729570853538001e-05, "loss": 1.0453, "step": 2271 }, { "epoch": 0.35818499727692266, "grad_norm": 1.015625, "learning_rate": 6.729135799139836e-05, "loss": 1.4755, "step": 2272 }, { "epoch": 0.3583426491243156, "grad_norm": 0.875, "learning_rate": 6.728700751674079e-05, "loss": 1.1053, "step": 2273 }, { "epoch": 0.35850030097170865, "grad_norm": 0.92578125, "learning_rate": 6.728265711141657e-05, "loss": 1.1808, "step": 2274 }, { "epoch": 0.3586579528191017, "grad_norm": 0.86328125, "learning_rate": 6.727830677543489e-05, "loss": 0.8523, "step": 2275 }, { "epoch": 0.3588156046664947, "grad_norm": 0.87109375, "learning_rate": 6.727395650880496e-05, "loss": 1.0509, "step": 2276 }, { "epoch": 0.3589732565138877, "grad_norm": 0.92578125, "learning_rate": 6.726960631153602e-05, "loss": 1.0758, "step": 2277 }, { "epoch": 0.3591309083612807, "grad_norm": 1.0390625, "learning_rate": 6.726525618363722e-05, "loss": 1.1373, "step": 2278 }, { "epoch": 0.3592885602086737, "grad_norm": 0.828125, "learning_rate": 6.72609061251179e-05, "loss": 0.8261, "step": 2279 }, { "epoch": 0.35944621205606675, "grad_norm": 0.9296875, "learning_rate": 6.72565561359872e-05, "loss": 1.0646, "step": 2280 }, { "epoch": 0.3596038639034597, "grad_norm": 0.92578125, "learning_rate": 6.725220621625437e-05, "loss": 1.0613, "step": 2281 }, { "epoch": 0.35976151575085275, "grad_norm": 0.9453125, "learning_rate": 6.724785636592861e-05, "loss": 1.0387, "step": 2282 }, { "epoch": 0.35991916759824577, "grad_norm": 0.92578125, "learning_rate": 6.724350658501912e-05, "loss": 0.9041, "step": 2283 }, { "epoch": 0.3600768194456388, "grad_norm": 0.93359375, "learning_rate": 6.723915687353517e-05, "loss": 1.1569, "step": 2284 }, { "epoch": 0.36023447129303177, "grad_norm": 0.99609375, "learning_rate": 6.723480723148596e-05, "loss": 1.1649, "step": 2285 }, { "epoch": 0.3603921231404248, "grad_norm": 1.1171875, "learning_rate": 6.723045765888071e-05, "loss": 1.0411, "step": 2286 }, { "epoch": 0.3605497749878178, "grad_norm": 0.9609375, "learning_rate": 6.722610815572863e-05, "loss": 0.9436, "step": 2287 }, { "epoch": 0.36070742683521084, "grad_norm": 0.9765625, "learning_rate": 6.72217587220389e-05, "loss": 1.1826, "step": 2288 }, { "epoch": 0.3608650786826038, "grad_norm": 0.74609375, "learning_rate": 6.721740935782081e-05, "loss": 0.7414, "step": 2289 }, { "epoch": 0.36102273052999684, "grad_norm": 1.0078125, "learning_rate": 6.721306006308357e-05, "loss": 1.1439, "step": 2290 }, { "epoch": 0.36118038237738986, "grad_norm": 0.96875, "learning_rate": 6.720871083783636e-05, "loss": 0.9869, "step": 2291 }, { "epoch": 0.3613380342247829, "grad_norm": 0.796875, "learning_rate": 6.720436168208839e-05, "loss": 1.1139, "step": 2292 }, { "epoch": 0.36149568607217586, "grad_norm": 0.91796875, "learning_rate": 6.720001259584889e-05, "loss": 1.1633, "step": 2293 }, { "epoch": 0.3616533379195689, "grad_norm": 0.93359375, "learning_rate": 6.719566357912712e-05, "loss": 1.1726, "step": 2294 }, { "epoch": 0.3618109897669619, "grad_norm": 0.8671875, "learning_rate": 6.719131463193227e-05, "loss": 0.9629, "step": 2295 }, { "epoch": 0.36196864161435494, "grad_norm": 1.015625, "learning_rate": 6.718696575427354e-05, "loss": 0.9552, "step": 2296 }, { "epoch": 0.3621262934617479, "grad_norm": 1.015625, "learning_rate": 6.718261694616018e-05, "loss": 1.2482, "step": 2297 }, { "epoch": 0.36228394530914093, "grad_norm": 0.94140625, "learning_rate": 6.717826820760134e-05, "loss": 1.0721, "step": 2298 }, { "epoch": 0.36244159715653396, "grad_norm": 0.9453125, "learning_rate": 6.717391953860633e-05, "loss": 0.9532, "step": 2299 }, { "epoch": 0.362599249003927, "grad_norm": 0.87890625, "learning_rate": 6.71695709391843e-05, "loss": 0.9664, "step": 2300 }, { "epoch": 0.36275690085131995, "grad_norm": 0.98828125, "learning_rate": 6.71652224093445e-05, "loss": 1.2529, "step": 2301 }, { "epoch": 0.362914552698713, "grad_norm": 1.0078125, "learning_rate": 6.716087394909613e-05, "loss": 1.0475, "step": 2302 }, { "epoch": 0.363072204546106, "grad_norm": 0.984375, "learning_rate": 6.715652555844838e-05, "loss": 1.207, "step": 2303 }, { "epoch": 0.36322985639349903, "grad_norm": 0.8515625, "learning_rate": 6.715217723741054e-05, "loss": 1.0334, "step": 2304 }, { "epoch": 0.363387508240892, "grad_norm": 0.95703125, "learning_rate": 6.714782898599176e-05, "loss": 1.1563, "step": 2305 }, { "epoch": 0.363545160088285, "grad_norm": 0.859375, "learning_rate": 6.714348080420131e-05, "loss": 1.0027, "step": 2306 }, { "epoch": 0.36370281193567805, "grad_norm": 0.91015625, "learning_rate": 6.713913269204836e-05, "loss": 1.192, "step": 2307 }, { "epoch": 0.3638604637830711, "grad_norm": 0.7734375, "learning_rate": 6.713478464954212e-05, "loss": 0.9898, "step": 2308 }, { "epoch": 0.36401811563046405, "grad_norm": 0.875, "learning_rate": 6.713043667669184e-05, "loss": 1.0811, "step": 2309 }, { "epoch": 0.36417576747785707, "grad_norm": 0.9765625, "learning_rate": 6.712608877350673e-05, "loss": 0.9769, "step": 2310 }, { "epoch": 0.3643334193252501, "grad_norm": 0.93359375, "learning_rate": 6.712174093999597e-05, "loss": 0.924, "step": 2311 }, { "epoch": 0.3644910711726431, "grad_norm": 0.74609375, "learning_rate": 6.711739317616882e-05, "loss": 0.9438, "step": 2312 }, { "epoch": 0.3646487230200361, "grad_norm": 1.046875, "learning_rate": 6.711304548203446e-05, "loss": 1.321, "step": 2313 }, { "epoch": 0.3648063748674291, "grad_norm": 0.92578125, "learning_rate": 6.710869785760214e-05, "loss": 1.1431, "step": 2314 }, { "epoch": 0.36496402671482214, "grad_norm": 0.875, "learning_rate": 6.710435030288105e-05, "loss": 1.0735, "step": 2315 }, { "epoch": 0.36512167856221517, "grad_norm": 0.8671875, "learning_rate": 6.710000281788041e-05, "loss": 0.8637, "step": 2316 }, { "epoch": 0.36527933040960814, "grad_norm": 1.0078125, "learning_rate": 6.709565540260944e-05, "loss": 1.105, "step": 2317 }, { "epoch": 0.36543698225700116, "grad_norm": 0.97265625, "learning_rate": 6.709130805707731e-05, "loss": 1.1873, "step": 2318 }, { "epoch": 0.3655946341043942, "grad_norm": 1.0, "learning_rate": 6.70869607812933e-05, "loss": 1.0989, "step": 2319 }, { "epoch": 0.3657522859517872, "grad_norm": 1.0078125, "learning_rate": 6.70826135752666e-05, "loss": 1.0525, "step": 2320 }, { "epoch": 0.3659099377991802, "grad_norm": 0.87890625, "learning_rate": 6.707826643900641e-05, "loss": 1.0107, "step": 2321 }, { "epoch": 0.3660675896465732, "grad_norm": 0.890625, "learning_rate": 6.707391937252196e-05, "loss": 1.1362, "step": 2322 }, { "epoch": 0.36622524149396624, "grad_norm": 0.94921875, "learning_rate": 6.706957237582243e-05, "loss": 1.0402, "step": 2323 }, { "epoch": 0.36638289334135926, "grad_norm": 0.9296875, "learning_rate": 6.706522544891709e-05, "loss": 0.9442, "step": 2324 }, { "epoch": 0.36654054518875223, "grad_norm": 0.9765625, "learning_rate": 6.706087859181512e-05, "loss": 1.2302, "step": 2325 }, { "epoch": 0.36669819703614526, "grad_norm": 0.953125, "learning_rate": 6.705653180452574e-05, "loss": 1.0468, "step": 2326 }, { "epoch": 0.3668558488835383, "grad_norm": 1.0859375, "learning_rate": 6.705218508705814e-05, "loss": 1.2013, "step": 2327 }, { "epoch": 0.3670135007309313, "grad_norm": 0.9609375, "learning_rate": 6.704783843942155e-05, "loss": 1.0828, "step": 2328 }, { "epoch": 0.3671711525783243, "grad_norm": 1.1171875, "learning_rate": 6.704349186162517e-05, "loss": 1.1, "step": 2329 }, { "epoch": 0.3673288044257173, "grad_norm": 0.90625, "learning_rate": 6.703914535367825e-05, "loss": 0.9595, "step": 2330 }, { "epoch": 0.36748645627311033, "grad_norm": 0.98046875, "learning_rate": 6.703479891558998e-05, "loss": 1.1449, "step": 2331 }, { "epoch": 0.36764410812050335, "grad_norm": 0.96875, "learning_rate": 6.703045254736957e-05, "loss": 1.056, "step": 2332 }, { "epoch": 0.3678017599678963, "grad_norm": 1.1015625, "learning_rate": 6.702610624902621e-05, "loss": 1.1571, "step": 2333 }, { "epoch": 0.36795941181528935, "grad_norm": 0.94921875, "learning_rate": 6.702176002056915e-05, "loss": 1.0523, "step": 2334 }, { "epoch": 0.3681170636626824, "grad_norm": 0.83984375, "learning_rate": 6.70174138620076e-05, "loss": 0.9799, "step": 2335 }, { "epoch": 0.3682747155100754, "grad_norm": 1.0234375, "learning_rate": 6.701306777335072e-05, "loss": 1.0972, "step": 2336 }, { "epoch": 0.36843236735746837, "grad_norm": 0.8984375, "learning_rate": 6.70087217546078e-05, "loss": 1.2335, "step": 2337 }, { "epoch": 0.3685900192048614, "grad_norm": 0.98046875, "learning_rate": 6.700437580578793e-05, "loss": 1.1157, "step": 2338 }, { "epoch": 0.3687476710522544, "grad_norm": 0.9296875, "learning_rate": 6.700002992690046e-05, "loss": 1.0694, "step": 2339 }, { "epoch": 0.36890532289964745, "grad_norm": 0.8984375, "learning_rate": 6.699568411795455e-05, "loss": 0.9397, "step": 2340 }, { "epoch": 0.3690629747470404, "grad_norm": 1.03125, "learning_rate": 6.699133837895938e-05, "loss": 1.1, "step": 2341 }, { "epoch": 0.36922062659443344, "grad_norm": 0.91015625, "learning_rate": 6.69869927099242e-05, "loss": 1.0253, "step": 2342 }, { "epoch": 0.36937827844182647, "grad_norm": 0.82421875, "learning_rate": 6.698264711085816e-05, "loss": 0.8466, "step": 2343 }, { "epoch": 0.3695359302892195, "grad_norm": 0.99609375, "learning_rate": 6.697830158177054e-05, "loss": 1.078, "step": 2344 }, { "epoch": 0.36969358213661246, "grad_norm": 0.8984375, "learning_rate": 6.697395612267052e-05, "loss": 0.9842, "step": 2345 }, { "epoch": 0.3698512339840055, "grad_norm": 0.91796875, "learning_rate": 6.696961073356733e-05, "loss": 1.0337, "step": 2346 }, { "epoch": 0.3700088858313985, "grad_norm": 0.90625, "learning_rate": 6.696526541447015e-05, "loss": 1.1329, "step": 2347 }, { "epoch": 0.37016653767879154, "grad_norm": 0.99609375, "learning_rate": 6.696092016538818e-05, "loss": 0.9966, "step": 2348 }, { "epoch": 0.37032418952618457, "grad_norm": 0.78125, "learning_rate": 6.695657498633067e-05, "loss": 0.978, "step": 2349 }, { "epoch": 0.37048184137357754, "grad_norm": 0.97265625, "learning_rate": 6.695222987730681e-05, "loss": 1.1548, "step": 2350 }, { "epoch": 0.37063949322097056, "grad_norm": 1.0078125, "learning_rate": 6.694788483832581e-05, "loss": 1.0842, "step": 2351 }, { "epoch": 0.3707971450683636, "grad_norm": 1.0546875, "learning_rate": 6.69435398693969e-05, "loss": 1.223, "step": 2352 }, { "epoch": 0.3709547969157566, "grad_norm": 0.85546875, "learning_rate": 6.693919497052923e-05, "loss": 1.249, "step": 2353 }, { "epoch": 0.3711124487631496, "grad_norm": 0.8984375, "learning_rate": 6.693485014173207e-05, "loss": 1.15, "step": 2354 }, { "epoch": 0.3712701006105426, "grad_norm": 0.87890625, "learning_rate": 6.693050538301461e-05, "loss": 0.9304, "step": 2355 }, { "epoch": 0.37142775245793563, "grad_norm": 0.87109375, "learning_rate": 6.692616069438604e-05, "loss": 1.0236, "step": 2356 }, { "epoch": 0.37158540430532866, "grad_norm": 1.015625, "learning_rate": 6.692181607585559e-05, "loss": 1.1574, "step": 2357 }, { "epoch": 0.37174305615272163, "grad_norm": 0.8828125, "learning_rate": 6.691747152743247e-05, "loss": 1.0662, "step": 2358 }, { "epoch": 0.37190070800011465, "grad_norm": 0.97265625, "learning_rate": 6.691312704912582e-05, "loss": 1.0855, "step": 2359 }, { "epoch": 0.3720583598475077, "grad_norm": 0.94140625, "learning_rate": 6.690878264094496e-05, "loss": 0.8673, "step": 2360 }, { "epoch": 0.3722160116949007, "grad_norm": 0.92578125, "learning_rate": 6.690443830289904e-05, "loss": 1.0486, "step": 2361 }, { "epoch": 0.3723736635422937, "grad_norm": 0.91015625, "learning_rate": 6.690009403499728e-05, "loss": 1.0667, "step": 2362 }, { "epoch": 0.3725313153896867, "grad_norm": 0.89453125, "learning_rate": 6.689574983724887e-05, "loss": 0.9216, "step": 2363 }, { "epoch": 0.3726889672370797, "grad_norm": 0.90625, "learning_rate": 6.689140570966298e-05, "loss": 1.0748, "step": 2364 }, { "epoch": 0.37284661908447275, "grad_norm": 0.9375, "learning_rate": 6.688706165224891e-05, "loss": 1.2729, "step": 2365 }, { "epoch": 0.3730042709318657, "grad_norm": 0.82421875, "learning_rate": 6.688271766501581e-05, "loss": 1.0533, "step": 2366 }, { "epoch": 0.37316192277925875, "grad_norm": 0.8671875, "learning_rate": 6.687837374797291e-05, "loss": 1.0134, "step": 2367 }, { "epoch": 0.3733195746266518, "grad_norm": 0.9375, "learning_rate": 6.68740299011294e-05, "loss": 1.068, "step": 2368 }, { "epoch": 0.3734772264740448, "grad_norm": 0.98828125, "learning_rate": 6.686968612449445e-05, "loss": 1.0722, "step": 2369 }, { "epoch": 0.37363487832143777, "grad_norm": 0.94140625, "learning_rate": 6.686534241807734e-05, "loss": 1.0215, "step": 2370 }, { "epoch": 0.3737925301688308, "grad_norm": 0.90625, "learning_rate": 6.686099878188725e-05, "loss": 1.0173, "step": 2371 }, { "epoch": 0.3739501820162238, "grad_norm": 0.81640625, "learning_rate": 6.685665521593339e-05, "loss": 0.7034, "step": 2372 }, { "epoch": 0.37410783386361685, "grad_norm": 0.8828125, "learning_rate": 6.685231172022494e-05, "loss": 1.063, "step": 2373 }, { "epoch": 0.3742654857110098, "grad_norm": 0.89453125, "learning_rate": 6.68479682947711e-05, "loss": 0.9328, "step": 2374 }, { "epoch": 0.37442313755840284, "grad_norm": 0.8828125, "learning_rate": 6.684362493958112e-05, "loss": 0.9069, "step": 2375 }, { "epoch": 0.37458078940579587, "grad_norm": 0.9375, "learning_rate": 6.683928165466418e-05, "loss": 1.0216, "step": 2376 }, { "epoch": 0.3747384412531889, "grad_norm": 1.109375, "learning_rate": 6.683493844002949e-05, "loss": 1.0683, "step": 2377 }, { "epoch": 0.37489609310058186, "grad_norm": 0.921875, "learning_rate": 6.683059529568622e-05, "loss": 0.9968, "step": 2378 }, { "epoch": 0.3750537449479749, "grad_norm": 0.92578125, "learning_rate": 6.682625222164362e-05, "loss": 1.1002, "step": 2379 }, { "epoch": 0.3752113967953679, "grad_norm": 0.91796875, "learning_rate": 6.682190921791091e-05, "loss": 1.2517, "step": 2380 }, { "epoch": 0.37536904864276094, "grad_norm": 0.9140625, "learning_rate": 6.681756628449727e-05, "loss": 1.1317, "step": 2381 }, { "epoch": 0.3755267004901539, "grad_norm": 0.875, "learning_rate": 6.681322342141188e-05, "loss": 0.9104, "step": 2382 }, { "epoch": 0.37568435233754693, "grad_norm": 0.9140625, "learning_rate": 6.680888062866397e-05, "loss": 1.2512, "step": 2383 }, { "epoch": 0.37584200418493996, "grad_norm": 0.86328125, "learning_rate": 6.680453790626271e-05, "loss": 1.1783, "step": 2384 }, { "epoch": 0.375999656032333, "grad_norm": 1.0234375, "learning_rate": 6.680019525421737e-05, "loss": 1.2468, "step": 2385 }, { "epoch": 0.37615730787972596, "grad_norm": 1.140625, "learning_rate": 6.679585267253712e-05, "loss": 1.0994, "step": 2386 }, { "epoch": 0.376314959727119, "grad_norm": 0.921875, "learning_rate": 6.679151016123116e-05, "loss": 1.1314, "step": 2387 }, { "epoch": 0.376472611574512, "grad_norm": 1.0078125, "learning_rate": 6.67871677203087e-05, "loss": 1.1308, "step": 2388 }, { "epoch": 0.37663026342190503, "grad_norm": 0.8984375, "learning_rate": 6.67828253497789e-05, "loss": 1.0111, "step": 2389 }, { "epoch": 0.376787915269298, "grad_norm": 0.9453125, "learning_rate": 6.677848304965104e-05, "loss": 0.974, "step": 2390 }, { "epoch": 0.376945567116691, "grad_norm": 0.83984375, "learning_rate": 6.67741408199343e-05, "loss": 0.9218, "step": 2391 }, { "epoch": 0.37710321896408405, "grad_norm": 0.8359375, "learning_rate": 6.676979866063784e-05, "loss": 0.9949, "step": 2392 }, { "epoch": 0.3772608708114771, "grad_norm": 0.93359375, "learning_rate": 6.676545657177091e-05, "loss": 1.2279, "step": 2393 }, { "epoch": 0.37741852265887005, "grad_norm": 0.9375, "learning_rate": 6.676111455334269e-05, "loss": 1.0802, "step": 2394 }, { "epoch": 0.3775761745062631, "grad_norm": 1.015625, "learning_rate": 6.675677260536237e-05, "loss": 1.1283, "step": 2395 }, { "epoch": 0.3777338263536561, "grad_norm": 1.046875, "learning_rate": 6.675243072783919e-05, "loss": 1.1182, "step": 2396 }, { "epoch": 0.3778914782010491, "grad_norm": 0.94140625, "learning_rate": 6.674808892078232e-05, "loss": 1.0163, "step": 2397 }, { "epoch": 0.3780491300484421, "grad_norm": 0.859375, "learning_rate": 6.6743747184201e-05, "loss": 0.9343, "step": 2398 }, { "epoch": 0.3782067818958351, "grad_norm": 0.96484375, "learning_rate": 6.673940551810438e-05, "loss": 0.9897, "step": 2399 }, { "epoch": 0.37836443374322815, "grad_norm": 1.0546875, "learning_rate": 6.673506392250171e-05, "loss": 1.0946, "step": 2400 }, { "epoch": 0.37852208559062117, "grad_norm": 0.9375, "learning_rate": 6.673072239740216e-05, "loss": 1.0103, "step": 2401 }, { "epoch": 0.37867973743801414, "grad_norm": 0.890625, "learning_rate": 6.672638094281494e-05, "loss": 1.0068, "step": 2402 }, { "epoch": 0.37883738928540717, "grad_norm": 1.03125, "learning_rate": 6.672203955874928e-05, "loss": 1.213, "step": 2403 }, { "epoch": 0.3789950411328002, "grad_norm": 1.0546875, "learning_rate": 6.671769824521427e-05, "loss": 1.0481, "step": 2404 }, { "epoch": 0.3791526929801932, "grad_norm": 0.8671875, "learning_rate": 6.671335700221927e-05, "loss": 0.9311, "step": 2405 }, { "epoch": 0.3793103448275862, "grad_norm": 1.0234375, "learning_rate": 6.670901582977342e-05, "loss": 1.2619, "step": 2406 }, { "epoch": 0.3794679966749792, "grad_norm": 0.9765625, "learning_rate": 6.670467472788589e-05, "loss": 1.101, "step": 2407 }, { "epoch": 0.37962564852237224, "grad_norm": 0.8515625, "learning_rate": 6.670033369656589e-05, "loss": 1.0371, "step": 2408 }, { "epoch": 0.37978330036976526, "grad_norm": 0.95703125, "learning_rate": 6.66959927358226e-05, "loss": 1.1082, "step": 2409 }, { "epoch": 0.37994095221715823, "grad_norm": 0.90234375, "learning_rate": 6.669165184566528e-05, "loss": 1.0961, "step": 2410 }, { "epoch": 0.38009860406455126, "grad_norm": 0.99609375, "learning_rate": 6.66873110261031e-05, "loss": 1.0553, "step": 2411 }, { "epoch": 0.3802562559119443, "grad_norm": 0.953125, "learning_rate": 6.668297027714527e-05, "loss": 0.8685, "step": 2412 }, { "epoch": 0.3804139077593373, "grad_norm": 0.984375, "learning_rate": 6.667862959880098e-05, "loss": 0.9549, "step": 2413 }, { "epoch": 0.3805715596067303, "grad_norm": 1.0078125, "learning_rate": 6.667428899107942e-05, "loss": 1.2299, "step": 2414 }, { "epoch": 0.3807292114541233, "grad_norm": 0.91015625, "learning_rate": 6.666994845398978e-05, "loss": 0.9016, "step": 2415 }, { "epoch": 0.38088686330151633, "grad_norm": 0.9453125, "learning_rate": 6.666560798754131e-05, "loss": 1.2052, "step": 2416 }, { "epoch": 0.38104451514890936, "grad_norm": 0.96484375, "learning_rate": 6.666126759174319e-05, "loss": 1.1808, "step": 2417 }, { "epoch": 0.3812021669963023, "grad_norm": 0.96875, "learning_rate": 6.665692726660457e-05, "loss": 1.1925, "step": 2418 }, { "epoch": 0.38135981884369535, "grad_norm": 0.94921875, "learning_rate": 6.66525870121347e-05, "loss": 1.05, "step": 2419 }, { "epoch": 0.3815174706910884, "grad_norm": 0.96484375, "learning_rate": 6.664824682834277e-05, "loss": 1.0083, "step": 2420 }, { "epoch": 0.3816751225384814, "grad_norm": 0.9296875, "learning_rate": 6.664390671523799e-05, "loss": 1.1218, "step": 2421 }, { "epoch": 0.3818327743858744, "grad_norm": 0.89453125, "learning_rate": 6.663956667282953e-05, "loss": 1.1959, "step": 2422 }, { "epoch": 0.3819904262332674, "grad_norm": 1.234375, "learning_rate": 6.663522670112661e-05, "loss": 1.0978, "step": 2423 }, { "epoch": 0.3821480780806604, "grad_norm": 0.859375, "learning_rate": 6.663088680013837e-05, "loss": 1.0639, "step": 2424 }, { "epoch": 0.38230572992805345, "grad_norm": 0.921875, "learning_rate": 6.662654696987409e-05, "loss": 0.8288, "step": 2425 }, { "epoch": 0.3824633817754464, "grad_norm": 0.9296875, "learning_rate": 6.662220721034296e-05, "loss": 0.864, "step": 2426 }, { "epoch": 0.38262103362283945, "grad_norm": 0.86328125, "learning_rate": 6.661786752155415e-05, "loss": 0.9253, "step": 2427 }, { "epoch": 0.38277868547023247, "grad_norm": 1.0078125, "learning_rate": 6.661352790351685e-05, "loss": 1.0914, "step": 2428 }, { "epoch": 0.3829363373176255, "grad_norm": 1.03125, "learning_rate": 6.660918835624023e-05, "loss": 1.1623, "step": 2429 }, { "epoch": 0.38309398916501847, "grad_norm": 0.953125, "learning_rate": 6.660484887973357e-05, "loss": 1.0313, "step": 2430 }, { "epoch": 0.3832516410124115, "grad_norm": 0.9296875, "learning_rate": 6.660050947400603e-05, "loss": 1.1663, "step": 2431 }, { "epoch": 0.3834092928598045, "grad_norm": 0.9140625, "learning_rate": 6.65961701390668e-05, "loss": 0.9767, "step": 2432 }, { "epoch": 0.38356694470719754, "grad_norm": 0.89453125, "learning_rate": 6.659183087492507e-05, "loss": 1.1642, "step": 2433 }, { "epoch": 0.3837245965545905, "grad_norm": 0.94140625, "learning_rate": 6.658749168159e-05, "loss": 1.0965, "step": 2434 }, { "epoch": 0.38388224840198354, "grad_norm": 0.90625, "learning_rate": 6.65831525590709e-05, "loss": 1.0116, "step": 2435 }, { "epoch": 0.38403990024937656, "grad_norm": 0.91796875, "learning_rate": 6.657881350737687e-05, "loss": 1.2105, "step": 2436 }, { "epoch": 0.3841975520967696, "grad_norm": 0.90625, "learning_rate": 6.657447452651715e-05, "loss": 1.0967, "step": 2437 }, { "epoch": 0.38435520394416256, "grad_norm": 0.9375, "learning_rate": 6.657013561650092e-05, "loss": 1.1027, "step": 2438 }, { "epoch": 0.3845128557915556, "grad_norm": 0.94140625, "learning_rate": 6.656579677733734e-05, "loss": 1.0223, "step": 2439 }, { "epoch": 0.3846705076389486, "grad_norm": 0.98828125, "learning_rate": 6.656145800903569e-05, "loss": 1.0253, "step": 2440 }, { "epoch": 0.38482815948634164, "grad_norm": 0.99609375, "learning_rate": 6.655711931160509e-05, "loss": 1.1558, "step": 2441 }, { "epoch": 0.3849858113337346, "grad_norm": 0.99609375, "learning_rate": 6.655278068505478e-05, "loss": 1.2671, "step": 2442 }, { "epoch": 0.38514346318112763, "grad_norm": 0.875, "learning_rate": 6.654844212939393e-05, "loss": 1.1614, "step": 2443 }, { "epoch": 0.38530111502852066, "grad_norm": 0.98828125, "learning_rate": 6.65441036446317e-05, "loss": 1.0869, "step": 2444 }, { "epoch": 0.3854587668759137, "grad_norm": 0.859375, "learning_rate": 6.653976523077739e-05, "loss": 0.9522, "step": 2445 }, { "epoch": 0.38561641872330665, "grad_norm": 0.953125, "learning_rate": 6.653542688784014e-05, "loss": 1.0814, "step": 2446 }, { "epoch": 0.3857740705706997, "grad_norm": 0.91796875, "learning_rate": 6.653108861582912e-05, "loss": 1.165, "step": 2447 }, { "epoch": 0.3859317224180927, "grad_norm": 0.97265625, "learning_rate": 6.652675041475355e-05, "loss": 1.0485, "step": 2448 }, { "epoch": 0.38608937426548573, "grad_norm": 0.88671875, "learning_rate": 6.652241228462258e-05, "loss": 1.0481, "step": 2449 }, { "epoch": 0.3862470261128787, "grad_norm": 0.89453125, "learning_rate": 6.651807422544548e-05, "loss": 0.841, "step": 2450 }, { "epoch": 0.3864046779602717, "grad_norm": 0.8203125, "learning_rate": 6.651373623723142e-05, "loss": 0.7862, "step": 2451 }, { "epoch": 0.38656232980766475, "grad_norm": 0.99609375, "learning_rate": 6.650939831998956e-05, "loss": 1.0859, "step": 2452 }, { "epoch": 0.3867199816550578, "grad_norm": 0.87890625, "learning_rate": 6.650506047372915e-05, "loss": 0.8709, "step": 2453 }, { "epoch": 0.38687763350245075, "grad_norm": 0.91015625, "learning_rate": 6.650072269845928e-05, "loss": 0.8493, "step": 2454 }, { "epoch": 0.38703528534984377, "grad_norm": 0.9140625, "learning_rate": 6.649638499418926e-05, "loss": 0.9746, "step": 2455 }, { "epoch": 0.3871929371972368, "grad_norm": 0.875, "learning_rate": 6.649204736092825e-05, "loss": 0.9267, "step": 2456 }, { "epoch": 0.3873505890446298, "grad_norm": 0.890625, "learning_rate": 6.648770979868542e-05, "loss": 0.9865, "step": 2457 }, { "epoch": 0.3875082408920228, "grad_norm": 0.921875, "learning_rate": 6.648337230746998e-05, "loss": 0.8834, "step": 2458 }, { "epoch": 0.3876658927394158, "grad_norm": 0.8515625, "learning_rate": 6.647903488729108e-05, "loss": 1.0119, "step": 2459 }, { "epoch": 0.38782354458680884, "grad_norm": 0.94140625, "learning_rate": 6.647469753815799e-05, "loss": 0.8676, "step": 2460 }, { "epoch": 0.38798119643420187, "grad_norm": 0.8828125, "learning_rate": 6.647036026007984e-05, "loss": 1.0225, "step": 2461 }, { "epoch": 0.38813884828159484, "grad_norm": 0.90625, "learning_rate": 6.646602305306587e-05, "loss": 0.9215, "step": 2462 }, { "epoch": 0.38829650012898786, "grad_norm": 0.83984375, "learning_rate": 6.646168591712521e-05, "loss": 0.8573, "step": 2463 }, { "epoch": 0.3884541519763809, "grad_norm": 0.9609375, "learning_rate": 6.645734885226709e-05, "loss": 1.016, "step": 2464 }, { "epoch": 0.3886118038237739, "grad_norm": 0.96875, "learning_rate": 6.645301185850072e-05, "loss": 1.0788, "step": 2465 }, { "epoch": 0.3887694556711669, "grad_norm": 0.875, "learning_rate": 6.644867493583529e-05, "loss": 1.1469, "step": 2466 }, { "epoch": 0.3889271075185599, "grad_norm": 0.92578125, "learning_rate": 6.644433808427996e-05, "loss": 0.9306, "step": 2467 }, { "epoch": 0.38908475936595294, "grad_norm": 0.859375, "learning_rate": 6.644000130384396e-05, "loss": 0.8761, "step": 2468 }, { "epoch": 0.38924241121334596, "grad_norm": 0.875, "learning_rate": 6.64356645945364e-05, "loss": 1.0406, "step": 2469 }, { "epoch": 0.38940006306073893, "grad_norm": 0.921875, "learning_rate": 6.643132795636657e-05, "loss": 1.0771, "step": 2470 }, { "epoch": 0.38955771490813196, "grad_norm": 0.9296875, "learning_rate": 6.642699138934363e-05, "loss": 1.0681, "step": 2471 }, { "epoch": 0.389715366755525, "grad_norm": 0.98046875, "learning_rate": 6.642265489347675e-05, "loss": 1.099, "step": 2472 }, { "epoch": 0.389873018602918, "grad_norm": 0.9921875, "learning_rate": 6.641831846877514e-05, "loss": 0.9692, "step": 2473 }, { "epoch": 0.390030670450311, "grad_norm": 0.9765625, "learning_rate": 6.641398211524795e-05, "loss": 1.0655, "step": 2474 }, { "epoch": 0.390188322297704, "grad_norm": 0.96875, "learning_rate": 6.640964583290444e-05, "loss": 1.0029, "step": 2475 }, { "epoch": 0.39034597414509703, "grad_norm": 1.0, "learning_rate": 6.640530962175377e-05, "loss": 0.9785, "step": 2476 }, { "epoch": 0.39050362599249006, "grad_norm": 0.89453125, "learning_rate": 6.640097348180512e-05, "loss": 1.0944, "step": 2477 }, { "epoch": 0.390661277839883, "grad_norm": 0.9453125, "learning_rate": 6.63966374130677e-05, "loss": 1.0633, "step": 2478 }, { "epoch": 0.39081892968727605, "grad_norm": 1.0078125, "learning_rate": 6.639230141555065e-05, "loss": 1.241, "step": 2479 }, { "epoch": 0.3909765815346691, "grad_norm": 0.9296875, "learning_rate": 6.638796548926323e-05, "loss": 0.8372, "step": 2480 }, { "epoch": 0.3911342333820621, "grad_norm": 0.99609375, "learning_rate": 6.638362963421459e-05, "loss": 1.2442, "step": 2481 }, { "epoch": 0.39129188522945507, "grad_norm": 0.94140625, "learning_rate": 6.637929385041389e-05, "loss": 0.8778, "step": 2482 }, { "epoch": 0.3914495370768481, "grad_norm": 0.9140625, "learning_rate": 6.63749581378704e-05, "loss": 1.0154, "step": 2483 }, { "epoch": 0.3916071889242411, "grad_norm": 0.97265625, "learning_rate": 6.637062249659323e-05, "loss": 1.1208, "step": 2484 }, { "epoch": 0.39176484077163415, "grad_norm": 0.9453125, "learning_rate": 6.636628692659162e-05, "loss": 1.0515, "step": 2485 }, { "epoch": 0.3919224926190271, "grad_norm": 0.83984375, "learning_rate": 6.636195142787475e-05, "loss": 0.8469, "step": 2486 }, { "epoch": 0.39208014446642014, "grad_norm": 0.984375, "learning_rate": 6.635761600045181e-05, "loss": 1.192, "step": 2487 }, { "epoch": 0.39223779631381317, "grad_norm": 0.93359375, "learning_rate": 6.635328064433197e-05, "loss": 0.9389, "step": 2488 }, { "epoch": 0.3923954481612062, "grad_norm": 0.94140625, "learning_rate": 6.634894535952438e-05, "loss": 1.1853, "step": 2489 }, { "epoch": 0.39255310000859917, "grad_norm": 0.921875, "learning_rate": 6.634461014603832e-05, "loss": 0.9414, "step": 2490 }, { "epoch": 0.3927107518559922, "grad_norm": 0.93359375, "learning_rate": 6.634027500388295e-05, "loss": 1.0023, "step": 2491 }, { "epoch": 0.3928684037033852, "grad_norm": 1.0234375, "learning_rate": 6.633593993306744e-05, "loss": 1.1227, "step": 2492 }, { "epoch": 0.39302605555077824, "grad_norm": 1.0, "learning_rate": 6.633160493360097e-05, "loss": 1.0954, "step": 2493 }, { "epoch": 0.3931837073981712, "grad_norm": 0.89453125, "learning_rate": 6.63272700054927e-05, "loss": 1.0853, "step": 2494 }, { "epoch": 0.39334135924556424, "grad_norm": 0.97265625, "learning_rate": 6.632293514875191e-05, "loss": 1.0914, "step": 2495 }, { "epoch": 0.39349901109295726, "grad_norm": 0.87109375, "learning_rate": 6.631860036338773e-05, "loss": 1.01, "step": 2496 }, { "epoch": 0.3936566629403503, "grad_norm": 0.86328125, "learning_rate": 6.631426564940935e-05, "loss": 1.0759, "step": 2497 }, { "epoch": 0.39381431478774326, "grad_norm": 0.89453125, "learning_rate": 6.630993100682595e-05, "loss": 1.0284, "step": 2498 }, { "epoch": 0.3939719666351363, "grad_norm": 0.98046875, "learning_rate": 6.630559643564673e-05, "loss": 1.1537, "step": 2499 }, { "epoch": 0.3941296184825293, "grad_norm": 0.78515625, "learning_rate": 6.630126193588082e-05, "loss": 0.9391, "step": 2500 }, { "epoch": 0.39428727032992233, "grad_norm": 0.8203125, "learning_rate": 6.629692750753752e-05, "loss": 0.9273, "step": 2501 }, { "epoch": 0.3944449221773153, "grad_norm": 0.95703125, "learning_rate": 6.629259315062594e-05, "loss": 1.0176, "step": 2502 }, { "epoch": 0.39460257402470833, "grad_norm": 0.8984375, "learning_rate": 6.62882588651553e-05, "loss": 1.0719, "step": 2503 }, { "epoch": 0.39476022587210136, "grad_norm": 0.8671875, "learning_rate": 6.628392465113475e-05, "loss": 1.1594, "step": 2504 }, { "epoch": 0.3949178777194944, "grad_norm": 0.96875, "learning_rate": 6.627959050857348e-05, "loss": 1.1225, "step": 2505 }, { "epoch": 0.3950755295668874, "grad_norm": 0.98046875, "learning_rate": 6.627525643748073e-05, "loss": 1.0193, "step": 2506 }, { "epoch": 0.3952331814142804, "grad_norm": 1.0390625, "learning_rate": 6.627092243786562e-05, "loss": 1.2507, "step": 2507 }, { "epoch": 0.3953908332616734, "grad_norm": 0.94921875, "learning_rate": 6.626658850973737e-05, "loss": 0.9266, "step": 2508 }, { "epoch": 0.39554848510906643, "grad_norm": 1.0078125, "learning_rate": 6.626225465310517e-05, "loss": 1.252, "step": 2509 }, { "epoch": 0.39570613695645945, "grad_norm": 0.9296875, "learning_rate": 6.625792086797814e-05, "loss": 1.0576, "step": 2510 }, { "epoch": 0.3958637888038524, "grad_norm": 0.953125, "learning_rate": 6.625358715436556e-05, "loss": 1.1978, "step": 2511 }, { "epoch": 0.39602144065124545, "grad_norm": 1.0390625, "learning_rate": 6.624925351227658e-05, "loss": 1.1091, "step": 2512 }, { "epoch": 0.3961790924986385, "grad_norm": 0.84375, "learning_rate": 6.624491994172037e-05, "loss": 0.9117, "step": 2513 }, { "epoch": 0.3963367443460315, "grad_norm": 1.03125, "learning_rate": 6.624058644270613e-05, "loss": 1.0061, "step": 2514 }, { "epoch": 0.39649439619342447, "grad_norm": 0.98828125, "learning_rate": 6.623625301524299e-05, "loss": 1.0533, "step": 2515 }, { "epoch": 0.3966520480408175, "grad_norm": 0.93359375, "learning_rate": 6.623191965934021e-05, "loss": 1.208, "step": 2516 }, { "epoch": 0.3968096998882105, "grad_norm": 1.0078125, "learning_rate": 6.622758637500697e-05, "loss": 1.1789, "step": 2517 }, { "epoch": 0.39696735173560355, "grad_norm": 0.953125, "learning_rate": 6.622325316225243e-05, "loss": 1.2051, "step": 2518 }, { "epoch": 0.3971250035829965, "grad_norm": 0.8359375, "learning_rate": 6.621892002108576e-05, "loss": 0.9699, "step": 2519 }, { "epoch": 0.39728265543038954, "grad_norm": 0.8828125, "learning_rate": 6.621458695151612e-05, "loss": 0.8761, "step": 2520 }, { "epoch": 0.39744030727778257, "grad_norm": 0.9609375, "learning_rate": 6.621025395355279e-05, "loss": 0.9464, "step": 2521 }, { "epoch": 0.3975979591251756, "grad_norm": 0.8671875, "learning_rate": 6.620592102720487e-05, "loss": 0.9853, "step": 2522 }, { "epoch": 0.39775561097256856, "grad_norm": 0.90625, "learning_rate": 6.620158817248157e-05, "loss": 0.9904, "step": 2523 }, { "epoch": 0.3979132628199616, "grad_norm": 1.0, "learning_rate": 6.619725538939209e-05, "loss": 1.1307, "step": 2524 }, { "epoch": 0.3980709146673546, "grad_norm": 0.99609375, "learning_rate": 6.619292267794557e-05, "loss": 1.3308, "step": 2525 }, { "epoch": 0.39822856651474764, "grad_norm": 1.0703125, "learning_rate": 6.618859003815123e-05, "loss": 1.0709, "step": 2526 }, { "epoch": 0.3983862183621406, "grad_norm": 0.953125, "learning_rate": 6.618425747001824e-05, "loss": 1.3499, "step": 2527 }, { "epoch": 0.39854387020953363, "grad_norm": 0.94140625, "learning_rate": 6.61799249735558e-05, "loss": 1.0349, "step": 2528 }, { "epoch": 0.39870152205692666, "grad_norm": 0.9140625, "learning_rate": 6.617559254877305e-05, "loss": 0.984, "step": 2529 }, { "epoch": 0.3988591739043197, "grad_norm": 1.03125, "learning_rate": 6.617126019567918e-05, "loss": 1.1902, "step": 2530 }, { "epoch": 0.39901682575171266, "grad_norm": 0.84765625, "learning_rate": 6.616692791428343e-05, "loss": 0.8985, "step": 2531 }, { "epoch": 0.3991744775991057, "grad_norm": 0.98828125, "learning_rate": 6.616259570459493e-05, "loss": 1.1675, "step": 2532 }, { "epoch": 0.3993321294464987, "grad_norm": 1.078125, "learning_rate": 6.615826356662289e-05, "loss": 1.184, "step": 2533 }, { "epoch": 0.39948978129389173, "grad_norm": 0.890625, "learning_rate": 6.615393150037646e-05, "loss": 0.9945, "step": 2534 }, { "epoch": 0.3996474331412847, "grad_norm": 0.9296875, "learning_rate": 6.61495995058648e-05, "loss": 1.0541, "step": 2535 }, { "epoch": 0.39980508498867773, "grad_norm": 0.9609375, "learning_rate": 6.614526758309718e-05, "loss": 1.035, "step": 2536 }, { "epoch": 0.39996273683607075, "grad_norm": 0.94921875, "learning_rate": 6.614093573208272e-05, "loss": 1.2051, "step": 2537 }, { "epoch": 0.4001203886834638, "grad_norm": 0.96875, "learning_rate": 6.613660395283061e-05, "loss": 1.2529, "step": 2538 }, { "epoch": 0.40027804053085675, "grad_norm": 0.83203125, "learning_rate": 6.613227224535004e-05, "loss": 1.0717, "step": 2539 }, { "epoch": 0.4004356923782498, "grad_norm": 0.97265625, "learning_rate": 6.612794060965014e-05, "loss": 0.9188, "step": 2540 }, { "epoch": 0.4005933442256428, "grad_norm": 0.8515625, "learning_rate": 6.612360904574018e-05, "loss": 1.0133, "step": 2541 }, { "epoch": 0.4007509960730358, "grad_norm": 0.98046875, "learning_rate": 6.611927755362929e-05, "loss": 1.0587, "step": 2542 }, { "epoch": 0.4009086479204288, "grad_norm": 1.015625, "learning_rate": 6.611494613332664e-05, "loss": 1.2033, "step": 2543 }, { "epoch": 0.4010662997678218, "grad_norm": 0.921875, "learning_rate": 6.611061478484145e-05, "loss": 0.9407, "step": 2544 }, { "epoch": 0.40122395161521485, "grad_norm": 0.921875, "learning_rate": 6.610628350818284e-05, "loss": 0.9071, "step": 2545 }, { "epoch": 0.40138160346260787, "grad_norm": 1.015625, "learning_rate": 6.610195230336005e-05, "loss": 1.115, "step": 2546 }, { "epoch": 0.40153925531000084, "grad_norm": 0.96875, "learning_rate": 6.609762117038224e-05, "loss": 1.4135, "step": 2547 }, { "epoch": 0.40169690715739387, "grad_norm": 0.98828125, "learning_rate": 6.609329010925855e-05, "loss": 0.9778, "step": 2548 }, { "epoch": 0.4018545590047869, "grad_norm": 0.828125, "learning_rate": 6.608895911999822e-05, "loss": 0.8862, "step": 2549 }, { "epoch": 0.4020122108521799, "grad_norm": 1.0390625, "learning_rate": 6.608462820261039e-05, "loss": 1.1875, "step": 2550 }, { "epoch": 0.4021698626995729, "grad_norm": 0.96875, "learning_rate": 6.608029735710426e-05, "loss": 1.059, "step": 2551 }, { "epoch": 0.4023275145469659, "grad_norm": 0.95703125, "learning_rate": 6.6075966583489e-05, "loss": 1.2092, "step": 2552 }, { "epoch": 0.40248516639435894, "grad_norm": 0.9453125, "learning_rate": 6.60716358817738e-05, "loss": 1.1373, "step": 2553 }, { "epoch": 0.40264281824175197, "grad_norm": 0.9609375, "learning_rate": 6.606730525196783e-05, "loss": 1.0164, "step": 2554 }, { "epoch": 0.40280047008914494, "grad_norm": 0.953125, "learning_rate": 6.606297469408023e-05, "loss": 0.9494, "step": 2555 }, { "epoch": 0.40295812193653796, "grad_norm": 0.89453125, "learning_rate": 6.605864420812024e-05, "loss": 1.0553, "step": 2556 }, { "epoch": 0.403115773783931, "grad_norm": 1.015625, "learning_rate": 6.605431379409702e-05, "loss": 1.0976, "step": 2557 }, { "epoch": 0.403273425631324, "grad_norm": 0.9765625, "learning_rate": 6.604998345201974e-05, "loss": 1.2923, "step": 2558 }, { "epoch": 0.403431077478717, "grad_norm": 0.8671875, "learning_rate": 6.604565318189758e-05, "loss": 0.9522, "step": 2559 }, { "epoch": 0.40358872932611, "grad_norm": 0.90234375, "learning_rate": 6.604132298373969e-05, "loss": 0.9164, "step": 2560 }, { "epoch": 0.40374638117350303, "grad_norm": 0.890625, "learning_rate": 6.603699285755529e-05, "loss": 0.8581, "step": 2561 }, { "epoch": 0.40390403302089606, "grad_norm": 0.9921875, "learning_rate": 6.603266280335356e-05, "loss": 1.3284, "step": 2562 }, { "epoch": 0.40406168486828903, "grad_norm": 0.94921875, "learning_rate": 6.602833282114366e-05, "loss": 1.0006, "step": 2563 }, { "epoch": 0.40421933671568205, "grad_norm": 0.84375, "learning_rate": 6.602400291093476e-05, "loss": 1.0529, "step": 2564 }, { "epoch": 0.4043769885630751, "grad_norm": 0.90234375, "learning_rate": 6.601967307273603e-05, "loss": 1.1403, "step": 2565 }, { "epoch": 0.4045346404104681, "grad_norm": 1.046875, "learning_rate": 6.601534330655668e-05, "loss": 1.3225, "step": 2566 }, { "epoch": 0.4046922922578611, "grad_norm": 0.90625, "learning_rate": 6.601101361240584e-05, "loss": 0.8643, "step": 2567 }, { "epoch": 0.4048499441052541, "grad_norm": 0.95703125, "learning_rate": 6.600668399029275e-05, "loss": 0.9923, "step": 2568 }, { "epoch": 0.4050075959526471, "grad_norm": 0.9453125, "learning_rate": 6.600235444022653e-05, "loss": 0.9787, "step": 2569 }, { "epoch": 0.40516524780004015, "grad_norm": 0.875, "learning_rate": 6.599802496221636e-05, "loss": 1.1355, "step": 2570 }, { "epoch": 0.4053228996474331, "grad_norm": 0.921875, "learning_rate": 6.599369555627146e-05, "loss": 1.0048, "step": 2571 }, { "epoch": 0.40548055149482615, "grad_norm": 0.95703125, "learning_rate": 6.598936622240098e-05, "loss": 1.144, "step": 2572 }, { "epoch": 0.4056382033422192, "grad_norm": 0.94140625, "learning_rate": 6.59850369606141e-05, "loss": 0.9273, "step": 2573 }, { "epoch": 0.4057958551896122, "grad_norm": 1.15625, "learning_rate": 6.598070777091999e-05, "loss": 0.8808, "step": 2574 }, { "epoch": 0.40595350703700517, "grad_norm": 0.9296875, "learning_rate": 6.597637865332776e-05, "loss": 1.0257, "step": 2575 }, { "epoch": 0.4061111588843982, "grad_norm": 0.921875, "learning_rate": 6.59720496078467e-05, "loss": 1.0233, "step": 2576 }, { "epoch": 0.4062688107317912, "grad_norm": 0.81640625, "learning_rate": 6.596772063448595e-05, "loss": 0.9426, "step": 2577 }, { "epoch": 0.40642646257918424, "grad_norm": 0.94921875, "learning_rate": 6.596339173325467e-05, "loss": 0.958, "step": 2578 }, { "epoch": 0.4065841144265772, "grad_norm": 0.9609375, "learning_rate": 6.595906290416203e-05, "loss": 1.1968, "step": 2579 }, { "epoch": 0.40674176627397024, "grad_norm": 1.03125, "learning_rate": 6.595473414721718e-05, "loss": 1.1921, "step": 2580 }, { "epoch": 0.40689941812136327, "grad_norm": 0.984375, "learning_rate": 6.595040546242934e-05, "loss": 1.1397, "step": 2581 }, { "epoch": 0.4070570699687563, "grad_norm": 0.90234375, "learning_rate": 6.59460768498077e-05, "loss": 1.1991, "step": 2582 }, { "epoch": 0.40721472181614926, "grad_norm": 0.9609375, "learning_rate": 6.594174830936139e-05, "loss": 0.9516, "step": 2583 }, { "epoch": 0.4073723736635423, "grad_norm": 0.9609375, "learning_rate": 6.593741984109959e-05, "loss": 1.1191, "step": 2584 }, { "epoch": 0.4075300255109353, "grad_norm": 1.0234375, "learning_rate": 6.593309144503147e-05, "loss": 1.1492, "step": 2585 }, { "epoch": 0.40768767735832834, "grad_norm": 0.90234375, "learning_rate": 6.59287631211662e-05, "loss": 1.0137, "step": 2586 }, { "epoch": 0.4078453292057213, "grad_norm": 0.88671875, "learning_rate": 6.592443486951301e-05, "loss": 0.9528, "step": 2587 }, { "epoch": 0.40800298105311433, "grad_norm": 0.91015625, "learning_rate": 6.592010669008102e-05, "loss": 0.975, "step": 2588 }, { "epoch": 0.40816063290050736, "grad_norm": 0.94921875, "learning_rate": 6.591577858287942e-05, "loss": 1.0767, "step": 2589 }, { "epoch": 0.4083182847479004, "grad_norm": 0.87890625, "learning_rate": 6.591145054791737e-05, "loss": 0.9555, "step": 2590 }, { "epoch": 0.40847593659529335, "grad_norm": 0.91796875, "learning_rate": 6.590712258520406e-05, "loss": 0.9409, "step": 2591 }, { "epoch": 0.4086335884426864, "grad_norm": 0.82421875, "learning_rate": 6.590279469474867e-05, "loss": 0.8437, "step": 2592 }, { "epoch": 0.4087912402900794, "grad_norm": 1.03125, "learning_rate": 6.589846687656033e-05, "loss": 1.2578, "step": 2593 }, { "epoch": 0.40894889213747243, "grad_norm": 1.0546875, "learning_rate": 6.589413913064826e-05, "loss": 1.1839, "step": 2594 }, { "epoch": 0.4091065439848654, "grad_norm": 0.90234375, "learning_rate": 6.588981145702158e-05, "loss": 1.0763, "step": 2595 }, { "epoch": 0.4092641958322584, "grad_norm": 0.91015625, "learning_rate": 6.588548385568952e-05, "loss": 1.0296, "step": 2596 }, { "epoch": 0.40942184767965145, "grad_norm": 1.0390625, "learning_rate": 6.588115632666123e-05, "loss": 1.2016, "step": 2597 }, { "epoch": 0.4095794995270445, "grad_norm": 0.91015625, "learning_rate": 6.587682886994588e-05, "loss": 1.0526, "step": 2598 }, { "epoch": 0.40973715137443745, "grad_norm": 1.0546875, "learning_rate": 6.587250148555264e-05, "loss": 1.1871, "step": 2599 }, { "epoch": 0.4098948032218305, "grad_norm": 0.94921875, "learning_rate": 6.586817417349065e-05, "loss": 1.1411, "step": 2600 }, { "epoch": 0.4100524550692235, "grad_norm": 1.0, "learning_rate": 6.586384693376915e-05, "loss": 1.0411, "step": 2601 }, { "epoch": 0.4102101069166165, "grad_norm": 0.8828125, "learning_rate": 6.585951976639726e-05, "loss": 0.9494, "step": 2602 }, { "epoch": 0.4103677587640095, "grad_norm": 0.98046875, "learning_rate": 6.585519267138418e-05, "loss": 1.0176, "step": 2603 }, { "epoch": 0.4105254106114025, "grad_norm": 1.0, "learning_rate": 6.585086564873908e-05, "loss": 1.0628, "step": 2604 }, { "epoch": 0.41068306245879554, "grad_norm": 0.90234375, "learning_rate": 6.584653869847106e-05, "loss": 1.2245, "step": 2605 }, { "epoch": 0.41084071430618857, "grad_norm": 0.8203125, "learning_rate": 6.584221182058939e-05, "loss": 0.9844, "step": 2606 }, { "epoch": 0.41099836615358154, "grad_norm": 0.8984375, "learning_rate": 6.58378850151032e-05, "loss": 0.9701, "step": 2607 }, { "epoch": 0.41115601800097457, "grad_norm": 0.91796875, "learning_rate": 6.583355828202166e-05, "loss": 1.0095, "step": 2608 }, { "epoch": 0.4113136698483676, "grad_norm": 0.8515625, "learning_rate": 6.582923162135394e-05, "loss": 0.985, "step": 2609 }, { "epoch": 0.4114713216957606, "grad_norm": 0.91796875, "learning_rate": 6.58249050331092e-05, "loss": 0.9441, "step": 2610 }, { "epoch": 0.4116289735431536, "grad_norm": 0.921875, "learning_rate": 6.582057851729664e-05, "loss": 1.13, "step": 2611 }, { "epoch": 0.4117866253905466, "grad_norm": 0.98828125, "learning_rate": 6.581625207392541e-05, "loss": 1.015, "step": 2612 }, { "epoch": 0.41194427723793964, "grad_norm": 0.9609375, "learning_rate": 6.581192570300467e-05, "loss": 1.143, "step": 2613 }, { "epoch": 0.41210192908533266, "grad_norm": 0.91796875, "learning_rate": 6.580759940454361e-05, "loss": 0.981, "step": 2614 }, { "epoch": 0.41225958093272563, "grad_norm": 0.96484375, "learning_rate": 6.580327317855133e-05, "loss": 1.0447, "step": 2615 }, { "epoch": 0.41241723278011866, "grad_norm": 0.93359375, "learning_rate": 6.579894702503712e-05, "loss": 0.9664, "step": 2616 }, { "epoch": 0.4125748846275117, "grad_norm": 0.88671875, "learning_rate": 6.579462094401008e-05, "loss": 1.0837, "step": 2617 }, { "epoch": 0.4127325364749047, "grad_norm": 0.9609375, "learning_rate": 6.579029493547939e-05, "loss": 1.3176, "step": 2618 }, { "epoch": 0.4128901883222977, "grad_norm": 0.890625, "learning_rate": 6.57859689994542e-05, "loss": 0.9244, "step": 2619 }, { "epoch": 0.4130478401696907, "grad_norm": 0.9609375, "learning_rate": 6.578164313594366e-05, "loss": 1.1278, "step": 2620 }, { "epoch": 0.41320549201708373, "grad_norm": 1.0, "learning_rate": 6.577731734495703e-05, "loss": 1.2114, "step": 2621 }, { "epoch": 0.41336314386447676, "grad_norm": 1.0234375, "learning_rate": 6.577299162650338e-05, "loss": 1.0157, "step": 2622 }, { "epoch": 0.4135207957118697, "grad_norm": 0.92578125, "learning_rate": 6.576866598059195e-05, "loss": 0.9643, "step": 2623 }, { "epoch": 0.41367844755926275, "grad_norm": 1.03125, "learning_rate": 6.576434040723188e-05, "loss": 1.1188, "step": 2624 }, { "epoch": 0.4138360994066558, "grad_norm": 0.921875, "learning_rate": 6.576001490643227e-05, "loss": 0.9879, "step": 2625 }, { "epoch": 0.4139937512540488, "grad_norm": 0.8984375, "learning_rate": 6.575568947820239e-05, "loss": 0.8778, "step": 2626 }, { "epoch": 0.4141514031014418, "grad_norm": 0.9140625, "learning_rate": 6.575136412255138e-05, "loss": 0.9882, "step": 2627 }, { "epoch": 0.4143090549488348, "grad_norm": 0.91015625, "learning_rate": 6.574703883948841e-05, "loss": 1.0088, "step": 2628 }, { "epoch": 0.4144667067962278, "grad_norm": 0.9375, "learning_rate": 6.574271362902262e-05, "loss": 1.1282, "step": 2629 }, { "epoch": 0.41462435864362085, "grad_norm": 0.94921875, "learning_rate": 6.573838849116316e-05, "loss": 1.0887, "step": 2630 }, { "epoch": 0.4147820104910138, "grad_norm": 0.9765625, "learning_rate": 6.573406342591927e-05, "loss": 1.1364, "step": 2631 }, { "epoch": 0.41493966233840684, "grad_norm": 0.98828125, "learning_rate": 6.572973843330006e-05, "loss": 0.9321, "step": 2632 }, { "epoch": 0.41509731418579987, "grad_norm": 0.953125, "learning_rate": 6.572541351331472e-05, "loss": 1.0673, "step": 2633 }, { "epoch": 0.4152549660331929, "grad_norm": 0.953125, "learning_rate": 6.572108866597238e-05, "loss": 1.2719, "step": 2634 }, { "epoch": 0.41541261788058587, "grad_norm": 0.93359375, "learning_rate": 6.571676389128223e-05, "loss": 1.1433, "step": 2635 }, { "epoch": 0.4155702697279789, "grad_norm": 1.0546875, "learning_rate": 6.571243918925344e-05, "loss": 1.2131, "step": 2636 }, { "epoch": 0.4157279215753719, "grad_norm": 1.0078125, "learning_rate": 6.570811455989521e-05, "loss": 1.0439, "step": 2637 }, { "epoch": 0.41588557342276494, "grad_norm": 0.9453125, "learning_rate": 6.570379000321667e-05, "loss": 1.0461, "step": 2638 }, { "epoch": 0.4160432252701579, "grad_norm": 1.0625, "learning_rate": 6.569946551922696e-05, "loss": 1.0368, "step": 2639 }, { "epoch": 0.41620087711755094, "grad_norm": 0.8515625, "learning_rate": 6.56951411079353e-05, "loss": 1.1239, "step": 2640 }, { "epoch": 0.41635852896494396, "grad_norm": 0.91015625, "learning_rate": 6.569081676935078e-05, "loss": 1.0256, "step": 2641 }, { "epoch": 0.416516180812337, "grad_norm": 1.0546875, "learning_rate": 6.568649250348264e-05, "loss": 1.2058, "step": 2642 }, { "epoch": 0.41667383265972996, "grad_norm": 0.98828125, "learning_rate": 6.568216831034002e-05, "loss": 1.0007, "step": 2643 }, { "epoch": 0.416831484507123, "grad_norm": 0.96484375, "learning_rate": 6.56778441899321e-05, "loss": 1.2472, "step": 2644 }, { "epoch": 0.416989136354516, "grad_norm": 1.046875, "learning_rate": 6.567352014226802e-05, "loss": 1.0037, "step": 2645 }, { "epoch": 0.41714678820190904, "grad_norm": 0.921875, "learning_rate": 6.566919616735689e-05, "loss": 1.0668, "step": 2646 }, { "epoch": 0.417304440049302, "grad_norm": 0.90234375, "learning_rate": 6.566487226520801e-05, "loss": 1.0446, "step": 2647 }, { "epoch": 0.41746209189669503, "grad_norm": 0.94921875, "learning_rate": 6.566054843583045e-05, "loss": 1.1741, "step": 2648 }, { "epoch": 0.41761974374408806, "grad_norm": 0.91796875, "learning_rate": 6.56562246792334e-05, "loss": 0.8666, "step": 2649 }, { "epoch": 0.4177773955914811, "grad_norm": 0.921875, "learning_rate": 6.565190099542603e-05, "loss": 1.1364, "step": 2650 }, { "epoch": 0.41793504743887405, "grad_norm": 0.96875, "learning_rate": 6.564757738441745e-05, "loss": 0.973, "step": 2651 }, { "epoch": 0.4180926992862671, "grad_norm": 1.0234375, "learning_rate": 6.564325384621688e-05, "loss": 1.3055, "step": 2652 }, { "epoch": 0.4182503511336601, "grad_norm": 0.9140625, "learning_rate": 6.563893038083349e-05, "loss": 0.9315, "step": 2653 }, { "epoch": 0.41840800298105313, "grad_norm": 0.86328125, "learning_rate": 6.563460698827642e-05, "loss": 0.8043, "step": 2654 }, { "epoch": 0.4185656548284461, "grad_norm": 1.0703125, "learning_rate": 6.563028366855484e-05, "loss": 1.2749, "step": 2655 }, { "epoch": 0.4187233066758391, "grad_norm": 0.89453125, "learning_rate": 6.562596042167788e-05, "loss": 0.706, "step": 2656 }, { "epoch": 0.41888095852323215, "grad_norm": 0.90625, "learning_rate": 6.562163724765476e-05, "loss": 1.0255, "step": 2657 }, { "epoch": 0.4190386103706252, "grad_norm": 0.8671875, "learning_rate": 6.561731414649462e-05, "loss": 0.966, "step": 2658 }, { "epoch": 0.41919626221801815, "grad_norm": 1.28125, "learning_rate": 6.561299111820661e-05, "loss": 1.0845, "step": 2659 }, { "epoch": 0.41935391406541117, "grad_norm": 0.91796875, "learning_rate": 6.560866816279991e-05, "loss": 1.0453, "step": 2660 }, { "epoch": 0.4195115659128042, "grad_norm": 1.0234375, "learning_rate": 6.56043452802836e-05, "loss": 1.0875, "step": 2661 }, { "epoch": 0.4196692177601972, "grad_norm": 0.9140625, "learning_rate": 6.560002247066699e-05, "loss": 0.9738, "step": 2662 }, { "epoch": 0.41982686960759025, "grad_norm": 0.93359375, "learning_rate": 6.559569973395915e-05, "loss": 0.7607, "step": 2663 }, { "epoch": 0.4199845214549832, "grad_norm": 1.0390625, "learning_rate": 6.559137707016927e-05, "loss": 1.1456, "step": 2664 }, { "epoch": 0.42014217330237624, "grad_norm": 0.92578125, "learning_rate": 6.558705447930649e-05, "loss": 0.9813, "step": 2665 }, { "epoch": 0.42029982514976927, "grad_norm": 0.88671875, "learning_rate": 6.558273196137995e-05, "loss": 0.9075, "step": 2666 }, { "epoch": 0.4204574769971623, "grad_norm": 0.97265625, "learning_rate": 6.557840951639886e-05, "loss": 1.2153, "step": 2667 }, { "epoch": 0.42061512884455526, "grad_norm": 1.0234375, "learning_rate": 6.557408714437239e-05, "loss": 1.0649, "step": 2668 }, { "epoch": 0.4207727806919483, "grad_norm": 0.921875, "learning_rate": 6.556976484530965e-05, "loss": 1.0956, "step": 2669 }, { "epoch": 0.4209304325393413, "grad_norm": 0.984375, "learning_rate": 6.556544261921984e-05, "loss": 1.1292, "step": 2670 }, { "epoch": 0.42108808438673434, "grad_norm": 0.92578125, "learning_rate": 6.556112046611207e-05, "loss": 0.8227, "step": 2671 }, { "epoch": 0.4212457362341273, "grad_norm": 0.90234375, "learning_rate": 6.555679838599556e-05, "loss": 0.9641, "step": 2672 }, { "epoch": 0.42140338808152034, "grad_norm": 0.8671875, "learning_rate": 6.555247637887947e-05, "loss": 1.036, "step": 2673 }, { "epoch": 0.42156103992891336, "grad_norm": 0.99609375, "learning_rate": 6.554815444477292e-05, "loss": 0.976, "step": 2674 }, { "epoch": 0.4217186917763064, "grad_norm": 1.09375, "learning_rate": 6.554383258368509e-05, "loss": 1.1241, "step": 2675 }, { "epoch": 0.42187634362369936, "grad_norm": 0.984375, "learning_rate": 6.553951079562513e-05, "loss": 1.2606, "step": 2676 }, { "epoch": 0.4220339954710924, "grad_norm": 1.1640625, "learning_rate": 6.553518908060221e-05, "loss": 1.1383, "step": 2677 }, { "epoch": 0.4221916473184854, "grad_norm": 0.87890625, "learning_rate": 6.55308674386255e-05, "loss": 0.9199, "step": 2678 }, { "epoch": 0.42234929916587843, "grad_norm": 0.984375, "learning_rate": 6.552654586970414e-05, "loss": 1.3144, "step": 2679 }, { "epoch": 0.4225069510132714, "grad_norm": 0.95703125, "learning_rate": 6.55222243738473e-05, "loss": 0.9537, "step": 2680 }, { "epoch": 0.42266460286066443, "grad_norm": 1.015625, "learning_rate": 6.551790295106407e-05, "loss": 1.0827, "step": 2681 }, { "epoch": 0.42282225470805745, "grad_norm": 0.95703125, "learning_rate": 6.551358160136376e-05, "loss": 1.2236, "step": 2682 }, { "epoch": 0.4229799065554505, "grad_norm": 1.140625, "learning_rate": 6.550926032475542e-05, "loss": 1.0299, "step": 2683 }, { "epoch": 0.42313755840284345, "grad_norm": 0.89453125, "learning_rate": 6.550493912124822e-05, "loss": 1.0157, "step": 2684 }, { "epoch": 0.4232952102502365, "grad_norm": 0.875, "learning_rate": 6.550061799085134e-05, "loss": 0.9454, "step": 2685 }, { "epoch": 0.4234528620976295, "grad_norm": 1.0, "learning_rate": 6.549629693357389e-05, "loss": 1.1689, "step": 2686 }, { "epoch": 0.4236105139450225, "grad_norm": 1.0703125, "learning_rate": 6.54919759494251e-05, "loss": 1.1926, "step": 2687 }, { "epoch": 0.4237681657924155, "grad_norm": 0.875, "learning_rate": 6.54876550384141e-05, "loss": 0.9646, "step": 2688 }, { "epoch": 0.4239258176398085, "grad_norm": 1.1484375, "learning_rate": 6.548333420055004e-05, "loss": 1.0622, "step": 2689 }, { "epoch": 0.42408346948720155, "grad_norm": 0.94140625, "learning_rate": 6.547901343584208e-05, "loss": 1.1203, "step": 2690 }, { "epoch": 0.4242411213345946, "grad_norm": 1.0234375, "learning_rate": 6.547469274429933e-05, "loss": 1.0942, "step": 2691 }, { "epoch": 0.42439877318198754, "grad_norm": 0.8984375, "learning_rate": 6.547037212593106e-05, "loss": 1.0022, "step": 2692 }, { "epoch": 0.42455642502938057, "grad_norm": 1.2265625, "learning_rate": 6.546605158074635e-05, "loss": 0.8668, "step": 2693 }, { "epoch": 0.4247140768767736, "grad_norm": 0.8359375, "learning_rate": 6.546173110875435e-05, "loss": 0.8737, "step": 2694 }, { "epoch": 0.4248717287241666, "grad_norm": 0.92578125, "learning_rate": 6.545741070996427e-05, "loss": 0.9713, "step": 2695 }, { "epoch": 0.4250293805715596, "grad_norm": 0.97265625, "learning_rate": 6.545309038438518e-05, "loss": 1.008, "step": 2696 }, { "epoch": 0.4251870324189526, "grad_norm": 0.91015625, "learning_rate": 6.544877013202633e-05, "loss": 0.9426, "step": 2697 }, { "epoch": 0.42534468426634564, "grad_norm": 0.9375, "learning_rate": 6.544444995289685e-05, "loss": 0.9433, "step": 2698 }, { "epoch": 0.42550233611373867, "grad_norm": 0.8671875, "learning_rate": 6.544012984700587e-05, "loss": 1.0172, "step": 2699 }, { "epoch": 0.42565998796113164, "grad_norm": 0.85546875, "learning_rate": 6.543580981436255e-05, "loss": 1.0212, "step": 2700 }, { "epoch": 0.42581763980852466, "grad_norm": 0.90234375, "learning_rate": 6.543148985497603e-05, "loss": 0.9989, "step": 2701 }, { "epoch": 0.4259752916559177, "grad_norm": 1.03125, "learning_rate": 6.542716996885551e-05, "loss": 1.0019, "step": 2702 }, { "epoch": 0.4261329435033107, "grad_norm": 0.921875, "learning_rate": 6.542285015601015e-05, "loss": 0.9482, "step": 2703 }, { "epoch": 0.4262905953507037, "grad_norm": 0.90625, "learning_rate": 6.541853041644907e-05, "loss": 0.9904, "step": 2704 }, { "epoch": 0.4264482471980967, "grad_norm": 0.9375, "learning_rate": 6.541421075018143e-05, "loss": 1.2548, "step": 2705 }, { "epoch": 0.42660589904548973, "grad_norm": 1.015625, "learning_rate": 6.540989115721638e-05, "loss": 1.0241, "step": 2706 }, { "epoch": 0.42676355089288276, "grad_norm": 0.92578125, "learning_rate": 6.54055716375631e-05, "loss": 1.0165, "step": 2707 }, { "epoch": 0.42692120274027573, "grad_norm": 0.84765625, "learning_rate": 6.540125219123077e-05, "loss": 0.8942, "step": 2708 }, { "epoch": 0.42707885458766875, "grad_norm": 0.9609375, "learning_rate": 6.539693281822847e-05, "loss": 0.9734, "step": 2709 }, { "epoch": 0.4272365064350618, "grad_norm": 1.078125, "learning_rate": 6.539261351856542e-05, "loss": 1.144, "step": 2710 }, { "epoch": 0.4273941582824548, "grad_norm": 1.046875, "learning_rate": 6.538829429225069e-05, "loss": 1.1552, "step": 2711 }, { "epoch": 0.4275518101298478, "grad_norm": 1.03125, "learning_rate": 6.538397513929354e-05, "loss": 1.2133, "step": 2712 }, { "epoch": 0.4277094619772408, "grad_norm": 0.8359375, "learning_rate": 6.537965605970308e-05, "loss": 0.945, "step": 2713 }, { "epoch": 0.4278671138246338, "grad_norm": 0.82421875, "learning_rate": 6.537533705348845e-05, "loss": 0.7975, "step": 2714 }, { "epoch": 0.42802476567202685, "grad_norm": 0.890625, "learning_rate": 6.537101812065882e-05, "loss": 0.9211, "step": 2715 }, { "epoch": 0.4281824175194198, "grad_norm": 1.03125, "learning_rate": 6.536669926122332e-05, "loss": 1.177, "step": 2716 }, { "epoch": 0.42834006936681285, "grad_norm": 0.9921875, "learning_rate": 6.536238047519114e-05, "loss": 0.9751, "step": 2717 }, { "epoch": 0.4284977212142059, "grad_norm": 0.9453125, "learning_rate": 6.535806176257141e-05, "loss": 1.0925, "step": 2718 }, { "epoch": 0.4286553730615989, "grad_norm": 0.87109375, "learning_rate": 6.535374312337328e-05, "loss": 1.1275, "step": 2719 }, { "epoch": 0.42881302490899187, "grad_norm": 0.90625, "learning_rate": 6.534942455760591e-05, "loss": 0.9658, "step": 2720 }, { "epoch": 0.4289706767563849, "grad_norm": 0.83984375, "learning_rate": 6.534510606527845e-05, "loss": 0.9199, "step": 2721 }, { "epoch": 0.4291283286037779, "grad_norm": 0.87890625, "learning_rate": 6.534078764640008e-05, "loss": 0.989, "step": 2722 }, { "epoch": 0.42928598045117095, "grad_norm": 0.8984375, "learning_rate": 6.533646930097991e-05, "loss": 0.9411, "step": 2723 }, { "epoch": 0.4294436322985639, "grad_norm": 0.90234375, "learning_rate": 6.533215102902714e-05, "loss": 0.9289, "step": 2724 }, { "epoch": 0.42960128414595694, "grad_norm": 1.0078125, "learning_rate": 6.532783283055087e-05, "loss": 1.047, "step": 2725 }, { "epoch": 0.42975893599334997, "grad_norm": 0.96875, "learning_rate": 6.532351470556023e-05, "loss": 1.1762, "step": 2726 }, { "epoch": 0.429916587840743, "grad_norm": 0.890625, "learning_rate": 6.53191966540645e-05, "loss": 0.9962, "step": 2727 }, { "epoch": 0.43007423968813596, "grad_norm": 1.0390625, "learning_rate": 6.531487867607272e-05, "loss": 1.0826, "step": 2728 }, { "epoch": 0.430231891535529, "grad_norm": 0.87890625, "learning_rate": 6.531056077159407e-05, "loss": 1.0216, "step": 2729 }, { "epoch": 0.430389543382922, "grad_norm": 0.94921875, "learning_rate": 6.530624294063771e-05, "loss": 1.1872, "step": 2730 }, { "epoch": 0.43054719523031504, "grad_norm": 0.94140625, "learning_rate": 6.530192518321276e-05, "loss": 1.0959, "step": 2731 }, { "epoch": 0.430704847077708, "grad_norm": 0.9921875, "learning_rate": 6.529760749932841e-05, "loss": 0.9862, "step": 2732 }, { "epoch": 0.43086249892510103, "grad_norm": 0.953125, "learning_rate": 6.529328988899382e-05, "loss": 0.9641, "step": 2733 }, { "epoch": 0.43102015077249406, "grad_norm": 0.91015625, "learning_rate": 6.52889723522181e-05, "loss": 1.0431, "step": 2734 }, { "epoch": 0.4311778026198871, "grad_norm": 0.9453125, "learning_rate": 6.528465488901043e-05, "loss": 0.9527, "step": 2735 }, { "epoch": 0.43133545446728006, "grad_norm": 0.9765625, "learning_rate": 6.528033749937992e-05, "loss": 1.0643, "step": 2736 }, { "epoch": 0.4314931063146731, "grad_norm": 0.93359375, "learning_rate": 6.527602018333579e-05, "loss": 1.1445, "step": 2737 }, { "epoch": 0.4316507581620661, "grad_norm": 0.9375, "learning_rate": 6.527170294088712e-05, "loss": 1.0928, "step": 2738 }, { "epoch": 0.43180841000945913, "grad_norm": 0.84375, "learning_rate": 6.52673857720431e-05, "loss": 0.9932, "step": 2739 }, { "epoch": 0.4319660618568521, "grad_norm": 1.046875, "learning_rate": 6.526306867681288e-05, "loss": 1.2806, "step": 2740 }, { "epoch": 0.4321237137042451, "grad_norm": 0.83984375, "learning_rate": 6.525875165520558e-05, "loss": 1.0832, "step": 2741 }, { "epoch": 0.43228136555163815, "grad_norm": 0.828125, "learning_rate": 6.525443470723039e-05, "loss": 0.9927, "step": 2742 }, { "epoch": 0.4324390173990312, "grad_norm": 0.93359375, "learning_rate": 6.525011783289644e-05, "loss": 0.9146, "step": 2743 }, { "epoch": 0.43259666924642415, "grad_norm": 0.953125, "learning_rate": 6.524580103221287e-05, "loss": 0.9657, "step": 2744 }, { "epoch": 0.4327543210938172, "grad_norm": 0.86328125, "learning_rate": 6.524148430518886e-05, "loss": 1.032, "step": 2745 }, { "epoch": 0.4329119729412102, "grad_norm": 0.87109375, "learning_rate": 6.523716765183347e-05, "loss": 1.0179, "step": 2746 }, { "epoch": 0.4330696247886032, "grad_norm": 0.8671875, "learning_rate": 6.523285107215597e-05, "loss": 0.9907, "step": 2747 }, { "epoch": 0.4332272766359962, "grad_norm": 0.91796875, "learning_rate": 6.522853456616547e-05, "loss": 1.0947, "step": 2748 }, { "epoch": 0.4333849284833892, "grad_norm": 0.92578125, "learning_rate": 6.522421813387107e-05, "loss": 1.0683, "step": 2749 }, { "epoch": 0.43354258033078225, "grad_norm": 0.953125, "learning_rate": 6.521990177528198e-05, "loss": 1.229, "step": 2750 }, { "epoch": 0.43370023217817527, "grad_norm": 0.9921875, "learning_rate": 6.521558549040727e-05, "loss": 1.0778, "step": 2751 }, { "epoch": 0.43385788402556824, "grad_norm": 0.93359375, "learning_rate": 6.521126927925618e-05, "loss": 0.9319, "step": 2752 }, { "epoch": 0.43401553587296127, "grad_norm": 0.8359375, "learning_rate": 6.520695314183782e-05, "loss": 1.0405, "step": 2753 }, { "epoch": 0.4341731877203543, "grad_norm": 0.8203125, "learning_rate": 6.520263707816132e-05, "loss": 0.8712, "step": 2754 }, { "epoch": 0.4343308395677473, "grad_norm": 1.0546875, "learning_rate": 6.519832108823586e-05, "loss": 1.0385, "step": 2755 }, { "epoch": 0.4344884914151403, "grad_norm": 0.95703125, "learning_rate": 6.519400517207052e-05, "loss": 1.1215, "step": 2756 }, { "epoch": 0.4346461432625333, "grad_norm": 0.86328125, "learning_rate": 6.518968932967453e-05, "loss": 0.9479, "step": 2757 }, { "epoch": 0.43480379510992634, "grad_norm": 0.8984375, "learning_rate": 6.518537356105701e-05, "loss": 1.1548, "step": 2758 }, { "epoch": 0.43496144695731936, "grad_norm": 0.8828125, "learning_rate": 6.518105786622711e-05, "loss": 0.9939, "step": 2759 }, { "epoch": 0.43511909880471233, "grad_norm": 0.9453125, "learning_rate": 6.517674224519397e-05, "loss": 1.0875, "step": 2760 }, { "epoch": 0.43527675065210536, "grad_norm": 1.3984375, "learning_rate": 6.517242669796669e-05, "loss": 0.9756, "step": 2761 }, { "epoch": 0.4354344024994984, "grad_norm": 1.0234375, "learning_rate": 6.516811122455449e-05, "loss": 1.1052, "step": 2762 }, { "epoch": 0.4355920543468914, "grad_norm": 0.87890625, "learning_rate": 6.51637958249665e-05, "loss": 0.9943, "step": 2763 }, { "epoch": 0.4357497061942844, "grad_norm": 1.0078125, "learning_rate": 6.515948049921186e-05, "loss": 1.2119, "step": 2764 }, { "epoch": 0.4359073580416774, "grad_norm": 0.8828125, "learning_rate": 6.515516524729967e-05, "loss": 1.002, "step": 2765 }, { "epoch": 0.43606500988907043, "grad_norm": 1.140625, "learning_rate": 6.51508500692391e-05, "loss": 1.2949, "step": 2766 }, { "epoch": 0.43622266173646346, "grad_norm": 0.9609375, "learning_rate": 6.514653496503938e-05, "loss": 1.1826, "step": 2767 }, { "epoch": 0.4363803135838564, "grad_norm": 0.98828125, "learning_rate": 6.514221993470954e-05, "loss": 1.0555, "step": 2768 }, { "epoch": 0.43653796543124945, "grad_norm": 1.0, "learning_rate": 6.51379049782588e-05, "loss": 1.0748, "step": 2769 }, { "epoch": 0.4366956172786425, "grad_norm": 0.890625, "learning_rate": 6.513359009569627e-05, "loss": 0.9438, "step": 2770 }, { "epoch": 0.4368532691260355, "grad_norm": 0.87109375, "learning_rate": 6.512927528703105e-05, "loss": 1.0476, "step": 2771 }, { "epoch": 0.4370109209734285, "grad_norm": 1.0234375, "learning_rate": 6.51249605522724e-05, "loss": 1.0635, "step": 2772 }, { "epoch": 0.4371685728208215, "grad_norm": 0.9296875, "learning_rate": 6.512064589142938e-05, "loss": 1.0634, "step": 2773 }, { "epoch": 0.4373262246682145, "grad_norm": 0.98828125, "learning_rate": 6.511633130451117e-05, "loss": 1.0078, "step": 2774 }, { "epoch": 0.43748387651560755, "grad_norm": 0.87109375, "learning_rate": 6.511201679152692e-05, "loss": 1.1566, "step": 2775 }, { "epoch": 0.4376415283630005, "grad_norm": 0.98046875, "learning_rate": 6.51077023524857e-05, "loss": 1.2044, "step": 2776 }, { "epoch": 0.43779918021039355, "grad_norm": 0.9140625, "learning_rate": 6.510338798739674e-05, "loss": 0.9469, "step": 2777 }, { "epoch": 0.43795683205778657, "grad_norm": 0.859375, "learning_rate": 6.509907369626917e-05, "loss": 0.868, "step": 2778 }, { "epoch": 0.4381144839051796, "grad_norm": 0.84765625, "learning_rate": 6.509475947911212e-05, "loss": 1.0416, "step": 2779 }, { "epoch": 0.43827213575257257, "grad_norm": 1.0234375, "learning_rate": 6.509044533593473e-05, "loss": 1.046, "step": 2780 }, { "epoch": 0.4384297875999656, "grad_norm": 0.97265625, "learning_rate": 6.508613126674612e-05, "loss": 1.0648, "step": 2781 }, { "epoch": 0.4385874394473586, "grad_norm": 1.0078125, "learning_rate": 6.508181727155547e-05, "loss": 1.0563, "step": 2782 }, { "epoch": 0.43874509129475164, "grad_norm": 0.98046875, "learning_rate": 6.507750335037192e-05, "loss": 1.0095, "step": 2783 }, { "epoch": 0.4389027431421446, "grad_norm": 0.96484375, "learning_rate": 6.507318950320461e-05, "loss": 0.9736, "step": 2784 }, { "epoch": 0.43906039498953764, "grad_norm": 0.90625, "learning_rate": 6.506887573006268e-05, "loss": 0.9974, "step": 2785 }, { "epoch": 0.43921804683693066, "grad_norm": 1.0390625, "learning_rate": 6.506456203095526e-05, "loss": 1.293, "step": 2786 }, { "epoch": 0.4393756986843237, "grad_norm": 1.0, "learning_rate": 6.506024840589148e-05, "loss": 0.974, "step": 2787 }, { "epoch": 0.43953335053171666, "grad_norm": 0.94140625, "learning_rate": 6.505593485488054e-05, "loss": 1.0034, "step": 2788 }, { "epoch": 0.4396910023791097, "grad_norm": 0.94140625, "learning_rate": 6.505162137793154e-05, "loss": 1.068, "step": 2789 }, { "epoch": 0.4398486542265027, "grad_norm": 0.9765625, "learning_rate": 6.504730797505365e-05, "loss": 1.0178, "step": 2790 }, { "epoch": 0.44000630607389574, "grad_norm": 0.93359375, "learning_rate": 6.504299464625599e-05, "loss": 1.1401, "step": 2791 }, { "epoch": 0.4401639579212887, "grad_norm": 1.0234375, "learning_rate": 6.503868139154763e-05, "loss": 1.0917, "step": 2792 }, { "epoch": 0.44032160976868173, "grad_norm": 0.88671875, "learning_rate": 6.503436821093787e-05, "loss": 1.0827, "step": 2793 }, { "epoch": 0.44047926161607476, "grad_norm": 1.0390625, "learning_rate": 6.503005510443574e-05, "loss": 1.3003, "step": 2794 }, { "epoch": 0.4406369134634678, "grad_norm": 0.984375, "learning_rate": 6.502574207205042e-05, "loss": 0.9289, "step": 2795 }, { "epoch": 0.44079456531086075, "grad_norm": 1.375, "learning_rate": 6.502142911379105e-05, "loss": 1.1396, "step": 2796 }, { "epoch": 0.4409522171582538, "grad_norm": 0.9296875, "learning_rate": 6.501711622966671e-05, "loss": 1.1392, "step": 2797 }, { "epoch": 0.4411098690056468, "grad_norm": 1.109375, "learning_rate": 6.501280341968663e-05, "loss": 1.0291, "step": 2798 }, { "epoch": 0.44126752085303983, "grad_norm": 1.03125, "learning_rate": 6.500849068385993e-05, "loss": 1.4314, "step": 2799 }, { "epoch": 0.4414251727004328, "grad_norm": 1.453125, "learning_rate": 6.500417802219572e-05, "loss": 1.2191, "step": 2800 }, { "epoch": 0.4415828245478258, "grad_norm": 0.9140625, "learning_rate": 6.499986543470315e-05, "loss": 1.1965, "step": 2801 }, { "epoch": 0.44174047639521885, "grad_norm": 0.890625, "learning_rate": 6.499555292139134e-05, "loss": 0.9441, "step": 2802 }, { "epoch": 0.4418981282426119, "grad_norm": 0.90234375, "learning_rate": 6.49912404822695e-05, "loss": 1.0731, "step": 2803 }, { "epoch": 0.44205578009000485, "grad_norm": 0.90625, "learning_rate": 6.498692811734671e-05, "loss": 1.0864, "step": 2804 }, { "epoch": 0.44221343193739787, "grad_norm": 0.99609375, "learning_rate": 6.49826158266321e-05, "loss": 0.9313, "step": 2805 }, { "epoch": 0.4423710837847909, "grad_norm": 0.828125, "learning_rate": 6.497830361013486e-05, "loss": 0.8939, "step": 2806 }, { "epoch": 0.4425287356321839, "grad_norm": 0.96484375, "learning_rate": 6.497399146786408e-05, "loss": 1.3015, "step": 2807 }, { "epoch": 0.4426863874795769, "grad_norm": 0.88671875, "learning_rate": 6.496967939982895e-05, "loss": 0.9593, "step": 2808 }, { "epoch": 0.4428440393269699, "grad_norm": 0.9140625, "learning_rate": 6.496536740603858e-05, "loss": 0.8728, "step": 2809 }, { "epoch": 0.44300169117436294, "grad_norm": 0.8828125, "learning_rate": 6.496105548650212e-05, "loss": 1.1447, "step": 2810 }, { "epoch": 0.44315934302175597, "grad_norm": 0.98828125, "learning_rate": 6.495674364122868e-05, "loss": 1.0463, "step": 2811 }, { "epoch": 0.44331699486914894, "grad_norm": 0.9609375, "learning_rate": 6.495243187022739e-05, "loss": 1.1476, "step": 2812 }, { "epoch": 0.44347464671654196, "grad_norm": 0.9296875, "learning_rate": 6.494812017350746e-05, "loss": 1.0201, "step": 2813 }, { "epoch": 0.443632298563935, "grad_norm": 0.94921875, "learning_rate": 6.494380855107798e-05, "loss": 1.014, "step": 2814 }, { "epoch": 0.443789950411328, "grad_norm": 1.046875, "learning_rate": 6.49394970029481e-05, "loss": 1.0446, "step": 2815 }, { "epoch": 0.44394760225872104, "grad_norm": 0.79296875, "learning_rate": 6.493518552912695e-05, "loss": 0.8293, "step": 2816 }, { "epoch": 0.444105254106114, "grad_norm": 1.03125, "learning_rate": 6.493087412962361e-05, "loss": 0.9589, "step": 2817 }, { "epoch": 0.44426290595350704, "grad_norm": 0.98828125, "learning_rate": 6.492656280444735e-05, "loss": 1.2074, "step": 2818 }, { "epoch": 0.44442055780090006, "grad_norm": 0.93359375, "learning_rate": 6.492225155360722e-05, "loss": 0.9702, "step": 2819 }, { "epoch": 0.4445782096482931, "grad_norm": 0.94921875, "learning_rate": 6.491794037711238e-05, "loss": 1.2989, "step": 2820 }, { "epoch": 0.44473586149568606, "grad_norm": 0.90234375, "learning_rate": 6.491362927497195e-05, "loss": 1.0855, "step": 2821 }, { "epoch": 0.4448935133430791, "grad_norm": 0.9140625, "learning_rate": 6.490931824719506e-05, "loss": 0.8969, "step": 2822 }, { "epoch": 0.4450511651904721, "grad_norm": 0.875, "learning_rate": 6.490500729379087e-05, "loss": 0.9227, "step": 2823 }, { "epoch": 0.44520881703786513, "grad_norm": 0.94140625, "learning_rate": 6.490069641476852e-05, "loss": 0.995, "step": 2824 }, { "epoch": 0.4453664688852581, "grad_norm": 0.88671875, "learning_rate": 6.489638561013716e-05, "loss": 1.1086, "step": 2825 }, { "epoch": 0.44552412073265113, "grad_norm": 1.0546875, "learning_rate": 6.489207487990588e-05, "loss": 1.2279, "step": 2826 }, { "epoch": 0.44568177258004416, "grad_norm": 0.89453125, "learning_rate": 6.488776422408384e-05, "loss": 1.1683, "step": 2827 }, { "epoch": 0.4458394244274372, "grad_norm": 0.97265625, "learning_rate": 6.488345364268018e-05, "loss": 1.0655, "step": 2828 }, { "epoch": 0.44599707627483015, "grad_norm": 0.80859375, "learning_rate": 6.487914313570404e-05, "loss": 0.9703, "step": 2829 }, { "epoch": 0.4461547281222232, "grad_norm": 0.9140625, "learning_rate": 6.487483270316455e-05, "loss": 1.2179, "step": 2830 }, { "epoch": 0.4463123799696162, "grad_norm": 1.2109375, "learning_rate": 6.487052234507085e-05, "loss": 1.1804, "step": 2831 }, { "epoch": 0.4464700318170092, "grad_norm": 0.87109375, "learning_rate": 6.486621206143203e-05, "loss": 0.9828, "step": 2832 }, { "epoch": 0.4466276836644022, "grad_norm": 0.94140625, "learning_rate": 6.486190185225729e-05, "loss": 1.1227, "step": 2833 }, { "epoch": 0.4467853355117952, "grad_norm": 1.0234375, "learning_rate": 6.485759171755574e-05, "loss": 0.986, "step": 2834 }, { "epoch": 0.44694298735918825, "grad_norm": 1.0078125, "learning_rate": 6.485328165733653e-05, "loss": 1.1596, "step": 2835 }, { "epoch": 0.4471006392065813, "grad_norm": 1.09375, "learning_rate": 6.484897167160877e-05, "loss": 1.1091, "step": 2836 }, { "epoch": 0.44725829105397424, "grad_norm": 0.9609375, "learning_rate": 6.484466176038158e-05, "loss": 0.8134, "step": 2837 }, { "epoch": 0.44741594290136727, "grad_norm": 0.92578125, "learning_rate": 6.484035192366414e-05, "loss": 0.9533, "step": 2838 }, { "epoch": 0.4475735947487603, "grad_norm": 0.984375, "learning_rate": 6.483604216146558e-05, "loss": 1.2262, "step": 2839 }, { "epoch": 0.4477312465961533, "grad_norm": 0.91796875, "learning_rate": 6.483173247379501e-05, "loss": 1.0133, "step": 2840 }, { "epoch": 0.4478888984435463, "grad_norm": 0.859375, "learning_rate": 6.482742286066159e-05, "loss": 0.9566, "step": 2841 }, { "epoch": 0.4480465502909393, "grad_norm": 0.953125, "learning_rate": 6.482311332207438e-05, "loss": 1.1468, "step": 2842 }, { "epoch": 0.44820420213833234, "grad_norm": 0.94140625, "learning_rate": 6.48188038580426e-05, "loss": 0.8488, "step": 2843 }, { "epoch": 0.44836185398572537, "grad_norm": 0.9140625, "learning_rate": 6.481449446857539e-05, "loss": 1.0653, "step": 2844 }, { "epoch": 0.44851950583311834, "grad_norm": 0.91796875, "learning_rate": 6.481018515368183e-05, "loss": 1.068, "step": 2845 }, { "epoch": 0.44867715768051136, "grad_norm": 0.87890625, "learning_rate": 6.480587591337106e-05, "loss": 0.8972, "step": 2846 }, { "epoch": 0.4488348095279044, "grad_norm": 0.98828125, "learning_rate": 6.48015667476522e-05, "loss": 1.1933, "step": 2847 }, { "epoch": 0.4489924613752974, "grad_norm": 0.86328125, "learning_rate": 6.479725765653445e-05, "loss": 0.9931, "step": 2848 }, { "epoch": 0.4491501132226904, "grad_norm": 1.0234375, "learning_rate": 6.47929486400269e-05, "loss": 1.1808, "step": 2849 }, { "epoch": 0.4493077650700834, "grad_norm": 0.94921875, "learning_rate": 6.478863969813866e-05, "loss": 1.1713, "step": 2850 }, { "epoch": 0.44946541691747643, "grad_norm": 0.9609375, "learning_rate": 6.478433083087891e-05, "loss": 1.1561, "step": 2851 }, { "epoch": 0.44962306876486946, "grad_norm": 0.9765625, "learning_rate": 6.47800220382567e-05, "loss": 1.2913, "step": 2852 }, { "epoch": 0.44978072061226243, "grad_norm": 0.9609375, "learning_rate": 6.477571332028128e-05, "loss": 1.1878, "step": 2853 }, { "epoch": 0.44993837245965546, "grad_norm": 0.953125, "learning_rate": 6.47714046769617e-05, "loss": 1.2379, "step": 2854 }, { "epoch": 0.4500960243070485, "grad_norm": 1.0234375, "learning_rate": 6.476709610830712e-05, "loss": 1.2034, "step": 2855 }, { "epoch": 0.4502536761544415, "grad_norm": 0.9296875, "learning_rate": 6.476278761432666e-05, "loss": 1.1278, "step": 2856 }, { "epoch": 0.4504113280018345, "grad_norm": 0.8984375, "learning_rate": 6.475847919502945e-05, "loss": 1.0027, "step": 2857 }, { "epoch": 0.4505689798492275, "grad_norm": 1.375, "learning_rate": 6.475417085042464e-05, "loss": 1.1341, "step": 2858 }, { "epoch": 0.4507266316966205, "grad_norm": 0.9765625, "learning_rate": 6.474986258052135e-05, "loss": 0.981, "step": 2859 }, { "epoch": 0.45088428354401355, "grad_norm": 1.0234375, "learning_rate": 6.474555438532872e-05, "loss": 1.0832, "step": 2860 }, { "epoch": 0.4510419353914065, "grad_norm": 1.015625, "learning_rate": 6.474124626485587e-05, "loss": 1.1032, "step": 2861 }, { "epoch": 0.45119958723879955, "grad_norm": 1.09375, "learning_rate": 6.473693821911188e-05, "loss": 0.8015, "step": 2862 }, { "epoch": 0.4513572390861926, "grad_norm": 1.1171875, "learning_rate": 6.4732630248106e-05, "loss": 1.2049, "step": 2863 }, { "epoch": 0.4515148909335856, "grad_norm": 1.015625, "learning_rate": 6.472832235184728e-05, "loss": 1.131, "step": 2864 }, { "epoch": 0.45167254278097857, "grad_norm": 0.95703125, "learning_rate": 6.472401453034486e-05, "loss": 1.1863, "step": 2865 }, { "epoch": 0.4518301946283716, "grad_norm": 1.0234375, "learning_rate": 6.471970678360789e-05, "loss": 1.001, "step": 2866 }, { "epoch": 0.4519878464757646, "grad_norm": 0.9140625, "learning_rate": 6.471539911164546e-05, "loss": 0.8913, "step": 2867 }, { "epoch": 0.45214549832315765, "grad_norm": 0.86328125, "learning_rate": 6.471109151446674e-05, "loss": 0.8759, "step": 2868 }, { "epoch": 0.4523031501705506, "grad_norm": 0.93359375, "learning_rate": 6.470678399208085e-05, "loss": 1.093, "step": 2869 }, { "epoch": 0.45246080201794364, "grad_norm": 0.87890625, "learning_rate": 6.47024765444969e-05, "loss": 0.9012, "step": 2870 }, { "epoch": 0.45261845386533667, "grad_norm": 0.92578125, "learning_rate": 6.469816917172404e-05, "loss": 1.0347, "step": 2871 }, { "epoch": 0.4527761057127297, "grad_norm": 0.96484375, "learning_rate": 6.469386187377135e-05, "loss": 1.1308, "step": 2872 }, { "epoch": 0.45293375756012266, "grad_norm": 0.9296875, "learning_rate": 6.468955465064806e-05, "loss": 1.1969, "step": 2873 }, { "epoch": 0.4530914094075157, "grad_norm": 1.015625, "learning_rate": 6.468524750236322e-05, "loss": 1.1529, "step": 2874 }, { "epoch": 0.4532490612549087, "grad_norm": 0.97265625, "learning_rate": 6.4680940428926e-05, "loss": 1.0287, "step": 2875 }, { "epoch": 0.45340671310230174, "grad_norm": 0.83203125, "learning_rate": 6.467663343034549e-05, "loss": 0.9452, "step": 2876 }, { "epoch": 0.4535643649496947, "grad_norm": 1.015625, "learning_rate": 6.46723265066308e-05, "loss": 1.0053, "step": 2877 }, { "epoch": 0.45372201679708773, "grad_norm": 1.0078125, "learning_rate": 6.466801965779114e-05, "loss": 1.1034, "step": 2878 }, { "epoch": 0.45387966864448076, "grad_norm": 0.87890625, "learning_rate": 6.46637128838356e-05, "loss": 0.9602, "step": 2879 }, { "epoch": 0.4540373204918738, "grad_norm": 0.94140625, "learning_rate": 6.465940618477328e-05, "loss": 1.2403, "step": 2880 }, { "epoch": 0.45419497233926676, "grad_norm": 1.140625, "learning_rate": 6.465509956061336e-05, "loss": 1.0088, "step": 2881 }, { "epoch": 0.4543526241866598, "grad_norm": 1.0234375, "learning_rate": 6.465079301136485e-05, "loss": 1.1048, "step": 2882 }, { "epoch": 0.4545102760340528, "grad_norm": 0.9453125, "learning_rate": 6.464648653703705e-05, "loss": 0.9967, "step": 2883 }, { "epoch": 0.45466792788144583, "grad_norm": 0.96875, "learning_rate": 6.464218013763896e-05, "loss": 1.1412, "step": 2884 }, { "epoch": 0.4548255797288388, "grad_norm": 1.015625, "learning_rate": 6.463787381317978e-05, "loss": 1.0688, "step": 2885 }, { "epoch": 0.45498323157623183, "grad_norm": 1.015625, "learning_rate": 6.463356756366859e-05, "loss": 1.2648, "step": 2886 }, { "epoch": 0.45514088342362485, "grad_norm": 0.90625, "learning_rate": 6.462926138911452e-05, "loss": 0.9267, "step": 2887 }, { "epoch": 0.4552985352710179, "grad_norm": 1.0078125, "learning_rate": 6.462495528952672e-05, "loss": 1.1445, "step": 2888 }, { "epoch": 0.45545618711841085, "grad_norm": 0.94140625, "learning_rate": 6.462064926491431e-05, "loss": 1.1423, "step": 2889 }, { "epoch": 0.4556138389658039, "grad_norm": 1.0625, "learning_rate": 6.46163433152864e-05, "loss": 1.2558, "step": 2890 }, { "epoch": 0.4557714908131969, "grad_norm": 0.8984375, "learning_rate": 6.461203744065214e-05, "loss": 0.9796, "step": 2891 }, { "epoch": 0.4559291426605899, "grad_norm": 0.90625, "learning_rate": 6.460773164102062e-05, "loss": 0.977, "step": 2892 }, { "epoch": 0.4560867945079829, "grad_norm": 0.9296875, "learning_rate": 6.460342591640102e-05, "loss": 1.1335, "step": 2893 }, { "epoch": 0.4562444463553759, "grad_norm": 0.98046875, "learning_rate": 6.459912026680243e-05, "loss": 1.1063, "step": 2894 }, { "epoch": 0.45640209820276895, "grad_norm": 0.9375, "learning_rate": 6.459481469223397e-05, "loss": 1.1357, "step": 2895 }, { "epoch": 0.45655975005016197, "grad_norm": 0.9609375, "learning_rate": 6.45905091927048e-05, "loss": 1.0765, "step": 2896 }, { "epoch": 0.45671740189755494, "grad_norm": 0.94921875, "learning_rate": 6.458620376822395e-05, "loss": 1.1401, "step": 2897 }, { "epoch": 0.45687505374494797, "grad_norm": 1.0078125, "learning_rate": 6.45818984188007e-05, "loss": 1.1662, "step": 2898 }, { "epoch": 0.457032705592341, "grad_norm": 0.99609375, "learning_rate": 6.457759314444404e-05, "loss": 1.1503, "step": 2899 }, { "epoch": 0.457190357439734, "grad_norm": 0.9375, "learning_rate": 6.45732879451632e-05, "loss": 0.9518, "step": 2900 }, { "epoch": 0.457348009287127, "grad_norm": 1.0, "learning_rate": 6.456898282096721e-05, "loss": 1.1939, "step": 2901 }, { "epoch": 0.45750566113452, "grad_norm": 0.98828125, "learning_rate": 6.456467777186522e-05, "loss": 0.9387, "step": 2902 }, { "epoch": 0.45766331298191304, "grad_norm": 1.046875, "learning_rate": 6.45603727978664e-05, "loss": 1.1041, "step": 2903 }, { "epoch": 0.45782096482930607, "grad_norm": 0.9140625, "learning_rate": 6.455606789897984e-05, "loss": 1.0833, "step": 2904 }, { "epoch": 0.45797861667669904, "grad_norm": 0.91796875, "learning_rate": 6.455176307521468e-05, "loss": 1.149, "step": 2905 }, { "epoch": 0.45813626852409206, "grad_norm": 0.88671875, "learning_rate": 6.454745832658002e-05, "loss": 0.9623, "step": 2906 }, { "epoch": 0.4582939203714851, "grad_norm": 0.8671875, "learning_rate": 6.454315365308499e-05, "loss": 0.9078, "step": 2907 }, { "epoch": 0.4584515722188781, "grad_norm": 0.9140625, "learning_rate": 6.453884905473873e-05, "loss": 1.0158, "step": 2908 }, { "epoch": 0.4586092240662711, "grad_norm": 0.8828125, "learning_rate": 6.453454453155033e-05, "loss": 0.9926, "step": 2909 }, { "epoch": 0.4587668759136641, "grad_norm": 0.99609375, "learning_rate": 6.453024008352897e-05, "loss": 0.9995, "step": 2910 }, { "epoch": 0.45892452776105713, "grad_norm": 0.953125, "learning_rate": 6.452593571068372e-05, "loss": 1.1068, "step": 2911 }, { "epoch": 0.45908217960845016, "grad_norm": 0.83203125, "learning_rate": 6.452163141302371e-05, "loss": 1.0467, "step": 2912 }, { "epoch": 0.45923983145584313, "grad_norm": 0.87890625, "learning_rate": 6.45173271905581e-05, "loss": 1.0606, "step": 2913 }, { "epoch": 0.45939748330323615, "grad_norm": 0.9609375, "learning_rate": 6.451302304329597e-05, "loss": 1.0208, "step": 2914 }, { "epoch": 0.4595551351506292, "grad_norm": 0.98828125, "learning_rate": 6.450871897124647e-05, "loss": 1.0334, "step": 2915 }, { "epoch": 0.4597127869980222, "grad_norm": 0.9921875, "learning_rate": 6.450441497441872e-05, "loss": 1.0089, "step": 2916 }, { "epoch": 0.4598704388454152, "grad_norm": 1.015625, "learning_rate": 6.450011105282179e-05, "loss": 1.2276, "step": 2917 }, { "epoch": 0.4600280906928082, "grad_norm": 0.8359375, "learning_rate": 6.449580720646488e-05, "loss": 0.868, "step": 2918 }, { "epoch": 0.4601857425402012, "grad_norm": 0.94140625, "learning_rate": 6.449150343535709e-05, "loss": 1.007, "step": 2919 }, { "epoch": 0.46034339438759425, "grad_norm": 0.93359375, "learning_rate": 6.44871997395075e-05, "loss": 0.975, "step": 2920 }, { "epoch": 0.4605010462349872, "grad_norm": 0.890625, "learning_rate": 6.448289611892529e-05, "loss": 0.9442, "step": 2921 }, { "epoch": 0.46065869808238025, "grad_norm": 1.0, "learning_rate": 6.447859257361949e-05, "loss": 1.046, "step": 2922 }, { "epoch": 0.4608163499297733, "grad_norm": 0.9765625, "learning_rate": 6.447428910359933e-05, "loss": 0.9668, "step": 2923 }, { "epoch": 0.4609740017771663, "grad_norm": 0.9921875, "learning_rate": 6.446998570887388e-05, "loss": 0.9756, "step": 2924 }, { "epoch": 0.46113165362455927, "grad_norm": 0.98828125, "learning_rate": 6.446568238945228e-05, "loss": 1.0839, "step": 2925 }, { "epoch": 0.4612893054719523, "grad_norm": 0.91796875, "learning_rate": 6.446137914534363e-05, "loss": 1.183, "step": 2926 }, { "epoch": 0.4614469573193453, "grad_norm": 0.96875, "learning_rate": 6.445707597655703e-05, "loss": 1.183, "step": 2927 }, { "epoch": 0.46160460916673834, "grad_norm": 0.9921875, "learning_rate": 6.445277288310162e-05, "loss": 0.9142, "step": 2928 }, { "epoch": 0.4617622610141313, "grad_norm": 4.625, "learning_rate": 6.444846986498656e-05, "loss": 0.9203, "step": 2929 }, { "epoch": 0.46191991286152434, "grad_norm": 0.89453125, "learning_rate": 6.444416692222093e-05, "loss": 1.0077, "step": 2930 }, { "epoch": 0.46207756470891737, "grad_norm": 0.98828125, "learning_rate": 6.443986405481385e-05, "loss": 1.3544, "step": 2931 }, { "epoch": 0.4622352165563104, "grad_norm": 0.921875, "learning_rate": 6.443556126277445e-05, "loss": 0.9479, "step": 2932 }, { "epoch": 0.46239286840370336, "grad_norm": 1.0, "learning_rate": 6.443125854611183e-05, "loss": 1.1212, "step": 2933 }, { "epoch": 0.4625505202510964, "grad_norm": 0.90625, "learning_rate": 6.442695590483514e-05, "loss": 1.1315, "step": 2934 }, { "epoch": 0.4627081720984894, "grad_norm": 1.2109375, "learning_rate": 6.442265333895348e-05, "loss": 0.83, "step": 2935 }, { "epoch": 0.46286582394588244, "grad_norm": 0.9609375, "learning_rate": 6.441835084847598e-05, "loss": 0.9132, "step": 2936 }, { "epoch": 0.4630234757932754, "grad_norm": 0.84375, "learning_rate": 6.441404843341174e-05, "loss": 0.8552, "step": 2937 }, { "epoch": 0.46318112764066843, "grad_norm": 0.859375, "learning_rate": 6.440974609376987e-05, "loss": 1.0363, "step": 2938 }, { "epoch": 0.46333877948806146, "grad_norm": 0.92578125, "learning_rate": 6.440544382955953e-05, "loss": 1.1962, "step": 2939 }, { "epoch": 0.4634964313354545, "grad_norm": 1.0703125, "learning_rate": 6.440114164078983e-05, "loss": 1.0169, "step": 2940 }, { "epoch": 0.46365408318284745, "grad_norm": 0.97265625, "learning_rate": 6.439683952746987e-05, "loss": 1.16, "step": 2941 }, { "epoch": 0.4638117350302405, "grad_norm": 0.9140625, "learning_rate": 6.439253748960877e-05, "loss": 1.0198, "step": 2942 }, { "epoch": 0.4639693868776335, "grad_norm": 0.93359375, "learning_rate": 6.438823552721562e-05, "loss": 1.0605, "step": 2943 }, { "epoch": 0.46412703872502653, "grad_norm": 0.91796875, "learning_rate": 6.43839336402996e-05, "loss": 1.2189, "step": 2944 }, { "epoch": 0.4642846905724195, "grad_norm": 0.9921875, "learning_rate": 6.43796318288698e-05, "loss": 0.9212, "step": 2945 }, { "epoch": 0.4644423424198125, "grad_norm": 0.83203125, "learning_rate": 6.437533009293535e-05, "loss": 0.8551, "step": 2946 }, { "epoch": 0.46459999426720555, "grad_norm": 0.984375, "learning_rate": 6.437102843250533e-05, "loss": 1.1616, "step": 2947 }, { "epoch": 0.4647576461145986, "grad_norm": 0.87890625, "learning_rate": 6.436672684758884e-05, "loss": 0.9441, "step": 2948 }, { "epoch": 0.46491529796199155, "grad_norm": 0.9609375, "learning_rate": 6.436242533819509e-05, "loss": 1.2233, "step": 2949 }, { "epoch": 0.4650729498093846, "grad_norm": 0.87890625, "learning_rate": 6.435812390433313e-05, "loss": 1.0567, "step": 2950 }, { "epoch": 0.4652306016567776, "grad_norm": 0.8984375, "learning_rate": 6.43538225460121e-05, "loss": 1.1503, "step": 2951 }, { "epoch": 0.4653882535041706, "grad_norm": 0.9140625, "learning_rate": 6.43495212632411e-05, "loss": 1.123, "step": 2952 }, { "epoch": 0.4655459053515636, "grad_norm": 0.84375, "learning_rate": 6.434522005602923e-05, "loss": 0.9633, "step": 2953 }, { "epoch": 0.4657035571989566, "grad_norm": 0.8828125, "learning_rate": 6.434091892438565e-05, "loss": 0.9783, "step": 2954 }, { "epoch": 0.46586120904634964, "grad_norm": 0.90234375, "learning_rate": 6.433661786831946e-05, "loss": 1.0, "step": 2955 }, { "epoch": 0.46601886089374267, "grad_norm": 0.875, "learning_rate": 6.433231688783976e-05, "loss": 0.9738, "step": 2956 }, { "epoch": 0.46617651274113564, "grad_norm": 0.93359375, "learning_rate": 6.432801598295565e-05, "loss": 1.2092, "step": 2957 }, { "epoch": 0.46633416458852867, "grad_norm": 0.9609375, "learning_rate": 6.432371515367628e-05, "loss": 0.9751, "step": 2958 }, { "epoch": 0.4664918164359217, "grad_norm": 0.84375, "learning_rate": 6.431941440001079e-05, "loss": 0.9563, "step": 2959 }, { "epoch": 0.4666494682833147, "grad_norm": 1.03125, "learning_rate": 6.431511372196825e-05, "loss": 1.0504, "step": 2960 }, { "epoch": 0.4668071201307077, "grad_norm": 0.90234375, "learning_rate": 6.43108131195578e-05, "loss": 1.017, "step": 2961 }, { "epoch": 0.4669647719781007, "grad_norm": 0.9609375, "learning_rate": 6.430651259278852e-05, "loss": 1.0393, "step": 2962 }, { "epoch": 0.46712242382549374, "grad_norm": 0.9296875, "learning_rate": 6.430221214166951e-05, "loss": 1.0031, "step": 2963 }, { "epoch": 0.46728007567288676, "grad_norm": 0.9609375, "learning_rate": 6.429791176620996e-05, "loss": 1.0212, "step": 2964 }, { "epoch": 0.46743772752027973, "grad_norm": 0.9609375, "learning_rate": 6.429361146641895e-05, "loss": 1.0614, "step": 2965 }, { "epoch": 0.46759537936767276, "grad_norm": 0.90625, "learning_rate": 6.428931124230559e-05, "loss": 1.0419, "step": 2966 }, { "epoch": 0.4677530312150658, "grad_norm": 0.90625, "learning_rate": 6.4285011093879e-05, "loss": 1.0604, "step": 2967 }, { "epoch": 0.4679106830624588, "grad_norm": 0.94921875, "learning_rate": 6.428071102114824e-05, "loss": 1.2436, "step": 2968 }, { "epoch": 0.4680683349098518, "grad_norm": 0.8984375, "learning_rate": 6.427641102412251e-05, "loss": 1.1099, "step": 2969 }, { "epoch": 0.4682259867572448, "grad_norm": 0.96875, "learning_rate": 6.42721111028109e-05, "loss": 1.0098, "step": 2970 }, { "epoch": 0.46838363860463783, "grad_norm": 0.90625, "learning_rate": 6.42678112572225e-05, "loss": 0.965, "step": 2971 }, { "epoch": 0.46854129045203086, "grad_norm": 1.078125, "learning_rate": 6.426351148736642e-05, "loss": 1.099, "step": 2972 }, { "epoch": 0.4686989422994239, "grad_norm": 1.078125, "learning_rate": 6.425921179325177e-05, "loss": 1.2015, "step": 2973 }, { "epoch": 0.46885659414681685, "grad_norm": 1.046875, "learning_rate": 6.42549121748877e-05, "loss": 1.0897, "step": 2974 }, { "epoch": 0.4690142459942099, "grad_norm": 0.97265625, "learning_rate": 6.425061263228331e-05, "loss": 1.0414, "step": 2975 }, { "epoch": 0.4691718978416029, "grad_norm": 1.03125, "learning_rate": 6.424631316544767e-05, "loss": 1.0062, "step": 2976 }, { "epoch": 0.46932954968899593, "grad_norm": 0.98046875, "learning_rate": 6.424201377438995e-05, "loss": 0.9038, "step": 2977 }, { "epoch": 0.4694872015363889, "grad_norm": 0.890625, "learning_rate": 6.423771445911921e-05, "loss": 1.0877, "step": 2978 }, { "epoch": 0.4696448533837819, "grad_norm": 0.96875, "learning_rate": 6.423341521964463e-05, "loss": 0.9739, "step": 2979 }, { "epoch": 0.46980250523117495, "grad_norm": 1.015625, "learning_rate": 6.422911605597527e-05, "loss": 1.1937, "step": 2980 }, { "epoch": 0.469960157078568, "grad_norm": 0.875, "learning_rate": 6.422481696812026e-05, "loss": 0.9589, "step": 2981 }, { "epoch": 0.47011780892596094, "grad_norm": 0.91796875, "learning_rate": 6.42205179560887e-05, "loss": 1.0437, "step": 2982 }, { "epoch": 0.47027546077335397, "grad_norm": 0.94140625, "learning_rate": 6.421621901988965e-05, "loss": 1.0172, "step": 2983 }, { "epoch": 0.470433112620747, "grad_norm": 0.86328125, "learning_rate": 6.421192015953235e-05, "loss": 0.9801, "step": 2984 }, { "epoch": 0.47059076446814, "grad_norm": 0.90625, "learning_rate": 6.420762137502583e-05, "loss": 0.9537, "step": 2985 }, { "epoch": 0.470748416315533, "grad_norm": 0.8359375, "learning_rate": 6.420332266637919e-05, "loss": 0.8769, "step": 2986 }, { "epoch": 0.470906068162926, "grad_norm": 0.9453125, "learning_rate": 6.419902403360158e-05, "loss": 1.0042, "step": 2987 }, { "epoch": 0.47106372001031904, "grad_norm": 1.125, "learning_rate": 6.419472547670206e-05, "loss": 1.0405, "step": 2988 }, { "epoch": 0.47122137185771207, "grad_norm": 0.9375, "learning_rate": 6.41904269956898e-05, "loss": 0.9437, "step": 2989 }, { "epoch": 0.47137902370510504, "grad_norm": 0.91796875, "learning_rate": 6.41861285905739e-05, "loss": 1.0336, "step": 2990 }, { "epoch": 0.47153667555249806, "grad_norm": 1.0, "learning_rate": 6.418183026136342e-05, "loss": 1.06, "step": 2991 }, { "epoch": 0.4716943273998911, "grad_norm": 0.9296875, "learning_rate": 6.417753200806753e-05, "loss": 1.0348, "step": 2992 }, { "epoch": 0.4718519792472841, "grad_norm": 0.84765625, "learning_rate": 6.41732338306953e-05, "loss": 0.8807, "step": 2993 }, { "epoch": 0.4720096310946771, "grad_norm": 0.99609375, "learning_rate": 6.416893572925583e-05, "loss": 1.0623, "step": 2994 }, { "epoch": 0.4721672829420701, "grad_norm": 0.8828125, "learning_rate": 6.416463770375827e-05, "loss": 1.1014, "step": 2995 }, { "epoch": 0.47232493478946314, "grad_norm": 0.86328125, "learning_rate": 6.416033975421173e-05, "loss": 1.0485, "step": 2996 }, { "epoch": 0.47248258663685616, "grad_norm": 0.97265625, "learning_rate": 6.41560418806253e-05, "loss": 1.0475, "step": 2997 }, { "epoch": 0.47264023848424913, "grad_norm": 0.9765625, "learning_rate": 6.415174408300806e-05, "loss": 1.0459, "step": 2998 }, { "epoch": 0.47279789033164216, "grad_norm": 0.9375, "learning_rate": 6.414744636136918e-05, "loss": 1.3581, "step": 2999 }, { "epoch": 0.4729555421790352, "grad_norm": 0.99609375, "learning_rate": 6.414314871571773e-05, "loss": 1.0912, "step": 3000 }, { "epoch": 0.4729555421790352, "eval_loss": 1.0201112031936646, "eval_runtime": 310.4524, "eval_samples_per_second": 32.211, "eval_steps_per_second": 0.673, "step": 3000 }, { "epoch": 0.4731131940264282, "grad_norm": 0.86328125, "learning_rate": 6.413885114606284e-05, "loss": 0.8915, "step": 3001 }, { "epoch": 0.4732708458738212, "grad_norm": 1.03125, "learning_rate": 6.413455365241358e-05, "loss": 1.2258, "step": 3002 }, { "epoch": 0.4734284977212142, "grad_norm": 0.92578125, "learning_rate": 6.413025623477907e-05, "loss": 0.8646, "step": 3003 }, { "epoch": 0.47358614956860723, "grad_norm": 2.15625, "learning_rate": 6.412595889316845e-05, "loss": 0.9748, "step": 3004 }, { "epoch": 0.47374380141600025, "grad_norm": 0.9765625, "learning_rate": 6.412166162759084e-05, "loss": 1.1281, "step": 3005 }, { "epoch": 0.4739014532633932, "grad_norm": 0.9375, "learning_rate": 6.411736443805529e-05, "loss": 1.0596, "step": 3006 }, { "epoch": 0.47405910511078625, "grad_norm": 0.8671875, "learning_rate": 6.411306732457094e-05, "loss": 0.8141, "step": 3007 }, { "epoch": 0.4742167569581793, "grad_norm": 1.0234375, "learning_rate": 6.410877028714686e-05, "loss": 1.0134, "step": 3008 }, { "epoch": 0.4743744088055723, "grad_norm": 1.0078125, "learning_rate": 6.410447332579222e-05, "loss": 0.951, "step": 3009 }, { "epoch": 0.47453206065296527, "grad_norm": 0.890625, "learning_rate": 6.410017644051611e-05, "loss": 1.0156, "step": 3010 }, { "epoch": 0.4746897125003583, "grad_norm": 0.9140625, "learning_rate": 6.409587963132762e-05, "loss": 0.9838, "step": 3011 }, { "epoch": 0.4748473643477513, "grad_norm": 1.09375, "learning_rate": 6.409158289823584e-05, "loss": 1.0861, "step": 3012 }, { "epoch": 0.47500501619514435, "grad_norm": 0.88671875, "learning_rate": 6.408728624124988e-05, "loss": 0.7852, "step": 3013 }, { "epoch": 0.4751626680425373, "grad_norm": 0.95703125, "learning_rate": 6.408298966037892e-05, "loss": 1.1155, "step": 3014 }, { "epoch": 0.47532031988993034, "grad_norm": 0.9453125, "learning_rate": 6.407869315563198e-05, "loss": 0.9624, "step": 3015 }, { "epoch": 0.47547797173732337, "grad_norm": 0.96484375, "learning_rate": 6.40743967270182e-05, "loss": 0.947, "step": 3016 }, { "epoch": 0.4756356235847164, "grad_norm": 0.98046875, "learning_rate": 6.407010037454669e-05, "loss": 0.9774, "step": 3017 }, { "epoch": 0.47579327543210936, "grad_norm": 0.91015625, "learning_rate": 6.406580409822654e-05, "loss": 0.943, "step": 3018 }, { "epoch": 0.4759509272795024, "grad_norm": 0.875, "learning_rate": 6.406150789806686e-05, "loss": 1.1236, "step": 3019 }, { "epoch": 0.4761085791268954, "grad_norm": 0.9921875, "learning_rate": 6.405721177407678e-05, "loss": 1.0658, "step": 3020 }, { "epoch": 0.47626623097428844, "grad_norm": 1.0234375, "learning_rate": 6.405291572626537e-05, "loss": 1.0106, "step": 3021 }, { "epoch": 0.4764238828216814, "grad_norm": 0.85546875, "learning_rate": 6.404861975464177e-05, "loss": 1.025, "step": 3022 }, { "epoch": 0.47658153466907444, "grad_norm": 0.8359375, "learning_rate": 6.404432385921501e-05, "loss": 0.9433, "step": 3023 }, { "epoch": 0.47673918651646746, "grad_norm": 0.86328125, "learning_rate": 6.40400280399943e-05, "loss": 1.0483, "step": 3024 }, { "epoch": 0.4768968383638605, "grad_norm": 0.85546875, "learning_rate": 6.403573229698868e-05, "loss": 0.8012, "step": 3025 }, { "epoch": 0.47705449021125346, "grad_norm": 1.0, "learning_rate": 6.403143663020727e-05, "loss": 0.891, "step": 3026 }, { "epoch": 0.4772121420586465, "grad_norm": 0.859375, "learning_rate": 6.40271410396592e-05, "loss": 0.8501, "step": 3027 }, { "epoch": 0.4773697939060395, "grad_norm": 1.203125, "learning_rate": 6.402284552535349e-05, "loss": 1.321, "step": 3028 }, { "epoch": 0.47752744575343253, "grad_norm": 0.8515625, "learning_rate": 6.401855008729934e-05, "loss": 0.863, "step": 3029 }, { "epoch": 0.4776850976008255, "grad_norm": 0.984375, "learning_rate": 6.401425472550581e-05, "loss": 0.9992, "step": 3030 }, { "epoch": 0.47784274944821853, "grad_norm": 0.91796875, "learning_rate": 6.400995943998204e-05, "loss": 1.0827, "step": 3031 }, { "epoch": 0.47800040129561155, "grad_norm": 0.91796875, "learning_rate": 6.400566423073709e-05, "loss": 0.9695, "step": 3032 }, { "epoch": 0.4781580531430046, "grad_norm": 0.92578125, "learning_rate": 6.400136909778002e-05, "loss": 1.0187, "step": 3033 }, { "epoch": 0.47831570499039755, "grad_norm": 0.8984375, "learning_rate": 6.399707404112005e-05, "loss": 1.1181, "step": 3034 }, { "epoch": 0.4784733568377906, "grad_norm": 0.8125, "learning_rate": 6.399277906076622e-05, "loss": 0.9862, "step": 3035 }, { "epoch": 0.4786310086851836, "grad_norm": 0.90625, "learning_rate": 6.398848415672762e-05, "loss": 0.938, "step": 3036 }, { "epoch": 0.4787886605325766, "grad_norm": 0.9921875, "learning_rate": 6.39841893290134e-05, "loss": 1.0769, "step": 3037 }, { "epoch": 0.4789463123799696, "grad_norm": 1.0, "learning_rate": 6.397989457763258e-05, "loss": 0.8636, "step": 3038 }, { "epoch": 0.4791039642273626, "grad_norm": 1.515625, "learning_rate": 6.397559990259437e-05, "loss": 1.0157, "step": 3039 }, { "epoch": 0.47926161607475565, "grad_norm": 0.83984375, "learning_rate": 6.397130530390778e-05, "loss": 1.0076, "step": 3040 }, { "epoch": 0.4794192679221487, "grad_norm": 0.89453125, "learning_rate": 6.396701078158197e-05, "loss": 0.9069, "step": 3041 }, { "epoch": 0.47957691976954164, "grad_norm": 1.0546875, "learning_rate": 6.396271633562601e-05, "loss": 1.2792, "step": 3042 }, { "epoch": 0.47973457161693467, "grad_norm": 0.92578125, "learning_rate": 6.395842196604899e-05, "loss": 1.0496, "step": 3043 }, { "epoch": 0.4798922234643277, "grad_norm": 0.8828125, "learning_rate": 6.395412767286007e-05, "loss": 0.8954, "step": 3044 }, { "epoch": 0.4800498753117207, "grad_norm": 0.96875, "learning_rate": 6.394983345606831e-05, "loss": 1.0327, "step": 3045 }, { "epoch": 0.4802075271591137, "grad_norm": 0.890625, "learning_rate": 6.394553931568281e-05, "loss": 0.9867, "step": 3046 }, { "epoch": 0.4803651790065067, "grad_norm": 1.0078125, "learning_rate": 6.394124525171269e-05, "loss": 0.9998, "step": 3047 }, { "epoch": 0.48052283085389974, "grad_norm": 1.015625, "learning_rate": 6.393695126416698e-05, "loss": 1.0541, "step": 3048 }, { "epoch": 0.48068048270129277, "grad_norm": 0.99609375, "learning_rate": 6.393265735305492e-05, "loss": 1.1514, "step": 3049 }, { "epoch": 0.48083813454868574, "grad_norm": 0.98046875, "learning_rate": 6.39283635183855e-05, "loss": 1.24, "step": 3050 }, { "epoch": 0.48099578639607876, "grad_norm": 0.88671875, "learning_rate": 6.392406976016786e-05, "loss": 1.048, "step": 3051 }, { "epoch": 0.4811534382434718, "grad_norm": 0.953125, "learning_rate": 6.391977607841109e-05, "loss": 0.9697, "step": 3052 }, { "epoch": 0.4813110900908648, "grad_norm": 0.84765625, "learning_rate": 6.391548247312425e-05, "loss": 0.8896, "step": 3053 }, { "epoch": 0.4814687419382578, "grad_norm": 0.90625, "learning_rate": 6.391118894431654e-05, "loss": 0.8735, "step": 3054 }, { "epoch": 0.4816263937856508, "grad_norm": 0.94921875, "learning_rate": 6.390689549199698e-05, "loss": 1.181, "step": 3055 }, { "epoch": 0.48178404563304383, "grad_norm": 0.94140625, "learning_rate": 6.390260211617471e-05, "loss": 0.9737, "step": 3056 }, { "epoch": 0.48194169748043686, "grad_norm": 0.85546875, "learning_rate": 6.38983088168588e-05, "loss": 0.9113, "step": 3057 }, { "epoch": 0.48209934932782983, "grad_norm": 1.0234375, "learning_rate": 6.389401559405835e-05, "loss": 1.1394, "step": 3058 }, { "epoch": 0.48225700117522285, "grad_norm": 1.0078125, "learning_rate": 6.388972244778248e-05, "loss": 1.1372, "step": 3059 }, { "epoch": 0.4824146530226159, "grad_norm": 0.89453125, "learning_rate": 6.388542937804027e-05, "loss": 0.8104, "step": 3060 }, { "epoch": 0.4825723048700089, "grad_norm": 0.921875, "learning_rate": 6.388113638484083e-05, "loss": 1.2575, "step": 3061 }, { "epoch": 0.4827299567174019, "grad_norm": 1.0078125, "learning_rate": 6.387684346819328e-05, "loss": 1.1679, "step": 3062 }, { "epoch": 0.4828876085647949, "grad_norm": 1.0234375, "learning_rate": 6.387255062810666e-05, "loss": 1.2936, "step": 3063 }, { "epoch": 0.4830452604121879, "grad_norm": 0.98828125, "learning_rate": 6.386825786459012e-05, "loss": 0.9851, "step": 3064 }, { "epoch": 0.48320291225958095, "grad_norm": 0.91015625, "learning_rate": 6.386396517765275e-05, "loss": 0.9444, "step": 3065 }, { "epoch": 0.4833605641069739, "grad_norm": 0.91796875, "learning_rate": 6.385967256730363e-05, "loss": 0.9007, "step": 3066 }, { "epoch": 0.48351821595436695, "grad_norm": 0.81640625, "learning_rate": 6.385538003355189e-05, "loss": 0.8841, "step": 3067 }, { "epoch": 0.48367586780176, "grad_norm": 0.8828125, "learning_rate": 6.38510875764066e-05, "loss": 0.9777, "step": 3068 }, { "epoch": 0.483833519649153, "grad_norm": 0.890625, "learning_rate": 6.384679519587682e-05, "loss": 0.8578, "step": 3069 }, { "epoch": 0.48399117149654597, "grad_norm": 0.96484375, "learning_rate": 6.384250289197172e-05, "loss": 1.1878, "step": 3070 }, { "epoch": 0.484148823343939, "grad_norm": 1.046875, "learning_rate": 6.383821066470039e-05, "loss": 1.186, "step": 3071 }, { "epoch": 0.484306475191332, "grad_norm": 0.96484375, "learning_rate": 6.383391851407189e-05, "loss": 0.8684, "step": 3072 }, { "epoch": 0.48446412703872505, "grad_norm": 1.0, "learning_rate": 6.382962644009534e-05, "loss": 1.0652, "step": 3073 }, { "epoch": 0.484621778886118, "grad_norm": 0.99609375, "learning_rate": 6.382533444277978e-05, "loss": 1.184, "step": 3074 }, { "epoch": 0.48477943073351104, "grad_norm": 0.8828125, "learning_rate": 6.38210425221344e-05, "loss": 0.8562, "step": 3075 }, { "epoch": 0.48493708258090407, "grad_norm": 1.015625, "learning_rate": 6.381675067816825e-05, "loss": 1.1279, "step": 3076 }, { "epoch": 0.4850947344282971, "grad_norm": 0.8359375, "learning_rate": 6.381245891089045e-05, "loss": 0.9765, "step": 3077 }, { "epoch": 0.48525238627569006, "grad_norm": 0.80078125, "learning_rate": 6.380816722031005e-05, "loss": 0.8693, "step": 3078 }, { "epoch": 0.4854100381230831, "grad_norm": 1.0390625, "learning_rate": 6.380387560643616e-05, "loss": 0.9756, "step": 3079 }, { "epoch": 0.4855676899704761, "grad_norm": 0.90625, "learning_rate": 6.379958406927789e-05, "loss": 1.0338, "step": 3080 }, { "epoch": 0.48572534181786914, "grad_norm": 0.828125, "learning_rate": 6.379529260884434e-05, "loss": 0.8861, "step": 3081 }, { "epoch": 0.4858829936652621, "grad_norm": 0.8828125, "learning_rate": 6.37910012251446e-05, "loss": 0.9717, "step": 3082 }, { "epoch": 0.48604064551265513, "grad_norm": 0.87109375, "learning_rate": 6.378670991818778e-05, "loss": 0.8443, "step": 3083 }, { "epoch": 0.48619829736004816, "grad_norm": 0.88671875, "learning_rate": 6.378241868798293e-05, "loss": 0.9172, "step": 3084 }, { "epoch": 0.4863559492074412, "grad_norm": 0.9140625, "learning_rate": 6.377812753453919e-05, "loss": 0.9223, "step": 3085 }, { "epoch": 0.48651360105483415, "grad_norm": 0.8828125, "learning_rate": 6.377383645786563e-05, "loss": 0.9662, "step": 3086 }, { "epoch": 0.4866712529022272, "grad_norm": 0.87890625, "learning_rate": 6.376954545797138e-05, "loss": 1.0614, "step": 3087 }, { "epoch": 0.4868289047496202, "grad_norm": 1.0390625, "learning_rate": 6.376525453486549e-05, "loss": 1.2085, "step": 3088 }, { "epoch": 0.48698655659701323, "grad_norm": 0.91796875, "learning_rate": 6.376096368855701e-05, "loss": 0.993, "step": 3089 }, { "epoch": 0.4871442084444062, "grad_norm": 0.9609375, "learning_rate": 6.375667291905519e-05, "loss": 0.9354, "step": 3090 }, { "epoch": 0.4873018602917992, "grad_norm": 1.046875, "learning_rate": 6.375238222636899e-05, "loss": 1.0115, "step": 3091 }, { "epoch": 0.48745951213919225, "grad_norm": 0.8828125, "learning_rate": 6.374809161050754e-05, "loss": 0.799, "step": 3092 }, { "epoch": 0.4876171639865853, "grad_norm": 0.99609375, "learning_rate": 6.374380107147996e-05, "loss": 0.9673, "step": 3093 }, { "epoch": 0.48777481583397825, "grad_norm": 0.92578125, "learning_rate": 6.373951060929526e-05, "loss": 1.1461, "step": 3094 }, { "epoch": 0.4879324676813713, "grad_norm": 1.078125, "learning_rate": 6.373522022396265e-05, "loss": 1.2105, "step": 3095 }, { "epoch": 0.4880901195287643, "grad_norm": 1.0234375, "learning_rate": 6.373092991549117e-05, "loss": 1.0989, "step": 3096 }, { "epoch": 0.4882477713761573, "grad_norm": 0.9375, "learning_rate": 6.37266396838899e-05, "loss": 0.9336, "step": 3097 }, { "epoch": 0.4884054232235503, "grad_norm": 0.94140625, "learning_rate": 6.372234952916796e-05, "loss": 1.1615, "step": 3098 }, { "epoch": 0.4885630750709433, "grad_norm": 0.890625, "learning_rate": 6.371805945133437e-05, "loss": 0.9691, "step": 3099 }, { "epoch": 0.48872072691833635, "grad_norm": 1.0234375, "learning_rate": 6.371376945039833e-05, "loss": 1.2314, "step": 3100 }, { "epoch": 0.48887837876572937, "grad_norm": 0.91796875, "learning_rate": 6.370947952636887e-05, "loss": 1.0524, "step": 3101 }, { "epoch": 0.48903603061312234, "grad_norm": 0.9375, "learning_rate": 6.37051896792551e-05, "loss": 0.9398, "step": 3102 }, { "epoch": 0.48919368246051537, "grad_norm": 1.0078125, "learning_rate": 6.370089990906612e-05, "loss": 1.0843, "step": 3103 }, { "epoch": 0.4893513343079084, "grad_norm": 0.8828125, "learning_rate": 6.369661021581097e-05, "loss": 0.9807, "step": 3104 }, { "epoch": 0.4895089861553014, "grad_norm": 0.8515625, "learning_rate": 6.369232059949881e-05, "loss": 0.9204, "step": 3105 }, { "epoch": 0.4896666380026944, "grad_norm": 0.890625, "learning_rate": 6.368803106013869e-05, "loss": 0.9076, "step": 3106 }, { "epoch": 0.4898242898500874, "grad_norm": 0.81640625, "learning_rate": 6.368374159773971e-05, "loss": 1.0334, "step": 3107 }, { "epoch": 0.48998194169748044, "grad_norm": 0.8515625, "learning_rate": 6.367945221231097e-05, "loss": 0.9416, "step": 3108 }, { "epoch": 0.49013959354487346, "grad_norm": 0.91015625, "learning_rate": 6.367516290386153e-05, "loss": 1.0069, "step": 3109 }, { "epoch": 0.49029724539226643, "grad_norm": 0.97265625, "learning_rate": 6.367087367240053e-05, "loss": 1.1338, "step": 3110 }, { "epoch": 0.49045489723965946, "grad_norm": 0.984375, "learning_rate": 6.366658451793703e-05, "loss": 1.1915, "step": 3111 }, { "epoch": 0.4906125490870525, "grad_norm": 0.94140625, "learning_rate": 6.366229544048015e-05, "loss": 0.9578, "step": 3112 }, { "epoch": 0.4907702009344455, "grad_norm": 0.88671875, "learning_rate": 6.365800644003894e-05, "loss": 1.0418, "step": 3113 }, { "epoch": 0.4909278527818385, "grad_norm": 0.859375, "learning_rate": 6.365371751662247e-05, "loss": 1.0268, "step": 3114 }, { "epoch": 0.4910855046292315, "grad_norm": 0.8359375, "learning_rate": 6.364942867023992e-05, "loss": 0.856, "step": 3115 }, { "epoch": 0.49124315647662453, "grad_norm": 0.95703125, "learning_rate": 6.364513990090032e-05, "loss": 1.1017, "step": 3116 }, { "epoch": 0.49140080832401756, "grad_norm": 0.88671875, "learning_rate": 6.364085120861276e-05, "loss": 1.0334, "step": 3117 }, { "epoch": 0.4915584601714105, "grad_norm": 0.875, "learning_rate": 6.363656259338635e-05, "loss": 1.0768, "step": 3118 }, { "epoch": 0.49171611201880355, "grad_norm": 0.9609375, "learning_rate": 6.363227405523013e-05, "loss": 0.941, "step": 3119 }, { "epoch": 0.4918737638661966, "grad_norm": 0.9453125, "learning_rate": 6.362798559415327e-05, "loss": 0.9969, "step": 3120 }, { "epoch": 0.4920314157135896, "grad_norm": 0.95703125, "learning_rate": 6.362369721016479e-05, "loss": 1.0028, "step": 3121 }, { "epoch": 0.4921890675609826, "grad_norm": 1.015625, "learning_rate": 6.361940890327382e-05, "loss": 1.1523, "step": 3122 }, { "epoch": 0.4923467194083756, "grad_norm": 0.9140625, "learning_rate": 6.361512067348944e-05, "loss": 1.0733, "step": 3123 }, { "epoch": 0.4925043712557686, "grad_norm": 0.87109375, "learning_rate": 6.36108325208207e-05, "loss": 0.997, "step": 3124 }, { "epoch": 0.49266202310316165, "grad_norm": 0.859375, "learning_rate": 6.360654444527675e-05, "loss": 1.0732, "step": 3125 }, { "epoch": 0.4928196749505546, "grad_norm": 0.97265625, "learning_rate": 6.360225644686664e-05, "loss": 1.1142, "step": 3126 }, { "epoch": 0.49297732679794765, "grad_norm": 0.9296875, "learning_rate": 6.359796852559948e-05, "loss": 1.1833, "step": 3127 }, { "epoch": 0.49313497864534067, "grad_norm": 0.9140625, "learning_rate": 6.359368068148431e-05, "loss": 1.0652, "step": 3128 }, { "epoch": 0.4932926304927337, "grad_norm": 0.94140625, "learning_rate": 6.358939291453026e-05, "loss": 1.2144, "step": 3129 }, { "epoch": 0.4934502823401267, "grad_norm": 0.984375, "learning_rate": 6.358510522474643e-05, "loss": 0.9077, "step": 3130 }, { "epoch": 0.4936079341875197, "grad_norm": 0.87890625, "learning_rate": 6.358081761214189e-05, "loss": 0.8758, "step": 3131 }, { "epoch": 0.4937655860349127, "grad_norm": 0.89453125, "learning_rate": 6.357653007672572e-05, "loss": 0.8917, "step": 3132 }, { "epoch": 0.49392323788230574, "grad_norm": 0.8671875, "learning_rate": 6.357224261850701e-05, "loss": 0.9171, "step": 3133 }, { "epoch": 0.49408088972969877, "grad_norm": 0.90625, "learning_rate": 6.356795523749483e-05, "loss": 0.945, "step": 3134 }, { "epoch": 0.49423854157709174, "grad_norm": 0.94140625, "learning_rate": 6.35636679336983e-05, "loss": 1.0642, "step": 3135 }, { "epoch": 0.49439619342448476, "grad_norm": 1.0390625, "learning_rate": 6.355938070712651e-05, "loss": 1.1653, "step": 3136 }, { "epoch": 0.4945538452718778, "grad_norm": 0.83203125, "learning_rate": 6.355509355778852e-05, "loss": 0.867, "step": 3137 }, { "epoch": 0.4947114971192708, "grad_norm": 0.90234375, "learning_rate": 6.355080648569345e-05, "loss": 0.9392, "step": 3138 }, { "epoch": 0.4948691489666638, "grad_norm": 0.84765625, "learning_rate": 6.354651949085028e-05, "loss": 1.0152, "step": 3139 }, { "epoch": 0.4950268008140568, "grad_norm": 0.99609375, "learning_rate": 6.354223257326826e-05, "loss": 1.2065, "step": 3140 }, { "epoch": 0.49518445266144984, "grad_norm": 0.94921875, "learning_rate": 6.353794573295637e-05, "loss": 1.0764, "step": 3141 }, { "epoch": 0.49534210450884286, "grad_norm": 1.015625, "learning_rate": 6.353365896992374e-05, "loss": 1.126, "step": 3142 }, { "epoch": 0.49549975635623583, "grad_norm": 0.9765625, "learning_rate": 6.352937228417942e-05, "loss": 0.957, "step": 3143 }, { "epoch": 0.49565740820362886, "grad_norm": 0.984375, "learning_rate": 6.352508567573247e-05, "loss": 1.0135, "step": 3144 }, { "epoch": 0.4958150600510219, "grad_norm": 0.921875, "learning_rate": 6.352079914459208e-05, "loss": 1.0165, "step": 3145 }, { "epoch": 0.4959727118984149, "grad_norm": 1.0078125, "learning_rate": 6.351651269076723e-05, "loss": 1.1789, "step": 3146 }, { "epoch": 0.4961303637458079, "grad_norm": 0.84765625, "learning_rate": 6.351222631426705e-05, "loss": 1.0894, "step": 3147 }, { "epoch": 0.4962880155932009, "grad_norm": 0.9765625, "learning_rate": 6.350794001510066e-05, "loss": 1.1899, "step": 3148 }, { "epoch": 0.49644566744059393, "grad_norm": 0.8828125, "learning_rate": 6.350365379327705e-05, "loss": 1.0332, "step": 3149 }, { "epoch": 0.49660331928798696, "grad_norm": 0.91796875, "learning_rate": 6.34993676488054e-05, "loss": 1.0062, "step": 3150 }, { "epoch": 0.4967609711353799, "grad_norm": 0.9453125, "learning_rate": 6.349508158169474e-05, "loss": 0.9593, "step": 3151 }, { "epoch": 0.49691862298277295, "grad_norm": 1.0625, "learning_rate": 6.349079559195416e-05, "loss": 1.2109, "step": 3152 }, { "epoch": 0.497076274830166, "grad_norm": 0.875, "learning_rate": 6.348650967959278e-05, "loss": 1.1193, "step": 3153 }, { "epoch": 0.497233926677559, "grad_norm": 1.03125, "learning_rate": 6.34822238446196e-05, "loss": 1.2981, "step": 3154 }, { "epoch": 0.49739157852495197, "grad_norm": 0.9140625, "learning_rate": 6.347793808704381e-05, "loss": 1.0825, "step": 3155 }, { "epoch": 0.497549230372345, "grad_norm": 0.93359375, "learning_rate": 6.347365240687443e-05, "loss": 0.8452, "step": 3156 }, { "epoch": 0.497706882219738, "grad_norm": 0.91015625, "learning_rate": 6.346936680412055e-05, "loss": 1.1153, "step": 3157 }, { "epoch": 0.49786453406713105, "grad_norm": 0.9921875, "learning_rate": 6.346508127879128e-05, "loss": 1.0513, "step": 3158 }, { "epoch": 0.498022185914524, "grad_norm": 0.94140625, "learning_rate": 6.346079583089563e-05, "loss": 1.2105, "step": 3159 }, { "epoch": 0.49817983776191704, "grad_norm": 0.97265625, "learning_rate": 6.345651046044277e-05, "loss": 0.9945, "step": 3160 }, { "epoch": 0.49833748960931007, "grad_norm": 0.9921875, "learning_rate": 6.345222516744175e-05, "loss": 1.0579, "step": 3161 }, { "epoch": 0.4984951414567031, "grad_norm": 0.9296875, "learning_rate": 6.344793995190166e-05, "loss": 0.9442, "step": 3162 }, { "epoch": 0.49865279330409606, "grad_norm": 0.9765625, "learning_rate": 6.344365481383157e-05, "loss": 1.0004, "step": 3163 }, { "epoch": 0.4988104451514891, "grad_norm": 0.94921875, "learning_rate": 6.343936975324054e-05, "loss": 1.0813, "step": 3164 }, { "epoch": 0.4989680969988821, "grad_norm": 0.91015625, "learning_rate": 6.343508477013766e-05, "loss": 0.9525, "step": 3165 }, { "epoch": 0.49912574884627514, "grad_norm": 1.046875, "learning_rate": 6.343079986453207e-05, "loss": 1.2379, "step": 3166 }, { "epoch": 0.4992834006936681, "grad_norm": 0.82421875, "learning_rate": 6.34265150364328e-05, "loss": 0.8228, "step": 3167 }, { "epoch": 0.49944105254106114, "grad_norm": 1.03125, "learning_rate": 6.342223028584895e-05, "loss": 1.0939, "step": 3168 }, { "epoch": 0.49959870438845416, "grad_norm": 0.90625, "learning_rate": 6.341794561278956e-05, "loss": 0.9936, "step": 3169 }, { "epoch": 0.4997563562358472, "grad_norm": 0.9609375, "learning_rate": 6.341366101726376e-05, "loss": 0.9175, "step": 3170 }, { "epoch": 0.49991400808324016, "grad_norm": 1.0078125, "learning_rate": 6.340937649928063e-05, "loss": 1.1426, "step": 3171 }, { "epoch": 0.5000716599306332, "grad_norm": 0.84765625, "learning_rate": 6.340509205884924e-05, "loss": 0.8701, "step": 3172 }, { "epoch": 0.5002293117780262, "grad_norm": 0.8984375, "learning_rate": 6.340080769597864e-05, "loss": 0.9066, "step": 3173 }, { "epoch": 0.5003869636254192, "grad_norm": 0.98828125, "learning_rate": 6.339652341067792e-05, "loss": 1.1343, "step": 3174 }, { "epoch": 0.5005446154728123, "grad_norm": 0.91015625, "learning_rate": 6.339223920295621e-05, "loss": 0.9691, "step": 3175 }, { "epoch": 0.5007022673202053, "grad_norm": 1.1640625, "learning_rate": 6.338795507282255e-05, "loss": 1.0484, "step": 3176 }, { "epoch": 0.5008599191675982, "grad_norm": 0.89453125, "learning_rate": 6.338367102028603e-05, "loss": 1.1239, "step": 3177 }, { "epoch": 0.5010175710149912, "grad_norm": 1.046875, "learning_rate": 6.337938704535573e-05, "loss": 1.151, "step": 3178 }, { "epoch": 0.5011752228623843, "grad_norm": 0.98828125, "learning_rate": 6.337510314804067e-05, "loss": 1.1216, "step": 3179 }, { "epoch": 0.5013328747097773, "grad_norm": 1.0234375, "learning_rate": 6.337081932835005e-05, "loss": 0.803, "step": 3180 }, { "epoch": 0.5014905265571703, "grad_norm": 0.96875, "learning_rate": 6.336653558629286e-05, "loss": 0.8989, "step": 3181 }, { "epoch": 0.5016481784045633, "grad_norm": 0.90234375, "learning_rate": 6.336225192187824e-05, "loss": 1.0633, "step": 3182 }, { "epoch": 0.5018058302519564, "grad_norm": 1.0703125, "learning_rate": 6.33579683351152e-05, "loss": 1.2872, "step": 3183 }, { "epoch": 0.5019634820993494, "grad_norm": 0.80859375, "learning_rate": 6.335368482601282e-05, "loss": 1.002, "step": 3184 }, { "epoch": 0.5021211339467423, "grad_norm": 0.82421875, "learning_rate": 6.334940139458026e-05, "loss": 1.0318, "step": 3185 }, { "epoch": 0.5022787857941353, "grad_norm": 0.8984375, "learning_rate": 6.334511804082653e-05, "loss": 1.037, "step": 3186 }, { "epoch": 0.5024364376415283, "grad_norm": 0.94921875, "learning_rate": 6.334083476476073e-05, "loss": 1.2291, "step": 3187 }, { "epoch": 0.5025940894889214, "grad_norm": 0.94921875, "learning_rate": 6.333655156639196e-05, "loss": 1.0167, "step": 3188 }, { "epoch": 0.5027517413363144, "grad_norm": 0.88671875, "learning_rate": 6.333226844572924e-05, "loss": 1.0555, "step": 3189 }, { "epoch": 0.5029093931837074, "grad_norm": 0.953125, "learning_rate": 6.332798540278168e-05, "loss": 1.0258, "step": 3190 }, { "epoch": 0.5030670450311004, "grad_norm": 1.0078125, "learning_rate": 6.332370243755839e-05, "loss": 1.055, "step": 3191 }, { "epoch": 0.5032246968784935, "grad_norm": 0.96875, "learning_rate": 6.331941955006839e-05, "loss": 0.9814, "step": 3192 }, { "epoch": 0.5033823487258864, "grad_norm": 1.015625, "learning_rate": 6.331513674032081e-05, "loss": 1.1327, "step": 3193 }, { "epoch": 0.5035400005732794, "grad_norm": 0.8671875, "learning_rate": 6.331085400832464e-05, "loss": 1.0885, "step": 3194 }, { "epoch": 0.5036976524206724, "grad_norm": 0.9921875, "learning_rate": 6.330657135408906e-05, "loss": 1.0215, "step": 3195 }, { "epoch": 0.5038553042680655, "grad_norm": 0.82421875, "learning_rate": 6.330228877762312e-05, "loss": 1.0863, "step": 3196 }, { "epoch": 0.5040129561154585, "grad_norm": 0.93359375, "learning_rate": 6.329800627893587e-05, "loss": 1.0148, "step": 3197 }, { "epoch": 0.5041706079628515, "grad_norm": 0.85546875, "learning_rate": 6.32937238580364e-05, "loss": 0.9876, "step": 3198 }, { "epoch": 0.5043282598102445, "grad_norm": 0.98828125, "learning_rate": 6.328944151493374e-05, "loss": 1.3381, "step": 3199 }, { "epoch": 0.5044859116576376, "grad_norm": 0.8125, "learning_rate": 6.328515924963707e-05, "loss": 0.7538, "step": 3200 }, { "epoch": 0.5046435635050305, "grad_norm": 0.9375, "learning_rate": 6.328087706215537e-05, "loss": 1.1717, "step": 3201 }, { "epoch": 0.5048012153524235, "grad_norm": 0.90234375, "learning_rate": 6.327659495249779e-05, "loss": 1.1193, "step": 3202 }, { "epoch": 0.5049588671998165, "grad_norm": 0.91015625, "learning_rate": 6.327231292067335e-05, "loss": 1.1128, "step": 3203 }, { "epoch": 0.5051165190472096, "grad_norm": 1.0234375, "learning_rate": 6.326803096669109e-05, "loss": 0.9594, "step": 3204 }, { "epoch": 0.5052741708946026, "grad_norm": 0.9140625, "learning_rate": 6.32637490905602e-05, "loss": 0.9936, "step": 3205 }, { "epoch": 0.5054318227419956, "grad_norm": 0.953125, "learning_rate": 6.325946729228969e-05, "loss": 1.0825, "step": 3206 }, { "epoch": 0.5055894745893886, "grad_norm": 0.859375, "learning_rate": 6.325518557188863e-05, "loss": 1.0765, "step": 3207 }, { "epoch": 0.5057471264367817, "grad_norm": 0.8359375, "learning_rate": 6.325090392936612e-05, "loss": 0.807, "step": 3208 }, { "epoch": 0.5059047782841746, "grad_norm": 0.91015625, "learning_rate": 6.32466223647312e-05, "loss": 1.1411, "step": 3209 }, { "epoch": 0.5060624301315676, "grad_norm": 1.046875, "learning_rate": 6.324234087799296e-05, "loss": 1.1344, "step": 3210 }, { "epoch": 0.5062200819789606, "grad_norm": 0.90625, "learning_rate": 6.323805946916048e-05, "loss": 0.781, "step": 3211 }, { "epoch": 0.5063777338263536, "grad_norm": 1.0234375, "learning_rate": 6.323377813824286e-05, "loss": 1.1569, "step": 3212 }, { "epoch": 0.5065353856737467, "grad_norm": 0.8984375, "learning_rate": 6.322949688524909e-05, "loss": 1.0285, "step": 3213 }, { "epoch": 0.5066930375211397, "grad_norm": 0.859375, "learning_rate": 6.322521571018835e-05, "loss": 0.935, "step": 3214 }, { "epoch": 0.5068506893685327, "grad_norm": 0.90625, "learning_rate": 6.322093461306963e-05, "loss": 0.8615, "step": 3215 }, { "epoch": 0.5070083412159258, "grad_norm": 0.921875, "learning_rate": 6.321665359390205e-05, "loss": 1.0648, "step": 3216 }, { "epoch": 0.5071659930633187, "grad_norm": 0.9453125, "learning_rate": 6.321237265269469e-05, "loss": 1.0917, "step": 3217 }, { "epoch": 0.5073236449107117, "grad_norm": 0.87109375, "learning_rate": 6.320809178945658e-05, "loss": 0.988, "step": 3218 }, { "epoch": 0.5074812967581047, "grad_norm": 0.9765625, "learning_rate": 6.320381100419683e-05, "loss": 0.9283, "step": 3219 }, { "epoch": 0.5076389486054977, "grad_norm": 0.9296875, "learning_rate": 6.319953029692446e-05, "loss": 1.157, "step": 3220 }, { "epoch": 0.5077966004528908, "grad_norm": 0.85546875, "learning_rate": 6.319524966764861e-05, "loss": 0.9046, "step": 3221 }, { "epoch": 0.5079542523002838, "grad_norm": 0.9765625, "learning_rate": 6.319096911637834e-05, "loss": 1.0192, "step": 3222 }, { "epoch": 0.5081119041476768, "grad_norm": 0.98046875, "learning_rate": 6.31866886431227e-05, "loss": 0.9162, "step": 3223 }, { "epoch": 0.5082695559950698, "grad_norm": 1.0078125, "learning_rate": 6.318240824789077e-05, "loss": 1.1665, "step": 3224 }, { "epoch": 0.5084272078424628, "grad_norm": 0.90234375, "learning_rate": 6.317812793069158e-05, "loss": 0.9449, "step": 3225 }, { "epoch": 0.5085848596898558, "grad_norm": 0.98046875, "learning_rate": 6.317384769153429e-05, "loss": 1.052, "step": 3226 }, { "epoch": 0.5087425115372488, "grad_norm": 0.9140625, "learning_rate": 6.31695675304279e-05, "loss": 0.8936, "step": 3227 }, { "epoch": 0.5089001633846418, "grad_norm": 0.99609375, "learning_rate": 6.316528744738155e-05, "loss": 0.9335, "step": 3228 }, { "epoch": 0.5090578152320349, "grad_norm": 0.98828125, "learning_rate": 6.316100744240422e-05, "loss": 0.8978, "step": 3229 }, { "epoch": 0.5092154670794279, "grad_norm": 0.94140625, "learning_rate": 6.315672751550505e-05, "loss": 1.0539, "step": 3230 }, { "epoch": 0.5093731189268209, "grad_norm": 0.96875, "learning_rate": 6.31524476666931e-05, "loss": 0.9447, "step": 3231 }, { "epoch": 0.5095307707742139, "grad_norm": 0.89453125, "learning_rate": 6.31481678959774e-05, "loss": 1.0343, "step": 3232 }, { "epoch": 0.5096884226216069, "grad_norm": 0.97265625, "learning_rate": 6.314388820336707e-05, "loss": 1.096, "step": 3233 }, { "epoch": 0.5098460744689999, "grad_norm": 0.8828125, "learning_rate": 6.313960858887119e-05, "loss": 0.955, "step": 3234 }, { "epoch": 0.5100037263163929, "grad_norm": 0.90625, "learning_rate": 6.313532905249875e-05, "loss": 0.9424, "step": 3235 }, { "epoch": 0.5101613781637859, "grad_norm": 0.953125, "learning_rate": 6.31310495942589e-05, "loss": 1.1538, "step": 3236 }, { "epoch": 0.510319030011179, "grad_norm": 0.87890625, "learning_rate": 6.312677021416068e-05, "loss": 0.9694, "step": 3237 }, { "epoch": 0.510476681858572, "grad_norm": 1.0078125, "learning_rate": 6.312249091221318e-05, "loss": 1.0882, "step": 3238 }, { "epoch": 0.510634333705965, "grad_norm": 0.9140625, "learning_rate": 6.311821168842544e-05, "loss": 0.9332, "step": 3239 }, { "epoch": 0.510791985553358, "grad_norm": 0.85546875, "learning_rate": 6.311393254280652e-05, "loss": 1.1887, "step": 3240 }, { "epoch": 0.510949637400751, "grad_norm": 1.03125, "learning_rate": 6.310965347536553e-05, "loss": 1.0398, "step": 3241 }, { "epoch": 0.511107289248144, "grad_norm": 0.8125, "learning_rate": 6.310537448611153e-05, "loss": 0.8946, "step": 3242 }, { "epoch": 0.511264941095537, "grad_norm": 0.953125, "learning_rate": 6.310109557505357e-05, "loss": 1.0631, "step": 3243 }, { "epoch": 0.51142259294293, "grad_norm": 0.8359375, "learning_rate": 6.309681674220073e-05, "loss": 0.9902, "step": 3244 }, { "epoch": 0.511580244790323, "grad_norm": 0.921875, "learning_rate": 6.309253798756205e-05, "loss": 1.0334, "step": 3245 }, { "epoch": 0.5117378966377161, "grad_norm": 1.0390625, "learning_rate": 6.308825931114666e-05, "loss": 1.1892, "step": 3246 }, { "epoch": 0.5118955484851091, "grad_norm": 1.015625, "learning_rate": 6.30839807129636e-05, "loss": 0.9506, "step": 3247 }, { "epoch": 0.5120532003325021, "grad_norm": 1.0390625, "learning_rate": 6.307970219302193e-05, "loss": 1.118, "step": 3248 }, { "epoch": 0.512210852179895, "grad_norm": 1.1171875, "learning_rate": 6.307542375133071e-05, "loss": 1.2192, "step": 3249 }, { "epoch": 0.5123685040272881, "grad_norm": 1.0078125, "learning_rate": 6.3071145387899e-05, "loss": 1.0798, "step": 3250 }, { "epoch": 0.5125261558746811, "grad_norm": 1.0390625, "learning_rate": 6.306686710273591e-05, "loss": 1.021, "step": 3251 }, { "epoch": 0.5126838077220741, "grad_norm": 0.8984375, "learning_rate": 6.306258889585049e-05, "loss": 0.9509, "step": 3252 }, { "epoch": 0.5128414595694671, "grad_norm": 0.8984375, "learning_rate": 6.30583107672518e-05, "loss": 0.8355, "step": 3253 }, { "epoch": 0.5129991114168602, "grad_norm": 1.078125, "learning_rate": 6.30540327169489e-05, "loss": 0.9464, "step": 3254 }, { "epoch": 0.5131567632642532, "grad_norm": 0.93359375, "learning_rate": 6.304975474495085e-05, "loss": 0.9805, "step": 3255 }, { "epoch": 0.5133144151116462, "grad_norm": 1.0078125, "learning_rate": 6.304547685126676e-05, "loss": 0.9806, "step": 3256 }, { "epoch": 0.5134720669590391, "grad_norm": 0.82421875, "learning_rate": 6.304119903590567e-05, "loss": 0.9074, "step": 3257 }, { "epoch": 0.5136297188064322, "grad_norm": 0.890625, "learning_rate": 6.303692129887665e-05, "loss": 1.1021, "step": 3258 }, { "epoch": 0.5137873706538252, "grad_norm": 1.0234375, "learning_rate": 6.303264364018874e-05, "loss": 1.0947, "step": 3259 }, { "epoch": 0.5139450225012182, "grad_norm": 0.96484375, "learning_rate": 6.302836605985102e-05, "loss": 1.0885, "step": 3260 }, { "epoch": 0.5141026743486112, "grad_norm": 0.88671875, "learning_rate": 6.302408855787258e-05, "loss": 1.0311, "step": 3261 }, { "epoch": 0.5142603261960043, "grad_norm": 0.9375, "learning_rate": 6.30198111342625e-05, "loss": 1.0976, "step": 3262 }, { "epoch": 0.5144179780433973, "grad_norm": 1.0234375, "learning_rate": 6.301553378902979e-05, "loss": 1.0993, "step": 3263 }, { "epoch": 0.5145756298907903, "grad_norm": 0.96484375, "learning_rate": 6.301125652218355e-05, "loss": 0.9576, "step": 3264 }, { "epoch": 0.5147332817381832, "grad_norm": 0.95703125, "learning_rate": 6.30069793337328e-05, "loss": 1.0853, "step": 3265 }, { "epoch": 0.5148909335855762, "grad_norm": 0.92578125, "learning_rate": 6.300270222368667e-05, "loss": 1.0693, "step": 3266 }, { "epoch": 0.5150485854329693, "grad_norm": 0.9296875, "learning_rate": 6.299842519205421e-05, "loss": 0.938, "step": 3267 }, { "epoch": 0.5152062372803623, "grad_norm": 0.8359375, "learning_rate": 6.299414823884447e-05, "loss": 1.0012, "step": 3268 }, { "epoch": 0.5153638891277553, "grad_norm": 1.015625, "learning_rate": 6.298987136406653e-05, "loss": 1.1214, "step": 3269 }, { "epoch": 0.5155215409751484, "grad_norm": 0.984375, "learning_rate": 6.29855945677294e-05, "loss": 1.1659, "step": 3270 }, { "epoch": 0.5156791928225414, "grad_norm": 0.9375, "learning_rate": 6.29813178498422e-05, "loss": 0.8866, "step": 3271 }, { "epoch": 0.5158368446699344, "grad_norm": 0.984375, "learning_rate": 6.2977041210414e-05, "loss": 1.1123, "step": 3272 }, { "epoch": 0.5159944965173273, "grad_norm": 0.8984375, "learning_rate": 6.297276464945386e-05, "loss": 1.0722, "step": 3273 }, { "epoch": 0.5161521483647203, "grad_norm": 0.9375, "learning_rate": 6.296848816697079e-05, "loss": 0.9293, "step": 3274 }, { "epoch": 0.5163098002121134, "grad_norm": 0.9140625, "learning_rate": 6.29642117629739e-05, "loss": 0.9768, "step": 3275 }, { "epoch": 0.5164674520595064, "grad_norm": 1.0703125, "learning_rate": 6.295993543747228e-05, "loss": 1.0379, "step": 3276 }, { "epoch": 0.5166251039068994, "grad_norm": 0.90234375, "learning_rate": 6.295565919047492e-05, "loss": 1.0485, "step": 3277 }, { "epoch": 0.5167827557542924, "grad_norm": 0.890625, "learning_rate": 6.295138302199096e-05, "loss": 0.9777, "step": 3278 }, { "epoch": 0.5169404076016855, "grad_norm": 2.953125, "learning_rate": 6.294710693202941e-05, "loss": 0.819, "step": 3279 }, { "epoch": 0.5170980594490785, "grad_norm": 1.0625, "learning_rate": 6.294283092059929e-05, "loss": 1.0518, "step": 3280 }, { "epoch": 0.5172557112964714, "grad_norm": 0.9375, "learning_rate": 6.29385549877098e-05, "loss": 1.0184, "step": 3281 }, { "epoch": 0.5174133631438644, "grad_norm": 0.95703125, "learning_rate": 6.29342791333699e-05, "loss": 0.87, "step": 3282 }, { "epoch": 0.5175710149912575, "grad_norm": 0.98828125, "learning_rate": 6.293000335758867e-05, "loss": 1.154, "step": 3283 }, { "epoch": 0.5177286668386505, "grad_norm": 0.984375, "learning_rate": 6.29257276603752e-05, "loss": 1.227, "step": 3284 }, { "epoch": 0.5178863186860435, "grad_norm": 0.90625, "learning_rate": 6.292145204173848e-05, "loss": 1.1327, "step": 3285 }, { "epoch": 0.5180439705334365, "grad_norm": 0.8984375, "learning_rate": 6.291717650168766e-05, "loss": 0.7133, "step": 3286 }, { "epoch": 0.5182016223808296, "grad_norm": 0.97265625, "learning_rate": 6.291290104023178e-05, "loss": 0.9067, "step": 3287 }, { "epoch": 0.5183592742282226, "grad_norm": 0.984375, "learning_rate": 6.290862565737987e-05, "loss": 1.1938, "step": 3288 }, { "epoch": 0.5185169260756156, "grad_norm": 1.015625, "learning_rate": 6.290435035314102e-05, "loss": 1.078, "step": 3289 }, { "epoch": 0.5186745779230085, "grad_norm": 0.890625, "learning_rate": 6.290007512752423e-05, "loss": 0.9553, "step": 3290 }, { "epoch": 0.5188322297704016, "grad_norm": 0.94140625, "learning_rate": 6.289579998053867e-05, "loss": 1.1297, "step": 3291 }, { "epoch": 0.5189898816177946, "grad_norm": 0.8828125, "learning_rate": 6.289152491219332e-05, "loss": 0.8165, "step": 3292 }, { "epoch": 0.5191475334651876, "grad_norm": 0.90625, "learning_rate": 6.288724992249726e-05, "loss": 0.9558, "step": 3293 }, { "epoch": 0.5193051853125806, "grad_norm": 0.9921875, "learning_rate": 6.288297501145956e-05, "loss": 1.1013, "step": 3294 }, { "epoch": 0.5194628371599737, "grad_norm": 0.94921875, "learning_rate": 6.287870017908926e-05, "loss": 0.9093, "step": 3295 }, { "epoch": 0.5196204890073667, "grad_norm": 0.91796875, "learning_rate": 6.287442542539544e-05, "loss": 0.974, "step": 3296 }, { "epoch": 0.5197781408547597, "grad_norm": 0.95703125, "learning_rate": 6.287015075038716e-05, "loss": 0.9947, "step": 3297 }, { "epoch": 0.5199357927021526, "grad_norm": 0.97265625, "learning_rate": 6.286587615407348e-05, "loss": 1.206, "step": 3298 }, { "epoch": 0.5200934445495456, "grad_norm": 0.9140625, "learning_rate": 6.286160163646342e-05, "loss": 0.9339, "step": 3299 }, { "epoch": 0.5202510963969387, "grad_norm": 0.99609375, "learning_rate": 6.285732719756608e-05, "loss": 1.0755, "step": 3300 }, { "epoch": 0.5204087482443317, "grad_norm": 0.93359375, "learning_rate": 6.285305283739054e-05, "loss": 1.0263, "step": 3301 }, { "epoch": 0.5205664000917247, "grad_norm": 1.0078125, "learning_rate": 6.284877855594582e-05, "loss": 1.0925, "step": 3302 }, { "epoch": 0.5207240519391177, "grad_norm": 1.0703125, "learning_rate": 6.284450435324098e-05, "loss": 0.9795, "step": 3303 }, { "epoch": 0.5208817037865108, "grad_norm": 0.8046875, "learning_rate": 6.284023022928511e-05, "loss": 0.7722, "step": 3304 }, { "epoch": 0.5210393556339038, "grad_norm": 0.84375, "learning_rate": 6.28359561840872e-05, "loss": 1.0487, "step": 3305 }, { "epoch": 0.5211970074812967, "grad_norm": 0.92578125, "learning_rate": 6.283168221765639e-05, "loss": 1.0511, "step": 3306 }, { "epoch": 0.5213546593286897, "grad_norm": 0.91015625, "learning_rate": 6.282740833000171e-05, "loss": 0.9363, "step": 3307 }, { "epoch": 0.5215123111760828, "grad_norm": 0.92578125, "learning_rate": 6.282313452113222e-05, "loss": 1.0918, "step": 3308 }, { "epoch": 0.5216699630234758, "grad_norm": 1.0234375, "learning_rate": 6.281886079105697e-05, "loss": 0.9448, "step": 3309 }, { "epoch": 0.5218276148708688, "grad_norm": 0.88671875, "learning_rate": 6.281458713978496e-05, "loss": 0.9482, "step": 3310 }, { "epoch": 0.5219852667182618, "grad_norm": 0.984375, "learning_rate": 6.281031356732536e-05, "loss": 0.9017, "step": 3311 }, { "epoch": 0.5221429185656549, "grad_norm": 0.8359375, "learning_rate": 6.280604007368719e-05, "loss": 1.0652, "step": 3312 }, { "epoch": 0.5223005704130479, "grad_norm": 0.9765625, "learning_rate": 6.280176665887949e-05, "loss": 0.9913, "step": 3313 }, { "epoch": 0.5224582222604408, "grad_norm": 0.91796875, "learning_rate": 6.279749332291129e-05, "loss": 1.0119, "step": 3314 }, { "epoch": 0.5226158741078338, "grad_norm": 0.953125, "learning_rate": 6.279322006579169e-05, "loss": 1.0175, "step": 3315 }, { "epoch": 0.5227735259552269, "grad_norm": 0.8984375, "learning_rate": 6.278894688752972e-05, "loss": 0.9892, "step": 3316 }, { "epoch": 0.5229311778026199, "grad_norm": 0.80078125, "learning_rate": 6.278467378813446e-05, "loss": 0.8812, "step": 3317 }, { "epoch": 0.5230888296500129, "grad_norm": 0.91015625, "learning_rate": 6.278040076761497e-05, "loss": 1.0253, "step": 3318 }, { "epoch": 0.5232464814974059, "grad_norm": 0.921875, "learning_rate": 6.277612782598028e-05, "loss": 1.1658, "step": 3319 }, { "epoch": 0.523404133344799, "grad_norm": 0.9296875, "learning_rate": 6.277185496323945e-05, "loss": 1.0372, "step": 3320 }, { "epoch": 0.523561785192192, "grad_norm": 0.890625, "learning_rate": 6.276758217940157e-05, "loss": 1.145, "step": 3321 }, { "epoch": 0.5237194370395849, "grad_norm": 0.93359375, "learning_rate": 6.276330947447566e-05, "loss": 0.9584, "step": 3322 }, { "epoch": 0.5238770888869779, "grad_norm": 0.96484375, "learning_rate": 6.27590368484708e-05, "loss": 1.0584, "step": 3323 }, { "epoch": 0.524034740734371, "grad_norm": 0.82421875, "learning_rate": 6.275476430139602e-05, "loss": 0.9147, "step": 3324 }, { "epoch": 0.524192392581764, "grad_norm": 0.91796875, "learning_rate": 6.275049183326036e-05, "loss": 0.9428, "step": 3325 }, { "epoch": 0.524350044429157, "grad_norm": 0.92578125, "learning_rate": 6.274621944407292e-05, "loss": 1.014, "step": 3326 }, { "epoch": 0.52450769627655, "grad_norm": 0.90625, "learning_rate": 6.274194713384276e-05, "loss": 0.9501, "step": 3327 }, { "epoch": 0.524665348123943, "grad_norm": 0.84765625, "learning_rate": 6.273767490257891e-05, "loss": 0.9977, "step": 3328 }, { "epoch": 0.5248229999713361, "grad_norm": 0.8828125, "learning_rate": 6.273340275029043e-05, "loss": 0.9036, "step": 3329 }, { "epoch": 0.524980651818729, "grad_norm": 0.828125, "learning_rate": 6.272913067698634e-05, "loss": 0.9698, "step": 3330 }, { "epoch": 0.525138303666122, "grad_norm": 1.0, "learning_rate": 6.272485868267575e-05, "loss": 1.4471, "step": 3331 }, { "epoch": 0.525295955513515, "grad_norm": 1.0390625, "learning_rate": 6.27205867673677e-05, "loss": 1.0074, "step": 3332 }, { "epoch": 0.5254536073609081, "grad_norm": 1.125, "learning_rate": 6.271631493107124e-05, "loss": 0.9946, "step": 3333 }, { "epoch": 0.5256112592083011, "grad_norm": 0.8046875, "learning_rate": 6.271204317379541e-05, "loss": 0.7963, "step": 3334 }, { "epoch": 0.5257689110556941, "grad_norm": 1.0390625, "learning_rate": 6.270777149554926e-05, "loss": 1.0995, "step": 3335 }, { "epoch": 0.5259265629030871, "grad_norm": 1.046875, "learning_rate": 6.270349989634185e-05, "loss": 1.2185, "step": 3336 }, { "epoch": 0.5260842147504802, "grad_norm": 1.03125, "learning_rate": 6.269922837618227e-05, "loss": 0.8999, "step": 3337 }, { "epoch": 0.5262418665978731, "grad_norm": 0.91015625, "learning_rate": 6.269495693507954e-05, "loss": 0.8993, "step": 3338 }, { "epoch": 0.5263995184452661, "grad_norm": 0.90625, "learning_rate": 6.269068557304271e-05, "loss": 1.0625, "step": 3339 }, { "epoch": 0.5265571702926591, "grad_norm": 0.8515625, "learning_rate": 6.268641429008081e-05, "loss": 0.911, "step": 3340 }, { "epoch": 0.5267148221400522, "grad_norm": 0.98828125, "learning_rate": 6.268214308620297e-05, "loss": 1.1234, "step": 3341 }, { "epoch": 0.5268724739874452, "grad_norm": 0.8828125, "learning_rate": 6.267787196141817e-05, "loss": 1.036, "step": 3342 }, { "epoch": 0.5270301258348382, "grad_norm": 0.91015625, "learning_rate": 6.267360091573551e-05, "loss": 1.0629, "step": 3343 }, { "epoch": 0.5271877776822312, "grad_norm": 0.96875, "learning_rate": 6.2669329949164e-05, "loss": 0.9262, "step": 3344 }, { "epoch": 0.5273454295296243, "grad_norm": 1.1328125, "learning_rate": 6.266505906171268e-05, "loss": 1.0977, "step": 3345 }, { "epoch": 0.5275030813770172, "grad_norm": 0.93359375, "learning_rate": 6.266078825339067e-05, "loss": 0.9758, "step": 3346 }, { "epoch": 0.5276607332244102, "grad_norm": 0.91796875, "learning_rate": 6.265651752420699e-05, "loss": 1.0942, "step": 3347 }, { "epoch": 0.5278183850718032, "grad_norm": 0.87109375, "learning_rate": 6.265224687417068e-05, "loss": 1.3608, "step": 3348 }, { "epoch": 0.5279760369191963, "grad_norm": 0.953125, "learning_rate": 6.26479763032908e-05, "loss": 0.9409, "step": 3349 }, { "epoch": 0.5281336887665893, "grad_norm": 0.89453125, "learning_rate": 6.264370581157637e-05, "loss": 1.2185, "step": 3350 }, { "epoch": 0.5282913406139823, "grad_norm": 0.94140625, "learning_rate": 6.26394353990365e-05, "loss": 0.9889, "step": 3351 }, { "epoch": 0.5284489924613753, "grad_norm": 0.94140625, "learning_rate": 6.263516506568021e-05, "loss": 1.0061, "step": 3352 }, { "epoch": 0.5286066443087684, "grad_norm": 0.92578125, "learning_rate": 6.263089481151656e-05, "loss": 1.0299, "step": 3353 }, { "epoch": 0.5287642961561613, "grad_norm": 0.8828125, "learning_rate": 6.262662463655458e-05, "loss": 0.9581, "step": 3354 }, { "epoch": 0.5289219480035543, "grad_norm": 1.03125, "learning_rate": 6.262235454080334e-05, "loss": 0.7999, "step": 3355 }, { "epoch": 0.5290795998509473, "grad_norm": 0.8515625, "learning_rate": 6.261808452427185e-05, "loss": 0.9769, "step": 3356 }, { "epoch": 0.5292372516983403, "grad_norm": 0.95703125, "learning_rate": 6.261381458696923e-05, "loss": 0.9866, "step": 3357 }, { "epoch": 0.5293949035457334, "grad_norm": 0.96484375, "learning_rate": 6.260954472890448e-05, "loss": 1.0054, "step": 3358 }, { "epoch": 0.5295525553931264, "grad_norm": 0.94140625, "learning_rate": 6.260527495008668e-05, "loss": 0.966, "step": 3359 }, { "epoch": 0.5297102072405194, "grad_norm": 0.8984375, "learning_rate": 6.260100525052486e-05, "loss": 0.9001, "step": 3360 }, { "epoch": 0.5298678590879125, "grad_norm": 0.90625, "learning_rate": 6.259673563022803e-05, "loss": 0.9025, "step": 3361 }, { "epoch": 0.5300255109353054, "grad_norm": 0.93359375, "learning_rate": 6.259246608920533e-05, "loss": 0.9716, "step": 3362 }, { "epoch": 0.5301831627826984, "grad_norm": 0.94921875, "learning_rate": 6.258819662746574e-05, "loss": 0.8808, "step": 3363 }, { "epoch": 0.5303408146300914, "grad_norm": 0.953125, "learning_rate": 6.258392724501834e-05, "loss": 1.1066, "step": 3364 }, { "epoch": 0.5304984664774844, "grad_norm": 1.0078125, "learning_rate": 6.257965794187217e-05, "loss": 1.0312, "step": 3365 }, { "epoch": 0.5306561183248775, "grad_norm": 0.86328125, "learning_rate": 6.257538871803622e-05, "loss": 0.8986, "step": 3366 }, { "epoch": 0.5308137701722705, "grad_norm": 0.96484375, "learning_rate": 6.257111957351965e-05, "loss": 1.0827, "step": 3367 }, { "epoch": 0.5309714220196635, "grad_norm": 1.046875, "learning_rate": 6.256685050833144e-05, "loss": 1.257, "step": 3368 }, { "epoch": 0.5311290738670565, "grad_norm": 1.4375, "learning_rate": 6.256258152248067e-05, "loss": 0.9571, "step": 3369 }, { "epoch": 0.5312867257144495, "grad_norm": 0.8515625, "learning_rate": 6.255831261597635e-05, "loss": 1.1417, "step": 3370 }, { "epoch": 0.5314443775618425, "grad_norm": 0.89453125, "learning_rate": 6.255404378882752e-05, "loss": 0.9161, "step": 3371 }, { "epoch": 0.5316020294092355, "grad_norm": 0.94140625, "learning_rate": 6.254977504104328e-05, "loss": 1.0195, "step": 3372 }, { "epoch": 0.5317596812566285, "grad_norm": 0.8828125, "learning_rate": 6.254550637263266e-05, "loss": 1.0788, "step": 3373 }, { "epoch": 0.5319173331040216, "grad_norm": 0.828125, "learning_rate": 6.25412377836047e-05, "loss": 1.0279, "step": 3374 }, { "epoch": 0.5320749849514146, "grad_norm": 1.0703125, "learning_rate": 6.253696927396843e-05, "loss": 1.0844, "step": 3375 }, { "epoch": 0.5322326367988076, "grad_norm": 0.87890625, "learning_rate": 6.253270084373288e-05, "loss": 0.8595, "step": 3376 }, { "epoch": 0.5323902886462006, "grad_norm": 0.91796875, "learning_rate": 6.252843249290719e-05, "loss": 1.11, "step": 3377 }, { "epoch": 0.5325479404935936, "grad_norm": 0.88671875, "learning_rate": 6.252416422150032e-05, "loss": 1.0162, "step": 3378 }, { "epoch": 0.5327055923409866, "grad_norm": 0.87109375, "learning_rate": 6.251989602952135e-05, "loss": 0.9173, "step": 3379 }, { "epoch": 0.5328632441883796, "grad_norm": 0.9765625, "learning_rate": 6.251562791697932e-05, "loss": 0.9907, "step": 3380 }, { "epoch": 0.5330208960357726, "grad_norm": 0.953125, "learning_rate": 6.251135988388326e-05, "loss": 0.8467, "step": 3381 }, { "epoch": 0.5331785478831657, "grad_norm": 1.0390625, "learning_rate": 6.250709193024224e-05, "loss": 1.0027, "step": 3382 }, { "epoch": 0.5333361997305587, "grad_norm": 0.9609375, "learning_rate": 6.25028240560653e-05, "loss": 0.9265, "step": 3383 }, { "epoch": 0.5334938515779517, "grad_norm": 0.9453125, "learning_rate": 6.249855626136145e-05, "loss": 1.0365, "step": 3384 }, { "epoch": 0.5336515034253447, "grad_norm": 0.96875, "learning_rate": 6.249428854613981e-05, "loss": 1.1204, "step": 3385 }, { "epoch": 0.5338091552727376, "grad_norm": 0.875, "learning_rate": 6.249002091040934e-05, "loss": 0.8398, "step": 3386 }, { "epoch": 0.5339668071201307, "grad_norm": 0.85546875, "learning_rate": 6.248575335417915e-05, "loss": 1.0203, "step": 3387 }, { "epoch": 0.5341244589675237, "grad_norm": 1.0625, "learning_rate": 6.248148587745828e-05, "loss": 0.9752, "step": 3388 }, { "epoch": 0.5342821108149167, "grad_norm": 0.8671875, "learning_rate": 6.247721848025574e-05, "loss": 1.0393, "step": 3389 }, { "epoch": 0.5344397626623097, "grad_norm": 0.9296875, "learning_rate": 6.247295116258059e-05, "loss": 0.8796, "step": 3390 }, { "epoch": 0.5345974145097028, "grad_norm": 0.98828125, "learning_rate": 6.246868392444185e-05, "loss": 0.9378, "step": 3391 }, { "epoch": 0.5347550663570958, "grad_norm": 1.0703125, "learning_rate": 6.24644167658486e-05, "loss": 1.3404, "step": 3392 }, { "epoch": 0.5349127182044888, "grad_norm": 0.85546875, "learning_rate": 6.24601496868099e-05, "loss": 0.9214, "step": 3393 }, { "epoch": 0.5350703700518817, "grad_norm": 0.80078125, "learning_rate": 6.245588268733475e-05, "loss": 0.8723, "step": 3394 }, { "epoch": 0.5352280218992748, "grad_norm": 0.9296875, "learning_rate": 6.245161576743223e-05, "loss": 1.0703, "step": 3395 }, { "epoch": 0.5353856737466678, "grad_norm": 0.953125, "learning_rate": 6.24473489271113e-05, "loss": 1.0099, "step": 3396 }, { "epoch": 0.5355433255940608, "grad_norm": 0.8984375, "learning_rate": 6.244308216638113e-05, "loss": 0.9258, "step": 3397 }, { "epoch": 0.5357009774414538, "grad_norm": 1.046875, "learning_rate": 6.243881548525068e-05, "loss": 1.4259, "step": 3398 }, { "epoch": 0.5358586292888469, "grad_norm": 0.875, "learning_rate": 6.243454888372904e-05, "loss": 1.046, "step": 3399 }, { "epoch": 0.5360162811362399, "grad_norm": 0.93359375, "learning_rate": 6.243028236182522e-05, "loss": 0.9435, "step": 3400 }, { "epoch": 0.5361739329836329, "grad_norm": 0.921875, "learning_rate": 6.242601591954822e-05, "loss": 1.0578, "step": 3401 }, { "epoch": 0.5363315848310258, "grad_norm": 0.890625, "learning_rate": 6.242174955690719e-05, "loss": 1.1444, "step": 3402 }, { "epoch": 0.5364892366784189, "grad_norm": 0.92578125, "learning_rate": 6.241748327391107e-05, "loss": 0.9384, "step": 3403 }, { "epoch": 0.5366468885258119, "grad_norm": 0.90234375, "learning_rate": 6.241321707056897e-05, "loss": 1.1036, "step": 3404 }, { "epoch": 0.5368045403732049, "grad_norm": 0.90625, "learning_rate": 6.240895094688991e-05, "loss": 1.0915, "step": 3405 }, { "epoch": 0.5369621922205979, "grad_norm": 0.88671875, "learning_rate": 6.240468490288291e-05, "loss": 1.0356, "step": 3406 }, { "epoch": 0.537119844067991, "grad_norm": 0.96875, "learning_rate": 6.240041893855706e-05, "loss": 1.0159, "step": 3407 }, { "epoch": 0.537277495915384, "grad_norm": 0.90625, "learning_rate": 6.239615305392136e-05, "loss": 1.0554, "step": 3408 }, { "epoch": 0.537435147762777, "grad_norm": 0.9296875, "learning_rate": 6.239188724898486e-05, "loss": 1.2192, "step": 3409 }, { "epoch": 0.5375927996101699, "grad_norm": 1.03125, "learning_rate": 6.238762152375661e-05, "loss": 0.8354, "step": 3410 }, { "epoch": 0.537750451457563, "grad_norm": 0.95703125, "learning_rate": 6.238335587824562e-05, "loss": 1.0254, "step": 3411 }, { "epoch": 0.537908103304956, "grad_norm": 1.0625, "learning_rate": 6.237909031246098e-05, "loss": 1.0759, "step": 3412 }, { "epoch": 0.538065755152349, "grad_norm": 0.87890625, "learning_rate": 6.237482482641173e-05, "loss": 0.964, "step": 3413 }, { "epoch": 0.538223406999742, "grad_norm": 1.0546875, "learning_rate": 6.237055942010686e-05, "loss": 0.8919, "step": 3414 }, { "epoch": 0.538381058847135, "grad_norm": 0.953125, "learning_rate": 6.236629409355545e-05, "loss": 1.0071, "step": 3415 }, { "epoch": 0.5385387106945281, "grad_norm": 0.88671875, "learning_rate": 6.23620288467665e-05, "loss": 1.0342, "step": 3416 }, { "epoch": 0.5386963625419211, "grad_norm": 0.87890625, "learning_rate": 6.23577636797491e-05, "loss": 0.9522, "step": 3417 }, { "epoch": 0.538854014389314, "grad_norm": 0.92578125, "learning_rate": 6.235349859251229e-05, "loss": 1.0359, "step": 3418 }, { "epoch": 0.539011666236707, "grad_norm": 1.1484375, "learning_rate": 6.234923358506508e-05, "loss": 0.9768, "step": 3419 }, { "epoch": 0.5391693180841001, "grad_norm": 0.92578125, "learning_rate": 6.23449686574165e-05, "loss": 1.1031, "step": 3420 }, { "epoch": 0.5393269699314931, "grad_norm": 0.9296875, "learning_rate": 6.234070380957559e-05, "loss": 1.064, "step": 3421 }, { "epoch": 0.5394846217788861, "grad_norm": 0.99609375, "learning_rate": 6.233643904155144e-05, "loss": 1.1468, "step": 3422 }, { "epoch": 0.5396422736262791, "grad_norm": 0.98046875, "learning_rate": 6.233217435335304e-05, "loss": 0.8327, "step": 3423 }, { "epoch": 0.5397999254736722, "grad_norm": 0.9921875, "learning_rate": 6.232790974498945e-05, "loss": 1.0967, "step": 3424 }, { "epoch": 0.5399575773210652, "grad_norm": 0.87890625, "learning_rate": 6.232364521646972e-05, "loss": 0.8968, "step": 3425 }, { "epoch": 0.5401152291684581, "grad_norm": 0.9375, "learning_rate": 6.231938076780284e-05, "loss": 1.1833, "step": 3426 }, { "epoch": 0.5402728810158511, "grad_norm": 0.91796875, "learning_rate": 6.231511639899789e-05, "loss": 0.9542, "step": 3427 }, { "epoch": 0.5404305328632442, "grad_norm": 0.98046875, "learning_rate": 6.23108521100639e-05, "loss": 1.0067, "step": 3428 }, { "epoch": 0.5405881847106372, "grad_norm": 0.87109375, "learning_rate": 6.230658790100991e-05, "loss": 1.0509, "step": 3429 }, { "epoch": 0.5407458365580302, "grad_norm": 1.0078125, "learning_rate": 6.230232377184495e-05, "loss": 1.2403, "step": 3430 }, { "epoch": 0.5409034884054232, "grad_norm": 0.88671875, "learning_rate": 6.229805972257802e-05, "loss": 0.8219, "step": 3431 }, { "epoch": 0.5410611402528163, "grad_norm": 1.0234375, "learning_rate": 6.229379575321824e-05, "loss": 1.2755, "step": 3432 }, { "epoch": 0.5412187921002093, "grad_norm": 1.0078125, "learning_rate": 6.228953186377459e-05, "loss": 1.1359, "step": 3433 }, { "epoch": 0.5413764439476022, "grad_norm": 0.95703125, "learning_rate": 6.228526805425614e-05, "loss": 1.1323, "step": 3434 }, { "epoch": 0.5415340957949952, "grad_norm": 1.015625, "learning_rate": 6.22810043246719e-05, "loss": 0.9417, "step": 3435 }, { "epoch": 0.5416917476423883, "grad_norm": 0.83203125, "learning_rate": 6.227674067503088e-05, "loss": 0.9541, "step": 3436 }, { "epoch": 0.5418493994897813, "grad_norm": 0.875, "learning_rate": 6.227247710534219e-05, "loss": 0.9886, "step": 3437 }, { "epoch": 0.5420070513371743, "grad_norm": 0.828125, "learning_rate": 6.226821361561483e-05, "loss": 0.9319, "step": 3438 }, { "epoch": 0.5421647031845673, "grad_norm": 0.84375, "learning_rate": 6.226395020585783e-05, "loss": 0.8324, "step": 3439 }, { "epoch": 0.5423223550319604, "grad_norm": 0.86328125, "learning_rate": 6.225968687608024e-05, "loss": 1.0523, "step": 3440 }, { "epoch": 0.5424800068793534, "grad_norm": 0.828125, "learning_rate": 6.225542362629103e-05, "loss": 0.8469, "step": 3441 }, { "epoch": 0.5426376587267464, "grad_norm": 0.953125, "learning_rate": 6.225116045649935e-05, "loss": 1.047, "step": 3442 }, { "epoch": 0.5427953105741393, "grad_norm": 0.91015625, "learning_rate": 6.224689736671417e-05, "loss": 0.939, "step": 3443 }, { "epoch": 0.5429529624215323, "grad_norm": 0.984375, "learning_rate": 6.224263435694453e-05, "loss": 1.0781, "step": 3444 }, { "epoch": 0.5431106142689254, "grad_norm": 0.85546875, "learning_rate": 6.223837142719946e-05, "loss": 0.9463, "step": 3445 }, { "epoch": 0.5432682661163184, "grad_norm": 1.1640625, "learning_rate": 6.2234108577488e-05, "loss": 1.2032, "step": 3446 }, { "epoch": 0.5434259179637114, "grad_norm": 1.0546875, "learning_rate": 6.222984580781921e-05, "loss": 1.1017, "step": 3447 }, { "epoch": 0.5435835698111045, "grad_norm": 1.0703125, "learning_rate": 6.222558311820209e-05, "loss": 0.9118, "step": 3448 }, { "epoch": 0.5437412216584975, "grad_norm": 0.97265625, "learning_rate": 6.222132050864569e-05, "loss": 1.061, "step": 3449 }, { "epoch": 0.5438988735058905, "grad_norm": 0.90625, "learning_rate": 6.221705797915905e-05, "loss": 0.9684, "step": 3450 }, { "epoch": 0.5440565253532834, "grad_norm": 1.125, "learning_rate": 6.221279552975114e-05, "loss": 1.0248, "step": 3451 }, { "epoch": 0.5442141772006764, "grad_norm": 0.87109375, "learning_rate": 6.220853316043111e-05, "loss": 1.0001, "step": 3452 }, { "epoch": 0.5443718290480695, "grad_norm": 0.91796875, "learning_rate": 6.220427087120793e-05, "loss": 1.0313, "step": 3453 }, { "epoch": 0.5445294808954625, "grad_norm": 0.82421875, "learning_rate": 6.220000866209064e-05, "loss": 0.8019, "step": 3454 }, { "epoch": 0.5446871327428555, "grad_norm": 1.0625, "learning_rate": 6.219574653308826e-05, "loss": 1.1443, "step": 3455 }, { "epoch": 0.5448447845902485, "grad_norm": 1.1796875, "learning_rate": 6.219148448420979e-05, "loss": 1.2077, "step": 3456 }, { "epoch": 0.5450024364376416, "grad_norm": 0.9296875, "learning_rate": 6.218722251546437e-05, "loss": 1.1358, "step": 3457 }, { "epoch": 0.5451600882850346, "grad_norm": 0.9765625, "learning_rate": 6.218296062686094e-05, "loss": 1.1086, "step": 3458 }, { "epoch": 0.5453177401324275, "grad_norm": 0.9140625, "learning_rate": 6.217869881840859e-05, "loss": 0.8284, "step": 3459 }, { "epoch": 0.5454753919798205, "grad_norm": 0.93359375, "learning_rate": 6.217443709011632e-05, "loss": 0.839, "step": 3460 }, { "epoch": 0.5456330438272136, "grad_norm": 0.9296875, "learning_rate": 6.217017544199312e-05, "loss": 1.0064, "step": 3461 }, { "epoch": 0.5457906956746066, "grad_norm": 0.9609375, "learning_rate": 6.21659138740481e-05, "loss": 1.2099, "step": 3462 }, { "epoch": 0.5459483475219996, "grad_norm": 0.94140625, "learning_rate": 6.21616523862903e-05, "loss": 0.9807, "step": 3463 }, { "epoch": 0.5461059993693926, "grad_norm": 1.0234375, "learning_rate": 6.215739097872868e-05, "loss": 1.1242, "step": 3464 }, { "epoch": 0.5462636512167857, "grad_norm": 0.9453125, "learning_rate": 6.215312965137232e-05, "loss": 0.9632, "step": 3465 }, { "epoch": 0.5464213030641787, "grad_norm": 0.87890625, "learning_rate": 6.214886840423021e-05, "loss": 1.0211, "step": 3466 }, { "epoch": 0.5465789549115716, "grad_norm": 0.89453125, "learning_rate": 6.214460723731145e-05, "loss": 1.0288, "step": 3467 }, { "epoch": 0.5467366067589646, "grad_norm": 1.0, "learning_rate": 6.214034615062503e-05, "loss": 1.1139, "step": 3468 }, { "epoch": 0.5468942586063577, "grad_norm": 0.87890625, "learning_rate": 6.213608514417997e-05, "loss": 1.2055, "step": 3469 }, { "epoch": 0.5470519104537507, "grad_norm": 0.8984375, "learning_rate": 6.213182421798529e-05, "loss": 1.017, "step": 3470 }, { "epoch": 0.5472095623011437, "grad_norm": 0.9375, "learning_rate": 6.212756337205006e-05, "loss": 0.9165, "step": 3471 }, { "epoch": 0.5473672141485367, "grad_norm": 1.15625, "learning_rate": 6.21233026063833e-05, "loss": 0.991, "step": 3472 }, { "epoch": 0.5475248659959298, "grad_norm": 1.09375, "learning_rate": 6.211904192099404e-05, "loss": 1.0471, "step": 3473 }, { "epoch": 0.5476825178433228, "grad_norm": 0.921875, "learning_rate": 6.211478131589131e-05, "loss": 1.1033, "step": 3474 }, { "epoch": 0.5478401696907157, "grad_norm": 0.96875, "learning_rate": 6.211052079108413e-05, "loss": 1.0447, "step": 3475 }, { "epoch": 0.5479978215381087, "grad_norm": 1.015625, "learning_rate": 6.21062603465815e-05, "loss": 1.0452, "step": 3476 }, { "epoch": 0.5481554733855017, "grad_norm": 0.92578125, "learning_rate": 6.210199998239251e-05, "loss": 0.9041, "step": 3477 }, { "epoch": 0.5483131252328948, "grad_norm": 0.90625, "learning_rate": 6.209773969852618e-05, "loss": 0.983, "step": 3478 }, { "epoch": 0.5484707770802878, "grad_norm": 0.9375, "learning_rate": 6.209347949499151e-05, "loss": 1.0631, "step": 3479 }, { "epoch": 0.5486284289276808, "grad_norm": 1.0703125, "learning_rate": 6.208921937179756e-05, "loss": 1.0733, "step": 3480 }, { "epoch": 0.5487860807750738, "grad_norm": 0.9296875, "learning_rate": 6.208495932895331e-05, "loss": 1.0309, "step": 3481 }, { "epoch": 0.5489437326224669, "grad_norm": 0.91015625, "learning_rate": 6.208069936646784e-05, "loss": 0.8806, "step": 3482 }, { "epoch": 0.5491013844698598, "grad_norm": 1.0, "learning_rate": 6.207643948435018e-05, "loss": 0.9734, "step": 3483 }, { "epoch": 0.5492590363172528, "grad_norm": 0.80078125, "learning_rate": 6.207217968260932e-05, "loss": 0.918, "step": 3484 }, { "epoch": 0.5494166881646458, "grad_norm": 1.28125, "learning_rate": 6.206791996125431e-05, "loss": 1.1494, "step": 3485 }, { "epoch": 0.5495743400120389, "grad_norm": 0.9765625, "learning_rate": 6.206366032029416e-05, "loss": 1.0591, "step": 3486 }, { "epoch": 0.5497319918594319, "grad_norm": 0.9140625, "learning_rate": 6.205940075973795e-05, "loss": 1.0137, "step": 3487 }, { "epoch": 0.5498896437068249, "grad_norm": 0.9375, "learning_rate": 6.205514127959462e-05, "loss": 1.0162, "step": 3488 }, { "epoch": 0.5500472955542179, "grad_norm": 0.90234375, "learning_rate": 6.205088187987329e-05, "loss": 1.2119, "step": 3489 }, { "epoch": 0.550204947401611, "grad_norm": 1.0703125, "learning_rate": 6.204662256058294e-05, "loss": 1.0059, "step": 3490 }, { "epoch": 0.5503625992490039, "grad_norm": 0.94140625, "learning_rate": 6.204236332173259e-05, "loss": 1.1431, "step": 3491 }, { "epoch": 0.5505202510963969, "grad_norm": 0.96484375, "learning_rate": 6.20381041633313e-05, "loss": 0.9722, "step": 3492 }, { "epoch": 0.5506779029437899, "grad_norm": 0.953125, "learning_rate": 6.203384508538808e-05, "loss": 1.1598, "step": 3493 }, { "epoch": 0.550835554791183, "grad_norm": 0.86328125, "learning_rate": 6.202958608791195e-05, "loss": 0.9417, "step": 3494 }, { "epoch": 0.550993206638576, "grad_norm": 0.92578125, "learning_rate": 6.202532717091193e-05, "loss": 0.8229, "step": 3495 }, { "epoch": 0.551150858485969, "grad_norm": 1.1484375, "learning_rate": 6.20210683343971e-05, "loss": 1.1489, "step": 3496 }, { "epoch": 0.551308510333362, "grad_norm": 0.875, "learning_rate": 6.201680957837637e-05, "loss": 1.0794, "step": 3497 }, { "epoch": 0.5514661621807551, "grad_norm": 0.9296875, "learning_rate": 6.201255090285888e-05, "loss": 0.912, "step": 3498 }, { "epoch": 0.551623814028148, "grad_norm": 0.96484375, "learning_rate": 6.200829230785362e-05, "loss": 0.8833, "step": 3499 }, { "epoch": 0.551781465875541, "grad_norm": 0.8828125, "learning_rate": 6.200403379336962e-05, "loss": 1.0313, "step": 3500 }, { "epoch": 0.551939117722934, "grad_norm": 1.234375, "learning_rate": 6.19997753594159e-05, "loss": 1.0761, "step": 3501 }, { "epoch": 0.552096769570327, "grad_norm": 1.0078125, "learning_rate": 6.199551700600144e-05, "loss": 1.2859, "step": 3502 }, { "epoch": 0.5522544214177201, "grad_norm": 1.0625, "learning_rate": 6.199125873313535e-05, "loss": 1.1384, "step": 3503 }, { "epoch": 0.5524120732651131, "grad_norm": 0.890625, "learning_rate": 6.198700054082662e-05, "loss": 1.0558, "step": 3504 }, { "epoch": 0.5525697251125061, "grad_norm": 0.9296875, "learning_rate": 6.198274242908427e-05, "loss": 0.8794, "step": 3505 }, { "epoch": 0.5527273769598992, "grad_norm": 0.9140625, "learning_rate": 6.197848439791731e-05, "loss": 0.9681, "step": 3506 }, { "epoch": 0.5528850288072921, "grad_norm": 0.9296875, "learning_rate": 6.197422644733475e-05, "loss": 1.014, "step": 3507 }, { "epoch": 0.5530426806546851, "grad_norm": 0.9140625, "learning_rate": 6.196996857734568e-05, "loss": 0.9072, "step": 3508 }, { "epoch": 0.5532003325020781, "grad_norm": 0.90625, "learning_rate": 6.19657107879591e-05, "loss": 0.8791, "step": 3509 }, { "epoch": 0.5533579843494711, "grad_norm": 1.3203125, "learning_rate": 6.196145307918399e-05, "loss": 1.0669, "step": 3510 }, { "epoch": 0.5535156361968642, "grad_norm": 0.94140625, "learning_rate": 6.195719545102944e-05, "loss": 0.9948, "step": 3511 }, { "epoch": 0.5536732880442572, "grad_norm": 0.97265625, "learning_rate": 6.19529379035044e-05, "loss": 0.9606, "step": 3512 }, { "epoch": 0.5538309398916502, "grad_norm": 1.0234375, "learning_rate": 6.194868043661796e-05, "loss": 0.9529, "step": 3513 }, { "epoch": 0.5539885917390432, "grad_norm": 0.9921875, "learning_rate": 6.194442305037913e-05, "loss": 1.0794, "step": 3514 }, { "epoch": 0.5541462435864362, "grad_norm": 1.25, "learning_rate": 6.194016574479691e-05, "loss": 0.9702, "step": 3515 }, { "epoch": 0.5543038954338292, "grad_norm": 0.91796875, "learning_rate": 6.193590851988032e-05, "loss": 0.958, "step": 3516 }, { "epoch": 0.5544615472812222, "grad_norm": 0.953125, "learning_rate": 6.193165137563836e-05, "loss": 1.3084, "step": 3517 }, { "epoch": 0.5546191991286152, "grad_norm": 0.87109375, "learning_rate": 6.192739431208014e-05, "loss": 1.0632, "step": 3518 }, { "epoch": 0.5547768509760083, "grad_norm": 0.97265625, "learning_rate": 6.192313732921465e-05, "loss": 1.1959, "step": 3519 }, { "epoch": 0.5549345028234013, "grad_norm": 1.0078125, "learning_rate": 6.191888042705086e-05, "loss": 1.0572, "step": 3520 }, { "epoch": 0.5550921546707943, "grad_norm": 0.90234375, "learning_rate": 6.191462360559783e-05, "loss": 1.1542, "step": 3521 }, { "epoch": 0.5552498065181873, "grad_norm": 0.97265625, "learning_rate": 6.191036686486456e-05, "loss": 1.0064, "step": 3522 }, { "epoch": 0.5554074583655803, "grad_norm": 0.98828125, "learning_rate": 6.190611020486012e-05, "loss": 1.0162, "step": 3523 }, { "epoch": 0.5555651102129733, "grad_norm": 1.078125, "learning_rate": 6.190185362559349e-05, "loss": 0.8536, "step": 3524 }, { "epoch": 0.5557227620603663, "grad_norm": 0.8984375, "learning_rate": 6.189759712707372e-05, "loss": 0.9489, "step": 3525 }, { "epoch": 0.5558804139077593, "grad_norm": 0.953125, "learning_rate": 6.189334070930982e-05, "loss": 0.9904, "step": 3526 }, { "epoch": 0.5560380657551524, "grad_norm": 0.9609375, "learning_rate": 6.188908437231074e-05, "loss": 1.0053, "step": 3527 }, { "epoch": 0.5561957176025454, "grad_norm": 1.0625, "learning_rate": 6.188482811608563e-05, "loss": 1.2527, "step": 3528 }, { "epoch": 0.5563533694499384, "grad_norm": 1.03125, "learning_rate": 6.188057194064343e-05, "loss": 0.9917, "step": 3529 }, { "epoch": 0.5565110212973314, "grad_norm": 1.0234375, "learning_rate": 6.187631584599319e-05, "loss": 1.1718, "step": 3530 }, { "epoch": 0.5566686731447243, "grad_norm": 0.953125, "learning_rate": 6.187205983214393e-05, "loss": 1.0718, "step": 3531 }, { "epoch": 0.5568263249921174, "grad_norm": 0.98046875, "learning_rate": 6.186780389910463e-05, "loss": 0.9906, "step": 3532 }, { "epoch": 0.5569839768395104, "grad_norm": 1.0859375, "learning_rate": 6.186354804688435e-05, "loss": 1.0674, "step": 3533 }, { "epoch": 0.5571416286869034, "grad_norm": 0.97265625, "learning_rate": 6.18592922754921e-05, "loss": 0.9837, "step": 3534 }, { "epoch": 0.5572992805342964, "grad_norm": 0.96875, "learning_rate": 6.185503658493693e-05, "loss": 0.9634, "step": 3535 }, { "epoch": 0.5574569323816895, "grad_norm": 1.171875, "learning_rate": 6.185078097522779e-05, "loss": 0.9674, "step": 3536 }, { "epoch": 0.5576145842290825, "grad_norm": 1.015625, "learning_rate": 6.184652544637373e-05, "loss": 1.2012, "step": 3537 }, { "epoch": 0.5577722360764755, "grad_norm": 1.046875, "learning_rate": 6.184226999838381e-05, "loss": 1.0553, "step": 3538 }, { "epoch": 0.5579298879238684, "grad_norm": 0.9296875, "learning_rate": 6.183801463126701e-05, "loss": 0.892, "step": 3539 }, { "epoch": 0.5580875397712615, "grad_norm": 0.98046875, "learning_rate": 6.183375934503234e-05, "loss": 0.97, "step": 3540 }, { "epoch": 0.5582451916186545, "grad_norm": 1.03125, "learning_rate": 6.182950413968886e-05, "loss": 1.1748, "step": 3541 }, { "epoch": 0.5584028434660475, "grad_norm": 0.96875, "learning_rate": 6.182524901524553e-05, "loss": 1.0689, "step": 3542 }, { "epoch": 0.5585604953134405, "grad_norm": 0.921875, "learning_rate": 6.182099397171144e-05, "loss": 0.9223, "step": 3543 }, { "epoch": 0.5587181471608336, "grad_norm": 0.87890625, "learning_rate": 6.181673900909556e-05, "loss": 0.9212, "step": 3544 }, { "epoch": 0.5588757990082266, "grad_norm": 1.0546875, "learning_rate": 6.181248412740692e-05, "loss": 1.1926, "step": 3545 }, { "epoch": 0.5590334508556196, "grad_norm": 0.95703125, "learning_rate": 6.180822932665454e-05, "loss": 1.1333, "step": 3546 }, { "epoch": 0.5591911027030125, "grad_norm": 0.90625, "learning_rate": 6.180397460684739e-05, "loss": 0.9353, "step": 3547 }, { "epoch": 0.5593487545504056, "grad_norm": 0.859375, "learning_rate": 6.179971996799459e-05, "loss": 1.0062, "step": 3548 }, { "epoch": 0.5595064063977986, "grad_norm": 0.9765625, "learning_rate": 6.179546541010508e-05, "loss": 0.9992, "step": 3549 }, { "epoch": 0.5596640582451916, "grad_norm": 0.984375, "learning_rate": 6.179121093318791e-05, "loss": 1.2038, "step": 3550 }, { "epoch": 0.5598217100925846, "grad_norm": 0.9765625, "learning_rate": 6.178695653725208e-05, "loss": 1.1454, "step": 3551 }, { "epoch": 0.5599793619399777, "grad_norm": 0.90625, "learning_rate": 6.17827022223066e-05, "loss": 0.9238, "step": 3552 }, { "epoch": 0.5601370137873707, "grad_norm": 0.9453125, "learning_rate": 6.177844798836051e-05, "loss": 1.0459, "step": 3553 }, { "epoch": 0.5602946656347637, "grad_norm": 0.95703125, "learning_rate": 6.17741938354228e-05, "loss": 1.0438, "step": 3554 }, { "epoch": 0.5604523174821566, "grad_norm": 1.0, "learning_rate": 6.176993976350251e-05, "loss": 1.1238, "step": 3555 }, { "epoch": 0.5606099693295497, "grad_norm": 1.1484375, "learning_rate": 6.176568577260866e-05, "loss": 1.1855, "step": 3556 }, { "epoch": 0.5607676211769427, "grad_norm": 0.90234375, "learning_rate": 6.176143186275021e-05, "loss": 0.9099, "step": 3557 }, { "epoch": 0.5609252730243357, "grad_norm": 0.95703125, "learning_rate": 6.175717803393627e-05, "loss": 0.9274, "step": 3558 }, { "epoch": 0.5610829248717287, "grad_norm": 0.8984375, "learning_rate": 6.175292428617578e-05, "loss": 0.9743, "step": 3559 }, { "epoch": 0.5612405767191218, "grad_norm": 0.9609375, "learning_rate": 6.17486706194778e-05, "loss": 0.9465, "step": 3560 }, { "epoch": 0.5613982285665148, "grad_norm": 0.96875, "learning_rate": 6.174441703385132e-05, "loss": 0.9705, "step": 3561 }, { "epoch": 0.5615558804139078, "grad_norm": 0.87890625, "learning_rate": 6.174016352930532e-05, "loss": 0.9486, "step": 3562 }, { "epoch": 0.5617135322613007, "grad_norm": 0.86328125, "learning_rate": 6.173591010584891e-05, "loss": 0.9749, "step": 3563 }, { "epoch": 0.5618711841086937, "grad_norm": 0.9375, "learning_rate": 6.173165676349103e-05, "loss": 0.9463, "step": 3564 }, { "epoch": 0.5620288359560868, "grad_norm": 0.88671875, "learning_rate": 6.172740350224074e-05, "loss": 1.0372, "step": 3565 }, { "epoch": 0.5621864878034798, "grad_norm": 0.91015625, "learning_rate": 6.172315032210701e-05, "loss": 0.9502, "step": 3566 }, { "epoch": 0.5623441396508728, "grad_norm": 0.87109375, "learning_rate": 6.171889722309884e-05, "loss": 0.8261, "step": 3567 }, { "epoch": 0.5625017914982658, "grad_norm": 1.109375, "learning_rate": 6.171464420522531e-05, "loss": 1.0187, "step": 3568 }, { "epoch": 0.5626594433456589, "grad_norm": 0.953125, "learning_rate": 6.171039126849543e-05, "loss": 1.0639, "step": 3569 }, { "epoch": 0.5628170951930519, "grad_norm": 0.94140625, "learning_rate": 6.170613841291817e-05, "loss": 1.1333, "step": 3570 }, { "epoch": 0.5629747470404448, "grad_norm": 0.9375, "learning_rate": 6.170188563850256e-05, "loss": 0.7898, "step": 3571 }, { "epoch": 0.5631323988878378, "grad_norm": 1.1328125, "learning_rate": 6.169763294525758e-05, "loss": 1.1087, "step": 3572 }, { "epoch": 0.5632900507352309, "grad_norm": 0.98828125, "learning_rate": 6.169338033319232e-05, "loss": 1.0119, "step": 3573 }, { "epoch": 0.5634477025826239, "grad_norm": 0.94921875, "learning_rate": 6.168912780231573e-05, "loss": 0.9388, "step": 3574 }, { "epoch": 0.5636053544300169, "grad_norm": 0.94921875, "learning_rate": 6.168487535263685e-05, "loss": 0.8846, "step": 3575 }, { "epoch": 0.5637630062774099, "grad_norm": 1.078125, "learning_rate": 6.168062298416471e-05, "loss": 1.2134, "step": 3576 }, { "epoch": 0.563920658124803, "grad_norm": 0.9140625, "learning_rate": 6.167637069690825e-05, "loss": 0.9596, "step": 3577 }, { "epoch": 0.564078309972196, "grad_norm": 0.921875, "learning_rate": 6.167211849087658e-05, "loss": 1.0673, "step": 3578 }, { "epoch": 0.5642359618195889, "grad_norm": 1.1796875, "learning_rate": 6.166786636607864e-05, "loss": 1.1317, "step": 3579 }, { "epoch": 0.5643936136669819, "grad_norm": 0.96875, "learning_rate": 6.166361432252348e-05, "loss": 1.0661, "step": 3580 }, { "epoch": 0.564551265514375, "grad_norm": 0.921875, "learning_rate": 6.165936236022009e-05, "loss": 0.9367, "step": 3581 }, { "epoch": 0.564708917361768, "grad_norm": 0.8046875, "learning_rate": 6.165511047917747e-05, "loss": 1.0096, "step": 3582 }, { "epoch": 0.564866569209161, "grad_norm": 0.953125, "learning_rate": 6.165085867940467e-05, "loss": 1.0306, "step": 3583 }, { "epoch": 0.565024221056554, "grad_norm": 1.0625, "learning_rate": 6.164660696091069e-05, "loss": 1.3307, "step": 3584 }, { "epoch": 0.5651818729039471, "grad_norm": 0.91796875, "learning_rate": 6.164235532370453e-05, "loss": 1.1037, "step": 3585 }, { "epoch": 0.5653395247513401, "grad_norm": 0.88671875, "learning_rate": 6.163810376779521e-05, "loss": 0.9014, "step": 3586 }, { "epoch": 0.565497176598733, "grad_norm": 0.99609375, "learning_rate": 6.16338522931917e-05, "loss": 1.1368, "step": 3587 }, { "epoch": 0.565654828446126, "grad_norm": 0.87890625, "learning_rate": 6.162960089990308e-05, "loss": 0.9397, "step": 3588 }, { "epoch": 0.565812480293519, "grad_norm": 0.84765625, "learning_rate": 6.162534958793833e-05, "loss": 0.9237, "step": 3589 }, { "epoch": 0.5659701321409121, "grad_norm": 0.859375, "learning_rate": 6.162109835730647e-05, "loss": 0.8484, "step": 3590 }, { "epoch": 0.5661277839883051, "grad_norm": 1.0546875, "learning_rate": 6.161684720801648e-05, "loss": 0.9828, "step": 3591 }, { "epoch": 0.5662854358356981, "grad_norm": 0.953125, "learning_rate": 6.161259614007736e-05, "loss": 0.9851, "step": 3592 }, { "epoch": 0.5664430876830912, "grad_norm": 0.875, "learning_rate": 6.160834515349819e-05, "loss": 0.7556, "step": 3593 }, { "epoch": 0.5666007395304842, "grad_norm": 1.9921875, "learning_rate": 6.160409424828793e-05, "loss": 1.1532, "step": 3594 }, { "epoch": 0.5667583913778771, "grad_norm": 0.84765625, "learning_rate": 6.159984342445559e-05, "loss": 0.9824, "step": 3595 }, { "epoch": 0.5669160432252701, "grad_norm": 1.0078125, "learning_rate": 6.159559268201021e-05, "loss": 1.0979, "step": 3596 }, { "epoch": 0.5670736950726631, "grad_norm": 0.953125, "learning_rate": 6.159134202096073e-05, "loss": 1.1249, "step": 3597 }, { "epoch": 0.5672313469200562, "grad_norm": 0.96484375, "learning_rate": 6.158709144131625e-05, "loss": 1.0888, "step": 3598 }, { "epoch": 0.5673889987674492, "grad_norm": 0.953125, "learning_rate": 6.158284094308572e-05, "loss": 1.054, "step": 3599 }, { "epoch": 0.5675466506148422, "grad_norm": 0.8828125, "learning_rate": 6.157859052627817e-05, "loss": 1.0251, "step": 3600 }, { "epoch": 0.5677043024622352, "grad_norm": 0.84375, "learning_rate": 6.15743401909026e-05, "loss": 1.1205, "step": 3601 }, { "epoch": 0.5678619543096283, "grad_norm": 0.8671875, "learning_rate": 6.157008993696798e-05, "loss": 0.902, "step": 3602 }, { "epoch": 0.5680196061570213, "grad_norm": 0.9921875, "learning_rate": 6.156583976448339e-05, "loss": 0.9983, "step": 3603 }, { "epoch": 0.5681772580044142, "grad_norm": 0.890625, "learning_rate": 6.156158967345781e-05, "loss": 0.9108, "step": 3604 }, { "epoch": 0.5683349098518072, "grad_norm": 0.9375, "learning_rate": 6.155733966390024e-05, "loss": 0.982, "step": 3605 }, { "epoch": 0.5684925616992003, "grad_norm": 0.90625, "learning_rate": 6.15530897358197e-05, "loss": 1.0778, "step": 3606 }, { "epoch": 0.5686502135465933, "grad_norm": 1.3828125, "learning_rate": 6.154883988922515e-05, "loss": 1.152, "step": 3607 }, { "epoch": 0.5688078653939863, "grad_norm": 0.9609375, "learning_rate": 6.154459012412565e-05, "loss": 0.9303, "step": 3608 }, { "epoch": 0.5689655172413793, "grad_norm": 1.0234375, "learning_rate": 6.154034044053023e-05, "loss": 1.1761, "step": 3609 }, { "epoch": 0.5691231690887724, "grad_norm": 0.90625, "learning_rate": 6.153609083844784e-05, "loss": 0.9178, "step": 3610 }, { "epoch": 0.5692808209361654, "grad_norm": 0.91015625, "learning_rate": 6.153184131788751e-05, "loss": 0.9623, "step": 3611 }, { "epoch": 0.5694384727835583, "grad_norm": 0.87890625, "learning_rate": 6.15275918788582e-05, "loss": 0.9303, "step": 3612 }, { "epoch": 0.5695961246309513, "grad_norm": 0.98828125, "learning_rate": 6.152334252136902e-05, "loss": 1.0123, "step": 3613 }, { "epoch": 0.5697537764783444, "grad_norm": 0.92578125, "learning_rate": 6.151909324542888e-05, "loss": 1.0151, "step": 3614 }, { "epoch": 0.5699114283257374, "grad_norm": 0.80078125, "learning_rate": 6.151484405104685e-05, "loss": 0.8561, "step": 3615 }, { "epoch": 0.5700690801731304, "grad_norm": 0.98828125, "learning_rate": 6.15105949382319e-05, "loss": 1.0678, "step": 3616 }, { "epoch": 0.5702267320205234, "grad_norm": 0.9375, "learning_rate": 6.150634590699302e-05, "loss": 0.9229, "step": 3617 }, { "epoch": 0.5703843838679165, "grad_norm": 0.92578125, "learning_rate": 6.150209695733927e-05, "loss": 0.9033, "step": 3618 }, { "epoch": 0.5705420357153095, "grad_norm": 0.98046875, "learning_rate": 6.149784808927963e-05, "loss": 0.8425, "step": 3619 }, { "epoch": 0.5706996875627024, "grad_norm": 1.0078125, "learning_rate": 6.149359930282308e-05, "loss": 1.0524, "step": 3620 }, { "epoch": 0.5708573394100954, "grad_norm": 0.953125, "learning_rate": 6.148935059797866e-05, "loss": 1.017, "step": 3621 }, { "epoch": 0.5710149912574884, "grad_norm": 1.0078125, "learning_rate": 6.148510197475533e-05, "loss": 0.9759, "step": 3622 }, { "epoch": 0.5711726431048815, "grad_norm": 0.859375, "learning_rate": 6.148085343316214e-05, "loss": 0.7434, "step": 3623 }, { "epoch": 0.5713302949522745, "grad_norm": 0.9921875, "learning_rate": 6.147660497320809e-05, "loss": 0.9951, "step": 3624 }, { "epoch": 0.5714879467996675, "grad_norm": 1.0703125, "learning_rate": 6.147235659490216e-05, "loss": 1.0435, "step": 3625 }, { "epoch": 0.5716455986470605, "grad_norm": 1.0234375, "learning_rate": 6.14681082982534e-05, "loss": 1.143, "step": 3626 }, { "epoch": 0.5718032504944536, "grad_norm": 0.90234375, "learning_rate": 6.146386008327072e-05, "loss": 0.9175, "step": 3627 }, { "epoch": 0.5719609023418465, "grad_norm": 0.83984375, "learning_rate": 6.145961194996323e-05, "loss": 1.0816, "step": 3628 }, { "epoch": 0.5721185541892395, "grad_norm": 1.0703125, "learning_rate": 6.145536389833989e-05, "loss": 1.14, "step": 3629 }, { "epoch": 0.5722762060366325, "grad_norm": 0.9453125, "learning_rate": 6.145111592840971e-05, "loss": 0.9537, "step": 3630 }, { "epoch": 0.5724338578840256, "grad_norm": 1.0234375, "learning_rate": 6.144686804018167e-05, "loss": 1.1233, "step": 3631 }, { "epoch": 0.5725915097314186, "grad_norm": 2.0625, "learning_rate": 6.144262023366476e-05, "loss": 1.021, "step": 3632 }, { "epoch": 0.5727491615788116, "grad_norm": 0.99609375, "learning_rate": 6.143837250886806e-05, "loss": 1.053, "step": 3633 }, { "epoch": 0.5729068134262046, "grad_norm": 0.9921875, "learning_rate": 6.143412486580051e-05, "loss": 1.0127, "step": 3634 }, { "epoch": 0.5730644652735977, "grad_norm": 0.984375, "learning_rate": 6.142987730447115e-05, "loss": 0.9543, "step": 3635 }, { "epoch": 0.5732221171209906, "grad_norm": 0.8984375, "learning_rate": 6.142562982488893e-05, "loss": 1.1389, "step": 3636 }, { "epoch": 0.5733797689683836, "grad_norm": 0.98828125, "learning_rate": 6.14213824270629e-05, "loss": 1.2776, "step": 3637 }, { "epoch": 0.5735374208157766, "grad_norm": 0.94921875, "learning_rate": 6.141713511100203e-05, "loss": 1.0018, "step": 3638 }, { "epoch": 0.5736950726631697, "grad_norm": 0.83203125, "learning_rate": 6.141288787671535e-05, "loss": 0.7984, "step": 3639 }, { "epoch": 0.5738527245105627, "grad_norm": 0.94140625, "learning_rate": 6.140864072421184e-05, "loss": 1.2511, "step": 3640 }, { "epoch": 0.5740103763579557, "grad_norm": 0.9453125, "learning_rate": 6.14043936535005e-05, "loss": 0.9615, "step": 3641 }, { "epoch": 0.5741680282053487, "grad_norm": 1.03125, "learning_rate": 6.140014666459036e-05, "loss": 0.94, "step": 3642 }, { "epoch": 0.5743256800527418, "grad_norm": 0.89453125, "learning_rate": 6.139589975749039e-05, "loss": 1.075, "step": 3643 }, { "epoch": 0.5744833319001347, "grad_norm": 0.921875, "learning_rate": 6.139165293220961e-05, "loss": 1.1126, "step": 3644 }, { "epoch": 0.5746409837475277, "grad_norm": 1.0, "learning_rate": 6.138740618875703e-05, "loss": 1.0052, "step": 3645 }, { "epoch": 0.5747986355949207, "grad_norm": 0.87890625, "learning_rate": 6.138315952714162e-05, "loss": 0.8576, "step": 3646 }, { "epoch": 0.5749562874423138, "grad_norm": 0.859375, "learning_rate": 6.137891294737241e-05, "loss": 0.8909, "step": 3647 }, { "epoch": 0.5751139392897068, "grad_norm": 0.90625, "learning_rate": 6.137466644945834e-05, "loss": 1.1745, "step": 3648 }, { "epoch": 0.5752715911370998, "grad_norm": 0.95703125, "learning_rate": 6.137042003340849e-05, "loss": 1.0669, "step": 3649 }, { "epoch": 0.5754292429844928, "grad_norm": 0.98046875, "learning_rate": 6.136617369923185e-05, "loss": 0.8868, "step": 3650 }, { "epoch": 0.5755868948318859, "grad_norm": 0.9453125, "learning_rate": 6.136192744693738e-05, "loss": 1.0726, "step": 3651 }, { "epoch": 0.5757445466792788, "grad_norm": 1.390625, "learning_rate": 6.135768127653409e-05, "loss": 0.9905, "step": 3652 }, { "epoch": 0.5759021985266718, "grad_norm": 0.91015625, "learning_rate": 6.135343518803096e-05, "loss": 0.953, "step": 3653 }, { "epoch": 0.5760598503740648, "grad_norm": 1.03125, "learning_rate": 6.134918918143703e-05, "loss": 0.9553, "step": 3654 }, { "epoch": 0.5762175022214578, "grad_norm": 0.95703125, "learning_rate": 6.134494325676133e-05, "loss": 1.14, "step": 3655 }, { "epoch": 0.5763751540688509, "grad_norm": 1.0, "learning_rate": 6.134069741401278e-05, "loss": 1.1095, "step": 3656 }, { "epoch": 0.5765328059162439, "grad_norm": 0.87890625, "learning_rate": 6.13364516532004e-05, "loss": 0.8708, "step": 3657 }, { "epoch": 0.5766904577636369, "grad_norm": 0.90234375, "learning_rate": 6.13322059743332e-05, "loss": 1.0143, "step": 3658 }, { "epoch": 0.57684810961103, "grad_norm": 0.9140625, "learning_rate": 6.132796037742019e-05, "loss": 0.96, "step": 3659 }, { "epoch": 0.5770057614584229, "grad_norm": 0.86328125, "learning_rate": 6.132371486247036e-05, "loss": 1.009, "step": 3660 }, { "epoch": 0.5771634133058159, "grad_norm": 0.9375, "learning_rate": 6.131946942949271e-05, "loss": 0.9901, "step": 3661 }, { "epoch": 0.5773210651532089, "grad_norm": 0.88671875, "learning_rate": 6.131522407849624e-05, "loss": 0.9055, "step": 3662 }, { "epoch": 0.5774787170006019, "grad_norm": 0.94140625, "learning_rate": 6.131097880948992e-05, "loss": 1.1075, "step": 3663 }, { "epoch": 0.577636368847995, "grad_norm": 0.91796875, "learning_rate": 6.130673362248278e-05, "loss": 0.9076, "step": 3664 }, { "epoch": 0.577794020695388, "grad_norm": 1.015625, "learning_rate": 6.130248851748382e-05, "loss": 1.0603, "step": 3665 }, { "epoch": 0.577951672542781, "grad_norm": 1.0390625, "learning_rate": 6.129824349450202e-05, "loss": 1.2511, "step": 3666 }, { "epoch": 0.578109324390174, "grad_norm": 1.0234375, "learning_rate": 6.129399855354637e-05, "loss": 1.145, "step": 3667 }, { "epoch": 0.578266976237567, "grad_norm": 1.0234375, "learning_rate": 6.128975369462584e-05, "loss": 1.1687, "step": 3668 }, { "epoch": 0.57842462808496, "grad_norm": 0.9765625, "learning_rate": 6.128550891774952e-05, "loss": 0.9894, "step": 3669 }, { "epoch": 0.578582279932353, "grad_norm": 1.03125, "learning_rate": 6.128126422292633e-05, "loss": 1.1256, "step": 3670 }, { "epoch": 0.578739931779746, "grad_norm": 0.99609375, "learning_rate": 6.12770196101653e-05, "loss": 1.0242, "step": 3671 }, { "epoch": 0.5788975836271391, "grad_norm": 0.89453125, "learning_rate": 6.12727750794754e-05, "loss": 0.8864, "step": 3672 }, { "epoch": 0.5790552354745321, "grad_norm": 0.89453125, "learning_rate": 6.126853063086562e-05, "loss": 1.1632, "step": 3673 }, { "epoch": 0.5792128873219251, "grad_norm": 0.98828125, "learning_rate": 6.126428626434501e-05, "loss": 1.0877, "step": 3674 }, { "epoch": 0.5793705391693181, "grad_norm": 0.84765625, "learning_rate": 6.126004197992253e-05, "loss": 0.8311, "step": 3675 }, { "epoch": 0.579528191016711, "grad_norm": 0.8984375, "learning_rate": 6.125579777760717e-05, "loss": 1.0003, "step": 3676 }, { "epoch": 0.5796858428641041, "grad_norm": 0.859375, "learning_rate": 6.125155365740794e-05, "loss": 0.8458, "step": 3677 }, { "epoch": 0.5798434947114971, "grad_norm": 0.9453125, "learning_rate": 6.124730961933378e-05, "loss": 0.967, "step": 3678 }, { "epoch": 0.5800011465588901, "grad_norm": 0.94921875, "learning_rate": 6.124306566339377e-05, "loss": 1.2209, "step": 3679 }, { "epoch": 0.5801587984062831, "grad_norm": 0.94921875, "learning_rate": 6.123882178959687e-05, "loss": 1.1856, "step": 3680 }, { "epoch": 0.5803164502536762, "grad_norm": 0.93359375, "learning_rate": 6.123457799795208e-05, "loss": 1.0488, "step": 3681 }, { "epoch": 0.5804741021010692, "grad_norm": 0.8828125, "learning_rate": 6.123033428846839e-05, "loss": 0.8963, "step": 3682 }, { "epoch": 0.5806317539484622, "grad_norm": 0.8203125, "learning_rate": 6.122609066115476e-05, "loss": 0.9018, "step": 3683 }, { "epoch": 0.5807894057958551, "grad_norm": 0.91796875, "learning_rate": 6.122184711602024e-05, "loss": 0.8157, "step": 3684 }, { "epoch": 0.5809470576432482, "grad_norm": 0.96484375, "learning_rate": 6.121760365307378e-05, "loss": 0.9985, "step": 3685 }, { "epoch": 0.5811047094906412, "grad_norm": 0.93359375, "learning_rate": 6.121336027232441e-05, "loss": 0.8355, "step": 3686 }, { "epoch": 0.5812623613380342, "grad_norm": 0.90234375, "learning_rate": 6.120911697378111e-05, "loss": 0.9189, "step": 3687 }, { "epoch": 0.5814200131854272, "grad_norm": 0.92578125, "learning_rate": 6.120487375745282e-05, "loss": 1.108, "step": 3688 }, { "epoch": 0.5815776650328203, "grad_norm": 0.9453125, "learning_rate": 6.120063062334862e-05, "loss": 0.9857, "step": 3689 }, { "epoch": 0.5817353168802133, "grad_norm": 0.984375, "learning_rate": 6.119638757147748e-05, "loss": 1.2278, "step": 3690 }, { "epoch": 0.5818929687276063, "grad_norm": 0.89453125, "learning_rate": 6.119214460184836e-05, "loss": 0.9237, "step": 3691 }, { "epoch": 0.5820506205749992, "grad_norm": 1.015625, "learning_rate": 6.118790171447029e-05, "loss": 1.0169, "step": 3692 }, { "epoch": 0.5822082724223923, "grad_norm": 0.93359375, "learning_rate": 6.11836589093522e-05, "loss": 1.0088, "step": 3693 }, { "epoch": 0.5823659242697853, "grad_norm": 0.9453125, "learning_rate": 6.117941618650315e-05, "loss": 1.0157, "step": 3694 }, { "epoch": 0.5825235761171783, "grad_norm": 0.9609375, "learning_rate": 6.117517354593212e-05, "loss": 1.2598, "step": 3695 }, { "epoch": 0.5826812279645713, "grad_norm": 0.8671875, "learning_rate": 6.117093098764808e-05, "loss": 0.8387, "step": 3696 }, { "epoch": 0.5828388798119644, "grad_norm": 1.015625, "learning_rate": 6.116668851166006e-05, "loss": 0.9466, "step": 3697 }, { "epoch": 0.5829965316593574, "grad_norm": 0.921875, "learning_rate": 6.116244611797697e-05, "loss": 1.2616, "step": 3698 }, { "epoch": 0.5831541835067504, "grad_norm": 0.98828125, "learning_rate": 6.115820380660789e-05, "loss": 1.0047, "step": 3699 }, { "epoch": 0.5833118353541433, "grad_norm": 0.921875, "learning_rate": 6.115396157756177e-05, "loss": 0.9286, "step": 3700 }, { "epoch": 0.5834694872015364, "grad_norm": 0.92578125, "learning_rate": 6.114971943084761e-05, "loss": 1.0702, "step": 3701 }, { "epoch": 0.5836271390489294, "grad_norm": 0.8984375, "learning_rate": 6.114547736647442e-05, "loss": 0.9522, "step": 3702 }, { "epoch": 0.5837847908963224, "grad_norm": 1.0859375, "learning_rate": 6.114123538445114e-05, "loss": 1.1444, "step": 3703 }, { "epoch": 0.5839424427437154, "grad_norm": 1.15625, "learning_rate": 6.11369934847868e-05, "loss": 1.0301, "step": 3704 }, { "epoch": 0.5841000945911085, "grad_norm": 0.94921875, "learning_rate": 6.11327516674904e-05, "loss": 1.0001, "step": 3705 }, { "epoch": 0.5842577464385015, "grad_norm": 0.8671875, "learning_rate": 6.112850993257089e-05, "loss": 0.9038, "step": 3706 }, { "epoch": 0.5844153982858945, "grad_norm": 0.90625, "learning_rate": 6.112426828003728e-05, "loss": 1.1516, "step": 3707 }, { "epoch": 0.5845730501332874, "grad_norm": 0.90234375, "learning_rate": 6.112002670989853e-05, "loss": 0.9596, "step": 3708 }, { "epoch": 0.5847307019806804, "grad_norm": 0.90234375, "learning_rate": 6.111578522216371e-05, "loss": 0.9554, "step": 3709 }, { "epoch": 0.5848883538280735, "grad_norm": 0.9921875, "learning_rate": 6.111154381684176e-05, "loss": 0.8495, "step": 3710 }, { "epoch": 0.5850460056754665, "grad_norm": 1.3515625, "learning_rate": 6.110730249394165e-05, "loss": 0.9454, "step": 3711 }, { "epoch": 0.5852036575228595, "grad_norm": 0.9765625, "learning_rate": 6.110306125347242e-05, "loss": 0.9966, "step": 3712 }, { "epoch": 0.5853613093702525, "grad_norm": 0.95703125, "learning_rate": 6.109882009544296e-05, "loss": 1.0292, "step": 3713 }, { "epoch": 0.5855189612176456, "grad_norm": 1.0625, "learning_rate": 6.109457901986238e-05, "loss": 1.1162, "step": 3714 }, { "epoch": 0.5856766130650386, "grad_norm": 0.97265625, "learning_rate": 6.109033802673963e-05, "loss": 1.1341, "step": 3715 }, { "epoch": 0.5858342649124315, "grad_norm": 0.83984375, "learning_rate": 6.108609711608367e-05, "loss": 0.8791, "step": 3716 }, { "epoch": 0.5859919167598245, "grad_norm": 1.046875, "learning_rate": 6.108185628790349e-05, "loss": 1.0924, "step": 3717 }, { "epoch": 0.5861495686072176, "grad_norm": 0.92578125, "learning_rate": 6.107761554220806e-05, "loss": 1.0934, "step": 3718 }, { "epoch": 0.5863072204546106, "grad_norm": 1.015625, "learning_rate": 6.107337487900643e-05, "loss": 1.1894, "step": 3719 }, { "epoch": 0.5864648723020036, "grad_norm": 1.0234375, "learning_rate": 6.106913429830759e-05, "loss": 0.8627, "step": 3720 }, { "epoch": 0.5866225241493966, "grad_norm": 0.83984375, "learning_rate": 6.106489380012047e-05, "loss": 0.908, "step": 3721 }, { "epoch": 0.5867801759967897, "grad_norm": 0.98828125, "learning_rate": 6.106065338445409e-05, "loss": 1.2072, "step": 3722 }, { "epoch": 0.5869378278441827, "grad_norm": 0.859375, "learning_rate": 6.105641305131741e-05, "loss": 0.9015, "step": 3723 }, { "epoch": 0.5870954796915756, "grad_norm": 0.9609375, "learning_rate": 6.105217280071944e-05, "loss": 1.0727, "step": 3724 }, { "epoch": 0.5872531315389686, "grad_norm": 0.88671875, "learning_rate": 6.104793263266919e-05, "loss": 0.8315, "step": 3725 }, { "epoch": 0.5874107833863617, "grad_norm": 1.046875, "learning_rate": 6.104369254717558e-05, "loss": 0.914, "step": 3726 }, { "epoch": 0.5875684352337547, "grad_norm": 0.9453125, "learning_rate": 6.103945254424766e-05, "loss": 0.9096, "step": 3727 }, { "epoch": 0.5877260870811477, "grad_norm": 1.03125, "learning_rate": 6.103521262389438e-05, "loss": 1.1415, "step": 3728 }, { "epoch": 0.5878837389285407, "grad_norm": 0.9453125, "learning_rate": 6.103097278612476e-05, "loss": 1.0662, "step": 3729 }, { "epoch": 0.5880413907759338, "grad_norm": 0.828125, "learning_rate": 6.102673303094777e-05, "loss": 0.8099, "step": 3730 }, { "epoch": 0.5881990426233268, "grad_norm": 0.98828125, "learning_rate": 6.102249335837238e-05, "loss": 1.0218, "step": 3731 }, { "epoch": 0.5883566944707197, "grad_norm": 0.953125, "learning_rate": 6.10182537684076e-05, "loss": 1.1913, "step": 3732 }, { "epoch": 0.5885143463181127, "grad_norm": 0.87109375, "learning_rate": 6.1014014261062345e-05, "loss": 0.9416, "step": 3733 }, { "epoch": 0.5886719981655057, "grad_norm": 0.87890625, "learning_rate": 6.100977483634571e-05, "loss": 0.9575, "step": 3734 }, { "epoch": 0.5888296500128988, "grad_norm": 0.8984375, "learning_rate": 6.100553549426662e-05, "loss": 1.0487, "step": 3735 }, { "epoch": 0.5889873018602918, "grad_norm": 0.97265625, "learning_rate": 6.100129623483408e-05, "loss": 0.9781, "step": 3736 }, { "epoch": 0.5891449537076848, "grad_norm": 0.9453125, "learning_rate": 6.099705705805706e-05, "loss": 1.1241, "step": 3737 }, { "epoch": 0.5893026055550779, "grad_norm": 0.9453125, "learning_rate": 6.099281796394452e-05, "loss": 1.1786, "step": 3738 }, { "epoch": 0.5894602574024709, "grad_norm": 1.0625, "learning_rate": 6.098857895250549e-05, "loss": 1.1393, "step": 3739 }, { "epoch": 0.5896179092498638, "grad_norm": 0.9609375, "learning_rate": 6.0984340023748955e-05, "loss": 1.1505, "step": 3740 }, { "epoch": 0.5897755610972568, "grad_norm": 1.0, "learning_rate": 6.098010117768387e-05, "loss": 1.1143, "step": 3741 }, { "epoch": 0.5899332129446498, "grad_norm": 0.92578125, "learning_rate": 6.097586241431923e-05, "loss": 0.9673, "step": 3742 }, { "epoch": 0.5900908647920429, "grad_norm": 1.203125, "learning_rate": 6.0971623733664005e-05, "loss": 0.8645, "step": 3743 }, { "epoch": 0.5902485166394359, "grad_norm": 0.89453125, "learning_rate": 6.096738513572722e-05, "loss": 0.8388, "step": 3744 }, { "epoch": 0.5904061684868289, "grad_norm": 0.8671875, "learning_rate": 6.096314662051779e-05, "loss": 0.8632, "step": 3745 }, { "epoch": 0.590563820334222, "grad_norm": 0.97265625, "learning_rate": 6.095890818804477e-05, "loss": 1.002, "step": 3746 }, { "epoch": 0.590721472181615, "grad_norm": 0.91015625, "learning_rate": 6.0954669838317123e-05, "loss": 0.8754, "step": 3747 }, { "epoch": 0.5908791240290079, "grad_norm": 0.875, "learning_rate": 6.095043157134379e-05, "loss": 0.9226, "step": 3748 }, { "epoch": 0.5910367758764009, "grad_norm": 0.9296875, "learning_rate": 6.094619338713381e-05, "loss": 1.0216, "step": 3749 }, { "epoch": 0.5911944277237939, "grad_norm": 0.93359375, "learning_rate": 6.094195528569614e-05, "loss": 0.9461, "step": 3750 }, { "epoch": 0.591352079571187, "grad_norm": 1.0, "learning_rate": 6.0937717267039765e-05, "loss": 0.8667, "step": 3751 }, { "epoch": 0.59150973141858, "grad_norm": 1.0234375, "learning_rate": 6.093347933117366e-05, "loss": 1.2498, "step": 3752 }, { "epoch": 0.591667383265973, "grad_norm": 0.96875, "learning_rate": 6.092924147810677e-05, "loss": 1.0493, "step": 3753 }, { "epoch": 0.591825035113366, "grad_norm": 0.96484375, "learning_rate": 6.092500370784817e-05, "loss": 0.9877, "step": 3754 }, { "epoch": 0.5919826869607591, "grad_norm": 1.0078125, "learning_rate": 6.092076602040678e-05, "loss": 1.1773, "step": 3755 }, { "epoch": 0.5921403388081521, "grad_norm": 1.1484375, "learning_rate": 6.091652841579161e-05, "loss": 1.0858, "step": 3756 }, { "epoch": 0.592297990655545, "grad_norm": 0.88671875, "learning_rate": 6.0912290894011606e-05, "loss": 0.9462, "step": 3757 }, { "epoch": 0.592455642502938, "grad_norm": 0.94140625, "learning_rate": 6.0908053455075734e-05, "loss": 1.0217, "step": 3758 }, { "epoch": 0.592613294350331, "grad_norm": 0.984375, "learning_rate": 6.090381609899305e-05, "loss": 0.9896, "step": 3759 }, { "epoch": 0.5927709461977241, "grad_norm": 1.0078125, "learning_rate": 6.08995788257725e-05, "loss": 1.0398, "step": 3760 }, { "epoch": 0.5929285980451171, "grad_norm": 0.984375, "learning_rate": 6.089534163542304e-05, "loss": 1.3516, "step": 3761 }, { "epoch": 0.5930862498925101, "grad_norm": 0.90625, "learning_rate": 6.089110452795368e-05, "loss": 1.114, "step": 3762 }, { "epoch": 0.5932439017399032, "grad_norm": 0.890625, "learning_rate": 6.088686750337335e-05, "loss": 0.9312, "step": 3763 }, { "epoch": 0.5934015535872962, "grad_norm": 0.96484375, "learning_rate": 6.08826305616911e-05, "loss": 0.8496, "step": 3764 }, { "epoch": 0.5935592054346891, "grad_norm": 0.92578125, "learning_rate": 6.087839370291588e-05, "loss": 1.192, "step": 3765 }, { "epoch": 0.5937168572820821, "grad_norm": 1.0234375, "learning_rate": 6.0874156927056683e-05, "loss": 1.2035, "step": 3766 }, { "epoch": 0.5938745091294751, "grad_norm": 0.953125, "learning_rate": 6.0869920234122454e-05, "loss": 1.0935, "step": 3767 }, { "epoch": 0.5940321609768682, "grad_norm": 0.8515625, "learning_rate": 6.0865683624122196e-05, "loss": 0.9468, "step": 3768 }, { "epoch": 0.5941898128242612, "grad_norm": 0.890625, "learning_rate": 6.086144709706488e-05, "loss": 0.9503, "step": 3769 }, { "epoch": 0.5943474646716542, "grad_norm": 1.0703125, "learning_rate": 6.085721065295951e-05, "loss": 1.2029, "step": 3770 }, { "epoch": 0.5945051165190472, "grad_norm": 0.9609375, "learning_rate": 6.085297429181504e-05, "loss": 0.9799, "step": 3771 }, { "epoch": 0.5946627683664403, "grad_norm": 0.921875, "learning_rate": 6.084873801364044e-05, "loss": 1.0189, "step": 3772 }, { "epoch": 0.5948204202138332, "grad_norm": 0.8515625, "learning_rate": 6.084450181844468e-05, "loss": 0.7438, "step": 3773 }, { "epoch": 0.5949780720612262, "grad_norm": 0.96875, "learning_rate": 6.084026570623678e-05, "loss": 1.1908, "step": 3774 }, { "epoch": 0.5951357239086192, "grad_norm": 0.94921875, "learning_rate": 6.083602967702571e-05, "loss": 0.9484, "step": 3775 }, { "epoch": 0.5952933757560123, "grad_norm": 0.9453125, "learning_rate": 6.0831793730820444e-05, "loss": 1.0996, "step": 3776 }, { "epoch": 0.5954510276034053, "grad_norm": 0.83984375, "learning_rate": 6.0827557867629945e-05, "loss": 0.9528, "step": 3777 }, { "epoch": 0.5956086794507983, "grad_norm": 1.0546875, "learning_rate": 6.08233220874632e-05, "loss": 1.0698, "step": 3778 }, { "epoch": 0.5957663312981913, "grad_norm": 1.015625, "learning_rate": 6.081908639032914e-05, "loss": 1.3273, "step": 3779 }, { "epoch": 0.5959239831455844, "grad_norm": 1.640625, "learning_rate": 6.081485077623683e-05, "loss": 1.2733, "step": 3780 }, { "epoch": 0.5960816349929773, "grad_norm": 0.95703125, "learning_rate": 6.08106152451952e-05, "loss": 0.9948, "step": 3781 }, { "epoch": 0.5962392868403703, "grad_norm": 5.15625, "learning_rate": 6.0806379797213245e-05, "loss": 1.0552, "step": 3782 }, { "epoch": 0.5963969386877633, "grad_norm": 1.0625, "learning_rate": 6.08021444322999e-05, "loss": 0.9877, "step": 3783 }, { "epoch": 0.5965545905351564, "grad_norm": 0.953125, "learning_rate": 6.079790915046415e-05, "loss": 1.0554, "step": 3784 }, { "epoch": 0.5967122423825494, "grad_norm": 0.9375, "learning_rate": 6.079367395171504e-05, "loss": 1.0957, "step": 3785 }, { "epoch": 0.5968698942299424, "grad_norm": 0.86328125, "learning_rate": 6.0789438836061474e-05, "loss": 0.8926, "step": 3786 }, { "epoch": 0.5970275460773354, "grad_norm": 0.9296875, "learning_rate": 6.078520380351247e-05, "loss": 1.1871, "step": 3787 }, { "epoch": 0.5971851979247285, "grad_norm": 0.8671875, "learning_rate": 6.078096885407698e-05, "loss": 0.987, "step": 3788 }, { "epoch": 0.5973428497721214, "grad_norm": 0.8671875, "learning_rate": 6.0776733987763955e-05, "loss": 0.926, "step": 3789 }, { "epoch": 0.5975005016195144, "grad_norm": 0.92578125, "learning_rate": 6.0772499204582437e-05, "loss": 0.9101, "step": 3790 }, { "epoch": 0.5976581534669074, "grad_norm": 1.0625, "learning_rate": 6.076826450454135e-05, "loss": 1.0006, "step": 3791 }, { "epoch": 0.5978158053143005, "grad_norm": 0.9296875, "learning_rate": 6.076402988764969e-05, "loss": 0.9464, "step": 3792 }, { "epoch": 0.5979734571616935, "grad_norm": 0.90625, "learning_rate": 6.075979535391641e-05, "loss": 0.9077, "step": 3793 }, { "epoch": 0.5981311090090865, "grad_norm": 0.953125, "learning_rate": 6.075556090335048e-05, "loss": 0.9244, "step": 3794 }, { "epoch": 0.5982887608564795, "grad_norm": 0.9453125, "learning_rate": 6.075132653596094e-05, "loss": 0.9194, "step": 3795 }, { "epoch": 0.5984464127038726, "grad_norm": 0.8984375, "learning_rate": 6.074709225175672e-05, "loss": 0.9811, "step": 3796 }, { "epoch": 0.5986040645512655, "grad_norm": 1.0390625, "learning_rate": 6.0742858050746776e-05, "loss": 0.8866, "step": 3797 }, { "epoch": 0.5987617163986585, "grad_norm": 0.9296875, "learning_rate": 6.073862393294012e-05, "loss": 1.0535, "step": 3798 }, { "epoch": 0.5989193682460515, "grad_norm": 0.80078125, "learning_rate": 6.073438989834566e-05, "loss": 0.9325, "step": 3799 }, { "epoch": 0.5990770200934445, "grad_norm": 0.921875, "learning_rate": 6.073015594697245e-05, "loss": 0.9716, "step": 3800 }, { "epoch": 0.5992346719408376, "grad_norm": 0.99609375, "learning_rate": 6.0725922078829434e-05, "loss": 1.1171, "step": 3801 }, { "epoch": 0.5993923237882306, "grad_norm": 0.90625, "learning_rate": 6.0721688293925575e-05, "loss": 1.0306, "step": 3802 }, { "epoch": 0.5995499756356236, "grad_norm": 0.9609375, "learning_rate": 6.071745459226986e-05, "loss": 1.0906, "step": 3803 }, { "epoch": 0.5997076274830166, "grad_norm": 0.9375, "learning_rate": 6.071322097387122e-05, "loss": 1.0168, "step": 3804 }, { "epoch": 0.5998652793304096, "grad_norm": 0.953125, "learning_rate": 6.0708987438738695e-05, "loss": 1.0013, "step": 3805 }, { "epoch": 0.6000229311778026, "grad_norm": 0.91796875, "learning_rate": 6.0704753986881224e-05, "loss": 1.0368, "step": 3806 }, { "epoch": 0.6001805830251956, "grad_norm": 0.93359375, "learning_rate": 6.0700520618307774e-05, "loss": 0.9536, "step": 3807 }, { "epoch": 0.6003382348725886, "grad_norm": 0.87890625, "learning_rate": 6.069628733302735e-05, "loss": 0.8368, "step": 3808 }, { "epoch": 0.6004958867199817, "grad_norm": 1.03125, "learning_rate": 6.069205413104886e-05, "loss": 0.8933, "step": 3809 }, { "epoch": 0.6006535385673747, "grad_norm": 0.9375, "learning_rate": 6.068782101238133e-05, "loss": 0.8778, "step": 3810 }, { "epoch": 0.6008111904147677, "grad_norm": 1.4765625, "learning_rate": 6.068358797703373e-05, "loss": 1.0116, "step": 3811 }, { "epoch": 0.6009688422621607, "grad_norm": 0.828125, "learning_rate": 6.067935502501498e-05, "loss": 0.9347, "step": 3812 }, { "epoch": 0.6011264941095537, "grad_norm": 0.84765625, "learning_rate": 6.067512215633413e-05, "loss": 0.8701, "step": 3813 }, { "epoch": 0.6012841459569467, "grad_norm": 0.91796875, "learning_rate": 6.067088937100007e-05, "loss": 1.0277, "step": 3814 }, { "epoch": 0.6014417978043397, "grad_norm": 0.96484375, "learning_rate": 6.066665666902185e-05, "loss": 0.972, "step": 3815 }, { "epoch": 0.6015994496517327, "grad_norm": 0.91796875, "learning_rate": 6.06624240504084e-05, "loss": 0.9593, "step": 3816 }, { "epoch": 0.6017571014991258, "grad_norm": 1.0, "learning_rate": 6.065819151516868e-05, "loss": 0.9156, "step": 3817 }, { "epoch": 0.6019147533465188, "grad_norm": 1.09375, "learning_rate": 6.065395906331168e-05, "loss": 1.1907, "step": 3818 }, { "epoch": 0.6020724051939118, "grad_norm": 0.98046875, "learning_rate": 6.064972669484634e-05, "loss": 1.0359, "step": 3819 }, { "epoch": 0.6022300570413048, "grad_norm": 0.94921875, "learning_rate": 6.064549440978169e-05, "loss": 1.0657, "step": 3820 }, { "epoch": 0.6023877088886977, "grad_norm": 0.86328125, "learning_rate": 6.064126220812665e-05, "loss": 0.7633, "step": 3821 }, { "epoch": 0.6025453607360908, "grad_norm": 0.89453125, "learning_rate": 6.063703008989021e-05, "loss": 0.9008, "step": 3822 }, { "epoch": 0.6027030125834838, "grad_norm": 0.93359375, "learning_rate": 6.0632798055081354e-05, "loss": 0.9777, "step": 3823 }, { "epoch": 0.6028606644308768, "grad_norm": 0.92578125, "learning_rate": 6.062856610370897e-05, "loss": 1.0232, "step": 3824 }, { "epoch": 0.6030183162782699, "grad_norm": 1.125, "learning_rate": 6.0624334235782146e-05, "loss": 1.3447, "step": 3825 }, { "epoch": 0.6031759681256629, "grad_norm": 0.91796875, "learning_rate": 6.062010245130978e-05, "loss": 0.9635, "step": 3826 }, { "epoch": 0.6033336199730559, "grad_norm": 0.90625, "learning_rate": 6.0615870750300865e-05, "loss": 0.8296, "step": 3827 }, { "epoch": 0.6034912718204489, "grad_norm": 1.046875, "learning_rate": 6.061163913276435e-05, "loss": 1.0955, "step": 3828 }, { "epoch": 0.6036489236678418, "grad_norm": 1.0234375, "learning_rate": 6.060740759870922e-05, "loss": 0.9466, "step": 3829 }, { "epoch": 0.6038065755152349, "grad_norm": 0.9140625, "learning_rate": 6.060317614814441e-05, "loss": 0.8579, "step": 3830 }, { "epoch": 0.6039642273626279, "grad_norm": 0.8359375, "learning_rate": 6.059894478107895e-05, "loss": 0.9249, "step": 3831 }, { "epoch": 0.6041218792100209, "grad_norm": 0.9296875, "learning_rate": 6.059471349752177e-05, "loss": 0.9536, "step": 3832 }, { "epoch": 0.6042795310574139, "grad_norm": 0.984375, "learning_rate": 6.059048229748185e-05, "loss": 1.0153, "step": 3833 }, { "epoch": 0.604437182904807, "grad_norm": 1.015625, "learning_rate": 6.058625118096812e-05, "loss": 1.1531, "step": 3834 }, { "epoch": 0.6045948347522, "grad_norm": 0.8671875, "learning_rate": 6.058202014798962e-05, "loss": 1.0149, "step": 3835 }, { "epoch": 0.604752486599593, "grad_norm": 1.0390625, "learning_rate": 6.057778919855525e-05, "loss": 1.2301, "step": 3836 }, { "epoch": 0.6049101384469859, "grad_norm": 0.88671875, "learning_rate": 6.057355833267401e-05, "loss": 0.9064, "step": 3837 }, { "epoch": 0.605067790294379, "grad_norm": 0.9453125, "learning_rate": 6.056932755035486e-05, "loss": 0.8718, "step": 3838 }, { "epoch": 0.605225442141772, "grad_norm": 0.9765625, "learning_rate": 6.056509685160672e-05, "loss": 0.8612, "step": 3839 }, { "epoch": 0.605383093989165, "grad_norm": 0.9453125, "learning_rate": 6.0560866236438654e-05, "loss": 1.0919, "step": 3840 }, { "epoch": 0.605540745836558, "grad_norm": 0.89453125, "learning_rate": 6.055663570485956e-05, "loss": 0.9215, "step": 3841 }, { "epoch": 0.6056983976839511, "grad_norm": 0.91015625, "learning_rate": 6.055240525687844e-05, "loss": 0.9596, "step": 3842 }, { "epoch": 0.6058560495313441, "grad_norm": 0.9609375, "learning_rate": 6.054817489250425e-05, "loss": 0.9909, "step": 3843 }, { "epoch": 0.6060137013787371, "grad_norm": 0.84375, "learning_rate": 6.054394461174589e-05, "loss": 0.8108, "step": 3844 }, { "epoch": 0.60617135322613, "grad_norm": 1.0078125, "learning_rate": 6.0539714414612434e-05, "loss": 1.0393, "step": 3845 }, { "epoch": 0.606329005073523, "grad_norm": 0.984375, "learning_rate": 6.05354843011128e-05, "loss": 1.1833, "step": 3846 }, { "epoch": 0.6064866569209161, "grad_norm": 1.0546875, "learning_rate": 6.053125427125595e-05, "loss": 0.9821, "step": 3847 }, { "epoch": 0.6066443087683091, "grad_norm": 0.9921875, "learning_rate": 6.052702432505084e-05, "loss": 0.8377, "step": 3848 }, { "epoch": 0.6068019606157021, "grad_norm": 0.96484375, "learning_rate": 6.0522794462506416e-05, "loss": 1.1503, "step": 3849 }, { "epoch": 0.6069596124630952, "grad_norm": 0.95703125, "learning_rate": 6.051856468363171e-05, "loss": 1.3072, "step": 3850 }, { "epoch": 0.6071172643104882, "grad_norm": 1.0234375, "learning_rate": 6.051433498843564e-05, "loss": 1.1149, "step": 3851 }, { "epoch": 0.6072749161578812, "grad_norm": 0.89453125, "learning_rate": 6.0510105376927184e-05, "loss": 0.9371, "step": 3852 }, { "epoch": 0.6074325680052741, "grad_norm": 0.97265625, "learning_rate": 6.050587584911531e-05, "loss": 1.0806, "step": 3853 }, { "epoch": 0.6075902198526671, "grad_norm": 1.0234375, "learning_rate": 6.050164640500895e-05, "loss": 0.8806, "step": 3854 }, { "epoch": 0.6077478717000602, "grad_norm": 0.8671875, "learning_rate": 6.049741704461712e-05, "loss": 0.9011, "step": 3855 }, { "epoch": 0.6079055235474532, "grad_norm": 0.9375, "learning_rate": 6.049318776794876e-05, "loss": 1.0227, "step": 3856 }, { "epoch": 0.6080631753948462, "grad_norm": 0.953125, "learning_rate": 6.0488958575012824e-05, "loss": 0.9647, "step": 3857 }, { "epoch": 0.6082208272422392, "grad_norm": 1.0234375, "learning_rate": 6.048472946581827e-05, "loss": 1.2149, "step": 3858 }, { "epoch": 0.6083784790896323, "grad_norm": 0.90234375, "learning_rate": 6.048050044037405e-05, "loss": 0.8682, "step": 3859 }, { "epoch": 0.6085361309370253, "grad_norm": 0.8984375, "learning_rate": 6.0476271498689174e-05, "loss": 0.8054, "step": 3860 }, { "epoch": 0.6086937827844182, "grad_norm": 0.90234375, "learning_rate": 6.04720426407726e-05, "loss": 0.9682, "step": 3861 }, { "epoch": 0.6088514346318112, "grad_norm": 0.91796875, "learning_rate": 6.046781386663326e-05, "loss": 0.8595, "step": 3862 }, { "epoch": 0.6090090864792043, "grad_norm": 0.984375, "learning_rate": 6.0463585176280134e-05, "loss": 1.2023, "step": 3863 }, { "epoch": 0.6091667383265973, "grad_norm": 0.90625, "learning_rate": 6.045935656972214e-05, "loss": 0.9886, "step": 3864 }, { "epoch": 0.6093243901739903, "grad_norm": 0.90625, "learning_rate": 6.0455128046968316e-05, "loss": 0.8924, "step": 3865 }, { "epoch": 0.6094820420213833, "grad_norm": 1.046875, "learning_rate": 6.045089960802759e-05, "loss": 1.0013, "step": 3866 }, { "epoch": 0.6096396938687764, "grad_norm": 0.87890625, "learning_rate": 6.0446671252908926e-05, "loss": 0.8369, "step": 3867 }, { "epoch": 0.6097973457161694, "grad_norm": 0.96875, "learning_rate": 6.044244298162127e-05, "loss": 0.9433, "step": 3868 }, { "epoch": 0.6099549975635623, "grad_norm": 1.015625, "learning_rate": 6.043821479417356e-05, "loss": 0.9105, "step": 3869 }, { "epoch": 0.6101126494109553, "grad_norm": 0.84765625, "learning_rate": 6.043398669057483e-05, "loss": 0.9041, "step": 3870 }, { "epoch": 0.6102703012583484, "grad_norm": 0.94140625, "learning_rate": 6.0429758670834015e-05, "loss": 1.0843, "step": 3871 }, { "epoch": 0.6104279531057414, "grad_norm": 1.0, "learning_rate": 6.0425530734960054e-05, "loss": 1.0184, "step": 3872 }, { "epoch": 0.6105856049531344, "grad_norm": 1.03125, "learning_rate": 6.0421302882961926e-05, "loss": 1.018, "step": 3873 }, { "epoch": 0.6107432568005274, "grad_norm": 0.95703125, "learning_rate": 6.041707511484855e-05, "loss": 0.9192, "step": 3874 }, { "epoch": 0.6109009086479205, "grad_norm": 0.8671875, "learning_rate": 6.041284743062896e-05, "loss": 1.0443, "step": 3875 }, { "epoch": 0.6110585604953135, "grad_norm": 1.03125, "learning_rate": 6.040861983031207e-05, "loss": 0.8376, "step": 3876 }, { "epoch": 0.6112162123427064, "grad_norm": 0.921875, "learning_rate": 6.0404392313906845e-05, "loss": 0.9152, "step": 3877 }, { "epoch": 0.6113738641900994, "grad_norm": 0.93359375, "learning_rate": 6.040016488142224e-05, "loss": 1.1457, "step": 3878 }, { "epoch": 0.6115315160374925, "grad_norm": 0.8984375, "learning_rate": 6.039593753286721e-05, "loss": 0.938, "step": 3879 }, { "epoch": 0.6116891678848855, "grad_norm": 0.9921875, "learning_rate": 6.039171026825075e-05, "loss": 0.9104, "step": 3880 }, { "epoch": 0.6118468197322785, "grad_norm": 0.94921875, "learning_rate": 6.038748308758179e-05, "loss": 0.9276, "step": 3881 }, { "epoch": 0.6120044715796715, "grad_norm": 1.0078125, "learning_rate": 6.038325599086929e-05, "loss": 1.1049, "step": 3882 }, { "epoch": 0.6121621234270646, "grad_norm": 0.93359375, "learning_rate": 6.037902897812222e-05, "loss": 1.0009, "step": 3883 }, { "epoch": 0.6123197752744576, "grad_norm": 0.91796875, "learning_rate": 6.037480204934951e-05, "loss": 0.8953, "step": 3884 }, { "epoch": 0.6124774271218505, "grad_norm": 1.3203125, "learning_rate": 6.0370575204560164e-05, "loss": 0.9871, "step": 3885 }, { "epoch": 0.6126350789692435, "grad_norm": 0.83203125, "learning_rate": 6.036634844376312e-05, "loss": 0.8286, "step": 3886 }, { "epoch": 0.6127927308166365, "grad_norm": 0.8984375, "learning_rate": 6.036212176696734e-05, "loss": 1.0444, "step": 3887 }, { "epoch": 0.6129503826640296, "grad_norm": 0.84765625, "learning_rate": 6.0357895174181775e-05, "loss": 0.8363, "step": 3888 }, { "epoch": 0.6131080345114226, "grad_norm": 0.97265625, "learning_rate": 6.035366866541535e-05, "loss": 0.9531, "step": 3889 }, { "epoch": 0.6132656863588156, "grad_norm": 0.85546875, "learning_rate": 6.034944224067709e-05, "loss": 0.9766, "step": 3890 }, { "epoch": 0.6134233382062086, "grad_norm": 1.03125, "learning_rate": 6.034521589997593e-05, "loss": 1.2584, "step": 3891 }, { "epoch": 0.6135809900536017, "grad_norm": 0.96875, "learning_rate": 6.034098964332082e-05, "loss": 1.0185, "step": 3892 }, { "epoch": 0.6137386419009946, "grad_norm": 0.87109375, "learning_rate": 6.0336763470720705e-05, "loss": 1.1305, "step": 3893 }, { "epoch": 0.6138962937483876, "grad_norm": 1.0546875, "learning_rate": 6.033253738218454e-05, "loss": 1.0223, "step": 3894 }, { "epoch": 0.6140539455957806, "grad_norm": 0.91015625, "learning_rate": 6.0328311377721305e-05, "loss": 0.9278, "step": 3895 }, { "epoch": 0.6142115974431737, "grad_norm": 1.0234375, "learning_rate": 6.032408545733996e-05, "loss": 1.2693, "step": 3896 }, { "epoch": 0.6143692492905667, "grad_norm": 0.91015625, "learning_rate": 6.031985962104942e-05, "loss": 0.9212, "step": 3897 }, { "epoch": 0.6145269011379597, "grad_norm": 0.90234375, "learning_rate": 6.03156338688587e-05, "loss": 0.9277, "step": 3898 }, { "epoch": 0.6146845529853527, "grad_norm": 2.203125, "learning_rate": 6.03114082007767e-05, "loss": 1.018, "step": 3899 }, { "epoch": 0.6148422048327458, "grad_norm": 1.0703125, "learning_rate": 6.030718261681241e-05, "loss": 0.9203, "step": 3900 }, { "epoch": 0.6149998566801387, "grad_norm": 1.0390625, "learning_rate": 6.030295711697478e-05, "loss": 1.0157, "step": 3901 }, { "epoch": 0.6151575085275317, "grad_norm": 0.9921875, "learning_rate": 6.0298731701272784e-05, "loss": 1.0371, "step": 3902 }, { "epoch": 0.6153151603749247, "grad_norm": 0.921875, "learning_rate": 6.029450636971534e-05, "loss": 1.0708, "step": 3903 }, { "epoch": 0.6154728122223178, "grad_norm": 0.875, "learning_rate": 6.0290281122311384e-05, "loss": 0.9155, "step": 3904 }, { "epoch": 0.6156304640697108, "grad_norm": 0.97265625, "learning_rate": 6.028605595906994e-05, "loss": 1.1189, "step": 3905 }, { "epoch": 0.6157881159171038, "grad_norm": 0.94140625, "learning_rate": 6.028183087999994e-05, "loss": 0.9114, "step": 3906 }, { "epoch": 0.6159457677644968, "grad_norm": 0.953125, "learning_rate": 6.027760588511033e-05, "loss": 1.2237, "step": 3907 }, { "epoch": 0.6161034196118899, "grad_norm": 0.96484375, "learning_rate": 6.027338097441006e-05, "loss": 0.9169, "step": 3908 }, { "epoch": 0.6162610714592828, "grad_norm": 0.84765625, "learning_rate": 6.026915614790805e-05, "loss": 1.0648, "step": 3909 }, { "epoch": 0.6164187233066758, "grad_norm": 1.0390625, "learning_rate": 6.026493140561333e-05, "loss": 0.9408, "step": 3910 }, { "epoch": 0.6165763751540688, "grad_norm": 0.8203125, "learning_rate": 6.026070674753481e-05, "loss": 0.9578, "step": 3911 }, { "epoch": 0.6167340270014618, "grad_norm": 0.96484375, "learning_rate": 6.0256482173681476e-05, "loss": 0.9915, "step": 3912 }, { "epoch": 0.6168916788488549, "grad_norm": 1.1015625, "learning_rate": 6.025225768406223e-05, "loss": 1.1405, "step": 3913 }, { "epoch": 0.6170493306962479, "grad_norm": 1.0078125, "learning_rate": 6.0248033278686044e-05, "loss": 1.1939, "step": 3914 }, { "epoch": 0.6172069825436409, "grad_norm": 0.96484375, "learning_rate": 6.02438089575619e-05, "loss": 0.9426, "step": 3915 }, { "epoch": 0.617364634391034, "grad_norm": 1.109375, "learning_rate": 6.02395847206987e-05, "loss": 1.07, "step": 3916 }, { "epoch": 0.617522286238427, "grad_norm": 0.93359375, "learning_rate": 6.023536056810546e-05, "loss": 0.8044, "step": 3917 }, { "epoch": 0.6176799380858199, "grad_norm": 0.875, "learning_rate": 6.0231136499791084e-05, "loss": 0.9561, "step": 3918 }, { "epoch": 0.6178375899332129, "grad_norm": 1.0, "learning_rate": 6.022691251576453e-05, "loss": 1.084, "step": 3919 }, { "epoch": 0.6179952417806059, "grad_norm": 1.0859375, "learning_rate": 6.022268861603479e-05, "loss": 1.2374, "step": 3920 }, { "epoch": 0.618152893627999, "grad_norm": 0.8515625, "learning_rate": 6.0218464800610786e-05, "loss": 0.9819, "step": 3921 }, { "epoch": 0.618310545475392, "grad_norm": 0.96875, "learning_rate": 6.021424106950146e-05, "loss": 0.9702, "step": 3922 }, { "epoch": 0.618468197322785, "grad_norm": 0.98046875, "learning_rate": 6.021001742271579e-05, "loss": 1.0531, "step": 3923 }, { "epoch": 0.618625849170178, "grad_norm": 1.0390625, "learning_rate": 6.020579386026271e-05, "loss": 1.0509, "step": 3924 }, { "epoch": 0.6187835010175711, "grad_norm": 0.9921875, "learning_rate": 6.0201570382151126e-05, "loss": 0.8691, "step": 3925 }, { "epoch": 0.618941152864964, "grad_norm": 0.96484375, "learning_rate": 6.019734698839009e-05, "loss": 1.0757, "step": 3926 }, { "epoch": 0.619098804712357, "grad_norm": 1.015625, "learning_rate": 6.01931236789885e-05, "loss": 1.1138, "step": 3927 }, { "epoch": 0.61925645655975, "grad_norm": 0.93359375, "learning_rate": 6.018890045395531e-05, "loss": 1.0332, "step": 3928 }, { "epoch": 0.6194141084071431, "grad_norm": 1.046875, "learning_rate": 6.0184677313299466e-05, "loss": 1.2614, "step": 3929 }, { "epoch": 0.6195717602545361, "grad_norm": 1.0390625, "learning_rate": 6.018045425702988e-05, "loss": 0.9975, "step": 3930 }, { "epoch": 0.6197294121019291, "grad_norm": 0.95703125, "learning_rate": 6.017623128515559e-05, "loss": 0.8931, "step": 3931 }, { "epoch": 0.6198870639493221, "grad_norm": 0.91015625, "learning_rate": 6.017200839768551e-05, "loss": 0.981, "step": 3932 }, { "epoch": 0.6200447157967152, "grad_norm": 0.89453125, "learning_rate": 6.016778559462857e-05, "loss": 1.123, "step": 3933 }, { "epoch": 0.6202023676441081, "grad_norm": 1.0078125, "learning_rate": 6.0163562875993726e-05, "loss": 1.2774, "step": 3934 }, { "epoch": 0.6203600194915011, "grad_norm": 1.1796875, "learning_rate": 6.0159340241789906e-05, "loss": 1.15, "step": 3935 }, { "epoch": 0.6205176713388941, "grad_norm": 0.91015625, "learning_rate": 6.015511769202612e-05, "loss": 0.8823, "step": 3936 }, { "epoch": 0.6206753231862872, "grad_norm": 0.8515625, "learning_rate": 6.015089522671129e-05, "loss": 0.7702, "step": 3937 }, { "epoch": 0.6208329750336802, "grad_norm": 0.96484375, "learning_rate": 6.014667284585436e-05, "loss": 1.0726, "step": 3938 }, { "epoch": 0.6209906268810732, "grad_norm": 0.92578125, "learning_rate": 6.014245054946428e-05, "loss": 0.9264, "step": 3939 }, { "epoch": 0.6211482787284662, "grad_norm": 0.91015625, "learning_rate": 6.0138228337549964e-05, "loss": 1.0241, "step": 3940 }, { "epoch": 0.6213059305758593, "grad_norm": 0.96875, "learning_rate": 6.013400621012043e-05, "loss": 0.8098, "step": 3941 }, { "epoch": 0.6214635824232522, "grad_norm": 0.92578125, "learning_rate": 6.0129784167184576e-05, "loss": 0.9866, "step": 3942 }, { "epoch": 0.6216212342706452, "grad_norm": 0.93359375, "learning_rate": 6.012556220875139e-05, "loss": 1.1052, "step": 3943 }, { "epoch": 0.6217788861180382, "grad_norm": 0.9375, "learning_rate": 6.0121340334829765e-05, "loss": 1.0257, "step": 3944 }, { "epoch": 0.6219365379654312, "grad_norm": 0.96875, "learning_rate": 6.0117118545428676e-05, "loss": 1.1729, "step": 3945 }, { "epoch": 0.6220941898128243, "grad_norm": 0.87109375, "learning_rate": 6.011289684055709e-05, "loss": 0.8727, "step": 3946 }, { "epoch": 0.6222518416602173, "grad_norm": 0.90234375, "learning_rate": 6.010867522022394e-05, "loss": 1.0254, "step": 3947 }, { "epoch": 0.6224094935076103, "grad_norm": 0.859375, "learning_rate": 6.0104453684438175e-05, "loss": 1.02, "step": 3948 }, { "epoch": 0.6225671453550033, "grad_norm": 0.92578125, "learning_rate": 6.010023223320874e-05, "loss": 0.966, "step": 3949 }, { "epoch": 0.6227247972023963, "grad_norm": 1.2109375, "learning_rate": 6.009601086654455e-05, "loss": 1.4213, "step": 3950 }, { "epoch": 0.6228824490497893, "grad_norm": 0.90234375, "learning_rate": 6.0091789584454613e-05, "loss": 0.9118, "step": 3951 }, { "epoch": 0.6230401008971823, "grad_norm": 0.96875, "learning_rate": 6.0087568386947855e-05, "loss": 0.9218, "step": 3952 }, { "epoch": 0.6231977527445753, "grad_norm": 1.1484375, "learning_rate": 6.008334727403322e-05, "loss": 1.0216, "step": 3953 }, { "epoch": 0.6233554045919684, "grad_norm": 0.921875, "learning_rate": 6.007912624571963e-05, "loss": 0.9363, "step": 3954 }, { "epoch": 0.6235130564393614, "grad_norm": 0.9375, "learning_rate": 6.0074905302016025e-05, "loss": 0.9949, "step": 3955 }, { "epoch": 0.6236707082867544, "grad_norm": 0.8203125, "learning_rate": 6.0070684442931414e-05, "loss": 0.7989, "step": 3956 }, { "epoch": 0.6238283601341474, "grad_norm": 0.85546875, "learning_rate": 6.0066463668474706e-05, "loss": 0.8889, "step": 3957 }, { "epoch": 0.6239860119815404, "grad_norm": 0.921875, "learning_rate": 6.006224297865484e-05, "loss": 1.094, "step": 3958 }, { "epoch": 0.6241436638289334, "grad_norm": 0.828125, "learning_rate": 6.0058022373480774e-05, "loss": 0.7891, "step": 3959 }, { "epoch": 0.6243013156763264, "grad_norm": 0.8359375, "learning_rate": 6.005380185296142e-05, "loss": 1.0674, "step": 3960 }, { "epoch": 0.6244589675237194, "grad_norm": 0.94921875, "learning_rate": 6.004958141710577e-05, "loss": 1.0569, "step": 3961 }, { "epoch": 0.6246166193711125, "grad_norm": 0.953125, "learning_rate": 6.004536106592276e-05, "loss": 0.8703, "step": 3962 }, { "epoch": 0.6247742712185055, "grad_norm": 0.90625, "learning_rate": 6.004114079942131e-05, "loss": 1.0172, "step": 3963 }, { "epoch": 0.6249319230658985, "grad_norm": 0.87890625, "learning_rate": 6.003692061761037e-05, "loss": 0.7993, "step": 3964 }, { "epoch": 0.6250895749132915, "grad_norm": 1.0625, "learning_rate": 6.003270052049887e-05, "loss": 0.958, "step": 3965 }, { "epoch": 0.6252472267606844, "grad_norm": 1.0546875, "learning_rate": 6.002848050809582e-05, "loss": 1.1425, "step": 3966 }, { "epoch": 0.6254048786080775, "grad_norm": 0.94921875, "learning_rate": 6.002426058041011e-05, "loss": 0.9984, "step": 3967 }, { "epoch": 0.6255625304554705, "grad_norm": 0.98828125, "learning_rate": 6.00200407374507e-05, "loss": 1.15, "step": 3968 }, { "epoch": 0.6257201823028635, "grad_norm": 1.0, "learning_rate": 6.001582097922653e-05, "loss": 1.0224, "step": 3969 }, { "epoch": 0.6258778341502566, "grad_norm": 1.0390625, "learning_rate": 6.00116013057465e-05, "loss": 1.1592, "step": 3970 }, { "epoch": 0.6260354859976496, "grad_norm": 0.87890625, "learning_rate": 6.000738171701963e-05, "loss": 0.9591, "step": 3971 }, { "epoch": 0.6261931378450426, "grad_norm": 0.94140625, "learning_rate": 6.000316221305484e-05, "loss": 1.1682, "step": 3972 }, { "epoch": 0.6263507896924356, "grad_norm": 0.80859375, "learning_rate": 5.9998942793861044e-05, "loss": 0.8825, "step": 3973 }, { "epoch": 0.6265084415398285, "grad_norm": 0.92578125, "learning_rate": 5.999472345944721e-05, "loss": 1.0027, "step": 3974 }, { "epoch": 0.6266660933872216, "grad_norm": 1.0, "learning_rate": 5.999050420982224e-05, "loss": 1.0554, "step": 3975 }, { "epoch": 0.6268237452346146, "grad_norm": 1.0234375, "learning_rate": 5.998628504499515e-05, "loss": 0.8671, "step": 3976 }, { "epoch": 0.6269813970820076, "grad_norm": 0.8359375, "learning_rate": 5.9982065964974845e-05, "loss": 0.9443, "step": 3977 }, { "epoch": 0.6271390489294006, "grad_norm": 0.9140625, "learning_rate": 5.997784696977024e-05, "loss": 1.0919, "step": 3978 }, { "epoch": 0.6272967007767937, "grad_norm": 0.9921875, "learning_rate": 5.997362805939033e-05, "loss": 1.088, "step": 3979 }, { "epoch": 0.6274543526241867, "grad_norm": 0.90234375, "learning_rate": 5.9969409233843984e-05, "loss": 0.8665, "step": 3980 }, { "epoch": 0.6276120044715797, "grad_norm": 0.94140625, "learning_rate": 5.996519049314022e-05, "loss": 1.0393, "step": 3981 }, { "epoch": 0.6277696563189726, "grad_norm": 0.9375, "learning_rate": 5.9960971837287926e-05, "loss": 0.9798, "step": 3982 }, { "epoch": 0.6279273081663657, "grad_norm": 1.0390625, "learning_rate": 5.9956753266296086e-05, "loss": 1.0209, "step": 3983 }, { "epoch": 0.6280849600137587, "grad_norm": 0.95703125, "learning_rate": 5.9952534780173606e-05, "loss": 0.9548, "step": 3984 }, { "epoch": 0.6282426118611517, "grad_norm": 0.91015625, "learning_rate": 5.994831637892944e-05, "loss": 0.8393, "step": 3985 }, { "epoch": 0.6284002637085447, "grad_norm": 0.9453125, "learning_rate": 5.994409806257253e-05, "loss": 1.1392, "step": 3986 }, { "epoch": 0.6285579155559378, "grad_norm": 0.90234375, "learning_rate": 5.9939879831111826e-05, "loss": 0.9246, "step": 3987 }, { "epoch": 0.6287155674033308, "grad_norm": 1.109375, "learning_rate": 5.993566168455624e-05, "loss": 1.0332, "step": 3988 }, { "epoch": 0.6288732192507238, "grad_norm": 0.9765625, "learning_rate": 5.993144362291474e-05, "loss": 0.9965, "step": 3989 }, { "epoch": 0.6290308710981167, "grad_norm": 0.890625, "learning_rate": 5.992722564619622e-05, "loss": 0.9224, "step": 3990 }, { "epoch": 0.6291885229455098, "grad_norm": 1.0078125, "learning_rate": 5.9923007754409686e-05, "loss": 1.0478, "step": 3991 }, { "epoch": 0.6293461747929028, "grad_norm": 1.0078125, "learning_rate": 5.991878994756406e-05, "loss": 0.9672, "step": 3992 }, { "epoch": 0.6295038266402958, "grad_norm": 1.03125, "learning_rate": 5.991457222566825e-05, "loss": 0.8852, "step": 3993 }, { "epoch": 0.6296614784876888, "grad_norm": 1.046875, "learning_rate": 5.991035458873122e-05, "loss": 1.0203, "step": 3994 }, { "epoch": 0.6298191303350819, "grad_norm": 0.90234375, "learning_rate": 5.990613703676187e-05, "loss": 0.9439, "step": 3995 }, { "epoch": 0.6299767821824749, "grad_norm": 0.90234375, "learning_rate": 5.99019195697692e-05, "loss": 0.8503, "step": 3996 }, { "epoch": 0.6301344340298679, "grad_norm": 0.96484375, "learning_rate": 5.9897702187762125e-05, "loss": 1.176, "step": 3997 }, { "epoch": 0.6302920858772608, "grad_norm": 1.0625, "learning_rate": 5.9893484890749586e-05, "loss": 0.9993, "step": 3998 }, { "epoch": 0.6304497377246538, "grad_norm": 1.0234375, "learning_rate": 5.98892676787405e-05, "loss": 0.9875, "step": 3999 }, { "epoch": 0.6306073895720469, "grad_norm": 0.9375, "learning_rate": 5.98850505517438e-05, "loss": 1.0499, "step": 4000 }, { "epoch": 0.6306073895720469, "eval_loss": 0.9967675805091858, "eval_runtime": 308.9704, "eval_samples_per_second": 32.366, "eval_steps_per_second": 0.676, "step": 4000 }, { "epoch": 0.6307650414194399, "grad_norm": 0.9609375, "learning_rate": 5.988083350976845e-05, "loss": 1.0275, "step": 4001 }, { "epoch": 0.6309226932668329, "grad_norm": 0.92578125, "learning_rate": 5.98766165528234e-05, "loss": 1.063, "step": 4002 }, { "epoch": 0.631080345114226, "grad_norm": 0.90234375, "learning_rate": 5.987239968091756e-05, "loss": 0.9901, "step": 4003 }, { "epoch": 0.631237996961619, "grad_norm": 0.98046875, "learning_rate": 5.9868182894059886e-05, "loss": 0.9539, "step": 4004 }, { "epoch": 0.631395648809012, "grad_norm": 0.90234375, "learning_rate": 5.986396619225927e-05, "loss": 1.0427, "step": 4005 }, { "epoch": 0.6315533006564049, "grad_norm": 0.98828125, "learning_rate": 5.9859749575524714e-05, "loss": 1.0904, "step": 4006 }, { "epoch": 0.6317109525037979, "grad_norm": 0.94140625, "learning_rate": 5.985553304386512e-05, "loss": 0.8973, "step": 4007 }, { "epoch": 0.631868604351191, "grad_norm": 0.859375, "learning_rate": 5.985131659728943e-05, "loss": 0.8621, "step": 4008 }, { "epoch": 0.632026256198584, "grad_norm": 1.0078125, "learning_rate": 5.984710023580658e-05, "loss": 1.037, "step": 4009 }, { "epoch": 0.632183908045977, "grad_norm": 1.046875, "learning_rate": 5.984288395942547e-05, "loss": 1.1111, "step": 4010 }, { "epoch": 0.63234155989337, "grad_norm": 0.9921875, "learning_rate": 5.983866776815511e-05, "loss": 1.0566, "step": 4011 }, { "epoch": 0.6324992117407631, "grad_norm": 0.98046875, "learning_rate": 5.9834451662004384e-05, "loss": 1.0283, "step": 4012 }, { "epoch": 0.6326568635881561, "grad_norm": 0.83203125, "learning_rate": 5.983023564098227e-05, "loss": 0.9869, "step": 4013 }, { "epoch": 0.632814515435549, "grad_norm": 0.84765625, "learning_rate": 5.982601970509766e-05, "loss": 0.8327, "step": 4014 }, { "epoch": 0.632972167282942, "grad_norm": 0.97265625, "learning_rate": 5.982180385435946e-05, "loss": 1.019, "step": 4015 }, { "epoch": 0.6331298191303351, "grad_norm": 0.921875, "learning_rate": 5.98175880887767e-05, "loss": 0.9727, "step": 4016 }, { "epoch": 0.6332874709777281, "grad_norm": 0.95703125, "learning_rate": 5.981337240835826e-05, "loss": 1.0572, "step": 4017 }, { "epoch": 0.6334451228251211, "grad_norm": 0.87890625, "learning_rate": 5.9809156813113075e-05, "loss": 1.0114, "step": 4018 }, { "epoch": 0.6336027746725141, "grad_norm": 1.0078125, "learning_rate": 5.98049413030501e-05, "loss": 0.9712, "step": 4019 }, { "epoch": 0.6337604265199072, "grad_norm": 0.9453125, "learning_rate": 5.98007258781782e-05, "loss": 1.0276, "step": 4020 }, { "epoch": 0.6339180783673002, "grad_norm": 1.3515625, "learning_rate": 5.979651053850641e-05, "loss": 1.2735, "step": 4021 }, { "epoch": 0.6340757302146931, "grad_norm": 0.93359375, "learning_rate": 5.979229528404362e-05, "loss": 1.1118, "step": 4022 }, { "epoch": 0.6342333820620861, "grad_norm": 0.94140625, "learning_rate": 5.9788080114798765e-05, "loss": 1.0267, "step": 4023 }, { "epoch": 0.6343910339094792, "grad_norm": 0.96484375, "learning_rate": 5.9783865030780764e-05, "loss": 1.0634, "step": 4024 }, { "epoch": 0.6345486857568722, "grad_norm": 0.87890625, "learning_rate": 5.977965003199855e-05, "loss": 1.0741, "step": 4025 }, { "epoch": 0.6347063376042652, "grad_norm": 0.85546875, "learning_rate": 5.977543511846109e-05, "loss": 1.0265, "step": 4026 }, { "epoch": 0.6348639894516582, "grad_norm": 1.203125, "learning_rate": 5.977122029017729e-05, "loss": 1.4199, "step": 4027 }, { "epoch": 0.6350216412990513, "grad_norm": 0.82421875, "learning_rate": 5.976700554715608e-05, "loss": 0.8776, "step": 4028 }, { "epoch": 0.6351792931464443, "grad_norm": 0.84765625, "learning_rate": 5.976279088940643e-05, "loss": 0.9833, "step": 4029 }, { "epoch": 0.6353369449938372, "grad_norm": 1.0078125, "learning_rate": 5.9758576316937186e-05, "loss": 0.9782, "step": 4030 }, { "epoch": 0.6354945968412302, "grad_norm": 0.88671875, "learning_rate": 5.975436182975739e-05, "loss": 0.9362, "step": 4031 }, { "epoch": 0.6356522486886232, "grad_norm": 1.015625, "learning_rate": 5.975014742787591e-05, "loss": 1.1218, "step": 4032 }, { "epoch": 0.6358099005360163, "grad_norm": 1.078125, "learning_rate": 5.97459331113017e-05, "loss": 1.0512, "step": 4033 }, { "epoch": 0.6359675523834093, "grad_norm": 0.95703125, "learning_rate": 5.9741718880043685e-05, "loss": 1.0428, "step": 4034 }, { "epoch": 0.6361252042308023, "grad_norm": 0.9296875, "learning_rate": 5.973750473411075e-05, "loss": 0.87, "step": 4035 }, { "epoch": 0.6362828560781953, "grad_norm": 0.8359375, "learning_rate": 5.9733290673511925e-05, "loss": 0.9724, "step": 4036 }, { "epoch": 0.6364405079255884, "grad_norm": 0.9609375, "learning_rate": 5.972907669825607e-05, "loss": 0.8214, "step": 4037 }, { "epoch": 0.6365981597729813, "grad_norm": 0.96875, "learning_rate": 5.972486280835214e-05, "loss": 1.0852, "step": 4038 }, { "epoch": 0.6367558116203743, "grad_norm": 1.2890625, "learning_rate": 5.972064900380907e-05, "loss": 1.1032, "step": 4039 }, { "epoch": 0.6369134634677673, "grad_norm": 0.7734375, "learning_rate": 5.971643528463575e-05, "loss": 0.8922, "step": 4040 }, { "epoch": 0.6370711153151604, "grad_norm": 0.98046875, "learning_rate": 5.971222165084116e-05, "loss": 0.7775, "step": 4041 }, { "epoch": 0.6372287671625534, "grad_norm": 0.96484375, "learning_rate": 5.970800810243423e-05, "loss": 0.9469, "step": 4042 }, { "epoch": 0.6373864190099464, "grad_norm": 0.984375, "learning_rate": 5.970379463942387e-05, "loss": 1.0401, "step": 4043 }, { "epoch": 0.6375440708573394, "grad_norm": 0.9375, "learning_rate": 5.969958126181902e-05, "loss": 1.0152, "step": 4044 }, { "epoch": 0.6377017227047325, "grad_norm": 0.921875, "learning_rate": 5.969536796962858e-05, "loss": 0.9256, "step": 4045 }, { "epoch": 0.6378593745521254, "grad_norm": 0.9296875, "learning_rate": 5.969115476286151e-05, "loss": 1.1293, "step": 4046 }, { "epoch": 0.6380170263995184, "grad_norm": 0.98046875, "learning_rate": 5.968694164152675e-05, "loss": 1.2149, "step": 4047 }, { "epoch": 0.6381746782469114, "grad_norm": 1.171875, "learning_rate": 5.96827286056332e-05, "loss": 1.0217, "step": 4048 }, { "epoch": 0.6383323300943045, "grad_norm": 0.93359375, "learning_rate": 5.9678515655189806e-05, "loss": 1.0468, "step": 4049 }, { "epoch": 0.6384899819416975, "grad_norm": 0.93359375, "learning_rate": 5.967430279020547e-05, "loss": 0.9772, "step": 4050 }, { "epoch": 0.6386476337890905, "grad_norm": 0.88671875, "learning_rate": 5.967009001068917e-05, "loss": 0.9117, "step": 4051 }, { "epoch": 0.6388052856364835, "grad_norm": 0.90234375, "learning_rate": 5.966587731664981e-05, "loss": 1.0482, "step": 4052 }, { "epoch": 0.6389629374838766, "grad_norm": 0.9453125, "learning_rate": 5.966166470809631e-05, "loss": 0.9544, "step": 4053 }, { "epoch": 0.6391205893312695, "grad_norm": 0.96484375, "learning_rate": 5.965745218503762e-05, "loss": 1.1745, "step": 4054 }, { "epoch": 0.6392782411786625, "grad_norm": 0.921875, "learning_rate": 5.96532397474826e-05, "loss": 0.8989, "step": 4055 }, { "epoch": 0.6394358930260555, "grad_norm": 1.0234375, "learning_rate": 5.964902739544029e-05, "loss": 0.9273, "step": 4056 }, { "epoch": 0.6395935448734485, "grad_norm": 0.87890625, "learning_rate": 5.964481512891955e-05, "loss": 1.074, "step": 4057 }, { "epoch": 0.6397511967208416, "grad_norm": 0.93359375, "learning_rate": 5.964060294792933e-05, "loss": 1.0031, "step": 4058 }, { "epoch": 0.6399088485682346, "grad_norm": 1.0703125, "learning_rate": 5.963639085247852e-05, "loss": 0.898, "step": 4059 }, { "epoch": 0.6400665004156276, "grad_norm": 1.0546875, "learning_rate": 5.963217884257606e-05, "loss": 1.1751, "step": 4060 }, { "epoch": 0.6402241522630207, "grad_norm": 0.97265625, "learning_rate": 5.962796691823092e-05, "loss": 1.065, "step": 4061 }, { "epoch": 0.6403818041104136, "grad_norm": 1.0, "learning_rate": 5.962375507945198e-05, "loss": 1.0746, "step": 4062 }, { "epoch": 0.6405394559578066, "grad_norm": 0.8359375, "learning_rate": 5.9619543326248196e-05, "loss": 1.0416, "step": 4063 }, { "epoch": 0.6406971078051996, "grad_norm": 0.90234375, "learning_rate": 5.961533165862848e-05, "loss": 0.9557, "step": 4064 }, { "epoch": 0.6408547596525926, "grad_norm": 0.8046875, "learning_rate": 5.961112007660177e-05, "loss": 0.8417, "step": 4065 }, { "epoch": 0.6410124114999857, "grad_norm": 0.921875, "learning_rate": 5.960690858017695e-05, "loss": 1.0058, "step": 4066 }, { "epoch": 0.6411700633473787, "grad_norm": 1.0, "learning_rate": 5.9602697169363e-05, "loss": 1.0221, "step": 4067 }, { "epoch": 0.6413277151947717, "grad_norm": 1.03125, "learning_rate": 5.9598485844168805e-05, "loss": 0.9815, "step": 4068 }, { "epoch": 0.6414853670421647, "grad_norm": 0.94140625, "learning_rate": 5.959427460460334e-05, "loss": 0.808, "step": 4069 }, { "epoch": 0.6416430188895578, "grad_norm": 0.9765625, "learning_rate": 5.959006345067548e-05, "loss": 1.0907, "step": 4070 }, { "epoch": 0.6418006707369507, "grad_norm": 0.984375, "learning_rate": 5.958585238239418e-05, "loss": 1.153, "step": 4071 }, { "epoch": 0.6419583225843437, "grad_norm": 0.828125, "learning_rate": 5.958164139976835e-05, "loss": 0.9838, "step": 4072 }, { "epoch": 0.6421159744317367, "grad_norm": 0.98828125, "learning_rate": 5.957743050280693e-05, "loss": 1.2096, "step": 4073 }, { "epoch": 0.6422736262791298, "grad_norm": 0.91796875, "learning_rate": 5.9573219691518836e-05, "loss": 1.0842, "step": 4074 }, { "epoch": 0.6424312781265228, "grad_norm": 0.94921875, "learning_rate": 5.9569008965913e-05, "loss": 1.0536, "step": 4075 }, { "epoch": 0.6425889299739158, "grad_norm": 1.0078125, "learning_rate": 5.9564798325998286e-05, "loss": 0.8884, "step": 4076 }, { "epoch": 0.6427465818213088, "grad_norm": 0.80078125, "learning_rate": 5.956058777178372e-05, "loss": 0.8034, "step": 4077 }, { "epoch": 0.6429042336687019, "grad_norm": 0.89453125, "learning_rate": 5.955637730327816e-05, "loss": 1.0051, "step": 4078 }, { "epoch": 0.6430618855160948, "grad_norm": 0.95703125, "learning_rate": 5.955216692049057e-05, "loss": 0.9429, "step": 4079 }, { "epoch": 0.6432195373634878, "grad_norm": 0.90625, "learning_rate": 5.954795662342985e-05, "loss": 1.0221, "step": 4080 }, { "epoch": 0.6433771892108808, "grad_norm": 0.96875, "learning_rate": 5.954374641210486e-05, "loss": 0.9418, "step": 4081 }, { "epoch": 0.6435348410582739, "grad_norm": 0.87109375, "learning_rate": 5.953953628652464e-05, "loss": 0.951, "step": 4082 }, { "epoch": 0.6436924929056669, "grad_norm": 0.94921875, "learning_rate": 5.9535326246698065e-05, "loss": 1.1242, "step": 4083 }, { "epoch": 0.6438501447530599, "grad_norm": 0.93359375, "learning_rate": 5.953111629263405e-05, "loss": 1.083, "step": 4084 }, { "epoch": 0.6440077966004529, "grad_norm": 0.90625, "learning_rate": 5.952690642434151e-05, "loss": 0.9453, "step": 4085 }, { "epoch": 0.644165448447846, "grad_norm": 0.859375, "learning_rate": 5.9522696641829344e-05, "loss": 0.8229, "step": 4086 }, { "epoch": 0.6443231002952389, "grad_norm": 1.0, "learning_rate": 5.951848694510655e-05, "loss": 1.0224, "step": 4087 }, { "epoch": 0.6444807521426319, "grad_norm": 0.98046875, "learning_rate": 5.951427733418202e-05, "loss": 1.2275, "step": 4088 }, { "epoch": 0.6446384039900249, "grad_norm": 0.953125, "learning_rate": 5.951006780906466e-05, "loss": 1.051, "step": 4089 }, { "epoch": 0.644796055837418, "grad_norm": 1.109375, "learning_rate": 5.950585836976339e-05, "loss": 0.8892, "step": 4090 }, { "epoch": 0.644953707684811, "grad_norm": 0.9453125, "learning_rate": 5.950164901628713e-05, "loss": 0.9013, "step": 4091 }, { "epoch": 0.645111359532204, "grad_norm": 0.91796875, "learning_rate": 5.949743974864482e-05, "loss": 0.9337, "step": 4092 }, { "epoch": 0.645269011379597, "grad_norm": 0.98046875, "learning_rate": 5.949323056684537e-05, "loss": 1.0974, "step": 4093 }, { "epoch": 0.64542666322699, "grad_norm": 0.890625, "learning_rate": 5.948902147089772e-05, "loss": 0.8279, "step": 4094 }, { "epoch": 0.645584315074383, "grad_norm": 0.9296875, "learning_rate": 5.9484812460810754e-05, "loss": 0.8438, "step": 4095 }, { "epoch": 0.645741966921776, "grad_norm": 0.9609375, "learning_rate": 5.948060353659338e-05, "loss": 1.1735, "step": 4096 }, { "epoch": 0.645899618769169, "grad_norm": 0.9921875, "learning_rate": 5.947639469825459e-05, "loss": 1.1641, "step": 4097 }, { "epoch": 0.646057270616562, "grad_norm": 0.984375, "learning_rate": 5.947218594580326e-05, "loss": 0.9519, "step": 4098 }, { "epoch": 0.6462149224639551, "grad_norm": 0.92578125, "learning_rate": 5.9467977279248336e-05, "loss": 0.9406, "step": 4099 }, { "epoch": 0.6463725743113481, "grad_norm": 1.03125, "learning_rate": 5.946376869859869e-05, "loss": 1.1576, "step": 4100 }, { "epoch": 0.6465302261587411, "grad_norm": 0.859375, "learning_rate": 5.945956020386324e-05, "loss": 0.8623, "step": 4101 }, { "epoch": 0.6466878780061341, "grad_norm": 0.96875, "learning_rate": 5.9455351795050975e-05, "loss": 1.0275, "step": 4102 }, { "epoch": 0.6468455298535271, "grad_norm": 0.8984375, "learning_rate": 5.945114347217078e-05, "loss": 0.923, "step": 4103 }, { "epoch": 0.6470031817009201, "grad_norm": 1.0078125, "learning_rate": 5.944693523523157e-05, "loss": 1.1688, "step": 4104 }, { "epoch": 0.6471608335483131, "grad_norm": 0.859375, "learning_rate": 5.944272708424226e-05, "loss": 0.9876, "step": 4105 }, { "epoch": 0.6473184853957061, "grad_norm": 0.99609375, "learning_rate": 5.943851901921171e-05, "loss": 1.0504, "step": 4106 }, { "epoch": 0.6474761372430992, "grad_norm": 0.92578125, "learning_rate": 5.943431104014897e-05, "loss": 1.22, "step": 4107 }, { "epoch": 0.6476337890904922, "grad_norm": 0.9375, "learning_rate": 5.943010314706288e-05, "loss": 1.0781, "step": 4108 }, { "epoch": 0.6477914409378852, "grad_norm": 0.9765625, "learning_rate": 5.942589533996237e-05, "loss": 1.1588, "step": 4109 }, { "epoch": 0.6479490927852782, "grad_norm": 0.9140625, "learning_rate": 5.942168761885635e-05, "loss": 0.7994, "step": 4110 }, { "epoch": 0.6481067446326712, "grad_norm": 0.98828125, "learning_rate": 5.941747998375372e-05, "loss": 0.9258, "step": 4111 }, { "epoch": 0.6482643964800642, "grad_norm": 0.9921875, "learning_rate": 5.941327243466345e-05, "loss": 0.975, "step": 4112 }, { "epoch": 0.6484220483274572, "grad_norm": 1.71875, "learning_rate": 5.940906497159442e-05, "loss": 1.055, "step": 4113 }, { "epoch": 0.6485797001748502, "grad_norm": 0.94921875, "learning_rate": 5.940485759455556e-05, "loss": 0.8806, "step": 4114 }, { "epoch": 0.6487373520222433, "grad_norm": 0.96484375, "learning_rate": 5.94006503035558e-05, "loss": 1.0791, "step": 4115 }, { "epoch": 0.6488950038696363, "grad_norm": 0.97265625, "learning_rate": 5.939644309860398e-05, "loss": 0.8534, "step": 4116 }, { "epoch": 0.6490526557170293, "grad_norm": 0.9140625, "learning_rate": 5.939223597970913e-05, "loss": 0.8935, "step": 4117 }, { "epoch": 0.6492103075644223, "grad_norm": 0.91796875, "learning_rate": 5.938802894688011e-05, "loss": 0.9772, "step": 4118 }, { "epoch": 0.6493679594118152, "grad_norm": 0.9765625, "learning_rate": 5.938382200012584e-05, "loss": 1.1068, "step": 4119 }, { "epoch": 0.6495256112592083, "grad_norm": 0.9609375, "learning_rate": 5.9379615139455244e-05, "loss": 0.975, "step": 4120 }, { "epoch": 0.6496832631066013, "grad_norm": 0.984375, "learning_rate": 5.9375408364877185e-05, "loss": 1.0221, "step": 4121 }, { "epoch": 0.6498409149539943, "grad_norm": 0.8828125, "learning_rate": 5.937120167640067e-05, "loss": 0.8946, "step": 4122 }, { "epoch": 0.6499985668013873, "grad_norm": 0.8828125, "learning_rate": 5.936699507403458e-05, "loss": 0.9541, "step": 4123 }, { "epoch": 0.6501562186487804, "grad_norm": 0.890625, "learning_rate": 5.9362788557787805e-05, "loss": 0.9226, "step": 4124 }, { "epoch": 0.6503138704961734, "grad_norm": 0.97265625, "learning_rate": 5.935858212766928e-05, "loss": 0.9563, "step": 4125 }, { "epoch": 0.6504715223435664, "grad_norm": 0.87890625, "learning_rate": 5.935437578368787e-05, "loss": 0.8202, "step": 4126 }, { "epoch": 0.6506291741909593, "grad_norm": 1.0078125, "learning_rate": 5.93501695258526e-05, "loss": 1.003, "step": 4127 }, { "epoch": 0.6507868260383524, "grad_norm": 0.875, "learning_rate": 5.934596335417231e-05, "loss": 0.9557, "step": 4128 }, { "epoch": 0.6509444778857454, "grad_norm": 0.8984375, "learning_rate": 5.934175726865593e-05, "loss": 0.9531, "step": 4129 }, { "epoch": 0.6511021297331384, "grad_norm": 0.83203125, "learning_rate": 5.9337551269312376e-05, "loss": 1.057, "step": 4130 }, { "epoch": 0.6512597815805314, "grad_norm": 0.8984375, "learning_rate": 5.933334535615051e-05, "loss": 1.0511, "step": 4131 }, { "epoch": 0.6514174334279245, "grad_norm": 0.99609375, "learning_rate": 5.9329139529179356e-05, "loss": 1.1209, "step": 4132 }, { "epoch": 0.6515750852753175, "grad_norm": 0.87890625, "learning_rate": 5.932493378840773e-05, "loss": 1.023, "step": 4133 }, { "epoch": 0.6517327371227105, "grad_norm": 0.91015625, "learning_rate": 5.932072813384461e-05, "loss": 1.0014, "step": 4134 }, { "epoch": 0.6518903889701034, "grad_norm": 0.890625, "learning_rate": 5.931652256549885e-05, "loss": 0.9755, "step": 4135 }, { "epoch": 0.6520480408174965, "grad_norm": 0.9375, "learning_rate": 5.931231708337939e-05, "loss": 0.928, "step": 4136 }, { "epoch": 0.6522056926648895, "grad_norm": 1.0234375, "learning_rate": 5.930811168749517e-05, "loss": 1.1616, "step": 4137 }, { "epoch": 0.6523633445122825, "grad_norm": 0.91796875, "learning_rate": 5.930390637785508e-05, "loss": 1.1084, "step": 4138 }, { "epoch": 0.6525209963596755, "grad_norm": 0.8828125, "learning_rate": 5.929970115446803e-05, "loss": 0.9586, "step": 4139 }, { "epoch": 0.6526786482070686, "grad_norm": 1.1015625, "learning_rate": 5.9295496017342956e-05, "loss": 1.2771, "step": 4140 }, { "epoch": 0.6528363000544616, "grad_norm": 0.9765625, "learning_rate": 5.92912909664887e-05, "loss": 1.1735, "step": 4141 }, { "epoch": 0.6529939519018546, "grad_norm": 0.90625, "learning_rate": 5.928708600191426e-05, "loss": 0.8799, "step": 4142 }, { "epoch": 0.6531516037492475, "grad_norm": 1.0625, "learning_rate": 5.9282881123628534e-05, "loss": 1.056, "step": 4143 }, { "epoch": 0.6533092555966405, "grad_norm": 0.9296875, "learning_rate": 5.9278676331640395e-05, "loss": 0.9063, "step": 4144 }, { "epoch": 0.6534669074440336, "grad_norm": 0.87109375, "learning_rate": 5.927447162595878e-05, "loss": 1.0187, "step": 4145 }, { "epoch": 0.6536245592914266, "grad_norm": 1.0, "learning_rate": 5.927026700659255e-05, "loss": 0.9661, "step": 4146 }, { "epoch": 0.6537822111388196, "grad_norm": 0.8203125, "learning_rate": 5.926606247355071e-05, "loss": 0.9684, "step": 4147 }, { "epoch": 0.6539398629862126, "grad_norm": 0.9921875, "learning_rate": 5.926185802684212e-05, "loss": 1.0104, "step": 4148 }, { "epoch": 0.6540975148336057, "grad_norm": 0.99609375, "learning_rate": 5.925765366647569e-05, "loss": 1.0866, "step": 4149 }, { "epoch": 0.6542551666809987, "grad_norm": 0.86328125, "learning_rate": 5.9253449392460334e-05, "loss": 0.867, "step": 4150 }, { "epoch": 0.6544128185283916, "grad_norm": 0.9296875, "learning_rate": 5.924924520480495e-05, "loss": 0.8596, "step": 4151 }, { "epoch": 0.6545704703757846, "grad_norm": 0.8671875, "learning_rate": 5.924504110351847e-05, "loss": 0.8642, "step": 4152 }, { "epoch": 0.6547281222231777, "grad_norm": 0.94140625, "learning_rate": 5.924083708860979e-05, "loss": 0.9368, "step": 4153 }, { "epoch": 0.6548857740705707, "grad_norm": 1.0859375, "learning_rate": 5.923663316008783e-05, "loss": 1.1633, "step": 4154 }, { "epoch": 0.6550434259179637, "grad_norm": 0.9453125, "learning_rate": 5.9232429317961516e-05, "loss": 0.9412, "step": 4155 }, { "epoch": 0.6552010777653567, "grad_norm": 0.92578125, "learning_rate": 5.9228225562239706e-05, "loss": 0.9711, "step": 4156 }, { "epoch": 0.6553587296127498, "grad_norm": 1.0859375, "learning_rate": 5.922402189293136e-05, "loss": 1.1707, "step": 4157 }, { "epoch": 0.6555163814601428, "grad_norm": 0.8359375, "learning_rate": 5.921981831004537e-05, "loss": 0.8617, "step": 4158 }, { "epoch": 0.6556740333075357, "grad_norm": 0.875, "learning_rate": 5.921561481359064e-05, "loss": 1.2367, "step": 4159 }, { "epoch": 0.6558316851549287, "grad_norm": 0.9453125, "learning_rate": 5.921141140357609e-05, "loss": 0.9554, "step": 4160 }, { "epoch": 0.6559893370023218, "grad_norm": 1.015625, "learning_rate": 5.920720808001059e-05, "loss": 0.8776, "step": 4161 }, { "epoch": 0.6561469888497148, "grad_norm": 0.93359375, "learning_rate": 5.920300484290311e-05, "loss": 0.835, "step": 4162 }, { "epoch": 0.6563046406971078, "grad_norm": 1.0234375, "learning_rate": 5.919880169226253e-05, "loss": 1.2206, "step": 4163 }, { "epoch": 0.6564622925445008, "grad_norm": 0.91796875, "learning_rate": 5.919459862809775e-05, "loss": 0.9594, "step": 4164 }, { "epoch": 0.6566199443918939, "grad_norm": 0.9453125, "learning_rate": 5.9190395650417696e-05, "loss": 0.9661, "step": 4165 }, { "epoch": 0.6567775962392869, "grad_norm": 0.94140625, "learning_rate": 5.918619275923123e-05, "loss": 1.111, "step": 4166 }, { "epoch": 0.6569352480866798, "grad_norm": 0.91796875, "learning_rate": 5.9181989954547334e-05, "loss": 0.9967, "step": 4167 }, { "epoch": 0.6570928999340728, "grad_norm": 1.0390625, "learning_rate": 5.917778723637486e-05, "loss": 1.1511, "step": 4168 }, { "epoch": 0.6572505517814659, "grad_norm": 0.94140625, "learning_rate": 5.9173584604722755e-05, "loss": 0.8744, "step": 4169 }, { "epoch": 0.6574082036288589, "grad_norm": 0.9453125, "learning_rate": 5.9169382059599895e-05, "loss": 0.9771, "step": 4170 }, { "epoch": 0.6575658554762519, "grad_norm": 1.140625, "learning_rate": 5.916517960101517e-05, "loss": 1.1534, "step": 4171 }, { "epoch": 0.6577235073236449, "grad_norm": 0.92578125, "learning_rate": 5.9160977228977524e-05, "loss": 1.0715, "step": 4172 }, { "epoch": 0.657881159171038, "grad_norm": 1.046875, "learning_rate": 5.915677494349586e-05, "loss": 1.0406, "step": 4173 }, { "epoch": 0.658038811018431, "grad_norm": 1.03125, "learning_rate": 5.9152572744579085e-05, "loss": 0.971, "step": 4174 }, { "epoch": 0.6581964628658239, "grad_norm": 1.1015625, "learning_rate": 5.914837063223611e-05, "loss": 1.065, "step": 4175 }, { "epoch": 0.6583541147132169, "grad_norm": 0.96484375, "learning_rate": 5.914416860647578e-05, "loss": 1.0131, "step": 4176 }, { "epoch": 0.65851176656061, "grad_norm": 0.8671875, "learning_rate": 5.913996666730709e-05, "loss": 0.8314, "step": 4177 }, { "epoch": 0.658669418408003, "grad_norm": 0.98046875, "learning_rate": 5.91357648147389e-05, "loss": 1.1401, "step": 4178 }, { "epoch": 0.658827070255396, "grad_norm": 0.8828125, "learning_rate": 5.913156304878011e-05, "loss": 0.8032, "step": 4179 }, { "epoch": 0.658984722102789, "grad_norm": 0.94921875, "learning_rate": 5.912736136943966e-05, "loss": 0.9449, "step": 4180 }, { "epoch": 0.659142373950182, "grad_norm": 0.90234375, "learning_rate": 5.912315977672638e-05, "loss": 0.8813, "step": 4181 }, { "epoch": 0.6593000257975751, "grad_norm": 1.0, "learning_rate": 5.911895827064926e-05, "loss": 0.9157, "step": 4182 }, { "epoch": 0.659457677644968, "grad_norm": 0.921875, "learning_rate": 5.9114756851217166e-05, "loss": 1.0107, "step": 4183 }, { "epoch": 0.659615329492361, "grad_norm": 0.875, "learning_rate": 5.9110555518439024e-05, "loss": 0.7929, "step": 4184 }, { "epoch": 0.659772981339754, "grad_norm": 1.0078125, "learning_rate": 5.9106354272323716e-05, "loss": 1.0335, "step": 4185 }, { "epoch": 0.6599306331871471, "grad_norm": 1.1328125, "learning_rate": 5.910215311288012e-05, "loss": 1.3282, "step": 4186 }, { "epoch": 0.6600882850345401, "grad_norm": 0.81640625, "learning_rate": 5.90979520401172e-05, "loss": 0.7694, "step": 4187 }, { "epoch": 0.6602459368819331, "grad_norm": 1.203125, "learning_rate": 5.9093751054043845e-05, "loss": 1.2487, "step": 4188 }, { "epoch": 0.6604035887293261, "grad_norm": 0.984375, "learning_rate": 5.9089550154668935e-05, "loss": 1.0161, "step": 4189 }, { "epoch": 0.6605612405767192, "grad_norm": 0.91015625, "learning_rate": 5.9085349342001386e-05, "loss": 0.9972, "step": 4190 }, { "epoch": 0.6607188924241121, "grad_norm": 1.328125, "learning_rate": 5.9081148616050075e-05, "loss": 0.9767, "step": 4191 }, { "epoch": 0.6608765442715051, "grad_norm": 0.9609375, "learning_rate": 5.907694797682395e-05, "loss": 1.094, "step": 4192 }, { "epoch": 0.6610341961188981, "grad_norm": 0.91015625, "learning_rate": 5.907274742433191e-05, "loss": 0.9247, "step": 4193 }, { "epoch": 0.6611918479662912, "grad_norm": 0.890625, "learning_rate": 5.906854695858284e-05, "loss": 0.9399, "step": 4194 }, { "epoch": 0.6613494998136842, "grad_norm": 0.9921875, "learning_rate": 5.906434657958564e-05, "loss": 1.0467, "step": 4195 }, { "epoch": 0.6615071516610772, "grad_norm": 0.89453125, "learning_rate": 5.906014628734919e-05, "loss": 1.1172, "step": 4196 }, { "epoch": 0.6616648035084702, "grad_norm": 0.984375, "learning_rate": 5.905594608188246e-05, "loss": 0.9914, "step": 4197 }, { "epoch": 0.6618224553558633, "grad_norm": 1.046875, "learning_rate": 5.9051745963194306e-05, "loss": 0.8661, "step": 4198 }, { "epoch": 0.6619801072032562, "grad_norm": 1.9609375, "learning_rate": 5.9047545931293626e-05, "loss": 0.9653, "step": 4199 }, { "epoch": 0.6621377590506492, "grad_norm": 1.0, "learning_rate": 5.904334598618935e-05, "loss": 0.9585, "step": 4200 }, { "epoch": 0.6622954108980422, "grad_norm": 0.81640625, "learning_rate": 5.90391461278903e-05, "loss": 0.8253, "step": 4201 }, { "epoch": 0.6624530627454353, "grad_norm": 0.96484375, "learning_rate": 5.9034946356405496e-05, "loss": 0.9384, "step": 4202 }, { "epoch": 0.6626107145928283, "grad_norm": 0.9609375, "learning_rate": 5.903074667174378e-05, "loss": 0.9379, "step": 4203 }, { "epoch": 0.6627683664402213, "grad_norm": 0.9765625, "learning_rate": 5.902654707391404e-05, "loss": 1.0177, "step": 4204 }, { "epoch": 0.6629260182876143, "grad_norm": 0.92578125, "learning_rate": 5.9022347562925196e-05, "loss": 1.0832, "step": 4205 }, { "epoch": 0.6630836701350074, "grad_norm": 1.0390625, "learning_rate": 5.9018148138786146e-05, "loss": 1.0953, "step": 4206 }, { "epoch": 0.6632413219824003, "grad_norm": 0.9140625, "learning_rate": 5.9013948801505745e-05, "loss": 1.0732, "step": 4207 }, { "epoch": 0.6633989738297933, "grad_norm": 0.93359375, "learning_rate": 5.9009749551093e-05, "loss": 1.1342, "step": 4208 }, { "epoch": 0.6635566256771863, "grad_norm": 0.98046875, "learning_rate": 5.9005550387556726e-05, "loss": 1.0695, "step": 4209 }, { "epoch": 0.6637142775245793, "grad_norm": 0.9453125, "learning_rate": 5.900135131090585e-05, "loss": 0.7862, "step": 4210 }, { "epoch": 0.6638719293719724, "grad_norm": 0.8984375, "learning_rate": 5.899715232114926e-05, "loss": 0.7955, "step": 4211 }, { "epoch": 0.6640295812193654, "grad_norm": 0.8984375, "learning_rate": 5.8992953418295824e-05, "loss": 0.9941, "step": 4212 }, { "epoch": 0.6641872330667584, "grad_norm": 0.9140625, "learning_rate": 5.898875460235453e-05, "loss": 0.9664, "step": 4213 }, { "epoch": 0.6643448849141514, "grad_norm": 0.9140625, "learning_rate": 5.898455587333422e-05, "loss": 0.9722, "step": 4214 }, { "epoch": 0.6645025367615444, "grad_norm": 1.0234375, "learning_rate": 5.89803572312438e-05, "loss": 1.1612, "step": 4215 }, { "epoch": 0.6646601886089374, "grad_norm": 0.85546875, "learning_rate": 5.897615867609216e-05, "loss": 0.9339, "step": 4216 }, { "epoch": 0.6648178404563304, "grad_norm": 0.96484375, "learning_rate": 5.89719602078882e-05, "loss": 1.0697, "step": 4217 }, { "epoch": 0.6649754923037234, "grad_norm": 0.94921875, "learning_rate": 5.896776182664084e-05, "loss": 0.8771, "step": 4218 }, { "epoch": 0.6651331441511165, "grad_norm": 0.90234375, "learning_rate": 5.8963563532358965e-05, "loss": 1.1059, "step": 4219 }, { "epoch": 0.6652907959985095, "grad_norm": 0.9921875, "learning_rate": 5.895936532505145e-05, "loss": 0.9927, "step": 4220 }, { "epoch": 0.6654484478459025, "grad_norm": 0.9609375, "learning_rate": 5.895516720472724e-05, "loss": 1.1861, "step": 4221 }, { "epoch": 0.6656060996932955, "grad_norm": 0.921875, "learning_rate": 5.895096917139518e-05, "loss": 0.9352, "step": 4222 }, { "epoch": 0.6657637515406885, "grad_norm": 0.8203125, "learning_rate": 5.894677122506421e-05, "loss": 0.8584, "step": 4223 }, { "epoch": 0.6659214033880815, "grad_norm": 1.2109375, "learning_rate": 5.894257336574322e-05, "loss": 1.0601, "step": 4224 }, { "epoch": 0.6660790552354745, "grad_norm": 1.09375, "learning_rate": 5.89383755934411e-05, "loss": 0.9244, "step": 4225 }, { "epoch": 0.6662367070828675, "grad_norm": 0.88671875, "learning_rate": 5.893417790816674e-05, "loss": 1.0607, "step": 4226 }, { "epoch": 0.6663943589302606, "grad_norm": 1.046875, "learning_rate": 5.8929980309929e-05, "loss": 1.0881, "step": 4227 }, { "epoch": 0.6665520107776536, "grad_norm": 0.890625, "learning_rate": 5.892578279873687e-05, "loss": 0.8997, "step": 4228 }, { "epoch": 0.6667096626250466, "grad_norm": 0.8828125, "learning_rate": 5.89215853745992e-05, "loss": 0.896, "step": 4229 }, { "epoch": 0.6668673144724396, "grad_norm": 0.8828125, "learning_rate": 5.891738803752487e-05, "loss": 0.805, "step": 4230 }, { "epoch": 0.6670249663198327, "grad_norm": 0.8828125, "learning_rate": 5.891319078752279e-05, "loss": 0.9534, "step": 4231 }, { "epoch": 0.6671826181672256, "grad_norm": 1.0390625, "learning_rate": 5.8908993624601806e-05, "loss": 0.9815, "step": 4232 }, { "epoch": 0.6673402700146186, "grad_norm": 1.0625, "learning_rate": 5.8904796548770916e-05, "loss": 1.2012, "step": 4233 }, { "epoch": 0.6674979218620116, "grad_norm": 0.9921875, "learning_rate": 5.890059956003895e-05, "loss": 0.9238, "step": 4234 }, { "epoch": 0.6676555737094046, "grad_norm": 1.140625, "learning_rate": 5.889640265841482e-05, "loss": 1.0521, "step": 4235 }, { "epoch": 0.6678132255567977, "grad_norm": 1.140625, "learning_rate": 5.889220584390741e-05, "loss": 0.9985, "step": 4236 }, { "epoch": 0.6679708774041907, "grad_norm": 0.921875, "learning_rate": 5.888800911652561e-05, "loss": 1.1496, "step": 4237 }, { "epoch": 0.6681285292515837, "grad_norm": 0.97265625, "learning_rate": 5.888381247627833e-05, "loss": 1.1432, "step": 4238 }, { "epoch": 0.6682861810989768, "grad_norm": 0.9453125, "learning_rate": 5.887961592317446e-05, "loss": 1.0047, "step": 4239 }, { "epoch": 0.6684438329463697, "grad_norm": 0.9140625, "learning_rate": 5.8875419457222904e-05, "loss": 1.1259, "step": 4240 }, { "epoch": 0.6686014847937627, "grad_norm": 0.98046875, "learning_rate": 5.8871223078432536e-05, "loss": 0.8408, "step": 4241 }, { "epoch": 0.6687591366411557, "grad_norm": 0.953125, "learning_rate": 5.886702678681224e-05, "loss": 0.9974, "step": 4242 }, { "epoch": 0.6689167884885487, "grad_norm": 0.97265625, "learning_rate": 5.886283058237097e-05, "loss": 0.8743, "step": 4243 }, { "epoch": 0.6690744403359418, "grad_norm": 0.89453125, "learning_rate": 5.8858634465117554e-05, "loss": 1.0363, "step": 4244 }, { "epoch": 0.6692320921833348, "grad_norm": 0.8984375, "learning_rate": 5.885443843506093e-05, "loss": 0.9773, "step": 4245 }, { "epoch": 0.6693897440307278, "grad_norm": 0.90625, "learning_rate": 5.885024249220996e-05, "loss": 0.895, "step": 4246 }, { "epoch": 0.6695473958781208, "grad_norm": 0.9375, "learning_rate": 5.8846046636573515e-05, "loss": 1.0035, "step": 4247 }, { "epoch": 0.6697050477255138, "grad_norm": 1.015625, "learning_rate": 5.8841850868160564e-05, "loss": 1.1711, "step": 4248 }, { "epoch": 0.6698626995729068, "grad_norm": 0.91015625, "learning_rate": 5.883765518697996e-05, "loss": 1.2258, "step": 4249 }, { "epoch": 0.6700203514202998, "grad_norm": 0.9765625, "learning_rate": 5.8833459593040583e-05, "loss": 1.2968, "step": 4250 }, { "epoch": 0.6701780032676928, "grad_norm": 0.92578125, "learning_rate": 5.882926408635134e-05, "loss": 0.8819, "step": 4251 }, { "epoch": 0.6703356551150859, "grad_norm": 0.98046875, "learning_rate": 5.882506866692108e-05, "loss": 1.1756, "step": 4252 }, { "epoch": 0.6704933069624789, "grad_norm": 0.921875, "learning_rate": 5.882087333475876e-05, "loss": 0.8979, "step": 4253 }, { "epoch": 0.6706509588098719, "grad_norm": 1.9140625, "learning_rate": 5.881667808987327e-05, "loss": 1.0591, "step": 4254 }, { "epoch": 0.6708086106572649, "grad_norm": 0.96875, "learning_rate": 5.881248293227346e-05, "loss": 1.0853, "step": 4255 }, { "epoch": 0.6709662625046579, "grad_norm": 0.83203125, "learning_rate": 5.880828786196825e-05, "loss": 0.7879, "step": 4256 }, { "epoch": 0.6711239143520509, "grad_norm": 0.984375, "learning_rate": 5.880409287896647e-05, "loss": 1.0554, "step": 4257 }, { "epoch": 0.6712815661994439, "grad_norm": 0.890625, "learning_rate": 5.87998979832771e-05, "loss": 0.9114, "step": 4258 }, { "epoch": 0.6714392180468369, "grad_norm": 0.9453125, "learning_rate": 5.879570317490899e-05, "loss": 0.9613, "step": 4259 }, { "epoch": 0.67159686989423, "grad_norm": 0.94921875, "learning_rate": 5.879150845387105e-05, "loss": 0.907, "step": 4260 }, { "epoch": 0.671754521741623, "grad_norm": 0.9140625, "learning_rate": 5.878731382017213e-05, "loss": 0.9004, "step": 4261 }, { "epoch": 0.671912173589016, "grad_norm": 0.875, "learning_rate": 5.878311927382113e-05, "loss": 0.9622, "step": 4262 }, { "epoch": 0.672069825436409, "grad_norm": 2.34375, "learning_rate": 5.877892481482697e-05, "loss": 0.8504, "step": 4263 }, { "epoch": 0.6722274772838019, "grad_norm": 0.953125, "learning_rate": 5.877473044319853e-05, "loss": 0.9961, "step": 4264 }, { "epoch": 0.672385129131195, "grad_norm": 1.4921875, "learning_rate": 5.877053615894469e-05, "loss": 1.1064, "step": 4265 }, { "epoch": 0.672542780978588, "grad_norm": 0.9296875, "learning_rate": 5.876634196207433e-05, "loss": 1.0089, "step": 4266 }, { "epoch": 0.672700432825981, "grad_norm": 1.140625, "learning_rate": 5.876214785259633e-05, "loss": 1.0729, "step": 4267 }, { "epoch": 0.672858084673374, "grad_norm": 0.9375, "learning_rate": 5.875795383051963e-05, "loss": 0.9183, "step": 4268 }, { "epoch": 0.6730157365207671, "grad_norm": 0.9921875, "learning_rate": 5.87537598958531e-05, "loss": 1.1391, "step": 4269 }, { "epoch": 0.6731733883681601, "grad_norm": 1.0390625, "learning_rate": 5.874956604860562e-05, "loss": 1.2014, "step": 4270 }, { "epoch": 0.6733310402155531, "grad_norm": 0.93359375, "learning_rate": 5.874537228878606e-05, "loss": 1.0237, "step": 4271 }, { "epoch": 0.673488692062946, "grad_norm": 0.875, "learning_rate": 5.874117861640329e-05, "loss": 0.8601, "step": 4272 }, { "epoch": 0.6736463439103391, "grad_norm": 0.96875, "learning_rate": 5.8736985031466274e-05, "loss": 0.986, "step": 4273 }, { "epoch": 0.6738039957577321, "grad_norm": 0.96875, "learning_rate": 5.873279153398388e-05, "loss": 1.1771, "step": 4274 }, { "epoch": 0.6739616476051251, "grad_norm": 0.98828125, "learning_rate": 5.872859812396495e-05, "loss": 1.1899, "step": 4275 }, { "epoch": 0.6741192994525181, "grad_norm": 1.125, "learning_rate": 5.872440480141841e-05, "loss": 1.0007, "step": 4276 }, { "epoch": 0.6742769512999112, "grad_norm": 0.92578125, "learning_rate": 5.872021156635309e-05, "loss": 1.0224, "step": 4277 }, { "epoch": 0.6744346031473042, "grad_norm": 1.0, "learning_rate": 5.871601841877796e-05, "loss": 0.9422, "step": 4278 }, { "epoch": 0.6745922549946972, "grad_norm": 0.86328125, "learning_rate": 5.8711825358701876e-05, "loss": 1.1526, "step": 4279 }, { "epoch": 0.6747499068420901, "grad_norm": 0.86328125, "learning_rate": 5.8707632386133706e-05, "loss": 0.8807, "step": 4280 }, { "epoch": 0.6749075586894832, "grad_norm": 0.96875, "learning_rate": 5.870343950108237e-05, "loss": 1.2016, "step": 4281 }, { "epoch": 0.6750652105368762, "grad_norm": 0.8828125, "learning_rate": 5.869924670355669e-05, "loss": 1.0572, "step": 4282 }, { "epoch": 0.6752228623842692, "grad_norm": 0.92578125, "learning_rate": 5.869505399356563e-05, "loss": 0.9693, "step": 4283 }, { "epoch": 0.6753805142316622, "grad_norm": 0.99609375, "learning_rate": 5.869086137111803e-05, "loss": 1.0617, "step": 4284 }, { "epoch": 0.6755381660790553, "grad_norm": 0.99609375, "learning_rate": 5.86866688362228e-05, "loss": 1.0246, "step": 4285 }, { "epoch": 0.6756958179264483, "grad_norm": 0.8828125, "learning_rate": 5.868247638888882e-05, "loss": 1.1124, "step": 4286 }, { "epoch": 0.6758534697738413, "grad_norm": 1.0, "learning_rate": 5.8678284029124916e-05, "loss": 1.1563, "step": 4287 }, { "epoch": 0.6760111216212342, "grad_norm": 1.0234375, "learning_rate": 5.867409175694007e-05, "loss": 0.8567, "step": 4288 }, { "epoch": 0.6761687734686272, "grad_norm": 0.8515625, "learning_rate": 5.866989957234313e-05, "loss": 1.0346, "step": 4289 }, { "epoch": 0.6763264253160203, "grad_norm": 1.046875, "learning_rate": 5.866570747534298e-05, "loss": 1.0391, "step": 4290 }, { "epoch": 0.6764840771634133, "grad_norm": 1.1484375, "learning_rate": 5.866151546594849e-05, "loss": 0.9463, "step": 4291 }, { "epoch": 0.6766417290108063, "grad_norm": 0.93359375, "learning_rate": 5.865732354416852e-05, "loss": 1.0399, "step": 4292 }, { "epoch": 0.6767993808581994, "grad_norm": 1.0, "learning_rate": 5.865313171001203e-05, "loss": 1.1003, "step": 4293 }, { "epoch": 0.6769570327055924, "grad_norm": 0.89453125, "learning_rate": 5.864893996348786e-05, "loss": 0.9609, "step": 4294 }, { "epoch": 0.6771146845529854, "grad_norm": 1.1875, "learning_rate": 5.864474830460489e-05, "loss": 1.1013, "step": 4295 }, { "epoch": 0.6772723364003783, "grad_norm": 0.90625, "learning_rate": 5.864055673337203e-05, "loss": 1.0646, "step": 4296 }, { "epoch": 0.6774299882477713, "grad_norm": 0.8984375, "learning_rate": 5.86363652497981e-05, "loss": 0.9352, "step": 4297 }, { "epoch": 0.6775876400951644, "grad_norm": 0.94140625, "learning_rate": 5.863217385389207e-05, "loss": 0.8736, "step": 4298 }, { "epoch": 0.6777452919425574, "grad_norm": 0.77734375, "learning_rate": 5.862798254566277e-05, "loss": 0.9367, "step": 4299 }, { "epoch": 0.6779029437899504, "grad_norm": 1.109375, "learning_rate": 5.862379132511912e-05, "loss": 1.1506, "step": 4300 }, { "epoch": 0.6780605956373434, "grad_norm": 0.98046875, "learning_rate": 5.8619600192269964e-05, "loss": 1.0278, "step": 4301 }, { "epoch": 0.6782182474847365, "grad_norm": 0.85546875, "learning_rate": 5.861540914712418e-05, "loss": 0.8443, "step": 4302 }, { "epoch": 0.6783758993321295, "grad_norm": 1.078125, "learning_rate": 5.86112181896907e-05, "loss": 1.0845, "step": 4303 }, { "epoch": 0.6785335511795224, "grad_norm": 0.97265625, "learning_rate": 5.860702731997836e-05, "loss": 0.893, "step": 4304 }, { "epoch": 0.6786912030269154, "grad_norm": 0.89453125, "learning_rate": 5.860283653799607e-05, "loss": 0.8737, "step": 4305 }, { "epoch": 0.6788488548743085, "grad_norm": 1.2890625, "learning_rate": 5.8598645843752695e-05, "loss": 0.8927, "step": 4306 }, { "epoch": 0.6790065067217015, "grad_norm": 0.9453125, "learning_rate": 5.8594455237257106e-05, "loss": 0.7611, "step": 4307 }, { "epoch": 0.6791641585690945, "grad_norm": 1.1484375, "learning_rate": 5.859026471851823e-05, "loss": 1.2162, "step": 4308 }, { "epoch": 0.6793218104164875, "grad_norm": 0.96484375, "learning_rate": 5.8586074287544924e-05, "loss": 1.0337, "step": 4309 }, { "epoch": 0.6794794622638806, "grad_norm": 0.96484375, "learning_rate": 5.858188394434605e-05, "loss": 1.0937, "step": 4310 }, { "epoch": 0.6796371141112736, "grad_norm": 0.9375, "learning_rate": 5.857769368893052e-05, "loss": 0.8648, "step": 4311 }, { "epoch": 0.6797947659586665, "grad_norm": 0.81640625, "learning_rate": 5.857350352130716e-05, "loss": 0.9456, "step": 4312 }, { "epoch": 0.6799524178060595, "grad_norm": 0.9921875, "learning_rate": 5.856931344148493e-05, "loss": 1.0389, "step": 4313 }, { "epoch": 0.6801100696534526, "grad_norm": 1.0625, "learning_rate": 5.856512344947267e-05, "loss": 1.1071, "step": 4314 }, { "epoch": 0.6802677215008456, "grad_norm": 0.8671875, "learning_rate": 5.856093354527926e-05, "loss": 0.9892, "step": 4315 }, { "epoch": 0.6804253733482386, "grad_norm": 1.015625, "learning_rate": 5.8556743728913585e-05, "loss": 0.9803, "step": 4316 }, { "epoch": 0.6805830251956316, "grad_norm": 0.99609375, "learning_rate": 5.8552554000384485e-05, "loss": 1.1223, "step": 4317 }, { "epoch": 0.6807406770430247, "grad_norm": 0.89453125, "learning_rate": 5.854836435970092e-05, "loss": 1.0529, "step": 4318 }, { "epoch": 0.6808983288904177, "grad_norm": 0.984375, "learning_rate": 5.854417480687173e-05, "loss": 0.7834, "step": 4319 }, { "epoch": 0.6810559807378106, "grad_norm": 1.0703125, "learning_rate": 5.853998534190579e-05, "loss": 1.0463, "step": 4320 }, { "epoch": 0.6812136325852036, "grad_norm": 1.015625, "learning_rate": 5.853579596481198e-05, "loss": 1.0442, "step": 4321 }, { "epoch": 0.6813712844325966, "grad_norm": 0.9140625, "learning_rate": 5.853160667559915e-05, "loss": 1.057, "step": 4322 }, { "epoch": 0.6815289362799897, "grad_norm": 1.09375, "learning_rate": 5.8527417474276235e-05, "loss": 1.0733, "step": 4323 }, { "epoch": 0.6816865881273827, "grad_norm": 0.9296875, "learning_rate": 5.852322836085207e-05, "loss": 0.8256, "step": 4324 }, { "epoch": 0.6818442399747757, "grad_norm": 0.93359375, "learning_rate": 5.851903933533558e-05, "loss": 0.9742, "step": 4325 }, { "epoch": 0.6820018918221687, "grad_norm": 0.96875, "learning_rate": 5.851485039773561e-05, "loss": 0.8921, "step": 4326 }, { "epoch": 0.6821595436695618, "grad_norm": 1.0703125, "learning_rate": 5.8510661548061015e-05, "loss": 1.1335, "step": 4327 }, { "epoch": 0.6823171955169547, "grad_norm": 1.0234375, "learning_rate": 5.850647278632073e-05, "loss": 1.1335, "step": 4328 }, { "epoch": 0.6824748473643477, "grad_norm": 0.9296875, "learning_rate": 5.850228411252361e-05, "loss": 1.0729, "step": 4329 }, { "epoch": 0.6826324992117407, "grad_norm": 0.9296875, "learning_rate": 5.849809552667851e-05, "loss": 1.0121, "step": 4330 }, { "epoch": 0.6827901510591338, "grad_norm": 0.953125, "learning_rate": 5.8493907028794334e-05, "loss": 0.8595, "step": 4331 }, { "epoch": 0.6829478029065268, "grad_norm": 0.94921875, "learning_rate": 5.8489718618879906e-05, "loss": 0.9107, "step": 4332 }, { "epoch": 0.6831054547539198, "grad_norm": 0.89453125, "learning_rate": 5.848553029694419e-05, "loss": 1.049, "step": 4333 }, { "epoch": 0.6832631066013128, "grad_norm": 0.921875, "learning_rate": 5.848134206299602e-05, "loss": 0.9846, "step": 4334 }, { "epoch": 0.6834207584487059, "grad_norm": 0.9140625, "learning_rate": 5.847715391704427e-05, "loss": 0.8768, "step": 4335 }, { "epoch": 0.6835784102960988, "grad_norm": 0.890625, "learning_rate": 5.8472965859097825e-05, "loss": 0.9036, "step": 4336 }, { "epoch": 0.6837360621434918, "grad_norm": 1.0703125, "learning_rate": 5.846877788916551e-05, "loss": 0.9874, "step": 4337 }, { "epoch": 0.6838937139908848, "grad_norm": 0.9453125, "learning_rate": 5.846459000725629e-05, "loss": 1.1366, "step": 4338 }, { "epoch": 0.6840513658382779, "grad_norm": 0.96484375, "learning_rate": 5.8460402213378996e-05, "loss": 0.8511, "step": 4339 }, { "epoch": 0.6842090176856709, "grad_norm": 0.94140625, "learning_rate": 5.8456214507542504e-05, "loss": 1.2788, "step": 4340 }, { "epoch": 0.6843666695330639, "grad_norm": 0.8828125, "learning_rate": 5.8452026889755697e-05, "loss": 0.9004, "step": 4341 }, { "epoch": 0.6845243213804569, "grad_norm": 1.1015625, "learning_rate": 5.844783936002741e-05, "loss": 1.0071, "step": 4342 }, { "epoch": 0.68468197322785, "grad_norm": 0.86328125, "learning_rate": 5.844365191836656e-05, "loss": 0.9768, "step": 4343 }, { "epoch": 0.6848396250752429, "grad_norm": 0.90234375, "learning_rate": 5.8439464564782043e-05, "loss": 0.9202, "step": 4344 }, { "epoch": 0.6849972769226359, "grad_norm": 0.8125, "learning_rate": 5.8435277299282697e-05, "loss": 1.0251, "step": 4345 }, { "epoch": 0.6851549287700289, "grad_norm": 0.91015625, "learning_rate": 5.8431090121877405e-05, "loss": 0.9603, "step": 4346 }, { "epoch": 0.685312580617422, "grad_norm": 1.015625, "learning_rate": 5.842690303257504e-05, "loss": 0.8703, "step": 4347 }, { "epoch": 0.685470232464815, "grad_norm": 1.171875, "learning_rate": 5.8422716031384464e-05, "loss": 1.0084, "step": 4348 }, { "epoch": 0.685627884312208, "grad_norm": 0.94140625, "learning_rate": 5.841852911831458e-05, "loss": 0.7919, "step": 4349 }, { "epoch": 0.685785536159601, "grad_norm": 0.94921875, "learning_rate": 5.841434229337425e-05, "loss": 1.0641, "step": 4350 }, { "epoch": 0.685943188006994, "grad_norm": 0.94921875, "learning_rate": 5.841015555657234e-05, "loss": 0.9804, "step": 4351 }, { "epoch": 0.686100839854387, "grad_norm": 1.0, "learning_rate": 5.840596890791773e-05, "loss": 1.0713, "step": 4352 }, { "epoch": 0.68625849170178, "grad_norm": 0.94921875, "learning_rate": 5.8401782347419255e-05, "loss": 1.1287, "step": 4353 }, { "epoch": 0.686416143549173, "grad_norm": 0.91796875, "learning_rate": 5.839759587508586e-05, "loss": 1.0508, "step": 4354 }, { "epoch": 0.686573795396566, "grad_norm": 0.92578125, "learning_rate": 5.8393409490926385e-05, "loss": 0.991, "step": 4355 }, { "epoch": 0.6867314472439591, "grad_norm": 0.98046875, "learning_rate": 5.8389223194949704e-05, "loss": 0.9483, "step": 4356 }, { "epoch": 0.6868890990913521, "grad_norm": 0.8515625, "learning_rate": 5.8385036987164665e-05, "loss": 1.0166, "step": 4357 }, { "epoch": 0.6870467509387451, "grad_norm": 0.9609375, "learning_rate": 5.838085086758017e-05, "loss": 0.9482, "step": 4358 }, { "epoch": 0.6872044027861381, "grad_norm": 0.99609375, "learning_rate": 5.8376664836205096e-05, "loss": 0.853, "step": 4359 }, { "epoch": 0.6873620546335311, "grad_norm": 0.984375, "learning_rate": 5.837247889304829e-05, "loss": 0.9209, "step": 4360 }, { "epoch": 0.6875197064809241, "grad_norm": 0.9609375, "learning_rate": 5.836829303811865e-05, "loss": 0.9603, "step": 4361 }, { "epoch": 0.6876773583283171, "grad_norm": 0.91015625, "learning_rate": 5.8364107271425025e-05, "loss": 0.8887, "step": 4362 }, { "epoch": 0.6878350101757101, "grad_norm": 1.1015625, "learning_rate": 5.835992159297625e-05, "loss": 1.1894, "step": 4363 }, { "epoch": 0.6879926620231032, "grad_norm": 0.85546875, "learning_rate": 5.835573600278129e-05, "loss": 0.9182, "step": 4364 }, { "epoch": 0.6881503138704962, "grad_norm": 0.9765625, "learning_rate": 5.835155050084897e-05, "loss": 1.0942, "step": 4365 }, { "epoch": 0.6883079657178892, "grad_norm": 0.9609375, "learning_rate": 5.8347365087188145e-05, "loss": 1.0286, "step": 4366 }, { "epoch": 0.6884656175652822, "grad_norm": 0.90625, "learning_rate": 5.834317976180772e-05, "loss": 0.8342, "step": 4367 }, { "epoch": 0.6886232694126752, "grad_norm": 1.1484375, "learning_rate": 5.833899452471648e-05, "loss": 0.8904, "step": 4368 }, { "epoch": 0.6887809212600682, "grad_norm": 1.1171875, "learning_rate": 5.8334809375923414e-05, "loss": 0.9624, "step": 4369 }, { "epoch": 0.6889385731074612, "grad_norm": 0.9375, "learning_rate": 5.8330624315437346e-05, "loss": 1.0367, "step": 4370 }, { "epoch": 0.6890962249548542, "grad_norm": 1.09375, "learning_rate": 5.832643934326711e-05, "loss": 1.0257, "step": 4371 }, { "epoch": 0.6892538768022473, "grad_norm": 0.89453125, "learning_rate": 5.8322254459421634e-05, "loss": 1.0738, "step": 4372 }, { "epoch": 0.6894115286496403, "grad_norm": 0.875, "learning_rate": 5.831806966390969e-05, "loss": 1.1193, "step": 4373 }, { "epoch": 0.6895691804970333, "grad_norm": 0.95703125, "learning_rate": 5.831388495674028e-05, "loss": 1.15, "step": 4374 }, { "epoch": 0.6897268323444263, "grad_norm": 0.8515625, "learning_rate": 5.8309700337922204e-05, "loss": 0.8735, "step": 4375 }, { "epoch": 0.6898844841918192, "grad_norm": 0.97265625, "learning_rate": 5.8305515807464325e-05, "loss": 0.9704, "step": 4376 }, { "epoch": 0.6900421360392123, "grad_norm": 0.93359375, "learning_rate": 5.8301331365375524e-05, "loss": 0.9566, "step": 4377 }, { "epoch": 0.6901997878866053, "grad_norm": 0.96484375, "learning_rate": 5.829714701166462e-05, "loss": 1.1779, "step": 4378 }, { "epoch": 0.6903574397339983, "grad_norm": 1.453125, "learning_rate": 5.8292962746340586e-05, "loss": 1.2191, "step": 4379 }, { "epoch": 0.6905150915813913, "grad_norm": 0.93359375, "learning_rate": 5.828877856941223e-05, "loss": 0.9928, "step": 4380 }, { "epoch": 0.6906727434287844, "grad_norm": 0.8671875, "learning_rate": 5.828459448088841e-05, "loss": 0.7851, "step": 4381 }, { "epoch": 0.6908303952761774, "grad_norm": 0.9609375, "learning_rate": 5.828041048077803e-05, "loss": 1.1876, "step": 4382 }, { "epoch": 0.6909880471235704, "grad_norm": 0.94921875, "learning_rate": 5.827622656908989e-05, "loss": 0.9875, "step": 4383 }, { "epoch": 0.6911456989709635, "grad_norm": 0.9453125, "learning_rate": 5.8272042745832936e-05, "loss": 1.0881, "step": 4384 }, { "epoch": 0.6913033508183564, "grad_norm": 0.87109375, "learning_rate": 5.826785901101599e-05, "loss": 0.968, "step": 4385 }, { "epoch": 0.6914610026657494, "grad_norm": 0.9453125, "learning_rate": 5.826367536464794e-05, "loss": 1.035, "step": 4386 }, { "epoch": 0.6916186545131424, "grad_norm": 0.859375, "learning_rate": 5.8259491806737654e-05, "loss": 0.8168, "step": 4387 }, { "epoch": 0.6917763063605354, "grad_norm": 0.94921875, "learning_rate": 5.825530833729395e-05, "loss": 0.9897, "step": 4388 }, { "epoch": 0.6919339582079285, "grad_norm": 0.984375, "learning_rate": 5.8251124956325765e-05, "loss": 1.0609, "step": 4389 }, { "epoch": 0.6920916100553215, "grad_norm": 0.984375, "learning_rate": 5.824694166384194e-05, "loss": 0.9525, "step": 4390 }, { "epoch": 0.6922492619027145, "grad_norm": 0.92578125, "learning_rate": 5.824275845985133e-05, "loss": 1.0863, "step": 4391 }, { "epoch": 0.6924069137501075, "grad_norm": 1.109375, "learning_rate": 5.823857534436281e-05, "loss": 1.1252, "step": 4392 }, { "epoch": 0.6925645655975005, "grad_norm": 0.91015625, "learning_rate": 5.82343923173852e-05, "loss": 0.8341, "step": 4393 }, { "epoch": 0.6927222174448935, "grad_norm": 1.015625, "learning_rate": 5.823020937892745e-05, "loss": 1.0191, "step": 4394 }, { "epoch": 0.6928798692922865, "grad_norm": 1.15625, "learning_rate": 5.822602652899839e-05, "loss": 0.9242, "step": 4395 }, { "epoch": 0.6930375211396795, "grad_norm": 1.015625, "learning_rate": 5.822184376760687e-05, "loss": 1.0908, "step": 4396 }, { "epoch": 0.6931951729870726, "grad_norm": 1.0546875, "learning_rate": 5.821766109476177e-05, "loss": 1.2476, "step": 4397 }, { "epoch": 0.6933528248344656, "grad_norm": 0.95703125, "learning_rate": 5.821347851047194e-05, "loss": 0.923, "step": 4398 }, { "epoch": 0.6935104766818586, "grad_norm": 0.8671875, "learning_rate": 5.8209296014746275e-05, "loss": 0.8926, "step": 4399 }, { "epoch": 0.6936681285292516, "grad_norm": 1.03125, "learning_rate": 5.820511360759355e-05, "loss": 0.9136, "step": 4400 }, { "epoch": 0.6938257803766446, "grad_norm": 0.984375, "learning_rate": 5.820093128902275e-05, "loss": 1.1609, "step": 4401 }, { "epoch": 0.6939834322240376, "grad_norm": 1.0078125, "learning_rate": 5.81967490590427e-05, "loss": 0.9315, "step": 4402 }, { "epoch": 0.6941410840714306, "grad_norm": 0.89453125, "learning_rate": 5.819256691766224e-05, "loss": 0.91, "step": 4403 }, { "epoch": 0.6942987359188236, "grad_norm": 0.89453125, "learning_rate": 5.818838486489024e-05, "loss": 0.9092, "step": 4404 }, { "epoch": 0.6944563877662167, "grad_norm": 0.80078125, "learning_rate": 5.8184202900735585e-05, "loss": 0.8103, "step": 4405 }, { "epoch": 0.6946140396136097, "grad_norm": 0.87890625, "learning_rate": 5.81800210252071e-05, "loss": 1.1155, "step": 4406 }, { "epoch": 0.6947716914610027, "grad_norm": 0.88671875, "learning_rate": 5.817583923831369e-05, "loss": 0.9026, "step": 4407 }, { "epoch": 0.6949293433083957, "grad_norm": 0.9375, "learning_rate": 5.817165754006415e-05, "loss": 0.9241, "step": 4408 }, { "epoch": 0.6950869951557886, "grad_norm": 0.98046875, "learning_rate": 5.816747593046743e-05, "loss": 0.9623, "step": 4409 }, { "epoch": 0.6952446470031817, "grad_norm": 0.92578125, "learning_rate": 5.816329440953234e-05, "loss": 1.0573, "step": 4410 }, { "epoch": 0.6954022988505747, "grad_norm": 0.9453125, "learning_rate": 5.815911297726777e-05, "loss": 1.1209, "step": 4411 }, { "epoch": 0.6955599506979677, "grad_norm": 1.34375, "learning_rate": 5.815493163368257e-05, "loss": 1.0183, "step": 4412 }, { "epoch": 0.6957176025453607, "grad_norm": 1.109375, "learning_rate": 5.815075037878557e-05, "loss": 0.9832, "step": 4413 }, { "epoch": 0.6958752543927538, "grad_norm": 1.0546875, "learning_rate": 5.814656921258568e-05, "loss": 1.1672, "step": 4414 }, { "epoch": 0.6960329062401468, "grad_norm": 0.86328125, "learning_rate": 5.814238813509176e-05, "loss": 0.9455, "step": 4415 }, { "epoch": 0.6961905580875398, "grad_norm": 1.015625, "learning_rate": 5.8138207146312654e-05, "loss": 1.0353, "step": 4416 }, { "epoch": 0.6963482099349327, "grad_norm": 1.0, "learning_rate": 5.8134026246257225e-05, "loss": 0.974, "step": 4417 }, { "epoch": 0.6965058617823258, "grad_norm": 0.8671875, "learning_rate": 5.812984543493429e-05, "loss": 1.0643, "step": 4418 }, { "epoch": 0.6966635136297188, "grad_norm": 0.8515625, "learning_rate": 5.812566471235279e-05, "loss": 0.8551, "step": 4419 }, { "epoch": 0.6968211654771118, "grad_norm": 0.96875, "learning_rate": 5.812148407852156e-05, "loss": 1.0857, "step": 4420 }, { "epoch": 0.6969788173245048, "grad_norm": 0.99609375, "learning_rate": 5.811730353344945e-05, "loss": 0.9829, "step": 4421 }, { "epoch": 0.6971364691718979, "grad_norm": 0.8671875, "learning_rate": 5.8113123077145315e-05, "loss": 0.9791, "step": 4422 }, { "epoch": 0.6972941210192909, "grad_norm": 0.9921875, "learning_rate": 5.8108942709617996e-05, "loss": 1.0099, "step": 4423 }, { "epoch": 0.6974517728666839, "grad_norm": 1.015625, "learning_rate": 5.81047624308764e-05, "loss": 0.9585, "step": 4424 }, { "epoch": 0.6976094247140768, "grad_norm": 0.90234375, "learning_rate": 5.810058224092938e-05, "loss": 0.9146, "step": 4425 }, { "epoch": 0.6977670765614699, "grad_norm": 1.0, "learning_rate": 5.809640213978578e-05, "loss": 1.0991, "step": 4426 }, { "epoch": 0.6979247284088629, "grad_norm": 0.98046875, "learning_rate": 5.809222212745444e-05, "loss": 1.0558, "step": 4427 }, { "epoch": 0.6980823802562559, "grad_norm": 0.91015625, "learning_rate": 5.8088042203944214e-05, "loss": 0.9057, "step": 4428 }, { "epoch": 0.6982400321036489, "grad_norm": 0.99609375, "learning_rate": 5.8083862369264045e-05, "loss": 1.0565, "step": 4429 }, { "epoch": 0.698397683951042, "grad_norm": 1.1875, "learning_rate": 5.807968262342272e-05, "loss": 1.2033, "step": 4430 }, { "epoch": 0.698555335798435, "grad_norm": 0.98828125, "learning_rate": 5.8075502966429094e-05, "loss": 1.136, "step": 4431 }, { "epoch": 0.698712987645828, "grad_norm": 0.890625, "learning_rate": 5.807132339829207e-05, "loss": 0.8736, "step": 4432 }, { "epoch": 0.6988706394932209, "grad_norm": 1.0234375, "learning_rate": 5.806714391902042e-05, "loss": 1.0263, "step": 4433 }, { "epoch": 0.699028291340614, "grad_norm": 0.90625, "learning_rate": 5.8062964528623096e-05, "loss": 1.0787, "step": 4434 }, { "epoch": 0.699185943188007, "grad_norm": 0.89453125, "learning_rate": 5.805878522710895e-05, "loss": 1.0348, "step": 4435 }, { "epoch": 0.6993435950354, "grad_norm": 0.9140625, "learning_rate": 5.805460601448679e-05, "loss": 0.9765, "step": 4436 }, { "epoch": 0.699501246882793, "grad_norm": 1.0234375, "learning_rate": 5.8050426890765496e-05, "loss": 0.9284, "step": 4437 }, { "epoch": 0.699658898730186, "grad_norm": 0.94140625, "learning_rate": 5.8046247855953893e-05, "loss": 1.0038, "step": 4438 }, { "epoch": 0.6998165505775791, "grad_norm": 1.125, "learning_rate": 5.804206891006091e-05, "loss": 1.0067, "step": 4439 }, { "epoch": 0.6999742024249721, "grad_norm": 0.91015625, "learning_rate": 5.803789005309537e-05, "loss": 0.7741, "step": 4440 }, { "epoch": 0.700131854272365, "grad_norm": 0.921875, "learning_rate": 5.803371128506611e-05, "loss": 0.898, "step": 4441 }, { "epoch": 0.700289506119758, "grad_norm": 0.859375, "learning_rate": 5.8029532605982e-05, "loss": 0.7008, "step": 4442 }, { "epoch": 0.7004471579671511, "grad_norm": 0.85546875, "learning_rate": 5.80253540158519e-05, "loss": 0.873, "step": 4443 }, { "epoch": 0.7006048098145441, "grad_norm": 1.0546875, "learning_rate": 5.802117551468467e-05, "loss": 1.1366, "step": 4444 }, { "epoch": 0.7007624616619371, "grad_norm": 0.9921875, "learning_rate": 5.8016997102489146e-05, "loss": 1.0757, "step": 4445 }, { "epoch": 0.7009201135093301, "grad_norm": 0.96484375, "learning_rate": 5.8012818779274205e-05, "loss": 1.1899, "step": 4446 }, { "epoch": 0.7010777653567232, "grad_norm": 0.8515625, "learning_rate": 5.800864054504869e-05, "loss": 0.9863, "step": 4447 }, { "epoch": 0.7012354172041162, "grad_norm": 0.8671875, "learning_rate": 5.8004462399821424e-05, "loss": 1.0852, "step": 4448 }, { "epoch": 0.7013930690515091, "grad_norm": 0.9453125, "learning_rate": 5.800028434360134e-05, "loss": 1.0996, "step": 4449 }, { "epoch": 0.7015507208989021, "grad_norm": 1.0, "learning_rate": 5.799610637639725e-05, "loss": 0.9754, "step": 4450 }, { "epoch": 0.7017083727462952, "grad_norm": 0.89453125, "learning_rate": 5.7991928498218016e-05, "loss": 0.9668, "step": 4451 }, { "epoch": 0.7018660245936882, "grad_norm": 0.97265625, "learning_rate": 5.7987750709072494e-05, "loss": 0.9257, "step": 4452 }, { "epoch": 0.7020236764410812, "grad_norm": 0.984375, "learning_rate": 5.798357300896948e-05, "loss": 0.9411, "step": 4453 }, { "epoch": 0.7021813282884742, "grad_norm": 0.9375, "learning_rate": 5.7979395397917934e-05, "loss": 0.8162, "step": 4454 }, { "epoch": 0.7023389801358673, "grad_norm": 1.0546875, "learning_rate": 5.797521787592666e-05, "loss": 0.9148, "step": 4455 }, { "epoch": 0.7024966319832603, "grad_norm": 1.40625, "learning_rate": 5.79710404430045e-05, "loss": 1.0743, "step": 4456 }, { "epoch": 0.7026542838306532, "grad_norm": 0.91796875, "learning_rate": 5.7966863099160305e-05, "loss": 1.0067, "step": 4457 }, { "epoch": 0.7028119356780462, "grad_norm": 0.96484375, "learning_rate": 5.796268584440292e-05, "loss": 0.923, "step": 4458 }, { "epoch": 0.7029695875254393, "grad_norm": 0.84765625, "learning_rate": 5.795850867874126e-05, "loss": 0.8637, "step": 4459 }, { "epoch": 0.7031272393728323, "grad_norm": 2.125, "learning_rate": 5.795433160218413e-05, "loss": 1.0024, "step": 4460 }, { "epoch": 0.7032848912202253, "grad_norm": 0.921875, "learning_rate": 5.7950154614740396e-05, "loss": 1.0329, "step": 4461 }, { "epoch": 0.7034425430676183, "grad_norm": 1.4296875, "learning_rate": 5.7945977716418896e-05, "loss": 0.9838, "step": 4462 }, { "epoch": 0.7036001949150114, "grad_norm": 1.046875, "learning_rate": 5.794180090722846e-05, "loss": 1.1006, "step": 4463 }, { "epoch": 0.7037578467624044, "grad_norm": 1.1640625, "learning_rate": 5.793762418717801e-05, "loss": 1.2801, "step": 4464 }, { "epoch": 0.7039154986097973, "grad_norm": 0.984375, "learning_rate": 5.7933447556276364e-05, "loss": 1.1131, "step": 4465 }, { "epoch": 0.7040731504571903, "grad_norm": 0.97265625, "learning_rate": 5.7929271014532383e-05, "loss": 1.0401, "step": 4466 }, { "epoch": 0.7042308023045833, "grad_norm": 1.0, "learning_rate": 5.79250945619549e-05, "loss": 0.8546, "step": 4467 }, { "epoch": 0.7043884541519764, "grad_norm": 0.93359375, "learning_rate": 5.792091819855272e-05, "loss": 0.8764, "step": 4468 }, { "epoch": 0.7045461059993694, "grad_norm": 0.99609375, "learning_rate": 5.7916741924334807e-05, "loss": 1.0916, "step": 4469 }, { "epoch": 0.7047037578467624, "grad_norm": 0.94140625, "learning_rate": 5.791256573930994e-05, "loss": 0.9512, "step": 4470 }, { "epoch": 0.7048614096941554, "grad_norm": 1.0546875, "learning_rate": 5.790838964348699e-05, "loss": 1.1549, "step": 4471 }, { "epoch": 0.7050190615415485, "grad_norm": 0.91796875, "learning_rate": 5.7904213636874806e-05, "loss": 1.0431, "step": 4472 }, { "epoch": 0.7051767133889414, "grad_norm": 0.9609375, "learning_rate": 5.790003771948219e-05, "loss": 1.1709, "step": 4473 }, { "epoch": 0.7053343652363344, "grad_norm": 1.0859375, "learning_rate": 5.789586189131808e-05, "loss": 1.0286, "step": 4474 }, { "epoch": 0.7054920170837274, "grad_norm": 0.8828125, "learning_rate": 5.789168615239129e-05, "loss": 1.0234, "step": 4475 }, { "epoch": 0.7056496689311205, "grad_norm": 0.953125, "learning_rate": 5.788751050271065e-05, "loss": 0.9849, "step": 4476 }, { "epoch": 0.7058073207785135, "grad_norm": 1.0625, "learning_rate": 5.7883334942285036e-05, "loss": 1.0829, "step": 4477 }, { "epoch": 0.7059649726259065, "grad_norm": 0.83984375, "learning_rate": 5.7879159471123236e-05, "loss": 0.9591, "step": 4478 }, { "epoch": 0.7061226244732995, "grad_norm": 0.890625, "learning_rate": 5.7874984089234196e-05, "loss": 0.8608, "step": 4479 }, { "epoch": 0.7062802763206926, "grad_norm": 0.92578125, "learning_rate": 5.787080879662673e-05, "loss": 1.0391, "step": 4480 }, { "epoch": 0.7064379281680855, "grad_norm": 0.85546875, "learning_rate": 5.786663359330966e-05, "loss": 0.9326, "step": 4481 }, { "epoch": 0.7065955800154785, "grad_norm": 0.9609375, "learning_rate": 5.786245847929186e-05, "loss": 1.1351, "step": 4482 }, { "epoch": 0.7067532318628715, "grad_norm": 0.8984375, "learning_rate": 5.7858283454582174e-05, "loss": 0.7805, "step": 4483 }, { "epoch": 0.7069108837102646, "grad_norm": 0.97265625, "learning_rate": 5.785410851918944e-05, "loss": 1.1022, "step": 4484 }, { "epoch": 0.7070685355576576, "grad_norm": 0.828125, "learning_rate": 5.784993367312253e-05, "loss": 0.8071, "step": 4485 }, { "epoch": 0.7072261874050506, "grad_norm": 0.84375, "learning_rate": 5.7845758916390213e-05, "loss": 0.6792, "step": 4486 }, { "epoch": 0.7073838392524436, "grad_norm": 0.8359375, "learning_rate": 5.784158424900146e-05, "loss": 0.8592, "step": 4487 }, { "epoch": 0.7075414910998367, "grad_norm": 0.9765625, "learning_rate": 5.783740967096505e-05, "loss": 1.0713, "step": 4488 }, { "epoch": 0.7076991429472296, "grad_norm": 0.91015625, "learning_rate": 5.7833235182289835e-05, "loss": 0.9192, "step": 4489 }, { "epoch": 0.7078567947946226, "grad_norm": 0.9140625, "learning_rate": 5.782906078298467e-05, "loss": 1.0175, "step": 4490 }, { "epoch": 0.7080144466420156, "grad_norm": 0.91796875, "learning_rate": 5.782488647305841e-05, "loss": 0.8847, "step": 4491 }, { "epoch": 0.7081720984894087, "grad_norm": 0.98046875, "learning_rate": 5.782071225251988e-05, "loss": 1.0023, "step": 4492 }, { "epoch": 0.7083297503368017, "grad_norm": 0.89453125, "learning_rate": 5.781653812137795e-05, "loss": 1.1209, "step": 4493 }, { "epoch": 0.7084874021841947, "grad_norm": 0.890625, "learning_rate": 5.781236407964141e-05, "loss": 0.8303, "step": 4494 }, { "epoch": 0.7086450540315877, "grad_norm": 0.9609375, "learning_rate": 5.7808190127319195e-05, "loss": 0.9324, "step": 4495 }, { "epoch": 0.7088027058789808, "grad_norm": 1.0078125, "learning_rate": 5.780401626442011e-05, "loss": 1.0799, "step": 4496 }, { "epoch": 0.7089603577263737, "grad_norm": 0.8515625, "learning_rate": 5.7799842490953005e-05, "loss": 0.8729, "step": 4497 }, { "epoch": 0.7091180095737667, "grad_norm": 1.0, "learning_rate": 5.779566880692673e-05, "loss": 0.9757, "step": 4498 }, { "epoch": 0.7092756614211597, "grad_norm": 0.83203125, "learning_rate": 5.779149521235005e-05, "loss": 0.8458, "step": 4499 }, { "epoch": 0.7094333132685527, "grad_norm": 1.03125, "learning_rate": 5.7787321707231954e-05, "loss": 0.8481, "step": 4500 }, { "epoch": 0.7095909651159458, "grad_norm": 1.078125, "learning_rate": 5.77831482915812e-05, "loss": 1.0312, "step": 4501 }, { "epoch": 0.7097486169633388, "grad_norm": 0.95703125, "learning_rate": 5.7778974965406675e-05, "loss": 1.0573, "step": 4502 }, { "epoch": 0.7099062688107318, "grad_norm": 0.890625, "learning_rate": 5.777480172871717e-05, "loss": 1.0107, "step": 4503 }, { "epoch": 0.7100639206581248, "grad_norm": 0.91796875, "learning_rate": 5.7770628581521534e-05, "loss": 0.9872, "step": 4504 }, { "epoch": 0.7102215725055178, "grad_norm": 0.76953125, "learning_rate": 5.776645552382867e-05, "loss": 0.9871, "step": 4505 }, { "epoch": 0.7103792243529108, "grad_norm": 0.875, "learning_rate": 5.77622825556474e-05, "loss": 1.0843, "step": 4506 }, { "epoch": 0.7105368762003038, "grad_norm": 0.85546875, "learning_rate": 5.7758109676986546e-05, "loss": 1.0402, "step": 4507 }, { "epoch": 0.7106945280476968, "grad_norm": 1.125, "learning_rate": 5.7753936887854976e-05, "loss": 0.8896, "step": 4508 }, { "epoch": 0.7108521798950899, "grad_norm": 0.890625, "learning_rate": 5.774976418826147e-05, "loss": 1.0048, "step": 4509 }, { "epoch": 0.7110098317424829, "grad_norm": 0.90234375, "learning_rate": 5.774559157821497e-05, "loss": 0.9749, "step": 4510 }, { "epoch": 0.7111674835898759, "grad_norm": 0.9140625, "learning_rate": 5.774141905772428e-05, "loss": 1.0288, "step": 4511 }, { "epoch": 0.7113251354372689, "grad_norm": 0.93359375, "learning_rate": 5.773724662679823e-05, "loss": 0.9577, "step": 4512 }, { "epoch": 0.7114827872846619, "grad_norm": 0.94921875, "learning_rate": 5.7733074285445665e-05, "loss": 1.0781, "step": 4513 }, { "epoch": 0.7116404391320549, "grad_norm": 0.94921875, "learning_rate": 5.77289020336754e-05, "loss": 0.914, "step": 4514 }, { "epoch": 0.7117980909794479, "grad_norm": 0.9609375, "learning_rate": 5.7724729871496356e-05, "loss": 1.079, "step": 4515 }, { "epoch": 0.7119557428268409, "grad_norm": 0.984375, "learning_rate": 5.7720557798917315e-05, "loss": 1.0542, "step": 4516 }, { "epoch": 0.712113394674234, "grad_norm": 1.0625, "learning_rate": 5.771638581594714e-05, "loss": 0.8942, "step": 4517 }, { "epoch": 0.712271046521627, "grad_norm": 1.0625, "learning_rate": 5.771221392259467e-05, "loss": 1.23, "step": 4518 }, { "epoch": 0.71242869836902, "grad_norm": 0.80859375, "learning_rate": 5.770804211886871e-05, "loss": 0.938, "step": 4519 }, { "epoch": 0.712586350216413, "grad_norm": 0.95703125, "learning_rate": 5.770387040477818e-05, "loss": 1.0381, "step": 4520 }, { "epoch": 0.712744002063806, "grad_norm": 1.0234375, "learning_rate": 5.769969878033188e-05, "loss": 1.0366, "step": 4521 }, { "epoch": 0.712901653911199, "grad_norm": 0.97265625, "learning_rate": 5.769552724553864e-05, "loss": 1.0303, "step": 4522 }, { "epoch": 0.713059305758592, "grad_norm": 0.8671875, "learning_rate": 5.769135580040732e-05, "loss": 0.9038, "step": 4523 }, { "epoch": 0.713216957605985, "grad_norm": 0.984375, "learning_rate": 5.768718444494671e-05, "loss": 1.1799, "step": 4524 }, { "epoch": 0.713374609453378, "grad_norm": 0.9453125, "learning_rate": 5.7683013179165736e-05, "loss": 1.1596, "step": 4525 }, { "epoch": 0.7135322613007711, "grad_norm": 0.93359375, "learning_rate": 5.767884200307319e-05, "loss": 1.0714, "step": 4526 }, { "epoch": 0.7136899131481641, "grad_norm": 0.90625, "learning_rate": 5.767467091667793e-05, "loss": 0.8077, "step": 4527 }, { "epoch": 0.7138475649955571, "grad_norm": 0.94921875, "learning_rate": 5.7670499919988776e-05, "loss": 0.855, "step": 4528 }, { "epoch": 0.71400521684295, "grad_norm": 1.3203125, "learning_rate": 5.7666329013014586e-05, "loss": 1.1338, "step": 4529 }, { "epoch": 0.7141628686903431, "grad_norm": 0.97265625, "learning_rate": 5.7662158195764185e-05, "loss": 1.0226, "step": 4530 }, { "epoch": 0.7143205205377361, "grad_norm": 0.92578125, "learning_rate": 5.765798746824642e-05, "loss": 1.0009, "step": 4531 }, { "epoch": 0.7144781723851291, "grad_norm": 0.9453125, "learning_rate": 5.765381683047013e-05, "loss": 0.9105, "step": 4532 }, { "epoch": 0.7146358242325221, "grad_norm": 0.9609375, "learning_rate": 5.7649646282444156e-05, "loss": 1.0727, "step": 4533 }, { "epoch": 0.7147934760799152, "grad_norm": 0.98046875, "learning_rate": 5.7645475824177295e-05, "loss": 1.0921, "step": 4534 }, { "epoch": 0.7149511279273082, "grad_norm": 0.9296875, "learning_rate": 5.764130545567847e-05, "loss": 0.8955, "step": 4535 }, { "epoch": 0.7151087797747012, "grad_norm": 1.0234375, "learning_rate": 5.763713517695649e-05, "loss": 1.1819, "step": 4536 }, { "epoch": 0.7152664316220942, "grad_norm": 3.96875, "learning_rate": 5.763296498802017e-05, "loss": 1.0377, "step": 4537 }, { "epoch": 0.7154240834694872, "grad_norm": 1.015625, "learning_rate": 5.762879488887836e-05, "loss": 1.1568, "step": 4538 }, { "epoch": 0.7155817353168802, "grad_norm": 0.88671875, "learning_rate": 5.762462487953986e-05, "loss": 0.7946, "step": 4539 }, { "epoch": 0.7157393871642732, "grad_norm": 0.9453125, "learning_rate": 5.762045496001357e-05, "loss": 1.043, "step": 4540 }, { "epoch": 0.7158970390116662, "grad_norm": 0.9609375, "learning_rate": 5.761628513030831e-05, "loss": 0.9246, "step": 4541 }, { "epoch": 0.7160546908590593, "grad_norm": 0.9296875, "learning_rate": 5.7612115390432916e-05, "loss": 1.0121, "step": 4542 }, { "epoch": 0.7162123427064523, "grad_norm": 1.015625, "learning_rate": 5.7607945740396227e-05, "loss": 1.2838, "step": 4543 }, { "epoch": 0.7163699945538453, "grad_norm": 0.87890625, "learning_rate": 5.7603776180207025e-05, "loss": 0.8676, "step": 4544 }, { "epoch": 0.7165276464012383, "grad_norm": 1.0234375, "learning_rate": 5.759960670987423e-05, "loss": 1.1053, "step": 4545 }, { "epoch": 0.7166852982486313, "grad_norm": 1.046875, "learning_rate": 5.759543732940666e-05, "loss": 0.9991, "step": 4546 }, { "epoch": 0.7168429500960243, "grad_norm": 0.8828125, "learning_rate": 5.759126803881313e-05, "loss": 1.0022, "step": 4547 }, { "epoch": 0.7170006019434173, "grad_norm": 0.8515625, "learning_rate": 5.758709883810248e-05, "loss": 0.8361, "step": 4548 }, { "epoch": 0.7171582537908103, "grad_norm": 0.921875, "learning_rate": 5.758292972728352e-05, "loss": 1.028, "step": 4549 }, { "epoch": 0.7173159056382034, "grad_norm": 0.89453125, "learning_rate": 5.7578760706365144e-05, "loss": 1.1563, "step": 4550 }, { "epoch": 0.7174735574855964, "grad_norm": 1.03125, "learning_rate": 5.757459177535616e-05, "loss": 1.2181, "step": 4551 }, { "epoch": 0.7176312093329894, "grad_norm": 1.0, "learning_rate": 5.757042293426541e-05, "loss": 1.1177, "step": 4552 }, { "epoch": 0.7177888611803824, "grad_norm": 0.9296875, "learning_rate": 5.756625418310173e-05, "loss": 0.9362, "step": 4553 }, { "epoch": 0.7179465130277753, "grad_norm": 1.0234375, "learning_rate": 5.75620855218739e-05, "loss": 1.0421, "step": 4554 }, { "epoch": 0.7181041648751684, "grad_norm": 0.83203125, "learning_rate": 5.755791695059084e-05, "loss": 0.7127, "step": 4555 }, { "epoch": 0.7182618167225614, "grad_norm": 0.89453125, "learning_rate": 5.755374846926136e-05, "loss": 0.9927, "step": 4556 }, { "epoch": 0.7184194685699544, "grad_norm": 0.99609375, "learning_rate": 5.754958007789427e-05, "loss": 1.0711, "step": 4557 }, { "epoch": 0.7185771204173474, "grad_norm": 1.0, "learning_rate": 5.7545411776498437e-05, "loss": 1.0965, "step": 4558 }, { "epoch": 0.7187347722647405, "grad_norm": 0.859375, "learning_rate": 5.7541243565082616e-05, "loss": 0.9999, "step": 4559 }, { "epoch": 0.7188924241121335, "grad_norm": 0.87890625, "learning_rate": 5.7537075443655766e-05, "loss": 1.0846, "step": 4560 }, { "epoch": 0.7190500759595265, "grad_norm": 0.86328125, "learning_rate": 5.7532907412226634e-05, "loss": 0.9009, "step": 4561 }, { "epoch": 0.7192077278069194, "grad_norm": 0.82421875, "learning_rate": 5.7528739470804084e-05, "loss": 0.8899, "step": 4562 }, { "epoch": 0.7193653796543125, "grad_norm": 0.90625, "learning_rate": 5.752457161939695e-05, "loss": 1.052, "step": 4563 }, { "epoch": 0.7195230315017055, "grad_norm": 0.90234375, "learning_rate": 5.7520403858014025e-05, "loss": 0.9828, "step": 4564 }, { "epoch": 0.7196806833490985, "grad_norm": 1.0390625, "learning_rate": 5.7516236186664196e-05, "loss": 1.0232, "step": 4565 }, { "epoch": 0.7198383351964915, "grad_norm": 1.015625, "learning_rate": 5.7512068605356294e-05, "loss": 1.124, "step": 4566 }, { "epoch": 0.7199959870438846, "grad_norm": 0.91015625, "learning_rate": 5.750790111409913e-05, "loss": 0.857, "step": 4567 }, { "epoch": 0.7201536388912776, "grad_norm": 1.0625, "learning_rate": 5.750373371290154e-05, "loss": 1.1734, "step": 4568 }, { "epoch": 0.7203112907386706, "grad_norm": 1.6328125, "learning_rate": 5.749956640177235e-05, "loss": 1.0413, "step": 4569 }, { "epoch": 0.7204689425860635, "grad_norm": 0.859375, "learning_rate": 5.749539918072041e-05, "loss": 0.8861, "step": 4570 }, { "epoch": 0.7206265944334566, "grad_norm": 0.84375, "learning_rate": 5.7491232049754495e-05, "loss": 0.8735, "step": 4571 }, { "epoch": 0.7207842462808496, "grad_norm": 1.0, "learning_rate": 5.7487065008883524e-05, "loss": 1.1797, "step": 4572 }, { "epoch": 0.7209418981282426, "grad_norm": 0.9453125, "learning_rate": 5.74828980581163e-05, "loss": 1.0538, "step": 4573 }, { "epoch": 0.7210995499756356, "grad_norm": 1.03125, "learning_rate": 5.7478731197461636e-05, "loss": 0.973, "step": 4574 }, { "epoch": 0.7212572018230287, "grad_norm": 0.953125, "learning_rate": 5.747456442692836e-05, "loss": 1.0639, "step": 4575 }, { "epoch": 0.7214148536704217, "grad_norm": 0.8515625, "learning_rate": 5.747039774652533e-05, "loss": 0.884, "step": 4576 }, { "epoch": 0.7215725055178147, "grad_norm": 0.875, "learning_rate": 5.746623115626135e-05, "loss": 0.7935, "step": 4577 }, { "epoch": 0.7217301573652076, "grad_norm": 1.0546875, "learning_rate": 5.746206465614528e-05, "loss": 1.2527, "step": 4578 }, { "epoch": 0.7218878092126007, "grad_norm": 0.9765625, "learning_rate": 5.745789824618587e-05, "loss": 1.0558, "step": 4579 }, { "epoch": 0.7220454610599937, "grad_norm": 1.0625, "learning_rate": 5.745373192639206e-05, "loss": 1.1334, "step": 4580 }, { "epoch": 0.7222031129073867, "grad_norm": 0.99609375, "learning_rate": 5.744956569677265e-05, "loss": 0.9989, "step": 4581 }, { "epoch": 0.7223607647547797, "grad_norm": 0.96875, "learning_rate": 5.744539955733643e-05, "loss": 0.9758, "step": 4582 }, { "epoch": 0.7225184166021728, "grad_norm": 0.8984375, "learning_rate": 5.744123350809225e-05, "loss": 0.8247, "step": 4583 }, { "epoch": 0.7226760684495658, "grad_norm": 1.0625, "learning_rate": 5.743706754904892e-05, "loss": 1.0697, "step": 4584 }, { "epoch": 0.7228337202969588, "grad_norm": 1.1171875, "learning_rate": 5.743290168021534e-05, "loss": 0.9729, "step": 4585 }, { "epoch": 0.7229913721443517, "grad_norm": 1.1171875, "learning_rate": 5.742873590160027e-05, "loss": 1.3939, "step": 4586 }, { "epoch": 0.7231490239917447, "grad_norm": 0.9296875, "learning_rate": 5.742457021321257e-05, "loss": 0.9684, "step": 4587 }, { "epoch": 0.7233066758391378, "grad_norm": 0.91015625, "learning_rate": 5.742040461506106e-05, "loss": 1.0538, "step": 4588 }, { "epoch": 0.7234643276865308, "grad_norm": 0.92578125, "learning_rate": 5.7416239107154534e-05, "loss": 1.0785, "step": 4589 }, { "epoch": 0.7236219795339238, "grad_norm": 0.921875, "learning_rate": 5.74120736895019e-05, "loss": 1.0088, "step": 4590 }, { "epoch": 0.7237796313813168, "grad_norm": 1.1328125, "learning_rate": 5.7407908362111916e-05, "loss": 0.9529, "step": 4591 }, { "epoch": 0.7239372832287099, "grad_norm": 0.96875, "learning_rate": 5.740374312499346e-05, "loss": 0.8724, "step": 4592 }, { "epoch": 0.7240949350761029, "grad_norm": 0.92578125, "learning_rate": 5.739957797815533e-05, "loss": 1.1785, "step": 4593 }, { "epoch": 0.7242525869234958, "grad_norm": 0.9765625, "learning_rate": 5.739541292160632e-05, "loss": 0.9545, "step": 4594 }, { "epoch": 0.7244102387708888, "grad_norm": 0.98046875, "learning_rate": 5.739124795535533e-05, "loss": 0.8773, "step": 4595 }, { "epoch": 0.7245678906182819, "grad_norm": 0.8671875, "learning_rate": 5.738708307941117e-05, "loss": 0.7808, "step": 4596 }, { "epoch": 0.7247255424656749, "grad_norm": 0.921875, "learning_rate": 5.738291829378264e-05, "loss": 0.8546, "step": 4597 }, { "epoch": 0.7248831943130679, "grad_norm": 1.0703125, "learning_rate": 5.7378753598478574e-05, "loss": 1.3119, "step": 4598 }, { "epoch": 0.7250408461604609, "grad_norm": 0.9296875, "learning_rate": 5.737458899350776e-05, "loss": 0.9611, "step": 4599 }, { "epoch": 0.725198498007854, "grad_norm": 1.0703125, "learning_rate": 5.7370424478879124e-05, "loss": 1.0643, "step": 4600 }, { "epoch": 0.725356149855247, "grad_norm": 1.6328125, "learning_rate": 5.736626005460144e-05, "loss": 1.1612, "step": 4601 }, { "epoch": 0.7255138017026399, "grad_norm": 0.9140625, "learning_rate": 5.736209572068352e-05, "loss": 0.9703, "step": 4602 }, { "epoch": 0.7256714535500329, "grad_norm": 1.015625, "learning_rate": 5.7357931477134194e-05, "loss": 1.0533, "step": 4603 }, { "epoch": 0.725829105397426, "grad_norm": 0.80078125, "learning_rate": 5.735376732396227e-05, "loss": 0.7787, "step": 4604 }, { "epoch": 0.725986757244819, "grad_norm": 1.0234375, "learning_rate": 5.734960326117662e-05, "loss": 1.1392, "step": 4605 }, { "epoch": 0.726144409092212, "grad_norm": 0.94140625, "learning_rate": 5.734543928878606e-05, "loss": 1.0177, "step": 4606 }, { "epoch": 0.726302060939605, "grad_norm": 0.92578125, "learning_rate": 5.734127540679941e-05, "loss": 0.9528, "step": 4607 }, { "epoch": 0.7264597127869981, "grad_norm": 0.9140625, "learning_rate": 5.733711161522548e-05, "loss": 0.9613, "step": 4608 }, { "epoch": 0.7266173646343911, "grad_norm": 0.9921875, "learning_rate": 5.733294791407307e-05, "loss": 1.0726, "step": 4609 }, { "epoch": 0.726775016481784, "grad_norm": 0.90625, "learning_rate": 5.732878430335107e-05, "loss": 0.8793, "step": 4610 }, { "epoch": 0.726932668329177, "grad_norm": 0.90234375, "learning_rate": 5.732462078306828e-05, "loss": 0.929, "step": 4611 }, { "epoch": 0.72709032017657, "grad_norm": 1.15625, "learning_rate": 5.732045735323351e-05, "loss": 1.063, "step": 4612 }, { "epoch": 0.7272479720239631, "grad_norm": 0.96484375, "learning_rate": 5.73162940138556e-05, "loss": 0.9351, "step": 4613 }, { "epoch": 0.7274056238713561, "grad_norm": 1.0078125, "learning_rate": 5.7312130764943364e-05, "loss": 0.9894, "step": 4614 }, { "epoch": 0.7275632757187491, "grad_norm": 1.0546875, "learning_rate": 5.730796760650562e-05, "loss": 0.971, "step": 4615 }, { "epoch": 0.7277209275661422, "grad_norm": 1.03125, "learning_rate": 5.73038045385512e-05, "loss": 0.9732, "step": 4616 }, { "epoch": 0.7278785794135352, "grad_norm": 1.046875, "learning_rate": 5.729964156108894e-05, "loss": 1.0791, "step": 4617 }, { "epoch": 0.7280362312609281, "grad_norm": 1.0703125, "learning_rate": 5.7295478674127624e-05, "loss": 0.8884, "step": 4618 }, { "epoch": 0.7281938831083211, "grad_norm": 0.96875, "learning_rate": 5.7291315877676064e-05, "loss": 1.1057, "step": 4619 }, { "epoch": 0.7283515349557141, "grad_norm": 1.0859375, "learning_rate": 5.728715317174318e-05, "loss": 1.0346, "step": 4620 }, { "epoch": 0.7285091868031072, "grad_norm": 1.03125, "learning_rate": 5.7282990556337724e-05, "loss": 1.2148, "step": 4621 }, { "epoch": 0.7286668386505002, "grad_norm": 0.99609375, "learning_rate": 5.7278828031468515e-05, "loss": 1.0916, "step": 4622 }, { "epoch": 0.7288244904978932, "grad_norm": 1.0625, "learning_rate": 5.7274665597144404e-05, "loss": 1.1413, "step": 4623 }, { "epoch": 0.7289821423452862, "grad_norm": 0.984375, "learning_rate": 5.727050325337415e-05, "loss": 1.1136, "step": 4624 }, { "epoch": 0.7291397941926793, "grad_norm": 1.0, "learning_rate": 5.726634100016666e-05, "loss": 1.0647, "step": 4625 }, { "epoch": 0.7292974460400722, "grad_norm": 0.9375, "learning_rate": 5.7262178837530723e-05, "loss": 1.0225, "step": 4626 }, { "epoch": 0.7294550978874652, "grad_norm": 0.93359375, "learning_rate": 5.725801676547515e-05, "loss": 0.9935, "step": 4627 }, { "epoch": 0.7296127497348582, "grad_norm": 0.9140625, "learning_rate": 5.7253854784008775e-05, "loss": 0.9398, "step": 4628 }, { "epoch": 0.7297704015822513, "grad_norm": 1.015625, "learning_rate": 5.724969289314036e-05, "loss": 1.0707, "step": 4629 }, { "epoch": 0.7299280534296443, "grad_norm": 1.046875, "learning_rate": 5.724553109287882e-05, "loss": 1.0626, "step": 4630 }, { "epoch": 0.7300857052770373, "grad_norm": 0.921875, "learning_rate": 5.724136938323295e-05, "loss": 1.1977, "step": 4631 }, { "epoch": 0.7302433571244303, "grad_norm": 0.92578125, "learning_rate": 5.723720776421153e-05, "loss": 1.0472, "step": 4632 }, { "epoch": 0.7304010089718234, "grad_norm": 1.0859375, "learning_rate": 5.723304623582342e-05, "loss": 1.0177, "step": 4633 }, { "epoch": 0.7305586608192163, "grad_norm": 1.125, "learning_rate": 5.722888479807742e-05, "loss": 0.9893, "step": 4634 }, { "epoch": 0.7307163126666093, "grad_norm": 0.78515625, "learning_rate": 5.722472345098231e-05, "loss": 0.8246, "step": 4635 }, { "epoch": 0.7308739645140023, "grad_norm": 0.84765625, "learning_rate": 5.7220562194547e-05, "loss": 0.907, "step": 4636 }, { "epoch": 0.7310316163613954, "grad_norm": 0.83984375, "learning_rate": 5.721640102878026e-05, "loss": 0.9116, "step": 4637 }, { "epoch": 0.7311892682087884, "grad_norm": 0.93359375, "learning_rate": 5.721223995369092e-05, "loss": 0.8619, "step": 4638 }, { "epoch": 0.7313469200561814, "grad_norm": 1.0546875, "learning_rate": 5.720807896928778e-05, "loss": 1.0181, "step": 4639 }, { "epoch": 0.7315045719035744, "grad_norm": 1.03125, "learning_rate": 5.720391807557963e-05, "loss": 1.183, "step": 4640 }, { "epoch": 0.7316622237509675, "grad_norm": 1.0234375, "learning_rate": 5.719975727257538e-05, "loss": 1.2148, "step": 4641 }, { "epoch": 0.7318198755983604, "grad_norm": 0.98046875, "learning_rate": 5.7195596560283794e-05, "loss": 0.868, "step": 4642 }, { "epoch": 0.7319775274457534, "grad_norm": 0.87890625, "learning_rate": 5.71914359387137e-05, "loss": 0.8763, "step": 4643 }, { "epoch": 0.7321351792931464, "grad_norm": 1.109375, "learning_rate": 5.7187275407873895e-05, "loss": 0.9841, "step": 4644 }, { "epoch": 0.7322928311405394, "grad_norm": 0.98828125, "learning_rate": 5.7183114967773174e-05, "loss": 0.9438, "step": 4645 }, { "epoch": 0.7324504829879325, "grad_norm": 1.0078125, "learning_rate": 5.7178954618420446e-05, "loss": 0.832, "step": 4646 }, { "epoch": 0.7326081348353255, "grad_norm": 0.96875, "learning_rate": 5.717479435982448e-05, "loss": 0.9947, "step": 4647 }, { "epoch": 0.7327657866827185, "grad_norm": 0.890625, "learning_rate": 5.7170634191994076e-05, "loss": 1.0779, "step": 4648 }, { "epoch": 0.7329234385301115, "grad_norm": 1.015625, "learning_rate": 5.716647411493807e-05, "loss": 1.1037, "step": 4649 }, { "epoch": 0.7330810903775045, "grad_norm": 1.0390625, "learning_rate": 5.716231412866524e-05, "loss": 1.0519, "step": 4650 }, { "epoch": 0.7332387422248975, "grad_norm": 0.86328125, "learning_rate": 5.715815423318447e-05, "loss": 0.8023, "step": 4651 }, { "epoch": 0.7333963940722905, "grad_norm": 0.9140625, "learning_rate": 5.715399442850454e-05, "loss": 1.0471, "step": 4652 }, { "epoch": 0.7335540459196835, "grad_norm": 0.91796875, "learning_rate": 5.714983471463428e-05, "loss": 0.9293, "step": 4653 }, { "epoch": 0.7337116977670766, "grad_norm": 0.7734375, "learning_rate": 5.7145675091582485e-05, "loss": 0.7455, "step": 4654 }, { "epoch": 0.7338693496144696, "grad_norm": 1.015625, "learning_rate": 5.714151555935798e-05, "loss": 1.1761, "step": 4655 }, { "epoch": 0.7340270014618626, "grad_norm": 0.9453125, "learning_rate": 5.71373561179696e-05, "loss": 0.9818, "step": 4656 }, { "epoch": 0.7341846533092556, "grad_norm": 0.97265625, "learning_rate": 5.713319676742609e-05, "loss": 1.157, "step": 4657 }, { "epoch": 0.7343423051566486, "grad_norm": 0.9609375, "learning_rate": 5.712903750773637e-05, "loss": 0.91, "step": 4658 }, { "epoch": 0.7344999570040416, "grad_norm": 0.95703125, "learning_rate": 5.712487833890919e-05, "loss": 0.9129, "step": 4659 }, { "epoch": 0.7346576088514346, "grad_norm": 0.96875, "learning_rate": 5.712071926095339e-05, "loss": 0.9001, "step": 4660 }, { "epoch": 0.7348152606988276, "grad_norm": 1.03125, "learning_rate": 5.7116560273877775e-05, "loss": 0.7676, "step": 4661 }, { "epoch": 0.7349729125462207, "grad_norm": 0.984375, "learning_rate": 5.711240137769116e-05, "loss": 1.1658, "step": 4662 }, { "epoch": 0.7351305643936137, "grad_norm": 1.1328125, "learning_rate": 5.7108242572402346e-05, "loss": 0.9517, "step": 4663 }, { "epoch": 0.7352882162410067, "grad_norm": 1.0078125, "learning_rate": 5.710408385802017e-05, "loss": 1.0213, "step": 4664 }, { "epoch": 0.7354458680883997, "grad_norm": 1.0703125, "learning_rate": 5.709992523455341e-05, "loss": 1.0087, "step": 4665 }, { "epoch": 0.7356035199357926, "grad_norm": 0.8359375, "learning_rate": 5.7095766702010935e-05, "loss": 0.7306, "step": 4666 }, { "epoch": 0.7357611717831857, "grad_norm": 0.94921875, "learning_rate": 5.709160826040152e-05, "loss": 0.9112, "step": 4667 }, { "epoch": 0.7359188236305787, "grad_norm": 0.84765625, "learning_rate": 5.7087449909734005e-05, "loss": 0.7613, "step": 4668 }, { "epoch": 0.7360764754779717, "grad_norm": 0.94921875, "learning_rate": 5.708329165001717e-05, "loss": 0.9827, "step": 4669 }, { "epoch": 0.7362341273253648, "grad_norm": 1.0234375, "learning_rate": 5.707913348125982e-05, "loss": 0.9918, "step": 4670 }, { "epoch": 0.7363917791727578, "grad_norm": 0.9453125, "learning_rate": 5.707497540347082e-05, "loss": 0.9083, "step": 4671 }, { "epoch": 0.7365494310201508, "grad_norm": 1.2265625, "learning_rate": 5.707081741665896e-05, "loss": 1.0363, "step": 4672 }, { "epoch": 0.7367070828675438, "grad_norm": 1.0078125, "learning_rate": 5.706665952083305e-05, "loss": 0.9994, "step": 4673 }, { "epoch": 0.7368647347149367, "grad_norm": 0.921875, "learning_rate": 5.706250171600192e-05, "loss": 0.9276, "step": 4674 }, { "epoch": 0.7370223865623298, "grad_norm": 0.94140625, "learning_rate": 5.70583440021743e-05, "loss": 0.8835, "step": 4675 }, { "epoch": 0.7371800384097228, "grad_norm": 0.9296875, "learning_rate": 5.705418637935911e-05, "loss": 1.1272, "step": 4676 }, { "epoch": 0.7373376902571158, "grad_norm": 0.96484375, "learning_rate": 5.7050028847565115e-05, "loss": 0.8556, "step": 4677 }, { "epoch": 0.7374953421045088, "grad_norm": 0.87109375, "learning_rate": 5.704587140680114e-05, "loss": 0.9852, "step": 4678 }, { "epoch": 0.7376529939519019, "grad_norm": 0.9296875, "learning_rate": 5.704171405707598e-05, "loss": 1.0377, "step": 4679 }, { "epoch": 0.7378106457992949, "grad_norm": 0.96484375, "learning_rate": 5.703755679839842e-05, "loss": 0.9971, "step": 4680 }, { "epoch": 0.7379682976466879, "grad_norm": 0.91796875, "learning_rate": 5.703339963077733e-05, "loss": 1.0554, "step": 4681 }, { "epoch": 0.7381259494940808, "grad_norm": 0.890625, "learning_rate": 5.70292425542215e-05, "loss": 0.9929, "step": 4682 }, { "epoch": 0.7382836013414739, "grad_norm": 0.84375, "learning_rate": 5.702508556873973e-05, "loss": 0.8728, "step": 4683 }, { "epoch": 0.7384412531888669, "grad_norm": 0.84765625, "learning_rate": 5.702092867434084e-05, "loss": 0.8815, "step": 4684 }, { "epoch": 0.7385989050362599, "grad_norm": 0.88671875, "learning_rate": 5.701677187103358e-05, "loss": 0.967, "step": 4685 }, { "epoch": 0.7387565568836529, "grad_norm": 0.890625, "learning_rate": 5.7012615158826875e-05, "loss": 1.0705, "step": 4686 }, { "epoch": 0.738914208731046, "grad_norm": 0.98046875, "learning_rate": 5.700845853772947e-05, "loss": 0.9555, "step": 4687 }, { "epoch": 0.739071860578439, "grad_norm": 0.94921875, "learning_rate": 5.700430200775018e-05, "loss": 0.994, "step": 4688 }, { "epoch": 0.739229512425832, "grad_norm": 0.80859375, "learning_rate": 5.70001455688978e-05, "loss": 0.8522, "step": 4689 }, { "epoch": 0.7393871642732249, "grad_norm": 0.82421875, "learning_rate": 5.6995989221181124e-05, "loss": 0.8654, "step": 4690 }, { "epoch": 0.739544816120618, "grad_norm": 0.87890625, "learning_rate": 5.6991832964609035e-05, "loss": 0.8621, "step": 4691 }, { "epoch": 0.739702467968011, "grad_norm": 0.953125, "learning_rate": 5.6987676799190295e-05, "loss": 0.9141, "step": 4692 }, { "epoch": 0.739860119815404, "grad_norm": 0.98046875, "learning_rate": 5.698352072493371e-05, "loss": 0.8904, "step": 4693 }, { "epoch": 0.740017771662797, "grad_norm": 0.9453125, "learning_rate": 5.6979364741848105e-05, "loss": 1.0103, "step": 4694 }, { "epoch": 0.7401754235101901, "grad_norm": 0.98046875, "learning_rate": 5.697520884994223e-05, "loss": 0.902, "step": 4695 }, { "epoch": 0.7403330753575831, "grad_norm": 0.9375, "learning_rate": 5.6971053049224987e-05, "loss": 1.0204, "step": 4696 }, { "epoch": 0.7404907272049761, "grad_norm": 0.89453125, "learning_rate": 5.696689733970513e-05, "loss": 0.9853, "step": 4697 }, { "epoch": 0.7406483790523691, "grad_norm": 0.91796875, "learning_rate": 5.6962741721391474e-05, "loss": 1.0159, "step": 4698 }, { "epoch": 0.740806030899762, "grad_norm": 1.0234375, "learning_rate": 5.695858619429284e-05, "loss": 0.9521, "step": 4699 }, { "epoch": 0.7409636827471551, "grad_norm": 1.0859375, "learning_rate": 5.695443075841801e-05, "loss": 0.9416, "step": 4700 }, { "epoch": 0.7411213345945481, "grad_norm": 1.0546875, "learning_rate": 5.69502754137758e-05, "loss": 0.971, "step": 4701 }, { "epoch": 0.7412789864419411, "grad_norm": 1.21875, "learning_rate": 5.694612016037504e-05, "loss": 1.0025, "step": 4702 }, { "epoch": 0.7414366382893341, "grad_norm": 1.0078125, "learning_rate": 5.694196499822449e-05, "loss": 0.9848, "step": 4703 }, { "epoch": 0.7415942901367272, "grad_norm": 0.921875, "learning_rate": 5.6937809927333006e-05, "loss": 0.9557, "step": 4704 }, { "epoch": 0.7417519419841202, "grad_norm": 0.859375, "learning_rate": 5.6933654947709326e-05, "loss": 0.8627, "step": 4705 }, { "epoch": 0.7419095938315132, "grad_norm": 1.2109375, "learning_rate": 5.692950005936234e-05, "loss": 1.1389, "step": 4706 }, { "epoch": 0.7420672456789061, "grad_norm": 0.87109375, "learning_rate": 5.692534526230082e-05, "loss": 0.946, "step": 4707 }, { "epoch": 0.7422248975262992, "grad_norm": 0.953125, "learning_rate": 5.692119055653358e-05, "loss": 0.9587, "step": 4708 }, { "epoch": 0.7423825493736922, "grad_norm": 1.0078125, "learning_rate": 5.6917035942069386e-05, "loss": 0.9721, "step": 4709 }, { "epoch": 0.7425402012210852, "grad_norm": 0.9296875, "learning_rate": 5.6912881418917054e-05, "loss": 0.9636, "step": 4710 }, { "epoch": 0.7426978530684782, "grad_norm": 0.89453125, "learning_rate": 5.6908726987085444e-05, "loss": 0.8739, "step": 4711 }, { "epoch": 0.7428555049158713, "grad_norm": 0.90625, "learning_rate": 5.690457264658331e-05, "loss": 0.9868, "step": 4712 }, { "epoch": 0.7430131567632643, "grad_norm": 1.0859375, "learning_rate": 5.690041839741949e-05, "loss": 0.9563, "step": 4713 }, { "epoch": 0.7431708086106573, "grad_norm": 0.9140625, "learning_rate": 5.689626423960277e-05, "loss": 0.8799, "step": 4714 }, { "epoch": 0.7433284604580502, "grad_norm": 0.94921875, "learning_rate": 5.68921101731419e-05, "loss": 1.0575, "step": 4715 }, { "epoch": 0.7434861123054433, "grad_norm": 0.91796875, "learning_rate": 5.688795619804579e-05, "loss": 1.0697, "step": 4716 }, { "epoch": 0.7436437641528363, "grad_norm": 0.9609375, "learning_rate": 5.688380231432318e-05, "loss": 1.1015, "step": 4717 }, { "epoch": 0.7438014160002293, "grad_norm": 0.921875, "learning_rate": 5.6879648521982896e-05, "loss": 1.0351, "step": 4718 }, { "epoch": 0.7439590678476223, "grad_norm": 1.03125, "learning_rate": 5.6875494821033737e-05, "loss": 1.0856, "step": 4719 }, { "epoch": 0.7441167196950154, "grad_norm": 1.109375, "learning_rate": 5.687134121148445e-05, "loss": 1.0032, "step": 4720 }, { "epoch": 0.7442743715424084, "grad_norm": 0.87109375, "learning_rate": 5.6867187693343936e-05, "loss": 0.842, "step": 4721 }, { "epoch": 0.7444320233898014, "grad_norm": 1.671875, "learning_rate": 5.6863034266620954e-05, "loss": 0.7749, "step": 4722 }, { "epoch": 0.7445896752371943, "grad_norm": 1.0390625, "learning_rate": 5.68588809313243e-05, "loss": 1.245, "step": 4723 }, { "epoch": 0.7447473270845874, "grad_norm": 1.015625, "learning_rate": 5.6854727687462786e-05, "loss": 1.0012, "step": 4724 }, { "epoch": 0.7449049789319804, "grad_norm": 1.03125, "learning_rate": 5.685057453504516e-05, "loss": 1.0731, "step": 4725 }, { "epoch": 0.7450626307793734, "grad_norm": 1.0078125, "learning_rate": 5.684642147408034e-05, "loss": 0.8519, "step": 4726 }, { "epoch": 0.7452202826267664, "grad_norm": 0.87890625, "learning_rate": 5.6842268504577034e-05, "loss": 0.9437, "step": 4727 }, { "epoch": 0.7453779344741595, "grad_norm": 0.91015625, "learning_rate": 5.6838115626544086e-05, "loss": 0.919, "step": 4728 }, { "epoch": 0.7455355863215525, "grad_norm": 0.98046875, "learning_rate": 5.6833962839990296e-05, "loss": 1.1763, "step": 4729 }, { "epoch": 0.7456932381689455, "grad_norm": 0.9609375, "learning_rate": 5.68298101449244e-05, "loss": 1.0592, "step": 4730 }, { "epoch": 0.7458508900163384, "grad_norm": 0.953125, "learning_rate": 5.682565754135531e-05, "loss": 1.104, "step": 4731 }, { "epoch": 0.7460085418637314, "grad_norm": 1.0, "learning_rate": 5.682150502929175e-05, "loss": 1.0742, "step": 4732 }, { "epoch": 0.7461661937111245, "grad_norm": 1.1796875, "learning_rate": 5.6817352608742546e-05, "loss": 0.9803, "step": 4733 }, { "epoch": 0.7463238455585175, "grad_norm": 0.8671875, "learning_rate": 5.6813200279716503e-05, "loss": 0.9692, "step": 4734 }, { "epoch": 0.7464814974059105, "grad_norm": 0.79296875, "learning_rate": 5.680904804222238e-05, "loss": 0.7481, "step": 4735 }, { "epoch": 0.7466391492533035, "grad_norm": 0.9765625, "learning_rate": 5.6804895896269036e-05, "loss": 1.149, "step": 4736 }, { "epoch": 0.7467968011006966, "grad_norm": 1.0546875, "learning_rate": 5.680074384186526e-05, "loss": 1.2308, "step": 4737 }, { "epoch": 0.7469544529480896, "grad_norm": 0.94921875, "learning_rate": 5.6796591879019825e-05, "loss": 0.938, "step": 4738 }, { "epoch": 0.7471121047954825, "grad_norm": 0.90234375, "learning_rate": 5.6792440007741554e-05, "loss": 0.9796, "step": 4739 }, { "epoch": 0.7472697566428755, "grad_norm": 0.8984375, "learning_rate": 5.6788288228039234e-05, "loss": 1.0304, "step": 4740 }, { "epoch": 0.7474274084902686, "grad_norm": 0.88671875, "learning_rate": 5.678413653992166e-05, "loss": 0.8708, "step": 4741 }, { "epoch": 0.7475850603376616, "grad_norm": 1.0625, "learning_rate": 5.67799849433976e-05, "loss": 0.9206, "step": 4742 }, { "epoch": 0.7477427121850546, "grad_norm": 0.96875, "learning_rate": 5.6775833438475946e-05, "loss": 1.2325, "step": 4743 }, { "epoch": 0.7479003640324476, "grad_norm": 0.9453125, "learning_rate": 5.677168202516543e-05, "loss": 1.0454, "step": 4744 }, { "epoch": 0.7480580158798407, "grad_norm": 0.93359375, "learning_rate": 5.6767530703474866e-05, "loss": 1.0683, "step": 4745 }, { "epoch": 0.7482156677272337, "grad_norm": 0.8984375, "learning_rate": 5.6763379473413045e-05, "loss": 0.9489, "step": 4746 }, { "epoch": 0.7483733195746266, "grad_norm": 0.93359375, "learning_rate": 5.6759228334988764e-05, "loss": 1.0223, "step": 4747 }, { "epoch": 0.7485309714220196, "grad_norm": 0.88671875, "learning_rate": 5.6755077288210846e-05, "loss": 0.9622, "step": 4748 }, { "epoch": 0.7486886232694127, "grad_norm": 0.91015625, "learning_rate": 5.675092633308806e-05, "loss": 1.1073, "step": 4749 }, { "epoch": 0.7488462751168057, "grad_norm": 0.90234375, "learning_rate": 5.674677546962918e-05, "loss": 1.1217, "step": 4750 }, { "epoch": 0.7490039269641987, "grad_norm": 0.88671875, "learning_rate": 5.674262469784306e-05, "loss": 0.9063, "step": 4751 }, { "epoch": 0.7491615788115917, "grad_norm": 1.0625, "learning_rate": 5.67384740177385e-05, "loss": 0.9443, "step": 4752 }, { "epoch": 0.7493192306589848, "grad_norm": 0.91015625, "learning_rate": 5.673432342932425e-05, "loss": 0.869, "step": 4753 }, { "epoch": 0.7494768825063778, "grad_norm": 0.8984375, "learning_rate": 5.673017293260914e-05, "loss": 0.9956, "step": 4754 }, { "epoch": 0.7496345343537707, "grad_norm": 1.0078125, "learning_rate": 5.672602252760191e-05, "loss": 1.0084, "step": 4755 }, { "epoch": 0.7497921862011637, "grad_norm": 0.94140625, "learning_rate": 5.6721872214311445e-05, "loss": 0.9403, "step": 4756 }, { "epoch": 0.7499498380485567, "grad_norm": 0.90625, "learning_rate": 5.67177219927465e-05, "loss": 0.9248, "step": 4757 }, { "epoch": 0.7501074898959498, "grad_norm": 1.0078125, "learning_rate": 5.671357186291586e-05, "loss": 0.9568, "step": 4758 }, { "epoch": 0.7502651417433428, "grad_norm": 0.91796875, "learning_rate": 5.6709421824828345e-05, "loss": 0.9258, "step": 4759 }, { "epoch": 0.7504227935907358, "grad_norm": 0.90234375, "learning_rate": 5.670527187849268e-05, "loss": 0.9776, "step": 4760 }, { "epoch": 0.7505804454381289, "grad_norm": 0.83984375, "learning_rate": 5.670112202391776e-05, "loss": 1.0012, "step": 4761 }, { "epoch": 0.7507380972855219, "grad_norm": 0.9296875, "learning_rate": 5.6696972261112346e-05, "loss": 1.1628, "step": 4762 }, { "epoch": 0.7508957491329148, "grad_norm": 0.99609375, "learning_rate": 5.669282259008521e-05, "loss": 1.0154, "step": 4763 }, { "epoch": 0.7510534009803078, "grad_norm": 0.9609375, "learning_rate": 5.6688673010845184e-05, "loss": 1.1107, "step": 4764 }, { "epoch": 0.7512110528277008, "grad_norm": 1.0234375, "learning_rate": 5.668452352340099e-05, "loss": 1.017, "step": 4765 }, { "epoch": 0.7513687046750939, "grad_norm": 0.91015625, "learning_rate": 5.6680374127761506e-05, "loss": 0.9244, "step": 4766 }, { "epoch": 0.7515263565224869, "grad_norm": 1.28125, "learning_rate": 5.667622482393549e-05, "loss": 1.0216, "step": 4767 }, { "epoch": 0.7516840083698799, "grad_norm": 0.9921875, "learning_rate": 5.667207561193175e-05, "loss": 1.1673, "step": 4768 }, { "epoch": 0.751841660217273, "grad_norm": 0.83203125, "learning_rate": 5.666792649175906e-05, "loss": 1.019, "step": 4769 }, { "epoch": 0.751999312064666, "grad_norm": 0.85546875, "learning_rate": 5.6663777463426194e-05, "loss": 0.8572, "step": 4770 }, { "epoch": 0.7521569639120589, "grad_norm": 0.81640625, "learning_rate": 5.6659628526942e-05, "loss": 0.8351, "step": 4771 }, { "epoch": 0.7523146157594519, "grad_norm": 0.8984375, "learning_rate": 5.665547968231526e-05, "loss": 1.1263, "step": 4772 }, { "epoch": 0.7524722676068449, "grad_norm": 0.8671875, "learning_rate": 5.6651330929554745e-05, "loss": 0.8592, "step": 4773 }, { "epoch": 0.752629919454238, "grad_norm": 1.078125, "learning_rate": 5.664718226866926e-05, "loss": 0.9418, "step": 4774 }, { "epoch": 0.752787571301631, "grad_norm": 0.86328125, "learning_rate": 5.664303369966759e-05, "loss": 0.9023, "step": 4775 }, { "epoch": 0.752945223149024, "grad_norm": 1.0859375, "learning_rate": 5.66388852225585e-05, "loss": 1.1438, "step": 4776 }, { "epoch": 0.753102874996417, "grad_norm": 0.93359375, "learning_rate": 5.663473683735085e-05, "loss": 1.002, "step": 4777 }, { "epoch": 0.7532605268438101, "grad_norm": 0.93359375, "learning_rate": 5.6630588544053396e-05, "loss": 1.0069, "step": 4778 }, { "epoch": 0.753418178691203, "grad_norm": 0.9453125, "learning_rate": 5.6626440342674924e-05, "loss": 0.8408, "step": 4779 }, { "epoch": 0.753575830538596, "grad_norm": 1.078125, "learning_rate": 5.662229223322425e-05, "loss": 1.183, "step": 4780 }, { "epoch": 0.753733482385989, "grad_norm": 1.1328125, "learning_rate": 5.66181442157101e-05, "loss": 1.1869, "step": 4781 }, { "epoch": 0.753891134233382, "grad_norm": 0.921875, "learning_rate": 5.6613996290141344e-05, "loss": 0.8942, "step": 4782 }, { "epoch": 0.7540487860807751, "grad_norm": 0.9609375, "learning_rate": 5.660984845652675e-05, "loss": 0.8832, "step": 4783 }, { "epoch": 0.7542064379281681, "grad_norm": 0.98828125, "learning_rate": 5.660570071487511e-05, "loss": 1.1572, "step": 4784 }, { "epoch": 0.7543640897755611, "grad_norm": 0.86328125, "learning_rate": 5.660155306519519e-05, "loss": 0.9075, "step": 4785 }, { "epoch": 0.7545217416229542, "grad_norm": 1.609375, "learning_rate": 5.6597405507495814e-05, "loss": 0.9227, "step": 4786 }, { "epoch": 0.7546793934703471, "grad_norm": 0.91015625, "learning_rate": 5.6593258041785746e-05, "loss": 1.1666, "step": 4787 }, { "epoch": 0.7548370453177401, "grad_norm": 0.94921875, "learning_rate": 5.6589110668073795e-05, "loss": 1.0032, "step": 4788 }, { "epoch": 0.7549946971651331, "grad_norm": 0.9375, "learning_rate": 5.658496338636874e-05, "loss": 0.9914, "step": 4789 }, { "epoch": 0.7551523490125261, "grad_norm": 1.1640625, "learning_rate": 5.658081619667933e-05, "loss": 0.9509, "step": 4790 }, { "epoch": 0.7553100008599192, "grad_norm": 0.859375, "learning_rate": 5.657666909901444e-05, "loss": 0.8592, "step": 4791 }, { "epoch": 0.7554676527073122, "grad_norm": 0.9921875, "learning_rate": 5.6572522093382815e-05, "loss": 1.1908, "step": 4792 }, { "epoch": 0.7556253045547052, "grad_norm": 0.921875, "learning_rate": 5.656837517979326e-05, "loss": 1.0601, "step": 4793 }, { "epoch": 0.7557829564020982, "grad_norm": 0.96484375, "learning_rate": 5.6564228358254536e-05, "loss": 0.8806, "step": 4794 }, { "epoch": 0.7559406082494912, "grad_norm": 0.953125, "learning_rate": 5.656008162877545e-05, "loss": 1.0074, "step": 4795 }, { "epoch": 0.7560982600968842, "grad_norm": 0.89453125, "learning_rate": 5.655593499136476e-05, "loss": 0.9661, "step": 4796 }, { "epoch": 0.7562559119442772, "grad_norm": 0.80078125, "learning_rate": 5.6551788446031304e-05, "loss": 0.7427, "step": 4797 }, { "epoch": 0.7564135637916702, "grad_norm": 1.0546875, "learning_rate": 5.6547641992783864e-05, "loss": 0.8946, "step": 4798 }, { "epoch": 0.7565712156390633, "grad_norm": 0.984375, "learning_rate": 5.6543495631631206e-05, "loss": 1.28, "step": 4799 }, { "epoch": 0.7567288674864563, "grad_norm": 0.84765625, "learning_rate": 5.6539349362582126e-05, "loss": 1.0501, "step": 4800 }, { "epoch": 0.7568865193338493, "grad_norm": 0.98828125, "learning_rate": 5.653520318564538e-05, "loss": 1.0795, "step": 4801 }, { "epoch": 0.7570441711812423, "grad_norm": 0.9140625, "learning_rate": 5.6531057100829807e-05, "loss": 0.9369, "step": 4802 }, { "epoch": 0.7572018230286353, "grad_norm": 0.875, "learning_rate": 5.6526911108144185e-05, "loss": 0.8539, "step": 4803 }, { "epoch": 0.7573594748760283, "grad_norm": 1.015625, "learning_rate": 5.65227652075973e-05, "loss": 1.0506, "step": 4804 }, { "epoch": 0.7575171267234213, "grad_norm": 0.87109375, "learning_rate": 5.651861939919792e-05, "loss": 1.0172, "step": 4805 }, { "epoch": 0.7576747785708143, "grad_norm": 0.99609375, "learning_rate": 5.651447368295479e-05, "loss": 1.0218, "step": 4806 }, { "epoch": 0.7578324304182074, "grad_norm": 0.91796875, "learning_rate": 5.65103280588768e-05, "loss": 0.9427, "step": 4807 }, { "epoch": 0.7579900822656004, "grad_norm": 0.9296875, "learning_rate": 5.650618252697269e-05, "loss": 0.9399, "step": 4808 }, { "epoch": 0.7581477341129934, "grad_norm": 0.88671875, "learning_rate": 5.6502037087251234e-05, "loss": 0.8983, "step": 4809 }, { "epoch": 0.7583053859603864, "grad_norm": 1.015625, "learning_rate": 5.649789173972122e-05, "loss": 1.0003, "step": 4810 }, { "epoch": 0.7584630378077793, "grad_norm": 0.8828125, "learning_rate": 5.649374648439139e-05, "loss": 1.1657, "step": 4811 }, { "epoch": 0.7586206896551724, "grad_norm": 1.1796875, "learning_rate": 5.648960132127064e-05, "loss": 0.9845, "step": 4812 }, { "epoch": 0.7587783415025654, "grad_norm": 3.25, "learning_rate": 5.648545625036767e-05, "loss": 1.044, "step": 4813 }, { "epoch": 0.7589359933499584, "grad_norm": 1.140625, "learning_rate": 5.648131127169131e-05, "loss": 0.9187, "step": 4814 }, { "epoch": 0.7590936451973515, "grad_norm": 0.9453125, "learning_rate": 5.647716638525031e-05, "loss": 1.0713, "step": 4815 }, { "epoch": 0.7592512970447445, "grad_norm": 0.921875, "learning_rate": 5.6473021591053435e-05, "loss": 0.9249, "step": 4816 }, { "epoch": 0.7594089488921375, "grad_norm": 0.9140625, "learning_rate": 5.646887688910954e-05, "loss": 0.8056, "step": 4817 }, { "epoch": 0.7595666007395305, "grad_norm": 0.953125, "learning_rate": 5.646473227942737e-05, "loss": 1.2723, "step": 4818 }, { "epoch": 0.7597242525869234, "grad_norm": 0.91015625, "learning_rate": 5.6460587762015705e-05, "loss": 0.9943, "step": 4819 }, { "epoch": 0.7598819044343165, "grad_norm": 1.015625, "learning_rate": 5.645644333688335e-05, "loss": 0.968, "step": 4820 }, { "epoch": 0.7600395562817095, "grad_norm": 0.90625, "learning_rate": 5.6452299004039025e-05, "loss": 0.9611, "step": 4821 }, { "epoch": 0.7601972081291025, "grad_norm": 1.125, "learning_rate": 5.644815476349161e-05, "loss": 1.3164, "step": 4822 }, { "epoch": 0.7603548599764955, "grad_norm": 1.015625, "learning_rate": 5.644401061524983e-05, "loss": 1.1447, "step": 4823 }, { "epoch": 0.7605125118238886, "grad_norm": 1.015625, "learning_rate": 5.6439866559322494e-05, "loss": 1.0321, "step": 4824 }, { "epoch": 0.7606701636712816, "grad_norm": 0.90234375, "learning_rate": 5.6435722595718366e-05, "loss": 0.9234, "step": 4825 }, { "epoch": 0.7608278155186746, "grad_norm": 0.99609375, "learning_rate": 5.643157872444622e-05, "loss": 0.8942, "step": 4826 }, { "epoch": 0.7609854673660675, "grad_norm": 1.0078125, "learning_rate": 5.642743494551488e-05, "loss": 1.1226, "step": 4827 }, { "epoch": 0.7611431192134606, "grad_norm": 0.9140625, "learning_rate": 5.642329125893304e-05, "loss": 0.9367, "step": 4828 }, { "epoch": 0.7613007710608536, "grad_norm": 1.0234375, "learning_rate": 5.641914766470959e-05, "loss": 0.9956, "step": 4829 }, { "epoch": 0.7614584229082466, "grad_norm": 1.0234375, "learning_rate": 5.641500416285326e-05, "loss": 0.8958, "step": 4830 }, { "epoch": 0.7616160747556396, "grad_norm": 1.0234375, "learning_rate": 5.641086075337285e-05, "loss": 0.9933, "step": 4831 }, { "epoch": 0.7617737266030327, "grad_norm": 0.94921875, "learning_rate": 5.640671743627712e-05, "loss": 0.8069, "step": 4832 }, { "epoch": 0.7619313784504257, "grad_norm": 0.90625, "learning_rate": 5.640257421157487e-05, "loss": 1.0776, "step": 4833 }, { "epoch": 0.7620890302978187, "grad_norm": 0.92578125, "learning_rate": 5.639843107927485e-05, "loss": 0.968, "step": 4834 }, { "epoch": 0.7622466821452116, "grad_norm": 1.03125, "learning_rate": 5.639428803938589e-05, "loss": 1.1492, "step": 4835 }, { "epoch": 0.7624043339926047, "grad_norm": 0.9375, "learning_rate": 5.639014509191669e-05, "loss": 0.8815, "step": 4836 }, { "epoch": 0.7625619858399977, "grad_norm": 0.98828125, "learning_rate": 5.638600223687612e-05, "loss": 0.9549, "step": 4837 }, { "epoch": 0.7627196376873907, "grad_norm": 1.6953125, "learning_rate": 5.6381859474272935e-05, "loss": 1.0474, "step": 4838 }, { "epoch": 0.7628772895347837, "grad_norm": 0.86328125, "learning_rate": 5.63777168041159e-05, "loss": 0.858, "step": 4839 }, { "epoch": 0.7630349413821768, "grad_norm": 0.9921875, "learning_rate": 5.6373574226413805e-05, "loss": 1.1111, "step": 4840 }, { "epoch": 0.7631925932295698, "grad_norm": 0.9296875, "learning_rate": 5.636943174117539e-05, "loss": 1.0617, "step": 4841 }, { "epoch": 0.7633502450769628, "grad_norm": 1.0390625, "learning_rate": 5.63652893484095e-05, "loss": 0.9691, "step": 4842 }, { "epoch": 0.7635078969243557, "grad_norm": 0.9765625, "learning_rate": 5.6361147048124895e-05, "loss": 0.8672, "step": 4843 }, { "epoch": 0.7636655487717487, "grad_norm": 1.3125, "learning_rate": 5.635700484033034e-05, "loss": 0.8289, "step": 4844 }, { "epoch": 0.7638232006191418, "grad_norm": 1.0, "learning_rate": 5.635286272503462e-05, "loss": 1.026, "step": 4845 }, { "epoch": 0.7639808524665348, "grad_norm": 0.8828125, "learning_rate": 5.6348720702246474e-05, "loss": 0.9966, "step": 4846 }, { "epoch": 0.7641385043139278, "grad_norm": 0.9296875, "learning_rate": 5.634457877197475e-05, "loss": 1.0871, "step": 4847 }, { "epoch": 0.7642961561613208, "grad_norm": 0.9140625, "learning_rate": 5.634043693422821e-05, "loss": 1.0065, "step": 4848 }, { "epoch": 0.7644538080087139, "grad_norm": 1.0, "learning_rate": 5.633629518901561e-05, "loss": 0.9696, "step": 4849 }, { "epoch": 0.7646114598561069, "grad_norm": 0.93359375, "learning_rate": 5.6332153536345735e-05, "loss": 0.8475, "step": 4850 }, { "epoch": 0.7647691117034999, "grad_norm": 1.0078125, "learning_rate": 5.632801197622732e-05, "loss": 1.0268, "step": 4851 }, { "epoch": 0.7649267635508928, "grad_norm": 0.96875, "learning_rate": 5.6323870508669254e-05, "loss": 1.1305, "step": 4852 }, { "epoch": 0.7650844153982859, "grad_norm": 1.015625, "learning_rate": 5.631972913368022e-05, "loss": 0.9686, "step": 4853 }, { "epoch": 0.7652420672456789, "grad_norm": 0.96875, "learning_rate": 5.631558785126904e-05, "loss": 1.1917, "step": 4854 }, { "epoch": 0.7653997190930719, "grad_norm": 0.78515625, "learning_rate": 5.6311446661444454e-05, "loss": 0.8221, "step": 4855 }, { "epoch": 0.7655573709404649, "grad_norm": 0.890625, "learning_rate": 5.630730556421524e-05, "loss": 0.8045, "step": 4856 }, { "epoch": 0.765715022787858, "grad_norm": 0.89453125, "learning_rate": 5.630316455959024e-05, "loss": 0.9608, "step": 4857 }, { "epoch": 0.765872674635251, "grad_norm": 0.9453125, "learning_rate": 5.629902364757816e-05, "loss": 0.9387, "step": 4858 }, { "epoch": 0.766030326482644, "grad_norm": 0.921875, "learning_rate": 5.6294882828187826e-05, "loss": 1.0019, "step": 4859 }, { "epoch": 0.7661879783300369, "grad_norm": 1.0625, "learning_rate": 5.6290742101427975e-05, "loss": 1.0928, "step": 4860 }, { "epoch": 0.76634563017743, "grad_norm": 0.921875, "learning_rate": 5.628660146730735e-05, "loss": 1.106, "step": 4861 }, { "epoch": 0.766503282024823, "grad_norm": 0.9375, "learning_rate": 5.6282460925834834e-05, "loss": 0.8749, "step": 4862 }, { "epoch": 0.766660933872216, "grad_norm": 0.96875, "learning_rate": 5.627832047701913e-05, "loss": 0.7704, "step": 4863 }, { "epoch": 0.766818585719609, "grad_norm": 1.0, "learning_rate": 5.6274180120869036e-05, "loss": 0.8896, "step": 4864 }, { "epoch": 0.7669762375670021, "grad_norm": 0.953125, "learning_rate": 5.627003985739332e-05, "loss": 0.8467, "step": 4865 }, { "epoch": 0.7671338894143951, "grad_norm": 0.90625, "learning_rate": 5.6265899686600696e-05, "loss": 0.8383, "step": 4866 }, { "epoch": 0.7672915412617881, "grad_norm": 0.96875, "learning_rate": 5.6261759608500065e-05, "loss": 0.9786, "step": 4867 }, { "epoch": 0.767449193109181, "grad_norm": 0.953125, "learning_rate": 5.625761962310011e-05, "loss": 1.0446, "step": 4868 }, { "epoch": 0.767606844956574, "grad_norm": 1.140625, "learning_rate": 5.6253479730409645e-05, "loss": 1.0828, "step": 4869 }, { "epoch": 0.7677644968039671, "grad_norm": 0.8984375, "learning_rate": 5.624933993043742e-05, "loss": 0.8906, "step": 4870 }, { "epoch": 0.7679221486513601, "grad_norm": 0.96484375, "learning_rate": 5.624520022319222e-05, "loss": 0.9426, "step": 4871 }, { "epoch": 0.7680798004987531, "grad_norm": 0.984375, "learning_rate": 5.6241060608682826e-05, "loss": 1.0208, "step": 4872 }, { "epoch": 0.7682374523461462, "grad_norm": 0.9765625, "learning_rate": 5.6236921086918e-05, "loss": 1.0319, "step": 4873 }, { "epoch": 0.7683951041935392, "grad_norm": 0.9375, "learning_rate": 5.6232781657906505e-05, "loss": 1.0559, "step": 4874 }, { "epoch": 0.7685527560409322, "grad_norm": 1.0234375, "learning_rate": 5.6228642321657144e-05, "loss": 1.1998, "step": 4875 }, { "epoch": 0.7687104078883251, "grad_norm": 0.91796875, "learning_rate": 5.622450307817864e-05, "loss": 0.9915, "step": 4876 }, { "epoch": 0.7688680597357181, "grad_norm": 0.91796875, "learning_rate": 5.622036392747983e-05, "loss": 0.9104, "step": 4877 }, { "epoch": 0.7690257115831112, "grad_norm": 0.890625, "learning_rate": 5.6216224869569436e-05, "loss": 0.8299, "step": 4878 }, { "epoch": 0.7691833634305042, "grad_norm": 0.984375, "learning_rate": 5.621208590445628e-05, "loss": 1.1187, "step": 4879 }, { "epoch": 0.7693410152778972, "grad_norm": 0.8984375, "learning_rate": 5.620794703214909e-05, "loss": 0.8825, "step": 4880 }, { "epoch": 0.7694986671252902, "grad_norm": 1.046875, "learning_rate": 5.620380825265661e-05, "loss": 1.1337, "step": 4881 }, { "epoch": 0.7696563189726833, "grad_norm": 0.96484375, "learning_rate": 5.61996695659877e-05, "loss": 1.1088, "step": 4882 }, { "epoch": 0.7698139708200763, "grad_norm": 0.81640625, "learning_rate": 5.619553097215109e-05, "loss": 0.8583, "step": 4883 }, { "epoch": 0.7699716226674692, "grad_norm": 1.1484375, "learning_rate": 5.6191392471155555e-05, "loss": 1.2999, "step": 4884 }, { "epoch": 0.7701292745148622, "grad_norm": 0.92578125, "learning_rate": 5.618725406300985e-05, "loss": 1.0181, "step": 4885 }, { "epoch": 0.7702869263622553, "grad_norm": 0.94921875, "learning_rate": 5.618311574772273e-05, "loss": 1.0428, "step": 4886 }, { "epoch": 0.7704445782096483, "grad_norm": 0.92578125, "learning_rate": 5.617897752530301e-05, "loss": 0.9231, "step": 4887 }, { "epoch": 0.7706022300570413, "grad_norm": 0.921875, "learning_rate": 5.617483939575945e-05, "loss": 0.8501, "step": 4888 }, { "epoch": 0.7707598819044343, "grad_norm": 0.95703125, "learning_rate": 5.617070135910082e-05, "loss": 1.1794, "step": 4889 }, { "epoch": 0.7709175337518274, "grad_norm": 1.0234375, "learning_rate": 5.616656341533588e-05, "loss": 1.1176, "step": 4890 }, { "epoch": 0.7710751855992204, "grad_norm": 1.0078125, "learning_rate": 5.616242556447335e-05, "loss": 1.0158, "step": 4891 }, { "epoch": 0.7712328374466133, "grad_norm": 0.91796875, "learning_rate": 5.615828780652211e-05, "loss": 0.8889, "step": 4892 }, { "epoch": 0.7713904892940063, "grad_norm": 0.96875, "learning_rate": 5.615415014149087e-05, "loss": 1.1174, "step": 4893 }, { "epoch": 0.7715481411413994, "grad_norm": 1.0078125, "learning_rate": 5.61500125693884e-05, "loss": 1.179, "step": 4894 }, { "epoch": 0.7717057929887924, "grad_norm": 0.9140625, "learning_rate": 5.6145875090223475e-05, "loss": 0.7847, "step": 4895 }, { "epoch": 0.7718634448361854, "grad_norm": 1.0, "learning_rate": 5.614173770400481e-05, "loss": 0.8788, "step": 4896 }, { "epoch": 0.7720210966835784, "grad_norm": 0.90625, "learning_rate": 5.6137600410741275e-05, "loss": 1.0177, "step": 4897 }, { "epoch": 0.7721787485309715, "grad_norm": 1.0078125, "learning_rate": 5.6133463210441595e-05, "loss": 1.0472, "step": 4898 }, { "epoch": 0.7723364003783645, "grad_norm": 1.0078125, "learning_rate": 5.612932610311452e-05, "loss": 1.0524, "step": 4899 }, { "epoch": 0.7724940522257574, "grad_norm": 0.953125, "learning_rate": 5.612518908876885e-05, "loss": 1.021, "step": 4900 }, { "epoch": 0.7726517040731504, "grad_norm": 0.93359375, "learning_rate": 5.612105216741328e-05, "loss": 0.968, "step": 4901 }, { "epoch": 0.7728093559205435, "grad_norm": 0.9609375, "learning_rate": 5.611691533905666e-05, "loss": 0.7917, "step": 4902 }, { "epoch": 0.7729670077679365, "grad_norm": 0.84375, "learning_rate": 5.6112778603707746e-05, "loss": 0.8711, "step": 4903 }, { "epoch": 0.7731246596153295, "grad_norm": 0.9375, "learning_rate": 5.6108641961375286e-05, "loss": 1.0208, "step": 4904 }, { "epoch": 0.7732823114627225, "grad_norm": 0.984375, "learning_rate": 5.6104505412068056e-05, "loss": 0.951, "step": 4905 }, { "epoch": 0.7734399633101156, "grad_norm": 1.0703125, "learning_rate": 5.610036895579477e-05, "loss": 1.1127, "step": 4906 }, { "epoch": 0.7735976151575086, "grad_norm": 0.9375, "learning_rate": 5.609623259256428e-05, "loss": 0.9038, "step": 4907 }, { "epoch": 0.7737552670049015, "grad_norm": 0.89453125, "learning_rate": 5.609209632238532e-05, "loss": 0.8875, "step": 4908 }, { "epoch": 0.7739129188522945, "grad_norm": 0.94921875, "learning_rate": 5.608796014526665e-05, "loss": 1.1249, "step": 4909 }, { "epoch": 0.7740705706996875, "grad_norm": 0.9375, "learning_rate": 5.6083824061217035e-05, "loss": 1.2101, "step": 4910 }, { "epoch": 0.7742282225470806, "grad_norm": 0.84765625, "learning_rate": 5.607968807024526e-05, "loss": 1.0282, "step": 4911 }, { "epoch": 0.7743858743944736, "grad_norm": 0.97265625, "learning_rate": 5.607555217236007e-05, "loss": 1.083, "step": 4912 }, { "epoch": 0.7745435262418666, "grad_norm": 1.0234375, "learning_rate": 5.607141636757018e-05, "loss": 1.1474, "step": 4913 }, { "epoch": 0.7747011780892596, "grad_norm": 0.81640625, "learning_rate": 5.606728065588447e-05, "loss": 0.9038, "step": 4914 }, { "epoch": 0.7748588299366527, "grad_norm": 1.078125, "learning_rate": 5.606314503731165e-05, "loss": 1.0259, "step": 4915 }, { "epoch": 0.7750164817840456, "grad_norm": 1.328125, "learning_rate": 5.605900951186047e-05, "loss": 0.8264, "step": 4916 }, { "epoch": 0.7751741336314386, "grad_norm": 0.98046875, "learning_rate": 5.605487407953972e-05, "loss": 0.9976, "step": 4917 }, { "epoch": 0.7753317854788316, "grad_norm": 0.94140625, "learning_rate": 5.6050738740358146e-05, "loss": 1.0209, "step": 4918 }, { "epoch": 0.7754894373262247, "grad_norm": 0.94921875, "learning_rate": 5.604660349432451e-05, "loss": 0.8686, "step": 4919 }, { "epoch": 0.7756470891736177, "grad_norm": 0.87109375, "learning_rate": 5.60424683414476e-05, "loss": 0.9371, "step": 4920 }, { "epoch": 0.7758047410210107, "grad_norm": 0.88671875, "learning_rate": 5.603833328173617e-05, "loss": 0.9635, "step": 4921 }, { "epoch": 0.7759623928684037, "grad_norm": 0.9375, "learning_rate": 5.6034198315198924e-05, "loss": 1.0, "step": 4922 }, { "epoch": 0.7761200447157968, "grad_norm": 0.984375, "learning_rate": 5.6030063441844725e-05, "loss": 0.9096, "step": 4923 }, { "epoch": 0.7762776965631897, "grad_norm": 0.98046875, "learning_rate": 5.60259286616823e-05, "loss": 1.0806, "step": 4924 }, { "epoch": 0.7764353484105827, "grad_norm": 1.0390625, "learning_rate": 5.6021793974720407e-05, "loss": 1.194, "step": 4925 }, { "epoch": 0.7765930002579757, "grad_norm": 1.0390625, "learning_rate": 5.6017659380967805e-05, "loss": 0.9434, "step": 4926 }, { "epoch": 0.7767506521053688, "grad_norm": 0.96484375, "learning_rate": 5.601352488043321e-05, "loss": 0.8821, "step": 4927 }, { "epoch": 0.7769083039527618, "grad_norm": 1.1328125, "learning_rate": 5.600939047312548e-05, "loss": 1.1288, "step": 4928 }, { "epoch": 0.7770659558001548, "grad_norm": 0.97265625, "learning_rate": 5.600525615905334e-05, "loss": 1.0655, "step": 4929 }, { "epoch": 0.7772236076475478, "grad_norm": 1.0078125, "learning_rate": 5.600112193822554e-05, "loss": 1.1527, "step": 4930 }, { "epoch": 0.7773812594949409, "grad_norm": 0.9453125, "learning_rate": 5.599698781065086e-05, "loss": 0.9636, "step": 4931 }, { "epoch": 0.7775389113423338, "grad_norm": 0.84375, "learning_rate": 5.599285377633798e-05, "loss": 1.0691, "step": 4932 }, { "epoch": 0.7776965631897268, "grad_norm": 0.921875, "learning_rate": 5.598871983529579e-05, "loss": 0.9235, "step": 4933 }, { "epoch": 0.7778542150371198, "grad_norm": 0.953125, "learning_rate": 5.5984585987533e-05, "loss": 1.0348, "step": 4934 }, { "epoch": 0.7780118668845128, "grad_norm": 0.796875, "learning_rate": 5.598045223305836e-05, "loss": 0.777, "step": 4935 }, { "epoch": 0.7781695187319059, "grad_norm": 1.0078125, "learning_rate": 5.597631857188063e-05, "loss": 1.1555, "step": 4936 }, { "epoch": 0.7783271705792989, "grad_norm": 0.9609375, "learning_rate": 5.597218500400854e-05, "loss": 0.9298, "step": 4937 }, { "epoch": 0.7784848224266919, "grad_norm": 0.9296875, "learning_rate": 5.5968051529450916e-05, "loss": 1.0523, "step": 4938 }, { "epoch": 0.778642474274085, "grad_norm": 0.93359375, "learning_rate": 5.59639181482165e-05, "loss": 0.9194, "step": 4939 }, { "epoch": 0.7788001261214779, "grad_norm": 1.046875, "learning_rate": 5.595978486031403e-05, "loss": 1.1318, "step": 4940 }, { "epoch": 0.7789577779688709, "grad_norm": 0.80078125, "learning_rate": 5.5955651665752294e-05, "loss": 0.8782, "step": 4941 }, { "epoch": 0.7791154298162639, "grad_norm": 0.828125, "learning_rate": 5.5951518564539996e-05, "loss": 0.789, "step": 4942 }, { "epoch": 0.7792730816636569, "grad_norm": 1.1171875, "learning_rate": 5.594738555668597e-05, "loss": 1.1789, "step": 4943 }, { "epoch": 0.77943073351105, "grad_norm": 0.89453125, "learning_rate": 5.594325264219895e-05, "loss": 0.8158, "step": 4944 }, { "epoch": 0.779588385358443, "grad_norm": 1.0234375, "learning_rate": 5.5939119821087684e-05, "loss": 1.2868, "step": 4945 }, { "epoch": 0.779746037205836, "grad_norm": 0.8828125, "learning_rate": 5.593498709336094e-05, "loss": 0.9817, "step": 4946 }, { "epoch": 0.779903689053229, "grad_norm": 0.91796875, "learning_rate": 5.5930854459027426e-05, "loss": 0.9308, "step": 4947 }, { "epoch": 0.780061340900622, "grad_norm": 0.90234375, "learning_rate": 5.592672191809598e-05, "loss": 1.0024, "step": 4948 }, { "epoch": 0.780218992748015, "grad_norm": 0.9140625, "learning_rate": 5.5922589470575336e-05, "loss": 0.9408, "step": 4949 }, { "epoch": 0.780376644595408, "grad_norm": 0.81640625, "learning_rate": 5.591845711647425e-05, "loss": 0.7219, "step": 4950 }, { "epoch": 0.780534296442801, "grad_norm": 0.94140625, "learning_rate": 5.591432485580147e-05, "loss": 1.0164, "step": 4951 }, { "epoch": 0.7806919482901941, "grad_norm": 0.88671875, "learning_rate": 5.5910192688565724e-05, "loss": 1.0595, "step": 4952 }, { "epoch": 0.7808496001375871, "grad_norm": 0.96484375, "learning_rate": 5.5906060614775836e-05, "loss": 1.1242, "step": 4953 }, { "epoch": 0.7810072519849801, "grad_norm": 0.83984375, "learning_rate": 5.590192863444052e-05, "loss": 0.9602, "step": 4954 }, { "epoch": 0.7811649038323731, "grad_norm": 0.890625, "learning_rate": 5.5897796747568566e-05, "loss": 0.8565, "step": 4955 }, { "epoch": 0.781322555679766, "grad_norm": 0.9296875, "learning_rate": 5.5893664954168715e-05, "loss": 0.9337, "step": 4956 }, { "epoch": 0.7814802075271591, "grad_norm": 0.8203125, "learning_rate": 5.58895332542497e-05, "loss": 0.8481, "step": 4957 }, { "epoch": 0.7816378593745521, "grad_norm": 0.9375, "learning_rate": 5.588540164782031e-05, "loss": 0.8779, "step": 4958 }, { "epoch": 0.7817955112219451, "grad_norm": 1.1015625, "learning_rate": 5.5881270134889286e-05, "loss": 1.2009, "step": 4959 }, { "epoch": 0.7819531630693382, "grad_norm": 0.8984375, "learning_rate": 5.587713871546539e-05, "loss": 0.849, "step": 4960 }, { "epoch": 0.7821108149167312, "grad_norm": 1.0234375, "learning_rate": 5.587300738955734e-05, "loss": 1.2691, "step": 4961 }, { "epoch": 0.7822684667641242, "grad_norm": 0.94140625, "learning_rate": 5.586887615717397e-05, "loss": 0.9872, "step": 4962 }, { "epoch": 0.7824261186115172, "grad_norm": 0.86328125, "learning_rate": 5.5864745018323996e-05, "loss": 0.9269, "step": 4963 }, { "epoch": 0.7825837704589101, "grad_norm": 1.0546875, "learning_rate": 5.586061397301616e-05, "loss": 1.119, "step": 4964 }, { "epoch": 0.7827414223063032, "grad_norm": 0.92578125, "learning_rate": 5.585648302125923e-05, "loss": 0.9326, "step": 4965 }, { "epoch": 0.7828990741536962, "grad_norm": 0.9375, "learning_rate": 5.585235216306197e-05, "loss": 1.0823, "step": 4966 }, { "epoch": 0.7830567260010892, "grad_norm": 0.9765625, "learning_rate": 5.5848221398433086e-05, "loss": 0.8879, "step": 4967 }, { "epoch": 0.7832143778484822, "grad_norm": 2.09375, "learning_rate": 5.5844090727381414e-05, "loss": 1.2079, "step": 4968 }, { "epoch": 0.7833720296958753, "grad_norm": 0.9765625, "learning_rate": 5.583996014991566e-05, "loss": 0.7936, "step": 4969 }, { "epoch": 0.7835296815432683, "grad_norm": 0.90625, "learning_rate": 5.583582966604459e-05, "loss": 1.0289, "step": 4970 }, { "epoch": 0.7836873333906613, "grad_norm": 1.15625, "learning_rate": 5.583169927577696e-05, "loss": 0.9569, "step": 4971 }, { "epoch": 0.7838449852380542, "grad_norm": 0.8359375, "learning_rate": 5.582756897912147e-05, "loss": 0.9837, "step": 4972 }, { "epoch": 0.7840026370854473, "grad_norm": 0.8203125, "learning_rate": 5.582343877608697e-05, "loss": 0.9335, "step": 4973 }, { "epoch": 0.7841602889328403, "grad_norm": 0.8984375, "learning_rate": 5.581930866668217e-05, "loss": 0.9462, "step": 4974 }, { "epoch": 0.7843179407802333, "grad_norm": 0.890625, "learning_rate": 5.581517865091581e-05, "loss": 0.851, "step": 4975 }, { "epoch": 0.7844755926276263, "grad_norm": 0.97265625, "learning_rate": 5.581104872879666e-05, "loss": 1.0618, "step": 4976 }, { "epoch": 0.7846332444750194, "grad_norm": 0.890625, "learning_rate": 5.5806918900333416e-05, "loss": 0.8612, "step": 4977 }, { "epoch": 0.7847908963224124, "grad_norm": 0.8984375, "learning_rate": 5.580278916553493e-05, "loss": 0.7828, "step": 4978 }, { "epoch": 0.7849485481698054, "grad_norm": 0.94140625, "learning_rate": 5.57986595244099e-05, "loss": 0.9476, "step": 4979 }, { "epoch": 0.7851062000171983, "grad_norm": 0.89453125, "learning_rate": 5.5794529976967105e-05, "loss": 1.0268, "step": 4980 }, { "epoch": 0.7852638518645914, "grad_norm": 0.9140625, "learning_rate": 5.579040052321525e-05, "loss": 1.0485, "step": 4981 }, { "epoch": 0.7854215037119844, "grad_norm": 1.0078125, "learning_rate": 5.578627116316308e-05, "loss": 0.9143, "step": 4982 }, { "epoch": 0.7855791555593774, "grad_norm": 0.9296875, "learning_rate": 5.578214189681943e-05, "loss": 1.0022, "step": 4983 }, { "epoch": 0.7857368074067704, "grad_norm": 0.984375, "learning_rate": 5.577801272419301e-05, "loss": 0.9785, "step": 4984 }, { "epoch": 0.7858944592541635, "grad_norm": 0.98046875, "learning_rate": 5.577388364529255e-05, "loss": 0.952, "step": 4985 }, { "epoch": 0.7860521111015565, "grad_norm": 0.85546875, "learning_rate": 5.5769754660126816e-05, "loss": 0.9624, "step": 4986 }, { "epoch": 0.7862097629489495, "grad_norm": 0.90234375, "learning_rate": 5.576562576870452e-05, "loss": 0.9868, "step": 4987 }, { "epoch": 0.7863674147963424, "grad_norm": 1.0, "learning_rate": 5.57614969710345e-05, "loss": 1.182, "step": 4988 }, { "epoch": 0.7865250666437354, "grad_norm": 0.9921875, "learning_rate": 5.5757368267125455e-05, "loss": 1.1271, "step": 4989 }, { "epoch": 0.7866827184911285, "grad_norm": 1.0546875, "learning_rate": 5.575323965698614e-05, "loss": 1.0158, "step": 4990 }, { "epoch": 0.7868403703385215, "grad_norm": 1.015625, "learning_rate": 5.574911114062531e-05, "loss": 0.8596, "step": 4991 }, { "epoch": 0.7869980221859145, "grad_norm": 0.92578125, "learning_rate": 5.574498271805166e-05, "loss": 0.9557, "step": 4992 }, { "epoch": 0.7871556740333076, "grad_norm": 0.9296875, "learning_rate": 5.5740854389274036e-05, "loss": 0.8968, "step": 4993 }, { "epoch": 0.7873133258807006, "grad_norm": 0.8984375, "learning_rate": 5.573672615430112e-05, "loss": 0.9367, "step": 4994 }, { "epoch": 0.7874709777280936, "grad_norm": 0.91015625, "learning_rate": 5.57325980131417e-05, "loss": 0.9134, "step": 4995 }, { "epoch": 0.7876286295754865, "grad_norm": 0.91796875, "learning_rate": 5.572846996580452e-05, "loss": 0.9269, "step": 4996 }, { "epoch": 0.7877862814228795, "grad_norm": 0.91796875, "learning_rate": 5.57243420122983e-05, "loss": 0.9997, "step": 4997 }, { "epoch": 0.7879439332702726, "grad_norm": 0.83984375, "learning_rate": 5.572021415263182e-05, "loss": 0.792, "step": 4998 }, { "epoch": 0.7881015851176656, "grad_norm": 0.91015625, "learning_rate": 5.571608638681376e-05, "loss": 1.0963, "step": 4999 }, { "epoch": 0.7882592369650586, "grad_norm": 1.0234375, "learning_rate": 5.571195871485297e-05, "loss": 0.9355, "step": 5000 }, { "epoch": 0.7882592369650586, "eval_loss": 0.973717987537384, "eval_runtime": 307.9479, "eval_samples_per_second": 32.473, "eval_steps_per_second": 0.679, "step": 5000 }, { "epoch": 0.7884168888124516, "grad_norm": 1.078125, "learning_rate": 5.570783113675816e-05, "loss": 1.369, "step": 5001 }, { "epoch": 0.7885745406598447, "grad_norm": 0.94921875, "learning_rate": 5.570370365253806e-05, "loss": 1.1005, "step": 5002 }, { "epoch": 0.7887321925072377, "grad_norm": 1.0234375, "learning_rate": 5.569957626220143e-05, "loss": 1.1115, "step": 5003 }, { "epoch": 0.7888898443546306, "grad_norm": 1.3046875, "learning_rate": 5.569544896575702e-05, "loss": 1.0446, "step": 5004 }, { "epoch": 0.7890474962020236, "grad_norm": 0.89453125, "learning_rate": 5.5691321763213566e-05, "loss": 1.0526, "step": 5005 }, { "epoch": 0.7892051480494167, "grad_norm": 0.93359375, "learning_rate": 5.568719465457983e-05, "loss": 1.1241, "step": 5006 }, { "epoch": 0.7893627998968097, "grad_norm": 1.046875, "learning_rate": 5.5683067639864516e-05, "loss": 1.2479, "step": 5007 }, { "epoch": 0.7895204517442027, "grad_norm": 1.2890625, "learning_rate": 5.5678940719076446e-05, "loss": 1.0674, "step": 5008 }, { "epoch": 0.7896781035915957, "grad_norm": 1.03125, "learning_rate": 5.567481389222432e-05, "loss": 0.9963, "step": 5009 }, { "epoch": 0.7898357554389888, "grad_norm": 1.171875, "learning_rate": 5.5670687159316896e-05, "loss": 1.0227, "step": 5010 }, { "epoch": 0.7899934072863818, "grad_norm": 0.8984375, "learning_rate": 5.566656052036292e-05, "loss": 0.9234, "step": 5011 }, { "epoch": 0.7901510591337748, "grad_norm": 0.99609375, "learning_rate": 5.56624339753711e-05, "loss": 0.9602, "step": 5012 }, { "epoch": 0.7903087109811677, "grad_norm": 0.921875, "learning_rate": 5.565830752435024e-05, "loss": 0.8803, "step": 5013 }, { "epoch": 0.7904663628285608, "grad_norm": 0.95703125, "learning_rate": 5.565418116730907e-05, "loss": 1.0225, "step": 5014 }, { "epoch": 0.7906240146759538, "grad_norm": 0.890625, "learning_rate": 5.565005490425633e-05, "loss": 0.9334, "step": 5015 }, { "epoch": 0.7907816665233468, "grad_norm": 1.0234375, "learning_rate": 5.564592873520076e-05, "loss": 1.0181, "step": 5016 }, { "epoch": 0.7909393183707398, "grad_norm": 1.046875, "learning_rate": 5.564180266015108e-05, "loss": 1.0257, "step": 5017 }, { "epoch": 0.7910969702181329, "grad_norm": 1.0546875, "learning_rate": 5.5637676679116104e-05, "loss": 0.9985, "step": 5018 }, { "epoch": 0.7912546220655259, "grad_norm": 0.88671875, "learning_rate": 5.5633550792104526e-05, "loss": 0.8488, "step": 5019 }, { "epoch": 0.7914122739129189, "grad_norm": 1.0859375, "learning_rate": 5.562942499912509e-05, "loss": 1.0944, "step": 5020 }, { "epoch": 0.7915699257603118, "grad_norm": 1.0703125, "learning_rate": 5.5625299300186574e-05, "loss": 0.9547, "step": 5021 }, { "epoch": 0.7917275776077048, "grad_norm": 1.2265625, "learning_rate": 5.562117369529765e-05, "loss": 1.0213, "step": 5022 }, { "epoch": 0.7918852294550979, "grad_norm": 1.03125, "learning_rate": 5.561704818446716e-05, "loss": 1.1166, "step": 5023 }, { "epoch": 0.7920428813024909, "grad_norm": 0.98828125, "learning_rate": 5.561292276770379e-05, "loss": 1.2056, "step": 5024 }, { "epoch": 0.7922005331498839, "grad_norm": 0.890625, "learning_rate": 5.5608797445016305e-05, "loss": 0.9612, "step": 5025 }, { "epoch": 0.792358184997277, "grad_norm": 0.9609375, "learning_rate": 5.560467221641341e-05, "loss": 0.9725, "step": 5026 }, { "epoch": 0.79251583684467, "grad_norm": 0.9375, "learning_rate": 5.5600547081903864e-05, "loss": 0.8572, "step": 5027 }, { "epoch": 0.792673488692063, "grad_norm": 0.91796875, "learning_rate": 5.5596422041496444e-05, "loss": 0.9926, "step": 5028 }, { "epoch": 0.7928311405394559, "grad_norm": 0.9453125, "learning_rate": 5.559229709519989e-05, "loss": 1.094, "step": 5029 }, { "epoch": 0.7929887923868489, "grad_norm": 0.93359375, "learning_rate": 5.5588172243022895e-05, "loss": 0.9249, "step": 5030 }, { "epoch": 0.793146444234242, "grad_norm": 0.90625, "learning_rate": 5.558404748497425e-05, "loss": 1.0177, "step": 5031 }, { "epoch": 0.793304096081635, "grad_norm": 0.92578125, "learning_rate": 5.557992282106263e-05, "loss": 0.9821, "step": 5032 }, { "epoch": 0.793461747929028, "grad_norm": 1.359375, "learning_rate": 5.557579825129686e-05, "loss": 1.2193, "step": 5033 }, { "epoch": 0.793619399776421, "grad_norm": 1.015625, "learning_rate": 5.557167377568566e-05, "loss": 1.1078, "step": 5034 }, { "epoch": 0.7937770516238141, "grad_norm": 0.87890625, "learning_rate": 5.556754939423775e-05, "loss": 0.8695, "step": 5035 }, { "epoch": 0.7939347034712071, "grad_norm": 1.0078125, "learning_rate": 5.556342510696188e-05, "loss": 0.981, "step": 5036 }, { "epoch": 0.7940923553186, "grad_norm": 0.8359375, "learning_rate": 5.555930091386675e-05, "loss": 0.7826, "step": 5037 }, { "epoch": 0.794250007165993, "grad_norm": 1.0078125, "learning_rate": 5.5555176814961184e-05, "loss": 1.0468, "step": 5038 }, { "epoch": 0.7944076590133861, "grad_norm": 0.8125, "learning_rate": 5.555105281025388e-05, "loss": 0.8758, "step": 5039 }, { "epoch": 0.7945653108607791, "grad_norm": 0.96484375, "learning_rate": 5.554692889975357e-05, "loss": 1.1049, "step": 5040 }, { "epoch": 0.7947229627081721, "grad_norm": 0.95703125, "learning_rate": 5.554280508346902e-05, "loss": 1.0157, "step": 5041 }, { "epoch": 0.7948806145555651, "grad_norm": 0.76953125, "learning_rate": 5.553868136140894e-05, "loss": 0.8076, "step": 5042 }, { "epoch": 0.7950382664029582, "grad_norm": 2.671875, "learning_rate": 5.553455773358209e-05, "loss": 1.0057, "step": 5043 }, { "epoch": 0.7951959182503512, "grad_norm": 0.94921875, "learning_rate": 5.5530434199997204e-05, "loss": 1.045, "step": 5044 }, { "epoch": 0.7953535700977441, "grad_norm": 0.94140625, "learning_rate": 5.5526310760663015e-05, "loss": 0.7979, "step": 5045 }, { "epoch": 0.7955112219451371, "grad_norm": 1.078125, "learning_rate": 5.552218741558828e-05, "loss": 0.9919, "step": 5046 }, { "epoch": 0.7956688737925302, "grad_norm": 1.0390625, "learning_rate": 5.551806416478167e-05, "loss": 0.9217, "step": 5047 }, { "epoch": 0.7958265256399232, "grad_norm": 0.9453125, "learning_rate": 5.5513941008252035e-05, "loss": 0.9083, "step": 5048 }, { "epoch": 0.7959841774873162, "grad_norm": 0.99609375, "learning_rate": 5.5509817946008044e-05, "loss": 0.9528, "step": 5049 }, { "epoch": 0.7961418293347092, "grad_norm": 1.125, "learning_rate": 5.5505694978058464e-05, "loss": 1.0934, "step": 5050 }, { "epoch": 0.7962994811821023, "grad_norm": 0.98828125, "learning_rate": 5.5501572104412024e-05, "loss": 0.9185, "step": 5051 }, { "epoch": 0.7964571330294953, "grad_norm": 0.8984375, "learning_rate": 5.549744932507741e-05, "loss": 0.8803, "step": 5052 }, { "epoch": 0.7966147848768882, "grad_norm": 0.8984375, "learning_rate": 5.549332664006346e-05, "loss": 0.979, "step": 5053 }, { "epoch": 0.7967724367242812, "grad_norm": 0.95703125, "learning_rate": 5.548920404937885e-05, "loss": 1.0268, "step": 5054 }, { "epoch": 0.7969300885716742, "grad_norm": 0.859375, "learning_rate": 5.5485081553032314e-05, "loss": 0.9669, "step": 5055 }, { "epoch": 0.7970877404190673, "grad_norm": 0.98046875, "learning_rate": 5.5480959151032616e-05, "loss": 1.1204, "step": 5056 }, { "epoch": 0.7972453922664603, "grad_norm": 1.0625, "learning_rate": 5.547683684338849e-05, "loss": 1.1368, "step": 5057 }, { "epoch": 0.7974030441138533, "grad_norm": 0.9921875, "learning_rate": 5.5472714630108616e-05, "loss": 1.1169, "step": 5058 }, { "epoch": 0.7975606959612463, "grad_norm": 0.8984375, "learning_rate": 5.546859251120182e-05, "loss": 1.0148, "step": 5059 }, { "epoch": 0.7977183478086394, "grad_norm": 1.0390625, "learning_rate": 5.54644704866768e-05, "loss": 0.8953, "step": 5060 }, { "epoch": 0.7978759996560323, "grad_norm": 1.046875, "learning_rate": 5.546034855654227e-05, "loss": 1.1756, "step": 5061 }, { "epoch": 0.7980336515034253, "grad_norm": 0.859375, "learning_rate": 5.545622672080702e-05, "loss": 1.1079, "step": 5062 }, { "epoch": 0.7981913033508183, "grad_norm": 0.91015625, "learning_rate": 5.545210497947968e-05, "loss": 0.9998, "step": 5063 }, { "epoch": 0.7983489551982114, "grad_norm": 0.94921875, "learning_rate": 5.544798333256911e-05, "loss": 1.0053, "step": 5064 }, { "epoch": 0.7985066070456044, "grad_norm": 0.88671875, "learning_rate": 5.5443861780083984e-05, "loss": 0.9897, "step": 5065 }, { "epoch": 0.7986642588929974, "grad_norm": 1.0, "learning_rate": 5.543974032203305e-05, "loss": 0.9277, "step": 5066 }, { "epoch": 0.7988219107403904, "grad_norm": 0.97265625, "learning_rate": 5.543561895842505e-05, "loss": 1.081, "step": 5067 }, { "epoch": 0.7989795625877835, "grad_norm": 0.9453125, "learning_rate": 5.543149768926866e-05, "loss": 1.061, "step": 5068 }, { "epoch": 0.7991372144351764, "grad_norm": 0.98828125, "learning_rate": 5.5427376514572703e-05, "loss": 0.9441, "step": 5069 }, { "epoch": 0.7992948662825694, "grad_norm": 0.91015625, "learning_rate": 5.5423255434345877e-05, "loss": 1.0224, "step": 5070 }, { "epoch": 0.7994525181299624, "grad_norm": 1.0703125, "learning_rate": 5.541913444859692e-05, "loss": 1.0454, "step": 5071 }, { "epoch": 0.7996101699773555, "grad_norm": 0.96484375, "learning_rate": 5.541501355733454e-05, "loss": 0.9178, "step": 5072 }, { "epoch": 0.7997678218247485, "grad_norm": 0.890625, "learning_rate": 5.5410892760567465e-05, "loss": 0.7967, "step": 5073 }, { "epoch": 0.7999254736721415, "grad_norm": 0.875, "learning_rate": 5.540677205830449e-05, "loss": 0.8773, "step": 5074 }, { "epoch": 0.8000831255195345, "grad_norm": 1.0625, "learning_rate": 5.5402651450554324e-05, "loss": 1.0382, "step": 5075 }, { "epoch": 0.8002407773669276, "grad_norm": 1.1015625, "learning_rate": 5.5398530937325674e-05, "loss": 1.1118, "step": 5076 }, { "epoch": 0.8003984292143205, "grad_norm": 1.1953125, "learning_rate": 5.539441051862728e-05, "loss": 0.724, "step": 5077 }, { "epoch": 0.8005560810617135, "grad_norm": 0.8671875, "learning_rate": 5.5390290194467865e-05, "loss": 1.0443, "step": 5078 }, { "epoch": 0.8007137329091065, "grad_norm": 0.85546875, "learning_rate": 5.5386169964856216e-05, "loss": 0.9296, "step": 5079 }, { "epoch": 0.8008713847564995, "grad_norm": 1.0234375, "learning_rate": 5.538204982980102e-05, "loss": 1.0788, "step": 5080 }, { "epoch": 0.8010290366038926, "grad_norm": 0.9140625, "learning_rate": 5.537792978931102e-05, "loss": 0.9391, "step": 5081 }, { "epoch": 0.8011866884512856, "grad_norm": 0.875, "learning_rate": 5.5373809843394954e-05, "loss": 0.7482, "step": 5082 }, { "epoch": 0.8013443402986786, "grad_norm": 1.3046875, "learning_rate": 5.5369689992061545e-05, "loss": 1.1189, "step": 5083 }, { "epoch": 0.8015019921460717, "grad_norm": 1.046875, "learning_rate": 5.53655702353195e-05, "loss": 1.209, "step": 5084 }, { "epoch": 0.8016596439934646, "grad_norm": 0.9375, "learning_rate": 5.5361450573177585e-05, "loss": 0.9709, "step": 5085 }, { "epoch": 0.8018172958408576, "grad_norm": 1.0234375, "learning_rate": 5.535733100564455e-05, "loss": 1.0494, "step": 5086 }, { "epoch": 0.8019749476882506, "grad_norm": 1.0234375, "learning_rate": 5.53532115327291e-05, "loss": 1.1126, "step": 5087 }, { "epoch": 0.8021325995356436, "grad_norm": 0.90625, "learning_rate": 5.534909215443995e-05, "loss": 1.0134, "step": 5088 }, { "epoch": 0.8022902513830367, "grad_norm": 0.984375, "learning_rate": 5.534497287078586e-05, "loss": 0.9051, "step": 5089 }, { "epoch": 0.8024479032304297, "grad_norm": 0.82421875, "learning_rate": 5.534085368177555e-05, "loss": 0.8708, "step": 5090 }, { "epoch": 0.8026055550778227, "grad_norm": 0.9453125, "learning_rate": 5.533673458741775e-05, "loss": 1.0625, "step": 5091 }, { "epoch": 0.8027632069252157, "grad_norm": 0.96875, "learning_rate": 5.5332615587721183e-05, "loss": 1.0924, "step": 5092 }, { "epoch": 0.8029208587726087, "grad_norm": 0.9453125, "learning_rate": 5.532849668269453e-05, "loss": 0.951, "step": 5093 }, { "epoch": 0.8030785106200017, "grad_norm": 0.9453125, "learning_rate": 5.5324377872346634e-05, "loss": 0.9661, "step": 5094 }, { "epoch": 0.8032361624673947, "grad_norm": 0.9609375, "learning_rate": 5.532025915668616e-05, "loss": 0.8835, "step": 5095 }, { "epoch": 0.8033938143147877, "grad_norm": 0.96875, "learning_rate": 5.531614053572184e-05, "loss": 1.0814, "step": 5096 }, { "epoch": 0.8035514661621808, "grad_norm": 0.88671875, "learning_rate": 5.531202200946242e-05, "loss": 0.8403, "step": 5097 }, { "epoch": 0.8037091180095738, "grad_norm": 0.8984375, "learning_rate": 5.5307903577916554e-05, "loss": 0.9275, "step": 5098 }, { "epoch": 0.8038667698569668, "grad_norm": 0.875, "learning_rate": 5.530378524109308e-05, "loss": 0.8785, "step": 5099 }, { "epoch": 0.8040244217043598, "grad_norm": 0.96484375, "learning_rate": 5.529966699900069e-05, "loss": 0.9336, "step": 5100 }, { "epoch": 0.8041820735517528, "grad_norm": 0.96875, "learning_rate": 5.529554885164808e-05, "loss": 1.042, "step": 5101 }, { "epoch": 0.8043397253991458, "grad_norm": 0.87109375, "learning_rate": 5.529143079904401e-05, "loss": 0.9835, "step": 5102 }, { "epoch": 0.8044973772465388, "grad_norm": 1.6328125, "learning_rate": 5.5287312841197156e-05, "loss": 1.022, "step": 5103 }, { "epoch": 0.8046550290939318, "grad_norm": 0.921875, "learning_rate": 5.528319497811633e-05, "loss": 0.9245, "step": 5104 }, { "epoch": 0.8048126809413249, "grad_norm": 0.98046875, "learning_rate": 5.527907720981022e-05, "loss": 1.1255, "step": 5105 }, { "epoch": 0.8049703327887179, "grad_norm": 1.0234375, "learning_rate": 5.527495953628754e-05, "loss": 1.1628, "step": 5106 }, { "epoch": 0.8051279846361109, "grad_norm": 0.91796875, "learning_rate": 5.527084195755703e-05, "loss": 1.0737, "step": 5107 }, { "epoch": 0.8052856364835039, "grad_norm": 0.96484375, "learning_rate": 5.526672447362737e-05, "loss": 1.0728, "step": 5108 }, { "epoch": 0.8054432883308968, "grad_norm": 0.83984375, "learning_rate": 5.526260708450738e-05, "loss": 0.8846, "step": 5109 }, { "epoch": 0.8056009401782899, "grad_norm": 0.890625, "learning_rate": 5.525848979020573e-05, "loss": 0.9312, "step": 5110 }, { "epoch": 0.8057585920256829, "grad_norm": 0.9375, "learning_rate": 5.525437259073115e-05, "loss": 0.8961, "step": 5111 }, { "epoch": 0.8059162438730759, "grad_norm": 0.93359375, "learning_rate": 5.525025548609237e-05, "loss": 1.0776, "step": 5112 }, { "epoch": 0.806073895720469, "grad_norm": 0.8671875, "learning_rate": 5.5246138476298095e-05, "loss": 1.0368, "step": 5113 }, { "epoch": 0.806231547567862, "grad_norm": 0.9453125, "learning_rate": 5.5242021561357094e-05, "loss": 1.089, "step": 5114 }, { "epoch": 0.806389199415255, "grad_norm": 0.84375, "learning_rate": 5.523790474127808e-05, "loss": 1.052, "step": 5115 }, { "epoch": 0.806546851262648, "grad_norm": 0.94921875, "learning_rate": 5.523378801606975e-05, "loss": 1.0138, "step": 5116 }, { "epoch": 0.8067045031100409, "grad_norm": 0.96875, "learning_rate": 5.522967138574087e-05, "loss": 1.1764, "step": 5117 }, { "epoch": 0.806862154957434, "grad_norm": 0.98046875, "learning_rate": 5.5225554850300085e-05, "loss": 0.9761, "step": 5118 }, { "epoch": 0.807019806804827, "grad_norm": 0.9375, "learning_rate": 5.5221438409756224e-05, "loss": 0.8085, "step": 5119 }, { "epoch": 0.80717745865222, "grad_norm": 0.94921875, "learning_rate": 5.521732206411797e-05, "loss": 1.0472, "step": 5120 }, { "epoch": 0.807335110499613, "grad_norm": 0.9140625, "learning_rate": 5.521320581339404e-05, "loss": 0.8863, "step": 5121 }, { "epoch": 0.8074927623470061, "grad_norm": 1.0234375, "learning_rate": 5.520908965759315e-05, "loss": 0.9264, "step": 5122 }, { "epoch": 0.8076504141943991, "grad_norm": 0.91015625, "learning_rate": 5.520497359672401e-05, "loss": 0.9371, "step": 5123 }, { "epoch": 0.8078080660417921, "grad_norm": 0.9453125, "learning_rate": 5.52008576307954e-05, "loss": 1.1294, "step": 5124 }, { "epoch": 0.807965717889185, "grad_norm": 0.90625, "learning_rate": 5.5196741759816016e-05, "loss": 0.9846, "step": 5125 }, { "epoch": 0.8081233697365781, "grad_norm": 0.9609375, "learning_rate": 5.519262598379457e-05, "loss": 0.9696, "step": 5126 }, { "epoch": 0.8082810215839711, "grad_norm": 1.09375, "learning_rate": 5.518851030273979e-05, "loss": 0.8826, "step": 5127 }, { "epoch": 0.8084386734313641, "grad_norm": 0.9765625, "learning_rate": 5.518439471666042e-05, "loss": 0.935, "step": 5128 }, { "epoch": 0.8085963252787571, "grad_norm": 0.8125, "learning_rate": 5.5180279225565145e-05, "loss": 0.7327, "step": 5129 }, { "epoch": 0.8087539771261502, "grad_norm": 0.90234375, "learning_rate": 5.517616382946271e-05, "loss": 0.8761, "step": 5130 }, { "epoch": 0.8089116289735432, "grad_norm": 1.171875, "learning_rate": 5.517204852836183e-05, "loss": 1.187, "step": 5131 }, { "epoch": 0.8090692808209362, "grad_norm": 0.9609375, "learning_rate": 5.5167933322271196e-05, "loss": 0.8544, "step": 5132 }, { "epoch": 0.8092269326683291, "grad_norm": 1.0390625, "learning_rate": 5.51638182111996e-05, "loss": 0.9201, "step": 5133 }, { "epoch": 0.8093845845157221, "grad_norm": 0.984375, "learning_rate": 5.515970319515574e-05, "loss": 0.9751, "step": 5134 }, { "epoch": 0.8095422363631152, "grad_norm": 0.99609375, "learning_rate": 5.515558827414833e-05, "loss": 0.9072, "step": 5135 }, { "epoch": 0.8096998882105082, "grad_norm": 0.9609375, "learning_rate": 5.515147344818607e-05, "loss": 1.0498, "step": 5136 }, { "epoch": 0.8098575400579012, "grad_norm": 0.8828125, "learning_rate": 5.514735871727771e-05, "loss": 0.8049, "step": 5137 }, { "epoch": 0.8100151919052943, "grad_norm": 1.109375, "learning_rate": 5.514324408143191e-05, "loss": 0.8281, "step": 5138 }, { "epoch": 0.8101728437526873, "grad_norm": 0.984375, "learning_rate": 5.5139129540657486e-05, "loss": 0.903, "step": 5139 }, { "epoch": 0.8103304956000803, "grad_norm": 0.98046875, "learning_rate": 5.5135015094963114e-05, "loss": 0.8336, "step": 5140 }, { "epoch": 0.8104881474474732, "grad_norm": 0.87109375, "learning_rate": 5.5130900744357515e-05, "loss": 0.8002, "step": 5141 }, { "epoch": 0.8106457992948662, "grad_norm": 1.0390625, "learning_rate": 5.51267864888494e-05, "loss": 1.1768, "step": 5142 }, { "epoch": 0.8108034511422593, "grad_norm": 0.91796875, "learning_rate": 5.512267232844746e-05, "loss": 0.7953, "step": 5143 }, { "epoch": 0.8109611029896523, "grad_norm": 0.9609375, "learning_rate": 5.51185582631605e-05, "loss": 1.1085, "step": 5144 }, { "epoch": 0.8111187548370453, "grad_norm": 0.95703125, "learning_rate": 5.511444429299717e-05, "loss": 1.0579, "step": 5145 }, { "epoch": 0.8112764066844383, "grad_norm": 1.078125, "learning_rate": 5.511033041796623e-05, "loss": 1.0626, "step": 5146 }, { "epoch": 0.8114340585318314, "grad_norm": 1.109375, "learning_rate": 5.510621663807637e-05, "loss": 0.9026, "step": 5147 }, { "epoch": 0.8115917103792244, "grad_norm": 0.80078125, "learning_rate": 5.5102102953336274e-05, "loss": 0.7934, "step": 5148 }, { "epoch": 0.8117493622266173, "grad_norm": 0.87109375, "learning_rate": 5.509798936375475e-05, "loss": 0.9132, "step": 5149 }, { "epoch": 0.8119070140740103, "grad_norm": 1.0390625, "learning_rate": 5.509387586934047e-05, "loss": 1.2006, "step": 5150 }, { "epoch": 0.8120646659214034, "grad_norm": 0.93359375, "learning_rate": 5.508976247010216e-05, "loss": 1.0395, "step": 5151 }, { "epoch": 0.8122223177687964, "grad_norm": 0.91796875, "learning_rate": 5.5085649166048525e-05, "loss": 0.8543, "step": 5152 }, { "epoch": 0.8123799696161894, "grad_norm": 0.97265625, "learning_rate": 5.508153595718825e-05, "loss": 0.9978, "step": 5153 }, { "epoch": 0.8125376214635824, "grad_norm": 1.0703125, "learning_rate": 5.507742284353012e-05, "loss": 1.3401, "step": 5154 }, { "epoch": 0.8126952733109755, "grad_norm": 0.9140625, "learning_rate": 5.507330982508284e-05, "loss": 0.7885, "step": 5155 }, { "epoch": 0.8128529251583685, "grad_norm": 0.94921875, "learning_rate": 5.50691969018551e-05, "loss": 0.9366, "step": 5156 }, { "epoch": 0.8130105770057614, "grad_norm": 0.99609375, "learning_rate": 5.506508407385565e-05, "loss": 1.0174, "step": 5157 }, { "epoch": 0.8131682288531544, "grad_norm": 0.98828125, "learning_rate": 5.5060971341093135e-05, "loss": 0.989, "step": 5158 }, { "epoch": 0.8133258807005475, "grad_norm": 0.921875, "learning_rate": 5.5056858703576354e-05, "loss": 0.9446, "step": 5159 }, { "epoch": 0.8134835325479405, "grad_norm": 0.88671875, "learning_rate": 5.5052746161314e-05, "loss": 0.8718, "step": 5160 }, { "epoch": 0.8136411843953335, "grad_norm": 0.88671875, "learning_rate": 5.504863371431478e-05, "loss": 0.8263, "step": 5161 }, { "epoch": 0.8137988362427265, "grad_norm": 0.99609375, "learning_rate": 5.504452136258742e-05, "loss": 1.1316, "step": 5162 }, { "epoch": 0.8139564880901196, "grad_norm": 1.046875, "learning_rate": 5.504040910614057e-05, "loss": 1.2804, "step": 5163 }, { "epoch": 0.8141141399375126, "grad_norm": 1.015625, "learning_rate": 5.5036296944983054e-05, "loss": 0.8445, "step": 5164 }, { "epoch": 0.8142717917849056, "grad_norm": 0.80078125, "learning_rate": 5.503218487912354e-05, "loss": 0.8056, "step": 5165 }, { "epoch": 0.8144294436322985, "grad_norm": 0.91796875, "learning_rate": 5.5028072908570724e-05, "loss": 1.0973, "step": 5166 }, { "epoch": 0.8145870954796915, "grad_norm": 0.8671875, "learning_rate": 5.502396103333336e-05, "loss": 0.7199, "step": 5167 }, { "epoch": 0.8147447473270846, "grad_norm": 0.92578125, "learning_rate": 5.501984925342012e-05, "loss": 0.831, "step": 5168 }, { "epoch": 0.8149023991744776, "grad_norm": 1.0078125, "learning_rate": 5.501573756883971e-05, "loss": 0.951, "step": 5169 }, { "epoch": 0.8150600510218706, "grad_norm": 0.87890625, "learning_rate": 5.5011625979600904e-05, "loss": 0.98, "step": 5170 }, { "epoch": 0.8152177028692636, "grad_norm": 0.9453125, "learning_rate": 5.50075144857124e-05, "loss": 0.8224, "step": 5171 }, { "epoch": 0.8153753547166567, "grad_norm": 1.1953125, "learning_rate": 5.500340308718288e-05, "loss": 1.0025, "step": 5172 }, { "epoch": 0.8155330065640497, "grad_norm": 0.94921875, "learning_rate": 5.499929178402108e-05, "loss": 1.0702, "step": 5173 }, { "epoch": 0.8156906584114426, "grad_norm": 0.81640625, "learning_rate": 5.499518057623572e-05, "loss": 0.8925, "step": 5174 }, { "epoch": 0.8158483102588356, "grad_norm": 0.8984375, "learning_rate": 5.49910694638355e-05, "loss": 0.9471, "step": 5175 }, { "epoch": 0.8160059621062287, "grad_norm": 0.8046875, "learning_rate": 5.498695844682913e-05, "loss": 0.9644, "step": 5176 }, { "epoch": 0.8161636139536217, "grad_norm": 0.96484375, "learning_rate": 5.4982847525225324e-05, "loss": 1.0802, "step": 5177 }, { "epoch": 0.8163212658010147, "grad_norm": 1.015625, "learning_rate": 5.4978736699032765e-05, "loss": 0.976, "step": 5178 }, { "epoch": 0.8164789176484077, "grad_norm": 0.9296875, "learning_rate": 5.497462596826024e-05, "loss": 1.138, "step": 5179 }, { "epoch": 0.8166365694958008, "grad_norm": 0.9921875, "learning_rate": 5.497051533291642e-05, "loss": 0.8956, "step": 5180 }, { "epoch": 0.8167942213431938, "grad_norm": 0.9375, "learning_rate": 5.4966404793010027e-05, "loss": 0.9929, "step": 5181 }, { "epoch": 0.8169518731905867, "grad_norm": 0.90234375, "learning_rate": 5.496229434854976e-05, "loss": 0.9973, "step": 5182 }, { "epoch": 0.8171095250379797, "grad_norm": 0.95703125, "learning_rate": 5.495818399954429e-05, "loss": 1.0594, "step": 5183 }, { "epoch": 0.8172671768853728, "grad_norm": 1.0859375, "learning_rate": 5.4954073746002424e-05, "loss": 1.0803, "step": 5184 }, { "epoch": 0.8174248287327658, "grad_norm": 0.9921875, "learning_rate": 5.4949963587932805e-05, "loss": 1.1302, "step": 5185 }, { "epoch": 0.8175824805801588, "grad_norm": 0.90234375, "learning_rate": 5.494585352534418e-05, "loss": 0.976, "step": 5186 }, { "epoch": 0.8177401324275518, "grad_norm": 1.203125, "learning_rate": 5.494174355824524e-05, "loss": 0.992, "step": 5187 }, { "epoch": 0.8178977842749449, "grad_norm": 0.9453125, "learning_rate": 5.493763368664466e-05, "loss": 0.9713, "step": 5188 }, { "epoch": 0.8180554361223379, "grad_norm": 0.96875, "learning_rate": 5.4933523910551245e-05, "loss": 0.9347, "step": 5189 }, { "epoch": 0.8182130879697308, "grad_norm": 0.953125, "learning_rate": 5.492941422997362e-05, "loss": 1.0517, "step": 5190 }, { "epoch": 0.8183707398171238, "grad_norm": 0.90625, "learning_rate": 5.492530464492055e-05, "loss": 0.8317, "step": 5191 }, { "epoch": 0.8185283916645169, "grad_norm": 1.0, "learning_rate": 5.492119515540071e-05, "loss": 1.1276, "step": 5192 }, { "epoch": 0.8186860435119099, "grad_norm": 0.98828125, "learning_rate": 5.491708576142278e-05, "loss": 1.1118, "step": 5193 }, { "epoch": 0.8188436953593029, "grad_norm": 1.0234375, "learning_rate": 5.4912976462995556e-05, "loss": 0.9278, "step": 5194 }, { "epoch": 0.8190013472066959, "grad_norm": 0.91015625, "learning_rate": 5.4908867260127696e-05, "loss": 0.8707, "step": 5195 }, { "epoch": 0.819158999054089, "grad_norm": 1.171875, "learning_rate": 5.490475815282792e-05, "loss": 1.0641, "step": 5196 }, { "epoch": 0.819316650901482, "grad_norm": 0.953125, "learning_rate": 5.490064914110491e-05, "loss": 0.9458, "step": 5197 }, { "epoch": 0.8194743027488749, "grad_norm": 1.1328125, "learning_rate": 5.489654022496737e-05, "loss": 0.8619, "step": 5198 }, { "epoch": 0.8196319545962679, "grad_norm": 0.86328125, "learning_rate": 5.489243140442408e-05, "loss": 1.0698, "step": 5199 }, { "epoch": 0.819789606443661, "grad_norm": 0.921875, "learning_rate": 5.488832267948372e-05, "loss": 0.9618, "step": 5200 }, { "epoch": 0.819947258291054, "grad_norm": 0.96875, "learning_rate": 5.488421405015496e-05, "loss": 1.058, "step": 5201 }, { "epoch": 0.820104910138447, "grad_norm": 0.98046875, "learning_rate": 5.488010551644652e-05, "loss": 0.9512, "step": 5202 }, { "epoch": 0.82026256198584, "grad_norm": 0.94140625, "learning_rate": 5.4875997078367135e-05, "loss": 0.8277, "step": 5203 }, { "epoch": 0.820420213833233, "grad_norm": 0.9453125, "learning_rate": 5.4871888735925446e-05, "loss": 0.9278, "step": 5204 }, { "epoch": 0.8205778656806261, "grad_norm": 1.015625, "learning_rate": 5.4867780489130263e-05, "loss": 0.7493, "step": 5205 }, { "epoch": 0.820735517528019, "grad_norm": 0.890625, "learning_rate": 5.486367233799023e-05, "loss": 0.8391, "step": 5206 }, { "epoch": 0.820893169375412, "grad_norm": 0.8515625, "learning_rate": 5.4859564282514064e-05, "loss": 0.8964, "step": 5207 }, { "epoch": 0.821050821222805, "grad_norm": 0.94921875, "learning_rate": 5.485545632271046e-05, "loss": 1.177, "step": 5208 }, { "epoch": 0.8212084730701981, "grad_norm": 0.91015625, "learning_rate": 5.4851348458588116e-05, "loss": 0.8275, "step": 5209 }, { "epoch": 0.8213661249175911, "grad_norm": 0.9296875, "learning_rate": 5.4847240690155786e-05, "loss": 1.0174, "step": 5210 }, { "epoch": 0.8215237767649841, "grad_norm": 0.93359375, "learning_rate": 5.484313301742215e-05, "loss": 0.8695, "step": 5211 }, { "epoch": 0.8216814286123771, "grad_norm": 1.015625, "learning_rate": 5.483902544039592e-05, "loss": 0.9129, "step": 5212 }, { "epoch": 0.8218390804597702, "grad_norm": 0.859375, "learning_rate": 5.483491795908578e-05, "loss": 0.9097, "step": 5213 }, { "epoch": 0.8219967323071631, "grad_norm": 1.03125, "learning_rate": 5.483081057350046e-05, "loss": 0.9051, "step": 5214 }, { "epoch": 0.8221543841545561, "grad_norm": 0.921875, "learning_rate": 5.482670328364865e-05, "loss": 0.9964, "step": 5215 }, { "epoch": 0.8223120360019491, "grad_norm": 1.0, "learning_rate": 5.482259608953908e-05, "loss": 1.2773, "step": 5216 }, { "epoch": 0.8224696878493422, "grad_norm": 0.98046875, "learning_rate": 5.481848899118041e-05, "loss": 0.8949, "step": 5217 }, { "epoch": 0.8226273396967352, "grad_norm": 0.95703125, "learning_rate": 5.4814381988581344e-05, "loss": 0.9458, "step": 5218 }, { "epoch": 0.8227849915441282, "grad_norm": 0.96875, "learning_rate": 5.481027508175065e-05, "loss": 0.7866, "step": 5219 }, { "epoch": 0.8229426433915212, "grad_norm": 0.9453125, "learning_rate": 5.480616827069699e-05, "loss": 0.8623, "step": 5220 }, { "epoch": 0.8231002952389143, "grad_norm": 0.9375, "learning_rate": 5.4802061555429084e-05, "loss": 1.0991, "step": 5221 }, { "epoch": 0.8232579470863072, "grad_norm": 1.0, "learning_rate": 5.479795493595561e-05, "loss": 0.8485, "step": 5222 }, { "epoch": 0.8234155989337002, "grad_norm": 0.97265625, "learning_rate": 5.47938484122853e-05, "loss": 1.1294, "step": 5223 }, { "epoch": 0.8235732507810932, "grad_norm": 0.96484375, "learning_rate": 5.478974198442679e-05, "loss": 1.0742, "step": 5224 }, { "epoch": 0.8237309026284863, "grad_norm": 0.99609375, "learning_rate": 5.47856356523889e-05, "loss": 0.991, "step": 5225 }, { "epoch": 0.8238885544758793, "grad_norm": 0.9140625, "learning_rate": 5.478152941618024e-05, "loss": 0.8446, "step": 5226 }, { "epoch": 0.8240462063232723, "grad_norm": 0.9296875, "learning_rate": 5.477742327580957e-05, "loss": 0.9164, "step": 5227 }, { "epoch": 0.8242038581706653, "grad_norm": 0.8828125, "learning_rate": 5.477331723128556e-05, "loss": 1.1309, "step": 5228 }, { "epoch": 0.8243615100180584, "grad_norm": 1.0859375, "learning_rate": 5.4769211282616874e-05, "loss": 0.9729, "step": 5229 }, { "epoch": 0.8245191618654513, "grad_norm": 1.0703125, "learning_rate": 5.47651054298123e-05, "loss": 0.946, "step": 5230 }, { "epoch": 0.8246768137128443, "grad_norm": 0.921875, "learning_rate": 5.476099967288051e-05, "loss": 0.9177, "step": 5231 }, { "epoch": 0.8248344655602373, "grad_norm": 1.1796875, "learning_rate": 5.4756894011830185e-05, "loss": 1.0947, "step": 5232 }, { "epoch": 0.8249921174076303, "grad_norm": 0.8984375, "learning_rate": 5.475278844667005e-05, "loss": 0.9768, "step": 5233 }, { "epoch": 0.8251497692550234, "grad_norm": 1.3203125, "learning_rate": 5.474868297740874e-05, "loss": 0.961, "step": 5234 }, { "epoch": 0.8253074211024164, "grad_norm": 0.90234375, "learning_rate": 5.474457760405505e-05, "loss": 1.1096, "step": 5235 }, { "epoch": 0.8254650729498094, "grad_norm": 0.83203125, "learning_rate": 5.4740472326617645e-05, "loss": 0.8823, "step": 5236 }, { "epoch": 0.8256227247972024, "grad_norm": 0.8984375, "learning_rate": 5.473636714510522e-05, "loss": 0.8365, "step": 5237 }, { "epoch": 0.8257803766445954, "grad_norm": 0.95703125, "learning_rate": 5.473226205952649e-05, "loss": 1.2306, "step": 5238 }, { "epoch": 0.8259380284919884, "grad_norm": 0.8671875, "learning_rate": 5.472815706989008e-05, "loss": 0.9294, "step": 5239 }, { "epoch": 0.8260956803393814, "grad_norm": 1.03125, "learning_rate": 5.472405217620481e-05, "loss": 1.0705, "step": 5240 }, { "epoch": 0.8262533321867744, "grad_norm": 0.9296875, "learning_rate": 5.471994737847932e-05, "loss": 0.9364, "step": 5241 }, { "epoch": 0.8264109840341675, "grad_norm": 0.99609375, "learning_rate": 5.471584267672232e-05, "loss": 0.9921, "step": 5242 }, { "epoch": 0.8265686358815605, "grad_norm": 0.96875, "learning_rate": 5.471173807094249e-05, "loss": 1.0356, "step": 5243 }, { "epoch": 0.8267262877289535, "grad_norm": 0.94921875, "learning_rate": 5.470763356114851e-05, "loss": 1.0048, "step": 5244 }, { "epoch": 0.8268839395763465, "grad_norm": 0.953125, "learning_rate": 5.4703529147349155e-05, "loss": 0.9975, "step": 5245 }, { "epoch": 0.8270415914237395, "grad_norm": 1.0859375, "learning_rate": 5.469942482955307e-05, "loss": 1.0741, "step": 5246 }, { "epoch": 0.8271992432711325, "grad_norm": 1.0234375, "learning_rate": 5.4695320607768975e-05, "loss": 0.9203, "step": 5247 }, { "epoch": 0.8273568951185255, "grad_norm": 0.88671875, "learning_rate": 5.469121648200555e-05, "loss": 0.8414, "step": 5248 }, { "epoch": 0.8275145469659185, "grad_norm": 0.9453125, "learning_rate": 5.468711245227147e-05, "loss": 0.8854, "step": 5249 }, { "epoch": 0.8276721988133116, "grad_norm": 0.94140625, "learning_rate": 5.4683008518575505e-05, "loss": 0.9234, "step": 5250 }, { "epoch": 0.8278298506607046, "grad_norm": 0.86328125, "learning_rate": 5.467890468092631e-05, "loss": 0.917, "step": 5251 }, { "epoch": 0.8279875025080976, "grad_norm": 0.96875, "learning_rate": 5.467480093933258e-05, "loss": 1.0449, "step": 5252 }, { "epoch": 0.8281451543554906, "grad_norm": 0.9140625, "learning_rate": 5.467069729380303e-05, "loss": 0.8753, "step": 5253 }, { "epoch": 0.8283028062028835, "grad_norm": 0.89453125, "learning_rate": 5.466659374434634e-05, "loss": 0.9171, "step": 5254 }, { "epoch": 0.8284604580502766, "grad_norm": 0.93359375, "learning_rate": 5.466249029097117e-05, "loss": 0.9256, "step": 5255 }, { "epoch": 0.8286181098976696, "grad_norm": 0.94140625, "learning_rate": 5.4658386933686315e-05, "loss": 0.8906, "step": 5256 }, { "epoch": 0.8287757617450626, "grad_norm": 0.92578125, "learning_rate": 5.465428367250041e-05, "loss": 1.063, "step": 5257 }, { "epoch": 0.8289334135924556, "grad_norm": 1.0703125, "learning_rate": 5.465018050742215e-05, "loss": 1.0541, "step": 5258 }, { "epoch": 0.8290910654398487, "grad_norm": 0.953125, "learning_rate": 5.4646077438460244e-05, "loss": 1.1291, "step": 5259 }, { "epoch": 0.8292487172872417, "grad_norm": 0.95703125, "learning_rate": 5.464197446562339e-05, "loss": 1.0995, "step": 5260 }, { "epoch": 0.8294063691346347, "grad_norm": 0.93359375, "learning_rate": 5.463787158892028e-05, "loss": 0.9444, "step": 5261 }, { "epoch": 0.8295640209820276, "grad_norm": 0.9375, "learning_rate": 5.4633768808359596e-05, "loss": 1.1147, "step": 5262 }, { "epoch": 0.8297216728294207, "grad_norm": 0.87890625, "learning_rate": 5.462966612395006e-05, "loss": 0.6974, "step": 5263 }, { "epoch": 0.8298793246768137, "grad_norm": 1.0625, "learning_rate": 5.462556353570031e-05, "loss": 1.0401, "step": 5264 }, { "epoch": 0.8300369765242067, "grad_norm": 0.96875, "learning_rate": 5.462146104361911e-05, "loss": 1.0604, "step": 5265 }, { "epoch": 0.8301946283715997, "grad_norm": 0.89453125, "learning_rate": 5.461735864771515e-05, "loss": 0.999, "step": 5266 }, { "epoch": 0.8303522802189928, "grad_norm": 1.3359375, "learning_rate": 5.46132563479971e-05, "loss": 0.8615, "step": 5267 }, { "epoch": 0.8305099320663858, "grad_norm": 0.8515625, "learning_rate": 5.460915414447365e-05, "loss": 0.9262, "step": 5268 }, { "epoch": 0.8306675839137788, "grad_norm": 0.875, "learning_rate": 5.460505203715347e-05, "loss": 1.0039, "step": 5269 }, { "epoch": 0.8308252357611717, "grad_norm": 1.0625, "learning_rate": 5.4600950026045326e-05, "loss": 1.0897, "step": 5270 }, { "epoch": 0.8309828876085648, "grad_norm": 0.87890625, "learning_rate": 5.4596848111157885e-05, "loss": 0.9269, "step": 5271 }, { "epoch": 0.8311405394559578, "grad_norm": 1.0234375, "learning_rate": 5.459274629249982e-05, "loss": 1.1924, "step": 5272 }, { "epoch": 0.8312981913033508, "grad_norm": 0.953125, "learning_rate": 5.458864457007982e-05, "loss": 0.8545, "step": 5273 }, { "epoch": 0.8314558431507438, "grad_norm": 0.84375, "learning_rate": 5.458454294390657e-05, "loss": 0.8142, "step": 5274 }, { "epoch": 0.8316134949981369, "grad_norm": 0.9765625, "learning_rate": 5.458044141398881e-05, "loss": 0.9757, "step": 5275 }, { "epoch": 0.8317711468455299, "grad_norm": 0.85546875, "learning_rate": 5.457633998033522e-05, "loss": 0.9675, "step": 5276 }, { "epoch": 0.8319287986929229, "grad_norm": 0.9296875, "learning_rate": 5.457223864295449e-05, "loss": 1.0007, "step": 5277 }, { "epoch": 0.8320864505403158, "grad_norm": 0.8828125, "learning_rate": 5.456813740185529e-05, "loss": 0.9493, "step": 5278 }, { "epoch": 0.8322441023877089, "grad_norm": 0.9921875, "learning_rate": 5.456403625704629e-05, "loss": 1.0495, "step": 5279 }, { "epoch": 0.8324017542351019, "grad_norm": 1.0, "learning_rate": 5.455993520853626e-05, "loss": 1.0945, "step": 5280 }, { "epoch": 0.8325594060824949, "grad_norm": 0.984375, "learning_rate": 5.455583425633384e-05, "loss": 1.0039, "step": 5281 }, { "epoch": 0.8327170579298879, "grad_norm": 0.87890625, "learning_rate": 5.4551733400447747e-05, "loss": 0.8577, "step": 5282 }, { "epoch": 0.832874709777281, "grad_norm": 0.96484375, "learning_rate": 5.454763264088665e-05, "loss": 1.0563, "step": 5283 }, { "epoch": 0.833032361624674, "grad_norm": 0.8984375, "learning_rate": 5.45435319776592e-05, "loss": 1.074, "step": 5284 }, { "epoch": 0.833190013472067, "grad_norm": 0.8828125, "learning_rate": 5.453943141077418e-05, "loss": 0.9303, "step": 5285 }, { "epoch": 0.8333476653194599, "grad_norm": 1.0546875, "learning_rate": 5.453533094024024e-05, "loss": 1.083, "step": 5286 }, { "epoch": 0.8335053171668529, "grad_norm": 0.890625, "learning_rate": 5.4531230566066074e-05, "loss": 0.913, "step": 5287 }, { "epoch": 0.833662969014246, "grad_norm": 0.9375, "learning_rate": 5.452713028826035e-05, "loss": 0.962, "step": 5288 }, { "epoch": 0.833820620861639, "grad_norm": 0.953125, "learning_rate": 5.4523030106831754e-05, "loss": 0.8838, "step": 5289 }, { "epoch": 0.833978272709032, "grad_norm": 0.98046875, "learning_rate": 5.451893002178903e-05, "loss": 0.9337, "step": 5290 }, { "epoch": 0.834135924556425, "grad_norm": 1.03125, "learning_rate": 5.451483003314082e-05, "loss": 0.822, "step": 5291 }, { "epoch": 0.8342935764038181, "grad_norm": 0.78515625, "learning_rate": 5.4510730140895835e-05, "loss": 0.8248, "step": 5292 }, { "epoch": 0.8344512282512111, "grad_norm": 0.90234375, "learning_rate": 5.450663034506276e-05, "loss": 0.9307, "step": 5293 }, { "epoch": 0.834608880098604, "grad_norm": 0.9609375, "learning_rate": 5.450253064565025e-05, "loss": 1.0506, "step": 5294 }, { "epoch": 0.834766531945997, "grad_norm": 1.203125, "learning_rate": 5.449843104266705e-05, "loss": 0.9977, "step": 5295 }, { "epoch": 0.8349241837933901, "grad_norm": 1.0234375, "learning_rate": 5.449433153612184e-05, "loss": 1.0601, "step": 5296 }, { "epoch": 0.8350818356407831, "grad_norm": 1.1015625, "learning_rate": 5.4490232126023286e-05, "loss": 0.9959, "step": 5297 }, { "epoch": 0.8352394874881761, "grad_norm": 1.0625, "learning_rate": 5.4486132812380085e-05, "loss": 1.0214, "step": 5298 }, { "epoch": 0.8353971393355691, "grad_norm": 0.91015625, "learning_rate": 5.448203359520092e-05, "loss": 0.9181, "step": 5299 }, { "epoch": 0.8355547911829622, "grad_norm": 0.9140625, "learning_rate": 5.447793447449448e-05, "loss": 0.8187, "step": 5300 }, { "epoch": 0.8357124430303552, "grad_norm": 0.98046875, "learning_rate": 5.447383545026947e-05, "loss": 0.9523, "step": 5301 }, { "epoch": 0.8358700948777481, "grad_norm": 1.0234375, "learning_rate": 5.4469736522534554e-05, "loss": 1.06, "step": 5302 }, { "epoch": 0.8360277467251411, "grad_norm": 0.85546875, "learning_rate": 5.446563769129839e-05, "loss": 0.8695, "step": 5303 }, { "epoch": 0.8361853985725342, "grad_norm": 0.984375, "learning_rate": 5.4461538956569734e-05, "loss": 1.0142, "step": 5304 }, { "epoch": 0.8363430504199272, "grad_norm": 0.9921875, "learning_rate": 5.4457440318357266e-05, "loss": 1.1545, "step": 5305 }, { "epoch": 0.8365007022673202, "grad_norm": 0.984375, "learning_rate": 5.445334177666963e-05, "loss": 1.0802, "step": 5306 }, { "epoch": 0.8366583541147132, "grad_norm": 0.91796875, "learning_rate": 5.444924333151554e-05, "loss": 0.8082, "step": 5307 }, { "epoch": 0.8368160059621063, "grad_norm": 0.94921875, "learning_rate": 5.444514498290367e-05, "loss": 0.8939, "step": 5308 }, { "epoch": 0.8369736578094993, "grad_norm": 0.90625, "learning_rate": 5.4441046730842695e-05, "loss": 1.0111, "step": 5309 }, { "epoch": 0.8371313096568922, "grad_norm": 0.859375, "learning_rate": 5.443694857534134e-05, "loss": 0.8811, "step": 5310 }, { "epoch": 0.8372889615042852, "grad_norm": 1.046875, "learning_rate": 5.443285051640826e-05, "loss": 1.0414, "step": 5311 }, { "epoch": 0.8374466133516782, "grad_norm": 0.984375, "learning_rate": 5.442875255405215e-05, "loss": 1.2941, "step": 5312 }, { "epoch": 0.8376042651990713, "grad_norm": 0.83984375, "learning_rate": 5.442465468828171e-05, "loss": 0.8372, "step": 5313 }, { "epoch": 0.8377619170464643, "grad_norm": 0.83984375, "learning_rate": 5.442055691910557e-05, "loss": 0.8019, "step": 5314 }, { "epoch": 0.8379195688938573, "grad_norm": 0.94140625, "learning_rate": 5.441645924653247e-05, "loss": 1.1077, "step": 5315 }, { "epoch": 0.8380772207412504, "grad_norm": 1.0390625, "learning_rate": 5.4412361670571096e-05, "loss": 1.1403, "step": 5316 }, { "epoch": 0.8382348725886434, "grad_norm": 1.15625, "learning_rate": 5.4408264191230116e-05, "loss": 1.1413, "step": 5317 }, { "epoch": 0.8383925244360363, "grad_norm": 0.91796875, "learning_rate": 5.440416680851821e-05, "loss": 1.013, "step": 5318 }, { "epoch": 0.8385501762834293, "grad_norm": 0.93359375, "learning_rate": 5.440006952244403e-05, "loss": 0.9658, "step": 5319 }, { "epoch": 0.8387078281308223, "grad_norm": 0.94140625, "learning_rate": 5.4395972333016345e-05, "loss": 0.9723, "step": 5320 }, { "epoch": 0.8388654799782154, "grad_norm": 0.96875, "learning_rate": 5.4391875240243774e-05, "loss": 1.1316, "step": 5321 }, { "epoch": 0.8390231318256084, "grad_norm": 0.953125, "learning_rate": 5.438777824413502e-05, "loss": 1.0209, "step": 5322 }, { "epoch": 0.8391807836730014, "grad_norm": 0.9296875, "learning_rate": 5.4383681344698764e-05, "loss": 1.1083, "step": 5323 }, { "epoch": 0.8393384355203944, "grad_norm": 0.97265625, "learning_rate": 5.437958454194365e-05, "loss": 1.0236, "step": 5324 }, { "epoch": 0.8394960873677875, "grad_norm": 1.1171875, "learning_rate": 5.4375487835878444e-05, "loss": 1.1842, "step": 5325 }, { "epoch": 0.8396537392151805, "grad_norm": 0.9921875, "learning_rate": 5.4371391226511783e-05, "loss": 0.9599, "step": 5326 }, { "epoch": 0.8398113910625734, "grad_norm": 0.97265625, "learning_rate": 5.436729471385234e-05, "loss": 0.962, "step": 5327 }, { "epoch": 0.8399690429099664, "grad_norm": 0.99609375, "learning_rate": 5.4363198297908815e-05, "loss": 1.1417, "step": 5328 }, { "epoch": 0.8401266947573595, "grad_norm": 0.91796875, "learning_rate": 5.435910197868984e-05, "loss": 0.9005, "step": 5329 }, { "epoch": 0.8402843466047525, "grad_norm": 0.79296875, "learning_rate": 5.435500575620418e-05, "loss": 0.9509, "step": 5330 }, { "epoch": 0.8404419984521455, "grad_norm": 0.953125, "learning_rate": 5.435090963046048e-05, "loss": 0.8581, "step": 5331 }, { "epoch": 0.8405996502995385, "grad_norm": 0.9140625, "learning_rate": 5.4346813601467405e-05, "loss": 1.0648, "step": 5332 }, { "epoch": 0.8407573021469316, "grad_norm": 1.1484375, "learning_rate": 5.434271766923367e-05, "loss": 1.1964, "step": 5333 }, { "epoch": 0.8409149539943246, "grad_norm": 1.6640625, "learning_rate": 5.4338621833767875e-05, "loss": 1.1655, "step": 5334 }, { "epoch": 0.8410726058417175, "grad_norm": 0.9375, "learning_rate": 5.433452609507881e-05, "loss": 0.8811, "step": 5335 }, { "epoch": 0.8412302576891105, "grad_norm": 0.83984375, "learning_rate": 5.433043045317512e-05, "loss": 0.8548, "step": 5336 }, { "epoch": 0.8413879095365036, "grad_norm": 0.84765625, "learning_rate": 5.432633490806545e-05, "loss": 0.7957, "step": 5337 }, { "epoch": 0.8415455613838966, "grad_norm": 1.34375, "learning_rate": 5.432223945975852e-05, "loss": 1.1343, "step": 5338 }, { "epoch": 0.8417032132312896, "grad_norm": 0.90625, "learning_rate": 5.431814410826298e-05, "loss": 0.9082, "step": 5339 }, { "epoch": 0.8418608650786826, "grad_norm": 0.90625, "learning_rate": 5.4314048853587485e-05, "loss": 1.0591, "step": 5340 }, { "epoch": 0.8420185169260757, "grad_norm": 0.79296875, "learning_rate": 5.430995369574079e-05, "loss": 0.7362, "step": 5341 }, { "epoch": 0.8421761687734687, "grad_norm": 0.89453125, "learning_rate": 5.430585863473154e-05, "loss": 0.8145, "step": 5342 }, { "epoch": 0.8423338206208616, "grad_norm": 0.9609375, "learning_rate": 5.430176367056842e-05, "loss": 0.9817, "step": 5343 }, { "epoch": 0.8424914724682546, "grad_norm": 0.98828125, "learning_rate": 5.429766880326009e-05, "loss": 0.9073, "step": 5344 }, { "epoch": 0.8426491243156476, "grad_norm": 1.0234375, "learning_rate": 5.429357403281524e-05, "loss": 1.0548, "step": 5345 }, { "epoch": 0.8428067761630407, "grad_norm": 1.1015625, "learning_rate": 5.4289479359242555e-05, "loss": 1.2482, "step": 5346 }, { "epoch": 0.8429644280104337, "grad_norm": 0.87109375, "learning_rate": 5.42853847825507e-05, "loss": 1.0408, "step": 5347 }, { "epoch": 0.8431220798578267, "grad_norm": 1.53125, "learning_rate": 5.4281290302748357e-05, "loss": 1.0326, "step": 5348 }, { "epoch": 0.8432797317052197, "grad_norm": 0.96484375, "learning_rate": 5.427719591984421e-05, "loss": 0.9507, "step": 5349 }, { "epoch": 0.8434373835526128, "grad_norm": 0.8671875, "learning_rate": 5.427310163384689e-05, "loss": 0.8769, "step": 5350 }, { "epoch": 0.8435950354000057, "grad_norm": 1.0390625, "learning_rate": 5.426900744476515e-05, "loss": 0.9115, "step": 5351 }, { "epoch": 0.8437526872473987, "grad_norm": 0.93359375, "learning_rate": 5.426491335260764e-05, "loss": 1.0168, "step": 5352 }, { "epoch": 0.8439103390947917, "grad_norm": 1.546875, "learning_rate": 5.426081935738303e-05, "loss": 0.7667, "step": 5353 }, { "epoch": 0.8440679909421848, "grad_norm": 0.85546875, "learning_rate": 5.425672545910001e-05, "loss": 0.8063, "step": 5354 }, { "epoch": 0.8442256427895778, "grad_norm": 0.94921875, "learning_rate": 5.42526316577672e-05, "loss": 0.7669, "step": 5355 }, { "epoch": 0.8443832946369708, "grad_norm": 0.96875, "learning_rate": 5.424853795339335e-05, "loss": 0.9334, "step": 5356 }, { "epoch": 0.8445409464843638, "grad_norm": 1.0390625, "learning_rate": 5.424444434598712e-05, "loss": 0.9616, "step": 5357 }, { "epoch": 0.8446985983317569, "grad_norm": 0.921875, "learning_rate": 5.424035083555718e-05, "loss": 1.1178, "step": 5358 }, { "epoch": 0.8448562501791498, "grad_norm": 0.87890625, "learning_rate": 5.4236257422112205e-05, "loss": 0.9606, "step": 5359 }, { "epoch": 0.8450139020265428, "grad_norm": 0.9765625, "learning_rate": 5.423216410566081e-05, "loss": 0.8693, "step": 5360 }, { "epoch": 0.8451715538739358, "grad_norm": 0.87109375, "learning_rate": 5.422807088621178e-05, "loss": 0.8453, "step": 5361 }, { "epoch": 0.8453292057213289, "grad_norm": 0.90625, "learning_rate": 5.422397776377373e-05, "loss": 1.0678, "step": 5362 }, { "epoch": 0.8454868575687219, "grad_norm": 0.93359375, "learning_rate": 5.4219884738355356e-05, "loss": 0.8089, "step": 5363 }, { "epoch": 0.8456445094161149, "grad_norm": 1.109375, "learning_rate": 5.42157918099653e-05, "loss": 1.0632, "step": 5364 }, { "epoch": 0.8458021612635079, "grad_norm": 0.9921875, "learning_rate": 5.421169897861223e-05, "loss": 0.935, "step": 5365 }, { "epoch": 0.845959813110901, "grad_norm": 0.93359375, "learning_rate": 5.420760624430488e-05, "loss": 0.9009, "step": 5366 }, { "epoch": 0.8461174649582939, "grad_norm": 1.2421875, "learning_rate": 5.420351360705189e-05, "loss": 1.066, "step": 5367 }, { "epoch": 0.8462751168056869, "grad_norm": 0.921875, "learning_rate": 5.4199421066861934e-05, "loss": 0.8118, "step": 5368 }, { "epoch": 0.8464327686530799, "grad_norm": 0.98828125, "learning_rate": 5.41953286237437e-05, "loss": 1.0136, "step": 5369 }, { "epoch": 0.846590420500473, "grad_norm": 0.88671875, "learning_rate": 5.419123627770579e-05, "loss": 0.8225, "step": 5370 }, { "epoch": 0.846748072347866, "grad_norm": 0.90625, "learning_rate": 5.4187144028756994e-05, "loss": 0.9703, "step": 5371 }, { "epoch": 0.846905724195259, "grad_norm": 0.828125, "learning_rate": 5.418305187690592e-05, "loss": 0.8827, "step": 5372 }, { "epoch": 0.847063376042652, "grad_norm": 0.87890625, "learning_rate": 5.417895982216126e-05, "loss": 0.8912, "step": 5373 }, { "epoch": 0.847221027890045, "grad_norm": 1.046875, "learning_rate": 5.417486786453165e-05, "loss": 1.0141, "step": 5374 }, { "epoch": 0.847378679737438, "grad_norm": 1.0234375, "learning_rate": 5.417077600402577e-05, "loss": 0.9664, "step": 5375 }, { "epoch": 0.847536331584831, "grad_norm": 0.94140625, "learning_rate": 5.416668424065234e-05, "loss": 1.0336, "step": 5376 }, { "epoch": 0.847693983432224, "grad_norm": 0.9375, "learning_rate": 5.4162592574420015e-05, "loss": 1.0457, "step": 5377 }, { "epoch": 0.847851635279617, "grad_norm": 1.03125, "learning_rate": 5.415850100533745e-05, "loss": 0.957, "step": 5378 }, { "epoch": 0.8480092871270101, "grad_norm": 0.99609375, "learning_rate": 5.415440953341331e-05, "loss": 0.9653, "step": 5379 }, { "epoch": 0.8481669389744031, "grad_norm": 1.515625, "learning_rate": 5.4150318158656255e-05, "loss": 0.9962, "step": 5380 }, { "epoch": 0.8483245908217961, "grad_norm": 0.8515625, "learning_rate": 5.414622688107501e-05, "loss": 0.9141, "step": 5381 }, { "epoch": 0.8484822426691891, "grad_norm": 0.9609375, "learning_rate": 5.414213570067822e-05, "loss": 0.9514, "step": 5382 }, { "epoch": 0.8486398945165821, "grad_norm": 1.234375, "learning_rate": 5.413804461747456e-05, "loss": 0.9546, "step": 5383 }, { "epoch": 0.8487975463639751, "grad_norm": 1.09375, "learning_rate": 5.4133953631472676e-05, "loss": 1.0411, "step": 5384 }, { "epoch": 0.8489551982113681, "grad_norm": 0.9765625, "learning_rate": 5.412986274268126e-05, "loss": 0.9021, "step": 5385 }, { "epoch": 0.8491128500587611, "grad_norm": 0.9296875, "learning_rate": 5.4125771951108993e-05, "loss": 1.0593, "step": 5386 }, { "epoch": 0.8492705019061542, "grad_norm": 0.9375, "learning_rate": 5.4121681256764514e-05, "loss": 1.0389, "step": 5387 }, { "epoch": 0.8494281537535472, "grad_norm": 0.89453125, "learning_rate": 5.411759065965651e-05, "loss": 0.9675, "step": 5388 }, { "epoch": 0.8495858056009402, "grad_norm": 1.4296875, "learning_rate": 5.4113500159793615e-05, "loss": 1.1298, "step": 5389 }, { "epoch": 0.8497434574483332, "grad_norm": 0.9296875, "learning_rate": 5.410940975718458e-05, "loss": 0.9422, "step": 5390 }, { "epoch": 0.8499011092957262, "grad_norm": 0.95703125, "learning_rate": 5.410531945183802e-05, "loss": 0.9882, "step": 5391 }, { "epoch": 0.8500587611431192, "grad_norm": 0.9921875, "learning_rate": 5.410122924376262e-05, "loss": 1.0268, "step": 5392 }, { "epoch": 0.8502164129905122, "grad_norm": 1.03125, "learning_rate": 5.4097139132967036e-05, "loss": 1.1408, "step": 5393 }, { "epoch": 0.8503740648379052, "grad_norm": 0.99609375, "learning_rate": 5.4093049119459936e-05, "loss": 1.2685, "step": 5394 }, { "epoch": 0.8505317166852983, "grad_norm": 0.96484375, "learning_rate": 5.4088959203249965e-05, "loss": 0.9417, "step": 5395 }, { "epoch": 0.8506893685326913, "grad_norm": 0.9453125, "learning_rate": 5.408486938434585e-05, "loss": 0.973, "step": 5396 }, { "epoch": 0.8508470203800843, "grad_norm": 0.984375, "learning_rate": 5.408077966275624e-05, "loss": 0.9648, "step": 5397 }, { "epoch": 0.8510046722274773, "grad_norm": 0.953125, "learning_rate": 5.407669003848978e-05, "loss": 0.875, "step": 5398 }, { "epoch": 0.8511623240748702, "grad_norm": 0.984375, "learning_rate": 5.407260051155516e-05, "loss": 0.8967, "step": 5399 }, { "epoch": 0.8513199759222633, "grad_norm": 0.9375, "learning_rate": 5.4068511081961004e-05, "loss": 0.8516, "step": 5400 }, { "epoch": 0.8514776277696563, "grad_norm": 0.84765625, "learning_rate": 5.4064421749716046e-05, "loss": 0.8777, "step": 5401 }, { "epoch": 0.8516352796170493, "grad_norm": 0.98046875, "learning_rate": 5.406033251482892e-05, "loss": 0.9872, "step": 5402 }, { "epoch": 0.8517929314644423, "grad_norm": 0.796875, "learning_rate": 5.40562433773083e-05, "loss": 0.683, "step": 5403 }, { "epoch": 0.8519505833118354, "grad_norm": 0.95703125, "learning_rate": 5.405215433716284e-05, "loss": 1.1625, "step": 5404 }, { "epoch": 0.8521082351592284, "grad_norm": 0.90625, "learning_rate": 5.404806539440117e-05, "loss": 0.893, "step": 5405 }, { "epoch": 0.8522658870066214, "grad_norm": 0.9765625, "learning_rate": 5.404397654903204e-05, "loss": 1.0531, "step": 5406 }, { "epoch": 0.8524235388540143, "grad_norm": 1.0390625, "learning_rate": 5.4039887801064085e-05, "loss": 1.1981, "step": 5407 }, { "epoch": 0.8525811907014074, "grad_norm": 0.87890625, "learning_rate": 5.4035799150505936e-05, "loss": 1.0637, "step": 5408 }, { "epoch": 0.8527388425488004, "grad_norm": 0.86328125, "learning_rate": 5.4031710597366314e-05, "loss": 0.8279, "step": 5409 }, { "epoch": 0.8528964943961934, "grad_norm": 1.046875, "learning_rate": 5.4027622141653797e-05, "loss": 1.037, "step": 5410 }, { "epoch": 0.8530541462435864, "grad_norm": 0.91015625, "learning_rate": 5.4023533783377146e-05, "loss": 0.9613, "step": 5411 }, { "epoch": 0.8532117980909795, "grad_norm": 1.09375, "learning_rate": 5.4019445522544996e-05, "loss": 1.1839, "step": 5412 }, { "epoch": 0.8533694499383725, "grad_norm": 0.9296875, "learning_rate": 5.4015357359166005e-05, "loss": 1.0147, "step": 5413 }, { "epoch": 0.8535271017857655, "grad_norm": 0.89453125, "learning_rate": 5.401126929324882e-05, "loss": 0.9077, "step": 5414 }, { "epoch": 0.8536847536331584, "grad_norm": 0.953125, "learning_rate": 5.400718132480209e-05, "loss": 0.9579, "step": 5415 }, { "epoch": 0.8538424054805515, "grad_norm": 1.0234375, "learning_rate": 5.400309345383457e-05, "loss": 0.9847, "step": 5416 }, { "epoch": 0.8540000573279445, "grad_norm": 0.97265625, "learning_rate": 5.399900568035483e-05, "loss": 0.9473, "step": 5417 }, { "epoch": 0.8541577091753375, "grad_norm": 0.89453125, "learning_rate": 5.3994918004371594e-05, "loss": 0.8808, "step": 5418 }, { "epoch": 0.8543153610227305, "grad_norm": 0.9921875, "learning_rate": 5.399083042589348e-05, "loss": 1.0036, "step": 5419 }, { "epoch": 0.8544730128701236, "grad_norm": 0.97265625, "learning_rate": 5.398674294492915e-05, "loss": 0.9734, "step": 5420 }, { "epoch": 0.8546306647175166, "grad_norm": 0.8125, "learning_rate": 5.3982655561487317e-05, "loss": 0.9005, "step": 5421 }, { "epoch": 0.8547883165649096, "grad_norm": 1.140625, "learning_rate": 5.397856827557661e-05, "loss": 0.9832, "step": 5422 }, { "epoch": 0.8549459684123025, "grad_norm": 0.95703125, "learning_rate": 5.3974481087205706e-05, "loss": 0.8583, "step": 5423 }, { "epoch": 0.8551036202596956, "grad_norm": 0.82421875, "learning_rate": 5.397039399638326e-05, "loss": 0.9437, "step": 5424 }, { "epoch": 0.8552612721070886, "grad_norm": 1.046875, "learning_rate": 5.396630700311793e-05, "loss": 0.8679, "step": 5425 }, { "epoch": 0.8554189239544816, "grad_norm": 0.99609375, "learning_rate": 5.396222010741834e-05, "loss": 0.9481, "step": 5426 }, { "epoch": 0.8555765758018746, "grad_norm": 1.1484375, "learning_rate": 5.3958133309293245e-05, "loss": 0.8645, "step": 5427 }, { "epoch": 0.8557342276492677, "grad_norm": 1.1171875, "learning_rate": 5.395404660875124e-05, "loss": 1.032, "step": 5428 }, { "epoch": 0.8558918794966607, "grad_norm": 1.0546875, "learning_rate": 5.3949960005801004e-05, "loss": 1.2569, "step": 5429 }, { "epoch": 0.8560495313440537, "grad_norm": 0.953125, "learning_rate": 5.3945873500451196e-05, "loss": 0.7883, "step": 5430 }, { "epoch": 0.8562071831914466, "grad_norm": 1.0390625, "learning_rate": 5.394178709271047e-05, "loss": 0.996, "step": 5431 }, { "epoch": 0.8563648350388396, "grad_norm": 1.0859375, "learning_rate": 5.3937700782587506e-05, "loss": 1.0072, "step": 5432 }, { "epoch": 0.8565224868862327, "grad_norm": 0.87109375, "learning_rate": 5.393361457009095e-05, "loss": 0.8809, "step": 5433 }, { "epoch": 0.8566801387336257, "grad_norm": 0.99609375, "learning_rate": 5.392952845522947e-05, "loss": 0.9547, "step": 5434 }, { "epoch": 0.8568377905810187, "grad_norm": 1.109375, "learning_rate": 5.392544243801167e-05, "loss": 0.9758, "step": 5435 }, { "epoch": 0.8569954424284117, "grad_norm": 0.890625, "learning_rate": 5.392135651844631e-05, "loss": 1.0272, "step": 5436 }, { "epoch": 0.8571530942758048, "grad_norm": 0.89453125, "learning_rate": 5.3917270696542e-05, "loss": 0.9655, "step": 5437 }, { "epoch": 0.8573107461231978, "grad_norm": 0.953125, "learning_rate": 5.391318497230739e-05, "loss": 0.8843, "step": 5438 }, { "epoch": 0.8574683979705907, "grad_norm": 0.83203125, "learning_rate": 5.390909934575116e-05, "loss": 0.8495, "step": 5439 }, { "epoch": 0.8576260498179837, "grad_norm": 0.8984375, "learning_rate": 5.390501381688191e-05, "loss": 0.9226, "step": 5440 }, { "epoch": 0.8577837016653768, "grad_norm": 0.95703125, "learning_rate": 5.390092838570841e-05, "loss": 0.9759, "step": 5441 }, { "epoch": 0.8579413535127698, "grad_norm": 0.91796875, "learning_rate": 5.389684305223923e-05, "loss": 0.9135, "step": 5442 }, { "epoch": 0.8580990053601628, "grad_norm": 1.015625, "learning_rate": 5.3892757816483073e-05, "loss": 1.0694, "step": 5443 }, { "epoch": 0.8582566572075558, "grad_norm": 0.9609375, "learning_rate": 5.388867267844857e-05, "loss": 0.7533, "step": 5444 }, { "epoch": 0.8584143090549489, "grad_norm": 0.98828125, "learning_rate": 5.388458763814437e-05, "loss": 0.8991, "step": 5445 }, { "epoch": 0.8585719609023419, "grad_norm": 0.9765625, "learning_rate": 5.388050269557917e-05, "loss": 0.9416, "step": 5446 }, { "epoch": 0.8587296127497348, "grad_norm": 6.03125, "learning_rate": 5.387641785076162e-05, "loss": 0.9521, "step": 5447 }, { "epoch": 0.8588872645971278, "grad_norm": 1.0859375, "learning_rate": 5.387233310370036e-05, "loss": 1.2063, "step": 5448 }, { "epoch": 0.8590449164445209, "grad_norm": 0.91796875, "learning_rate": 5.386824845440406e-05, "loss": 0.7649, "step": 5449 }, { "epoch": 0.8592025682919139, "grad_norm": 0.90625, "learning_rate": 5.3864163902881316e-05, "loss": 0.9348, "step": 5450 }, { "epoch": 0.8593602201393069, "grad_norm": 0.828125, "learning_rate": 5.3860079449140886e-05, "loss": 0.8151, "step": 5451 }, { "epoch": 0.8595178719866999, "grad_norm": 0.84375, "learning_rate": 5.385599509319139e-05, "loss": 0.9416, "step": 5452 }, { "epoch": 0.859675523834093, "grad_norm": 1.2265625, "learning_rate": 5.385191083504146e-05, "loss": 0.8906, "step": 5453 }, { "epoch": 0.859833175681486, "grad_norm": 0.90625, "learning_rate": 5.3847826674699785e-05, "loss": 0.9712, "step": 5454 }, { "epoch": 0.8599908275288789, "grad_norm": 1.0703125, "learning_rate": 5.384374261217495e-05, "loss": 0.884, "step": 5455 }, { "epoch": 0.8601484793762719, "grad_norm": 1.0, "learning_rate": 5.383965864747571e-05, "loss": 0.9084, "step": 5456 }, { "epoch": 0.860306131223665, "grad_norm": 1.03125, "learning_rate": 5.3835574780610676e-05, "loss": 1.1974, "step": 5457 }, { "epoch": 0.860463783071058, "grad_norm": 1.078125, "learning_rate": 5.383149101158851e-05, "loss": 1.1331, "step": 5458 }, { "epoch": 0.860621434918451, "grad_norm": 0.8984375, "learning_rate": 5.3827407340417844e-05, "loss": 0.8904, "step": 5459 }, { "epoch": 0.860779086765844, "grad_norm": 0.9609375, "learning_rate": 5.382332376710731e-05, "loss": 1.0053, "step": 5460 }, { "epoch": 0.860936738613237, "grad_norm": 1.0234375, "learning_rate": 5.381924029166565e-05, "loss": 1.0447, "step": 5461 }, { "epoch": 0.8610943904606301, "grad_norm": 1.046875, "learning_rate": 5.381515691410147e-05, "loss": 0.9282, "step": 5462 }, { "epoch": 0.861252042308023, "grad_norm": 1.0078125, "learning_rate": 5.381107363442341e-05, "loss": 0.8708, "step": 5463 }, { "epoch": 0.861409694155416, "grad_norm": 0.95703125, "learning_rate": 5.380699045264017e-05, "loss": 1.1283, "step": 5464 }, { "epoch": 0.861567346002809, "grad_norm": 0.96484375, "learning_rate": 5.380290736876031e-05, "loss": 1.1498, "step": 5465 }, { "epoch": 0.8617249978502021, "grad_norm": 0.95703125, "learning_rate": 5.37988243827926e-05, "loss": 0.9288, "step": 5466 }, { "epoch": 0.8618826496975951, "grad_norm": 0.89453125, "learning_rate": 5.3794741494745636e-05, "loss": 0.9818, "step": 5467 }, { "epoch": 0.8620403015449881, "grad_norm": 0.94921875, "learning_rate": 5.379065870462807e-05, "loss": 0.8926, "step": 5468 }, { "epoch": 0.8621979533923811, "grad_norm": 1.0078125, "learning_rate": 5.3786576012448564e-05, "loss": 0.9919, "step": 5469 }, { "epoch": 0.8623556052397742, "grad_norm": 0.96875, "learning_rate": 5.378249341821577e-05, "loss": 0.881, "step": 5470 }, { "epoch": 0.8625132570871671, "grad_norm": 1.015625, "learning_rate": 5.3778410921938335e-05, "loss": 1.1738, "step": 5471 }, { "epoch": 0.8626709089345601, "grad_norm": 1.0390625, "learning_rate": 5.377432852362493e-05, "loss": 0.8708, "step": 5472 }, { "epoch": 0.8628285607819531, "grad_norm": 1.0078125, "learning_rate": 5.377024622328418e-05, "loss": 0.934, "step": 5473 }, { "epoch": 0.8629862126293462, "grad_norm": 1.984375, "learning_rate": 5.376616402092472e-05, "loss": 0.8773, "step": 5474 }, { "epoch": 0.8631438644767392, "grad_norm": 0.94140625, "learning_rate": 5.3762081916555277e-05, "loss": 1.0426, "step": 5475 }, { "epoch": 0.8633015163241322, "grad_norm": 0.94140625, "learning_rate": 5.3757999910184444e-05, "loss": 0.8709, "step": 5476 }, { "epoch": 0.8634591681715252, "grad_norm": 0.890625, "learning_rate": 5.3753918001820894e-05, "loss": 0.972, "step": 5477 }, { "epoch": 0.8636168200189183, "grad_norm": 0.96484375, "learning_rate": 5.374983619147327e-05, "loss": 1.005, "step": 5478 }, { "epoch": 0.8637744718663113, "grad_norm": 1.484375, "learning_rate": 5.3745754479150225e-05, "loss": 0.9892, "step": 5479 }, { "epoch": 0.8639321237137042, "grad_norm": 0.984375, "learning_rate": 5.374167286486037e-05, "loss": 0.8773, "step": 5480 }, { "epoch": 0.8640897755610972, "grad_norm": 0.9453125, "learning_rate": 5.3737591348612436e-05, "loss": 0.8845, "step": 5481 }, { "epoch": 0.8642474274084903, "grad_norm": 0.98046875, "learning_rate": 5.373350993041504e-05, "loss": 0.8877, "step": 5482 }, { "epoch": 0.8644050792558833, "grad_norm": 0.96484375, "learning_rate": 5.3729428610276814e-05, "loss": 1.0384, "step": 5483 }, { "epoch": 0.8645627311032763, "grad_norm": 1.03125, "learning_rate": 5.372534738820643e-05, "loss": 0.7953, "step": 5484 }, { "epoch": 0.8647203829506693, "grad_norm": 0.94921875, "learning_rate": 5.372126626421251e-05, "loss": 0.9685, "step": 5485 }, { "epoch": 0.8648780347980624, "grad_norm": 0.984375, "learning_rate": 5.3717185238303694e-05, "loss": 0.9756, "step": 5486 }, { "epoch": 0.8650356866454554, "grad_norm": 1.0, "learning_rate": 5.371310431048869e-05, "loss": 0.9024, "step": 5487 }, { "epoch": 0.8651933384928483, "grad_norm": 0.89453125, "learning_rate": 5.370902348077613e-05, "loss": 0.9132, "step": 5488 }, { "epoch": 0.8653509903402413, "grad_norm": 0.97265625, "learning_rate": 5.370494274917464e-05, "loss": 0.9129, "step": 5489 }, { "epoch": 0.8655086421876343, "grad_norm": 0.83203125, "learning_rate": 5.370086211569287e-05, "loss": 0.9744, "step": 5490 }, { "epoch": 0.8656662940350274, "grad_norm": 0.94921875, "learning_rate": 5.369678158033945e-05, "loss": 0.9591, "step": 5491 }, { "epoch": 0.8658239458824204, "grad_norm": 0.9375, "learning_rate": 5.3692701143123094e-05, "loss": 1.0054, "step": 5492 }, { "epoch": 0.8659815977298134, "grad_norm": 1.09375, "learning_rate": 5.36886208040524e-05, "loss": 1.0017, "step": 5493 }, { "epoch": 0.8661392495772064, "grad_norm": 1.0078125, "learning_rate": 5.368454056313603e-05, "loss": 1.1823, "step": 5494 }, { "epoch": 0.8662969014245995, "grad_norm": 0.85546875, "learning_rate": 5.368046042038264e-05, "loss": 0.9306, "step": 5495 }, { "epoch": 0.8664545532719924, "grad_norm": 0.90234375, "learning_rate": 5.367638037580083e-05, "loss": 1.0665, "step": 5496 }, { "epoch": 0.8666122051193854, "grad_norm": 0.94140625, "learning_rate": 5.3672300429399305e-05, "loss": 0.9194, "step": 5497 }, { "epoch": 0.8667698569667784, "grad_norm": 0.92578125, "learning_rate": 5.36682205811867e-05, "loss": 1.0626, "step": 5498 }, { "epoch": 0.8669275088141715, "grad_norm": 1.0703125, "learning_rate": 5.366414083117165e-05, "loss": 1.0507, "step": 5499 }, { "epoch": 0.8670851606615645, "grad_norm": 0.859375, "learning_rate": 5.366006117936281e-05, "loss": 0.9181, "step": 5500 }, { "epoch": 0.8672428125089575, "grad_norm": 0.93359375, "learning_rate": 5.365598162576878e-05, "loss": 0.9944, "step": 5501 }, { "epoch": 0.8674004643563505, "grad_norm": 0.87890625, "learning_rate": 5.365190217039828e-05, "loss": 1.0055, "step": 5502 }, { "epoch": 0.8675581162037436, "grad_norm": 0.99609375, "learning_rate": 5.364782281325994e-05, "loss": 0.9273, "step": 5503 }, { "epoch": 0.8677157680511365, "grad_norm": 1.1484375, "learning_rate": 5.364374355436239e-05, "loss": 1.1744, "step": 5504 }, { "epoch": 0.8678734198985295, "grad_norm": 0.98828125, "learning_rate": 5.363966439371426e-05, "loss": 0.9707, "step": 5505 }, { "epoch": 0.8680310717459225, "grad_norm": 1.125, "learning_rate": 5.363558533132418e-05, "loss": 0.924, "step": 5506 }, { "epoch": 0.8681887235933156, "grad_norm": 0.9609375, "learning_rate": 5.363150636720087e-05, "loss": 1.1148, "step": 5507 }, { "epoch": 0.8683463754407086, "grad_norm": 0.890625, "learning_rate": 5.362742750135292e-05, "loss": 0.8912, "step": 5508 }, { "epoch": 0.8685040272881016, "grad_norm": 0.90625, "learning_rate": 5.3623348733789e-05, "loss": 0.9579, "step": 5509 }, { "epoch": 0.8686616791354946, "grad_norm": 0.99609375, "learning_rate": 5.361927006451775e-05, "loss": 0.8604, "step": 5510 }, { "epoch": 0.8688193309828877, "grad_norm": 0.92578125, "learning_rate": 5.3615191493547745e-05, "loss": 0.9789, "step": 5511 }, { "epoch": 0.8689769828302806, "grad_norm": 0.953125, "learning_rate": 5.361111302088774e-05, "loss": 0.9296, "step": 5512 }, { "epoch": 0.8691346346776736, "grad_norm": 5.84375, "learning_rate": 5.360703464654633e-05, "loss": 0.99, "step": 5513 }, { "epoch": 0.8692922865250666, "grad_norm": 1.0546875, "learning_rate": 5.360295637053215e-05, "loss": 1.1978, "step": 5514 }, { "epoch": 0.8694499383724597, "grad_norm": 0.90234375, "learning_rate": 5.359887819285386e-05, "loss": 0.9493, "step": 5515 }, { "epoch": 0.8696075902198527, "grad_norm": 0.98046875, "learning_rate": 5.359480011352009e-05, "loss": 1.0329, "step": 5516 }, { "epoch": 0.8697652420672457, "grad_norm": 0.9765625, "learning_rate": 5.35907221325395e-05, "loss": 1.0045, "step": 5517 }, { "epoch": 0.8699228939146387, "grad_norm": 0.9140625, "learning_rate": 5.358664424992072e-05, "loss": 0.7608, "step": 5518 }, { "epoch": 0.8700805457620318, "grad_norm": 1.0078125, "learning_rate": 5.358256646567239e-05, "loss": 1.1914, "step": 5519 }, { "epoch": 0.8702381976094247, "grad_norm": 1.0078125, "learning_rate": 5.357848877980315e-05, "loss": 1.0989, "step": 5520 }, { "epoch": 0.8703958494568177, "grad_norm": 0.98828125, "learning_rate": 5.357441119232162e-05, "loss": 1.1018, "step": 5521 }, { "epoch": 0.8705535013042107, "grad_norm": 1.0703125, "learning_rate": 5.3570333703236495e-05, "loss": 1.17, "step": 5522 }, { "epoch": 0.8707111531516037, "grad_norm": 0.98046875, "learning_rate": 5.356625631255642e-05, "loss": 0.7659, "step": 5523 }, { "epoch": 0.8708688049989968, "grad_norm": 0.88671875, "learning_rate": 5.356217902028999e-05, "loss": 0.9319, "step": 5524 }, { "epoch": 0.8710264568463898, "grad_norm": 0.953125, "learning_rate": 5.355810182644587e-05, "loss": 0.9368, "step": 5525 }, { "epoch": 0.8711841086937828, "grad_norm": 1.0546875, "learning_rate": 5.355402473103268e-05, "loss": 1.1292, "step": 5526 }, { "epoch": 0.8713417605411758, "grad_norm": 1.0078125, "learning_rate": 5.3549947734059104e-05, "loss": 1.105, "step": 5527 }, { "epoch": 0.8714994123885688, "grad_norm": 0.94921875, "learning_rate": 5.354587083553375e-05, "loss": 0.9808, "step": 5528 }, { "epoch": 0.8716570642359618, "grad_norm": 0.8984375, "learning_rate": 5.3541794035465276e-05, "loss": 0.787, "step": 5529 }, { "epoch": 0.8718147160833548, "grad_norm": 1.078125, "learning_rate": 5.353771733386231e-05, "loss": 0.7993, "step": 5530 }, { "epoch": 0.8719723679307478, "grad_norm": 0.98046875, "learning_rate": 5.353364073073347e-05, "loss": 1.0618, "step": 5531 }, { "epoch": 0.8721300197781409, "grad_norm": 1.2109375, "learning_rate": 5.352956422608746e-05, "loss": 0.7805, "step": 5532 }, { "epoch": 0.8722876716255339, "grad_norm": 0.97265625, "learning_rate": 5.352548781993287e-05, "loss": 0.8289, "step": 5533 }, { "epoch": 0.8724453234729269, "grad_norm": 1.0625, "learning_rate": 5.3521411512278366e-05, "loss": 0.9012, "step": 5534 }, { "epoch": 0.8726029753203199, "grad_norm": 0.90234375, "learning_rate": 5.351733530313255e-05, "loss": 0.7937, "step": 5535 }, { "epoch": 0.8727606271677129, "grad_norm": 1.0234375, "learning_rate": 5.3513259192504076e-05, "loss": 0.9976, "step": 5536 }, { "epoch": 0.8729182790151059, "grad_norm": 1.1484375, "learning_rate": 5.350918318040161e-05, "loss": 1.0026, "step": 5537 }, { "epoch": 0.8730759308624989, "grad_norm": 0.95703125, "learning_rate": 5.350510726683378e-05, "loss": 0.7239, "step": 5538 }, { "epoch": 0.8732335827098919, "grad_norm": 0.9375, "learning_rate": 5.350103145180922e-05, "loss": 1.1158, "step": 5539 }, { "epoch": 0.873391234557285, "grad_norm": 1.25, "learning_rate": 5.349695573533655e-05, "loss": 0.9755, "step": 5540 }, { "epoch": 0.873548886404678, "grad_norm": 0.99609375, "learning_rate": 5.3492880117424404e-05, "loss": 0.9594, "step": 5541 }, { "epoch": 0.873706538252071, "grad_norm": 0.9453125, "learning_rate": 5.348880459808148e-05, "loss": 0.9597, "step": 5542 }, { "epoch": 0.873864190099464, "grad_norm": 1.0, "learning_rate": 5.348472917731637e-05, "loss": 0.9553, "step": 5543 }, { "epoch": 0.874021841946857, "grad_norm": 0.9921875, "learning_rate": 5.348065385513772e-05, "loss": 1.2525, "step": 5544 }, { "epoch": 0.87417949379425, "grad_norm": 0.86328125, "learning_rate": 5.347657863155415e-05, "loss": 1.0777, "step": 5545 }, { "epoch": 0.874337145641643, "grad_norm": 0.9765625, "learning_rate": 5.347250350657429e-05, "loss": 1.1205, "step": 5546 }, { "epoch": 0.874494797489036, "grad_norm": 0.96875, "learning_rate": 5.346842848020683e-05, "loss": 1.1123, "step": 5547 }, { "epoch": 0.874652449336429, "grad_norm": 0.91796875, "learning_rate": 5.34643535524604e-05, "loss": 0.9374, "step": 5548 }, { "epoch": 0.8748101011838221, "grad_norm": 1.0390625, "learning_rate": 5.346027872334358e-05, "loss": 1.1903, "step": 5549 }, { "epoch": 0.8749677530312151, "grad_norm": 0.93359375, "learning_rate": 5.3456203992865065e-05, "loss": 1.1511, "step": 5550 }, { "epoch": 0.8751254048786081, "grad_norm": 0.98828125, "learning_rate": 5.345212936103341e-05, "loss": 0.8491, "step": 5551 }, { "epoch": 0.875283056726001, "grad_norm": 1.1015625, "learning_rate": 5.344805482785735e-05, "loss": 0.8454, "step": 5552 }, { "epoch": 0.8754407085733941, "grad_norm": 0.796875, "learning_rate": 5.344398039334548e-05, "loss": 0.9668, "step": 5553 }, { "epoch": 0.8755983604207871, "grad_norm": 1.0078125, "learning_rate": 5.343990605750643e-05, "loss": 1.0905, "step": 5554 }, { "epoch": 0.8757560122681801, "grad_norm": 0.84375, "learning_rate": 5.3435831820348833e-05, "loss": 0.7291, "step": 5555 }, { "epoch": 0.8759136641155731, "grad_norm": 0.9765625, "learning_rate": 5.343175768188133e-05, "loss": 0.855, "step": 5556 }, { "epoch": 0.8760713159629662, "grad_norm": 0.85546875, "learning_rate": 5.342768364211257e-05, "loss": 1.0429, "step": 5557 }, { "epoch": 0.8762289678103592, "grad_norm": 0.89453125, "learning_rate": 5.342360970105116e-05, "loss": 0.9714, "step": 5558 }, { "epoch": 0.8763866196577522, "grad_norm": 1.03125, "learning_rate": 5.341953585870574e-05, "loss": 1.0793, "step": 5559 }, { "epoch": 0.8765442715051451, "grad_norm": 0.94140625, "learning_rate": 5.341546211508492e-05, "loss": 1.0013, "step": 5560 }, { "epoch": 0.8767019233525382, "grad_norm": 0.96484375, "learning_rate": 5.3411388470197397e-05, "loss": 1.0199, "step": 5561 }, { "epoch": 0.8768595751999312, "grad_norm": 0.90625, "learning_rate": 5.340731492405179e-05, "loss": 0.9532, "step": 5562 }, { "epoch": 0.8770172270473242, "grad_norm": 1.046875, "learning_rate": 5.340324147665671e-05, "loss": 1.0215, "step": 5563 }, { "epoch": 0.8771748788947172, "grad_norm": 0.96484375, "learning_rate": 5.339916812802079e-05, "loss": 0.9403, "step": 5564 }, { "epoch": 0.8773325307421103, "grad_norm": 0.98828125, "learning_rate": 5.339509487815268e-05, "loss": 1.3008, "step": 5565 }, { "epoch": 0.8774901825895033, "grad_norm": 0.85546875, "learning_rate": 5.3391021727060944e-05, "loss": 0.8369, "step": 5566 }, { "epoch": 0.8776478344368963, "grad_norm": 1.125, "learning_rate": 5.3386948674754333e-05, "loss": 1.1578, "step": 5567 }, { "epoch": 0.8778054862842892, "grad_norm": 0.98046875, "learning_rate": 5.338287572124141e-05, "loss": 0.9204, "step": 5568 }, { "epoch": 0.8779631381316823, "grad_norm": 0.91015625, "learning_rate": 5.337880286653082e-05, "loss": 0.8878, "step": 5569 }, { "epoch": 0.8781207899790753, "grad_norm": 0.8359375, "learning_rate": 5.337473011063119e-05, "loss": 0.7978, "step": 5570 }, { "epoch": 0.8782784418264683, "grad_norm": 0.9921875, "learning_rate": 5.337065745355112e-05, "loss": 0.867, "step": 5571 }, { "epoch": 0.8784360936738613, "grad_norm": 0.890625, "learning_rate": 5.336658489529931e-05, "loss": 1.0389, "step": 5572 }, { "epoch": 0.8785937455212544, "grad_norm": 0.94140625, "learning_rate": 5.336251243588436e-05, "loss": 0.905, "step": 5573 }, { "epoch": 0.8787513973686474, "grad_norm": 1.015625, "learning_rate": 5.33584400753149e-05, "loss": 0.9505, "step": 5574 }, { "epoch": 0.8789090492160404, "grad_norm": 0.94921875, "learning_rate": 5.335436781359956e-05, "loss": 0.9754, "step": 5575 }, { "epoch": 0.8790667010634333, "grad_norm": 0.9375, "learning_rate": 5.335029565074694e-05, "loss": 0.978, "step": 5576 }, { "epoch": 0.8792243529108263, "grad_norm": 0.98046875, "learning_rate": 5.3346223586765734e-05, "loss": 1.0564, "step": 5577 }, { "epoch": 0.8793820047582194, "grad_norm": 0.99609375, "learning_rate": 5.3342151621664536e-05, "loss": 1.0063, "step": 5578 }, { "epoch": 0.8795396566056124, "grad_norm": 1.1015625, "learning_rate": 5.3338079755451975e-05, "loss": 0.9175, "step": 5579 }, { "epoch": 0.8796973084530054, "grad_norm": 0.91796875, "learning_rate": 5.3334007988136704e-05, "loss": 0.8044, "step": 5580 }, { "epoch": 0.8798549603003984, "grad_norm": 1.046875, "learning_rate": 5.3329936319727295e-05, "loss": 1.1958, "step": 5581 }, { "epoch": 0.8800126121477915, "grad_norm": 0.8671875, "learning_rate": 5.332586475023245e-05, "loss": 0.6708, "step": 5582 }, { "epoch": 0.8801702639951845, "grad_norm": 0.90234375, "learning_rate": 5.332179327966076e-05, "loss": 0.8885, "step": 5583 }, { "epoch": 0.8803279158425774, "grad_norm": 0.95703125, "learning_rate": 5.331772190802087e-05, "loss": 0.93, "step": 5584 }, { "epoch": 0.8804855676899704, "grad_norm": 0.9375, "learning_rate": 5.331365063532141e-05, "loss": 0.897, "step": 5585 }, { "epoch": 0.8806432195373635, "grad_norm": 0.96484375, "learning_rate": 5.3309579461570945e-05, "loss": 0.952, "step": 5586 }, { "epoch": 0.8808008713847565, "grad_norm": 1.1328125, "learning_rate": 5.3305508386778203e-05, "loss": 1.0395, "step": 5587 }, { "epoch": 0.8809585232321495, "grad_norm": 0.94921875, "learning_rate": 5.330143741095177e-05, "loss": 0.9916, "step": 5588 }, { "epoch": 0.8811161750795425, "grad_norm": 0.99609375, "learning_rate": 5.3297366534100266e-05, "loss": 1.2572, "step": 5589 }, { "epoch": 0.8812738269269356, "grad_norm": 1.2578125, "learning_rate": 5.329329575623232e-05, "loss": 0.9552, "step": 5590 }, { "epoch": 0.8814314787743286, "grad_norm": 0.99609375, "learning_rate": 5.328922507735653e-05, "loss": 1.0277, "step": 5591 }, { "epoch": 0.8815891306217215, "grad_norm": 0.8125, "learning_rate": 5.3285154497481596e-05, "loss": 0.7809, "step": 5592 }, { "epoch": 0.8817467824691145, "grad_norm": 1.046875, "learning_rate": 5.328108401661611e-05, "loss": 1.1159, "step": 5593 }, { "epoch": 0.8819044343165076, "grad_norm": 0.921875, "learning_rate": 5.3277013634768694e-05, "loss": 0.795, "step": 5594 }, { "epoch": 0.8820620861639006, "grad_norm": 1.0, "learning_rate": 5.327294335194798e-05, "loss": 1.1674, "step": 5595 }, { "epoch": 0.8822197380112936, "grad_norm": 0.9375, "learning_rate": 5.326887316816258e-05, "loss": 1.0644, "step": 5596 }, { "epoch": 0.8823773898586866, "grad_norm": 1.1171875, "learning_rate": 5.32648030834211e-05, "loss": 0.7833, "step": 5597 }, { "epoch": 0.8825350417060797, "grad_norm": 0.99609375, "learning_rate": 5.3260733097732254e-05, "loss": 1.1752, "step": 5598 }, { "epoch": 0.8826926935534727, "grad_norm": 0.98828125, "learning_rate": 5.3256663211104585e-05, "loss": 0.9997, "step": 5599 }, { "epoch": 0.8828503454008656, "grad_norm": 1.0390625, "learning_rate": 5.325259342354676e-05, "loss": 1.0539, "step": 5600 }, { "epoch": 0.8830079972482586, "grad_norm": 0.88671875, "learning_rate": 5.324852373506739e-05, "loss": 0.8949, "step": 5601 }, { "epoch": 0.8831656490956517, "grad_norm": 1.0390625, "learning_rate": 5.3244454145675114e-05, "loss": 0.9282, "step": 5602 }, { "epoch": 0.8833233009430447, "grad_norm": 1.0, "learning_rate": 5.3240384655378525e-05, "loss": 1.0816, "step": 5603 }, { "epoch": 0.8834809527904377, "grad_norm": 1.21875, "learning_rate": 5.323631526418629e-05, "loss": 1.367, "step": 5604 }, { "epoch": 0.8836386046378307, "grad_norm": 1.234375, "learning_rate": 5.323224597210699e-05, "loss": 0.8751, "step": 5605 }, { "epoch": 0.8837962564852238, "grad_norm": 1.0390625, "learning_rate": 5.322817677914924e-05, "loss": 1.0953, "step": 5606 }, { "epoch": 0.8839539083326168, "grad_norm": 0.953125, "learning_rate": 5.322410768532174e-05, "loss": 1.0726, "step": 5607 }, { "epoch": 0.8841115601800097, "grad_norm": 0.921875, "learning_rate": 5.322003869063307e-05, "loss": 0.8766, "step": 5608 }, { "epoch": 0.8842692120274027, "grad_norm": 0.96484375, "learning_rate": 5.3215969795091845e-05, "loss": 1.0606, "step": 5609 }, { "epoch": 0.8844268638747957, "grad_norm": 0.89453125, "learning_rate": 5.321190099870671e-05, "loss": 1.0368, "step": 5610 }, { "epoch": 0.8845845157221888, "grad_norm": 0.87890625, "learning_rate": 5.320783230148623e-05, "loss": 0.9647, "step": 5611 }, { "epoch": 0.8847421675695818, "grad_norm": 0.89453125, "learning_rate": 5.320376370343911e-05, "loss": 1.018, "step": 5612 }, { "epoch": 0.8848998194169748, "grad_norm": 1.3359375, "learning_rate": 5.319969520457395e-05, "loss": 0.8532, "step": 5613 }, { "epoch": 0.8850574712643678, "grad_norm": 1.1875, "learning_rate": 5.3195626804899354e-05, "loss": 0.9745, "step": 5614 }, { "epoch": 0.8852151231117609, "grad_norm": 1.140625, "learning_rate": 5.319155850442394e-05, "loss": 1.1008, "step": 5615 }, { "epoch": 0.8853727749591538, "grad_norm": 0.9765625, "learning_rate": 5.318749030315633e-05, "loss": 1.0226, "step": 5616 }, { "epoch": 0.8855304268065468, "grad_norm": 0.9140625, "learning_rate": 5.318342220110518e-05, "loss": 0.8461, "step": 5617 }, { "epoch": 0.8856880786539398, "grad_norm": 0.9609375, "learning_rate": 5.3179354198279085e-05, "loss": 0.9375, "step": 5618 }, { "epoch": 0.8858457305013329, "grad_norm": 2.359375, "learning_rate": 5.3175286294686686e-05, "loss": 0.7864, "step": 5619 }, { "epoch": 0.8860033823487259, "grad_norm": 0.96484375, "learning_rate": 5.317121849033659e-05, "loss": 1.008, "step": 5620 }, { "epoch": 0.8861610341961189, "grad_norm": 1.09375, "learning_rate": 5.316715078523737e-05, "loss": 1.1926, "step": 5621 }, { "epoch": 0.8863186860435119, "grad_norm": 0.97265625, "learning_rate": 5.3163083179397756e-05, "loss": 0.8189, "step": 5622 }, { "epoch": 0.886476337890905, "grad_norm": 1.0, "learning_rate": 5.3159015672826295e-05, "loss": 0.9734, "step": 5623 }, { "epoch": 0.8866339897382979, "grad_norm": 0.97265625, "learning_rate": 5.315494826553162e-05, "loss": 1.1089, "step": 5624 }, { "epoch": 0.8867916415856909, "grad_norm": 1.03125, "learning_rate": 5.315088095752236e-05, "loss": 0.9951, "step": 5625 }, { "epoch": 0.8869492934330839, "grad_norm": 1.3125, "learning_rate": 5.3146813748807144e-05, "loss": 0.9189, "step": 5626 }, { "epoch": 0.887106945280477, "grad_norm": 1.5078125, "learning_rate": 5.314274663939452e-05, "loss": 0.7972, "step": 5627 }, { "epoch": 0.88726459712787, "grad_norm": 0.9453125, "learning_rate": 5.313867962929321e-05, "loss": 0.9639, "step": 5628 }, { "epoch": 0.887422248975263, "grad_norm": 1.0859375, "learning_rate": 5.313461271851179e-05, "loss": 1.0188, "step": 5629 }, { "epoch": 0.887579900822656, "grad_norm": 1.2890625, "learning_rate": 5.3130545907058884e-05, "loss": 0.9206, "step": 5630 }, { "epoch": 0.8877375526700491, "grad_norm": 0.92578125, "learning_rate": 5.3126479194943104e-05, "loss": 0.9683, "step": 5631 }, { "epoch": 0.8878952045174421, "grad_norm": 1.140625, "learning_rate": 5.3122412582173034e-05, "loss": 0.9772, "step": 5632 }, { "epoch": 0.888052856364835, "grad_norm": 0.92578125, "learning_rate": 5.3118346068757366e-05, "loss": 0.828, "step": 5633 }, { "epoch": 0.888210508212228, "grad_norm": 0.96484375, "learning_rate": 5.311427965470468e-05, "loss": 0.9667, "step": 5634 }, { "epoch": 0.888368160059621, "grad_norm": 1.078125, "learning_rate": 5.311021334002362e-05, "loss": 0.9997, "step": 5635 }, { "epoch": 0.8885258119070141, "grad_norm": 1.109375, "learning_rate": 5.310614712472276e-05, "loss": 1.0826, "step": 5636 }, { "epoch": 0.8886834637544071, "grad_norm": 1.0234375, "learning_rate": 5.310208100881071e-05, "loss": 1.0051, "step": 5637 }, { "epoch": 0.8888411156018001, "grad_norm": 1.1640625, "learning_rate": 5.309801499229616e-05, "loss": 1.0067, "step": 5638 }, { "epoch": 0.8889987674491932, "grad_norm": 0.94140625, "learning_rate": 5.3093949075187676e-05, "loss": 0.7348, "step": 5639 }, { "epoch": 0.8891564192965862, "grad_norm": 0.99609375, "learning_rate": 5.30898832574939e-05, "loss": 1.0236, "step": 5640 }, { "epoch": 0.8893140711439791, "grad_norm": 0.97265625, "learning_rate": 5.3085817539223437e-05, "loss": 1.0125, "step": 5641 }, { "epoch": 0.8894717229913721, "grad_norm": 0.8828125, "learning_rate": 5.308175192038489e-05, "loss": 0.9826, "step": 5642 }, { "epoch": 0.8896293748387651, "grad_norm": 0.953125, "learning_rate": 5.307768640098689e-05, "loss": 0.9488, "step": 5643 }, { "epoch": 0.8897870266861582, "grad_norm": 0.96875, "learning_rate": 5.307362098103805e-05, "loss": 1.1617, "step": 5644 }, { "epoch": 0.8899446785335512, "grad_norm": 1.0, "learning_rate": 5.306955566054696e-05, "loss": 1.0921, "step": 5645 }, { "epoch": 0.8901023303809442, "grad_norm": 0.9296875, "learning_rate": 5.3065490439522294e-05, "loss": 1.0238, "step": 5646 }, { "epoch": 0.8902599822283372, "grad_norm": 1.2109375, "learning_rate": 5.3061425317972646e-05, "loss": 1.0684, "step": 5647 }, { "epoch": 0.8904176340757303, "grad_norm": 0.9375, "learning_rate": 5.3057360295906625e-05, "loss": 1.0786, "step": 5648 }, { "epoch": 0.8905752859231232, "grad_norm": 1.015625, "learning_rate": 5.3053295373332836e-05, "loss": 1.2517, "step": 5649 }, { "epoch": 0.8907329377705162, "grad_norm": 1.6953125, "learning_rate": 5.30492305502599e-05, "loss": 0.8766, "step": 5650 }, { "epoch": 0.8908905896179092, "grad_norm": 0.83203125, "learning_rate": 5.3045165826696454e-05, "loss": 0.6458, "step": 5651 }, { "epoch": 0.8910482414653023, "grad_norm": 1.0625, "learning_rate": 5.3041101202651046e-05, "loss": 1.0941, "step": 5652 }, { "epoch": 0.8912058933126953, "grad_norm": 0.984375, "learning_rate": 5.303703667813238e-05, "loss": 1.0064, "step": 5653 }, { "epoch": 0.8913635451600883, "grad_norm": 1.0859375, "learning_rate": 5.3032972253149026e-05, "loss": 1.0211, "step": 5654 }, { "epoch": 0.8915211970074813, "grad_norm": 0.99609375, "learning_rate": 5.3028907927709605e-05, "loss": 1.1466, "step": 5655 }, { "epoch": 0.8916788488548744, "grad_norm": 1.0234375, "learning_rate": 5.3024843701822733e-05, "loss": 0.9122, "step": 5656 }, { "epoch": 0.8918365007022673, "grad_norm": 1.3828125, "learning_rate": 5.302077957549698e-05, "loss": 0.8689, "step": 5657 }, { "epoch": 0.8919941525496603, "grad_norm": 1.125, "learning_rate": 5.3016715548741036e-05, "loss": 1.0993, "step": 5658 }, { "epoch": 0.8921518043970533, "grad_norm": 0.90625, "learning_rate": 5.301265162156348e-05, "loss": 0.9303, "step": 5659 }, { "epoch": 0.8923094562444464, "grad_norm": 0.93359375, "learning_rate": 5.300858779397293e-05, "loss": 0.9972, "step": 5660 }, { "epoch": 0.8924671080918394, "grad_norm": 1.125, "learning_rate": 5.300452406597798e-05, "loss": 1.0082, "step": 5661 }, { "epoch": 0.8926247599392324, "grad_norm": 0.8828125, "learning_rate": 5.300046043758722e-05, "loss": 0.8564, "step": 5662 }, { "epoch": 0.8927824117866254, "grad_norm": 0.921875, "learning_rate": 5.299639690880933e-05, "loss": 1.0099, "step": 5663 }, { "epoch": 0.8929400636340185, "grad_norm": 0.9921875, "learning_rate": 5.299233347965291e-05, "loss": 1.0481, "step": 5664 }, { "epoch": 0.8930977154814114, "grad_norm": 1.0234375, "learning_rate": 5.298827015012653e-05, "loss": 0.9609, "step": 5665 }, { "epoch": 0.8932553673288044, "grad_norm": 0.984375, "learning_rate": 5.2984206920238844e-05, "loss": 0.9816, "step": 5666 }, { "epoch": 0.8934130191761974, "grad_norm": 1.0625, "learning_rate": 5.298014378999838e-05, "loss": 1.0295, "step": 5667 }, { "epoch": 0.8935706710235904, "grad_norm": 0.87890625, "learning_rate": 5.297608075941387e-05, "loss": 0.8711, "step": 5668 }, { "epoch": 0.8937283228709835, "grad_norm": 0.85546875, "learning_rate": 5.297201782849388e-05, "loss": 1.0331, "step": 5669 }, { "epoch": 0.8938859747183765, "grad_norm": 0.98046875, "learning_rate": 5.2967954997246996e-05, "loss": 1.0351, "step": 5670 }, { "epoch": 0.8940436265657695, "grad_norm": 0.94140625, "learning_rate": 5.296389226568184e-05, "loss": 1.0203, "step": 5671 }, { "epoch": 0.8942012784131625, "grad_norm": 0.9296875, "learning_rate": 5.295982963380699e-05, "loss": 1.1639, "step": 5672 }, { "epoch": 0.8943589302605555, "grad_norm": 0.96875, "learning_rate": 5.2955767101631135e-05, "loss": 0.9663, "step": 5673 }, { "epoch": 0.8945165821079485, "grad_norm": 0.859375, "learning_rate": 5.295170466916284e-05, "loss": 0.8075, "step": 5674 }, { "epoch": 0.8946742339553415, "grad_norm": 0.96875, "learning_rate": 5.294764233641072e-05, "loss": 0.9778, "step": 5675 }, { "epoch": 0.8948318858027345, "grad_norm": 0.875, "learning_rate": 5.2943580103383384e-05, "loss": 1.019, "step": 5676 }, { "epoch": 0.8949895376501276, "grad_norm": 0.87890625, "learning_rate": 5.29395179700894e-05, "loss": 0.873, "step": 5677 }, { "epoch": 0.8951471894975206, "grad_norm": 1.5703125, "learning_rate": 5.293545593653746e-05, "loss": 0.7727, "step": 5678 }, { "epoch": 0.8953048413449136, "grad_norm": 0.97265625, "learning_rate": 5.293139400273612e-05, "loss": 0.9591, "step": 5679 }, { "epoch": 0.8954624931923066, "grad_norm": 1.0078125, "learning_rate": 5.292733216869401e-05, "loss": 0.8596, "step": 5680 }, { "epoch": 0.8956201450396996, "grad_norm": 0.86328125, "learning_rate": 5.292327043441971e-05, "loss": 1.0043, "step": 5681 }, { "epoch": 0.8957777968870926, "grad_norm": 0.8984375, "learning_rate": 5.291920879992184e-05, "loss": 0.9112, "step": 5682 }, { "epoch": 0.8959354487344856, "grad_norm": 1.171875, "learning_rate": 5.291514726520903e-05, "loss": 0.9408, "step": 5683 }, { "epoch": 0.8960931005818786, "grad_norm": 1.0078125, "learning_rate": 5.2911085830289885e-05, "loss": 1.0698, "step": 5684 }, { "epoch": 0.8962507524292717, "grad_norm": 0.9453125, "learning_rate": 5.2907024495172994e-05, "loss": 1.0327, "step": 5685 }, { "epoch": 0.8964084042766647, "grad_norm": 0.91796875, "learning_rate": 5.290296325986699e-05, "loss": 0.9329, "step": 5686 }, { "epoch": 0.8965660561240577, "grad_norm": 0.87109375, "learning_rate": 5.289890212438045e-05, "loss": 0.8695, "step": 5687 }, { "epoch": 0.8967237079714507, "grad_norm": 0.9609375, "learning_rate": 5.2894841088722005e-05, "loss": 0.7918, "step": 5688 }, { "epoch": 0.8968813598188436, "grad_norm": 1.0078125, "learning_rate": 5.289078015290023e-05, "loss": 0.8638, "step": 5689 }, { "epoch": 0.8970390116662367, "grad_norm": 0.98828125, "learning_rate": 5.288671931692377e-05, "loss": 0.9629, "step": 5690 }, { "epoch": 0.8971966635136297, "grad_norm": 0.9375, "learning_rate": 5.2882658580801215e-05, "loss": 0.9847, "step": 5691 }, { "epoch": 0.8973543153610227, "grad_norm": 0.87890625, "learning_rate": 5.287859794454113e-05, "loss": 0.8517, "step": 5692 }, { "epoch": 0.8975119672084158, "grad_norm": 1.0546875, "learning_rate": 5.28745374081522e-05, "loss": 1.1116, "step": 5693 }, { "epoch": 0.8976696190558088, "grad_norm": 1.0390625, "learning_rate": 5.2870476971643e-05, "loss": 0.9708, "step": 5694 }, { "epoch": 0.8978272709032018, "grad_norm": 0.96875, "learning_rate": 5.286641663502214e-05, "loss": 1.0561, "step": 5695 }, { "epoch": 0.8979849227505948, "grad_norm": 1.015625, "learning_rate": 5.28623563982982e-05, "loss": 1.0881, "step": 5696 }, { "epoch": 0.8981425745979877, "grad_norm": 0.92578125, "learning_rate": 5.2858296261479766e-05, "loss": 1.3231, "step": 5697 }, { "epoch": 0.8983002264453808, "grad_norm": 1.09375, "learning_rate": 5.2854236224575524e-05, "loss": 1.0912, "step": 5698 }, { "epoch": 0.8984578782927738, "grad_norm": 0.98828125, "learning_rate": 5.285017628759403e-05, "loss": 1.1144, "step": 5699 }, { "epoch": 0.8986155301401668, "grad_norm": 0.9453125, "learning_rate": 5.284611645054389e-05, "loss": 0.8805, "step": 5700 }, { "epoch": 0.8987731819875598, "grad_norm": 1.328125, "learning_rate": 5.2842056713433706e-05, "loss": 0.9397, "step": 5701 }, { "epoch": 0.8989308338349529, "grad_norm": 1.5, "learning_rate": 5.283799707627206e-05, "loss": 1.2732, "step": 5702 }, { "epoch": 0.8990884856823459, "grad_norm": 0.9296875, "learning_rate": 5.2833937539067614e-05, "loss": 0.9459, "step": 5703 }, { "epoch": 0.8992461375297389, "grad_norm": 0.91015625, "learning_rate": 5.2829878101828946e-05, "loss": 0.9582, "step": 5704 }, { "epoch": 0.8994037893771318, "grad_norm": 0.92578125, "learning_rate": 5.282581876456465e-05, "loss": 1.0042, "step": 5705 }, { "epoch": 0.8995614412245249, "grad_norm": 0.9140625, "learning_rate": 5.282175952728334e-05, "loss": 0.7642, "step": 5706 }, { "epoch": 0.8997190930719179, "grad_norm": 0.953125, "learning_rate": 5.281770038999356e-05, "loss": 0.9775, "step": 5707 }, { "epoch": 0.8998767449193109, "grad_norm": 0.94140625, "learning_rate": 5.2813641352704036e-05, "loss": 1.102, "step": 5708 }, { "epoch": 0.9000343967667039, "grad_norm": 0.91015625, "learning_rate": 5.2809582415423285e-05, "loss": 0.8249, "step": 5709 }, { "epoch": 0.900192048614097, "grad_norm": 0.96484375, "learning_rate": 5.280552357815992e-05, "loss": 1.1963, "step": 5710 }, { "epoch": 0.90034970046149, "grad_norm": 2.328125, "learning_rate": 5.280146484092257e-05, "loss": 0.8222, "step": 5711 }, { "epoch": 0.900507352308883, "grad_norm": 0.98828125, "learning_rate": 5.279740620371976e-05, "loss": 1.0591, "step": 5712 }, { "epoch": 0.9006650041562759, "grad_norm": 0.98828125, "learning_rate": 5.279334766656019e-05, "loss": 1.0861, "step": 5713 }, { "epoch": 0.900822656003669, "grad_norm": 0.89453125, "learning_rate": 5.278928922945243e-05, "loss": 0.9344, "step": 5714 }, { "epoch": 0.900980307851062, "grad_norm": 0.90625, "learning_rate": 5.278523089240506e-05, "loss": 0.8548, "step": 5715 }, { "epoch": 0.901137959698455, "grad_norm": 0.953125, "learning_rate": 5.27811726554267e-05, "loss": 1.1744, "step": 5716 }, { "epoch": 0.901295611545848, "grad_norm": 1.0234375, "learning_rate": 5.2777114518525915e-05, "loss": 1.1203, "step": 5717 }, { "epoch": 0.901453263393241, "grad_norm": 0.984375, "learning_rate": 5.277305648171138e-05, "loss": 1.0783, "step": 5718 }, { "epoch": 0.9016109152406341, "grad_norm": 0.984375, "learning_rate": 5.276899854499164e-05, "loss": 0.8651, "step": 5719 }, { "epoch": 0.9017685670880271, "grad_norm": 0.89453125, "learning_rate": 5.276494070837531e-05, "loss": 0.8279, "step": 5720 }, { "epoch": 0.90192621893542, "grad_norm": 0.96484375, "learning_rate": 5.276088297187098e-05, "loss": 1.0883, "step": 5721 }, { "epoch": 0.902083870782813, "grad_norm": 0.8203125, "learning_rate": 5.275682533548724e-05, "loss": 0.7935, "step": 5722 }, { "epoch": 0.9022415226302061, "grad_norm": 1.046875, "learning_rate": 5.275276779923273e-05, "loss": 1.085, "step": 5723 }, { "epoch": 0.9023991744775991, "grad_norm": 0.94921875, "learning_rate": 5.274871036311604e-05, "loss": 0.9311, "step": 5724 }, { "epoch": 0.9025568263249921, "grad_norm": 0.91015625, "learning_rate": 5.2744653027145754e-05, "loss": 0.9166, "step": 5725 }, { "epoch": 0.9027144781723851, "grad_norm": 1.0234375, "learning_rate": 5.274059579133047e-05, "loss": 1.0862, "step": 5726 }, { "epoch": 0.9028721300197782, "grad_norm": 0.88671875, "learning_rate": 5.27365386556788e-05, "loss": 0.9073, "step": 5727 }, { "epoch": 0.9030297818671712, "grad_norm": 0.921875, "learning_rate": 5.2732481620199325e-05, "loss": 0.9277, "step": 5728 }, { "epoch": 0.9031874337145641, "grad_norm": 0.94140625, "learning_rate": 5.272842468490067e-05, "loss": 0.9981, "step": 5729 }, { "epoch": 0.9033450855619571, "grad_norm": 0.9140625, "learning_rate": 5.272436784979136e-05, "loss": 0.8866, "step": 5730 }, { "epoch": 0.9035027374093502, "grad_norm": 0.9296875, "learning_rate": 5.272031111488009e-05, "loss": 0.9711, "step": 5731 }, { "epoch": 0.9036603892567432, "grad_norm": 0.9765625, "learning_rate": 5.271625448017543e-05, "loss": 0.9463, "step": 5732 }, { "epoch": 0.9038180411041362, "grad_norm": 0.9296875, "learning_rate": 5.271219794568596e-05, "loss": 0.7304, "step": 5733 }, { "epoch": 0.9039756929515292, "grad_norm": 1.0703125, "learning_rate": 5.2708141511420275e-05, "loss": 1.0453, "step": 5734 }, { "epoch": 0.9041333447989223, "grad_norm": 0.921875, "learning_rate": 5.2704085177386996e-05, "loss": 0.9152, "step": 5735 }, { "epoch": 0.9042909966463153, "grad_norm": 0.9765625, "learning_rate": 5.270002894359469e-05, "loss": 1.0095, "step": 5736 }, { "epoch": 0.9044486484937082, "grad_norm": 0.98828125, "learning_rate": 5.2695972810051945e-05, "loss": 1.0509, "step": 5737 }, { "epoch": 0.9046063003411012, "grad_norm": 0.95703125, "learning_rate": 5.269191677676742e-05, "loss": 0.9205, "step": 5738 }, { "epoch": 0.9047639521884943, "grad_norm": 0.92578125, "learning_rate": 5.2687860843749656e-05, "loss": 1.138, "step": 5739 }, { "epoch": 0.9049216040358873, "grad_norm": 1.03125, "learning_rate": 5.2683805011007294e-05, "loss": 1.2047, "step": 5740 }, { "epoch": 0.9050792558832803, "grad_norm": 1.0625, "learning_rate": 5.267974927854888e-05, "loss": 1.0533, "step": 5741 }, { "epoch": 0.9052369077306733, "grad_norm": 0.9921875, "learning_rate": 5.267569364638301e-05, "loss": 0.8819, "step": 5742 }, { "epoch": 0.9053945595780664, "grad_norm": 1.046875, "learning_rate": 5.2671638114518315e-05, "loss": 0.857, "step": 5743 }, { "epoch": 0.9055522114254594, "grad_norm": 0.953125, "learning_rate": 5.266758268296341e-05, "loss": 0.946, "step": 5744 }, { "epoch": 0.9057098632728523, "grad_norm": 0.91015625, "learning_rate": 5.2663527351726835e-05, "loss": 0.8963, "step": 5745 }, { "epoch": 0.9058675151202453, "grad_norm": 1.0546875, "learning_rate": 5.265947212081722e-05, "loss": 0.9654, "step": 5746 }, { "epoch": 0.9060251669676384, "grad_norm": 0.953125, "learning_rate": 5.26554169902431e-05, "loss": 0.8807, "step": 5747 }, { "epoch": 0.9061828188150314, "grad_norm": 1.0390625, "learning_rate": 5.2651361960013165e-05, "loss": 0.9528, "step": 5748 }, { "epoch": 0.9063404706624244, "grad_norm": 0.91796875, "learning_rate": 5.264730703013596e-05, "loss": 0.9488, "step": 5749 }, { "epoch": 0.9064981225098174, "grad_norm": 1.0390625, "learning_rate": 5.2643252200620086e-05, "loss": 1.0399, "step": 5750 }, { "epoch": 0.9066557743572105, "grad_norm": 0.98828125, "learning_rate": 5.2639197471474125e-05, "loss": 1.0373, "step": 5751 }, { "epoch": 0.9068134262046035, "grad_norm": 0.96484375, "learning_rate": 5.2635142842706644e-05, "loss": 0.9129, "step": 5752 }, { "epoch": 0.9069710780519964, "grad_norm": 0.84765625, "learning_rate": 5.26310883143263e-05, "loss": 0.7311, "step": 5753 }, { "epoch": 0.9071287298993894, "grad_norm": 0.8359375, "learning_rate": 5.262703388634167e-05, "loss": 0.7276, "step": 5754 }, { "epoch": 0.9072863817467824, "grad_norm": 0.82421875, "learning_rate": 5.262297955876132e-05, "loss": 0.9429, "step": 5755 }, { "epoch": 0.9074440335941755, "grad_norm": 1.015625, "learning_rate": 5.261892533159387e-05, "loss": 1.1244, "step": 5756 }, { "epoch": 0.9076016854415685, "grad_norm": 0.96484375, "learning_rate": 5.261487120484785e-05, "loss": 1.0323, "step": 5757 }, { "epoch": 0.9077593372889615, "grad_norm": 1.0625, "learning_rate": 5.2610817178531954e-05, "loss": 1.089, "step": 5758 }, { "epoch": 0.9079169891363545, "grad_norm": 0.90234375, "learning_rate": 5.2606763252654713e-05, "loss": 1.1197, "step": 5759 }, { "epoch": 0.9080746409837476, "grad_norm": 0.9296875, "learning_rate": 5.2602709427224725e-05, "loss": 1.0397, "step": 5760 }, { "epoch": 0.9082322928311405, "grad_norm": 0.9765625, "learning_rate": 5.25986557022506e-05, "loss": 0.8965, "step": 5761 }, { "epoch": 0.9083899446785335, "grad_norm": 1.015625, "learning_rate": 5.259460207774085e-05, "loss": 0.9575, "step": 5762 }, { "epoch": 0.9085475965259265, "grad_norm": 1.109375, "learning_rate": 5.25905485537042e-05, "loss": 0.8594, "step": 5763 }, { "epoch": 0.9087052483733196, "grad_norm": 0.91015625, "learning_rate": 5.258649513014915e-05, "loss": 0.969, "step": 5764 }, { "epoch": 0.9088629002207126, "grad_norm": 1.046875, "learning_rate": 5.2582441807084324e-05, "loss": 1.2605, "step": 5765 }, { "epoch": 0.9090205520681056, "grad_norm": 0.9296875, "learning_rate": 5.25783885845183e-05, "loss": 0.9492, "step": 5766 }, { "epoch": 0.9091782039154986, "grad_norm": 0.91796875, "learning_rate": 5.2574335462459665e-05, "loss": 0.8452, "step": 5767 }, { "epoch": 0.9093358557628917, "grad_norm": 0.94921875, "learning_rate": 5.2570282440916995e-05, "loss": 0.8894, "step": 5768 }, { "epoch": 0.9094935076102846, "grad_norm": 0.97265625, "learning_rate": 5.256622951989893e-05, "loss": 0.9056, "step": 5769 }, { "epoch": 0.9096511594576776, "grad_norm": 0.94140625, "learning_rate": 5.2562176699414014e-05, "loss": 0.8731, "step": 5770 }, { "epoch": 0.9098088113050706, "grad_norm": 2.390625, "learning_rate": 5.255812397947086e-05, "loss": 1.0679, "step": 5771 }, { "epoch": 0.9099664631524637, "grad_norm": 0.9921875, "learning_rate": 5.255407136007806e-05, "loss": 1.0462, "step": 5772 }, { "epoch": 0.9101241149998567, "grad_norm": 0.87890625, "learning_rate": 5.2550018841244195e-05, "loss": 1.013, "step": 5773 }, { "epoch": 0.9102817668472497, "grad_norm": 1.0234375, "learning_rate": 5.254596642297786e-05, "loss": 1.1781, "step": 5774 }, { "epoch": 0.9104394186946427, "grad_norm": 0.9609375, "learning_rate": 5.2541914105287616e-05, "loss": 0.9664, "step": 5775 }, { "epoch": 0.9105970705420358, "grad_norm": 0.97265625, "learning_rate": 5.253786188818208e-05, "loss": 1.0172, "step": 5776 }, { "epoch": 0.9107547223894287, "grad_norm": 0.96484375, "learning_rate": 5.253380977166984e-05, "loss": 0.8114, "step": 5777 }, { "epoch": 0.9109123742368217, "grad_norm": 0.984375, "learning_rate": 5.252975775575944e-05, "loss": 1.0404, "step": 5778 }, { "epoch": 0.9110700260842147, "grad_norm": 0.890625, "learning_rate": 5.252570584045953e-05, "loss": 0.9843, "step": 5779 }, { "epoch": 0.9112276779316077, "grad_norm": 0.9296875, "learning_rate": 5.2521654025778685e-05, "loss": 1.0182, "step": 5780 }, { "epoch": 0.9113853297790008, "grad_norm": 0.78515625, "learning_rate": 5.2517602311725464e-05, "loss": 0.8779, "step": 5781 }, { "epoch": 0.9115429816263938, "grad_norm": 1.015625, "learning_rate": 5.25135506983085e-05, "loss": 1.0808, "step": 5782 }, { "epoch": 0.9117006334737868, "grad_norm": 0.96484375, "learning_rate": 5.250949918553629e-05, "loss": 0.9974, "step": 5783 }, { "epoch": 0.9118582853211799, "grad_norm": 0.9921875, "learning_rate": 5.2505447773417526e-05, "loss": 1.0804, "step": 5784 }, { "epoch": 0.9120159371685728, "grad_norm": 0.94921875, "learning_rate": 5.250139646196075e-05, "loss": 1.1376, "step": 5785 }, { "epoch": 0.9121735890159658, "grad_norm": 0.9296875, "learning_rate": 5.2497345251174555e-05, "loss": 0.8969, "step": 5786 }, { "epoch": 0.9123312408633588, "grad_norm": 0.91796875, "learning_rate": 5.2493294141067514e-05, "loss": 0.962, "step": 5787 }, { "epoch": 0.9124888927107518, "grad_norm": 0.83984375, "learning_rate": 5.248924313164818e-05, "loss": 0.8887, "step": 5788 }, { "epoch": 0.9126465445581449, "grad_norm": 1.140625, "learning_rate": 5.248519222292523e-05, "loss": 1.2643, "step": 5789 }, { "epoch": 0.9128041964055379, "grad_norm": 0.98828125, "learning_rate": 5.2481141414907184e-05, "loss": 0.8951, "step": 5790 }, { "epoch": 0.9129618482529309, "grad_norm": 0.9453125, "learning_rate": 5.247709070760265e-05, "loss": 1.0097, "step": 5791 }, { "epoch": 0.9131195001003239, "grad_norm": 0.91015625, "learning_rate": 5.247304010102021e-05, "loss": 0.8005, "step": 5792 }, { "epoch": 0.913277151947717, "grad_norm": 1.765625, "learning_rate": 5.2468989595168404e-05, "loss": 1.0551, "step": 5793 }, { "epoch": 0.9134348037951099, "grad_norm": 1.09375, "learning_rate": 5.24649391900559e-05, "loss": 1.1272, "step": 5794 }, { "epoch": 0.9135924556425029, "grad_norm": 0.921875, "learning_rate": 5.246088888569123e-05, "loss": 0.9478, "step": 5795 }, { "epoch": 0.9137501074898959, "grad_norm": 0.98828125, "learning_rate": 5.2456838682083e-05, "loss": 0.9822, "step": 5796 }, { "epoch": 0.913907759337289, "grad_norm": 1.4921875, "learning_rate": 5.2452788579239775e-05, "loss": 0.9619, "step": 5797 }, { "epoch": 0.914065411184682, "grad_norm": 1.0703125, "learning_rate": 5.244873857717011e-05, "loss": 0.9809, "step": 5798 }, { "epoch": 0.914223063032075, "grad_norm": 0.96875, "learning_rate": 5.244468867588267e-05, "loss": 0.9612, "step": 5799 }, { "epoch": 0.914380714879468, "grad_norm": 1.015625, "learning_rate": 5.2440638875386e-05, "loss": 0.9462, "step": 5800 }, { "epoch": 0.9145383667268611, "grad_norm": 0.9609375, "learning_rate": 5.2436589175688655e-05, "loss": 0.9039, "step": 5801 }, { "epoch": 0.914696018574254, "grad_norm": 0.85546875, "learning_rate": 5.243253957679926e-05, "loss": 0.8172, "step": 5802 }, { "epoch": 0.914853670421647, "grad_norm": 0.93359375, "learning_rate": 5.242849007872632e-05, "loss": 0.9576, "step": 5803 }, { "epoch": 0.91501132226904, "grad_norm": 0.98046875, "learning_rate": 5.242444068147854e-05, "loss": 1.0571, "step": 5804 }, { "epoch": 0.915168974116433, "grad_norm": 1.015625, "learning_rate": 5.242039138506443e-05, "loss": 1.0112, "step": 5805 }, { "epoch": 0.9153266259638261, "grad_norm": 1.03125, "learning_rate": 5.241634218949256e-05, "loss": 1.1044, "step": 5806 }, { "epoch": 0.9154842778112191, "grad_norm": 1.0, "learning_rate": 5.2412293094771556e-05, "loss": 1.0583, "step": 5807 }, { "epoch": 0.9156419296586121, "grad_norm": 1.015625, "learning_rate": 5.240824410090994e-05, "loss": 0.946, "step": 5808 }, { "epoch": 0.9157995815060052, "grad_norm": 0.9765625, "learning_rate": 5.240419520791635e-05, "loss": 1.1492, "step": 5809 }, { "epoch": 0.9159572333533981, "grad_norm": 0.90234375, "learning_rate": 5.2400146415799366e-05, "loss": 1.1111, "step": 5810 }, { "epoch": 0.9161148852007911, "grad_norm": 1.03125, "learning_rate": 5.239609772456755e-05, "loss": 1.0249, "step": 5811 }, { "epoch": 0.9162725370481841, "grad_norm": 0.91796875, "learning_rate": 5.239204913422947e-05, "loss": 0.9796, "step": 5812 }, { "epoch": 0.9164301888955771, "grad_norm": 0.8125, "learning_rate": 5.2388000644793735e-05, "loss": 0.8498, "step": 5813 }, { "epoch": 0.9165878407429702, "grad_norm": 0.9921875, "learning_rate": 5.238395225626891e-05, "loss": 1.0396, "step": 5814 }, { "epoch": 0.9167454925903632, "grad_norm": 0.94921875, "learning_rate": 5.237990396866357e-05, "loss": 0.985, "step": 5815 }, { "epoch": 0.9169031444377562, "grad_norm": 1.0234375, "learning_rate": 5.237585578198626e-05, "loss": 1.0859, "step": 5816 }, { "epoch": 0.9170607962851492, "grad_norm": 0.89453125, "learning_rate": 5.2371807696245644e-05, "loss": 1.1208, "step": 5817 }, { "epoch": 0.9172184481325422, "grad_norm": 1.0390625, "learning_rate": 5.236775971145026e-05, "loss": 1.0906, "step": 5818 }, { "epoch": 0.9173760999799352, "grad_norm": 0.8828125, "learning_rate": 5.2363711827608684e-05, "loss": 0.7741, "step": 5819 }, { "epoch": 0.9175337518273282, "grad_norm": 1.015625, "learning_rate": 5.235966404472951e-05, "loss": 0.9896, "step": 5820 }, { "epoch": 0.9176914036747212, "grad_norm": 0.91015625, "learning_rate": 5.235561636282129e-05, "loss": 0.8187, "step": 5821 }, { "epoch": 0.9178490555221143, "grad_norm": 1.0703125, "learning_rate": 5.2351568781892625e-05, "loss": 1.0774, "step": 5822 }, { "epoch": 0.9180067073695073, "grad_norm": 1.03125, "learning_rate": 5.234752130195205e-05, "loss": 1.1198, "step": 5823 }, { "epoch": 0.9181643592169003, "grad_norm": 0.91015625, "learning_rate": 5.234347392300822e-05, "loss": 0.7836, "step": 5824 }, { "epoch": 0.9183220110642933, "grad_norm": 1.125, "learning_rate": 5.2339426645069664e-05, "loss": 1.1228, "step": 5825 }, { "epoch": 0.9184796629116863, "grad_norm": 1.1484375, "learning_rate": 5.2335379468144976e-05, "loss": 0.6992, "step": 5826 }, { "epoch": 0.9186373147590793, "grad_norm": 1.015625, "learning_rate": 5.233133239224273e-05, "loss": 1.0232, "step": 5827 }, { "epoch": 0.9187949666064723, "grad_norm": 1.2421875, "learning_rate": 5.2327285417371455e-05, "loss": 0.8242, "step": 5828 }, { "epoch": 0.9189526184538653, "grad_norm": 0.95703125, "learning_rate": 5.232323854353981e-05, "loss": 0.9238, "step": 5829 }, { "epoch": 0.9191102703012584, "grad_norm": 0.9921875, "learning_rate": 5.231919177075634e-05, "loss": 0.9631, "step": 5830 }, { "epoch": 0.9192679221486514, "grad_norm": 0.91796875, "learning_rate": 5.231514509902962e-05, "loss": 0.9566, "step": 5831 }, { "epoch": 0.9194255739960444, "grad_norm": 0.8671875, "learning_rate": 5.231109852836822e-05, "loss": 0.8354, "step": 5832 }, { "epoch": 0.9195832258434374, "grad_norm": 0.9765625, "learning_rate": 5.230705205878068e-05, "loss": 1.2012, "step": 5833 }, { "epoch": 0.9197408776908303, "grad_norm": 0.94140625, "learning_rate": 5.230300569027566e-05, "loss": 1.105, "step": 5834 }, { "epoch": 0.9198985295382234, "grad_norm": 0.8046875, "learning_rate": 5.229895942286169e-05, "loss": 0.7986, "step": 5835 }, { "epoch": 0.9200561813856164, "grad_norm": 0.99609375, "learning_rate": 5.229491325654736e-05, "loss": 1.1218, "step": 5836 }, { "epoch": 0.9202138332330094, "grad_norm": 0.89453125, "learning_rate": 5.229086719134122e-05, "loss": 0.9815, "step": 5837 }, { "epoch": 0.9203714850804025, "grad_norm": 1.0390625, "learning_rate": 5.228682122725184e-05, "loss": 1.1044, "step": 5838 }, { "epoch": 0.9205291369277955, "grad_norm": 0.81640625, "learning_rate": 5.228277536428783e-05, "loss": 0.9445, "step": 5839 }, { "epoch": 0.9206867887751885, "grad_norm": 0.98046875, "learning_rate": 5.2278729602457765e-05, "loss": 1.0341, "step": 5840 }, { "epoch": 0.9208444406225815, "grad_norm": 0.9140625, "learning_rate": 5.227468394177021e-05, "loss": 1.0504, "step": 5841 }, { "epoch": 0.9210020924699744, "grad_norm": 0.82421875, "learning_rate": 5.227063838223372e-05, "loss": 0.8376, "step": 5842 }, { "epoch": 0.9211597443173675, "grad_norm": 1.03125, "learning_rate": 5.226659292385685e-05, "loss": 1.01, "step": 5843 }, { "epoch": 0.9213173961647605, "grad_norm": 1.0546875, "learning_rate": 5.2262547566648255e-05, "loss": 1.1597, "step": 5844 }, { "epoch": 0.9214750480121535, "grad_norm": 1.03125, "learning_rate": 5.2258502310616455e-05, "loss": 1.0583, "step": 5845 }, { "epoch": 0.9216326998595465, "grad_norm": 1.0859375, "learning_rate": 5.225445715577003e-05, "loss": 0.9475, "step": 5846 }, { "epoch": 0.9217903517069396, "grad_norm": 0.96875, "learning_rate": 5.2250412102117564e-05, "loss": 0.8149, "step": 5847 }, { "epoch": 0.9219480035543326, "grad_norm": 1.0625, "learning_rate": 5.224636714966756e-05, "loss": 1.1223, "step": 5848 }, { "epoch": 0.9221056554017256, "grad_norm": 1.0234375, "learning_rate": 5.2242322298428714e-05, "loss": 0.973, "step": 5849 }, { "epoch": 0.9222633072491185, "grad_norm": 0.89453125, "learning_rate": 5.223827754840952e-05, "loss": 0.9615, "step": 5850 }, { "epoch": 0.9224209590965116, "grad_norm": 0.98828125, "learning_rate": 5.223423289961857e-05, "loss": 0.9123, "step": 5851 }, { "epoch": 0.9225786109439046, "grad_norm": 1.0703125, "learning_rate": 5.2230188352064434e-05, "loss": 1.1603, "step": 5852 }, { "epoch": 0.9227362627912976, "grad_norm": 0.98828125, "learning_rate": 5.222614390575564e-05, "loss": 1.1328, "step": 5853 }, { "epoch": 0.9228939146386906, "grad_norm": 0.89453125, "learning_rate": 5.222209956070085e-05, "loss": 0.8645, "step": 5854 }, { "epoch": 0.9230515664860837, "grad_norm": 0.92578125, "learning_rate": 5.22180553169086e-05, "loss": 0.7007, "step": 5855 }, { "epoch": 0.9232092183334767, "grad_norm": 0.9609375, "learning_rate": 5.2214011174387437e-05, "loss": 0.8474, "step": 5856 }, { "epoch": 0.9233668701808697, "grad_norm": 1.46875, "learning_rate": 5.220996713314594e-05, "loss": 1.0671, "step": 5857 }, { "epoch": 0.9235245220282626, "grad_norm": 1.0546875, "learning_rate": 5.2205923193192695e-05, "loss": 1.0246, "step": 5858 }, { "epoch": 0.9236821738756557, "grad_norm": 0.96875, "learning_rate": 5.220187935453627e-05, "loss": 1.0155, "step": 5859 }, { "epoch": 0.9238398257230487, "grad_norm": 1.34375, "learning_rate": 5.219783561718521e-05, "loss": 0.8627, "step": 5860 }, { "epoch": 0.9239974775704417, "grad_norm": 1.09375, "learning_rate": 5.219379198114812e-05, "loss": 0.9377, "step": 5861 }, { "epoch": 0.9241551294178347, "grad_norm": 0.92578125, "learning_rate": 5.2189748446433564e-05, "loss": 0.9327, "step": 5862 }, { "epoch": 0.9243127812652278, "grad_norm": 0.88671875, "learning_rate": 5.2185705013050045e-05, "loss": 0.9565, "step": 5863 }, { "epoch": 0.9244704331126208, "grad_norm": 1.0390625, "learning_rate": 5.2181661681006245e-05, "loss": 0.9692, "step": 5864 }, { "epoch": 0.9246280849600138, "grad_norm": 1.0625, "learning_rate": 5.217761845031066e-05, "loss": 0.9802, "step": 5865 }, { "epoch": 0.9247857368074067, "grad_norm": 0.8984375, "learning_rate": 5.217357532097189e-05, "loss": 0.8779, "step": 5866 }, { "epoch": 0.9249433886547997, "grad_norm": 0.8671875, "learning_rate": 5.2169532292998505e-05, "loss": 0.8657, "step": 5867 }, { "epoch": 0.9251010405021928, "grad_norm": 0.89453125, "learning_rate": 5.216548936639901e-05, "loss": 0.9938, "step": 5868 }, { "epoch": 0.9252586923495858, "grad_norm": 0.97265625, "learning_rate": 5.2161446541182067e-05, "loss": 0.8194, "step": 5869 }, { "epoch": 0.9254163441969788, "grad_norm": 0.9765625, "learning_rate": 5.215740381735621e-05, "loss": 0.894, "step": 5870 }, { "epoch": 0.9255739960443718, "grad_norm": 0.98046875, "learning_rate": 5.215336119493e-05, "loss": 0.9336, "step": 5871 }, { "epoch": 0.9257316478917649, "grad_norm": 0.86328125, "learning_rate": 5.2149318673912e-05, "loss": 0.8645, "step": 5872 }, { "epoch": 0.9258892997391579, "grad_norm": 0.9765625, "learning_rate": 5.214527625431075e-05, "loss": 0.9279, "step": 5873 }, { "epoch": 0.9260469515865508, "grad_norm": 1.0625, "learning_rate": 5.214123393613489e-05, "loss": 0.8389, "step": 5874 }, { "epoch": 0.9262046034339438, "grad_norm": 1.28125, "learning_rate": 5.213719171939295e-05, "loss": 1.2833, "step": 5875 }, { "epoch": 0.9263622552813369, "grad_norm": 0.92578125, "learning_rate": 5.21331496040935e-05, "loss": 0.9594, "step": 5876 }, { "epoch": 0.9265199071287299, "grad_norm": 1.0234375, "learning_rate": 5.21291075902451e-05, "loss": 1.077, "step": 5877 }, { "epoch": 0.9266775589761229, "grad_norm": 0.96875, "learning_rate": 5.212506567785627e-05, "loss": 0.8524, "step": 5878 }, { "epoch": 0.9268352108235159, "grad_norm": 0.88671875, "learning_rate": 5.212102386693567e-05, "loss": 0.7821, "step": 5879 }, { "epoch": 0.926992862670909, "grad_norm": 1.015625, "learning_rate": 5.211698215749183e-05, "loss": 0.9558, "step": 5880 }, { "epoch": 0.927150514518302, "grad_norm": 0.89453125, "learning_rate": 5.211294054953331e-05, "loss": 0.9101, "step": 5881 }, { "epoch": 0.9273081663656949, "grad_norm": 0.984375, "learning_rate": 5.210889904306868e-05, "loss": 1.0595, "step": 5882 }, { "epoch": 0.9274658182130879, "grad_norm": 0.98046875, "learning_rate": 5.2104857638106456e-05, "loss": 0.9815, "step": 5883 }, { "epoch": 0.927623470060481, "grad_norm": 1.0390625, "learning_rate": 5.210081633465529e-05, "loss": 1.0831, "step": 5884 }, { "epoch": 0.927781121907874, "grad_norm": 0.8828125, "learning_rate": 5.20967751327237e-05, "loss": 1.1076, "step": 5885 }, { "epoch": 0.927938773755267, "grad_norm": 0.9765625, "learning_rate": 5.2092734032320266e-05, "loss": 0.9915, "step": 5886 }, { "epoch": 0.92809642560266, "grad_norm": 1.0390625, "learning_rate": 5.208869303345354e-05, "loss": 1.1267, "step": 5887 }, { "epoch": 0.9282540774500531, "grad_norm": 0.95703125, "learning_rate": 5.2084652136132054e-05, "loss": 0.8463, "step": 5888 }, { "epoch": 0.9284117292974461, "grad_norm": 0.8984375, "learning_rate": 5.2080611340364436e-05, "loss": 0.9818, "step": 5889 }, { "epoch": 0.928569381144839, "grad_norm": 1.234375, "learning_rate": 5.2076570646159226e-05, "loss": 1.1022, "step": 5890 }, { "epoch": 0.928727032992232, "grad_norm": 0.99609375, "learning_rate": 5.207253005352499e-05, "loss": 1.0835, "step": 5891 }, { "epoch": 0.928884684839625, "grad_norm": 0.9765625, "learning_rate": 5.206848956247029e-05, "loss": 0.9519, "step": 5892 }, { "epoch": 0.9290423366870181, "grad_norm": 0.9375, "learning_rate": 5.206444917300365e-05, "loss": 0.988, "step": 5893 }, { "epoch": 0.9291999885344111, "grad_norm": 0.875, "learning_rate": 5.2060408885133705e-05, "loss": 0.9596, "step": 5894 }, { "epoch": 0.9293576403818041, "grad_norm": 1.015625, "learning_rate": 5.2056368698868986e-05, "loss": 1.039, "step": 5895 }, { "epoch": 0.9295152922291972, "grad_norm": 0.9453125, "learning_rate": 5.205232861421805e-05, "loss": 0.9141, "step": 5896 }, { "epoch": 0.9296729440765902, "grad_norm": 1.6328125, "learning_rate": 5.204828863118946e-05, "loss": 1.0132, "step": 5897 }, { "epoch": 0.9298305959239831, "grad_norm": 0.953125, "learning_rate": 5.204424874979179e-05, "loss": 0.8834, "step": 5898 }, { "epoch": 0.9299882477713761, "grad_norm": 0.94140625, "learning_rate": 5.204020897003359e-05, "loss": 0.8777, "step": 5899 }, { "epoch": 0.9301458996187691, "grad_norm": 0.8984375, "learning_rate": 5.203616929192341e-05, "loss": 0.8955, "step": 5900 }, { "epoch": 0.9303035514661622, "grad_norm": 1.03125, "learning_rate": 5.203212971546981e-05, "loss": 0.9617, "step": 5901 }, { "epoch": 0.9304612033135552, "grad_norm": 0.890625, "learning_rate": 5.202809024068141e-05, "loss": 0.8958, "step": 5902 }, { "epoch": 0.9306188551609482, "grad_norm": 0.98046875, "learning_rate": 5.202405086756672e-05, "loss": 0.9283, "step": 5903 }, { "epoch": 0.9307765070083412, "grad_norm": 0.83984375, "learning_rate": 5.202001159613431e-05, "loss": 0.7725, "step": 5904 }, { "epoch": 0.9309341588557343, "grad_norm": 0.9296875, "learning_rate": 5.201597242639275e-05, "loss": 0.8305, "step": 5905 }, { "epoch": 0.9310918107031272, "grad_norm": 0.9765625, "learning_rate": 5.2011933358350596e-05, "loss": 0.9733, "step": 5906 }, { "epoch": 0.9312494625505202, "grad_norm": 0.86328125, "learning_rate": 5.2007894392016386e-05, "loss": 0.7721, "step": 5907 }, { "epoch": 0.9314071143979132, "grad_norm": 1.078125, "learning_rate": 5.2003855527398684e-05, "loss": 0.9208, "step": 5908 }, { "epoch": 0.9315647662453063, "grad_norm": 1.0390625, "learning_rate": 5.1999816764506094e-05, "loss": 0.9847, "step": 5909 }, { "epoch": 0.9317224180926993, "grad_norm": 1.03125, "learning_rate": 5.199577810334716e-05, "loss": 0.8112, "step": 5910 }, { "epoch": 0.9318800699400923, "grad_norm": 0.93359375, "learning_rate": 5.199173954393042e-05, "loss": 0.8262, "step": 5911 }, { "epoch": 0.9320377217874853, "grad_norm": 1.0078125, "learning_rate": 5.198770108626445e-05, "loss": 1.0527, "step": 5912 }, { "epoch": 0.9321953736348784, "grad_norm": 0.984375, "learning_rate": 5.198366273035781e-05, "loss": 1.2363, "step": 5913 }, { "epoch": 0.9323530254822713, "grad_norm": 1.546875, "learning_rate": 5.1979624476219004e-05, "loss": 0.871, "step": 5914 }, { "epoch": 0.9325106773296643, "grad_norm": 1.0859375, "learning_rate": 5.197558632385667e-05, "loss": 0.8931, "step": 5915 }, { "epoch": 0.9326683291770573, "grad_norm": 1.0546875, "learning_rate": 5.1971548273279344e-05, "loss": 0.8731, "step": 5916 }, { "epoch": 0.9328259810244504, "grad_norm": 0.9296875, "learning_rate": 5.1967510324495585e-05, "loss": 1.0519, "step": 5917 }, { "epoch": 0.9329836328718434, "grad_norm": 0.96875, "learning_rate": 5.1963472477513945e-05, "loss": 1.0993, "step": 5918 }, { "epoch": 0.9331412847192364, "grad_norm": 0.9765625, "learning_rate": 5.195943473234293e-05, "loss": 0.961, "step": 5919 }, { "epoch": 0.9332989365666294, "grad_norm": 0.87890625, "learning_rate": 5.195539708899118e-05, "loss": 0.9082, "step": 5920 }, { "epoch": 0.9334565884140225, "grad_norm": 1.015625, "learning_rate": 5.1951359547467216e-05, "loss": 0.8568, "step": 5921 }, { "epoch": 0.9336142402614154, "grad_norm": 1.0, "learning_rate": 5.194732210777962e-05, "loss": 1.0848, "step": 5922 }, { "epoch": 0.9337718921088084, "grad_norm": 1.0, "learning_rate": 5.194328476993692e-05, "loss": 0.9581, "step": 5923 }, { "epoch": 0.9339295439562014, "grad_norm": 0.953125, "learning_rate": 5.193924753394763e-05, "loss": 0.9395, "step": 5924 }, { "epoch": 0.9340871958035945, "grad_norm": 0.92578125, "learning_rate": 5.193521039982041e-05, "loss": 0.8488, "step": 5925 }, { "epoch": 0.9342448476509875, "grad_norm": 0.98828125, "learning_rate": 5.193117336756377e-05, "loss": 1.0533, "step": 5926 }, { "epoch": 0.9344024994983805, "grad_norm": 0.9375, "learning_rate": 5.1927136437186255e-05, "loss": 1.03, "step": 5927 }, { "epoch": 0.9345601513457735, "grad_norm": 0.9375, "learning_rate": 5.192309960869641e-05, "loss": 0.9089, "step": 5928 }, { "epoch": 0.9347178031931666, "grad_norm": 0.95703125, "learning_rate": 5.1919062882102786e-05, "loss": 0.9257, "step": 5929 }, { "epoch": 0.9348754550405595, "grad_norm": 1.0625, "learning_rate": 5.1915026257414e-05, "loss": 1.1853, "step": 5930 }, { "epoch": 0.9350331068879525, "grad_norm": 0.88671875, "learning_rate": 5.191098973463855e-05, "loss": 0.7492, "step": 5931 }, { "epoch": 0.9351907587353455, "grad_norm": 0.9453125, "learning_rate": 5.190695331378504e-05, "loss": 0.9495, "step": 5932 }, { "epoch": 0.9353484105827385, "grad_norm": 0.9609375, "learning_rate": 5.190291699486196e-05, "loss": 1.0073, "step": 5933 }, { "epoch": 0.9355060624301316, "grad_norm": 1.0234375, "learning_rate": 5.189888077787788e-05, "loss": 0.9552, "step": 5934 }, { "epoch": 0.9356637142775246, "grad_norm": 0.984375, "learning_rate": 5.18948446628414e-05, "loss": 0.9881, "step": 5935 }, { "epoch": 0.9358213661249176, "grad_norm": 0.92578125, "learning_rate": 5.189080864976106e-05, "loss": 0.9923, "step": 5936 }, { "epoch": 0.9359790179723106, "grad_norm": 0.91015625, "learning_rate": 5.1886772738645394e-05, "loss": 0.9373, "step": 5937 }, { "epoch": 0.9361366698197036, "grad_norm": 0.96875, "learning_rate": 5.1882736929502964e-05, "loss": 1.0164, "step": 5938 }, { "epoch": 0.9362943216670966, "grad_norm": 1.03125, "learning_rate": 5.187870122234228e-05, "loss": 0.9574, "step": 5939 }, { "epoch": 0.9364519735144896, "grad_norm": 0.99609375, "learning_rate": 5.187466561717198e-05, "loss": 1.0283, "step": 5940 }, { "epoch": 0.9366096253618826, "grad_norm": 0.87890625, "learning_rate": 5.187063011400057e-05, "loss": 1.1919, "step": 5941 }, { "epoch": 0.9367672772092757, "grad_norm": 0.953125, "learning_rate": 5.1866594712836615e-05, "loss": 0.9824, "step": 5942 }, { "epoch": 0.9369249290566687, "grad_norm": 0.9609375, "learning_rate": 5.1862559413688664e-05, "loss": 0.8855, "step": 5943 }, { "epoch": 0.9370825809040617, "grad_norm": 0.9453125, "learning_rate": 5.185852421656526e-05, "loss": 0.9611, "step": 5944 }, { "epoch": 0.9372402327514547, "grad_norm": 0.875, "learning_rate": 5.1854489121474966e-05, "loss": 0.9198, "step": 5945 }, { "epoch": 0.9373978845988478, "grad_norm": 0.90234375, "learning_rate": 5.185045412842633e-05, "loss": 0.9571, "step": 5946 }, { "epoch": 0.9375555364462407, "grad_norm": 1.046875, "learning_rate": 5.18464192374279e-05, "loss": 1.1453, "step": 5947 }, { "epoch": 0.9377131882936337, "grad_norm": 0.88671875, "learning_rate": 5.1842384448488245e-05, "loss": 0.9111, "step": 5948 }, { "epoch": 0.9378708401410267, "grad_norm": 0.9921875, "learning_rate": 5.1838349761615857e-05, "loss": 1.0392, "step": 5949 }, { "epoch": 0.9380284919884198, "grad_norm": 0.91796875, "learning_rate": 5.183431517681937e-05, "loss": 0.879, "step": 5950 }, { "epoch": 0.9381861438358128, "grad_norm": 1.109375, "learning_rate": 5.1830280694107304e-05, "loss": 1.0958, "step": 5951 }, { "epoch": 0.9383437956832058, "grad_norm": 0.984375, "learning_rate": 5.18262463134882e-05, "loss": 1.0192, "step": 5952 }, { "epoch": 0.9385014475305988, "grad_norm": 0.94140625, "learning_rate": 5.1822212034970615e-05, "loss": 0.8064, "step": 5953 }, { "epoch": 0.9386590993779919, "grad_norm": 0.93359375, "learning_rate": 5.1818177858563066e-05, "loss": 0.942, "step": 5954 }, { "epoch": 0.9388167512253848, "grad_norm": 0.95703125, "learning_rate": 5.181414378427416e-05, "loss": 1.0061, "step": 5955 }, { "epoch": 0.9389744030727778, "grad_norm": 0.98046875, "learning_rate": 5.181010981211243e-05, "loss": 1.0374, "step": 5956 }, { "epoch": 0.9391320549201708, "grad_norm": 0.8828125, "learning_rate": 5.180607594208642e-05, "loss": 0.9151, "step": 5957 }, { "epoch": 0.9392897067675638, "grad_norm": 0.9140625, "learning_rate": 5.180204217420468e-05, "loss": 0.9625, "step": 5958 }, { "epoch": 0.9394473586149569, "grad_norm": 0.89453125, "learning_rate": 5.179800850847572e-05, "loss": 0.9019, "step": 5959 }, { "epoch": 0.9396050104623499, "grad_norm": 0.91796875, "learning_rate": 5.179397494490814e-05, "loss": 0.9473, "step": 5960 }, { "epoch": 0.9397626623097429, "grad_norm": 0.8671875, "learning_rate": 5.17899414835105e-05, "loss": 0.9781, "step": 5961 }, { "epoch": 0.939920314157136, "grad_norm": 0.9140625, "learning_rate": 5.1785908124291314e-05, "loss": 0.8964, "step": 5962 }, { "epoch": 0.9400779660045289, "grad_norm": 0.8671875, "learning_rate": 5.178187486725914e-05, "loss": 0.7881, "step": 5963 }, { "epoch": 0.9402356178519219, "grad_norm": 0.8984375, "learning_rate": 5.177784171242248e-05, "loss": 1.0487, "step": 5964 }, { "epoch": 0.9403932696993149, "grad_norm": 0.9609375, "learning_rate": 5.177380865978998e-05, "loss": 1.0007, "step": 5965 }, { "epoch": 0.9405509215467079, "grad_norm": 0.88671875, "learning_rate": 5.1769775709370136e-05, "loss": 0.876, "step": 5966 }, { "epoch": 0.940708573394101, "grad_norm": 0.984375, "learning_rate": 5.17657428611715e-05, "loss": 0.9164, "step": 5967 }, { "epoch": 0.940866225241494, "grad_norm": 1.0703125, "learning_rate": 5.1761710115202597e-05, "loss": 1.0681, "step": 5968 }, { "epoch": 0.941023877088887, "grad_norm": 1.0234375, "learning_rate": 5.175767747147197e-05, "loss": 1.197, "step": 5969 }, { "epoch": 0.94118152893628, "grad_norm": 0.9296875, "learning_rate": 5.175364492998822e-05, "loss": 0.8356, "step": 5970 }, { "epoch": 0.941339180783673, "grad_norm": 0.89453125, "learning_rate": 5.174961249075986e-05, "loss": 0.8718, "step": 5971 }, { "epoch": 0.941496832631066, "grad_norm": 0.93359375, "learning_rate": 5.174558015379545e-05, "loss": 0.9766, "step": 5972 }, { "epoch": 0.941654484478459, "grad_norm": 0.921875, "learning_rate": 5.1741547919103506e-05, "loss": 0.9965, "step": 5973 }, { "epoch": 0.941812136325852, "grad_norm": 0.96484375, "learning_rate": 5.173751578669257e-05, "loss": 1.0399, "step": 5974 }, { "epoch": 0.9419697881732451, "grad_norm": 0.84765625, "learning_rate": 5.173348375657123e-05, "loss": 0.7095, "step": 5975 }, { "epoch": 0.9421274400206381, "grad_norm": 0.97265625, "learning_rate": 5.172945182874803e-05, "loss": 1.1426, "step": 5976 }, { "epoch": 0.9422850918680311, "grad_norm": 0.96875, "learning_rate": 5.17254200032315e-05, "loss": 0.9876, "step": 5977 }, { "epoch": 0.9424427437154241, "grad_norm": 1.0078125, "learning_rate": 5.172138828003017e-05, "loss": 0.8665, "step": 5978 }, { "epoch": 0.942600395562817, "grad_norm": 1.078125, "learning_rate": 5.171735665915257e-05, "loss": 1.2725, "step": 5979 }, { "epoch": 0.9427580474102101, "grad_norm": 0.90234375, "learning_rate": 5.171332514060731e-05, "loss": 0.8259, "step": 5980 }, { "epoch": 0.9429156992576031, "grad_norm": 1.015625, "learning_rate": 5.1709293724402896e-05, "loss": 0.9257, "step": 5981 }, { "epoch": 0.9430733511049961, "grad_norm": 0.98828125, "learning_rate": 5.1705262410547875e-05, "loss": 0.842, "step": 5982 }, { "epoch": 0.9432310029523892, "grad_norm": 0.953125, "learning_rate": 5.170123119905078e-05, "loss": 0.786, "step": 5983 }, { "epoch": 0.9433886547997822, "grad_norm": 0.8984375, "learning_rate": 5.1697200089920174e-05, "loss": 0.9072, "step": 5984 }, { "epoch": 0.9435463066471752, "grad_norm": 1.0234375, "learning_rate": 5.169316908316459e-05, "loss": 1.1225, "step": 5985 }, { "epoch": 0.9437039584945682, "grad_norm": 0.8203125, "learning_rate": 5.168913817879257e-05, "loss": 0.8003, "step": 5986 }, { "epoch": 0.9438616103419611, "grad_norm": 0.9765625, "learning_rate": 5.1685107376812625e-05, "loss": 1.0501, "step": 5987 }, { "epoch": 0.9440192621893542, "grad_norm": 1.296875, "learning_rate": 5.168107667723338e-05, "loss": 1.0591, "step": 5988 }, { "epoch": 0.9441769140367472, "grad_norm": 0.92578125, "learning_rate": 5.1677046080063315e-05, "loss": 0.7935, "step": 5989 }, { "epoch": 0.9443345658841402, "grad_norm": 1.0078125, "learning_rate": 5.1673015585311e-05, "loss": 0.9711, "step": 5990 }, { "epoch": 0.9444922177315332, "grad_norm": 1.0625, "learning_rate": 5.1668985192984966e-05, "loss": 1.0681, "step": 5991 }, { "epoch": 0.9446498695789263, "grad_norm": 0.88671875, "learning_rate": 5.166495490309376e-05, "loss": 0.8749, "step": 5992 }, { "epoch": 0.9448075214263193, "grad_norm": 0.9921875, "learning_rate": 5.16609247156459e-05, "loss": 1.0906, "step": 5993 }, { "epoch": 0.9449651732737123, "grad_norm": 0.88671875, "learning_rate": 5.1656894630649924e-05, "loss": 0.892, "step": 5994 }, { "epoch": 0.9451228251211052, "grad_norm": 0.84765625, "learning_rate": 5.165286464811443e-05, "loss": 0.9334, "step": 5995 }, { "epoch": 0.9452804769684983, "grad_norm": 0.83203125, "learning_rate": 5.1648834768047923e-05, "loss": 0.8715, "step": 5996 }, { "epoch": 0.9454381288158913, "grad_norm": 0.953125, "learning_rate": 5.1644804990458964e-05, "loss": 0.7605, "step": 5997 }, { "epoch": 0.9455957806632843, "grad_norm": 0.94921875, "learning_rate": 5.164077531535605e-05, "loss": 0.9868, "step": 5998 }, { "epoch": 0.9457534325106773, "grad_norm": 0.96875, "learning_rate": 5.1636745742747724e-05, "loss": 0.8976, "step": 5999 }, { "epoch": 0.9459110843580704, "grad_norm": 0.96484375, "learning_rate": 5.163271627264259e-05, "loss": 0.9973, "step": 6000 }, { "epoch": 0.9459110843580704, "eval_loss": 0.959323525428772, "eval_runtime": 309.4169, "eval_samples_per_second": 32.319, "eval_steps_per_second": 0.675, "step": 6000 }, { "epoch": 0.9460687362054634, "grad_norm": 0.875, "learning_rate": 5.162868690504916e-05, "loss": 0.8932, "step": 6001 }, { "epoch": 0.9462263880528564, "grad_norm": 0.9921875, "learning_rate": 5.162465763997595e-05, "loss": 0.932, "step": 6002 }, { "epoch": 0.9463840399002493, "grad_norm": 1.0078125, "learning_rate": 5.162062847743152e-05, "loss": 1.0426, "step": 6003 }, { "epoch": 0.9465416917476424, "grad_norm": 0.94140625, "learning_rate": 5.161659941742436e-05, "loss": 1.0108, "step": 6004 }, { "epoch": 0.9466993435950354, "grad_norm": 0.9609375, "learning_rate": 5.161257045996308e-05, "loss": 0.8938, "step": 6005 }, { "epoch": 0.9468569954424284, "grad_norm": 0.98046875, "learning_rate": 5.160854160505622e-05, "loss": 1.0057, "step": 6006 }, { "epoch": 0.9470146472898214, "grad_norm": 1.015625, "learning_rate": 5.160451285271226e-05, "loss": 1.1199, "step": 6007 }, { "epoch": 0.9471722991372145, "grad_norm": 1.1875, "learning_rate": 5.160048420293978e-05, "loss": 0.8967, "step": 6008 }, { "epoch": 0.9473299509846075, "grad_norm": 1.0234375, "learning_rate": 5.159645565574727e-05, "loss": 0.8131, "step": 6009 }, { "epoch": 0.9474876028320005, "grad_norm": 1.09375, "learning_rate": 5.159242721114334e-05, "loss": 0.9427, "step": 6010 }, { "epoch": 0.9476452546793934, "grad_norm": 0.96875, "learning_rate": 5.158839886913651e-05, "loss": 1.0102, "step": 6011 }, { "epoch": 0.9478029065267864, "grad_norm": 1.0546875, "learning_rate": 5.158437062973529e-05, "loss": 1.1484, "step": 6012 }, { "epoch": 0.9479605583741795, "grad_norm": 0.99609375, "learning_rate": 5.158034249294823e-05, "loss": 0.967, "step": 6013 }, { "epoch": 0.9481182102215725, "grad_norm": 1.1796875, "learning_rate": 5.1576314458783816e-05, "loss": 0.977, "step": 6014 }, { "epoch": 0.9482758620689655, "grad_norm": 0.9453125, "learning_rate": 5.157228652725069e-05, "loss": 1.1578, "step": 6015 }, { "epoch": 0.9484335139163586, "grad_norm": 1.0390625, "learning_rate": 5.156825869835733e-05, "loss": 1.125, "step": 6016 }, { "epoch": 0.9485911657637516, "grad_norm": 0.90625, "learning_rate": 5.156423097211227e-05, "loss": 0.9617, "step": 6017 }, { "epoch": 0.9487488176111446, "grad_norm": 1.0859375, "learning_rate": 5.156020334852407e-05, "loss": 1.3923, "step": 6018 }, { "epoch": 0.9489064694585375, "grad_norm": 0.984375, "learning_rate": 5.1556175827601196e-05, "loss": 0.9741, "step": 6019 }, { "epoch": 0.9490641213059305, "grad_norm": 0.93359375, "learning_rate": 5.155214840935228e-05, "loss": 0.8716, "step": 6020 }, { "epoch": 0.9492217731533236, "grad_norm": 1.125, "learning_rate": 5.1548121093785825e-05, "loss": 1.0271, "step": 6021 }, { "epoch": 0.9493794250007166, "grad_norm": 0.99609375, "learning_rate": 5.154409388091035e-05, "loss": 1.0327, "step": 6022 }, { "epoch": 0.9495370768481096, "grad_norm": 0.9140625, "learning_rate": 5.1540066770734406e-05, "loss": 0.9956, "step": 6023 }, { "epoch": 0.9496947286955026, "grad_norm": 0.9921875, "learning_rate": 5.1536039763266474e-05, "loss": 0.8596, "step": 6024 }, { "epoch": 0.9498523805428957, "grad_norm": 0.90234375, "learning_rate": 5.153201285851519e-05, "loss": 1.0377, "step": 6025 }, { "epoch": 0.9500100323902887, "grad_norm": 0.93359375, "learning_rate": 5.152798605648901e-05, "loss": 0.8718, "step": 6026 }, { "epoch": 0.9501676842376816, "grad_norm": 0.89453125, "learning_rate": 5.1523959357196516e-05, "loss": 1.0539, "step": 6027 }, { "epoch": 0.9503253360850746, "grad_norm": 0.93359375, "learning_rate": 5.1519932760646194e-05, "loss": 0.9756, "step": 6028 }, { "epoch": 0.9504829879324677, "grad_norm": 0.75390625, "learning_rate": 5.1515906266846626e-05, "loss": 0.7756, "step": 6029 }, { "epoch": 0.9506406397798607, "grad_norm": 0.87890625, "learning_rate": 5.151187987580631e-05, "loss": 0.8525, "step": 6030 }, { "epoch": 0.9507982916272537, "grad_norm": 0.890625, "learning_rate": 5.1507853587533806e-05, "loss": 0.9808, "step": 6031 }, { "epoch": 0.9509559434746467, "grad_norm": 0.91796875, "learning_rate": 5.150382740203763e-05, "loss": 0.9077, "step": 6032 }, { "epoch": 0.9511135953220398, "grad_norm": 0.8984375, "learning_rate": 5.149980131932631e-05, "loss": 0.8928, "step": 6033 }, { "epoch": 0.9512712471694328, "grad_norm": 1.046875, "learning_rate": 5.149577533940836e-05, "loss": 0.8483, "step": 6034 }, { "epoch": 0.9514288990168257, "grad_norm": 0.7890625, "learning_rate": 5.149174946229238e-05, "loss": 0.8436, "step": 6035 }, { "epoch": 0.9515865508642187, "grad_norm": 0.94140625, "learning_rate": 5.1487723687986866e-05, "loss": 0.9597, "step": 6036 }, { "epoch": 0.9517442027116118, "grad_norm": 0.9921875, "learning_rate": 5.148369801650035e-05, "loss": 1.1048, "step": 6037 }, { "epoch": 0.9519018545590048, "grad_norm": 0.88671875, "learning_rate": 5.1479672447841354e-05, "loss": 0.9719, "step": 6038 }, { "epoch": 0.9520595064063978, "grad_norm": 1.0078125, "learning_rate": 5.147564698201838e-05, "loss": 0.7895, "step": 6039 }, { "epoch": 0.9522171582537908, "grad_norm": 1.0, "learning_rate": 5.1471621619040044e-05, "loss": 1.1075, "step": 6040 }, { "epoch": 0.9523748101011839, "grad_norm": 0.94140625, "learning_rate": 5.146759635891483e-05, "loss": 0.9394, "step": 6041 }, { "epoch": 0.9525324619485769, "grad_norm": 0.96484375, "learning_rate": 5.1463571201651264e-05, "loss": 0.8723, "step": 6042 }, { "epoch": 0.9526901137959698, "grad_norm": 0.9453125, "learning_rate": 5.14595461472579e-05, "loss": 0.8326, "step": 6043 }, { "epoch": 0.9528477656433628, "grad_norm": 0.92578125, "learning_rate": 5.145552119574321e-05, "loss": 1.0257, "step": 6044 }, { "epoch": 0.9530054174907558, "grad_norm": 1.1328125, "learning_rate": 5.145149634711579e-05, "loss": 1.1076, "step": 6045 }, { "epoch": 0.9531630693381489, "grad_norm": 1.03125, "learning_rate": 5.144747160138417e-05, "loss": 1.1927, "step": 6046 }, { "epoch": 0.9533207211855419, "grad_norm": 1.5390625, "learning_rate": 5.144344695855685e-05, "loss": 1.0061, "step": 6047 }, { "epoch": 0.9534783730329349, "grad_norm": 1.015625, "learning_rate": 5.143942241864237e-05, "loss": 0.9752, "step": 6048 }, { "epoch": 0.953636024880328, "grad_norm": 0.875, "learning_rate": 5.143539798164923e-05, "loss": 0.8646, "step": 6049 }, { "epoch": 0.953793676727721, "grad_norm": 1.0859375, "learning_rate": 5.143137364758601e-05, "loss": 0.999, "step": 6050 }, { "epoch": 0.9539513285751139, "grad_norm": 0.9765625, "learning_rate": 5.142734941646123e-05, "loss": 0.89, "step": 6051 }, { "epoch": 0.9541089804225069, "grad_norm": 0.96875, "learning_rate": 5.1423325288283396e-05, "loss": 0.9009, "step": 6052 }, { "epoch": 0.9542666322698999, "grad_norm": 0.97265625, "learning_rate": 5.141930126306105e-05, "loss": 0.9557, "step": 6053 }, { "epoch": 0.954424284117293, "grad_norm": 1.234375, "learning_rate": 5.141527734080272e-05, "loss": 0.7477, "step": 6054 }, { "epoch": 0.954581935964686, "grad_norm": 0.9296875, "learning_rate": 5.14112535215169e-05, "loss": 1.0028, "step": 6055 }, { "epoch": 0.954739587812079, "grad_norm": 1.0546875, "learning_rate": 5.140722980521218e-05, "loss": 1.1223, "step": 6056 }, { "epoch": 0.954897239659472, "grad_norm": 0.859375, "learning_rate": 5.140320619189707e-05, "loss": 0.8888, "step": 6057 }, { "epoch": 0.9550548915068651, "grad_norm": 1.0546875, "learning_rate": 5.139918268158008e-05, "loss": 1.1492, "step": 6058 }, { "epoch": 0.955212543354258, "grad_norm": 0.8671875, "learning_rate": 5.139515927426974e-05, "loss": 0.8697, "step": 6059 }, { "epoch": 0.955370195201651, "grad_norm": 0.9375, "learning_rate": 5.139113596997456e-05, "loss": 0.9028, "step": 6060 }, { "epoch": 0.955527847049044, "grad_norm": 0.89453125, "learning_rate": 5.13871127687031e-05, "loss": 0.8188, "step": 6061 }, { "epoch": 0.9556854988964371, "grad_norm": 0.90234375, "learning_rate": 5.1383089670463904e-05, "loss": 0.9077, "step": 6062 }, { "epoch": 0.9558431507438301, "grad_norm": 0.9296875, "learning_rate": 5.137906667526545e-05, "loss": 0.8774, "step": 6063 }, { "epoch": 0.9560008025912231, "grad_norm": 1.0234375, "learning_rate": 5.1375043783116295e-05, "loss": 0.9426, "step": 6064 }, { "epoch": 0.9561584544386161, "grad_norm": 0.87890625, "learning_rate": 5.137102099402491e-05, "loss": 1.0485, "step": 6065 }, { "epoch": 0.9563161062860092, "grad_norm": 0.90625, "learning_rate": 5.1366998307999915e-05, "loss": 0.9104, "step": 6066 }, { "epoch": 0.9564737581334021, "grad_norm": 0.87109375, "learning_rate": 5.1362975725049775e-05, "loss": 0.7291, "step": 6067 }, { "epoch": 0.9566314099807951, "grad_norm": 1.0234375, "learning_rate": 5.135895324518305e-05, "loss": 1.1182, "step": 6068 }, { "epoch": 0.9567890618281881, "grad_norm": 1.09375, "learning_rate": 5.1354930868408224e-05, "loss": 1.1858, "step": 6069 }, { "epoch": 0.9569467136755812, "grad_norm": 0.98046875, "learning_rate": 5.1350908594733835e-05, "loss": 0.8538, "step": 6070 }, { "epoch": 0.9571043655229742, "grad_norm": 0.890625, "learning_rate": 5.134688642416844e-05, "loss": 0.9835, "step": 6071 }, { "epoch": 0.9572620173703672, "grad_norm": 1.015625, "learning_rate": 5.1342864356720486e-05, "loss": 1.1581, "step": 6072 }, { "epoch": 0.9574196692177602, "grad_norm": 2.609375, "learning_rate": 5.133884239239859e-05, "loss": 1.1422, "step": 6073 }, { "epoch": 0.9575773210651533, "grad_norm": 0.921875, "learning_rate": 5.1334820531211234e-05, "loss": 0.9639, "step": 6074 }, { "epoch": 0.9577349729125462, "grad_norm": 0.921875, "learning_rate": 5.1330798773166946e-05, "loss": 0.8985, "step": 6075 }, { "epoch": 0.9578926247599392, "grad_norm": 0.984375, "learning_rate": 5.132677711827425e-05, "loss": 0.8634, "step": 6076 }, { "epoch": 0.9580502766073322, "grad_norm": 0.9921875, "learning_rate": 5.132275556654166e-05, "loss": 0.9618, "step": 6077 }, { "epoch": 0.9582079284547252, "grad_norm": 0.859375, "learning_rate": 5.131873411797772e-05, "loss": 0.9962, "step": 6078 }, { "epoch": 0.9583655803021183, "grad_norm": 0.94140625, "learning_rate": 5.131471277259092e-05, "loss": 0.9375, "step": 6079 }, { "epoch": 0.9585232321495113, "grad_norm": 0.95703125, "learning_rate": 5.131069153038979e-05, "loss": 0.9513, "step": 6080 }, { "epoch": 0.9586808839969043, "grad_norm": 0.88671875, "learning_rate": 5.130667039138288e-05, "loss": 0.8425, "step": 6081 }, { "epoch": 0.9588385358442973, "grad_norm": 1.0078125, "learning_rate": 5.130264935557871e-05, "loss": 0.9104, "step": 6082 }, { "epoch": 0.9589961876916903, "grad_norm": 1.0078125, "learning_rate": 5.1298628422985804e-05, "loss": 1.2201, "step": 6083 }, { "epoch": 0.9591538395390833, "grad_norm": 0.921875, "learning_rate": 5.129460759361265e-05, "loss": 1.0961, "step": 6084 }, { "epoch": 0.9593114913864763, "grad_norm": 0.99609375, "learning_rate": 5.1290586867467775e-05, "loss": 0.8757, "step": 6085 }, { "epoch": 0.9594691432338693, "grad_norm": 0.91796875, "learning_rate": 5.128656624455974e-05, "loss": 0.8503, "step": 6086 }, { "epoch": 0.9596267950812624, "grad_norm": 0.99609375, "learning_rate": 5.128254572489705e-05, "loss": 0.9588, "step": 6087 }, { "epoch": 0.9597844469286554, "grad_norm": 0.87890625, "learning_rate": 5.127852530848821e-05, "loss": 0.8888, "step": 6088 }, { "epoch": 0.9599420987760484, "grad_norm": 0.9765625, "learning_rate": 5.1274504995341765e-05, "loss": 0.8162, "step": 6089 }, { "epoch": 0.9600997506234414, "grad_norm": 1.0703125, "learning_rate": 5.127048478546617e-05, "loss": 0.9641, "step": 6090 }, { "epoch": 0.9602574024708344, "grad_norm": 0.9921875, "learning_rate": 5.126646467887004e-05, "loss": 1.0423, "step": 6091 }, { "epoch": 0.9604150543182274, "grad_norm": 0.98828125, "learning_rate": 5.1262444675561846e-05, "loss": 0.9312, "step": 6092 }, { "epoch": 0.9605727061656204, "grad_norm": 0.890625, "learning_rate": 5.125842477555014e-05, "loss": 0.7581, "step": 6093 }, { "epoch": 0.9607303580130134, "grad_norm": 0.9921875, "learning_rate": 5.1254404978843396e-05, "loss": 0.994, "step": 6094 }, { "epoch": 0.9608880098604065, "grad_norm": 1.015625, "learning_rate": 5.125038528545012e-05, "loss": 1.0063, "step": 6095 }, { "epoch": 0.9610456617077995, "grad_norm": 1.0625, "learning_rate": 5.12463656953789e-05, "loss": 0.9376, "step": 6096 }, { "epoch": 0.9612033135551925, "grad_norm": 1.0, "learning_rate": 5.1242346208638236e-05, "loss": 0.9376, "step": 6097 }, { "epoch": 0.9613609654025855, "grad_norm": 1.0078125, "learning_rate": 5.123832682523661e-05, "loss": 0.9298, "step": 6098 }, { "epoch": 0.9615186172499784, "grad_norm": 0.97265625, "learning_rate": 5.123430754518258e-05, "loss": 1.0575, "step": 6099 }, { "epoch": 0.9616762690973715, "grad_norm": 0.984375, "learning_rate": 5.1230288368484604e-05, "loss": 1.0552, "step": 6100 }, { "epoch": 0.9618339209447645, "grad_norm": 1.40625, "learning_rate": 5.122626929515128e-05, "loss": 1.1064, "step": 6101 }, { "epoch": 0.9619915727921575, "grad_norm": 0.9609375, "learning_rate": 5.12222503251911e-05, "loss": 1.0491, "step": 6102 }, { "epoch": 0.9621492246395505, "grad_norm": 0.94921875, "learning_rate": 5.121823145861257e-05, "loss": 0.8215, "step": 6103 }, { "epoch": 0.9623068764869436, "grad_norm": 1.171875, "learning_rate": 5.12142126954242e-05, "loss": 1.0392, "step": 6104 }, { "epoch": 0.9624645283343366, "grad_norm": 0.94921875, "learning_rate": 5.1210194035634496e-05, "loss": 0.7783, "step": 6105 }, { "epoch": 0.9626221801817296, "grad_norm": 0.9296875, "learning_rate": 5.120617547925202e-05, "loss": 0.8902, "step": 6106 }, { "epoch": 0.9627798320291227, "grad_norm": 0.95703125, "learning_rate": 5.120215702628527e-05, "loss": 1.0132, "step": 6107 }, { "epoch": 0.9629374838765156, "grad_norm": 0.8984375, "learning_rate": 5.119813867674276e-05, "loss": 0.9804, "step": 6108 }, { "epoch": 0.9630951357239086, "grad_norm": 0.99609375, "learning_rate": 5.119412043063301e-05, "loss": 1.3122, "step": 6109 }, { "epoch": 0.9632527875713016, "grad_norm": 0.9375, "learning_rate": 5.119010228796448e-05, "loss": 0.8689, "step": 6110 }, { "epoch": 0.9634104394186946, "grad_norm": 0.82421875, "learning_rate": 5.118608424874579e-05, "loss": 0.9412, "step": 6111 }, { "epoch": 0.9635680912660877, "grad_norm": 1.0, "learning_rate": 5.118206631298541e-05, "loss": 1.2311, "step": 6112 }, { "epoch": 0.9637257431134807, "grad_norm": 1.765625, "learning_rate": 5.117804848069183e-05, "loss": 1.0583, "step": 6113 }, { "epoch": 0.9638833949608737, "grad_norm": 1.4921875, "learning_rate": 5.1174030751873604e-05, "loss": 1.1644, "step": 6114 }, { "epoch": 0.9640410468082667, "grad_norm": 1.0859375, "learning_rate": 5.1170013126539216e-05, "loss": 1.0145, "step": 6115 }, { "epoch": 0.9641986986556597, "grad_norm": 0.96484375, "learning_rate": 5.1165995604697205e-05, "loss": 1.0502, "step": 6116 }, { "epoch": 0.9643563505030527, "grad_norm": 0.95703125, "learning_rate": 5.1161978186356067e-05, "loss": 1.0838, "step": 6117 }, { "epoch": 0.9645140023504457, "grad_norm": 0.97265625, "learning_rate": 5.115796087152433e-05, "loss": 1.1146, "step": 6118 }, { "epoch": 0.9646716541978387, "grad_norm": 0.96484375, "learning_rate": 5.11539436602105e-05, "loss": 0.8063, "step": 6119 }, { "epoch": 0.9648293060452318, "grad_norm": 0.94921875, "learning_rate": 5.114992655242306e-05, "loss": 1.0924, "step": 6120 }, { "epoch": 0.9649869578926248, "grad_norm": 0.9765625, "learning_rate": 5.11459095481706e-05, "loss": 0.8917, "step": 6121 }, { "epoch": 0.9651446097400178, "grad_norm": 1.0078125, "learning_rate": 5.114189264746159e-05, "loss": 0.8554, "step": 6122 }, { "epoch": 0.9653022615874108, "grad_norm": 0.83203125, "learning_rate": 5.113787585030454e-05, "loss": 0.8014, "step": 6123 }, { "epoch": 0.9654599134348038, "grad_norm": 0.9140625, "learning_rate": 5.113385915670796e-05, "loss": 0.8642, "step": 6124 }, { "epoch": 0.9656175652821968, "grad_norm": 1.078125, "learning_rate": 5.112984256668035e-05, "loss": 0.972, "step": 6125 }, { "epoch": 0.9657752171295898, "grad_norm": 1.0859375, "learning_rate": 5.1125826080230285e-05, "loss": 0.9072, "step": 6126 }, { "epoch": 0.9659328689769828, "grad_norm": 1.0, "learning_rate": 5.112180969736623e-05, "loss": 0.9733, "step": 6127 }, { "epoch": 0.9660905208243759, "grad_norm": 0.890625, "learning_rate": 5.1117793418096704e-05, "loss": 0.9915, "step": 6128 }, { "epoch": 0.9662481726717689, "grad_norm": 1.0078125, "learning_rate": 5.111377724243023e-05, "loss": 1.1649, "step": 6129 }, { "epoch": 0.9664058245191619, "grad_norm": 0.99609375, "learning_rate": 5.110976117037527e-05, "loss": 1.0184, "step": 6130 }, { "epoch": 0.9665634763665549, "grad_norm": 0.93359375, "learning_rate": 5.110574520194041e-05, "loss": 0.9566, "step": 6131 }, { "epoch": 0.9667211282139478, "grad_norm": 1.0078125, "learning_rate": 5.110172933713413e-05, "loss": 0.8872, "step": 6132 }, { "epoch": 0.9668787800613409, "grad_norm": 0.91015625, "learning_rate": 5.109771357596495e-05, "loss": 0.9252, "step": 6133 }, { "epoch": 0.9670364319087339, "grad_norm": 0.97265625, "learning_rate": 5.109369791844136e-05, "loss": 1.0443, "step": 6134 }, { "epoch": 0.9671940837561269, "grad_norm": 0.84375, "learning_rate": 5.108968236457185e-05, "loss": 0.8595, "step": 6135 }, { "epoch": 0.96735173560352, "grad_norm": 1.9765625, "learning_rate": 5.1085666914364983e-05, "loss": 1.0237, "step": 6136 }, { "epoch": 0.967509387450913, "grad_norm": 0.953125, "learning_rate": 5.108165156782927e-05, "loss": 1.0559, "step": 6137 }, { "epoch": 0.967667039298306, "grad_norm": 1.0703125, "learning_rate": 5.10776363249732e-05, "loss": 1.3043, "step": 6138 }, { "epoch": 0.967824691145699, "grad_norm": 0.90234375, "learning_rate": 5.107362118580528e-05, "loss": 0.9638, "step": 6139 }, { "epoch": 0.9679823429930919, "grad_norm": 1.015625, "learning_rate": 5.106960615033397e-05, "loss": 0.8888, "step": 6140 }, { "epoch": 0.968139994840485, "grad_norm": 0.9609375, "learning_rate": 5.106559121856789e-05, "loss": 0.9563, "step": 6141 }, { "epoch": 0.968297646687878, "grad_norm": 0.8203125, "learning_rate": 5.1061576390515474e-05, "loss": 0.767, "step": 6142 }, { "epoch": 0.968455298535271, "grad_norm": 1.25, "learning_rate": 5.105756166618527e-05, "loss": 1.1356, "step": 6143 }, { "epoch": 0.968612950382664, "grad_norm": 1.0703125, "learning_rate": 5.105354704558576e-05, "loss": 1.0198, "step": 6144 }, { "epoch": 0.9687706022300571, "grad_norm": 0.9609375, "learning_rate": 5.104953252872542e-05, "loss": 1.1681, "step": 6145 }, { "epoch": 0.9689282540774501, "grad_norm": 1.03125, "learning_rate": 5.1045518115612835e-05, "loss": 1.0892, "step": 6146 }, { "epoch": 0.9690859059248431, "grad_norm": 1.21875, "learning_rate": 5.1041503806256474e-05, "loss": 1.1484, "step": 6147 }, { "epoch": 0.969243557772236, "grad_norm": 0.8828125, "learning_rate": 5.103748960066485e-05, "loss": 0.9237, "step": 6148 }, { "epoch": 0.9694012096196291, "grad_norm": 0.94140625, "learning_rate": 5.103347549884647e-05, "loss": 1.0899, "step": 6149 }, { "epoch": 0.9695588614670221, "grad_norm": 0.921875, "learning_rate": 5.1029461500809805e-05, "loss": 1.0778, "step": 6150 }, { "epoch": 0.9697165133144151, "grad_norm": 0.93359375, "learning_rate": 5.102544760656343e-05, "loss": 1.0235, "step": 6151 }, { "epoch": 0.9698741651618081, "grad_norm": 0.8828125, "learning_rate": 5.1021433816115814e-05, "loss": 0.812, "step": 6152 }, { "epoch": 0.9700318170092012, "grad_norm": 0.9765625, "learning_rate": 5.1017420129475476e-05, "loss": 1.0329, "step": 6153 }, { "epoch": 0.9701894688565942, "grad_norm": 0.89453125, "learning_rate": 5.101340654665092e-05, "loss": 0.9798, "step": 6154 }, { "epoch": 0.9703471207039872, "grad_norm": 0.9765625, "learning_rate": 5.1009393067650645e-05, "loss": 0.9775, "step": 6155 }, { "epoch": 0.9705047725513801, "grad_norm": 0.87890625, "learning_rate": 5.100537969248316e-05, "loss": 0.766, "step": 6156 }, { "epoch": 0.9706624243987731, "grad_norm": 1.375, "learning_rate": 5.100136642115697e-05, "loss": 1.1291, "step": 6157 }, { "epoch": 0.9708200762461662, "grad_norm": 0.95703125, "learning_rate": 5.099735325368056e-05, "loss": 0.9208, "step": 6158 }, { "epoch": 0.9709777280935592, "grad_norm": 0.9921875, "learning_rate": 5.099334019006248e-05, "loss": 1.126, "step": 6159 }, { "epoch": 0.9711353799409522, "grad_norm": 5.125, "learning_rate": 5.098932723031122e-05, "loss": 1.0685, "step": 6160 }, { "epoch": 0.9712930317883453, "grad_norm": 1.0390625, "learning_rate": 5.098531437443528e-05, "loss": 0.992, "step": 6161 }, { "epoch": 0.9714506836357383, "grad_norm": 0.8671875, "learning_rate": 5.0981301622443166e-05, "loss": 0.7634, "step": 6162 }, { "epoch": 0.9716083354831313, "grad_norm": 1.6484375, "learning_rate": 5.097728897434337e-05, "loss": 1.0612, "step": 6163 }, { "epoch": 0.9717659873305242, "grad_norm": 1.1484375, "learning_rate": 5.097327643014442e-05, "loss": 1.0621, "step": 6164 }, { "epoch": 0.9719236391779172, "grad_norm": 0.87890625, "learning_rate": 5.0969263989854776e-05, "loss": 0.8118, "step": 6165 }, { "epoch": 0.9720812910253103, "grad_norm": 1.0703125, "learning_rate": 5.0965251653483e-05, "loss": 1.0723, "step": 6166 }, { "epoch": 0.9722389428727033, "grad_norm": 0.9765625, "learning_rate": 5.096123942103758e-05, "loss": 0.9283, "step": 6167 }, { "epoch": 0.9723965947200963, "grad_norm": 1.078125, "learning_rate": 5.0957227292527e-05, "loss": 1.0944, "step": 6168 }, { "epoch": 0.9725542465674893, "grad_norm": 1.3828125, "learning_rate": 5.0953215267959774e-05, "loss": 0.7535, "step": 6169 }, { "epoch": 0.9727118984148824, "grad_norm": 0.86328125, "learning_rate": 5.094920334734438e-05, "loss": 0.9668, "step": 6170 }, { "epoch": 0.9728695502622754, "grad_norm": 1.0, "learning_rate": 5.0945191530689374e-05, "loss": 1.0312, "step": 6171 }, { "epoch": 0.9730272021096683, "grad_norm": 0.921875, "learning_rate": 5.094117981800324e-05, "loss": 0.9909, "step": 6172 }, { "epoch": 0.9731848539570613, "grad_norm": 1.03125, "learning_rate": 5.093716820929446e-05, "loss": 0.7702, "step": 6173 }, { "epoch": 0.9733425058044544, "grad_norm": 0.91796875, "learning_rate": 5.093315670457155e-05, "loss": 0.9903, "step": 6174 }, { "epoch": 0.9735001576518474, "grad_norm": 0.87109375, "learning_rate": 5.092914530384296e-05, "loss": 0.8161, "step": 6175 }, { "epoch": 0.9736578094992404, "grad_norm": 1.015625, "learning_rate": 5.092513400711729e-05, "loss": 0.9295, "step": 6176 }, { "epoch": 0.9738154613466334, "grad_norm": 1.1328125, "learning_rate": 5.0921122814403e-05, "loss": 0.9594, "step": 6177 }, { "epoch": 0.9739731131940265, "grad_norm": 4.59375, "learning_rate": 5.091711172570859e-05, "loss": 0.9644, "step": 6178 }, { "epoch": 0.9741307650414195, "grad_norm": 0.8828125, "learning_rate": 5.091310074104254e-05, "loss": 0.7723, "step": 6179 }, { "epoch": 0.9742884168888124, "grad_norm": 0.828125, "learning_rate": 5.090908986041334e-05, "loss": 0.7609, "step": 6180 }, { "epoch": 0.9744460687362054, "grad_norm": 0.94140625, "learning_rate": 5.0905079083829554e-05, "loss": 1.0714, "step": 6181 }, { "epoch": 0.9746037205835985, "grad_norm": 1.0703125, "learning_rate": 5.090106841129965e-05, "loss": 1.035, "step": 6182 }, { "epoch": 0.9747613724309915, "grad_norm": 1.0390625, "learning_rate": 5.089705784283212e-05, "loss": 0.9035, "step": 6183 }, { "epoch": 0.9749190242783845, "grad_norm": 0.87890625, "learning_rate": 5.089304737843547e-05, "loss": 0.8264, "step": 6184 }, { "epoch": 0.9750766761257775, "grad_norm": 0.90625, "learning_rate": 5.088903701811816e-05, "loss": 0.8915, "step": 6185 }, { "epoch": 0.9752343279731706, "grad_norm": 1.125, "learning_rate": 5.088502676188878e-05, "loss": 1.0113, "step": 6186 }, { "epoch": 0.9753919798205636, "grad_norm": 0.9140625, "learning_rate": 5.088101660975575e-05, "loss": 0.984, "step": 6187 }, { "epoch": 0.9755496316679565, "grad_norm": 0.9453125, "learning_rate": 5.087700656172762e-05, "loss": 0.8693, "step": 6188 }, { "epoch": 0.9757072835153495, "grad_norm": 0.921875, "learning_rate": 5.087299661781286e-05, "loss": 0.8011, "step": 6189 }, { "epoch": 0.9758649353627425, "grad_norm": 0.9375, "learning_rate": 5.086898677801995e-05, "loss": 1.038, "step": 6190 }, { "epoch": 0.9760225872101356, "grad_norm": 0.95703125, "learning_rate": 5.086497704235743e-05, "loss": 1.0494, "step": 6191 }, { "epoch": 0.9761802390575286, "grad_norm": 0.875, "learning_rate": 5.086096741083379e-05, "loss": 0.8913, "step": 6192 }, { "epoch": 0.9763378909049216, "grad_norm": 1.0625, "learning_rate": 5.085695788345753e-05, "loss": 0.9412, "step": 6193 }, { "epoch": 0.9764955427523146, "grad_norm": 0.94921875, "learning_rate": 5.0852948460237134e-05, "loss": 0.8567, "step": 6194 }, { "epoch": 0.9766531945997077, "grad_norm": 1.0546875, "learning_rate": 5.084893914118111e-05, "loss": 1.1136, "step": 6195 }, { "epoch": 0.9768108464471006, "grad_norm": 1.03125, "learning_rate": 5.08449299262979e-05, "loss": 0.8274, "step": 6196 }, { "epoch": 0.9769684982944936, "grad_norm": 1.0078125, "learning_rate": 5.08409208155961e-05, "loss": 0.9139, "step": 6197 }, { "epoch": 0.9771261501418866, "grad_norm": 1.078125, "learning_rate": 5.083691180908416e-05, "loss": 0.936, "step": 6198 }, { "epoch": 0.9772838019892797, "grad_norm": 2.96875, "learning_rate": 5.083290290677056e-05, "loss": 1.1384, "step": 6199 }, { "epoch": 0.9774414538366727, "grad_norm": 1.5390625, "learning_rate": 5.0828894108663825e-05, "loss": 0.9161, "step": 6200 }, { "epoch": 0.9775991056840657, "grad_norm": 1.03125, "learning_rate": 5.082488541477244e-05, "loss": 0.9197, "step": 6201 }, { "epoch": 0.9777567575314587, "grad_norm": 0.85546875, "learning_rate": 5.0820876825104905e-05, "loss": 0.7796, "step": 6202 }, { "epoch": 0.9779144093788518, "grad_norm": 0.98046875, "learning_rate": 5.0816868339669696e-05, "loss": 1.2388, "step": 6203 }, { "epoch": 0.9780720612262447, "grad_norm": 0.9296875, "learning_rate": 5.0812859958475335e-05, "loss": 0.8058, "step": 6204 }, { "epoch": 0.9782297130736377, "grad_norm": 0.921875, "learning_rate": 5.080885168153029e-05, "loss": 0.8904, "step": 6205 }, { "epoch": 0.9783873649210307, "grad_norm": 0.8828125, "learning_rate": 5.0804843508843045e-05, "loss": 0.9233, "step": 6206 }, { "epoch": 0.9785450167684238, "grad_norm": 1.21875, "learning_rate": 5.080083544042216e-05, "loss": 0.895, "step": 6207 }, { "epoch": 0.9787026686158168, "grad_norm": 1.046875, "learning_rate": 5.079682747627609e-05, "loss": 0.9837, "step": 6208 }, { "epoch": 0.9788603204632098, "grad_norm": 1.0390625, "learning_rate": 5.079281961641333e-05, "loss": 0.958, "step": 6209 }, { "epoch": 0.9790179723106028, "grad_norm": 1.09375, "learning_rate": 5.078881186084239e-05, "loss": 0.8657, "step": 6210 }, { "epoch": 0.9791756241579959, "grad_norm": 1.0, "learning_rate": 5.07848042095717e-05, "loss": 0.9161, "step": 6211 }, { "epoch": 0.9793332760053888, "grad_norm": 1.0234375, "learning_rate": 5.078079666260984e-05, "loss": 0.8087, "step": 6212 }, { "epoch": 0.9794909278527818, "grad_norm": 0.98046875, "learning_rate": 5.077678921996527e-05, "loss": 1.0254, "step": 6213 }, { "epoch": 0.9796485797001748, "grad_norm": 1.515625, "learning_rate": 5.077278188164649e-05, "loss": 0.9125, "step": 6214 }, { "epoch": 0.9798062315475679, "grad_norm": 0.8984375, "learning_rate": 5.0768774647661974e-05, "loss": 0.8898, "step": 6215 }, { "epoch": 0.9799638833949609, "grad_norm": 0.8828125, "learning_rate": 5.076476751802019e-05, "loss": 0.9189, "step": 6216 }, { "epoch": 0.9801215352423539, "grad_norm": 0.921875, "learning_rate": 5.076076049272971e-05, "loss": 0.9931, "step": 6217 }, { "epoch": 0.9802791870897469, "grad_norm": 0.9921875, "learning_rate": 5.075675357179899e-05, "loss": 0.8888, "step": 6218 }, { "epoch": 0.98043683893714, "grad_norm": 0.95703125, "learning_rate": 5.07527467552365e-05, "loss": 1.127, "step": 6219 }, { "epoch": 0.9805944907845329, "grad_norm": 0.9765625, "learning_rate": 5.074874004305077e-05, "loss": 1.1695, "step": 6220 }, { "epoch": 0.9807521426319259, "grad_norm": 0.9296875, "learning_rate": 5.074473343525022e-05, "loss": 1.1037, "step": 6221 }, { "epoch": 0.9809097944793189, "grad_norm": 0.91796875, "learning_rate": 5.074072693184342e-05, "loss": 0.9835, "step": 6222 }, { "epoch": 0.981067446326712, "grad_norm": 2.40625, "learning_rate": 5.073672053283884e-05, "loss": 0.9306, "step": 6223 }, { "epoch": 0.981225098174105, "grad_norm": 1.0546875, "learning_rate": 5.073271423824497e-05, "loss": 1.0804, "step": 6224 }, { "epoch": 0.981382750021498, "grad_norm": 0.828125, "learning_rate": 5.072870804807031e-05, "loss": 0.9123, "step": 6225 }, { "epoch": 0.981540401868891, "grad_norm": 0.953125, "learning_rate": 5.0724701962323274e-05, "loss": 0.9612, "step": 6226 }, { "epoch": 0.981698053716284, "grad_norm": 0.953125, "learning_rate": 5.0720695981012455e-05, "loss": 1.0878, "step": 6227 }, { "epoch": 0.981855705563677, "grad_norm": 0.875, "learning_rate": 5.071669010414633e-05, "loss": 0.827, "step": 6228 }, { "epoch": 0.98201335741107, "grad_norm": 0.8671875, "learning_rate": 5.071268433173333e-05, "loss": 0.8911, "step": 6229 }, { "epoch": 0.982171009258463, "grad_norm": 0.921875, "learning_rate": 5.0708678663781995e-05, "loss": 0.9276, "step": 6230 }, { "epoch": 0.982328661105856, "grad_norm": 0.9453125, "learning_rate": 5.070467310030076e-05, "loss": 0.9871, "step": 6231 }, { "epoch": 0.9824863129532491, "grad_norm": 1.0234375, "learning_rate": 5.0700667641298196e-05, "loss": 0.9789, "step": 6232 }, { "epoch": 0.9826439648006421, "grad_norm": 0.98828125, "learning_rate": 5.069666228678274e-05, "loss": 0.9062, "step": 6233 }, { "epoch": 0.9828016166480351, "grad_norm": 0.96484375, "learning_rate": 5.069265703676289e-05, "loss": 0.8797, "step": 6234 }, { "epoch": 0.9829592684954281, "grad_norm": 0.98046875, "learning_rate": 5.068865189124714e-05, "loss": 0.943, "step": 6235 }, { "epoch": 0.983116920342821, "grad_norm": 0.8671875, "learning_rate": 5.0684646850243946e-05, "loss": 1.0768, "step": 6236 }, { "epoch": 0.9832745721902141, "grad_norm": 0.953125, "learning_rate": 5.068064191376185e-05, "loss": 0.8451, "step": 6237 }, { "epoch": 0.9834322240376071, "grad_norm": 1.0078125, "learning_rate": 5.067663708180932e-05, "loss": 0.9286, "step": 6238 }, { "epoch": 0.9835898758850001, "grad_norm": 0.87109375, "learning_rate": 5.067263235439483e-05, "loss": 0.8852, "step": 6239 }, { "epoch": 0.9837475277323932, "grad_norm": 0.91015625, "learning_rate": 5.066862773152687e-05, "loss": 1.0036, "step": 6240 }, { "epoch": 0.9839051795797862, "grad_norm": 0.90625, "learning_rate": 5.0664623213213947e-05, "loss": 1.022, "step": 6241 }, { "epoch": 0.9840628314271792, "grad_norm": 0.828125, "learning_rate": 5.066061879946453e-05, "loss": 0.7765, "step": 6242 }, { "epoch": 0.9842204832745722, "grad_norm": 0.95703125, "learning_rate": 5.065661449028709e-05, "loss": 0.9773, "step": 6243 }, { "epoch": 0.9843781351219651, "grad_norm": 1.03125, "learning_rate": 5.065261028569015e-05, "loss": 1.1496, "step": 6244 }, { "epoch": 0.9845357869693582, "grad_norm": 0.90625, "learning_rate": 5.064860618568219e-05, "loss": 0.8877, "step": 6245 }, { "epoch": 0.9846934388167512, "grad_norm": 0.92578125, "learning_rate": 5.064460219027169e-05, "loss": 0.9299, "step": 6246 }, { "epoch": 0.9848510906641442, "grad_norm": 0.92578125, "learning_rate": 5.064059829946715e-05, "loss": 0.9905, "step": 6247 }, { "epoch": 0.9850087425115372, "grad_norm": 1.0390625, "learning_rate": 5.0636594513277015e-05, "loss": 1.1631, "step": 6248 }, { "epoch": 0.9851663943589303, "grad_norm": 0.96484375, "learning_rate": 5.0632590831709816e-05, "loss": 0.9508, "step": 6249 }, { "epoch": 0.9853240462063233, "grad_norm": 0.9453125, "learning_rate": 5.0628587254773995e-05, "loss": 1.0704, "step": 6250 }, { "epoch": 0.9854816980537163, "grad_norm": 1.0, "learning_rate": 5.062458378247804e-05, "loss": 0.9032, "step": 6251 }, { "epoch": 0.9856393499011092, "grad_norm": 1.0390625, "learning_rate": 5.062058041483049e-05, "loss": 1.0423, "step": 6252 }, { "epoch": 0.9857970017485023, "grad_norm": 1.03125, "learning_rate": 5.061657715183981e-05, "loss": 0.9898, "step": 6253 }, { "epoch": 0.9859546535958953, "grad_norm": 0.9921875, "learning_rate": 5.0612573993514465e-05, "loss": 1.1372, "step": 6254 }, { "epoch": 0.9861123054432883, "grad_norm": 1.078125, "learning_rate": 5.0608570939862933e-05, "loss": 0.97, "step": 6255 }, { "epoch": 0.9862699572906813, "grad_norm": 0.95703125, "learning_rate": 5.060456799089369e-05, "loss": 0.9411, "step": 6256 }, { "epoch": 0.9864276091380744, "grad_norm": 1.0078125, "learning_rate": 5.060056514661527e-05, "loss": 1.067, "step": 6257 }, { "epoch": 0.9865852609854674, "grad_norm": 0.93359375, "learning_rate": 5.059656240703614e-05, "loss": 0.9598, "step": 6258 }, { "epoch": 0.9867429128328604, "grad_norm": 0.921875, "learning_rate": 5.059255977216477e-05, "loss": 0.912, "step": 6259 }, { "epoch": 0.9869005646802534, "grad_norm": 0.953125, "learning_rate": 5.058855724200964e-05, "loss": 1.0736, "step": 6260 }, { "epoch": 0.9870582165276464, "grad_norm": 1.0078125, "learning_rate": 5.05845548165792e-05, "loss": 1.1007, "step": 6261 }, { "epoch": 0.9872158683750394, "grad_norm": 0.97265625, "learning_rate": 5.0580552495882005e-05, "loss": 0.9513, "step": 6262 }, { "epoch": 0.9873735202224324, "grad_norm": 1.0546875, "learning_rate": 5.0576550279926516e-05, "loss": 0.9707, "step": 6263 }, { "epoch": 0.9875311720698254, "grad_norm": 0.91015625, "learning_rate": 5.05725481687212e-05, "loss": 0.7737, "step": 6264 }, { "epoch": 0.9876888239172185, "grad_norm": 0.9296875, "learning_rate": 5.0568546162274546e-05, "loss": 0.851, "step": 6265 }, { "epoch": 0.9878464757646115, "grad_norm": 0.94140625, "learning_rate": 5.056454426059498e-05, "loss": 1.0445, "step": 6266 }, { "epoch": 0.9880041276120045, "grad_norm": 0.99609375, "learning_rate": 5.0560542463691083e-05, "loss": 1.0755, "step": 6267 }, { "epoch": 0.9881617794593975, "grad_norm": 1.5234375, "learning_rate": 5.05565407715713e-05, "loss": 1.0833, "step": 6268 }, { "epoch": 0.9883194313067905, "grad_norm": 0.875, "learning_rate": 5.0552539184244094e-05, "loss": 1.0062, "step": 6269 }, { "epoch": 0.9884770831541835, "grad_norm": 0.921875, "learning_rate": 5.054853770171797e-05, "loss": 0.9839, "step": 6270 }, { "epoch": 0.9886347350015765, "grad_norm": 0.87109375, "learning_rate": 5.054453632400133e-05, "loss": 1.0254, "step": 6271 }, { "epoch": 0.9887923868489695, "grad_norm": 0.8671875, "learning_rate": 5.0540535051102776e-05, "loss": 0.9012, "step": 6272 }, { "epoch": 0.9889500386963626, "grad_norm": 0.953125, "learning_rate": 5.0536533883030726e-05, "loss": 0.9597, "step": 6273 }, { "epoch": 0.9891076905437556, "grad_norm": 0.984375, "learning_rate": 5.053253281979367e-05, "loss": 1.0043, "step": 6274 }, { "epoch": 0.9892653423911486, "grad_norm": 0.80859375, "learning_rate": 5.052853186140008e-05, "loss": 0.8044, "step": 6275 }, { "epoch": 0.9894229942385416, "grad_norm": 0.9921875, "learning_rate": 5.052453100785839e-05, "loss": 0.9659, "step": 6276 }, { "epoch": 0.9895806460859345, "grad_norm": 0.96875, "learning_rate": 5.052053025917717e-05, "loss": 0.9449, "step": 6277 }, { "epoch": 0.9897382979333276, "grad_norm": 0.90625, "learning_rate": 5.051652961536486e-05, "loss": 0.9016, "step": 6278 }, { "epoch": 0.9898959497807206, "grad_norm": 0.8359375, "learning_rate": 5.051252907642994e-05, "loss": 0.8199, "step": 6279 }, { "epoch": 0.9900536016281136, "grad_norm": 1.0, "learning_rate": 5.050852864238089e-05, "loss": 0.7977, "step": 6280 }, { "epoch": 0.9902112534755066, "grad_norm": 0.94921875, "learning_rate": 5.0504528313226144e-05, "loss": 1.1122, "step": 6281 }, { "epoch": 0.9903689053228997, "grad_norm": 0.89453125, "learning_rate": 5.050052808897425e-05, "loss": 0.9225, "step": 6282 }, { "epoch": 0.9905265571702927, "grad_norm": 0.94140625, "learning_rate": 5.049652796963367e-05, "loss": 0.9358, "step": 6283 }, { "epoch": 0.9906842090176857, "grad_norm": 0.9375, "learning_rate": 5.049252795521286e-05, "loss": 1.2093, "step": 6284 }, { "epoch": 0.9908418608650786, "grad_norm": 0.90625, "learning_rate": 5.048852804572032e-05, "loss": 0.9311, "step": 6285 }, { "epoch": 0.9909995127124717, "grad_norm": 0.9296875, "learning_rate": 5.0484528241164494e-05, "loss": 0.9212, "step": 6286 }, { "epoch": 0.9911571645598647, "grad_norm": 0.97265625, "learning_rate": 5.04805285415539e-05, "loss": 1.022, "step": 6287 }, { "epoch": 0.9913148164072577, "grad_norm": 0.921875, "learning_rate": 5.047652894689699e-05, "loss": 0.8836, "step": 6288 }, { "epoch": 0.9914724682546507, "grad_norm": 0.90234375, "learning_rate": 5.047252945720224e-05, "loss": 0.8671, "step": 6289 }, { "epoch": 0.9916301201020438, "grad_norm": 0.9609375, "learning_rate": 5.046853007247814e-05, "loss": 1.1066, "step": 6290 }, { "epoch": 0.9917877719494368, "grad_norm": 0.9765625, "learning_rate": 5.046453079273312e-05, "loss": 0.9417, "step": 6291 }, { "epoch": 0.9919454237968298, "grad_norm": 0.9140625, "learning_rate": 5.046053161797574e-05, "loss": 0.9379, "step": 6292 }, { "epoch": 0.9921030756442227, "grad_norm": 1.0703125, "learning_rate": 5.0456532548214416e-05, "loss": 0.9187, "step": 6293 }, { "epoch": 0.9922607274916158, "grad_norm": 0.93359375, "learning_rate": 5.045253358345765e-05, "loss": 0.9066, "step": 6294 }, { "epoch": 0.9924183793390088, "grad_norm": 0.83203125, "learning_rate": 5.044853472371391e-05, "loss": 0.7859, "step": 6295 }, { "epoch": 0.9925760311864018, "grad_norm": 1.1015625, "learning_rate": 5.044453596899162e-05, "loss": 1.0803, "step": 6296 }, { "epoch": 0.9927336830337948, "grad_norm": 0.86328125, "learning_rate": 5.044053731929934e-05, "loss": 0.931, "step": 6297 }, { "epoch": 0.9928913348811879, "grad_norm": 1.1484375, "learning_rate": 5.0436538774645526e-05, "loss": 1.2049, "step": 6298 }, { "epoch": 0.9930489867285809, "grad_norm": 0.84375, "learning_rate": 5.043254033503862e-05, "loss": 0.9402, "step": 6299 }, { "epoch": 0.9932066385759739, "grad_norm": 1.1953125, "learning_rate": 5.042854200048712e-05, "loss": 0.9887, "step": 6300 }, { "epoch": 0.9933642904233668, "grad_norm": 0.9296875, "learning_rate": 5.042454377099946e-05, "loss": 0.8942, "step": 6301 }, { "epoch": 0.9935219422707599, "grad_norm": 1.1015625, "learning_rate": 5.042054564658416e-05, "loss": 0.9965, "step": 6302 }, { "epoch": 0.9936795941181529, "grad_norm": 1.0234375, "learning_rate": 5.04165476272497e-05, "loss": 0.9882, "step": 6303 }, { "epoch": 0.9938372459655459, "grad_norm": 0.95703125, "learning_rate": 5.041254971300453e-05, "loss": 0.9204, "step": 6304 }, { "epoch": 0.9939948978129389, "grad_norm": 0.9765625, "learning_rate": 5.040855190385712e-05, "loss": 0.9309, "step": 6305 }, { "epoch": 0.994152549660332, "grad_norm": 0.9609375, "learning_rate": 5.0404554199815915e-05, "loss": 1.1481, "step": 6306 }, { "epoch": 0.994310201507725, "grad_norm": 0.90234375, "learning_rate": 5.040055660088946e-05, "loss": 0.8913, "step": 6307 }, { "epoch": 0.994467853355118, "grad_norm": 0.9609375, "learning_rate": 5.039655910708618e-05, "loss": 0.9688, "step": 6308 }, { "epoch": 0.9946255052025109, "grad_norm": 0.9375, "learning_rate": 5.0392561718414575e-05, "loss": 0.8557, "step": 6309 }, { "epoch": 0.9947831570499039, "grad_norm": 0.9140625, "learning_rate": 5.03885644348831e-05, "loss": 0.8381, "step": 6310 }, { "epoch": 0.994940808897297, "grad_norm": 0.95703125, "learning_rate": 5.038456725650018e-05, "loss": 0.9648, "step": 6311 }, { "epoch": 0.99509846074469, "grad_norm": 1.0234375, "learning_rate": 5.038057018327438e-05, "loss": 1.0214, "step": 6312 }, { "epoch": 0.995256112592083, "grad_norm": 0.8828125, "learning_rate": 5.037657321521412e-05, "loss": 0.8468, "step": 6313 }, { "epoch": 0.995413764439476, "grad_norm": 0.9375, "learning_rate": 5.037257635232788e-05, "loss": 1.1382, "step": 6314 }, { "epoch": 0.9955714162868691, "grad_norm": 1.140625, "learning_rate": 5.036857959462412e-05, "loss": 0.9959, "step": 6315 }, { "epoch": 0.9957290681342621, "grad_norm": 1.015625, "learning_rate": 5.0364582942111284e-05, "loss": 0.8813, "step": 6316 }, { "epoch": 0.995886719981655, "grad_norm": 0.96875, "learning_rate": 5.036058639479792e-05, "loss": 1.0607, "step": 6317 }, { "epoch": 0.996044371829048, "grad_norm": 0.94140625, "learning_rate": 5.0356589952692455e-05, "loss": 0.993, "step": 6318 }, { "epoch": 0.9962020236764411, "grad_norm": 0.93359375, "learning_rate": 5.035259361580336e-05, "loss": 0.9779, "step": 6319 }, { "epoch": 0.9963596755238341, "grad_norm": 1.0859375, "learning_rate": 5.03485973841391e-05, "loss": 1.0452, "step": 6320 }, { "epoch": 0.9965173273712271, "grad_norm": 0.90625, "learning_rate": 5.0344601257708116e-05, "loss": 0.8008, "step": 6321 }, { "epoch": 0.9966749792186201, "grad_norm": 1.046875, "learning_rate": 5.0340605236518945e-05, "loss": 0.9249, "step": 6322 }, { "epoch": 0.9968326310660132, "grad_norm": 0.9375, "learning_rate": 5.033660932058002e-05, "loss": 0.8058, "step": 6323 }, { "epoch": 0.9969902829134062, "grad_norm": 0.9375, "learning_rate": 5.0332613509899816e-05, "loss": 1.0034, "step": 6324 }, { "epoch": 0.9971479347607991, "grad_norm": 0.99609375, "learning_rate": 5.032861780448681e-05, "loss": 0.8987, "step": 6325 }, { "epoch": 0.9973055866081921, "grad_norm": 0.94921875, "learning_rate": 5.032462220434946e-05, "loss": 1.0116, "step": 6326 }, { "epoch": 0.9974632384555852, "grad_norm": 1.0078125, "learning_rate": 5.032062670949622e-05, "loss": 1.1243, "step": 6327 }, { "epoch": 0.9976208903029782, "grad_norm": 0.92578125, "learning_rate": 5.031663131993558e-05, "loss": 0.8762, "step": 6328 }, { "epoch": 0.9977785421503712, "grad_norm": 0.93359375, "learning_rate": 5.031263603567595e-05, "loss": 1.0111, "step": 6329 }, { "epoch": 0.9979361939977642, "grad_norm": 0.80859375, "learning_rate": 5.030864085672591e-05, "loss": 0.7475, "step": 6330 }, { "epoch": 0.9980938458451573, "grad_norm": 0.8671875, "learning_rate": 5.030464578309384e-05, "loss": 0.9049, "step": 6331 }, { "epoch": 0.9982514976925503, "grad_norm": 1.046875, "learning_rate": 5.0300650814788254e-05, "loss": 1.1051, "step": 6332 }, { "epoch": 0.9984091495399432, "grad_norm": 0.921875, "learning_rate": 5.029665595181758e-05, "loss": 0.901, "step": 6333 }, { "epoch": 0.9985668013873362, "grad_norm": 1.59375, "learning_rate": 5.029266119419031e-05, "loss": 0.9752, "step": 6334 }, { "epoch": 0.9987244532347292, "grad_norm": 0.95703125, "learning_rate": 5.028866654191491e-05, "loss": 0.8623, "step": 6335 }, { "epoch": 0.9988821050821223, "grad_norm": 0.9296875, "learning_rate": 5.02846719949998e-05, "loss": 0.886, "step": 6336 }, { "epoch": 0.9990397569295153, "grad_norm": 1.015625, "learning_rate": 5.0280677553453515e-05, "loss": 0.9857, "step": 6337 }, { "epoch": 0.9991974087769083, "grad_norm": 0.8984375, "learning_rate": 5.027668321728449e-05, "loss": 0.8594, "step": 6338 }, { "epoch": 0.9993550606243014, "grad_norm": 0.94140625, "learning_rate": 5.02726889865012e-05, "loss": 1.0154, "step": 6339 }, { "epoch": 0.9995127124716944, "grad_norm": 0.9375, "learning_rate": 5.026869486111211e-05, "loss": 0.8742, "step": 6340 }, { "epoch": 0.9996703643190873, "grad_norm": 1.0546875, "learning_rate": 5.0264700841125665e-05, "loss": 1.0499, "step": 6341 }, { "epoch": 0.9998280161664803, "grad_norm": 1.1015625, "learning_rate": 5.0260706926550314e-05, "loss": 0.9543, "step": 6342 }, { "epoch": 0.9999856680138733, "grad_norm": 1.0546875, "learning_rate": 5.025671311739459e-05, "loss": 0.9777, "step": 6343 }, { "epoch": 1.0, "grad_norm": 3.640625, "learning_rate": 5.0252719413666916e-05, "loss": 0.7707, "step": 6344 }, { "epoch": 4.384321665723373e-05, "grad_norm": 0.97265625, "learning_rate": 5.024872581537576e-05, "loss": 1.0584, "step": 1 }, { "epoch": 8.768643331446746e-05, "grad_norm": 0.95703125, "learning_rate": 5.02447323225296e-05, "loss": 1.0001, "step": 2 }, { "epoch": 0.00013152964997170118, "grad_norm": 1.0078125, "learning_rate": 5.0240738935136834e-05, "loss": 0.9125, "step": 3 }, { "epoch": 0.00017537286662893493, "grad_norm": 1.0234375, "learning_rate": 5.023674565320602e-05, "loss": 0.9166, "step": 4 }, { "epoch": 0.00021921608328616865, "grad_norm": 0.86328125, "learning_rate": 5.023275247674557e-05, "loss": 0.9278, "step": 5 }, { "epoch": 0.00026305929994340237, "grad_norm": 1.046875, "learning_rate": 5.022875940576397e-05, "loss": 0.8569, "step": 6 }, { "epoch": 0.00030690251660063614, "grad_norm": 1.109375, "learning_rate": 5.0224766440269655e-05, "loss": 0.7878, "step": 7 }, { "epoch": 0.00035074573325786986, "grad_norm": 0.8359375, "learning_rate": 5.0220773580271086e-05, "loss": 0.7722, "step": 8 }, { "epoch": 0.0003945889499151036, "grad_norm": 0.921875, "learning_rate": 5.0216780825776766e-05, "loss": 0.7898, "step": 9 }, { "epoch": 0.0004384321665723373, "grad_norm": 0.88671875, "learning_rate": 5.021278817679513e-05, "loss": 0.7402, "step": 10 }, { "epoch": 0.00048227538322957107, "grad_norm": 0.9296875, "learning_rate": 5.020879563333465e-05, "loss": 0.8207, "step": 11 }, { "epoch": 0.0005261185998868047, "grad_norm": 1.03125, "learning_rate": 5.020480319540377e-05, "loss": 1.06, "step": 12 }, { "epoch": 0.0005699618165440386, "grad_norm": 1.1328125, "learning_rate": 5.020081086301094e-05, "loss": 1.0929, "step": 13 }, { "epoch": 0.0006138050332012723, "grad_norm": 0.9765625, "learning_rate": 5.019681863616468e-05, "loss": 0.9009, "step": 14 }, { "epoch": 0.000657648249858506, "grad_norm": 0.86328125, "learning_rate": 5.019282651487341e-05, "loss": 0.8358, "step": 15 }, { "epoch": 0.0007014914665157397, "grad_norm": 0.88671875, "learning_rate": 5.018883449914561e-05, "loss": 0.8383, "step": 16 }, { "epoch": 0.0007453346831729734, "grad_norm": 0.8671875, "learning_rate": 5.018484258898972e-05, "loss": 0.971, "step": 17 }, { "epoch": 0.0007891778998302072, "grad_norm": 0.875, "learning_rate": 5.018085078441417e-05, "loss": 0.8209, "step": 18 }, { "epoch": 0.0008330211164874409, "grad_norm": 0.84375, "learning_rate": 5.01768590854275e-05, "loss": 0.978, "step": 19 }, { "epoch": 0.0008768643331446746, "grad_norm": 0.86328125, "learning_rate": 5.017286749203813e-05, "loss": 0.9122, "step": 20 }, { "epoch": 0.0009207075498019084, "grad_norm": 0.73046875, "learning_rate": 5.0168876004254505e-05, "loss": 0.7657, "step": 21 }, { "epoch": 0.0009645507664591421, "grad_norm": 0.953125, "learning_rate": 5.016488462208512e-05, "loss": 0.9234, "step": 22 }, { "epoch": 0.0010083939831163759, "grad_norm": 0.73828125, "learning_rate": 5.0160893345538373e-05, "loss": 0.81, "step": 23 }, { "epoch": 0.0010522371997736095, "grad_norm": 0.8046875, "learning_rate": 5.015690217462279e-05, "loss": 0.7965, "step": 24 }, { "epoch": 0.0010960804164308433, "grad_norm": 0.76953125, "learning_rate": 5.015291110934681e-05, "loss": 0.8597, "step": 25 }, { "epoch": 0.0011399236330880771, "grad_norm": 0.9140625, "learning_rate": 5.014892014971888e-05, "loss": 0.9638, "step": 26 }, { "epoch": 0.0011837668497453107, "grad_norm": 0.96875, "learning_rate": 5.014492929574748e-05, "loss": 0.9593, "step": 27 }, { "epoch": 0.0012276100664025446, "grad_norm": 1.046875, "learning_rate": 5.014093854744104e-05, "loss": 0.7753, "step": 28 }, { "epoch": 0.0012714532830597782, "grad_norm": 0.84375, "learning_rate": 5.013694790480803e-05, "loss": 0.7393, "step": 29 }, { "epoch": 0.001315296499717012, "grad_norm": 0.82421875, "learning_rate": 5.0132957367856914e-05, "loss": 0.9677, "step": 30 }, { "epoch": 0.0013591397163742456, "grad_norm": 0.9609375, "learning_rate": 5.0128966936596145e-05, "loss": 0.888, "step": 31 }, { "epoch": 0.0014029829330314794, "grad_norm": 0.9453125, "learning_rate": 5.012497661103417e-05, "loss": 0.9888, "step": 32 }, { "epoch": 0.0014468261496887133, "grad_norm": 0.96875, "learning_rate": 5.012098639117944e-05, "loss": 0.9464, "step": 33 }, { "epoch": 0.0014906693663459469, "grad_norm": 0.80859375, "learning_rate": 5.011699627704045e-05, "loss": 0.8721, "step": 34 }, { "epoch": 0.0015345125830031807, "grad_norm": 0.84375, "learning_rate": 5.0113006268625626e-05, "loss": 0.8189, "step": 35 }, { "epoch": 0.0015783557996604143, "grad_norm": 0.9296875, "learning_rate": 5.010901636594345e-05, "loss": 0.8813, "step": 36 }, { "epoch": 0.0016221990163176481, "grad_norm": 0.89453125, "learning_rate": 5.010502656900236e-05, "loss": 0.9055, "step": 37 }, { "epoch": 0.0016660422329748817, "grad_norm": 0.91015625, "learning_rate": 5.010103687781078e-05, "loss": 0.8445, "step": 38 }, { "epoch": 0.0017098854496321156, "grad_norm": 0.828125, "learning_rate": 5.0097047292377234e-05, "loss": 0.8103, "step": 39 }, { "epoch": 0.0017537286662893492, "grad_norm": 0.8671875, "learning_rate": 5.009305781271014e-05, "loss": 0.9634, "step": 40 }, { "epoch": 0.001797571882946583, "grad_norm": 0.890625, "learning_rate": 5.0089068438817955e-05, "loss": 0.9088, "step": 41 }, { "epoch": 0.0018414150996038168, "grad_norm": 0.83984375, "learning_rate": 5.0085079170709145e-05, "loss": 0.8586, "step": 42 }, { "epoch": 0.0018852583162610505, "grad_norm": 0.83203125, "learning_rate": 5.0081090008392114e-05, "loss": 0.7916, "step": 43 }, { "epoch": 0.0019291015329182843, "grad_norm": 0.828125, "learning_rate": 5.00771009518754e-05, "loss": 0.8336, "step": 44 }, { "epoch": 0.001972944749575518, "grad_norm": 0.82421875, "learning_rate": 5.007311200116742e-05, "loss": 0.8392, "step": 45 }, { "epoch": 0.0020167879662327517, "grad_norm": 0.96484375, "learning_rate": 5.006912315627662e-05, "loss": 0.8085, "step": 46 }, { "epoch": 0.0020606311828899855, "grad_norm": 0.94921875, "learning_rate": 5.0065134417211454e-05, "loss": 0.835, "step": 47 }, { "epoch": 0.002104474399547219, "grad_norm": 0.94921875, "learning_rate": 5.0061145783980354e-05, "loss": 0.8486, "step": 48 }, { "epoch": 0.0021483176162044528, "grad_norm": 0.84765625, "learning_rate": 5.0057157256591834e-05, "loss": 0.769, "step": 49 }, { "epoch": 0.0021921608328616866, "grad_norm": 0.82421875, "learning_rate": 5.005316883505432e-05, "loss": 0.9368, "step": 50 }, { "epoch": 0.0022360040495189204, "grad_norm": 0.82421875, "learning_rate": 5.0049180519376257e-05, "loss": 0.9236, "step": 51 }, { "epoch": 0.0022798472661761542, "grad_norm": 0.78515625, "learning_rate": 5.0045192309566105e-05, "loss": 0.784, "step": 52 }, { "epoch": 0.0023236904828333876, "grad_norm": 0.85546875, "learning_rate": 5.0041204205632266e-05, "loss": 0.8094, "step": 53 }, { "epoch": 0.0023675336994906215, "grad_norm": 0.9296875, "learning_rate": 5.003721620758328e-05, "loss": 0.9394, "step": 54 }, { "epoch": 0.0024113769161478553, "grad_norm": 0.7734375, "learning_rate": 5.003322831542756e-05, "loss": 0.8254, "step": 55 }, { "epoch": 0.002455220132805089, "grad_norm": 0.87109375, "learning_rate": 5.002924052917355e-05, "loss": 0.877, "step": 56 }, { "epoch": 0.002499063349462323, "grad_norm": 0.87890625, "learning_rate": 5.002525284882972e-05, "loss": 0.9679, "step": 57 }, { "epoch": 0.0025429065661195563, "grad_norm": 0.8515625, "learning_rate": 5.0021265274404474e-05, "loss": 0.9491, "step": 58 }, { "epoch": 0.00258674978277679, "grad_norm": 0.828125, "learning_rate": 5.001727780590632e-05, "loss": 0.8285, "step": 59 }, { "epoch": 0.002630592999434024, "grad_norm": 0.95703125, "learning_rate": 5.001329044334371e-05, "loss": 0.8668, "step": 60 }, { "epoch": 0.002674436216091258, "grad_norm": 0.84375, "learning_rate": 5.0009303186725056e-05, "loss": 0.8354, "step": 61 }, { "epoch": 0.002718279432748491, "grad_norm": 0.87109375, "learning_rate": 5.000531603605883e-05, "loss": 0.8973, "step": 62 }, { "epoch": 0.002762122649405725, "grad_norm": 0.83203125, "learning_rate": 5.0001328991353445e-05, "loss": 0.8307, "step": 63 }, { "epoch": 0.002805965866062959, "grad_norm": 0.94921875, "learning_rate": 4.999734205261743e-05, "loss": 0.9271, "step": 64 }, { "epoch": 0.0028498090827201927, "grad_norm": 0.81640625, "learning_rate": 4.999335521985917e-05, "loss": 0.8197, "step": 65 }, { "epoch": 0.0028936522993774265, "grad_norm": 0.78515625, "learning_rate": 4.998936849308715e-05, "loss": 0.773, "step": 66 }, { "epoch": 0.00293749551603466, "grad_norm": 0.84765625, "learning_rate": 4.998538187230981e-05, "loss": 0.939, "step": 67 }, { "epoch": 0.0029813387326918937, "grad_norm": 0.87890625, "learning_rate": 4.998139535753559e-05, "loss": 0.8179, "step": 68 }, { "epoch": 0.0030251819493491276, "grad_norm": 0.9296875, "learning_rate": 4.997740894877294e-05, "loss": 0.8116, "step": 69 }, { "epoch": 0.0030690251660063614, "grad_norm": 0.7734375, "learning_rate": 4.997342264603028e-05, "loss": 0.9225, "step": 70 }, { "epoch": 0.003112868382663595, "grad_norm": 0.8984375, "learning_rate": 4.996943644931612e-05, "loss": 0.9243, "step": 71 }, { "epoch": 0.0031567115993208286, "grad_norm": 0.8671875, "learning_rate": 4.996545035863889e-05, "loss": 0.7815, "step": 72 }, { "epoch": 0.0032005548159780624, "grad_norm": 0.859375, "learning_rate": 4.996146437400702e-05, "loss": 1.0366, "step": 73 }, { "epoch": 0.0032443980326352963, "grad_norm": 0.87890625, "learning_rate": 4.9957478495428965e-05, "loss": 0.8199, "step": 74 }, { "epoch": 0.00328824124929253, "grad_norm": 0.8984375, "learning_rate": 4.9953492722913185e-05, "loss": 0.8172, "step": 75 }, { "epoch": 0.0033320844659497635, "grad_norm": 1.0546875, "learning_rate": 4.9949507056468115e-05, "loss": 0.9314, "step": 76 }, { "epoch": 0.0033759276826069973, "grad_norm": 0.91796875, "learning_rate": 4.99455214961022e-05, "loss": 0.9103, "step": 77 }, { "epoch": 0.003419770899264231, "grad_norm": 0.859375, "learning_rate": 4.9941536041823846e-05, "loss": 0.8825, "step": 78 }, { "epoch": 0.003463614115921465, "grad_norm": 0.93359375, "learning_rate": 4.993755069364159e-05, "loss": 0.7796, "step": 79 }, { "epoch": 0.0035074573325786984, "grad_norm": 0.84375, "learning_rate": 4.993356545156383e-05, "loss": 0.8363, "step": 80 }, { "epoch": 0.003551300549235932, "grad_norm": 0.859375, "learning_rate": 4.9929580315599024e-05, "loss": 0.8812, "step": 81 }, { "epoch": 0.003595143765893166, "grad_norm": 0.78125, "learning_rate": 4.992559528575561e-05, "loss": 0.8534, "step": 82 }, { "epoch": 0.0036389869825504, "grad_norm": 0.953125, "learning_rate": 4.9921610362041994e-05, "loss": 0.9928, "step": 83 }, { "epoch": 0.0036828301992076337, "grad_norm": 0.80859375, "learning_rate": 4.991762554446669e-05, "loss": 0.9253, "step": 84 }, { "epoch": 0.003726673415864867, "grad_norm": 0.8671875, "learning_rate": 4.991364083303813e-05, "loss": 0.9005, "step": 85 }, { "epoch": 0.003770516632522101, "grad_norm": 0.86328125, "learning_rate": 4.990965622776475e-05, "loss": 0.8404, "step": 86 }, { "epoch": 0.0038143598491793347, "grad_norm": 1.34375, "learning_rate": 4.9905671728654976e-05, "loss": 0.9964, "step": 87 }, { "epoch": 0.0038582030658365686, "grad_norm": 0.78515625, "learning_rate": 4.990168733571724e-05, "loss": 0.8115, "step": 88 }, { "epoch": 0.003902046282493802, "grad_norm": 0.82421875, "learning_rate": 4.9897703048960046e-05, "loss": 0.776, "step": 89 }, { "epoch": 0.003945889499151036, "grad_norm": 0.76953125, "learning_rate": 4.98937188683918e-05, "loss": 0.7806, "step": 90 }, { "epoch": 0.00398973271580827, "grad_norm": 0.78125, "learning_rate": 4.988973479402097e-05, "loss": 0.7176, "step": 91 }, { "epoch": 0.004033575932465503, "grad_norm": 1.0625, "learning_rate": 4.988575082585597e-05, "loss": 0.8331, "step": 92 }, { "epoch": 0.004077419149122737, "grad_norm": 1.0078125, "learning_rate": 4.988176696390523e-05, "loss": 0.9226, "step": 93 }, { "epoch": 0.004121262365779971, "grad_norm": 0.94140625, "learning_rate": 4.987778320817725e-05, "loss": 0.9496, "step": 94 }, { "epoch": 0.004165105582437205, "grad_norm": 0.78515625, "learning_rate": 4.987379955868044e-05, "loss": 0.8732, "step": 95 }, { "epoch": 0.004208948799094438, "grad_norm": 0.82421875, "learning_rate": 4.9869816015423256e-05, "loss": 0.9693, "step": 96 }, { "epoch": 0.004252792015751672, "grad_norm": 0.796875, "learning_rate": 4.986583257841413e-05, "loss": 0.7694, "step": 97 }, { "epoch": 0.0042966352324089055, "grad_norm": 0.84765625, "learning_rate": 4.986184924766146e-05, "loss": 0.8459, "step": 98 }, { "epoch": 0.004340478449066139, "grad_norm": 0.82421875, "learning_rate": 4.985786602317378e-05, "loss": 0.8346, "step": 99 }, { "epoch": 0.004384321665723373, "grad_norm": 0.96875, "learning_rate": 4.9853882904959496e-05, "loss": 0.9138, "step": 100 }, { "epoch": 0.004428164882380607, "grad_norm": 0.7890625, "learning_rate": 4.984989989302702e-05, "loss": 0.8055, "step": 101 }, { "epoch": 0.004472008099037841, "grad_norm": 0.8984375, "learning_rate": 4.984591698738483e-05, "loss": 0.846, "step": 102 }, { "epoch": 0.004515851315695075, "grad_norm": 0.9140625, "learning_rate": 4.984193418804131e-05, "loss": 0.9141, "step": 103 }, { "epoch": 0.0045596945323523085, "grad_norm": 0.75, "learning_rate": 4.9837951495004976e-05, "loss": 0.7612, "step": 104 }, { "epoch": 0.0046035377490095414, "grad_norm": 0.828125, "learning_rate": 4.983396890828425e-05, "loss": 0.8877, "step": 105 }, { "epoch": 0.004647380965666775, "grad_norm": 0.828125, "learning_rate": 4.982998642788754e-05, "loss": 0.8928, "step": 106 }, { "epoch": 0.004691224182324009, "grad_norm": 0.84375, "learning_rate": 4.9826004053823324e-05, "loss": 0.9693, "step": 107 }, { "epoch": 0.004735067398981243, "grad_norm": 0.859375, "learning_rate": 4.982202178609998e-05, "loss": 0.8087, "step": 108 }, { "epoch": 0.004778910615638477, "grad_norm": 0.77734375, "learning_rate": 4.981803962472603e-05, "loss": 0.882, "step": 109 }, { "epoch": 0.004822753832295711, "grad_norm": 0.75390625, "learning_rate": 4.9814057569709885e-05, "loss": 0.7753, "step": 110 }, { "epoch": 0.004866597048952944, "grad_norm": 0.85546875, "learning_rate": 4.981007562105996e-05, "loss": 0.9255, "step": 111 }, { "epoch": 0.004910440265610178, "grad_norm": 0.88671875, "learning_rate": 4.980609377878472e-05, "loss": 0.8581, "step": 112 }, { "epoch": 0.004954283482267412, "grad_norm": 0.87109375, "learning_rate": 4.98021120428926e-05, "loss": 0.9629, "step": 113 }, { "epoch": 0.004998126698924646, "grad_norm": 0.859375, "learning_rate": 4.979813041339203e-05, "loss": 0.9996, "step": 114 }, { "epoch": 0.005041969915581879, "grad_norm": 0.75390625, "learning_rate": 4.979414889029146e-05, "loss": 0.7842, "step": 115 }, { "epoch": 0.005085813132239113, "grad_norm": 0.85546875, "learning_rate": 4.9790167473599315e-05, "loss": 0.8796, "step": 116 }, { "epoch": 0.0051296563488963465, "grad_norm": 0.79296875, "learning_rate": 4.9786186163324035e-05, "loss": 0.8441, "step": 117 }, { "epoch": 0.00517349956555358, "grad_norm": 0.90625, "learning_rate": 4.9782204959474033e-05, "loss": 0.9248, "step": 118 }, { "epoch": 0.005217342782210814, "grad_norm": 0.83203125, "learning_rate": 4.977822386205781e-05, "loss": 0.9112, "step": 119 }, { "epoch": 0.005261185998868048, "grad_norm": 0.81640625, "learning_rate": 4.9774242871083774e-05, "loss": 0.7753, "step": 120 }, { "epoch": 0.005305029215525282, "grad_norm": 0.796875, "learning_rate": 4.977026198656036e-05, "loss": 0.9881, "step": 121 }, { "epoch": 0.005348872432182516, "grad_norm": 0.8046875, "learning_rate": 4.9766281208496e-05, "loss": 0.8306, "step": 122 }, { "epoch": 0.0053927156488397495, "grad_norm": 1.078125, "learning_rate": 4.97623005368991e-05, "loss": 0.8605, "step": 123 }, { "epoch": 0.005436558865496982, "grad_norm": 0.88671875, "learning_rate": 4.975831997177818e-05, "loss": 1.0703, "step": 124 }, { "epoch": 0.005480402082154216, "grad_norm": 0.8125, "learning_rate": 4.975433951314161e-05, "loss": 0.8439, "step": 125 }, { "epoch": 0.00552424529881145, "grad_norm": 0.8671875, "learning_rate": 4.975035916099786e-05, "loss": 0.8515, "step": 126 }, { "epoch": 0.005568088515468684, "grad_norm": 0.8515625, "learning_rate": 4.974637891535535e-05, "loss": 0.8655, "step": 127 }, { "epoch": 0.005611931732125918, "grad_norm": 0.80859375, "learning_rate": 4.974239877622248e-05, "loss": 0.9295, "step": 128 }, { "epoch": 0.005655774948783152, "grad_norm": 0.796875, "learning_rate": 4.9738418743607765e-05, "loss": 0.7708, "step": 129 }, { "epoch": 0.005699618165440385, "grad_norm": 0.78515625, "learning_rate": 4.973443881751959e-05, "loss": 0.7583, "step": 130 }, { "epoch": 0.005743461382097619, "grad_norm": 0.81640625, "learning_rate": 4.97304589979664e-05, "loss": 0.9152, "step": 131 }, { "epoch": 0.005787304598754853, "grad_norm": 0.94140625, "learning_rate": 4.972647928495663e-05, "loss": 1.0113, "step": 132 }, { "epoch": 0.005831147815412086, "grad_norm": 0.91796875, "learning_rate": 4.972249967849868e-05, "loss": 0.9016, "step": 133 }, { "epoch": 0.00587499103206932, "grad_norm": 0.921875, "learning_rate": 4.971852017860105e-05, "loss": 0.9161, "step": 134 }, { "epoch": 0.005918834248726554, "grad_norm": 0.8515625, "learning_rate": 4.9714540785272146e-05, "loss": 0.8866, "step": 135 }, { "epoch": 0.0059626774653837875, "grad_norm": 0.8359375, "learning_rate": 4.971056149852039e-05, "loss": 0.8917, "step": 136 }, { "epoch": 0.006006520682041021, "grad_norm": 0.85546875, "learning_rate": 4.970658231835424e-05, "loss": 0.8551, "step": 137 }, { "epoch": 0.006050363898698255, "grad_norm": 0.91796875, "learning_rate": 4.970260324478211e-05, "loss": 0.9945, "step": 138 }, { "epoch": 0.006094207115355489, "grad_norm": 0.85546875, "learning_rate": 4.969862427781239e-05, "loss": 0.8729, "step": 139 }, { "epoch": 0.006138050332012723, "grad_norm": 0.85546875, "learning_rate": 4.96946454174536e-05, "loss": 0.9312, "step": 140 }, { "epoch": 0.006181893548669957, "grad_norm": 0.84765625, "learning_rate": 4.969066666371415e-05, "loss": 0.8313, "step": 141 }, { "epoch": 0.00622573676532719, "grad_norm": 0.85546875, "learning_rate": 4.9686688016602436e-05, "loss": 0.9059, "step": 142 }, { "epoch": 0.006269579981984423, "grad_norm": 0.90234375, "learning_rate": 4.968270947612692e-05, "loss": 0.9432, "step": 143 }, { "epoch": 0.006313423198641657, "grad_norm": 0.96484375, "learning_rate": 4.967873104229599e-05, "loss": 0.915, "step": 144 }, { "epoch": 0.006357266415298891, "grad_norm": 0.85546875, "learning_rate": 4.9674752715118146e-05, "loss": 0.9155, "step": 145 }, { "epoch": 0.006401109631956125, "grad_norm": 0.79296875, "learning_rate": 4.967077449460179e-05, "loss": 0.7594, "step": 146 }, { "epoch": 0.006444952848613359, "grad_norm": 0.765625, "learning_rate": 4.9666796380755356e-05, "loss": 0.8237, "step": 147 }, { "epoch": 0.0064887960652705926, "grad_norm": 0.8671875, "learning_rate": 4.966281837358726e-05, "loss": 0.7984, "step": 148 }, { "epoch": 0.006532639281927826, "grad_norm": 0.94921875, "learning_rate": 4.9658840473105905e-05, "loss": 0.9267, "step": 149 }, { "epoch": 0.00657648249858506, "grad_norm": 0.79296875, "learning_rate": 4.9654862679319814e-05, "loss": 0.8037, "step": 150 }, { "epoch": 0.006620325715242293, "grad_norm": 0.75, "learning_rate": 4.9650884992237344e-05, "loss": 0.7768, "step": 151 }, { "epoch": 0.006664168931899527, "grad_norm": 0.83203125, "learning_rate": 4.964690741186696e-05, "loss": 0.9107, "step": 152 }, { "epoch": 0.006708012148556761, "grad_norm": 0.8125, "learning_rate": 4.964292993821708e-05, "loss": 0.7079, "step": 153 }, { "epoch": 0.006751855365213995, "grad_norm": 0.8359375, "learning_rate": 4.9638952571296115e-05, "loss": 0.8807, "step": 154 }, { "epoch": 0.0067956985818712285, "grad_norm": 0.93359375, "learning_rate": 4.9634975311112484e-05, "loss": 0.9414, "step": 155 }, { "epoch": 0.006839541798528462, "grad_norm": 0.8515625, "learning_rate": 4.963099815767468e-05, "loss": 0.7123, "step": 156 }, { "epoch": 0.006883385015185696, "grad_norm": 0.87890625, "learning_rate": 4.96270211109911e-05, "loss": 0.8115, "step": 157 }, { "epoch": 0.00692722823184293, "grad_norm": 0.859375, "learning_rate": 4.9623044171070165e-05, "loss": 0.9378, "step": 158 }, { "epoch": 0.006971071448500164, "grad_norm": 0.86328125, "learning_rate": 4.961906733792031e-05, "loss": 0.8421, "step": 159 }, { "epoch": 0.007014914665157397, "grad_norm": 0.796875, "learning_rate": 4.9615090611549966e-05, "loss": 0.8475, "step": 160 }, { "epoch": 0.007058757881814631, "grad_norm": 0.796875, "learning_rate": 4.961111399196755e-05, "loss": 0.8359, "step": 161 }, { "epoch": 0.007102601098471864, "grad_norm": 0.96484375, "learning_rate": 4.9607137479181496e-05, "loss": 0.7138, "step": 162 }, { "epoch": 0.007146444315129098, "grad_norm": 0.87890625, "learning_rate": 4.960316107320024e-05, "loss": 1.0373, "step": 163 }, { "epoch": 0.007190287531786332, "grad_norm": 0.9140625, "learning_rate": 4.959918477403217e-05, "loss": 0.9913, "step": 164 }, { "epoch": 0.007234130748443566, "grad_norm": 0.80859375, "learning_rate": 4.959520858168578e-05, "loss": 0.8247, "step": 165 }, { "epoch": 0.0072779739651008, "grad_norm": 0.8828125, "learning_rate": 4.959123249616946e-05, "loss": 0.9366, "step": 166 }, { "epoch": 0.0073218171817580335, "grad_norm": 0.78125, "learning_rate": 4.9587256517491654e-05, "loss": 0.82, "step": 167 }, { "epoch": 0.007365660398415267, "grad_norm": 0.80078125, "learning_rate": 4.9583280645660766e-05, "loss": 0.9535, "step": 168 }, { "epoch": 0.0074095036150725, "grad_norm": 0.83203125, "learning_rate": 4.957930488068519e-05, "loss": 0.8166, "step": 169 }, { "epoch": 0.007453346831729734, "grad_norm": 0.80078125, "learning_rate": 4.9575329222573444e-05, "loss": 0.8775, "step": 170 }, { "epoch": 0.007497190048386968, "grad_norm": 0.84375, "learning_rate": 4.957135367133391e-05, "loss": 0.8665, "step": 171 }, { "epoch": 0.007541033265044202, "grad_norm": 0.97265625, "learning_rate": 4.956737822697499e-05, "loss": 0.9813, "step": 172 }, { "epoch": 0.007584876481701436, "grad_norm": 0.87109375, "learning_rate": 4.956340288950515e-05, "loss": 0.8716, "step": 173 }, { "epoch": 0.0076287196983586695, "grad_norm": 0.85546875, "learning_rate": 4.9559427658932746e-05, "loss": 0.9009, "step": 174 }, { "epoch": 0.007672562915015903, "grad_norm": 0.76953125, "learning_rate": 4.955545253526629e-05, "loss": 0.8169, "step": 175 }, { "epoch": 0.007716406131673137, "grad_norm": 0.8203125, "learning_rate": 4.955147751851418e-05, "loss": 0.9127, "step": 176 }, { "epoch": 0.007760249348330371, "grad_norm": 0.8515625, "learning_rate": 4.954750260868481e-05, "loss": 0.7492, "step": 177 }, { "epoch": 0.007804092564987604, "grad_norm": 0.8046875, "learning_rate": 4.9543527805786635e-05, "loss": 0.7702, "step": 178 }, { "epoch": 0.007847935781644838, "grad_norm": 0.8203125, "learning_rate": 4.953955310982803e-05, "loss": 0.9048, "step": 179 }, { "epoch": 0.007891778998302072, "grad_norm": 0.859375, "learning_rate": 4.953557852081751e-05, "loss": 0.8198, "step": 180 }, { "epoch": 0.007935622214959305, "grad_norm": 0.8515625, "learning_rate": 4.953160403876342e-05, "loss": 0.8676, "step": 181 }, { "epoch": 0.00797946543161654, "grad_norm": 0.82421875, "learning_rate": 4.952762966367424e-05, "loss": 0.8211, "step": 182 }, { "epoch": 0.008023308648273773, "grad_norm": 0.90625, "learning_rate": 4.952365539555835e-05, "loss": 0.9046, "step": 183 }, { "epoch": 0.008067151864931007, "grad_norm": 0.87109375, "learning_rate": 4.951968123442414e-05, "loss": 0.9406, "step": 184 }, { "epoch": 0.00811099508158824, "grad_norm": 0.78125, "learning_rate": 4.9515707180280124e-05, "loss": 0.9604, "step": 185 }, { "epoch": 0.008154838298245475, "grad_norm": 0.8984375, "learning_rate": 4.9511733233134685e-05, "loss": 0.8354, "step": 186 }, { "epoch": 0.008198681514902708, "grad_norm": 0.80078125, "learning_rate": 4.9507759392996245e-05, "loss": 0.7426, "step": 187 }, { "epoch": 0.008242524731559942, "grad_norm": 0.85546875, "learning_rate": 4.9503785659873216e-05, "loss": 0.8947, "step": 188 }, { "epoch": 0.008286367948217176, "grad_norm": 0.84765625, "learning_rate": 4.9499812033773985e-05, "loss": 0.9796, "step": 189 }, { "epoch": 0.00833021116487441, "grad_norm": 0.86328125, "learning_rate": 4.9495838514707064e-05, "loss": 0.8942, "step": 190 }, { "epoch": 0.008374054381531644, "grad_norm": 0.8125, "learning_rate": 4.949186510268082e-05, "loss": 1.0091, "step": 191 }, { "epoch": 0.008417897598188876, "grad_norm": 0.90234375, "learning_rate": 4.9487891797703686e-05, "loss": 0.8141, "step": 192 }, { "epoch": 0.00846174081484611, "grad_norm": 0.90234375, "learning_rate": 4.9483918599784076e-05, "loss": 0.8139, "step": 193 }, { "epoch": 0.008505584031503343, "grad_norm": 0.828125, "learning_rate": 4.947994550893038e-05, "loss": 0.832, "step": 194 }, { "epoch": 0.008549427248160577, "grad_norm": 0.86328125, "learning_rate": 4.9475972525151085e-05, "loss": 0.8613, "step": 195 }, { "epoch": 0.008593270464817811, "grad_norm": 0.8984375, "learning_rate": 4.947199964845457e-05, "loss": 0.8824, "step": 196 }, { "epoch": 0.008637113681475045, "grad_norm": 0.828125, "learning_rate": 4.946802687884927e-05, "loss": 0.7715, "step": 197 }, { "epoch": 0.008680956898132279, "grad_norm": 0.98046875, "learning_rate": 4.9464054216343606e-05, "loss": 0.8956, "step": 198 }, { "epoch": 0.008724800114789513, "grad_norm": 0.78125, "learning_rate": 4.946008166094599e-05, "loss": 0.7596, "step": 199 }, { "epoch": 0.008768643331446746, "grad_norm": 0.86328125, "learning_rate": 4.9456109212664833e-05, "loss": 0.9681, "step": 200 }, { "epoch": 0.00881248654810398, "grad_norm": 0.83984375, "learning_rate": 4.945213687150857e-05, "loss": 0.8719, "step": 201 }, { "epoch": 0.008856329764761214, "grad_norm": 0.8359375, "learning_rate": 4.9448164637485616e-05, "loss": 0.7695, "step": 202 }, { "epoch": 0.008900172981418448, "grad_norm": 0.91015625, "learning_rate": 4.9444192510604384e-05, "loss": 0.9245, "step": 203 }, { "epoch": 0.008944016198075682, "grad_norm": 0.8359375, "learning_rate": 4.9440220490873255e-05, "loss": 0.8294, "step": 204 }, { "epoch": 0.008987859414732915, "grad_norm": 0.84765625, "learning_rate": 4.9436248578300724e-05, "loss": 0.7191, "step": 205 }, { "epoch": 0.00903170263139015, "grad_norm": 0.87890625, "learning_rate": 4.943227677289518e-05, "loss": 0.8751, "step": 206 }, { "epoch": 0.009075545848047383, "grad_norm": 0.78125, "learning_rate": 4.9428305074665036e-05, "loss": 0.7951, "step": 207 }, { "epoch": 0.009119389064704617, "grad_norm": 0.9140625, "learning_rate": 4.942433348361871e-05, "loss": 0.8357, "step": 208 }, { "epoch": 0.00916323228136185, "grad_norm": 1.1328125, "learning_rate": 4.942036199976459e-05, "loss": 1.0444, "step": 209 }, { "epoch": 0.009207075498019083, "grad_norm": 0.89453125, "learning_rate": 4.9416390623111144e-05, "loss": 0.9875, "step": 210 }, { "epoch": 0.009250918714676317, "grad_norm": 0.84375, "learning_rate": 4.941241935366676e-05, "loss": 0.7802, "step": 211 }, { "epoch": 0.00929476193133355, "grad_norm": 0.9375, "learning_rate": 4.940844819143988e-05, "loss": 1.0067, "step": 212 }, { "epoch": 0.009338605147990784, "grad_norm": 0.8984375, "learning_rate": 4.9404477136438896e-05, "loss": 0.8602, "step": 213 }, { "epoch": 0.009382448364648018, "grad_norm": 0.89453125, "learning_rate": 4.940050618867219e-05, "loss": 0.9289, "step": 214 }, { "epoch": 0.009426291581305252, "grad_norm": 0.81640625, "learning_rate": 4.939653534814826e-05, "loss": 0.8571, "step": 215 }, { "epoch": 0.009470134797962486, "grad_norm": 0.86328125, "learning_rate": 4.939256461487548e-05, "loss": 0.8362, "step": 216 }, { "epoch": 0.00951397801461972, "grad_norm": 0.90234375, "learning_rate": 4.938859398886226e-05, "loss": 0.794, "step": 217 }, { "epoch": 0.009557821231276954, "grad_norm": 0.88671875, "learning_rate": 4.938462347011703e-05, "loss": 0.843, "step": 218 }, { "epoch": 0.009601664447934187, "grad_norm": 0.83984375, "learning_rate": 4.9380653058648166e-05, "loss": 0.9118, "step": 219 }, { "epoch": 0.009645507664591421, "grad_norm": 0.89453125, "learning_rate": 4.937668275446414e-05, "loss": 0.8475, "step": 220 }, { "epoch": 0.009689350881248655, "grad_norm": 0.84375, "learning_rate": 4.937271255757335e-05, "loss": 0.823, "step": 221 }, { "epoch": 0.009733194097905889, "grad_norm": 0.84375, "learning_rate": 4.9368742467984194e-05, "loss": 0.8775, "step": 222 }, { "epoch": 0.009777037314563123, "grad_norm": 0.8828125, "learning_rate": 4.93647724857051e-05, "loss": 0.8699, "step": 223 }, { "epoch": 0.009820880531220356, "grad_norm": 0.76953125, "learning_rate": 4.936080261074444e-05, "loss": 0.9074, "step": 224 }, { "epoch": 0.00986472374787759, "grad_norm": 0.7734375, "learning_rate": 4.93568328431107e-05, "loss": 0.7563, "step": 225 }, { "epoch": 0.009908566964534824, "grad_norm": 0.93359375, "learning_rate": 4.935286318281226e-05, "loss": 0.7951, "step": 226 }, { "epoch": 0.009952410181192058, "grad_norm": 0.81640625, "learning_rate": 4.934889362985753e-05, "loss": 0.9238, "step": 227 }, { "epoch": 0.009996253397849292, "grad_norm": 0.81640625, "learning_rate": 4.9344924184254916e-05, "loss": 0.7984, "step": 228 }, { "epoch": 0.010040096614506524, "grad_norm": 0.7734375, "learning_rate": 4.934095484601281e-05, "loss": 0.8418, "step": 229 }, { "epoch": 0.010083939831163758, "grad_norm": 0.82421875, "learning_rate": 4.93369856151397e-05, "loss": 0.9034, "step": 230 }, { "epoch": 0.010127783047820992, "grad_norm": 0.76953125, "learning_rate": 4.9333016491643944e-05, "loss": 0.8955, "step": 231 }, { "epoch": 0.010171626264478225, "grad_norm": 0.828125, "learning_rate": 4.932904747553396e-05, "loss": 0.8477, "step": 232 }, { "epoch": 0.01021546948113546, "grad_norm": 0.82421875, "learning_rate": 4.932507856681817e-05, "loss": 0.8272, "step": 233 }, { "epoch": 0.010259312697792693, "grad_norm": 0.81640625, "learning_rate": 4.9321109765504944e-05, "loss": 0.9652, "step": 234 }, { "epoch": 0.010303155914449927, "grad_norm": 0.80859375, "learning_rate": 4.9317141071602766e-05, "loss": 0.8422, "step": 235 }, { "epoch": 0.01034699913110716, "grad_norm": 0.88671875, "learning_rate": 4.9313172485120005e-05, "loss": 0.7664, "step": 236 }, { "epoch": 0.010390842347764394, "grad_norm": 0.74609375, "learning_rate": 4.930920400606508e-05, "loss": 0.8047, "step": 237 }, { "epoch": 0.010434685564421628, "grad_norm": 0.84765625, "learning_rate": 4.930523563444641e-05, "loss": 0.8528, "step": 238 }, { "epoch": 0.010478528781078862, "grad_norm": 0.84765625, "learning_rate": 4.930126737027239e-05, "loss": 0.8917, "step": 239 }, { "epoch": 0.010522371997736096, "grad_norm": 0.83203125, "learning_rate": 4.929729921355143e-05, "loss": 0.9132, "step": 240 }, { "epoch": 0.01056621521439333, "grad_norm": 0.86328125, "learning_rate": 4.929333116429191e-05, "loss": 0.9906, "step": 241 }, { "epoch": 0.010610058431050564, "grad_norm": 0.765625, "learning_rate": 4.9289363222502316e-05, "loss": 0.8452, "step": 242 }, { "epoch": 0.010653901647707797, "grad_norm": 0.8203125, "learning_rate": 4.928539538819101e-05, "loss": 0.8529, "step": 243 }, { "epoch": 0.010697744864365031, "grad_norm": 0.80078125, "learning_rate": 4.928142766136642e-05, "loss": 0.9488, "step": 244 }, { "epoch": 0.010741588081022265, "grad_norm": 0.8046875, "learning_rate": 4.927746004203695e-05, "loss": 0.8252, "step": 245 }, { "epoch": 0.010785431297679499, "grad_norm": 0.8359375, "learning_rate": 4.927349253021099e-05, "loss": 0.9818, "step": 246 }, { "epoch": 0.010829274514336731, "grad_norm": 0.81640625, "learning_rate": 4.9269525125896975e-05, "loss": 0.8125, "step": 247 }, { "epoch": 0.010873117730993965, "grad_norm": 1.15625, "learning_rate": 4.926555782910329e-05, "loss": 0.7566, "step": 248 }, { "epoch": 0.010916960947651199, "grad_norm": 0.80078125, "learning_rate": 4.926159063983833e-05, "loss": 0.8182, "step": 249 }, { "epoch": 0.010960804164308433, "grad_norm": 0.8515625, "learning_rate": 4.9257623558110555e-05, "loss": 0.8388, "step": 250 }, { "epoch": 0.011004647380965666, "grad_norm": 0.8671875, "learning_rate": 4.925365658392835e-05, "loss": 0.8688, "step": 251 }, { "epoch": 0.0110484905976229, "grad_norm": 0.84765625, "learning_rate": 4.924968971730013e-05, "loss": 0.8038, "step": 252 }, { "epoch": 0.011092333814280134, "grad_norm": 0.8828125, "learning_rate": 4.924572295823429e-05, "loss": 0.8992, "step": 253 }, { "epoch": 0.011136177030937368, "grad_norm": 0.83203125, "learning_rate": 4.9241756306739185e-05, "loss": 0.9462, "step": 254 }, { "epoch": 0.011180020247594602, "grad_norm": 0.76953125, "learning_rate": 4.9237789762823325e-05, "loss": 0.8978, "step": 255 }, { "epoch": 0.011223863464251835, "grad_norm": 0.91796875, "learning_rate": 4.923382332649506e-05, "loss": 0.8479, "step": 256 }, { "epoch": 0.01126770668090907, "grad_norm": 1.1015625, "learning_rate": 4.922985699776283e-05, "loss": 0.9143, "step": 257 }, { "epoch": 0.011311549897566303, "grad_norm": 0.79296875, "learning_rate": 4.922589077663499e-05, "loss": 0.7431, "step": 258 }, { "epoch": 0.011355393114223537, "grad_norm": 0.80859375, "learning_rate": 4.9221924663119946e-05, "loss": 0.8086, "step": 259 }, { "epoch": 0.01139923633088077, "grad_norm": 0.8125, "learning_rate": 4.921795865722616e-05, "loss": 0.8557, "step": 260 }, { "epoch": 0.011443079547538005, "grad_norm": 0.80078125, "learning_rate": 4.921399275896201e-05, "loss": 0.7435, "step": 261 }, { "epoch": 0.011486922764195238, "grad_norm": 0.796875, "learning_rate": 4.921002696833591e-05, "loss": 0.7888, "step": 262 }, { "epoch": 0.011530765980852472, "grad_norm": 0.84375, "learning_rate": 4.920606128535624e-05, "loss": 0.7889, "step": 263 }, { "epoch": 0.011574609197509706, "grad_norm": 1.21875, "learning_rate": 4.920209571003139e-05, "loss": 0.9391, "step": 264 }, { "epoch": 0.011618452414166938, "grad_norm": 0.95703125, "learning_rate": 4.919813024236983e-05, "loss": 0.9565, "step": 265 }, { "epoch": 0.011662295630824172, "grad_norm": 0.7734375, "learning_rate": 4.9194164882379936e-05, "loss": 0.7463, "step": 266 }, { "epoch": 0.011706138847481406, "grad_norm": 0.921875, "learning_rate": 4.91901996300701e-05, "loss": 0.9365, "step": 267 }, { "epoch": 0.01174998206413864, "grad_norm": 1.1328125, "learning_rate": 4.918623448544874e-05, "loss": 0.7788, "step": 268 }, { "epoch": 0.011793825280795874, "grad_norm": 0.90625, "learning_rate": 4.91822694485242e-05, "loss": 0.8689, "step": 269 }, { "epoch": 0.011837668497453107, "grad_norm": 0.87109375, "learning_rate": 4.9178304519304984e-05, "loss": 0.8732, "step": 270 }, { "epoch": 0.011881511714110341, "grad_norm": 0.82421875, "learning_rate": 4.9174339697799445e-05, "loss": 0.8271, "step": 271 }, { "epoch": 0.011925354930767575, "grad_norm": 0.7734375, "learning_rate": 4.917037498401598e-05, "loss": 0.7494, "step": 272 }, { "epoch": 0.011969198147424809, "grad_norm": 0.80078125, "learning_rate": 4.9166410377963e-05, "loss": 0.7875, "step": 273 }, { "epoch": 0.012013041364082043, "grad_norm": 0.80078125, "learning_rate": 4.916244587964888e-05, "loss": 0.8375, "step": 274 }, { "epoch": 0.012056884580739276, "grad_norm": 0.828125, "learning_rate": 4.9158481489082084e-05, "loss": 0.8705, "step": 275 }, { "epoch": 0.01210072779739651, "grad_norm": 0.83984375, "learning_rate": 4.915451720627098e-05, "loss": 1.045, "step": 276 }, { "epoch": 0.012144571014053744, "grad_norm": 0.83203125, "learning_rate": 4.915055303122397e-05, "loss": 0.864, "step": 277 }, { "epoch": 0.012188414230710978, "grad_norm": 0.87109375, "learning_rate": 4.9146588963949456e-05, "loss": 0.8254, "step": 278 }, { "epoch": 0.012232257447368212, "grad_norm": 0.87109375, "learning_rate": 4.914262500445584e-05, "loss": 0.8439, "step": 279 }, { "epoch": 0.012276100664025446, "grad_norm": 0.79296875, "learning_rate": 4.913866115275149e-05, "loss": 0.8367, "step": 280 }, { "epoch": 0.01231994388068268, "grad_norm": 0.80078125, "learning_rate": 4.9134697408844874e-05, "loss": 0.8986, "step": 281 }, { "epoch": 0.012363787097339913, "grad_norm": 0.71484375, "learning_rate": 4.913073377274437e-05, "loss": 0.8905, "step": 282 }, { "epoch": 0.012407630313997145, "grad_norm": 0.84765625, "learning_rate": 4.912677024445834e-05, "loss": 0.8692, "step": 283 }, { "epoch": 0.01245147353065438, "grad_norm": 0.92578125, "learning_rate": 4.9122806823995236e-05, "loss": 0.8898, "step": 284 }, { "epoch": 0.012495316747311613, "grad_norm": 0.921875, "learning_rate": 4.911884351136343e-05, "loss": 0.8803, "step": 285 }, { "epoch": 0.012539159963968847, "grad_norm": 0.90625, "learning_rate": 4.911488030657133e-05, "loss": 0.8027, "step": 286 }, { "epoch": 0.01258300318062608, "grad_norm": 0.76171875, "learning_rate": 4.911091720962733e-05, "loss": 0.9842, "step": 287 }, { "epoch": 0.012626846397283314, "grad_norm": 0.84375, "learning_rate": 4.910695422053984e-05, "loss": 0.8083, "step": 288 }, { "epoch": 0.012670689613940548, "grad_norm": 0.84765625, "learning_rate": 4.910299133931719e-05, "loss": 0.8819, "step": 289 }, { "epoch": 0.012714532830597782, "grad_norm": 0.8671875, "learning_rate": 4.9099028565967895e-05, "loss": 0.8287, "step": 290 }, { "epoch": 0.012758376047255016, "grad_norm": 0.83203125, "learning_rate": 4.9095065900500304e-05, "loss": 0.85, "step": 291 }, { "epoch": 0.01280221926391225, "grad_norm": 0.7890625, "learning_rate": 4.90911033429228e-05, "loss": 0.796, "step": 292 }, { "epoch": 0.012846062480569484, "grad_norm": 0.8828125, "learning_rate": 4.908714089324381e-05, "loss": 0.9037, "step": 293 }, { "epoch": 0.012889905697226717, "grad_norm": 0.80859375, "learning_rate": 4.90831785514717e-05, "loss": 0.7762, "step": 294 }, { "epoch": 0.012933748913883951, "grad_norm": 0.93359375, "learning_rate": 4.907921631761485e-05, "loss": 0.8844, "step": 295 }, { "epoch": 0.012977592130541185, "grad_norm": 0.9296875, "learning_rate": 4.907525419168173e-05, "loss": 0.7813, "step": 296 }, { "epoch": 0.013021435347198419, "grad_norm": 0.84375, "learning_rate": 4.9071292173680694e-05, "loss": 0.9135, "step": 297 }, { "epoch": 0.013065278563855653, "grad_norm": 0.78125, "learning_rate": 4.9067330263620145e-05, "loss": 0.9442, "step": 298 }, { "epoch": 0.013109121780512887, "grad_norm": 0.88671875, "learning_rate": 4.906336846150848e-05, "loss": 0.8806, "step": 299 }, { "epoch": 0.01315296499717012, "grad_norm": 0.84375, "learning_rate": 4.905940676735405e-05, "loss": 0.8596, "step": 300 }, { "epoch": 0.013196808213827354, "grad_norm": 0.75, "learning_rate": 4.905544518116534e-05, "loss": 0.8673, "step": 301 }, { "epoch": 0.013240651430484586, "grad_norm": 0.734375, "learning_rate": 4.9051483702950694e-05, "loss": 0.8887, "step": 302 }, { "epoch": 0.01328449464714182, "grad_norm": 0.8125, "learning_rate": 4.9047522332718534e-05, "loss": 0.8749, "step": 303 }, { "epoch": 0.013328337863799054, "grad_norm": 0.84765625, "learning_rate": 4.904356107047722e-05, "loss": 0.951, "step": 304 }, { "epoch": 0.013372181080456288, "grad_norm": 0.80078125, "learning_rate": 4.9039599916235134e-05, "loss": 0.8814, "step": 305 }, { "epoch": 0.013416024297113522, "grad_norm": 1.0, "learning_rate": 4.9035638870000746e-05, "loss": 0.9642, "step": 306 }, { "epoch": 0.013459867513770755, "grad_norm": 0.8359375, "learning_rate": 4.903167793178239e-05, "loss": 0.8942, "step": 307 }, { "epoch": 0.01350371073042799, "grad_norm": 0.7265625, "learning_rate": 4.90277171015885e-05, "loss": 0.8147, "step": 308 }, { "epoch": 0.013547553947085223, "grad_norm": 0.80078125, "learning_rate": 4.902375637942744e-05, "loss": 0.7401, "step": 309 }, { "epoch": 0.013591397163742457, "grad_norm": 0.79296875, "learning_rate": 4.9019795765307584e-05, "loss": 0.8283, "step": 310 }, { "epoch": 0.01363524038039969, "grad_norm": 1.1171875, "learning_rate": 4.901583525923738e-05, "loss": 0.8187, "step": 311 }, { "epoch": 0.013679083597056925, "grad_norm": 0.94921875, "learning_rate": 4.901187486122523e-05, "loss": 1.0098, "step": 312 }, { "epoch": 0.013722926813714158, "grad_norm": 0.80859375, "learning_rate": 4.900791457127947e-05, "loss": 0.761, "step": 313 }, { "epoch": 0.013766770030371392, "grad_norm": 0.87109375, "learning_rate": 4.9003954389408525e-05, "loss": 0.9011, "step": 314 }, { "epoch": 0.013810613247028626, "grad_norm": 0.8359375, "learning_rate": 4.899999431562075e-05, "loss": 0.9243, "step": 315 }, { "epoch": 0.01385445646368586, "grad_norm": 0.77734375, "learning_rate": 4.89960343499246e-05, "loss": 0.7063, "step": 316 }, { "epoch": 0.013898299680343094, "grad_norm": 0.95703125, "learning_rate": 4.899207449232845e-05, "loss": 0.7827, "step": 317 }, { "epoch": 0.013942142897000328, "grad_norm": 1.125, "learning_rate": 4.898811474284068e-05, "loss": 0.9359, "step": 318 }, { "epoch": 0.013985986113657561, "grad_norm": 0.76171875, "learning_rate": 4.8984155101469697e-05, "loss": 0.9044, "step": 319 }, { "epoch": 0.014029829330314793, "grad_norm": 0.82421875, "learning_rate": 4.898019556822383e-05, "loss": 0.9531, "step": 320 }, { "epoch": 0.014073672546972027, "grad_norm": 0.78515625, "learning_rate": 4.897623614311156e-05, "loss": 0.8352, "step": 321 }, { "epoch": 0.014117515763629261, "grad_norm": 0.8671875, "learning_rate": 4.897227682614124e-05, "loss": 0.8428, "step": 322 }, { "epoch": 0.014161358980286495, "grad_norm": 0.84375, "learning_rate": 4.896831761732127e-05, "loss": 0.7798, "step": 323 }, { "epoch": 0.014205202196943729, "grad_norm": 0.8359375, "learning_rate": 4.896435851666004e-05, "loss": 0.9632, "step": 324 }, { "epoch": 0.014249045413600963, "grad_norm": 0.7578125, "learning_rate": 4.896039952416591e-05, "loss": 0.809, "step": 325 }, { "epoch": 0.014292888630258196, "grad_norm": 0.87109375, "learning_rate": 4.895644063984728e-05, "loss": 0.9257, "step": 326 }, { "epoch": 0.01433673184691543, "grad_norm": 0.79296875, "learning_rate": 4.895248186371258e-05, "loss": 0.7274, "step": 327 }, { "epoch": 0.014380575063572664, "grad_norm": 0.796875, "learning_rate": 4.894852319577019e-05, "loss": 0.7734, "step": 328 }, { "epoch": 0.014424418280229898, "grad_norm": 0.8671875, "learning_rate": 4.894456463602849e-05, "loss": 0.8839, "step": 329 }, { "epoch": 0.014468261496887132, "grad_norm": 0.83984375, "learning_rate": 4.894060618449585e-05, "loss": 0.7676, "step": 330 }, { "epoch": 0.014512104713544366, "grad_norm": 0.90234375, "learning_rate": 4.893664784118069e-05, "loss": 0.778, "step": 331 }, { "epoch": 0.0145559479302016, "grad_norm": 0.8515625, "learning_rate": 4.8932689606091386e-05, "loss": 1.0188, "step": 332 }, { "epoch": 0.014599791146858833, "grad_norm": 0.9375, "learning_rate": 4.892873147923632e-05, "loss": 0.8581, "step": 333 }, { "epoch": 0.014643634363516067, "grad_norm": 0.84765625, "learning_rate": 4.89247734606239e-05, "loss": 0.9083, "step": 334 }, { "epoch": 0.014687477580173301, "grad_norm": 0.90625, "learning_rate": 4.892081555026245e-05, "loss": 0.9071, "step": 335 }, { "epoch": 0.014731320796830535, "grad_norm": 1.0546875, "learning_rate": 4.891685774816046e-05, "loss": 0.7674, "step": 336 }, { "epoch": 0.014775164013487769, "grad_norm": 0.921875, "learning_rate": 4.891290005432626e-05, "loss": 1.0123, "step": 337 }, { "epoch": 0.014819007230145, "grad_norm": 0.82421875, "learning_rate": 4.890894246876826e-05, "loss": 0.843, "step": 338 }, { "epoch": 0.014862850446802234, "grad_norm": 0.93359375, "learning_rate": 4.8904984991494826e-05, "loss": 0.8486, "step": 339 }, { "epoch": 0.014906693663459468, "grad_norm": 0.8671875, "learning_rate": 4.890102762251433e-05, "loss": 0.8797, "step": 340 }, { "epoch": 0.014950536880116702, "grad_norm": 0.93359375, "learning_rate": 4.889707036183522e-05, "loss": 0.8694, "step": 341 }, { "epoch": 0.014994380096773936, "grad_norm": 0.94921875, "learning_rate": 4.8893113209465844e-05, "loss": 0.7968, "step": 342 }, { "epoch": 0.01503822331343117, "grad_norm": 0.97265625, "learning_rate": 4.88891561654146e-05, "loss": 0.8781, "step": 343 }, { "epoch": 0.015082066530088404, "grad_norm": 0.859375, "learning_rate": 4.888519922968986e-05, "loss": 0.8264, "step": 344 }, { "epoch": 0.015125909746745637, "grad_norm": 0.84375, "learning_rate": 4.888124240229999e-05, "loss": 0.8713, "step": 345 }, { "epoch": 0.015169752963402871, "grad_norm": 0.89453125, "learning_rate": 4.887728568325343e-05, "loss": 0.9656, "step": 346 }, { "epoch": 0.015213596180060105, "grad_norm": 0.86328125, "learning_rate": 4.887332907255855e-05, "loss": 0.8017, "step": 347 }, { "epoch": 0.015257439396717339, "grad_norm": 0.87109375, "learning_rate": 4.8869372570223725e-05, "loss": 0.8521, "step": 348 }, { "epoch": 0.015301282613374573, "grad_norm": 0.79296875, "learning_rate": 4.8865416176257336e-05, "loss": 0.7503, "step": 349 }, { "epoch": 0.015345125830031807, "grad_norm": 1.0234375, "learning_rate": 4.886145989066775e-05, "loss": 0.8797, "step": 350 }, { "epoch": 0.01538896904668904, "grad_norm": 0.80859375, "learning_rate": 4.8857503713463406e-05, "loss": 0.852, "step": 351 }, { "epoch": 0.015432812263346274, "grad_norm": 0.8671875, "learning_rate": 4.8853547644652664e-05, "loss": 0.7603, "step": 352 }, { "epoch": 0.015476655480003508, "grad_norm": 0.8515625, "learning_rate": 4.8849591684243904e-05, "loss": 0.8404, "step": 353 }, { "epoch": 0.015520498696660742, "grad_norm": 0.859375, "learning_rate": 4.884563583224551e-05, "loss": 0.7463, "step": 354 }, { "epoch": 0.015564341913317976, "grad_norm": 0.765625, "learning_rate": 4.884168008866582e-05, "loss": 0.7974, "step": 355 }, { "epoch": 0.015608185129975208, "grad_norm": 0.90234375, "learning_rate": 4.883772445351331e-05, "loss": 0.8703, "step": 356 }, { "epoch": 0.015652028346632443, "grad_norm": 0.85546875, "learning_rate": 4.883376892679632e-05, "loss": 0.8195, "step": 357 }, { "epoch": 0.015695871563289675, "grad_norm": 0.9140625, "learning_rate": 4.882981350852322e-05, "loss": 1.0289, "step": 358 }, { "epoch": 0.01573971477994691, "grad_norm": 0.8203125, "learning_rate": 4.8825858198702425e-05, "loss": 0.8706, "step": 359 }, { "epoch": 0.015783557996604143, "grad_norm": 0.796875, "learning_rate": 4.882190299734225e-05, "loss": 0.7424, "step": 360 }, { "epoch": 0.01582740121326138, "grad_norm": 0.87109375, "learning_rate": 4.881794790445118e-05, "loss": 0.8289, "step": 361 }, { "epoch": 0.01587124442991861, "grad_norm": 0.859375, "learning_rate": 4.881399292003752e-05, "loss": 0.8104, "step": 362 }, { "epoch": 0.015915087646575846, "grad_norm": 0.875, "learning_rate": 4.8810038044109694e-05, "loss": 0.941, "step": 363 }, { "epoch": 0.01595893086323308, "grad_norm": 0.80859375, "learning_rate": 4.880608327667605e-05, "loss": 0.8197, "step": 364 }, { "epoch": 0.016002774079890314, "grad_norm": 0.85546875, "learning_rate": 4.880212861774497e-05, "loss": 0.8605, "step": 365 }, { "epoch": 0.016046617296547546, "grad_norm": 0.76171875, "learning_rate": 4.8798174067324874e-05, "loss": 0.7394, "step": 366 }, { "epoch": 0.016090460513204778, "grad_norm": 0.89453125, "learning_rate": 4.879421962542412e-05, "loss": 0.7741, "step": 367 }, { "epoch": 0.016134303729862014, "grad_norm": 1.421875, "learning_rate": 4.8790265292051096e-05, "loss": 0.8532, "step": 368 }, { "epoch": 0.016178146946519246, "grad_norm": 0.7421875, "learning_rate": 4.8786311067214186e-05, "loss": 0.7703, "step": 369 }, { "epoch": 0.01622199016317648, "grad_norm": 0.95703125, "learning_rate": 4.878235695092175e-05, "loss": 0.8496, "step": 370 }, { "epoch": 0.016265833379833713, "grad_norm": 0.90234375, "learning_rate": 4.877840294318219e-05, "loss": 0.8791, "step": 371 }, { "epoch": 0.01630967659649095, "grad_norm": 0.8984375, "learning_rate": 4.877444904400387e-05, "loss": 1.009, "step": 372 }, { "epoch": 0.01635351981314818, "grad_norm": 0.8359375, "learning_rate": 4.8770495253395174e-05, "loss": 0.9077, "step": 373 }, { "epoch": 0.016397363029805417, "grad_norm": 0.77734375, "learning_rate": 4.8766541571364496e-05, "loss": 0.8158, "step": 374 }, { "epoch": 0.01644120624646265, "grad_norm": 0.8046875, "learning_rate": 4.876258799792016e-05, "loss": 0.814, "step": 375 }, { "epoch": 0.016485049463119884, "grad_norm": 0.90234375, "learning_rate": 4.8758634533070635e-05, "loss": 0.8713, "step": 376 }, { "epoch": 0.016528892679777116, "grad_norm": 0.91015625, "learning_rate": 4.8754681176824244e-05, "loss": 0.8241, "step": 377 }, { "epoch": 0.016572735896434352, "grad_norm": 0.859375, "learning_rate": 4.875072792918939e-05, "loss": 0.8058, "step": 378 }, { "epoch": 0.016616579113091584, "grad_norm": 1.1640625, "learning_rate": 4.874677479017443e-05, "loss": 0.7905, "step": 379 }, { "epoch": 0.01666042232974882, "grad_norm": 0.84765625, "learning_rate": 4.8742821759787714e-05, "loss": 0.9253, "step": 380 }, { "epoch": 0.016704265546406052, "grad_norm": 0.83203125, "learning_rate": 4.87388688380377e-05, "loss": 0.8338, "step": 381 }, { "epoch": 0.016748108763063287, "grad_norm": 0.79296875, "learning_rate": 4.873491602493272e-05, "loss": 0.77, "step": 382 }, { "epoch": 0.01679195197972052, "grad_norm": 0.80078125, "learning_rate": 4.873096332048116e-05, "loss": 0.7683, "step": 383 }, { "epoch": 0.01683579519637775, "grad_norm": 0.828125, "learning_rate": 4.872701072469139e-05, "loss": 0.7457, "step": 384 }, { "epoch": 0.016879638413034987, "grad_norm": 0.8515625, "learning_rate": 4.872305823757175e-05, "loss": 0.9194, "step": 385 }, { "epoch": 0.01692348162969222, "grad_norm": 0.77734375, "learning_rate": 4.87191058591307e-05, "loss": 0.8988, "step": 386 }, { "epoch": 0.016967324846349455, "grad_norm": 0.890625, "learning_rate": 4.871515358937657e-05, "loss": 0.8844, "step": 387 }, { "epoch": 0.017011168063006687, "grad_norm": 0.84375, "learning_rate": 4.8711201428317746e-05, "loss": 0.8642, "step": 388 }, { "epoch": 0.017055011279663922, "grad_norm": 1.0390625, "learning_rate": 4.8707249375962595e-05, "loss": 1.0417, "step": 389 }, { "epoch": 0.017098854496321154, "grad_norm": 0.91015625, "learning_rate": 4.8703297432319453e-05, "loss": 0.9457, "step": 390 }, { "epoch": 0.01714269771297839, "grad_norm": 0.8828125, "learning_rate": 4.8699345597396786e-05, "loss": 0.8286, "step": 391 }, { "epoch": 0.017186540929635622, "grad_norm": 0.8125, "learning_rate": 4.869539387120292e-05, "loss": 0.805, "step": 392 }, { "epoch": 0.017230384146292858, "grad_norm": 0.828125, "learning_rate": 4.869144225374623e-05, "loss": 0.9601, "step": 393 }, { "epoch": 0.01727422736295009, "grad_norm": 0.85546875, "learning_rate": 4.86874907450351e-05, "loss": 0.8979, "step": 394 }, { "epoch": 0.017318070579607325, "grad_norm": 0.91015625, "learning_rate": 4.8683539345077864e-05, "loss": 0.776, "step": 395 }, { "epoch": 0.017361913796264557, "grad_norm": 0.93359375, "learning_rate": 4.867958805388297e-05, "loss": 0.8924, "step": 396 }, { "epoch": 0.017405757012921793, "grad_norm": 0.84375, "learning_rate": 4.8675636871458766e-05, "loss": 0.8558, "step": 397 }, { "epoch": 0.017449600229579025, "grad_norm": 0.890625, "learning_rate": 4.867168579781361e-05, "loss": 0.9742, "step": 398 }, { "epoch": 0.01749344344623626, "grad_norm": 0.8125, "learning_rate": 4.866773483295588e-05, "loss": 0.781, "step": 399 }, { "epoch": 0.017537286662893493, "grad_norm": 0.83203125, "learning_rate": 4.8663783976893906e-05, "loss": 0.8143, "step": 400 }, { "epoch": 0.01758112987955073, "grad_norm": 0.95703125, "learning_rate": 4.8659833229636156e-05, "loss": 0.8317, "step": 401 }, { "epoch": 0.01762497309620796, "grad_norm": 0.8984375, "learning_rate": 4.865588259119095e-05, "loss": 0.8986, "step": 402 }, { "epoch": 0.017668816312865192, "grad_norm": 0.859375, "learning_rate": 4.865193206156667e-05, "loss": 0.8488, "step": 403 }, { "epoch": 0.017712659529522428, "grad_norm": 1.3359375, "learning_rate": 4.864798164077168e-05, "loss": 0.9383, "step": 404 }, { "epoch": 0.01775650274617966, "grad_norm": 0.83203125, "learning_rate": 4.864403132881432e-05, "loss": 0.7669, "step": 405 }, { "epoch": 0.017800345962836896, "grad_norm": 0.82421875, "learning_rate": 4.8640081125703054e-05, "loss": 0.9284, "step": 406 }, { "epoch": 0.017844189179494128, "grad_norm": 0.8671875, "learning_rate": 4.8636131031446184e-05, "loss": 0.9174, "step": 407 }, { "epoch": 0.017888032396151363, "grad_norm": 0.765625, "learning_rate": 4.86321810460521e-05, "loss": 0.834, "step": 408 }, { "epoch": 0.017931875612808595, "grad_norm": 0.87109375, "learning_rate": 4.862823116952917e-05, "loss": 1.0201, "step": 409 }, { "epoch": 0.01797571882946583, "grad_norm": 0.83203125, "learning_rate": 4.8624281401885776e-05, "loss": 0.8336, "step": 410 }, { "epoch": 0.018019562046123063, "grad_norm": 0.8828125, "learning_rate": 4.862033174313028e-05, "loss": 0.8119, "step": 411 }, { "epoch": 0.0180634052627803, "grad_norm": 0.89453125, "learning_rate": 4.861638219327101e-05, "loss": 0.8798, "step": 412 }, { "epoch": 0.01810724847943753, "grad_norm": 0.78125, "learning_rate": 4.861243275231642e-05, "loss": 0.8028, "step": 413 }, { "epoch": 0.018151091696094766, "grad_norm": 0.8828125, "learning_rate": 4.860848342027484e-05, "loss": 0.9029, "step": 414 }, { "epoch": 0.018194934912752, "grad_norm": 0.77734375, "learning_rate": 4.860453419715465e-05, "loss": 0.7523, "step": 415 }, { "epoch": 0.018238778129409234, "grad_norm": 0.8359375, "learning_rate": 4.860058508296421e-05, "loss": 0.8171, "step": 416 }, { "epoch": 0.018282621346066466, "grad_norm": 0.796875, "learning_rate": 4.8596636077711885e-05, "loss": 0.7536, "step": 417 }, { "epoch": 0.0183264645627237, "grad_norm": 0.8359375, "learning_rate": 4.859268718140606e-05, "loss": 0.8907, "step": 418 }, { "epoch": 0.018370307779380934, "grad_norm": 0.94140625, "learning_rate": 4.85887383940551e-05, "loss": 0.9629, "step": 419 }, { "epoch": 0.018414150996038166, "grad_norm": 0.9375, "learning_rate": 4.858478971566736e-05, "loss": 0.8217, "step": 420 }, { "epoch": 0.0184579942126954, "grad_norm": 0.85546875, "learning_rate": 4.858084114625119e-05, "loss": 0.9297, "step": 421 }, { "epoch": 0.018501837429352633, "grad_norm": 1.296875, "learning_rate": 4.857689268581501e-05, "loss": 0.886, "step": 422 }, { "epoch": 0.01854568064600987, "grad_norm": 0.77734375, "learning_rate": 4.857294433436719e-05, "loss": 0.6712, "step": 423 }, { "epoch": 0.0185895238626671, "grad_norm": 0.7734375, "learning_rate": 4.8568996091916056e-05, "loss": 0.7573, "step": 424 }, { "epoch": 0.018633367079324337, "grad_norm": 0.75390625, "learning_rate": 4.856504795847e-05, "loss": 0.8098, "step": 425 }, { "epoch": 0.01867721029598157, "grad_norm": 0.80859375, "learning_rate": 4.856109993403735e-05, "loss": 0.836, "step": 426 }, { "epoch": 0.018721053512638804, "grad_norm": 0.76953125, "learning_rate": 4.855715201862655e-05, "loss": 0.8002, "step": 427 }, { "epoch": 0.018764896729296036, "grad_norm": 0.8515625, "learning_rate": 4.8553204212245917e-05, "loss": 0.8441, "step": 428 }, { "epoch": 0.018808739945953272, "grad_norm": 0.8125, "learning_rate": 4.8549256514903826e-05, "loss": 0.82, "step": 429 }, { "epoch": 0.018852583162610504, "grad_norm": 1.0390625, "learning_rate": 4.8545308926608656e-05, "loss": 0.8263, "step": 430 }, { "epoch": 0.01889642637926774, "grad_norm": 0.828125, "learning_rate": 4.8541361447368714e-05, "loss": 0.9201, "step": 431 }, { "epoch": 0.01894026959592497, "grad_norm": 0.71484375, "learning_rate": 4.853741407719247e-05, "loss": 0.8131, "step": 432 }, { "epoch": 0.018984112812582207, "grad_norm": 0.984375, "learning_rate": 4.8533466816088224e-05, "loss": 0.8001, "step": 433 }, { "epoch": 0.01902795602923944, "grad_norm": 0.8828125, "learning_rate": 4.8529519664064346e-05, "loss": 0.9737, "step": 434 }, { "epoch": 0.019071799245896675, "grad_norm": 0.7890625, "learning_rate": 4.8525572621129225e-05, "loss": 0.7274, "step": 435 }, { "epoch": 0.019115642462553907, "grad_norm": 1.0078125, "learning_rate": 4.852162568729116e-05, "loss": 0.9375, "step": 436 }, { "epoch": 0.019159485679211143, "grad_norm": 1.09375, "learning_rate": 4.851767886255861e-05, "loss": 0.8482, "step": 437 }, { "epoch": 0.019203328895868375, "grad_norm": 0.9140625, "learning_rate": 4.851373214693989e-05, "loss": 0.8939, "step": 438 }, { "epoch": 0.019247172112525607, "grad_norm": 1.09375, "learning_rate": 4.850978554044339e-05, "loss": 0.8342, "step": 439 }, { "epoch": 0.019291015329182842, "grad_norm": 1.15625, "learning_rate": 4.850583904307744e-05, "loss": 0.9333, "step": 440 }, { "epoch": 0.019334858545840074, "grad_norm": 0.90234375, "learning_rate": 4.850189265485039e-05, "loss": 0.7383, "step": 441 }, { "epoch": 0.01937870176249731, "grad_norm": 0.85546875, "learning_rate": 4.8497946375770664e-05, "loss": 0.8116, "step": 442 }, { "epoch": 0.019422544979154542, "grad_norm": 0.9140625, "learning_rate": 4.8494000205846604e-05, "loss": 0.9949, "step": 443 }, { "epoch": 0.019466388195811778, "grad_norm": 0.7890625, "learning_rate": 4.849005414508657e-05, "loss": 0.8835, "step": 444 }, { "epoch": 0.01951023141246901, "grad_norm": 0.84375, "learning_rate": 4.8486108193498915e-05, "loss": 0.8915, "step": 445 }, { "epoch": 0.019554074629126245, "grad_norm": 0.87890625, "learning_rate": 4.8482162351091976e-05, "loss": 0.8655, "step": 446 }, { "epoch": 0.019597917845783477, "grad_norm": 0.84375, "learning_rate": 4.8478216617874184e-05, "loss": 0.787, "step": 447 }, { "epoch": 0.019641761062440713, "grad_norm": 0.90625, "learning_rate": 4.847427099385387e-05, "loss": 0.8651, "step": 448 }, { "epoch": 0.019685604279097945, "grad_norm": 0.83203125, "learning_rate": 4.847032547903939e-05, "loss": 0.9338, "step": 449 }, { "epoch": 0.01972944749575518, "grad_norm": 0.81640625, "learning_rate": 4.8466380073439125e-05, "loss": 0.9468, "step": 450 }, { "epoch": 0.019773290712412413, "grad_norm": 0.88671875, "learning_rate": 4.846243477706136e-05, "loss": 0.8263, "step": 451 }, { "epoch": 0.01981713392906965, "grad_norm": 0.79296875, "learning_rate": 4.845848958991457e-05, "loss": 0.8715, "step": 452 }, { "epoch": 0.01986097714572688, "grad_norm": 0.78515625, "learning_rate": 4.845454451200706e-05, "loss": 0.7744, "step": 453 }, { "epoch": 0.019904820362384116, "grad_norm": 0.8671875, "learning_rate": 4.845059954334721e-05, "loss": 0.8411, "step": 454 }, { "epoch": 0.019948663579041348, "grad_norm": 0.87109375, "learning_rate": 4.8446654683943346e-05, "loss": 1.0177, "step": 455 }, { "epoch": 0.019992506795698584, "grad_norm": 1.0859375, "learning_rate": 4.8442709933803864e-05, "loss": 0.8628, "step": 456 }, { "epoch": 0.020036350012355816, "grad_norm": 0.7734375, "learning_rate": 4.8438765292937114e-05, "loss": 0.8, "step": 457 }, { "epoch": 0.020080193229013048, "grad_norm": 0.890625, "learning_rate": 4.843482076135145e-05, "loss": 0.8294, "step": 458 }, { "epoch": 0.020124036445670283, "grad_norm": 0.9921875, "learning_rate": 4.843087633905524e-05, "loss": 0.8318, "step": 459 }, { "epoch": 0.020167879662327515, "grad_norm": 0.859375, "learning_rate": 4.842693202605679e-05, "loss": 0.9173, "step": 460 }, { "epoch": 0.02021172287898475, "grad_norm": 0.80859375, "learning_rate": 4.842298782236456e-05, "loss": 0.8477, "step": 461 }, { "epoch": 0.020255566095641983, "grad_norm": 0.87890625, "learning_rate": 4.841904372798686e-05, "loss": 0.854, "step": 462 }, { "epoch": 0.02029940931229922, "grad_norm": 0.8203125, "learning_rate": 4.841509974293204e-05, "loss": 0.8894, "step": 463 }, { "epoch": 0.02034325252895645, "grad_norm": 0.8828125, "learning_rate": 4.841115586720847e-05, "loss": 0.8728, "step": 464 }, { "epoch": 0.020387095745613686, "grad_norm": 0.84375, "learning_rate": 4.8407212100824506e-05, "loss": 0.9047, "step": 465 }, { "epoch": 0.02043093896227092, "grad_norm": 0.83984375, "learning_rate": 4.8403268443788476e-05, "loss": 1.0778, "step": 466 }, { "epoch": 0.020474782178928154, "grad_norm": 0.8359375, "learning_rate": 4.83993248961088e-05, "loss": 0.7296, "step": 467 }, { "epoch": 0.020518625395585386, "grad_norm": 0.83203125, "learning_rate": 4.839538145779381e-05, "loss": 0.8719, "step": 468 }, { "epoch": 0.02056246861224262, "grad_norm": 1.234375, "learning_rate": 4.839143812885186e-05, "loss": 0.8927, "step": 469 }, { "epoch": 0.020606311828899854, "grad_norm": 0.78515625, "learning_rate": 4.83874949092913e-05, "loss": 0.8786, "step": 470 }, { "epoch": 0.02065015504555709, "grad_norm": 0.80859375, "learning_rate": 4.838355179912046e-05, "loss": 0.8724, "step": 471 }, { "epoch": 0.02069399826221432, "grad_norm": 0.80078125, "learning_rate": 4.837960879834777e-05, "loss": 0.8923, "step": 472 }, { "epoch": 0.020737841478871557, "grad_norm": 0.74609375, "learning_rate": 4.837566590698155e-05, "loss": 0.7265, "step": 473 }, { "epoch": 0.02078168469552879, "grad_norm": 0.828125, "learning_rate": 4.837172312503015e-05, "loss": 0.7073, "step": 474 }, { "epoch": 0.02082552791218602, "grad_norm": 0.77734375, "learning_rate": 4.836778045250194e-05, "loss": 0.7592, "step": 475 }, { "epoch": 0.020869371128843257, "grad_norm": 1.234375, "learning_rate": 4.836383788940523e-05, "loss": 0.7035, "step": 476 }, { "epoch": 0.02091321434550049, "grad_norm": 0.85546875, "learning_rate": 4.8359895435748447e-05, "loss": 1.0148, "step": 477 }, { "epoch": 0.020957057562157724, "grad_norm": 0.74609375, "learning_rate": 4.8355953091539915e-05, "loss": 0.7656, "step": 478 }, { "epoch": 0.021000900778814956, "grad_norm": 0.84765625, "learning_rate": 4.8352010856787997e-05, "loss": 0.7657, "step": 479 }, { "epoch": 0.021044743995472192, "grad_norm": 0.98828125, "learning_rate": 4.8348068731501026e-05, "loss": 0.8261, "step": 480 }, { "epoch": 0.021088587212129424, "grad_norm": 0.80859375, "learning_rate": 4.8344126715687344e-05, "loss": 0.7439, "step": 481 }, { "epoch": 0.02113243042878666, "grad_norm": 0.9765625, "learning_rate": 4.834018480935537e-05, "loss": 0.7135, "step": 482 }, { "epoch": 0.02117627364544389, "grad_norm": 0.859375, "learning_rate": 4.833624301251342e-05, "loss": 0.6938, "step": 483 }, { "epoch": 0.021220116862101127, "grad_norm": 0.85546875, "learning_rate": 4.833230132516984e-05, "loss": 0.859, "step": 484 }, { "epoch": 0.02126396007875836, "grad_norm": 0.81640625, "learning_rate": 4.8328359747333006e-05, "loss": 0.833, "step": 485 }, { "epoch": 0.021307803295415595, "grad_norm": 0.95703125, "learning_rate": 4.832441827901122e-05, "loss": 0.9659, "step": 486 }, { "epoch": 0.021351646512072827, "grad_norm": 0.7734375, "learning_rate": 4.832047692021291e-05, "loss": 0.8758, "step": 487 }, { "epoch": 0.021395489728730063, "grad_norm": 0.75390625, "learning_rate": 4.831653567094639e-05, "loss": 0.8145, "step": 488 }, { "epoch": 0.021439332945387295, "grad_norm": 0.81640625, "learning_rate": 4.831259453122003e-05, "loss": 0.8236, "step": 489 }, { "epoch": 0.02148317616204453, "grad_norm": 0.75390625, "learning_rate": 4.8308653501042166e-05, "loss": 0.7014, "step": 490 }, { "epoch": 0.021527019378701762, "grad_norm": 0.7734375, "learning_rate": 4.830471258042113e-05, "loss": 0.8488, "step": 491 }, { "epoch": 0.021570862595358998, "grad_norm": 0.87109375, "learning_rate": 4.830077176936533e-05, "loss": 0.9364, "step": 492 }, { "epoch": 0.02161470581201623, "grad_norm": 0.734375, "learning_rate": 4.8296831067883083e-05, "loss": 0.7448, "step": 493 }, { "epoch": 0.021658549028673462, "grad_norm": 0.7734375, "learning_rate": 4.829289047598276e-05, "loss": 0.8521, "step": 494 }, { "epoch": 0.021702392245330698, "grad_norm": 0.8046875, "learning_rate": 4.8288949993672685e-05, "loss": 0.7794, "step": 495 }, { "epoch": 0.02174623546198793, "grad_norm": 1.09375, "learning_rate": 4.828500962096123e-05, "loss": 0.7881, "step": 496 }, { "epoch": 0.021790078678645165, "grad_norm": 0.82421875, "learning_rate": 4.828106935785671e-05, "loss": 0.8048, "step": 497 }, { "epoch": 0.021833921895302397, "grad_norm": 0.9296875, "learning_rate": 4.827712920436754e-05, "loss": 0.8486, "step": 498 }, { "epoch": 0.021877765111959633, "grad_norm": 0.82421875, "learning_rate": 4.8273189160502044e-05, "loss": 0.9004, "step": 499 }, { "epoch": 0.021921608328616865, "grad_norm": 0.8515625, "learning_rate": 4.826924922626855e-05, "loss": 0.7962, "step": 500 } ], "logging_steps": 1, "max_steps": 22809, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.3223332569333891e+20, "train_batch_size": 2, "trial_name": null, "trial_params": null }