{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6657, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 9.216344833374023, "learning_rate": 5.0000000000000004e-08, "loss": 0.8051, "step": 1 }, { "epoch": 0.0, "grad_norm": 6.206076622009277, "learning_rate": 1.0000000000000001e-07, "loss": 0.7451, "step": 2 }, { "epoch": 0.0, "grad_norm": 5.022519588470459, "learning_rate": 1.5000000000000002e-07, "loss": 0.7754, "step": 3 }, { "epoch": 0.0, "grad_norm": 8.47269344329834, "learning_rate": 2.0000000000000002e-07, "loss": 0.728, "step": 4 }, { "epoch": 0.0, "grad_norm": 5.961706161499023, "learning_rate": 2.5000000000000004e-07, "loss": 0.7789, "step": 5 }, { "epoch": 0.0, "grad_norm": 6.422206878662109, "learning_rate": 3.0000000000000004e-07, "loss": 0.7443, "step": 6 }, { "epoch": 0.0, "grad_norm": 6.778023719787598, "learning_rate": 3.5000000000000004e-07, "loss": 0.7371, "step": 7 }, { "epoch": 0.0, "grad_norm": 8.777544021606445, "learning_rate": 4.0000000000000003e-07, "loss": 0.7758, "step": 8 }, { "epoch": 0.0, "grad_norm": 8.191843032836914, "learning_rate": 4.5000000000000003e-07, "loss": 0.7368, "step": 9 }, { "epoch": 0.0, "grad_norm": 5.89084529876709, "learning_rate": 5.000000000000001e-07, "loss": 0.7539, "step": 10 }, { "epoch": 0.0, "grad_norm": 6.304842948913574, "learning_rate": 5.5e-07, "loss": 0.7603, "step": 11 }, { "epoch": 0.0, "grad_norm": 4.9237542152404785, "learning_rate": 6.000000000000001e-07, "loss": 0.6452, "step": 12 }, { "epoch": 0.0, "grad_norm": 5.385876655578613, "learning_rate": 6.5e-07, "loss": 0.7485, "step": 13 }, { "epoch": 0.0, "grad_norm": 5.312966346740723, "learning_rate": 7.000000000000001e-07, "loss": 0.6894, "step": 14 }, { "epoch": 0.0, "grad_norm": 3.2677462100982666, "learning_rate": 7.5e-07, "loss": 0.8086, "step": 15 }, { "epoch": 0.0, "grad_norm": 3.4296367168426514, "learning_rate": 8.000000000000001e-07, "loss": 0.7205, "step": 16 }, { "epoch": 0.0, "grad_norm": 5.89979362487793, "learning_rate": 8.500000000000001e-07, "loss": 0.7187, "step": 17 }, { "epoch": 0.0, "grad_norm": 4.393117427825928, "learning_rate": 9.000000000000001e-07, "loss": 0.6184, "step": 18 }, { "epoch": 0.0, "grad_norm": 3.0363590717315674, "learning_rate": 9.500000000000001e-07, "loss": 0.7564, "step": 19 }, { "epoch": 0.0, "grad_norm": 2.2013661861419678, "learning_rate": 1.0000000000000002e-06, "loss": 0.652, "step": 20 }, { "epoch": 0.0, "grad_norm": 3.497699022293091, "learning_rate": 1.0500000000000001e-06, "loss": 0.6864, "step": 21 }, { "epoch": 0.0, "grad_norm": 2.7840077877044678, "learning_rate": 1.1e-06, "loss": 0.6362, "step": 22 }, { "epoch": 0.0, "grad_norm": 2.728130578994751, "learning_rate": 1.1500000000000002e-06, "loss": 0.7122, "step": 23 }, { "epoch": 0.0, "grad_norm": 2.2526721954345703, "learning_rate": 1.2000000000000002e-06, "loss": 0.5891, "step": 24 }, { "epoch": 0.0, "grad_norm": 2.0032923221588135, "learning_rate": 1.25e-06, "loss": 0.5887, "step": 25 }, { "epoch": 0.0, "grad_norm": 2.0636212825775146, "learning_rate": 1.3e-06, "loss": 0.6147, "step": 26 }, { "epoch": 0.0, "grad_norm": 2.581963539123535, "learning_rate": 1.3500000000000002e-06, "loss": 0.506, "step": 27 }, { "epoch": 0.0, "grad_norm": 1.9784207344055176, "learning_rate": 1.4000000000000001e-06, "loss": 0.6569, "step": 28 }, { "epoch": 0.0, "grad_norm": 1.6925359964370728, "learning_rate": 1.45e-06, "loss": 0.4983, "step": 29 }, { "epoch": 0.0, "grad_norm": 2.1976478099823, "learning_rate": 1.5e-06, "loss": 0.6051, "step": 30 }, { "epoch": 0.0, "grad_norm": 4.165533542633057, "learning_rate": 1.5500000000000002e-06, "loss": 0.5591, "step": 31 }, { "epoch": 0.0, "grad_norm": 2.077219247817993, "learning_rate": 1.6000000000000001e-06, "loss": 0.4935, "step": 32 }, { "epoch": 0.0, "grad_norm": 3.2716355323791504, "learning_rate": 1.6500000000000003e-06, "loss": 0.5972, "step": 33 }, { "epoch": 0.01, "grad_norm": 2.0140631198883057, "learning_rate": 1.7000000000000002e-06, "loss": 0.551, "step": 34 }, { "epoch": 0.01, "grad_norm": 1.743302822113037, "learning_rate": 1.75e-06, "loss": 0.6626, "step": 35 }, { "epoch": 0.01, "grad_norm": 3.102935314178467, "learning_rate": 1.8000000000000001e-06, "loss": 0.4959, "step": 36 }, { "epoch": 0.01, "grad_norm": 1.3923463821411133, "learning_rate": 1.85e-06, "loss": 0.5544, "step": 37 }, { "epoch": 0.01, "grad_norm": 1.299470067024231, "learning_rate": 1.9000000000000002e-06, "loss": 0.5078, "step": 38 }, { "epoch": 0.01, "grad_norm": 1.3329691886901855, "learning_rate": 1.9500000000000004e-06, "loss": 0.6069, "step": 39 }, { "epoch": 0.01, "grad_norm": 2.5494062900543213, "learning_rate": 2.0000000000000003e-06, "loss": 0.5565, "step": 40 }, { "epoch": 0.01, "grad_norm": 3.336667060852051, "learning_rate": 2.05e-06, "loss": 0.5008, "step": 41 }, { "epoch": 0.01, "grad_norm": 1.2009618282318115, "learning_rate": 2.1000000000000002e-06, "loss": 0.4832, "step": 42 }, { "epoch": 0.01, "grad_norm": 1.352482557296753, "learning_rate": 2.15e-06, "loss": 0.5634, "step": 43 }, { "epoch": 0.01, "grad_norm": 1.744902491569519, "learning_rate": 2.2e-06, "loss": 0.5527, "step": 44 }, { "epoch": 0.01, "grad_norm": 1.5867419242858887, "learning_rate": 2.25e-06, "loss": 0.5929, "step": 45 }, { "epoch": 0.01, "grad_norm": 1.6441349983215332, "learning_rate": 2.3000000000000004e-06, "loss": 0.471, "step": 46 }, { "epoch": 0.01, "grad_norm": 1.4348138570785522, "learning_rate": 2.35e-06, "loss": 0.5468, "step": 47 }, { "epoch": 0.01, "grad_norm": 1.5809327363967896, "learning_rate": 2.4000000000000003e-06, "loss": 0.5086, "step": 48 }, { "epoch": 0.01, "grad_norm": 1.6838728189468384, "learning_rate": 2.4500000000000003e-06, "loss": 0.7906, "step": 49 }, { "epoch": 0.01, "grad_norm": 1.9473453760147095, "learning_rate": 2.5e-06, "loss": 0.552, "step": 50 }, { "epoch": 0.01, "grad_norm": 1.7204724550247192, "learning_rate": 2.55e-06, "loss": 0.482, "step": 51 }, { "epoch": 0.01, "grad_norm": 1.6194877624511719, "learning_rate": 2.6e-06, "loss": 0.7892, "step": 52 }, { "epoch": 0.01, "grad_norm": 2.340284824371338, "learning_rate": 2.6500000000000005e-06, "loss": 0.5137, "step": 53 }, { "epoch": 0.01, "grad_norm": 1.7191145420074463, "learning_rate": 2.7000000000000004e-06, "loss": 0.579, "step": 54 }, { "epoch": 0.01, "grad_norm": 1.1429994106292725, "learning_rate": 2.7500000000000004e-06, "loss": 0.5203, "step": 55 }, { "epoch": 0.01, "grad_norm": 1.7220251560211182, "learning_rate": 2.8000000000000003e-06, "loss": 0.563, "step": 56 }, { "epoch": 0.01, "grad_norm": 1.4226551055908203, "learning_rate": 2.85e-06, "loss": 0.5329, "step": 57 }, { "epoch": 0.01, "grad_norm": 1.6564255952835083, "learning_rate": 2.9e-06, "loss": 0.4886, "step": 58 }, { "epoch": 0.01, "grad_norm": 3.263592481613159, "learning_rate": 2.95e-06, "loss": 0.5825, "step": 59 }, { "epoch": 0.01, "grad_norm": 1.8507544994354248, "learning_rate": 3e-06, "loss": 0.5334, "step": 60 }, { "epoch": 0.01, "grad_norm": 1.329227328300476, "learning_rate": 3.05e-06, "loss": 0.5215, "step": 61 }, { "epoch": 0.01, "grad_norm": 1.398729920387268, "learning_rate": 3.1000000000000004e-06, "loss": 0.512, "step": 62 }, { "epoch": 0.01, "grad_norm": 1.4255733489990234, "learning_rate": 3.1500000000000003e-06, "loss": 0.5111, "step": 63 }, { "epoch": 0.01, "grad_norm": 1.46450936794281, "learning_rate": 3.2000000000000003e-06, "loss": 0.7823, "step": 64 }, { "epoch": 0.01, "grad_norm": 1.5293128490447998, "learning_rate": 3.2500000000000002e-06, "loss": 0.5423, "step": 65 }, { "epoch": 0.01, "grad_norm": 1.4929172992706299, "learning_rate": 3.3000000000000006e-06, "loss": 0.8238, "step": 66 }, { "epoch": 0.01, "grad_norm": 1.4993669986724854, "learning_rate": 3.3500000000000005e-06, "loss": 0.5538, "step": 67 }, { "epoch": 0.01, "grad_norm": 1.3954360485076904, "learning_rate": 3.4000000000000005e-06, "loss": 0.5181, "step": 68 }, { "epoch": 0.01, "grad_norm": 2.4524667263031006, "learning_rate": 3.45e-06, "loss": 0.5092, "step": 69 }, { "epoch": 0.01, "grad_norm": 1.715070128440857, "learning_rate": 3.5e-06, "loss": 0.4958, "step": 70 }, { "epoch": 0.01, "grad_norm": 1.1867640018463135, "learning_rate": 3.5500000000000003e-06, "loss": 0.4159, "step": 71 }, { "epoch": 0.01, "grad_norm": 2.128693103790283, "learning_rate": 3.6000000000000003e-06, "loss": 0.5467, "step": 72 }, { "epoch": 0.01, "grad_norm": 1.2268809080123901, "learning_rate": 3.65e-06, "loss": 0.7378, "step": 73 }, { "epoch": 0.01, "grad_norm": 2.0815649032592773, "learning_rate": 3.7e-06, "loss": 0.4361, "step": 74 }, { "epoch": 0.01, "grad_norm": 1.6703121662139893, "learning_rate": 3.7500000000000005e-06, "loss": 0.5602, "step": 75 }, { "epoch": 0.01, "grad_norm": 1.6223803758621216, "learning_rate": 3.8000000000000005e-06, "loss": 0.5404, "step": 76 }, { "epoch": 0.01, "grad_norm": 1.3139238357543945, "learning_rate": 3.85e-06, "loss": 0.582, "step": 77 }, { "epoch": 0.01, "grad_norm": 1.6628838777542114, "learning_rate": 3.900000000000001e-06, "loss": 0.5332, "step": 78 }, { "epoch": 0.01, "grad_norm": 1.8306076526641846, "learning_rate": 3.95e-06, "loss": 0.535, "step": 79 }, { "epoch": 0.01, "grad_norm": 1.3007962703704834, "learning_rate": 4.000000000000001e-06, "loss": 0.5161, "step": 80 }, { "epoch": 0.01, "grad_norm": 1.5512853860855103, "learning_rate": 4.05e-06, "loss": 0.5248, "step": 81 }, { "epoch": 0.01, "grad_norm": 1.606092095375061, "learning_rate": 4.1e-06, "loss": 0.6084, "step": 82 }, { "epoch": 0.01, "grad_norm": 1.6994787454605103, "learning_rate": 4.15e-06, "loss": 0.594, "step": 83 }, { "epoch": 0.01, "grad_norm": 2.6806154251098633, "learning_rate": 4.2000000000000004e-06, "loss": 0.532, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.3899866342544556, "learning_rate": 4.25e-06, "loss": 0.4884, "step": 85 }, { "epoch": 0.01, "grad_norm": 1.6193784475326538, "learning_rate": 4.3e-06, "loss": 0.4854, "step": 86 }, { "epoch": 0.01, "grad_norm": 1.6855883598327637, "learning_rate": 4.350000000000001e-06, "loss": 0.6261, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.4627059698104858, "learning_rate": 4.4e-06, "loss": 0.4938, "step": 88 }, { "epoch": 0.01, "grad_norm": 3.341945171356201, "learning_rate": 4.450000000000001e-06, "loss": 0.4462, "step": 89 }, { "epoch": 0.01, "grad_norm": 1.8844380378723145, "learning_rate": 4.5e-06, "loss": 0.4726, "step": 90 }, { "epoch": 0.01, "grad_norm": 1.1282368898391724, "learning_rate": 4.5500000000000005e-06, "loss": 0.5393, "step": 91 }, { "epoch": 0.01, "grad_norm": 1.6442251205444336, "learning_rate": 4.600000000000001e-06, "loss": 0.5214, "step": 92 }, { "epoch": 0.01, "grad_norm": 1.4016815423965454, "learning_rate": 4.65e-06, "loss": 0.538, "step": 93 }, { "epoch": 0.01, "grad_norm": 2.3321430683135986, "learning_rate": 4.7e-06, "loss": 0.5139, "step": 94 }, { "epoch": 0.01, "grad_norm": 2.0752475261688232, "learning_rate": 4.75e-06, "loss": 0.4516, "step": 95 }, { "epoch": 0.01, "grad_norm": 1.50879967212677, "learning_rate": 4.800000000000001e-06, "loss": 0.7886, "step": 96 }, { "epoch": 0.01, "grad_norm": 2.228383779525757, "learning_rate": 4.85e-06, "loss": 0.4604, "step": 97 }, { "epoch": 0.01, "grad_norm": 1.8180030584335327, "learning_rate": 4.9000000000000005e-06, "loss": 0.5203, "step": 98 }, { "epoch": 0.01, "grad_norm": 7.875080108642578, "learning_rate": 4.95e-06, "loss": 0.5579, "step": 99 }, { "epoch": 0.02, "grad_norm": 2.368553876876831, "learning_rate": 5e-06, "loss": 0.4945, "step": 100 }, { "epoch": 0.02, "grad_norm": 1.8370745182037354, "learning_rate": 5.050000000000001e-06, "loss": 0.4996, "step": 101 }, { "epoch": 0.02, "grad_norm": 1.4010632038116455, "learning_rate": 5.1e-06, "loss": 0.8099, "step": 102 }, { "epoch": 0.02, "grad_norm": 1.560505747795105, "learning_rate": 5.150000000000001e-06, "loss": 0.5928, "step": 103 }, { "epoch": 0.02, "grad_norm": 1.1708941459655762, "learning_rate": 5.2e-06, "loss": 0.728, "step": 104 }, { "epoch": 0.02, "grad_norm": 1.2475477457046509, "learning_rate": 5.2500000000000006e-06, "loss": 0.4497, "step": 105 }, { "epoch": 0.02, "grad_norm": 1.193566918373108, "learning_rate": 5.300000000000001e-06, "loss": 0.7561, "step": 106 }, { "epoch": 0.02, "grad_norm": 1.7343307733535767, "learning_rate": 5.3500000000000004e-06, "loss": 0.5058, "step": 107 }, { "epoch": 0.02, "grad_norm": 1.4473809003829956, "learning_rate": 5.400000000000001e-06, "loss": 0.5294, "step": 108 }, { "epoch": 0.02, "grad_norm": 1.57465660572052, "learning_rate": 5.450000000000001e-06, "loss": 0.5586, "step": 109 }, { "epoch": 0.02, "grad_norm": 1.5883936882019043, "learning_rate": 5.500000000000001e-06, "loss": 0.5044, "step": 110 }, { "epoch": 0.02, "grad_norm": 4.314908027648926, "learning_rate": 5.550000000000001e-06, "loss": 0.4384, "step": 111 }, { "epoch": 0.02, "grad_norm": 1.5061193704605103, "learning_rate": 5.600000000000001e-06, "loss": 0.5446, "step": 112 }, { "epoch": 0.02, "grad_norm": 1.344829797744751, "learning_rate": 5.65e-06, "loss": 0.4816, "step": 113 }, { "epoch": 0.02, "grad_norm": 1.7067680358886719, "learning_rate": 5.7e-06, "loss": 0.5459, "step": 114 }, { "epoch": 0.02, "grad_norm": 1.9676406383514404, "learning_rate": 5.75e-06, "loss": 0.5358, "step": 115 }, { "epoch": 0.02, "grad_norm": 1.5618263483047485, "learning_rate": 5.8e-06, "loss": 0.506, "step": 116 }, { "epoch": 0.02, "grad_norm": 1.9052551984786987, "learning_rate": 5.85e-06, "loss": 0.5585, "step": 117 }, { "epoch": 0.02, "grad_norm": 1.9070708751678467, "learning_rate": 5.9e-06, "loss": 0.5372, "step": 118 }, { "epoch": 0.02, "grad_norm": 2.9828410148620605, "learning_rate": 5.950000000000001e-06, "loss": 0.4951, "step": 119 }, { "epoch": 0.02, "grad_norm": 2.0229809284210205, "learning_rate": 6e-06, "loss": 0.5052, "step": 120 }, { "epoch": 0.02, "grad_norm": 2.2157928943634033, "learning_rate": 6.0500000000000005e-06, "loss": 0.5716, "step": 121 }, { "epoch": 0.02, "grad_norm": 1.4389758110046387, "learning_rate": 6.1e-06, "loss": 0.7493, "step": 122 }, { "epoch": 0.02, "grad_norm": 1.7119325399398804, "learning_rate": 6.15e-06, "loss": 0.4673, "step": 123 }, { "epoch": 0.02, "grad_norm": 2.351287603378296, "learning_rate": 6.200000000000001e-06, "loss": 0.4444, "step": 124 }, { "epoch": 0.02, "grad_norm": 1.5570749044418335, "learning_rate": 6.25e-06, "loss": 0.5256, "step": 125 }, { "epoch": 0.02, "grad_norm": 1.2451484203338623, "learning_rate": 6.300000000000001e-06, "loss": 0.5248, "step": 126 }, { "epoch": 0.02, "grad_norm": 1.5641099214553833, "learning_rate": 6.35e-06, "loss": 0.4721, "step": 127 }, { "epoch": 0.02, "grad_norm": 1.90048086643219, "learning_rate": 6.4000000000000006e-06, "loss": 0.4813, "step": 128 }, { "epoch": 0.02, "grad_norm": 1.3798425197601318, "learning_rate": 6.450000000000001e-06, "loss": 0.5523, "step": 129 }, { "epoch": 0.02, "grad_norm": 1.425847053527832, "learning_rate": 6.5000000000000004e-06, "loss": 0.7206, "step": 130 }, { "epoch": 0.02, "grad_norm": 1.716511607170105, "learning_rate": 6.550000000000001e-06, "loss": 0.4869, "step": 131 }, { "epoch": 0.02, "grad_norm": 1.3717998266220093, "learning_rate": 6.600000000000001e-06, "loss": 0.5221, "step": 132 }, { "epoch": 0.02, "grad_norm": 1.6510571241378784, "learning_rate": 6.650000000000001e-06, "loss": 0.5102, "step": 133 }, { "epoch": 0.02, "grad_norm": 2.647509813308716, "learning_rate": 6.700000000000001e-06, "loss": 0.5367, "step": 134 }, { "epoch": 0.02, "grad_norm": 1.411824107170105, "learning_rate": 6.750000000000001e-06, "loss": 0.7628, "step": 135 }, { "epoch": 0.02, "grad_norm": 2.019472599029541, "learning_rate": 6.800000000000001e-06, "loss": 0.5436, "step": 136 }, { "epoch": 0.02, "grad_norm": 1.2911547422409058, "learning_rate": 6.850000000000001e-06, "loss": 0.4388, "step": 137 }, { "epoch": 0.02, "grad_norm": 2.068082332611084, "learning_rate": 6.9e-06, "loss": 0.5449, "step": 138 }, { "epoch": 0.02, "grad_norm": 1.6911259889602661, "learning_rate": 6.95e-06, "loss": 0.4656, "step": 139 }, { "epoch": 0.02, "grad_norm": 2.0828232765197754, "learning_rate": 7e-06, "loss": 0.5529, "step": 140 }, { "epoch": 0.02, "grad_norm": 1.263565182685852, "learning_rate": 7.05e-06, "loss": 0.4866, "step": 141 }, { "epoch": 0.02, "grad_norm": 3.8453457355499268, "learning_rate": 7.100000000000001e-06, "loss": 0.5789, "step": 142 }, { "epoch": 0.02, "grad_norm": 1.77423095703125, "learning_rate": 7.15e-06, "loss": 0.4542, "step": 143 }, { "epoch": 0.02, "grad_norm": 1.8407195806503296, "learning_rate": 7.2000000000000005e-06, "loss": 0.6169, "step": 144 }, { "epoch": 0.02, "grad_norm": 1.8473724126815796, "learning_rate": 7.25e-06, "loss": 0.4878, "step": 145 }, { "epoch": 0.02, "grad_norm": 1.4072082042694092, "learning_rate": 7.3e-06, "loss": 0.4457, "step": 146 }, { "epoch": 0.02, "grad_norm": 2.0004026889801025, "learning_rate": 7.350000000000001e-06, "loss": 0.4525, "step": 147 }, { "epoch": 0.02, "grad_norm": 1.7567468881607056, "learning_rate": 7.4e-06, "loss": 0.4949, "step": 148 }, { "epoch": 0.02, "grad_norm": 1.8399001359939575, "learning_rate": 7.450000000000001e-06, "loss": 0.5166, "step": 149 }, { "epoch": 0.02, "grad_norm": 1.5580756664276123, "learning_rate": 7.500000000000001e-06, "loss": 0.4854, "step": 150 }, { "epoch": 0.02, "grad_norm": 1.5048457384109497, "learning_rate": 7.5500000000000006e-06, "loss": 0.5454, "step": 151 }, { "epoch": 0.02, "grad_norm": 1.3454675674438477, "learning_rate": 7.600000000000001e-06, "loss": 0.5489, "step": 152 }, { "epoch": 0.02, "grad_norm": 2.304851770401001, "learning_rate": 7.650000000000001e-06, "loss": 0.5012, "step": 153 }, { "epoch": 0.02, "grad_norm": 2.5427772998809814, "learning_rate": 7.7e-06, "loss": 0.5343, "step": 154 }, { "epoch": 0.02, "grad_norm": 1.533339023590088, "learning_rate": 7.75e-06, "loss": 0.5492, "step": 155 }, { "epoch": 0.02, "grad_norm": 1.779507040977478, "learning_rate": 7.800000000000002e-06, "loss": 0.3992, "step": 156 }, { "epoch": 0.02, "grad_norm": 1.5493593215942383, "learning_rate": 7.850000000000001e-06, "loss": 0.4561, "step": 157 }, { "epoch": 0.02, "grad_norm": 1.3808040618896484, "learning_rate": 7.9e-06, "loss": 0.5376, "step": 158 }, { "epoch": 0.02, "grad_norm": 1.9539458751678467, "learning_rate": 7.950000000000002e-06, "loss": 0.5164, "step": 159 }, { "epoch": 0.02, "grad_norm": 1.7852787971496582, "learning_rate": 8.000000000000001e-06, "loss": 0.5154, "step": 160 }, { "epoch": 0.02, "grad_norm": 2.0747218132019043, "learning_rate": 8.050000000000001e-06, "loss": 0.4766, "step": 161 }, { "epoch": 0.02, "grad_norm": 1.5693285465240479, "learning_rate": 8.1e-06, "loss": 0.4779, "step": 162 }, { "epoch": 0.02, "grad_norm": 1.334315299987793, "learning_rate": 8.15e-06, "loss": 0.496, "step": 163 }, { "epoch": 0.02, "grad_norm": 2.4113271236419678, "learning_rate": 8.2e-06, "loss": 0.5499, "step": 164 }, { "epoch": 0.02, "grad_norm": 1.4509303569793701, "learning_rate": 8.25e-06, "loss": 0.5613, "step": 165 }, { "epoch": 0.02, "grad_norm": 1.6541037559509277, "learning_rate": 8.3e-06, "loss": 0.5237, "step": 166 }, { "epoch": 0.03, "grad_norm": 10.083968162536621, "learning_rate": 8.35e-06, "loss": 0.5058, "step": 167 }, { "epoch": 0.03, "grad_norm": 1.4449021816253662, "learning_rate": 8.400000000000001e-06, "loss": 0.5087, "step": 168 }, { "epoch": 0.03, "grad_norm": 1.3584126234054565, "learning_rate": 8.45e-06, "loss": 0.4869, "step": 169 }, { "epoch": 0.03, "grad_norm": 2.0871362686157227, "learning_rate": 8.5e-06, "loss": 0.5149, "step": 170 }, { "epoch": 0.03, "grad_norm": 1.811505913734436, "learning_rate": 8.550000000000001e-06, "loss": 0.5029, "step": 171 }, { "epoch": 0.03, "grad_norm": 1.2113027572631836, "learning_rate": 8.6e-06, "loss": 0.45, "step": 172 }, { "epoch": 0.03, "grad_norm": 2.6240363121032715, "learning_rate": 8.65e-06, "loss": 0.4186, "step": 173 }, { "epoch": 0.03, "grad_norm": 1.3825879096984863, "learning_rate": 8.700000000000001e-06, "loss": 0.5307, "step": 174 }, { "epoch": 0.03, "grad_norm": 1.4539110660552979, "learning_rate": 8.750000000000001e-06, "loss": 0.5333, "step": 175 }, { "epoch": 0.03, "grad_norm": 2.344031810760498, "learning_rate": 8.8e-06, "loss": 0.503, "step": 176 }, { "epoch": 0.03, "grad_norm": 2.084747076034546, "learning_rate": 8.85e-06, "loss": 0.5575, "step": 177 }, { "epoch": 0.03, "grad_norm": 2.4718432426452637, "learning_rate": 8.900000000000001e-06, "loss": 0.5383, "step": 178 }, { "epoch": 0.03, "grad_norm": 1.27049720287323, "learning_rate": 8.95e-06, "loss": 0.4784, "step": 179 }, { "epoch": 0.03, "grad_norm": 1.4500404596328735, "learning_rate": 9e-06, "loss": 0.4589, "step": 180 }, { "epoch": 0.03, "grad_norm": 1.8684719800949097, "learning_rate": 9.050000000000001e-06, "loss": 0.522, "step": 181 }, { "epoch": 0.03, "grad_norm": 1.9482722282409668, "learning_rate": 9.100000000000001e-06, "loss": 0.5284, "step": 182 }, { "epoch": 0.03, "grad_norm": 1.5460186004638672, "learning_rate": 9.15e-06, "loss": 0.4437, "step": 183 }, { "epoch": 0.03, "grad_norm": 1.2165240049362183, "learning_rate": 9.200000000000002e-06, "loss": 0.4841, "step": 184 }, { "epoch": 0.03, "grad_norm": 1.1973820924758911, "learning_rate": 9.250000000000001e-06, "loss": 0.5727, "step": 185 }, { "epoch": 0.03, "grad_norm": 1.9446003437042236, "learning_rate": 9.3e-06, "loss": 0.5857, "step": 186 }, { "epoch": 0.03, "grad_norm": 3.4200549125671387, "learning_rate": 9.350000000000002e-06, "loss": 0.5961, "step": 187 }, { "epoch": 0.03, "grad_norm": 2.1682417392730713, "learning_rate": 9.4e-06, "loss": 0.4281, "step": 188 }, { "epoch": 0.03, "grad_norm": 1.8261798620224, "learning_rate": 9.450000000000001e-06, "loss": 0.501, "step": 189 }, { "epoch": 0.03, "grad_norm": 1.7259304523468018, "learning_rate": 9.5e-06, "loss": 0.533, "step": 190 }, { "epoch": 0.03, "grad_norm": 1.5931000709533691, "learning_rate": 9.55e-06, "loss": 0.5507, "step": 191 }, { "epoch": 0.03, "grad_norm": 1.5514650344848633, "learning_rate": 9.600000000000001e-06, "loss": 0.4541, "step": 192 }, { "epoch": 0.03, "grad_norm": 1.5920366048812866, "learning_rate": 9.65e-06, "loss": 0.4749, "step": 193 }, { "epoch": 0.03, "grad_norm": 1.6478047370910645, "learning_rate": 9.7e-06, "loss": 0.469, "step": 194 }, { "epoch": 0.03, "grad_norm": 2.999720811843872, "learning_rate": 9.75e-06, "loss": 0.4639, "step": 195 }, { "epoch": 0.03, "grad_norm": 1.5653431415557861, "learning_rate": 9.800000000000001e-06, "loss": 0.4917, "step": 196 }, { "epoch": 0.03, "grad_norm": 1.4825968742370605, "learning_rate": 9.85e-06, "loss": 0.5817, "step": 197 }, { "epoch": 0.03, "grad_norm": 1.6341500282287598, "learning_rate": 9.9e-06, "loss": 0.4976, "step": 198 }, { "epoch": 0.03, "grad_norm": 2.70700740814209, "learning_rate": 9.950000000000001e-06, "loss": 0.5053, "step": 199 }, { "epoch": 0.03, "grad_norm": 1.2851394414901733, "learning_rate": 1e-05, "loss": 0.4504, "step": 200 }, { "epoch": 0.03, "grad_norm": 1.4738597869873047, "learning_rate": 9.999999408195624e-06, "loss": 0.7473, "step": 201 }, { "epoch": 0.03, "grad_norm": 1.8492881059646606, "learning_rate": 9.999997632782634e-06, "loss": 0.4453, "step": 202 }, { "epoch": 0.03, "grad_norm": 1.9671233892440796, "learning_rate": 9.999994673761451e-06, "loss": 0.461, "step": 203 }, { "epoch": 0.03, "grad_norm": 1.4560949802398682, "learning_rate": 9.999990531132775e-06, "loss": 0.463, "step": 204 }, { "epoch": 0.03, "grad_norm": 1.1846489906311035, "learning_rate": 9.999985204897588e-06, "loss": 0.4968, "step": 205 }, { "epoch": 0.03, "grad_norm": 2.02722430229187, "learning_rate": 9.999978695057148e-06, "loss": 0.5267, "step": 206 }, { "epoch": 0.03, "grad_norm": 3.0705864429473877, "learning_rate": 9.999971001613e-06, "loss": 0.5674, "step": 207 }, { "epoch": 0.03, "grad_norm": 2.461305618286133, "learning_rate": 9.999962124566963e-06, "loss": 0.4741, "step": 208 }, { "epoch": 0.03, "grad_norm": 1.807320475578308, "learning_rate": 9.999952063921136e-06, "loss": 0.4357, "step": 209 }, { "epoch": 0.03, "grad_norm": 2.965524435043335, "learning_rate": 9.999940819677906e-06, "loss": 0.5758, "step": 210 }, { "epoch": 0.03, "grad_norm": 1.6095221042633057, "learning_rate": 9.99992839183993e-06, "loss": 0.5322, "step": 211 }, { "epoch": 0.03, "grad_norm": 2.373774766921997, "learning_rate": 9.999914780410152e-06, "loss": 0.5274, "step": 212 }, { "epoch": 0.03, "grad_norm": 1.3409595489501953, "learning_rate": 9.999899985391796e-06, "loss": 0.7724, "step": 213 }, { "epoch": 0.03, "grad_norm": 1.3656233549118042, "learning_rate": 9.99988400678836e-06, "loss": 0.4664, "step": 214 }, { "epoch": 0.03, "grad_norm": 1.7753010988235474, "learning_rate": 9.999866844603629e-06, "loss": 0.5127, "step": 215 }, { "epoch": 0.03, "grad_norm": 1.45271897315979, "learning_rate": 9.999848498841665e-06, "loss": 0.53, "step": 216 }, { "epoch": 0.03, "grad_norm": 1.6548235416412354, "learning_rate": 9.999828969506814e-06, "loss": 0.4857, "step": 217 }, { "epoch": 0.03, "grad_norm": 2.1140246391296387, "learning_rate": 9.999808256603692e-06, "loss": 0.5471, "step": 218 }, { "epoch": 0.03, "grad_norm": 1.4681618213653564, "learning_rate": 9.999786360137208e-06, "loss": 0.5246, "step": 219 }, { "epoch": 0.03, "grad_norm": 3.0275824069976807, "learning_rate": 9.999763280112545e-06, "loss": 0.5094, "step": 220 }, { "epoch": 0.03, "grad_norm": 1.4317963123321533, "learning_rate": 9.999739016535164e-06, "loss": 0.5274, "step": 221 }, { "epoch": 0.03, "grad_norm": 2.6764612197875977, "learning_rate": 9.99971356941081e-06, "loss": 0.5447, "step": 222 }, { "epoch": 0.03, "grad_norm": 1.3419287204742432, "learning_rate": 9.999686938745508e-06, "loss": 0.789, "step": 223 }, { "epoch": 0.03, "grad_norm": 1.7424551248550415, "learning_rate": 9.99965912454556e-06, "loss": 0.4763, "step": 224 }, { "epoch": 0.03, "grad_norm": 1.3599278926849365, "learning_rate": 9.999630126817553e-06, "loss": 0.436, "step": 225 }, { "epoch": 0.03, "grad_norm": 1.1846086978912354, "learning_rate": 9.999599945568348e-06, "loss": 0.4903, "step": 226 }, { "epoch": 0.03, "grad_norm": 1.370955228805542, "learning_rate": 9.99956858080509e-06, "loss": 0.5394, "step": 227 }, { "epoch": 0.03, "grad_norm": 1.3246210813522339, "learning_rate": 9.999536032535206e-06, "loss": 0.4954, "step": 228 }, { "epoch": 0.03, "grad_norm": 2.113231658935547, "learning_rate": 9.9995023007664e-06, "loss": 0.5504, "step": 229 }, { "epoch": 0.03, "grad_norm": 1.6911265850067139, "learning_rate": 9.999467385506659e-06, "loss": 0.5005, "step": 230 }, { "epoch": 0.03, "grad_norm": 1.9012815952301025, "learning_rate": 9.999431286764244e-06, "loss": 0.5293, "step": 231 }, { "epoch": 0.03, "grad_norm": 2.202765464782715, "learning_rate": 9.9993940045477e-06, "loss": 0.5259, "step": 232 }, { "epoch": 0.04, "grad_norm": 3.6310222148895264, "learning_rate": 9.999355538865859e-06, "loss": 0.4404, "step": 233 }, { "epoch": 0.04, "grad_norm": 2.326993465423584, "learning_rate": 9.99931588972782e-06, "loss": 0.5205, "step": 234 }, { "epoch": 0.04, "grad_norm": 3.9959936141967773, "learning_rate": 9.999275057142971e-06, "loss": 0.4135, "step": 235 }, { "epoch": 0.04, "grad_norm": 1.576037049293518, "learning_rate": 9.999233041120979e-06, "loss": 0.491, "step": 236 }, { "epoch": 0.04, "grad_norm": 1.5360389947891235, "learning_rate": 9.99918984167179e-06, "loss": 0.5219, "step": 237 }, { "epoch": 0.04, "grad_norm": 1.3273695707321167, "learning_rate": 9.999145458805628e-06, "loss": 0.4248, "step": 238 }, { "epoch": 0.04, "grad_norm": 2.718477249145508, "learning_rate": 9.999099892533004e-06, "loss": 0.4682, "step": 239 }, { "epoch": 0.04, "grad_norm": 1.6387255191802979, "learning_rate": 9.999053142864699e-06, "loss": 0.4869, "step": 240 }, { "epoch": 0.04, "grad_norm": 1.4982798099517822, "learning_rate": 9.999005209811783e-06, "loss": 0.4175, "step": 241 }, { "epoch": 0.04, "grad_norm": 1.7405450344085693, "learning_rate": 9.998956093385603e-06, "loss": 0.5152, "step": 242 }, { "epoch": 0.04, "grad_norm": 1.5765814781188965, "learning_rate": 9.998905793597784e-06, "loss": 0.5034, "step": 243 }, { "epoch": 0.04, "grad_norm": 1.663150668144226, "learning_rate": 9.998854310460233e-06, "loss": 0.5126, "step": 244 }, { "epoch": 0.04, "grad_norm": 1.6148582696914673, "learning_rate": 9.998801643985139e-06, "loss": 0.4985, "step": 245 }, { "epoch": 0.04, "grad_norm": 1.663811206817627, "learning_rate": 9.99874779418497e-06, "loss": 0.4537, "step": 246 }, { "epoch": 0.04, "grad_norm": 1.6349430084228516, "learning_rate": 9.99869276107247e-06, "loss": 0.424, "step": 247 }, { "epoch": 0.04, "grad_norm": 1.6400316953659058, "learning_rate": 9.998636544660669e-06, "loss": 0.5288, "step": 248 }, { "epoch": 0.04, "grad_norm": 2.206085681915283, "learning_rate": 9.998579144962874e-06, "loss": 0.4329, "step": 249 }, { "epoch": 0.04, "grad_norm": 2.0398802757263184, "learning_rate": 9.998520561992675e-06, "loss": 0.4609, "step": 250 }, { "epoch": 0.04, "grad_norm": 1.960763692855835, "learning_rate": 9.998460795763935e-06, "loss": 0.4907, "step": 251 }, { "epoch": 0.04, "grad_norm": 1.50313401222229, "learning_rate": 9.998399846290805e-06, "loss": 0.5055, "step": 252 }, { "epoch": 0.04, "grad_norm": 1.4769412279129028, "learning_rate": 9.998337713587714e-06, "loss": 0.5313, "step": 253 }, { "epoch": 0.04, "grad_norm": 3.309326171875, "learning_rate": 9.99827439766937e-06, "loss": 0.458, "step": 254 }, { "epoch": 0.04, "grad_norm": 1.8123770952224731, "learning_rate": 9.998209898550756e-06, "loss": 0.6258, "step": 255 }, { "epoch": 0.04, "grad_norm": 1.7437069416046143, "learning_rate": 9.998144216247148e-06, "loss": 0.5096, "step": 256 }, { "epoch": 0.04, "grad_norm": 2.206125020980835, "learning_rate": 9.99807735077409e-06, "loss": 0.4508, "step": 257 }, { "epoch": 0.04, "grad_norm": 2.9244821071624756, "learning_rate": 9.99800930214741e-06, "loss": 0.5234, "step": 258 }, { "epoch": 0.04, "grad_norm": 1.6873438358306885, "learning_rate": 9.997940070383221e-06, "loss": 0.5155, "step": 259 }, { "epoch": 0.04, "grad_norm": 2.95277738571167, "learning_rate": 9.997869655497906e-06, "loss": 0.5509, "step": 260 }, { "epoch": 0.04, "grad_norm": 1.623983383178711, "learning_rate": 9.997798057508139e-06, "loss": 0.4521, "step": 261 }, { "epoch": 0.04, "grad_norm": 1.1490137577056885, "learning_rate": 9.997725276430863e-06, "loss": 0.7329, "step": 262 }, { "epoch": 0.04, "grad_norm": 2.275517225265503, "learning_rate": 9.997651312283313e-06, "loss": 0.5153, "step": 263 }, { "epoch": 0.04, "grad_norm": 1.6917893886566162, "learning_rate": 9.997576165082994e-06, "loss": 0.4481, "step": 264 }, { "epoch": 0.04, "grad_norm": 1.145427942276001, "learning_rate": 9.997499834847697e-06, "loss": 0.4695, "step": 265 }, { "epoch": 0.04, "grad_norm": 3.5461301803588867, "learning_rate": 9.997422321595488e-06, "loss": 0.4175, "step": 266 }, { "epoch": 0.04, "grad_norm": 1.970658540725708, "learning_rate": 9.99734362534472e-06, "loss": 0.4715, "step": 267 }, { "epoch": 0.04, "grad_norm": 2.125077962875366, "learning_rate": 9.997263746114019e-06, "loss": 0.4716, "step": 268 }, { "epoch": 0.04, "grad_norm": 2.6855218410491943, "learning_rate": 9.997182683922295e-06, "loss": 0.5832, "step": 269 }, { "epoch": 0.04, "grad_norm": 3.049365997314453, "learning_rate": 9.997100438788738e-06, "loss": 0.5314, "step": 270 }, { "epoch": 0.04, "grad_norm": 1.7764861583709717, "learning_rate": 9.997017010732818e-06, "loss": 0.6082, "step": 271 }, { "epoch": 0.04, "grad_norm": 2.16890549659729, "learning_rate": 9.996932399774282e-06, "loss": 0.4785, "step": 272 }, { "epoch": 0.04, "grad_norm": 1.6492301225662231, "learning_rate": 9.99684660593316e-06, "loss": 0.4698, "step": 273 }, { "epoch": 0.04, "grad_norm": 2.2730112075805664, "learning_rate": 9.996759629229762e-06, "loss": 0.4935, "step": 274 }, { "epoch": 0.04, "grad_norm": 2.2313120365142822, "learning_rate": 9.996671469684676e-06, "loss": 0.5027, "step": 275 }, { "epoch": 0.04, "grad_norm": 2.491135358810425, "learning_rate": 9.996582127318773e-06, "loss": 0.5808, "step": 276 }, { "epoch": 0.04, "grad_norm": 1.6409087181091309, "learning_rate": 9.996491602153202e-06, "loss": 0.4903, "step": 277 }, { "epoch": 0.04, "grad_norm": 1.440616488456726, "learning_rate": 9.99639989420939e-06, "loss": 0.4515, "step": 278 }, { "epoch": 0.04, "grad_norm": 1.5153642892837524, "learning_rate": 9.99630700350905e-06, "loss": 0.537, "step": 279 }, { "epoch": 0.04, "grad_norm": 1.3483446836471558, "learning_rate": 9.996212930074167e-06, "loss": 0.7429, "step": 280 }, { "epoch": 0.04, "grad_norm": 1.3935810327529907, "learning_rate": 9.996117673927013e-06, "loss": 0.4802, "step": 281 }, { "epoch": 0.04, "grad_norm": 1.4794998168945312, "learning_rate": 9.996021235090137e-06, "loss": 0.4594, "step": 282 }, { "epoch": 0.04, "grad_norm": 1.4164793491363525, "learning_rate": 9.995923613586367e-06, "loss": 0.8096, "step": 283 }, { "epoch": 0.04, "grad_norm": 1.7908576726913452, "learning_rate": 9.995824809438814e-06, "loss": 0.5614, "step": 284 }, { "epoch": 0.04, "grad_norm": 1.481379508972168, "learning_rate": 9.995724822670866e-06, "loss": 0.526, "step": 285 }, { "epoch": 0.04, "grad_norm": 1.597434401512146, "learning_rate": 9.995623653306191e-06, "loss": 0.5029, "step": 286 }, { "epoch": 0.04, "grad_norm": 1.115678310394287, "learning_rate": 9.995521301368741e-06, "loss": 0.5376, "step": 287 }, { "epoch": 0.04, "grad_norm": 1.3540256023406982, "learning_rate": 9.99541776688274e-06, "loss": 0.4739, "step": 288 }, { "epoch": 0.04, "grad_norm": 1.2932310104370117, "learning_rate": 9.995313049872701e-06, "loss": 0.4977, "step": 289 }, { "epoch": 0.04, "grad_norm": 1.228064775466919, "learning_rate": 9.995207150363413e-06, "loss": 0.4771, "step": 290 }, { "epoch": 0.04, "grad_norm": 1.304926872253418, "learning_rate": 9.995100068379942e-06, "loss": 0.5462, "step": 291 }, { "epoch": 0.04, "grad_norm": 1.0802055597305298, "learning_rate": 9.994991803947638e-06, "loss": 0.5107, "step": 292 }, { "epoch": 0.04, "grad_norm": 1.798813819885254, "learning_rate": 9.99488235709213e-06, "loss": 0.5363, "step": 293 }, { "epoch": 0.04, "grad_norm": 1.6060359477996826, "learning_rate": 9.994771727839325e-06, "loss": 0.4386, "step": 294 }, { "epoch": 0.04, "grad_norm": 2.5492284297943115, "learning_rate": 9.994659916215414e-06, "loss": 0.5061, "step": 295 }, { "epoch": 0.04, "grad_norm": 1.6627404689788818, "learning_rate": 9.994546922246862e-06, "loss": 0.4654, "step": 296 }, { "epoch": 0.04, "grad_norm": 1.7086586952209473, "learning_rate": 9.994432745960419e-06, "loss": 0.5467, "step": 297 }, { "epoch": 0.04, "grad_norm": 1.3310346603393555, "learning_rate": 9.994317387383112e-06, "loss": 0.5044, "step": 298 }, { "epoch": 0.04, "grad_norm": 2.029215097427368, "learning_rate": 9.994200846542251e-06, "loss": 0.5806, "step": 299 }, { "epoch": 0.05, "grad_norm": 1.1868699789047241, "learning_rate": 9.994083123465423e-06, "loss": 0.521, "step": 300 }, { "epoch": 0.05, "grad_norm": 1.2874051332473755, "learning_rate": 9.993964218180493e-06, "loss": 0.4967, "step": 301 }, { "epoch": 0.05, "grad_norm": 1.988726258277893, "learning_rate": 9.993844130715611e-06, "loss": 0.662, "step": 302 }, { "epoch": 0.05, "grad_norm": 2.041715145111084, "learning_rate": 9.993722861099204e-06, "loss": 0.4424, "step": 303 }, { "epoch": 0.05, "grad_norm": 1.4559005498886108, "learning_rate": 9.993600409359978e-06, "loss": 0.4831, "step": 304 }, { "epoch": 0.05, "grad_norm": 1.4749737977981567, "learning_rate": 9.993476775526922e-06, "loss": 0.5007, "step": 305 }, { "epoch": 0.05, "grad_norm": 1.7034924030303955, "learning_rate": 9.993351959629302e-06, "loss": 0.4422, "step": 306 }, { "epoch": 0.05, "grad_norm": 1.5511888265609741, "learning_rate": 9.993225961696663e-06, "loss": 0.4687, "step": 307 }, { "epoch": 0.05, "grad_norm": 1.2554242610931396, "learning_rate": 9.993098781758834e-06, "loss": 0.5188, "step": 308 }, { "epoch": 0.05, "grad_norm": 1.3459324836730957, "learning_rate": 9.99297041984592e-06, "loss": 0.5816, "step": 309 }, { "epoch": 0.05, "grad_norm": 1.3878494501113892, "learning_rate": 9.992840875988306e-06, "loss": 0.7569, "step": 310 }, { "epoch": 0.05, "grad_norm": 2.2002410888671875, "learning_rate": 9.992710150216657e-06, "loss": 0.464, "step": 311 }, { "epoch": 0.05, "grad_norm": 1.6957499980926514, "learning_rate": 9.992578242561925e-06, "loss": 0.5405, "step": 312 }, { "epoch": 0.05, "grad_norm": 1.5183041095733643, "learning_rate": 9.992445153055327e-06, "loss": 0.4379, "step": 313 }, { "epoch": 0.05, "grad_norm": 1.4183837175369263, "learning_rate": 9.992310881728375e-06, "loss": 0.4599, "step": 314 }, { "epoch": 0.05, "grad_norm": 1.8762025833129883, "learning_rate": 9.992175428612851e-06, "loss": 0.5006, "step": 315 }, { "epoch": 0.05, "grad_norm": 1.8365817070007324, "learning_rate": 9.99203879374082e-06, "loss": 0.5002, "step": 316 }, { "epoch": 0.05, "grad_norm": 1.707092523574829, "learning_rate": 9.991900977144625e-06, "loss": 0.6151, "step": 317 }, { "epoch": 0.05, "grad_norm": 1.659830927848816, "learning_rate": 9.991761978856891e-06, "loss": 0.5696, "step": 318 }, { "epoch": 0.05, "grad_norm": 1.5418020486831665, "learning_rate": 9.991621798910522e-06, "loss": 0.492, "step": 319 }, { "epoch": 0.05, "grad_norm": 1.1798330545425415, "learning_rate": 9.991480437338704e-06, "loss": 0.7072, "step": 320 }, { "epoch": 0.05, "grad_norm": 1.5811840295791626, "learning_rate": 9.991337894174898e-06, "loss": 0.548, "step": 321 }, { "epoch": 0.05, "grad_norm": 2.109661817550659, "learning_rate": 9.991194169452846e-06, "loss": 0.4614, "step": 322 }, { "epoch": 0.05, "grad_norm": 1.4547100067138672, "learning_rate": 9.991049263206573e-06, "loss": 0.7068, "step": 323 }, { "epoch": 0.05, "grad_norm": 2.2300515174865723, "learning_rate": 9.99090317547038e-06, "loss": 0.4435, "step": 324 }, { "epoch": 0.05, "grad_norm": 1.5330740213394165, "learning_rate": 9.990755906278851e-06, "loss": 0.5414, "step": 325 }, { "epoch": 0.05, "grad_norm": 2.0671212673187256, "learning_rate": 9.990607455666847e-06, "loss": 0.5672, "step": 326 }, { "epoch": 0.05, "grad_norm": 2.8390538692474365, "learning_rate": 9.990457823669508e-06, "loss": 0.5619, "step": 327 }, { "epoch": 0.05, "grad_norm": 1.7256108522415161, "learning_rate": 9.990307010322256e-06, "loss": 0.5309, "step": 328 }, { "epoch": 0.05, "grad_norm": 1.5459314584732056, "learning_rate": 9.990155015660794e-06, "loss": 0.5572, "step": 329 }, { "epoch": 0.05, "grad_norm": 1.7206617593765259, "learning_rate": 9.990001839721097e-06, "loss": 0.4463, "step": 330 }, { "epoch": 0.05, "grad_norm": 1.3754196166992188, "learning_rate": 9.989847482539433e-06, "loss": 0.434, "step": 331 }, { "epoch": 0.05, "grad_norm": 1.5380648374557495, "learning_rate": 9.989691944152333e-06, "loss": 0.5469, "step": 332 }, { "epoch": 0.05, "grad_norm": 1.1860941648483276, "learning_rate": 9.989535224596623e-06, "loss": 0.4936, "step": 333 }, { "epoch": 0.05, "grad_norm": 1.1680052280426025, "learning_rate": 9.989377323909399e-06, "loss": 0.7177, "step": 334 }, { "epoch": 0.05, "grad_norm": 4.55633544921875, "learning_rate": 9.98921824212804e-06, "loss": 0.4792, "step": 335 }, { "epoch": 0.05, "grad_norm": 1.2429320812225342, "learning_rate": 9.989057979290204e-06, "loss": 0.7194, "step": 336 }, { "epoch": 0.05, "grad_norm": 1.9487026929855347, "learning_rate": 9.98889653543383e-06, "loss": 0.5268, "step": 337 }, { "epoch": 0.05, "grad_norm": 1.2854102849960327, "learning_rate": 9.988733910597132e-06, "loss": 0.4725, "step": 338 }, { "epoch": 0.05, "grad_norm": 1.602010726928711, "learning_rate": 9.98857010481861e-06, "loss": 0.4092, "step": 339 }, { "epoch": 0.05, "grad_norm": 1.4286696910858154, "learning_rate": 9.98840511813704e-06, "loss": 0.4562, "step": 340 }, { "epoch": 0.05, "grad_norm": 1.8009840250015259, "learning_rate": 9.988238950591478e-06, "loss": 0.5798, "step": 341 }, { "epoch": 0.05, "grad_norm": 2.002406597137451, "learning_rate": 9.988071602221256e-06, "loss": 0.5201, "step": 342 }, { "epoch": 0.05, "grad_norm": 1.77364981174469, "learning_rate": 9.987903073065995e-06, "loss": 0.5267, "step": 343 }, { "epoch": 0.05, "grad_norm": 1.984587550163269, "learning_rate": 9.987733363165584e-06, "loss": 0.4575, "step": 344 }, { "epoch": 0.05, "grad_norm": 2.862123489379883, "learning_rate": 9.9875624725602e-06, "loss": 0.5446, "step": 345 }, { "epoch": 0.05, "grad_norm": 1.2996296882629395, "learning_rate": 9.987390401290295e-06, "loss": 0.5901, "step": 346 }, { "epoch": 0.05, "grad_norm": 1.5497322082519531, "learning_rate": 9.987217149396605e-06, "loss": 0.4876, "step": 347 }, { "epoch": 0.05, "grad_norm": 3.6021831035614014, "learning_rate": 9.987042716920138e-06, "loss": 0.5331, "step": 348 }, { "epoch": 0.05, "grad_norm": 1.533854603767395, "learning_rate": 9.98686710390219e-06, "loss": 0.4788, "step": 349 }, { "epoch": 0.05, "grad_norm": 2.120351791381836, "learning_rate": 9.986690310384329e-06, "loss": 0.499, "step": 350 }, { "epoch": 0.05, "grad_norm": 1.8264763355255127, "learning_rate": 9.98651233640841e-06, "loss": 0.5212, "step": 351 }, { "epoch": 0.05, "grad_norm": 1.51094388961792, "learning_rate": 9.986333182016559e-06, "loss": 0.5169, "step": 352 }, { "epoch": 0.05, "grad_norm": 1.4618817567825317, "learning_rate": 9.986152847251187e-06, "loss": 0.4933, "step": 353 }, { "epoch": 0.05, "grad_norm": 1.3127484321594238, "learning_rate": 9.985971332154985e-06, "loss": 0.7647, "step": 354 }, { "epoch": 0.05, "grad_norm": 1.2627310752868652, "learning_rate": 9.98578863677092e-06, "loss": 0.6918, "step": 355 }, { "epoch": 0.05, "grad_norm": 1.2971868515014648, "learning_rate": 9.985604761142242e-06, "loss": 0.5458, "step": 356 }, { "epoch": 0.05, "grad_norm": 2.40871262550354, "learning_rate": 9.985419705312475e-06, "loss": 0.4566, "step": 357 }, { "epoch": 0.05, "grad_norm": 1.7050837278366089, "learning_rate": 9.985233469325429e-06, "loss": 0.4279, "step": 358 }, { "epoch": 0.05, "grad_norm": 1.8704237937927246, "learning_rate": 9.985046053225186e-06, "loss": 0.5274, "step": 359 }, { "epoch": 0.05, "grad_norm": 1.6095014810562134, "learning_rate": 9.984857457056116e-06, "loss": 0.5358, "step": 360 }, { "epoch": 0.05, "grad_norm": 1.4251214265823364, "learning_rate": 9.984667680862861e-06, "loss": 0.5761, "step": 361 }, { "epoch": 0.05, "grad_norm": 1.2238554954528809, "learning_rate": 9.984476724690346e-06, "loss": 0.46, "step": 362 }, { "epoch": 0.05, "grad_norm": 7.045658111572266, "learning_rate": 9.984284588583774e-06, "loss": 0.5134, "step": 363 }, { "epoch": 0.05, "grad_norm": 1.3061941862106323, "learning_rate": 9.98409127258863e-06, "loss": 0.4647, "step": 364 }, { "epoch": 0.05, "grad_norm": 1.204996943473816, "learning_rate": 9.983896776750673e-06, "loss": 0.5331, "step": 365 }, { "epoch": 0.05, "grad_norm": 4.150661468505859, "learning_rate": 9.983701101115946e-06, "loss": 0.5356, "step": 366 }, { "epoch": 0.06, "grad_norm": 1.5941818952560425, "learning_rate": 9.983504245730771e-06, "loss": 0.51, "step": 367 }, { "epoch": 0.06, "grad_norm": 1.5274639129638672, "learning_rate": 9.983306210641744e-06, "loss": 0.5316, "step": 368 }, { "epoch": 0.06, "grad_norm": 1.3045518398284912, "learning_rate": 9.983106995895748e-06, "loss": 0.4476, "step": 369 }, { "epoch": 0.06, "grad_norm": 1.5879727602005005, "learning_rate": 9.98290660153994e-06, "loss": 0.4947, "step": 370 }, { "epoch": 0.06, "grad_norm": 1.0785049200057983, "learning_rate": 9.982705027621757e-06, "loss": 0.5148, "step": 371 }, { "epoch": 0.06, "grad_norm": 1.3922569751739502, "learning_rate": 9.982502274188917e-06, "loss": 0.5754, "step": 372 }, { "epoch": 0.06, "grad_norm": 1.7216569185256958, "learning_rate": 9.982298341289416e-06, "loss": 0.495, "step": 373 }, { "epoch": 0.06, "grad_norm": 1.1204564571380615, "learning_rate": 9.98209322897153e-06, "loss": 0.5192, "step": 374 }, { "epoch": 0.06, "grad_norm": 1.073109745979309, "learning_rate": 9.981886937283812e-06, "loss": 0.4859, "step": 375 }, { "epoch": 0.06, "grad_norm": 3.870542287826538, "learning_rate": 9.981679466275095e-06, "loss": 0.5149, "step": 376 }, { "epoch": 0.06, "grad_norm": 1.7058933973312378, "learning_rate": 9.981470815994496e-06, "loss": 0.4591, "step": 377 }, { "epoch": 0.06, "grad_norm": 1.4745763540267944, "learning_rate": 9.981260986491404e-06, "loss": 0.7481, "step": 378 }, { "epoch": 0.06, "grad_norm": 1.4094566106796265, "learning_rate": 9.981049977815489e-06, "loss": 0.5315, "step": 379 }, { "epoch": 0.06, "grad_norm": 1.529308557510376, "learning_rate": 9.980837790016704e-06, "loss": 0.5179, "step": 380 }, { "epoch": 0.06, "grad_norm": 1.5316954851150513, "learning_rate": 9.980624423145278e-06, "loss": 0.5032, "step": 381 }, { "epoch": 0.06, "grad_norm": 1.1152559518814087, "learning_rate": 9.980409877251718e-06, "loss": 0.4464, "step": 382 }, { "epoch": 0.06, "grad_norm": 1.8825665712356567, "learning_rate": 9.980194152386813e-06, "loss": 0.4898, "step": 383 }, { "epoch": 0.06, "grad_norm": 2.572195529937744, "learning_rate": 9.979977248601629e-06, "loss": 0.4677, "step": 384 }, { "epoch": 0.06, "grad_norm": 1.1290643215179443, "learning_rate": 9.979759165947514e-06, "loss": 0.4843, "step": 385 }, { "epoch": 0.06, "grad_norm": 1.6385074853897095, "learning_rate": 9.979539904476089e-06, "loss": 0.561, "step": 386 }, { "epoch": 0.06, "grad_norm": 1.198962926864624, "learning_rate": 9.979319464239263e-06, "loss": 0.441, "step": 387 }, { "epoch": 0.06, "grad_norm": 2.3230979442596436, "learning_rate": 9.979097845289214e-06, "loss": 0.5042, "step": 388 }, { "epoch": 0.06, "grad_norm": 1.3348242044448853, "learning_rate": 9.978875047678407e-06, "loss": 0.5252, "step": 389 }, { "epoch": 0.06, "grad_norm": 2.216259241104126, "learning_rate": 9.978651071459581e-06, "loss": 0.5411, "step": 390 }, { "epoch": 0.06, "grad_norm": 2.2633368968963623, "learning_rate": 9.978425916685758e-06, "loss": 0.5316, "step": 391 }, { "epoch": 0.06, "grad_norm": 1.3961447477340698, "learning_rate": 9.978199583410237e-06, "loss": 0.4683, "step": 392 }, { "epoch": 0.06, "grad_norm": 3.161043643951416, "learning_rate": 9.977972071686596e-06, "loss": 0.413, "step": 393 }, { "epoch": 0.06, "grad_norm": 1.9823722839355469, "learning_rate": 9.97774338156869e-06, "loss": 0.5157, "step": 394 }, { "epoch": 0.06, "grad_norm": 1.3898842334747314, "learning_rate": 9.977513513110656e-06, "loss": 0.494, "step": 395 }, { "epoch": 0.06, "grad_norm": 1.357505440711975, "learning_rate": 9.97728246636691e-06, "loss": 0.5056, "step": 396 }, { "epoch": 0.06, "grad_norm": 1.4899464845657349, "learning_rate": 9.977050241392145e-06, "loss": 0.5282, "step": 397 }, { "epoch": 0.06, "grad_norm": 1.907165765762329, "learning_rate": 9.976816838241334e-06, "loss": 0.4483, "step": 398 }, { "epoch": 0.06, "grad_norm": 1.9117463827133179, "learning_rate": 9.976582256969726e-06, "loss": 0.5295, "step": 399 }, { "epoch": 0.06, "grad_norm": 2.145329236984253, "learning_rate": 9.976346497632857e-06, "loss": 0.5538, "step": 400 }, { "epoch": 0.06, "grad_norm": 1.7396674156188965, "learning_rate": 9.976109560286532e-06, "loss": 0.4913, "step": 401 }, { "epoch": 0.06, "grad_norm": 1.6017102003097534, "learning_rate": 9.975871444986837e-06, "loss": 0.488, "step": 402 }, { "epoch": 0.06, "grad_norm": 1.69334876537323, "learning_rate": 9.975632151790146e-06, "loss": 0.5268, "step": 403 }, { "epoch": 0.06, "grad_norm": 1.3860859870910645, "learning_rate": 9.9753916807531e-06, "loss": 0.5129, "step": 404 }, { "epoch": 0.06, "grad_norm": 1.6581506729125977, "learning_rate": 9.975150031932626e-06, "loss": 0.5136, "step": 405 }, { "epoch": 0.06, "grad_norm": 2.3927249908447266, "learning_rate": 9.974907205385924e-06, "loss": 0.5193, "step": 406 }, { "epoch": 0.06, "grad_norm": 1.3765637874603271, "learning_rate": 9.97466320117048e-06, "loss": 0.4505, "step": 407 }, { "epoch": 0.06, "grad_norm": 3.281003475189209, "learning_rate": 9.974418019344055e-06, "loss": 0.5339, "step": 408 }, { "epoch": 0.06, "grad_norm": 1.9935810565948486, "learning_rate": 9.974171659964688e-06, "loss": 0.4583, "step": 409 }, { "epoch": 0.06, "grad_norm": 1.4182196855545044, "learning_rate": 9.973924123090698e-06, "loss": 0.5251, "step": 410 }, { "epoch": 0.06, "grad_norm": 1.4942646026611328, "learning_rate": 9.97367540878068e-06, "loss": 0.5209, "step": 411 }, { "epoch": 0.06, "grad_norm": 1.7239570617675781, "learning_rate": 9.973425517093511e-06, "loss": 0.5169, "step": 412 }, { "epoch": 0.06, "grad_norm": 1.3517318964004517, "learning_rate": 9.973174448088347e-06, "loss": 0.5072, "step": 413 }, { "epoch": 0.06, "grad_norm": 2.042025566101074, "learning_rate": 9.972922201824625e-06, "loss": 0.6258, "step": 414 }, { "epoch": 0.06, "grad_norm": 1.1984963417053223, "learning_rate": 9.97266877836205e-06, "loss": 0.5669, "step": 415 }, { "epoch": 0.06, "grad_norm": 1.9050573110580444, "learning_rate": 9.972414177760616e-06, "loss": 0.4989, "step": 416 }, { "epoch": 0.06, "grad_norm": 1.3270243406295776, "learning_rate": 9.972158400080594e-06, "loss": 0.4459, "step": 417 }, { "epoch": 0.06, "grad_norm": 2.031404495239258, "learning_rate": 9.97190144538253e-06, "loss": 0.5148, "step": 418 }, { "epoch": 0.06, "grad_norm": 4.309995174407959, "learning_rate": 9.97164331372725e-06, "loss": 0.4108, "step": 419 }, { "epoch": 0.06, "grad_norm": 2.6173059940338135, "learning_rate": 9.971384005175863e-06, "loss": 0.5205, "step": 420 }, { "epoch": 0.06, "grad_norm": 1.2464032173156738, "learning_rate": 9.971123519789752e-06, "loss": 0.4313, "step": 421 }, { "epoch": 0.06, "grad_norm": 1.3675910234451294, "learning_rate": 9.970861857630578e-06, "loss": 0.4879, "step": 422 }, { "epoch": 0.06, "grad_norm": 1.2862210273742676, "learning_rate": 9.97059901876028e-06, "loss": 0.5177, "step": 423 }, { "epoch": 0.06, "grad_norm": 1.278022289276123, "learning_rate": 9.970335003241084e-06, "loss": 0.5141, "step": 424 }, { "epoch": 0.06, "grad_norm": 2.135965585708618, "learning_rate": 9.970069811135482e-06, "loss": 0.6308, "step": 425 }, { "epoch": 0.06, "grad_norm": 1.6246057748794556, "learning_rate": 9.969803442506253e-06, "loss": 0.5509, "step": 426 }, { "epoch": 0.06, "grad_norm": 2.049867868423462, "learning_rate": 9.969535897416457e-06, "loss": 0.5247, "step": 427 }, { "epoch": 0.06, "grad_norm": 1.2770811319351196, "learning_rate": 9.969267175929418e-06, "loss": 0.5385, "step": 428 }, { "epoch": 0.06, "grad_norm": 1.6919407844543457, "learning_rate": 9.968997278108756e-06, "loss": 0.4751, "step": 429 }, { "epoch": 0.06, "grad_norm": 1.3551599979400635, "learning_rate": 9.968726204018359e-06, "loss": 0.5322, "step": 430 }, { "epoch": 0.06, "grad_norm": 1.2485735416412354, "learning_rate": 9.968453953722394e-06, "loss": 0.7284, "step": 431 }, { "epoch": 0.06, "grad_norm": 1.671436071395874, "learning_rate": 9.968180527285314e-06, "loss": 0.5532, "step": 432 }, { "epoch": 0.07, "grad_norm": 1.9521578550338745, "learning_rate": 9.96790592477184e-06, "loss": 0.5165, "step": 433 }, { "epoch": 0.07, "grad_norm": 1.5819640159606934, "learning_rate": 9.967630146246979e-06, "loss": 0.4034, "step": 434 }, { "epoch": 0.07, "grad_norm": 0.9947062134742737, "learning_rate": 9.967353191776013e-06, "loss": 0.7395, "step": 435 }, { "epoch": 0.07, "grad_norm": 1.2304219007492065, "learning_rate": 9.967075061424503e-06, "loss": 0.4936, "step": 436 }, { "epoch": 0.07, "grad_norm": 1.790428876876831, "learning_rate": 9.96679575525829e-06, "loss": 0.4669, "step": 437 }, { "epoch": 0.07, "grad_norm": 1.3004088401794434, "learning_rate": 9.966515273343488e-06, "loss": 0.5734, "step": 438 }, { "epoch": 0.07, "grad_norm": 1.6801897287368774, "learning_rate": 9.966233615746496e-06, "loss": 0.5338, "step": 439 }, { "epoch": 0.07, "grad_norm": 1.3127912282943726, "learning_rate": 9.965950782533991e-06, "loss": 0.4295, "step": 440 }, { "epoch": 0.07, "grad_norm": 1.4111464023590088, "learning_rate": 9.965666773772919e-06, "loss": 0.5567, "step": 441 }, { "epoch": 0.07, "grad_norm": 2.078608512878418, "learning_rate": 9.965381589530519e-06, "loss": 0.4982, "step": 442 }, { "epoch": 0.07, "grad_norm": 1.1668100357055664, "learning_rate": 9.965095229874293e-06, "loss": 0.7484, "step": 443 }, { "epoch": 0.07, "grad_norm": 1.254705548286438, "learning_rate": 9.964807694872034e-06, "loss": 0.5291, "step": 444 }, { "epoch": 0.07, "grad_norm": 1.3036447763442993, "learning_rate": 9.964518984591804e-06, "loss": 0.5905, "step": 445 }, { "epoch": 0.07, "grad_norm": 5.243808746337891, "learning_rate": 9.96422909910195e-06, "loss": 0.5255, "step": 446 }, { "epoch": 0.07, "grad_norm": 1.331425428390503, "learning_rate": 9.963938038471091e-06, "loss": 0.4978, "step": 447 }, { "epoch": 0.07, "grad_norm": 1.2471380233764648, "learning_rate": 9.963645802768132e-06, "loss": 0.4682, "step": 448 }, { "epoch": 0.07, "grad_norm": 1.7828364372253418, "learning_rate": 9.963352392062246e-06, "loss": 0.5505, "step": 449 }, { "epoch": 0.07, "grad_norm": 1.6235771179199219, "learning_rate": 9.963057806422895e-06, "loss": 0.4349, "step": 450 }, { "epoch": 0.07, "grad_norm": 1.3600564002990723, "learning_rate": 9.96276204591981e-06, "loss": 0.7514, "step": 451 }, { "epoch": 0.07, "grad_norm": 1.8707880973815918, "learning_rate": 9.962465110623005e-06, "loss": 0.5317, "step": 452 }, { "epoch": 0.07, "grad_norm": 2.4652605056762695, "learning_rate": 9.962167000602771e-06, "loss": 0.4694, "step": 453 }, { "epoch": 0.07, "grad_norm": 1.2584682703018188, "learning_rate": 9.961867715929679e-06, "loss": 0.5191, "step": 454 }, { "epoch": 0.07, "grad_norm": 1.543083667755127, "learning_rate": 9.961567256674574e-06, "loss": 0.5259, "step": 455 }, { "epoch": 0.07, "grad_norm": 1.233580470085144, "learning_rate": 9.96126562290858e-06, "loss": 0.5352, "step": 456 }, { "epoch": 0.07, "grad_norm": 1.7255274057388306, "learning_rate": 9.960962814703103e-06, "loss": 0.5067, "step": 457 }, { "epoch": 0.07, "grad_norm": 1.7572003602981567, "learning_rate": 9.960658832129826e-06, "loss": 0.5473, "step": 458 }, { "epoch": 0.07, "grad_norm": 1.3338514566421509, "learning_rate": 9.960353675260703e-06, "loss": 0.478, "step": 459 }, { "epoch": 0.07, "grad_norm": 1.0577614307403564, "learning_rate": 9.960047344167974e-06, "loss": 0.4636, "step": 460 }, { "epoch": 0.07, "grad_norm": 1.6645374298095703, "learning_rate": 9.959739838924156e-06, "loss": 0.4773, "step": 461 }, { "epoch": 0.07, "grad_norm": 1.7057937383651733, "learning_rate": 9.959431159602037e-06, "loss": 0.5431, "step": 462 }, { "epoch": 0.07, "grad_norm": 1.996673583984375, "learning_rate": 9.959121306274695e-06, "loss": 0.5203, "step": 463 }, { "epoch": 0.07, "grad_norm": 1.7018557786941528, "learning_rate": 9.958810279015474e-06, "loss": 0.5737, "step": 464 }, { "epoch": 0.07, "grad_norm": 1.8946975469589233, "learning_rate": 9.958498077898002e-06, "loss": 0.5101, "step": 465 }, { "epoch": 0.07, "grad_norm": 1.131433129310608, "learning_rate": 9.958184702996186e-06, "loss": 0.4916, "step": 466 }, { "epoch": 0.07, "grad_norm": 2.357848644256592, "learning_rate": 9.957870154384206e-06, "loss": 0.5555, "step": 467 }, { "epoch": 0.07, "grad_norm": 1.37078857421875, "learning_rate": 9.957554432136523e-06, "loss": 0.5346, "step": 468 }, { "epoch": 0.07, "grad_norm": 1.6297051906585693, "learning_rate": 9.957237536327875e-06, "loss": 0.4473, "step": 469 }, { "epoch": 0.07, "grad_norm": 1.4352065324783325, "learning_rate": 9.95691946703328e-06, "loss": 0.5424, "step": 470 }, { "epoch": 0.07, "grad_norm": 1.3344829082489014, "learning_rate": 9.95660022432803e-06, "loss": 0.4617, "step": 471 }, { "epoch": 0.07, "grad_norm": 1.1067124605178833, "learning_rate": 9.9562798082877e-06, "loss": 0.4811, "step": 472 }, { "epoch": 0.07, "grad_norm": 1.3372067213058472, "learning_rate": 9.955958218988133e-06, "loss": 0.4383, "step": 473 }, { "epoch": 0.07, "grad_norm": 2.6999669075012207, "learning_rate": 9.955635456505462e-06, "loss": 0.5299, "step": 474 }, { "epoch": 0.07, "grad_norm": 2.6580612659454346, "learning_rate": 9.95531152091609e-06, "loss": 0.57, "step": 475 }, { "epoch": 0.07, "grad_norm": 1.4661284685134888, "learning_rate": 9.9549864122967e-06, "loss": 0.4969, "step": 476 }, { "epoch": 0.07, "grad_norm": 1.2061774730682373, "learning_rate": 9.95466013072425e-06, "loss": 0.7477, "step": 477 }, { "epoch": 0.07, "grad_norm": 1.7240867614746094, "learning_rate": 9.954332676275982e-06, "loss": 0.5393, "step": 478 }, { "epoch": 0.07, "grad_norm": 1.3618351221084595, "learning_rate": 9.954004049029409e-06, "loss": 0.7701, "step": 479 }, { "epoch": 0.07, "grad_norm": 1.3625969886779785, "learning_rate": 9.953674249062323e-06, "loss": 0.4217, "step": 480 }, { "epoch": 0.07, "grad_norm": 1.6596208810806274, "learning_rate": 9.953343276452798e-06, "loss": 0.4599, "step": 481 }, { "epoch": 0.07, "grad_norm": 6.371055603027344, "learning_rate": 9.95301113127918e-06, "loss": 0.5161, "step": 482 }, { "epoch": 0.07, "grad_norm": 2.583275556564331, "learning_rate": 9.952677813620097e-06, "loss": 0.5031, "step": 483 }, { "epoch": 0.07, "grad_norm": 1.6052075624465942, "learning_rate": 9.95234332355445e-06, "loss": 0.5146, "step": 484 }, { "epoch": 0.07, "grad_norm": 1.3071842193603516, "learning_rate": 9.952007661161423e-06, "loss": 0.5302, "step": 485 }, { "epoch": 0.07, "grad_norm": 1.315118670463562, "learning_rate": 9.95167082652047e-06, "loss": 0.7417, "step": 486 }, { "epoch": 0.07, "grad_norm": 1.4296387434005737, "learning_rate": 9.951332819711332e-06, "loss": 0.456, "step": 487 }, { "epoch": 0.07, "grad_norm": 1.6809533834457397, "learning_rate": 9.950993640814022e-06, "loss": 0.544, "step": 488 }, { "epoch": 0.07, "grad_norm": 1.5136022567749023, "learning_rate": 9.950653289908828e-06, "loss": 0.616, "step": 489 }, { "epoch": 0.07, "grad_norm": 1.4270751476287842, "learning_rate": 9.950311767076322e-06, "loss": 0.4841, "step": 490 }, { "epoch": 0.07, "grad_norm": 2.2242045402526855, "learning_rate": 9.949969072397346e-06, "loss": 0.4728, "step": 491 }, { "epoch": 0.07, "grad_norm": 1.5940669775009155, "learning_rate": 9.949625205953026e-06, "loss": 0.58, "step": 492 }, { "epoch": 0.07, "grad_norm": 2.101560354232788, "learning_rate": 9.949280167824762e-06, "loss": 0.4975, "step": 493 }, { "epoch": 0.07, "grad_norm": 1.3490002155303955, "learning_rate": 9.948933958094231e-06, "loss": 0.6329, "step": 494 }, { "epoch": 0.07, "grad_norm": 3.539364814758301, "learning_rate": 9.948586576843391e-06, "loss": 0.5188, "step": 495 }, { "epoch": 0.07, "grad_norm": 1.362019419670105, "learning_rate": 9.948238024154472e-06, "loss": 0.4673, "step": 496 }, { "epoch": 0.07, "grad_norm": 2.485926389694214, "learning_rate": 9.947888300109988e-06, "loss": 0.4499, "step": 497 }, { "epoch": 0.07, "grad_norm": 1.3221272230148315, "learning_rate": 9.94753740479272e-06, "loss": 0.4751, "step": 498 }, { "epoch": 0.07, "grad_norm": 1.3077969551086426, "learning_rate": 9.947185338285736e-06, "loss": 0.4947, "step": 499 }, { "epoch": 0.08, "grad_norm": 1.7332340478897095, "learning_rate": 9.94683210067238e-06, "loss": 0.5208, "step": 500 }, { "epoch": 0.08, "grad_norm": 1.6486847400665283, "learning_rate": 9.946477692036268e-06, "loss": 0.5066, "step": 501 }, { "epoch": 0.08, "grad_norm": 1.4454028606414795, "learning_rate": 9.946122112461296e-06, "loss": 0.6065, "step": 502 }, { "epoch": 0.08, "grad_norm": 1.9925732612609863, "learning_rate": 9.94576536203164e-06, "loss": 0.5655, "step": 503 }, { "epoch": 0.08, "grad_norm": 1.1824731826782227, "learning_rate": 9.945407440831747e-06, "loss": 0.5047, "step": 504 }, { "epoch": 0.08, "grad_norm": 1.4390571117401123, "learning_rate": 9.945048348946348e-06, "loss": 0.4758, "step": 505 }, { "epoch": 0.08, "grad_norm": 2.8210370540618896, "learning_rate": 9.944688086460446e-06, "loss": 0.4983, "step": 506 }, { "epoch": 0.08, "grad_norm": 1.5581355094909668, "learning_rate": 9.944326653459324e-06, "loss": 0.5071, "step": 507 }, { "epoch": 0.08, "grad_norm": 1.6840397119522095, "learning_rate": 9.94396405002854e-06, "loss": 0.472, "step": 508 }, { "epoch": 0.08, "grad_norm": 1.3270962238311768, "learning_rate": 9.943600276253933e-06, "loss": 0.5478, "step": 509 }, { "epoch": 0.08, "grad_norm": 1.356063961982727, "learning_rate": 9.943235332221613e-06, "loss": 0.5195, "step": 510 }, { "epoch": 0.08, "grad_norm": 1.7224984169006348, "learning_rate": 9.942869218017969e-06, "loss": 0.5061, "step": 511 }, { "epoch": 0.08, "grad_norm": 1.5450400114059448, "learning_rate": 9.94250193372967e-06, "loss": 0.5861, "step": 512 }, { "epoch": 0.08, "grad_norm": 1.6601500511169434, "learning_rate": 9.942133479443665e-06, "loss": 0.5706, "step": 513 }, { "epoch": 0.08, "grad_norm": 2.0880911350250244, "learning_rate": 9.941763855247167e-06, "loss": 0.4652, "step": 514 }, { "epoch": 0.08, "grad_norm": 1.1194971799850464, "learning_rate": 9.941393061227677e-06, "loss": 0.4592, "step": 515 }, { "epoch": 0.08, "grad_norm": 1.0409687757492065, "learning_rate": 9.941021097472973e-06, "loss": 0.4583, "step": 516 }, { "epoch": 0.08, "grad_norm": 1.2834585905075073, "learning_rate": 9.940647964071104e-06, "loss": 0.7666, "step": 517 }, { "epoch": 0.08, "grad_norm": 3.119136095046997, "learning_rate": 9.9402736611104e-06, "loss": 0.4409, "step": 518 }, { "epoch": 0.08, "grad_norm": 1.3360316753387451, "learning_rate": 9.939898188679465e-06, "loss": 0.4887, "step": 519 }, { "epoch": 0.08, "grad_norm": 2.2940754890441895, "learning_rate": 9.939521546867182e-06, "loss": 0.4622, "step": 520 }, { "epoch": 0.08, "grad_norm": 1.6991565227508545, "learning_rate": 9.939143735762714e-06, "loss": 0.5318, "step": 521 }, { "epoch": 0.08, "grad_norm": 1.2954540252685547, "learning_rate": 9.93876475545549e-06, "loss": 0.4208, "step": 522 }, { "epoch": 0.08, "grad_norm": 2.1527862548828125, "learning_rate": 9.938384606035229e-06, "loss": 0.5251, "step": 523 }, { "epoch": 0.08, "grad_norm": 1.7557440996170044, "learning_rate": 9.938003287591918e-06, "loss": 0.5586, "step": 524 }, { "epoch": 0.08, "grad_norm": 1.6895666122436523, "learning_rate": 9.937620800215824e-06, "loss": 0.5205, "step": 525 }, { "epoch": 0.08, "grad_norm": 1.2207505702972412, "learning_rate": 9.93723714399749e-06, "loss": 0.5143, "step": 526 }, { "epoch": 0.08, "grad_norm": 1.6126763820648193, "learning_rate": 9.936852319027734e-06, "loss": 0.4776, "step": 527 }, { "epoch": 0.08, "grad_norm": 1.509953260421753, "learning_rate": 9.936466325397657e-06, "loss": 0.482, "step": 528 }, { "epoch": 0.08, "grad_norm": 1.2344532012939453, "learning_rate": 9.936079163198628e-06, "loss": 0.4677, "step": 529 }, { "epoch": 0.08, "grad_norm": 1.656226634979248, "learning_rate": 9.935690832522299e-06, "loss": 0.5073, "step": 530 }, { "epoch": 0.08, "grad_norm": 1.404739499092102, "learning_rate": 9.935301333460594e-06, "loss": 0.4882, "step": 531 }, { "epoch": 0.08, "grad_norm": 1.4845311641693115, "learning_rate": 9.934910666105716e-06, "loss": 0.481, "step": 532 }, { "epoch": 0.08, "grad_norm": 1.5595049858093262, "learning_rate": 9.934518830550147e-06, "loss": 0.4876, "step": 533 }, { "epoch": 0.08, "grad_norm": 1.3054759502410889, "learning_rate": 9.934125826886642e-06, "loss": 0.5536, "step": 534 }, { "epoch": 0.08, "grad_norm": 1.2417443990707397, "learning_rate": 9.933731655208232e-06, "loss": 0.4938, "step": 535 }, { "epoch": 0.08, "grad_norm": 1.4988971948623657, "learning_rate": 9.933336315608229e-06, "loss": 0.4671, "step": 536 }, { "epoch": 0.08, "grad_norm": 1.4651635885238647, "learning_rate": 9.932939808180215e-06, "loss": 0.4725, "step": 537 }, { "epoch": 0.08, "grad_norm": 1.424375057220459, "learning_rate": 9.932542133018054e-06, "loss": 0.4737, "step": 538 }, { "epoch": 0.08, "grad_norm": 1.8808828592300415, "learning_rate": 9.932143290215885e-06, "loss": 0.5388, "step": 539 }, { "epoch": 0.08, "grad_norm": 1.8450567722320557, "learning_rate": 9.93174327986812e-06, "loss": 0.4956, "step": 540 }, { "epoch": 0.08, "grad_norm": 1.419738531112671, "learning_rate": 9.931342102069453e-06, "loss": 0.5337, "step": 541 }, { "epoch": 0.08, "grad_norm": 1.4764049053192139, "learning_rate": 9.93093975691485e-06, "loss": 0.4774, "step": 542 }, { "epoch": 0.08, "grad_norm": 1.507968783378601, "learning_rate": 9.930536244499556e-06, "loss": 0.5168, "step": 543 }, { "epoch": 0.08, "grad_norm": 1.264594316482544, "learning_rate": 9.93013156491909e-06, "loss": 0.504, "step": 544 }, { "epoch": 0.08, "grad_norm": 1.2301750183105469, "learning_rate": 9.929725718269248e-06, "loss": 0.5204, "step": 545 }, { "epoch": 0.08, "grad_norm": 1.4799244403839111, "learning_rate": 9.929318704646105e-06, "loss": 0.4814, "step": 546 }, { "epoch": 0.08, "grad_norm": 1.4030873775482178, "learning_rate": 9.928910524146007e-06, "loss": 0.5062, "step": 547 }, { "epoch": 0.08, "grad_norm": 1.095573902130127, "learning_rate": 9.928501176865581e-06, "loss": 0.5385, "step": 548 }, { "epoch": 0.08, "grad_norm": 2.5394861698150635, "learning_rate": 9.928090662901728e-06, "loss": 0.5893, "step": 549 }, { "epoch": 0.08, "grad_norm": 1.125536322593689, "learning_rate": 9.927678982351627e-06, "loss": 0.5038, "step": 550 }, { "epoch": 0.08, "grad_norm": 2.8243155479431152, "learning_rate": 9.92726613531273e-06, "loss": 0.5692, "step": 551 }, { "epoch": 0.08, "grad_norm": 1.4229458570480347, "learning_rate": 9.926852121882766e-06, "loss": 0.5449, "step": 552 }, { "epoch": 0.08, "grad_norm": 1.3533653020858765, "learning_rate": 9.926436942159744e-06, "loss": 0.5208, "step": 553 }, { "epoch": 0.08, "grad_norm": 1.7604763507843018, "learning_rate": 9.926020596241942e-06, "loss": 0.5248, "step": 554 }, { "epoch": 0.08, "grad_norm": 1.3098639249801636, "learning_rate": 9.925603084227923e-06, "loss": 0.5317, "step": 555 }, { "epoch": 0.08, "grad_norm": 1.56639564037323, "learning_rate": 9.92518440621652e-06, "loss": 0.5881, "step": 556 }, { "epoch": 0.08, "grad_norm": 1.118886947631836, "learning_rate": 9.924764562306839e-06, "loss": 0.5144, "step": 557 }, { "epoch": 0.08, "grad_norm": 1.8504705429077148, "learning_rate": 9.92434355259827e-06, "loss": 0.5737, "step": 558 }, { "epoch": 0.08, "grad_norm": 1.9518780708312988, "learning_rate": 9.923921377190476e-06, "loss": 0.4171, "step": 559 }, { "epoch": 0.08, "grad_norm": 1.7340620756149292, "learning_rate": 9.92349803618339e-06, "loss": 0.3921, "step": 560 }, { "epoch": 0.08, "grad_norm": 2.226219892501831, "learning_rate": 9.923073529677232e-06, "loss": 0.5516, "step": 561 }, { "epoch": 0.08, "grad_norm": 1.2260104417800903, "learning_rate": 9.922647857772487e-06, "loss": 0.7316, "step": 562 }, { "epoch": 0.08, "grad_norm": 2.8255529403686523, "learning_rate": 9.922221020569924e-06, "loss": 0.5115, "step": 563 }, { "epoch": 0.08, "grad_norm": 1.555275321006775, "learning_rate": 9.921793018170584e-06, "loss": 0.4469, "step": 564 }, { "epoch": 0.08, "grad_norm": 1.4351365566253662, "learning_rate": 9.921363850675785e-06, "loss": 0.545, "step": 565 }, { "epoch": 0.09, "grad_norm": 1.3400613069534302, "learning_rate": 9.920933518187119e-06, "loss": 0.5022, "step": 566 }, { "epoch": 0.09, "grad_norm": 1.7400435209274292, "learning_rate": 9.920502020806456e-06, "loss": 0.4144, "step": 567 }, { "epoch": 0.09, "grad_norm": 1.0263371467590332, "learning_rate": 9.92006935863594e-06, "loss": 0.5431, "step": 568 }, { "epoch": 0.09, "grad_norm": 1.3854117393493652, "learning_rate": 9.91963553177799e-06, "loss": 0.4162, "step": 569 }, { "epoch": 0.09, "grad_norm": 6.466152667999268, "learning_rate": 9.919200540335307e-06, "loss": 0.4812, "step": 570 }, { "epoch": 0.09, "grad_norm": 1.4685089588165283, "learning_rate": 9.918764384410859e-06, "loss": 0.5276, "step": 571 }, { "epoch": 0.09, "grad_norm": 1.3956729173660278, "learning_rate": 9.918327064107895e-06, "loss": 0.5021, "step": 572 }, { "epoch": 0.09, "grad_norm": 1.3125091791152954, "learning_rate": 9.917888579529937e-06, "loss": 0.4775, "step": 573 }, { "epoch": 0.09, "grad_norm": 1.6371071338653564, "learning_rate": 9.917448930780786e-06, "loss": 0.5198, "step": 574 }, { "epoch": 0.09, "grad_norm": 1.3062313795089722, "learning_rate": 9.917008117964515e-06, "loss": 0.7637, "step": 575 }, { "epoch": 0.09, "grad_norm": 1.3287410736083984, "learning_rate": 9.916566141185472e-06, "loss": 0.5057, "step": 576 }, { "epoch": 0.09, "grad_norm": 1.110788106918335, "learning_rate": 9.916123000548287e-06, "loss": 0.4398, "step": 577 }, { "epoch": 0.09, "grad_norm": 1.4487438201904297, "learning_rate": 9.91567869615786e-06, "loss": 0.5663, "step": 578 }, { "epoch": 0.09, "grad_norm": 1.453196406364441, "learning_rate": 9.915233228119364e-06, "loss": 0.4615, "step": 579 }, { "epoch": 0.09, "grad_norm": 1.1684538125991821, "learning_rate": 9.914786596538254e-06, "loss": 0.4914, "step": 580 }, { "epoch": 0.09, "grad_norm": 1.4552702903747559, "learning_rate": 9.914338801520256e-06, "loss": 0.5204, "step": 581 }, { "epoch": 0.09, "grad_norm": 1.7018812894821167, "learning_rate": 9.913889843171373e-06, "loss": 0.5313, "step": 582 }, { "epoch": 0.09, "grad_norm": 1.2965214252471924, "learning_rate": 9.913439721597885e-06, "loss": 0.4758, "step": 583 }, { "epoch": 0.09, "grad_norm": 1.2320911884307861, "learning_rate": 9.912988436906344e-06, "loss": 0.4822, "step": 584 }, { "epoch": 0.09, "grad_norm": 1.374952793121338, "learning_rate": 9.912535989203578e-06, "loss": 0.4783, "step": 585 }, { "epoch": 0.09, "grad_norm": 1.6728023290634155, "learning_rate": 9.912082378596694e-06, "loss": 0.5474, "step": 586 }, { "epoch": 0.09, "grad_norm": 1.1047497987747192, "learning_rate": 9.911627605193068e-06, "loss": 0.502, "step": 587 }, { "epoch": 0.09, "grad_norm": 1.2607964277267456, "learning_rate": 9.911171669100356e-06, "loss": 0.7686, "step": 588 }, { "epoch": 0.09, "grad_norm": 1.4474951028823853, "learning_rate": 9.910714570426491e-06, "loss": 0.4959, "step": 589 }, { "epoch": 0.09, "grad_norm": 1.302376627922058, "learning_rate": 9.910256309279676e-06, "loss": 0.7447, "step": 590 }, { "epoch": 0.09, "grad_norm": 1.55811607837677, "learning_rate": 9.90979688576839e-06, "loss": 0.5071, "step": 591 }, { "epoch": 0.09, "grad_norm": 1.2420817613601685, "learning_rate": 9.90933630000139e-06, "loss": 0.4572, "step": 592 }, { "epoch": 0.09, "grad_norm": 1.1047115325927734, "learning_rate": 9.908874552087704e-06, "loss": 0.5007, "step": 593 }, { "epoch": 0.09, "grad_norm": 1.2863689661026, "learning_rate": 9.908411642136643e-06, "loss": 0.5562, "step": 594 }, { "epoch": 0.09, "grad_norm": 1.2741018533706665, "learning_rate": 9.907947570257783e-06, "loss": 0.5775, "step": 595 }, { "epoch": 0.09, "grad_norm": 2.832216739654541, "learning_rate": 9.907482336560985e-06, "loss": 0.4986, "step": 596 }, { "epoch": 0.09, "grad_norm": 1.2929258346557617, "learning_rate": 9.907015941156374e-06, "loss": 0.5171, "step": 597 }, { "epoch": 0.09, "grad_norm": 1.9339784383773804, "learning_rate": 9.90654838415436e-06, "loss": 0.5644, "step": 598 }, { "epoch": 0.09, "grad_norm": 1.4042946100234985, "learning_rate": 9.906079665665622e-06, "loss": 0.423, "step": 599 }, { "epoch": 0.09, "grad_norm": 1.1137264966964722, "learning_rate": 9.905609785801115e-06, "loss": 0.4765, "step": 600 }, { "epoch": 0.09, "grad_norm": 1.384628176689148, "learning_rate": 9.905138744672073e-06, "loss": 0.5391, "step": 601 }, { "epoch": 0.09, "grad_norm": 1.7206989526748657, "learning_rate": 9.904666542390001e-06, "loss": 0.4749, "step": 602 }, { "epoch": 0.09, "grad_norm": 2.2782256603240967, "learning_rate": 9.904193179066678e-06, "loss": 0.4459, "step": 603 }, { "epoch": 0.09, "grad_norm": 1.3071174621582031, "learning_rate": 9.90371865481416e-06, "loss": 0.53, "step": 604 }, { "epoch": 0.09, "grad_norm": 1.3524141311645508, "learning_rate": 9.903242969744776e-06, "loss": 0.4927, "step": 605 }, { "epoch": 0.09, "grad_norm": 1.3173283338546753, "learning_rate": 9.902766123971132e-06, "loss": 0.4603, "step": 606 }, { "epoch": 0.09, "grad_norm": 1.5824307203292847, "learning_rate": 9.902288117606109e-06, "loss": 0.5011, "step": 607 }, { "epoch": 0.09, "grad_norm": 1.3912371397018433, "learning_rate": 9.901808950762861e-06, "loss": 0.5038, "step": 608 }, { "epoch": 0.09, "grad_norm": 2.2561960220336914, "learning_rate": 9.901328623554815e-06, "loss": 0.4927, "step": 609 }, { "epoch": 0.09, "grad_norm": 1.1585981845855713, "learning_rate": 9.900847136095677e-06, "loss": 0.558, "step": 610 }, { "epoch": 0.09, "grad_norm": 1.3125559091567993, "learning_rate": 9.900364488499426e-06, "loss": 0.4618, "step": 611 }, { "epoch": 0.09, "grad_norm": 1.2502315044403076, "learning_rate": 9.899880680880315e-06, "loss": 0.5403, "step": 612 }, { "epoch": 0.09, "grad_norm": 1.8725965023040771, "learning_rate": 9.89939571335287e-06, "loss": 0.5377, "step": 613 }, { "epoch": 0.09, "grad_norm": 1.7869187593460083, "learning_rate": 9.898909586031893e-06, "loss": 0.5168, "step": 614 }, { "epoch": 0.09, "grad_norm": 1.3870935440063477, "learning_rate": 9.898422299032465e-06, "loss": 0.4508, "step": 615 }, { "epoch": 0.09, "grad_norm": 1.180015206336975, "learning_rate": 9.897933852469932e-06, "loss": 0.446, "step": 616 }, { "epoch": 0.09, "grad_norm": 1.2805078029632568, "learning_rate": 9.897444246459925e-06, "loss": 0.5855, "step": 617 }, { "epoch": 0.09, "grad_norm": 1.251345157623291, "learning_rate": 9.896953481118341e-06, "loss": 0.4582, "step": 618 }, { "epoch": 0.09, "grad_norm": 1.8621348142623901, "learning_rate": 9.896461556561356e-06, "loss": 0.5538, "step": 619 }, { "epoch": 0.09, "grad_norm": 1.2283464670181274, "learning_rate": 9.895968472905419e-06, "loss": 0.7416, "step": 620 }, { "epoch": 0.09, "grad_norm": 1.263950228691101, "learning_rate": 9.895474230267254e-06, "loss": 0.4539, "step": 621 }, { "epoch": 0.09, "grad_norm": 1.5994184017181396, "learning_rate": 9.894978828763859e-06, "loss": 0.4559, "step": 622 }, { "epoch": 0.09, "grad_norm": 1.4296013116836548, "learning_rate": 9.894482268512506e-06, "loss": 0.4406, "step": 623 }, { "epoch": 0.09, "grad_norm": 1.3255058526992798, "learning_rate": 9.893984549630742e-06, "loss": 0.6148, "step": 624 }, { "epoch": 0.09, "grad_norm": 1.4385106563568115, "learning_rate": 9.893485672236387e-06, "loss": 0.4792, "step": 625 }, { "epoch": 0.09, "grad_norm": 2.0935041904449463, "learning_rate": 9.892985636447537e-06, "loss": 0.4479, "step": 626 }, { "epoch": 0.09, "grad_norm": 1.9351789951324463, "learning_rate": 9.89248444238256e-06, "loss": 0.4816, "step": 627 }, { "epoch": 0.09, "grad_norm": 1.315254807472229, "learning_rate": 9.891982090160101e-06, "loss": 0.4971, "step": 628 }, { "epoch": 0.09, "grad_norm": 1.2028567790985107, "learning_rate": 9.89147857989908e-06, "loss": 0.4456, "step": 629 }, { "epoch": 0.09, "grad_norm": 1.2720329761505127, "learning_rate": 9.890973911718682e-06, "loss": 0.556, "step": 630 }, { "epoch": 0.09, "grad_norm": 1.3511110544204712, "learning_rate": 9.89046808573838e-06, "loss": 0.5793, "step": 631 }, { "epoch": 0.09, "grad_norm": 2.1867926120758057, "learning_rate": 9.889961102077909e-06, "loss": 0.6151, "step": 632 }, { "epoch": 0.1, "grad_norm": 1.6178654432296753, "learning_rate": 9.889452960857287e-06, "loss": 0.5472, "step": 633 }, { "epoch": 0.1, "grad_norm": 0.9181707501411438, "learning_rate": 9.8889436621968e-06, "loss": 0.508, "step": 634 }, { "epoch": 0.1, "grad_norm": 1.3006561994552612, "learning_rate": 9.88843320621701e-06, "loss": 0.5289, "step": 635 }, { "epoch": 0.1, "grad_norm": 1.5697033405303955, "learning_rate": 9.887921593038752e-06, "loss": 0.5006, "step": 636 }, { "epoch": 0.1, "grad_norm": 1.7287403345108032, "learning_rate": 9.887408822783137e-06, "loss": 0.4593, "step": 637 }, { "epoch": 0.1, "grad_norm": 1.5025991201400757, "learning_rate": 9.886894895571552e-06, "loss": 0.5541, "step": 638 }, { "epoch": 0.1, "grad_norm": 1.595983624458313, "learning_rate": 9.88637981152565e-06, "loss": 0.4291, "step": 639 }, { "epoch": 0.1, "grad_norm": 1.2689462900161743, "learning_rate": 9.885863570767365e-06, "loss": 0.4957, "step": 640 }, { "epoch": 0.1, "grad_norm": 1.551769495010376, "learning_rate": 9.885346173418902e-06, "loss": 0.419, "step": 641 }, { "epoch": 0.1, "grad_norm": 1.5100829601287842, "learning_rate": 9.884827619602739e-06, "loss": 0.5271, "step": 642 }, { "epoch": 0.1, "grad_norm": 1.39177405834198, "learning_rate": 9.88430790944163e-06, "loss": 0.4436, "step": 643 }, { "epoch": 0.1, "grad_norm": 1.5382856130599976, "learning_rate": 9.883787043058604e-06, "loss": 0.4851, "step": 644 }, { "epoch": 0.1, "grad_norm": 1.6262422800064087, "learning_rate": 9.883265020576958e-06, "loss": 0.4575, "step": 645 }, { "epoch": 0.1, "grad_norm": 1.5649949312210083, "learning_rate": 9.882741842120266e-06, "loss": 0.4883, "step": 646 }, { "epoch": 0.1, "grad_norm": 1.6555309295654297, "learning_rate": 9.88221750781238e-06, "loss": 0.5076, "step": 647 }, { "epoch": 0.1, "grad_norm": 2.12483549118042, "learning_rate": 9.881692017777416e-06, "loss": 0.576, "step": 648 }, { "epoch": 0.1, "grad_norm": 1.3974922895431519, "learning_rate": 9.881165372139772e-06, "loss": 0.5091, "step": 649 }, { "epoch": 0.1, "grad_norm": 1.6897199153900146, "learning_rate": 9.880637571024115e-06, "loss": 0.4802, "step": 650 }, { "epoch": 0.1, "grad_norm": 1.3867378234863281, "learning_rate": 9.880108614555387e-06, "loss": 0.7839, "step": 651 }, { "epoch": 0.1, "grad_norm": 1.503555178642273, "learning_rate": 9.879578502858806e-06, "loss": 0.5928, "step": 652 }, { "epoch": 0.1, "grad_norm": 1.4846203327178955, "learning_rate": 9.879047236059858e-06, "loss": 0.4922, "step": 653 }, { "epoch": 0.1, "grad_norm": 1.192387580871582, "learning_rate": 9.878514814284308e-06, "loss": 0.5291, "step": 654 }, { "epoch": 0.1, "grad_norm": 1.4831136465072632, "learning_rate": 9.87798123765819e-06, "loss": 0.3972, "step": 655 }, { "epoch": 0.1, "grad_norm": 1.313805341720581, "learning_rate": 9.877446506307812e-06, "loss": 0.4867, "step": 656 }, { "epoch": 0.1, "grad_norm": 1.6764811277389526, "learning_rate": 9.87691062035976e-06, "loss": 0.5217, "step": 657 }, { "epoch": 0.1, "grad_norm": 1.2798676490783691, "learning_rate": 9.876373579940886e-06, "loss": 0.6134, "step": 658 }, { "epoch": 0.1, "grad_norm": 1.343772292137146, "learning_rate": 9.875835385178323e-06, "loss": 0.5056, "step": 659 }, { "epoch": 0.1, "grad_norm": 1.9086869955062866, "learning_rate": 9.875296036199472e-06, "loss": 0.4481, "step": 660 }, { "epoch": 0.1, "grad_norm": 1.5540740489959717, "learning_rate": 9.874755533132006e-06, "loss": 0.483, "step": 661 }, { "epoch": 0.1, "grad_norm": 1.309112548828125, "learning_rate": 9.874213876103877e-06, "loss": 0.4696, "step": 662 }, { "epoch": 0.1, "grad_norm": 1.5163369178771973, "learning_rate": 9.873671065243306e-06, "loss": 0.4661, "step": 663 }, { "epoch": 0.1, "grad_norm": 1.3704172372817993, "learning_rate": 9.87312710067879e-06, "loss": 0.517, "step": 664 }, { "epoch": 0.1, "grad_norm": 1.1847633123397827, "learning_rate": 9.872581982539093e-06, "loss": 0.4844, "step": 665 }, { "epoch": 0.1, "grad_norm": 1.2561070919036865, "learning_rate": 9.872035710953261e-06, "loss": 0.435, "step": 666 }, { "epoch": 0.1, "grad_norm": 1.3699443340301514, "learning_rate": 9.871488286050603e-06, "loss": 0.559, "step": 667 }, { "epoch": 0.1, "grad_norm": 1.3774628639221191, "learning_rate": 9.870939707960712e-06, "loss": 0.5034, "step": 668 }, { "epoch": 0.1, "grad_norm": 1.1945503950119019, "learning_rate": 9.870389976813444e-06, "loss": 0.5453, "step": 669 }, { "epoch": 0.1, "grad_norm": 1.8639898300170898, "learning_rate": 9.869839092738935e-06, "loss": 0.5121, "step": 670 }, { "epoch": 0.1, "grad_norm": 1.55771005153656, "learning_rate": 9.869287055867591e-06, "loss": 0.4551, "step": 671 }, { "epoch": 0.1, "grad_norm": 1.6604076623916626, "learning_rate": 9.86873386633009e-06, "loss": 0.4936, "step": 672 }, { "epoch": 0.1, "grad_norm": 1.4859559535980225, "learning_rate": 9.868179524257384e-06, "loss": 0.5506, "step": 673 }, { "epoch": 0.1, "grad_norm": 1.5566248893737793, "learning_rate": 9.8676240297807e-06, "loss": 0.4885, "step": 674 }, { "epoch": 0.1, "grad_norm": 2.1247360706329346, "learning_rate": 9.867067383031532e-06, "loss": 0.4574, "step": 675 }, { "epoch": 0.1, "grad_norm": 1.5132217407226562, "learning_rate": 9.866509584141655e-06, "loss": 0.4977, "step": 676 }, { "epoch": 0.1, "grad_norm": 1.595355749130249, "learning_rate": 9.865950633243107e-06, "loss": 0.5313, "step": 677 }, { "epoch": 0.1, "grad_norm": 2.1573474407196045, "learning_rate": 9.865390530468207e-06, "loss": 0.52, "step": 678 }, { "epoch": 0.1, "grad_norm": 1.792440414428711, "learning_rate": 9.864829275949543e-06, "loss": 0.515, "step": 679 }, { "epoch": 0.1, "grad_norm": 1.2651904821395874, "learning_rate": 9.864266869819977e-06, "loss": 0.7354, "step": 680 }, { "epoch": 0.1, "grad_norm": 1.2725346088409424, "learning_rate": 9.86370331221264e-06, "loss": 0.5066, "step": 681 }, { "epoch": 0.1, "grad_norm": 1.5870460271835327, "learning_rate": 9.86313860326094e-06, "loss": 0.4583, "step": 682 }, { "epoch": 0.1, "grad_norm": 1.3763399124145508, "learning_rate": 9.862572743098557e-06, "loss": 0.451, "step": 683 }, { "epoch": 0.1, "grad_norm": 1.308718204498291, "learning_rate": 9.862005731859443e-06, "loss": 0.4787, "step": 684 }, { "epoch": 0.1, "grad_norm": 2.7342419624328613, "learning_rate": 9.861437569677819e-06, "loss": 0.5599, "step": 685 }, { "epoch": 0.1, "grad_norm": 1.8749260902404785, "learning_rate": 9.860868256688181e-06, "loss": 0.5559, "step": 686 }, { "epoch": 0.1, "grad_norm": 1.3347142934799194, "learning_rate": 9.860297793025301e-06, "loss": 0.5444, "step": 687 }, { "epoch": 0.1, "grad_norm": 2.07949161529541, "learning_rate": 9.859726178824218e-06, "loss": 0.5283, "step": 688 }, { "epoch": 0.1, "grad_norm": 1.6123825311660767, "learning_rate": 9.859153414220247e-06, "loss": 0.5113, "step": 689 }, { "epoch": 0.1, "grad_norm": 1.6185271739959717, "learning_rate": 9.858579499348972e-06, "loss": 0.5324, "step": 690 }, { "epoch": 0.1, "grad_norm": 1.9137145280838013, "learning_rate": 9.858004434346253e-06, "loss": 0.4748, "step": 691 }, { "epoch": 0.1, "grad_norm": 2.2408411502838135, "learning_rate": 9.85742821934822e-06, "loss": 0.5794, "step": 692 }, { "epoch": 0.1, "grad_norm": 1.2615889310836792, "learning_rate": 9.856850854491273e-06, "loss": 0.5039, "step": 693 }, { "epoch": 0.1, "grad_norm": 1.903219223022461, "learning_rate": 9.85627233991209e-06, "loss": 0.4841, "step": 694 }, { "epoch": 0.1, "grad_norm": 1.2214040756225586, "learning_rate": 9.855692675747616e-06, "loss": 0.4309, "step": 695 }, { "epoch": 0.1, "grad_norm": 1.22428560256958, "learning_rate": 9.855111862135072e-06, "loss": 0.421, "step": 696 }, { "epoch": 0.1, "grad_norm": 1.681580662727356, "learning_rate": 9.854529899211948e-06, "loss": 0.5039, "step": 697 }, { "epoch": 0.1, "grad_norm": 1.3193751573562622, "learning_rate": 9.853946787116006e-06, "loss": 0.532, "step": 698 }, { "epoch": 0.11, "grad_norm": 1.629521369934082, "learning_rate": 9.853362525985286e-06, "loss": 0.5281, "step": 699 }, { "epoch": 0.11, "grad_norm": 1.2133550643920898, "learning_rate": 9.85277711595809e-06, "loss": 0.5454, "step": 700 }, { "epoch": 0.11, "grad_norm": 1.9066038131713867, "learning_rate": 9.852190557172999e-06, "loss": 0.5197, "step": 701 }, { "epoch": 0.11, "grad_norm": 1.2790488004684448, "learning_rate": 9.851602849768867e-06, "loss": 0.4919, "step": 702 }, { "epoch": 0.11, "grad_norm": 1.2819576263427734, "learning_rate": 9.851013993884813e-06, "loss": 0.506, "step": 703 }, { "epoch": 0.11, "grad_norm": 1.1565511226654053, "learning_rate": 9.850423989660234e-06, "loss": 0.7305, "step": 704 }, { "epoch": 0.11, "grad_norm": 1.9352303743362427, "learning_rate": 9.849832837234796e-06, "loss": 0.4956, "step": 705 }, { "epoch": 0.11, "grad_norm": 1.1905860900878906, "learning_rate": 9.84924053674844e-06, "loss": 0.7509, "step": 706 }, { "epoch": 0.11, "grad_norm": 1.3598499298095703, "learning_rate": 9.848647088341372e-06, "loss": 0.5749, "step": 707 }, { "epoch": 0.11, "grad_norm": 1.1567697525024414, "learning_rate": 9.84805249215408e-06, "loss": 0.5833, "step": 708 }, { "epoch": 0.11, "grad_norm": 2.390453338623047, "learning_rate": 9.847456748327312e-06, "loss": 0.5334, "step": 709 }, { "epoch": 0.11, "grad_norm": 1.358150601387024, "learning_rate": 9.846859857002096e-06, "loss": 0.5273, "step": 710 }, { "epoch": 0.11, "grad_norm": 1.173087239265442, "learning_rate": 9.84626181831973e-06, "loss": 0.489, "step": 711 }, { "epoch": 0.11, "grad_norm": 1.3826289176940918, "learning_rate": 9.845662632421782e-06, "loss": 0.4729, "step": 712 }, { "epoch": 0.11, "grad_norm": 1.2049938440322876, "learning_rate": 9.845062299450094e-06, "loss": 0.4699, "step": 713 }, { "epoch": 0.11, "grad_norm": 1.6382914781570435, "learning_rate": 9.844460819546775e-06, "loss": 0.5662, "step": 714 }, { "epoch": 0.11, "grad_norm": 1.2152016162872314, "learning_rate": 9.84385819285421e-06, "loss": 0.4488, "step": 715 }, { "epoch": 0.11, "grad_norm": 1.745985507965088, "learning_rate": 9.843254419515053e-06, "loss": 0.5129, "step": 716 }, { "epoch": 0.11, "grad_norm": 1.1796667575836182, "learning_rate": 9.84264949967223e-06, "loss": 0.5881, "step": 717 }, { "epoch": 0.11, "grad_norm": 1.3601758480072021, "learning_rate": 9.842043433468942e-06, "loss": 0.5222, "step": 718 }, { "epoch": 0.11, "grad_norm": 1.2692792415618896, "learning_rate": 9.841436221048654e-06, "loss": 0.4752, "step": 719 }, { "epoch": 0.11, "grad_norm": 2.212998867034912, "learning_rate": 9.840827862555107e-06, "loss": 0.4906, "step": 720 }, { "epoch": 0.11, "grad_norm": 1.4546306133270264, "learning_rate": 9.840218358132315e-06, "loss": 0.5318, "step": 721 }, { "epoch": 0.11, "grad_norm": 1.5507484674453735, "learning_rate": 9.83960770792456e-06, "loss": 0.5837, "step": 722 }, { "epoch": 0.11, "grad_norm": 1.1645417213439941, "learning_rate": 9.838995912076395e-06, "loss": 0.558, "step": 723 }, { "epoch": 0.11, "grad_norm": 5.03980827331543, "learning_rate": 9.838382970732646e-06, "loss": 0.545, "step": 724 }, { "epoch": 0.11, "grad_norm": 1.2638295888900757, "learning_rate": 9.837768884038409e-06, "loss": 0.5407, "step": 725 }, { "epoch": 0.11, "grad_norm": 1.584238052368164, "learning_rate": 9.837153652139053e-06, "loss": 0.5427, "step": 726 }, { "epoch": 0.11, "grad_norm": 1.3303097486495972, "learning_rate": 9.836537275180216e-06, "loss": 0.5705, "step": 727 }, { "epoch": 0.11, "grad_norm": 1.1930118799209595, "learning_rate": 9.835919753307808e-06, "loss": 0.7327, "step": 728 }, { "epoch": 0.11, "grad_norm": 2.0764474868774414, "learning_rate": 9.83530108666801e-06, "loss": 0.502, "step": 729 }, { "epoch": 0.11, "grad_norm": 1.0418527126312256, "learning_rate": 9.834681275407275e-06, "loss": 0.5233, "step": 730 }, { "epoch": 0.11, "grad_norm": 1.6232659816741943, "learning_rate": 9.834060319672323e-06, "loss": 0.5229, "step": 731 }, { "epoch": 0.11, "grad_norm": 1.1600449085235596, "learning_rate": 9.833438219610148e-06, "loss": 0.7165, "step": 732 }, { "epoch": 0.11, "grad_norm": 1.011531114578247, "learning_rate": 9.832814975368017e-06, "loss": 0.4907, "step": 733 }, { "epoch": 0.11, "grad_norm": 1.3595755100250244, "learning_rate": 9.832190587093464e-06, "loss": 0.4234, "step": 734 }, { "epoch": 0.11, "grad_norm": 1.2927824258804321, "learning_rate": 9.831565054934297e-06, "loss": 0.5227, "step": 735 }, { "epoch": 0.11, "grad_norm": 1.2304143905639648, "learning_rate": 9.83093837903859e-06, "loss": 0.552, "step": 736 }, { "epoch": 0.11, "grad_norm": 1.1736196279525757, "learning_rate": 9.830310559554694e-06, "loss": 0.521, "step": 737 }, { "epoch": 0.11, "grad_norm": 1.464807391166687, "learning_rate": 9.829681596631224e-06, "loss": 0.5282, "step": 738 }, { "epoch": 0.11, "grad_norm": 1.6515990495681763, "learning_rate": 9.829051490417074e-06, "loss": 0.5127, "step": 739 }, { "epoch": 0.11, "grad_norm": 1.0576913356781006, "learning_rate": 9.828420241061398e-06, "loss": 0.5475, "step": 740 }, { "epoch": 0.11, "grad_norm": 1.6355204582214355, "learning_rate": 9.82778784871363e-06, "loss": 0.5133, "step": 741 }, { "epoch": 0.11, "grad_norm": 1.4234682321548462, "learning_rate": 9.827154313523473e-06, "loss": 0.4844, "step": 742 }, { "epoch": 0.11, "grad_norm": 1.7362086772918701, "learning_rate": 9.826519635640895e-06, "loss": 0.5664, "step": 743 }, { "epoch": 0.11, "grad_norm": 1.2466405630111694, "learning_rate": 9.825883815216137e-06, "loss": 0.5005, "step": 744 }, { "epoch": 0.11, "grad_norm": 1.182104468345642, "learning_rate": 9.825246852399715e-06, "loss": 0.3917, "step": 745 }, { "epoch": 0.11, "grad_norm": 1.1328339576721191, "learning_rate": 9.82460874734241e-06, "loss": 0.5142, "step": 746 }, { "epoch": 0.11, "grad_norm": 0.932713508605957, "learning_rate": 9.823969500195278e-06, "loss": 0.4516, "step": 747 }, { "epoch": 0.11, "grad_norm": 1.492268681526184, "learning_rate": 9.823329111109639e-06, "loss": 0.4905, "step": 748 }, { "epoch": 0.11, "grad_norm": 1.2069798707962036, "learning_rate": 9.822687580237089e-06, "loss": 0.4701, "step": 749 }, { "epoch": 0.11, "grad_norm": 1.3139722347259521, "learning_rate": 9.82204490772949e-06, "loss": 0.5017, "step": 750 }, { "epoch": 0.11, "grad_norm": 1.2330877780914307, "learning_rate": 9.821401093738983e-06, "loss": 0.5296, "step": 751 }, { "epoch": 0.11, "grad_norm": 1.472341775894165, "learning_rate": 9.820756138417966e-06, "loss": 0.4497, "step": 752 }, { "epoch": 0.11, "grad_norm": 1.170564889907837, "learning_rate": 9.820110041919115e-06, "loss": 0.498, "step": 753 }, { "epoch": 0.11, "grad_norm": 0.9885419607162476, "learning_rate": 9.819462804395378e-06, "loss": 0.4604, "step": 754 }, { "epoch": 0.11, "grad_norm": 1.5382198095321655, "learning_rate": 9.818814425999967e-06, "loss": 0.4531, "step": 755 }, { "epoch": 0.11, "grad_norm": 1.2932593822479248, "learning_rate": 9.818164906886372e-06, "loss": 0.4818, "step": 756 }, { "epoch": 0.11, "grad_norm": 1.266801357269287, "learning_rate": 9.817514247208341e-06, "loss": 0.4813, "step": 757 }, { "epoch": 0.11, "grad_norm": 1.2161976099014282, "learning_rate": 9.816862447119907e-06, "loss": 0.4913, "step": 758 }, { "epoch": 0.11, "grad_norm": 1.3057245016098022, "learning_rate": 9.816209506775361e-06, "loss": 0.5163, "step": 759 }, { "epoch": 0.11, "grad_norm": 1.2366656064987183, "learning_rate": 9.815555426329267e-06, "loss": 0.5505, "step": 760 }, { "epoch": 0.11, "grad_norm": 1.3679362535476685, "learning_rate": 9.814900205936464e-06, "loss": 0.4806, "step": 761 }, { "epoch": 0.11, "grad_norm": 1.1668195724487305, "learning_rate": 9.814243845752053e-06, "loss": 0.4265, "step": 762 }, { "epoch": 0.11, "grad_norm": 1.249698281288147, "learning_rate": 9.813586345931412e-06, "loss": 0.5942, "step": 763 }, { "epoch": 0.11, "grad_norm": 1.8875826597213745, "learning_rate": 9.812927706630183e-06, "loss": 0.5725, "step": 764 }, { "epoch": 0.11, "grad_norm": 1.1541050672531128, "learning_rate": 9.812267928004282e-06, "loss": 0.4661, "step": 765 }, { "epoch": 0.12, "grad_norm": 1.2173123359680176, "learning_rate": 9.811607010209893e-06, "loss": 0.4492, "step": 766 }, { "epoch": 0.12, "grad_norm": 1.1599067449569702, "learning_rate": 9.810944953403467e-06, "loss": 0.4704, "step": 767 }, { "epoch": 0.12, "grad_norm": 1.3909093141555786, "learning_rate": 9.81028175774173e-06, "loss": 0.5104, "step": 768 }, { "epoch": 0.12, "grad_norm": 1.3624018430709839, "learning_rate": 9.809617423381677e-06, "loss": 0.5156, "step": 769 }, { "epoch": 0.12, "grad_norm": 1.1888322830200195, "learning_rate": 9.808951950480562e-06, "loss": 0.523, "step": 770 }, { "epoch": 0.12, "grad_norm": 1.377636432647705, "learning_rate": 9.808285339195927e-06, "loss": 0.4907, "step": 771 }, { "epoch": 0.12, "grad_norm": 1.244140386581421, "learning_rate": 9.807617589685568e-06, "loss": 0.5189, "step": 772 }, { "epoch": 0.12, "grad_norm": 1.2018319368362427, "learning_rate": 9.806948702107555e-06, "loss": 0.723, "step": 773 }, { "epoch": 0.12, "grad_norm": 1.2854362726211548, "learning_rate": 9.80627867662023e-06, "loss": 0.5267, "step": 774 }, { "epoch": 0.12, "grad_norm": 1.7196664810180664, "learning_rate": 9.805607513382203e-06, "loss": 0.5368, "step": 775 }, { "epoch": 0.12, "grad_norm": 1.080664038658142, "learning_rate": 9.804935212552351e-06, "loss": 0.5021, "step": 776 }, { "epoch": 0.12, "grad_norm": 1.2957206964492798, "learning_rate": 9.804261774289824e-06, "loss": 0.7575, "step": 777 }, { "epoch": 0.12, "grad_norm": 3.6261560916900635, "learning_rate": 9.80358719875404e-06, "loss": 0.5422, "step": 778 }, { "epoch": 0.12, "grad_norm": 1.6122649908065796, "learning_rate": 9.802911486104684e-06, "loss": 0.5399, "step": 779 }, { "epoch": 0.12, "grad_norm": 1.2571039199829102, "learning_rate": 9.802234636501713e-06, "loss": 0.5248, "step": 780 }, { "epoch": 0.12, "grad_norm": 1.03786301612854, "learning_rate": 9.80155665010535e-06, "loss": 0.4913, "step": 781 }, { "epoch": 0.12, "grad_norm": 1.2757413387298584, "learning_rate": 9.800877527076094e-06, "loss": 0.5776, "step": 782 }, { "epoch": 0.12, "grad_norm": 1.3959358930587769, "learning_rate": 9.800197267574703e-06, "loss": 0.4709, "step": 783 }, { "epoch": 0.12, "grad_norm": 2.6221892833709717, "learning_rate": 9.79951587176221e-06, "loss": 0.4351, "step": 784 }, { "epoch": 0.12, "grad_norm": 1.5096001625061035, "learning_rate": 9.79883333979992e-06, "loss": 0.517, "step": 785 }, { "epoch": 0.12, "grad_norm": 1.2475712299346924, "learning_rate": 9.7981496718494e-06, "loss": 0.5065, "step": 786 }, { "epoch": 0.12, "grad_norm": 1.3287599086761475, "learning_rate": 9.797464868072489e-06, "loss": 0.4912, "step": 787 }, { "epoch": 0.12, "grad_norm": 1.4548848867416382, "learning_rate": 9.796778928631294e-06, "loss": 0.5009, "step": 788 }, { "epoch": 0.12, "grad_norm": 1.4513864517211914, "learning_rate": 9.796091853688195e-06, "loss": 0.5243, "step": 789 }, { "epoch": 0.12, "grad_norm": 1.5659456253051758, "learning_rate": 9.795403643405836e-06, "loss": 0.4627, "step": 790 }, { "epoch": 0.12, "grad_norm": 1.7581919431686401, "learning_rate": 9.79471429794713e-06, "loss": 0.4747, "step": 791 }, { "epoch": 0.12, "grad_norm": 1.2326606512069702, "learning_rate": 9.794023817475263e-06, "loss": 0.7442, "step": 792 }, { "epoch": 0.12, "grad_norm": 1.4086371660232544, "learning_rate": 9.793332202153685e-06, "loss": 0.4876, "step": 793 }, { "epoch": 0.12, "grad_norm": 1.5698308944702148, "learning_rate": 9.792639452146116e-06, "loss": 0.5518, "step": 794 }, { "epoch": 0.12, "grad_norm": 1.0303236246109009, "learning_rate": 9.791945567616546e-06, "loss": 0.4718, "step": 795 }, { "epoch": 0.12, "grad_norm": 1.1517406702041626, "learning_rate": 9.791250548729231e-06, "loss": 0.4575, "step": 796 }, { "epoch": 0.12, "grad_norm": 1.4664955139160156, "learning_rate": 9.7905543956487e-06, "loss": 0.4552, "step": 797 }, { "epoch": 0.12, "grad_norm": 1.0135352611541748, "learning_rate": 9.789857108539742e-06, "loss": 0.4608, "step": 798 }, { "epoch": 0.12, "grad_norm": 1.098620891571045, "learning_rate": 9.789158687567427e-06, "loss": 0.4758, "step": 799 }, { "epoch": 0.12, "grad_norm": 1.3371137380599976, "learning_rate": 9.788459132897083e-06, "loss": 0.4939, "step": 800 }, { "epoch": 0.12, "grad_norm": 1.1460729837417603, "learning_rate": 9.787758444694308e-06, "loss": 0.4795, "step": 801 }, { "epoch": 0.12, "grad_norm": 1.3753434419631958, "learning_rate": 9.787056623124973e-06, "loss": 0.5712, "step": 802 }, { "epoch": 0.12, "grad_norm": 1.1329721212387085, "learning_rate": 9.786353668355214e-06, "loss": 0.4723, "step": 803 }, { "epoch": 0.12, "grad_norm": 1.3347886800765991, "learning_rate": 9.785649580551434e-06, "loss": 0.578, "step": 804 }, { "epoch": 0.12, "grad_norm": 1.6439828872680664, "learning_rate": 9.78494435988031e-06, "loss": 0.4736, "step": 805 }, { "epoch": 0.12, "grad_norm": 2.4796226024627686, "learning_rate": 9.784238006508778e-06, "loss": 0.5531, "step": 806 }, { "epoch": 0.12, "grad_norm": 1.5077428817749023, "learning_rate": 9.78353052060405e-06, "loss": 0.5084, "step": 807 }, { "epoch": 0.12, "grad_norm": 1.3723413944244385, "learning_rate": 9.782821902333602e-06, "loss": 0.5358, "step": 808 }, { "epoch": 0.12, "grad_norm": 1.210244059562683, "learning_rate": 9.782112151865181e-06, "loss": 0.5663, "step": 809 }, { "epoch": 0.12, "grad_norm": 2.5573203563690186, "learning_rate": 9.7814012693668e-06, "loss": 0.5851, "step": 810 }, { "epoch": 0.12, "grad_norm": 1.3095524311065674, "learning_rate": 9.780689255006737e-06, "loss": 0.5273, "step": 811 }, { "epoch": 0.12, "grad_norm": 1.559304118156433, "learning_rate": 9.779976108953545e-06, "loss": 0.4668, "step": 812 }, { "epoch": 0.12, "grad_norm": 1.271428108215332, "learning_rate": 9.779261831376043e-06, "loss": 0.487, "step": 813 }, { "epoch": 0.12, "grad_norm": 1.8195722103118896, "learning_rate": 9.77854642244331e-06, "loss": 0.5713, "step": 814 }, { "epoch": 0.12, "grad_norm": 1.3328944444656372, "learning_rate": 9.777829882324705e-06, "loss": 0.5054, "step": 815 }, { "epoch": 0.12, "grad_norm": 1.0100606679916382, "learning_rate": 9.777112211189843e-06, "loss": 0.5225, "step": 816 }, { "epoch": 0.12, "grad_norm": 2.19535756111145, "learning_rate": 9.776393409208618e-06, "loss": 0.4795, "step": 817 }, { "epoch": 0.12, "grad_norm": 1.1950020790100098, "learning_rate": 9.775673476551181e-06, "loss": 0.457, "step": 818 }, { "epoch": 0.12, "grad_norm": 1.1722710132598877, "learning_rate": 9.774952413387958e-06, "loss": 0.4295, "step": 819 }, { "epoch": 0.12, "grad_norm": 1.1324433088302612, "learning_rate": 9.77423021988964e-06, "loss": 0.4461, "step": 820 }, { "epoch": 0.12, "grad_norm": 0.9945248365402222, "learning_rate": 9.773506896227188e-06, "loss": 0.4997, "step": 821 }, { "epoch": 0.12, "grad_norm": 0.9031025767326355, "learning_rate": 9.772782442571825e-06, "loss": 0.5065, "step": 822 }, { "epoch": 0.12, "grad_norm": 1.3776183128356934, "learning_rate": 9.772056859095048e-06, "loss": 0.7626, "step": 823 }, { "epoch": 0.12, "grad_norm": 1.1511043310165405, "learning_rate": 9.771330145968616e-06, "loss": 0.7551, "step": 824 }, { "epoch": 0.12, "grad_norm": 1.0866553783416748, "learning_rate": 9.770602303364558e-06, "loss": 0.5296, "step": 825 }, { "epoch": 0.12, "grad_norm": 1.2422674894332886, "learning_rate": 9.769873331455172e-06, "loss": 0.4645, "step": 826 }, { "epoch": 0.12, "grad_norm": 1.2790991067886353, "learning_rate": 9.769143230413021e-06, "loss": 0.4914, "step": 827 }, { "epoch": 0.12, "grad_norm": 1.1766886711120605, "learning_rate": 9.768412000410934e-06, "loss": 0.4485, "step": 828 }, { "epoch": 0.12, "grad_norm": 1.1664637327194214, "learning_rate": 9.76767964162201e-06, "loss": 0.5053, "step": 829 }, { "epoch": 0.12, "grad_norm": 1.7213560342788696, "learning_rate": 9.766946154219616e-06, "loss": 0.5618, "step": 830 }, { "epoch": 0.12, "grad_norm": 1.2375755310058594, "learning_rate": 9.766211538377382e-06, "loss": 0.481, "step": 831 }, { "epoch": 0.12, "grad_norm": 0.9944421648979187, "learning_rate": 9.765475794269208e-06, "loss": 0.5201, "step": 832 }, { "epoch": 0.13, "grad_norm": 1.4064381122589111, "learning_rate": 9.76473892206926e-06, "loss": 0.4714, "step": 833 }, { "epoch": 0.13, "grad_norm": 1.2736927270889282, "learning_rate": 9.764000921951976e-06, "loss": 0.4827, "step": 834 }, { "epoch": 0.13, "grad_norm": 1.4459254741668701, "learning_rate": 9.763261794092052e-06, "loss": 0.5522, "step": 835 }, { "epoch": 0.13, "grad_norm": 1.2606312036514282, "learning_rate": 9.762521538664458e-06, "loss": 0.4803, "step": 836 }, { "epoch": 0.13, "grad_norm": 1.1771260499954224, "learning_rate": 9.761780155844425e-06, "loss": 0.4743, "step": 837 }, { "epoch": 0.13, "grad_norm": 1.3907597064971924, "learning_rate": 9.76103764580746e-06, "loss": 0.5156, "step": 838 }, { "epoch": 0.13, "grad_norm": 1.7728126049041748, "learning_rate": 9.760294008729327e-06, "loss": 0.5481, "step": 839 }, { "epoch": 0.13, "grad_norm": 1.4500117301940918, "learning_rate": 9.759549244786063e-06, "loss": 0.4499, "step": 840 }, { "epoch": 0.13, "grad_norm": 1.3774126768112183, "learning_rate": 9.758803354153969e-06, "loss": 0.4955, "step": 841 }, { "epoch": 0.13, "grad_norm": 1.553907871246338, "learning_rate": 9.758056337009614e-06, "loss": 0.5221, "step": 842 }, { "epoch": 0.13, "grad_norm": 1.373882532119751, "learning_rate": 9.757308193529833e-06, "loss": 0.758, "step": 843 }, { "epoch": 0.13, "grad_norm": 1.8141626119613647, "learning_rate": 9.756558923891728e-06, "loss": 0.5171, "step": 844 }, { "epoch": 0.13, "grad_norm": 2.3568224906921387, "learning_rate": 9.755808528272668e-06, "loss": 0.5574, "step": 845 }, { "epoch": 0.13, "grad_norm": 1.5109009742736816, "learning_rate": 9.755057006850286e-06, "loss": 0.6179, "step": 846 }, { "epoch": 0.13, "grad_norm": 1.3028814792633057, "learning_rate": 9.754304359802484e-06, "loss": 0.4657, "step": 847 }, { "epoch": 0.13, "grad_norm": 2.23203182220459, "learning_rate": 9.753550587307432e-06, "loss": 0.4342, "step": 848 }, { "epoch": 0.13, "grad_norm": 1.3048144578933716, "learning_rate": 9.752795689543563e-06, "loss": 0.4855, "step": 849 }, { "epoch": 0.13, "grad_norm": 1.9354846477508545, "learning_rate": 9.752039666689578e-06, "loss": 0.5318, "step": 850 }, { "epoch": 0.13, "grad_norm": 2.8246147632598877, "learning_rate": 9.751282518924444e-06, "loss": 0.4894, "step": 851 }, { "epoch": 0.13, "grad_norm": 1.53639817237854, "learning_rate": 9.750524246427392e-06, "loss": 0.5316, "step": 852 }, { "epoch": 0.13, "grad_norm": 2.095611810684204, "learning_rate": 9.749764849377925e-06, "loss": 0.4586, "step": 853 }, { "epoch": 0.13, "grad_norm": 1.3599839210510254, "learning_rate": 9.749004327955808e-06, "loss": 0.4872, "step": 854 }, { "epoch": 0.13, "grad_norm": 1.5262340307235718, "learning_rate": 9.748242682341071e-06, "loss": 0.4598, "step": 855 }, { "epoch": 0.13, "grad_norm": 3.1463611125946045, "learning_rate": 9.747479912714015e-06, "loss": 0.5389, "step": 856 }, { "epoch": 0.13, "grad_norm": 2.811809778213501, "learning_rate": 9.746716019255202e-06, "loss": 0.5326, "step": 857 }, { "epoch": 0.13, "grad_norm": 1.4919154644012451, "learning_rate": 9.745951002145464e-06, "loss": 0.4942, "step": 858 }, { "epoch": 0.13, "grad_norm": 1.379707932472229, "learning_rate": 9.745184861565895e-06, "loss": 0.4567, "step": 859 }, { "epoch": 0.13, "grad_norm": 1.026309847831726, "learning_rate": 9.74441759769786e-06, "loss": 0.4733, "step": 860 }, { "epoch": 0.13, "grad_norm": 2.3116066455841064, "learning_rate": 9.743649210722983e-06, "loss": 0.5403, "step": 861 }, { "epoch": 0.13, "grad_norm": 1.361974835395813, "learning_rate": 9.74287970082316e-06, "loss": 0.7665, "step": 862 }, { "epoch": 0.13, "grad_norm": 1.550121545791626, "learning_rate": 9.742109068180552e-06, "loss": 0.5768, "step": 863 }, { "epoch": 0.13, "grad_norm": 1.2637468576431274, "learning_rate": 9.741337312977584e-06, "loss": 0.5731, "step": 864 }, { "epoch": 0.13, "grad_norm": 1.6974797248840332, "learning_rate": 9.740564435396947e-06, "loss": 0.5459, "step": 865 }, { "epoch": 0.13, "grad_norm": 2.8393726348876953, "learning_rate": 9.739790435621595e-06, "loss": 0.5077, "step": 866 }, { "epoch": 0.13, "grad_norm": 1.095444679260254, "learning_rate": 9.739015313834755e-06, "loss": 0.7399, "step": 867 }, { "epoch": 0.13, "grad_norm": 1.251961350440979, "learning_rate": 9.738239070219913e-06, "loss": 0.4596, "step": 868 }, { "epoch": 0.13, "grad_norm": 3.7504992485046387, "learning_rate": 9.737461704960824e-06, "loss": 0.5184, "step": 869 }, { "epoch": 0.13, "grad_norm": 1.275967001914978, "learning_rate": 9.736683218241505e-06, "loss": 0.5257, "step": 870 }, { "epoch": 0.13, "grad_norm": 1.746312141418457, "learning_rate": 9.735903610246243e-06, "loss": 0.46, "step": 871 }, { "epoch": 0.13, "grad_norm": 1.0759719610214233, "learning_rate": 9.735122881159587e-06, "loss": 0.535, "step": 872 }, { "epoch": 0.13, "grad_norm": 1.2429442405700684, "learning_rate": 9.734341031166355e-06, "loss": 0.5403, "step": 873 }, { "epoch": 0.13, "grad_norm": 1.5747851133346558, "learning_rate": 9.733558060451623e-06, "loss": 0.5776, "step": 874 }, { "epoch": 0.13, "grad_norm": 1.6515079736709595, "learning_rate": 9.73277396920074e-06, "loss": 0.5022, "step": 875 }, { "epoch": 0.13, "grad_norm": 1.8648334741592407, "learning_rate": 9.731988757599317e-06, "loss": 0.522, "step": 876 }, { "epoch": 0.13, "grad_norm": 1.2541348934173584, "learning_rate": 9.731202425833234e-06, "loss": 0.4796, "step": 877 }, { "epoch": 0.13, "grad_norm": 1.1117089986801147, "learning_rate": 9.730414974088627e-06, "loss": 0.465, "step": 878 }, { "epoch": 0.13, "grad_norm": 1.6262778043746948, "learning_rate": 9.729626402551909e-06, "loss": 0.5166, "step": 879 }, { "epoch": 0.13, "grad_norm": 1.6812143325805664, "learning_rate": 9.728836711409746e-06, "loss": 0.5758, "step": 880 }, { "epoch": 0.13, "grad_norm": 2.223020315170288, "learning_rate": 9.72804590084908e-06, "loss": 0.553, "step": 881 }, { "epoch": 0.13, "grad_norm": 1.4639016389846802, "learning_rate": 9.72725397105711e-06, "loss": 0.537, "step": 882 }, { "epoch": 0.13, "grad_norm": 1.113882064819336, "learning_rate": 9.726460922221302e-06, "loss": 0.4722, "step": 883 }, { "epoch": 0.13, "grad_norm": 1.2445518970489502, "learning_rate": 9.725666754529393e-06, "loss": 0.5398, "step": 884 }, { "epoch": 0.13, "grad_norm": 1.3340028524398804, "learning_rate": 9.724871468169375e-06, "loss": 0.5073, "step": 885 }, { "epoch": 0.13, "grad_norm": 1.465474247932434, "learning_rate": 9.724075063329512e-06, "loss": 0.4518, "step": 886 }, { "epoch": 0.13, "grad_norm": 1.534026861190796, "learning_rate": 9.723277540198328e-06, "loss": 0.4691, "step": 887 }, { "epoch": 0.13, "grad_norm": 1.4354679584503174, "learning_rate": 9.722478898964617e-06, "loss": 0.4037, "step": 888 }, { "epoch": 0.13, "grad_norm": 1.6497234106063843, "learning_rate": 9.721679139817434e-06, "loss": 0.403, "step": 889 }, { "epoch": 0.13, "grad_norm": 1.7715669870376587, "learning_rate": 9.720878262946098e-06, "loss": 0.4253, "step": 890 }, { "epoch": 0.13, "grad_norm": 1.2624090909957886, "learning_rate": 9.720076268540198e-06, "loss": 0.5266, "step": 891 }, { "epoch": 0.13, "grad_norm": 1.8264472484588623, "learning_rate": 9.719273156789576e-06, "loss": 0.5547, "step": 892 }, { "epoch": 0.13, "grad_norm": 1.5611509084701538, "learning_rate": 9.718468927884352e-06, "loss": 0.427, "step": 893 }, { "epoch": 0.13, "grad_norm": 1.1130266189575195, "learning_rate": 9.717663582014902e-06, "loss": 0.4815, "step": 894 }, { "epoch": 0.13, "grad_norm": 1.0999236106872559, "learning_rate": 9.716857119371871e-06, "loss": 0.4364, "step": 895 }, { "epoch": 0.13, "grad_norm": 1.779739260673523, "learning_rate": 9.716049540146164e-06, "loss": 0.5276, "step": 896 }, { "epoch": 0.13, "grad_norm": 1.5610913038253784, "learning_rate": 9.715240844528954e-06, "loss": 0.4915, "step": 897 }, { "epoch": 0.13, "grad_norm": 1.2693336009979248, "learning_rate": 9.714431032711678e-06, "loss": 0.4951, "step": 898 }, { "epoch": 0.14, "grad_norm": 1.439664363861084, "learning_rate": 9.713620104886033e-06, "loss": 0.4869, "step": 899 }, { "epoch": 0.14, "grad_norm": 1.1337090730667114, "learning_rate": 9.712808061243984e-06, "loss": 0.7134, "step": 900 }, { "epoch": 0.14, "grad_norm": 1.6320891380310059, "learning_rate": 9.71199490197776e-06, "loss": 0.5459, "step": 901 }, { "epoch": 0.14, "grad_norm": 1.6709940433502197, "learning_rate": 9.711180627279854e-06, "loss": 0.536, "step": 902 }, { "epoch": 0.14, "grad_norm": 1.551906943321228, "learning_rate": 9.710365237343023e-06, "loss": 0.4883, "step": 903 }, { "epoch": 0.14, "grad_norm": 2.832919120788574, "learning_rate": 9.709548732360286e-06, "loss": 0.527, "step": 904 }, { "epoch": 0.14, "grad_norm": 1.6695393323898315, "learning_rate": 9.708731112524928e-06, "loss": 0.5587, "step": 905 }, { "epoch": 0.14, "grad_norm": 1.4210410118103027, "learning_rate": 9.707912378030498e-06, "loss": 0.4931, "step": 906 }, { "epoch": 0.14, "grad_norm": 1.793747067451477, "learning_rate": 9.707092529070808e-06, "loss": 0.4997, "step": 907 }, { "epoch": 0.14, "grad_norm": 1.4479914903640747, "learning_rate": 9.706271565839931e-06, "loss": 0.5046, "step": 908 }, { "epoch": 0.14, "grad_norm": 1.353573203086853, "learning_rate": 9.705449488532213e-06, "loss": 0.5323, "step": 909 }, { "epoch": 0.14, "grad_norm": 1.4987508058547974, "learning_rate": 9.704626297342254e-06, "loss": 0.4005, "step": 910 }, { "epoch": 0.14, "grad_norm": 1.7152915000915527, "learning_rate": 9.703801992464921e-06, "loss": 0.4341, "step": 911 }, { "epoch": 0.14, "grad_norm": 1.2513285875320435, "learning_rate": 9.702976574095345e-06, "loss": 0.507, "step": 912 }, { "epoch": 0.14, "grad_norm": 1.6608192920684814, "learning_rate": 9.702150042428922e-06, "loss": 0.5067, "step": 913 }, { "epoch": 0.14, "grad_norm": 1.5731807947158813, "learning_rate": 9.701322397661308e-06, "loss": 0.4678, "step": 914 }, { "epoch": 0.14, "grad_norm": 1.5839991569519043, "learning_rate": 9.700493639988424e-06, "loss": 0.4947, "step": 915 }, { "epoch": 0.14, "grad_norm": 1.5904958248138428, "learning_rate": 9.69966376960646e-06, "loss": 0.4581, "step": 916 }, { "epoch": 0.14, "grad_norm": 1.9236098527908325, "learning_rate": 9.698832786711859e-06, "loss": 0.4633, "step": 917 }, { "epoch": 0.14, "grad_norm": 1.5254161357879639, "learning_rate": 9.698000691501333e-06, "loss": 0.4776, "step": 918 }, { "epoch": 0.14, "grad_norm": 1.3811852931976318, "learning_rate": 9.697167484171862e-06, "loss": 0.5642, "step": 919 }, { "epoch": 0.14, "grad_norm": 1.4684128761291504, "learning_rate": 9.696333164920677e-06, "loss": 0.539, "step": 920 }, { "epoch": 0.14, "grad_norm": 2.0324528217315674, "learning_rate": 9.695497733945286e-06, "loss": 0.5342, "step": 921 }, { "epoch": 0.14, "grad_norm": 1.2302322387695312, "learning_rate": 9.694661191443451e-06, "loss": 0.4808, "step": 922 }, { "epoch": 0.14, "grad_norm": 2.291639804840088, "learning_rate": 9.693823537613199e-06, "loss": 0.4827, "step": 923 }, { "epoch": 0.14, "grad_norm": 1.2774345874786377, "learning_rate": 9.692984772652822e-06, "loss": 0.7637, "step": 924 }, { "epoch": 0.14, "grad_norm": 1.379562258720398, "learning_rate": 9.692144896760873e-06, "loss": 0.4289, "step": 925 }, { "epoch": 0.14, "grad_norm": 1.497887134552002, "learning_rate": 9.691303910136171e-06, "loss": 0.4996, "step": 926 }, { "epoch": 0.14, "grad_norm": 3.20267653465271, "learning_rate": 9.690461812977794e-06, "loss": 0.5046, "step": 927 }, { "epoch": 0.14, "grad_norm": 1.822776436805725, "learning_rate": 9.689618605485086e-06, "loss": 0.5262, "step": 928 }, { "epoch": 0.14, "grad_norm": 4.328483581542969, "learning_rate": 9.68877428785765e-06, "loss": 0.5692, "step": 929 }, { "epoch": 0.14, "grad_norm": 1.2863439321517944, "learning_rate": 9.687928860295356e-06, "loss": 0.5402, "step": 930 }, { "epoch": 0.14, "grad_norm": 1.0464507341384888, "learning_rate": 9.687082322998335e-06, "loss": 0.4632, "step": 931 }, { "epoch": 0.14, "grad_norm": 1.3624275922775269, "learning_rate": 9.686234676166983e-06, "loss": 0.5102, "step": 932 }, { "epoch": 0.14, "grad_norm": 1.4481875896453857, "learning_rate": 9.685385920001952e-06, "loss": 0.4593, "step": 933 }, { "epoch": 0.14, "grad_norm": 1.449349045753479, "learning_rate": 9.684536054704165e-06, "loss": 0.5298, "step": 934 }, { "epoch": 0.14, "grad_norm": 1.2252168655395508, "learning_rate": 9.6836850804748e-06, "loss": 0.4741, "step": 935 }, { "epoch": 0.14, "grad_norm": 1.3204290866851807, "learning_rate": 9.682832997515305e-06, "loss": 0.5591, "step": 936 }, { "epoch": 0.14, "grad_norm": 1.3127620220184326, "learning_rate": 9.681979806027383e-06, "loss": 0.5629, "step": 937 }, { "epoch": 0.14, "grad_norm": 1.4057918787002563, "learning_rate": 9.681125506213006e-06, "loss": 0.516, "step": 938 }, { "epoch": 0.14, "grad_norm": 1.3118188381195068, "learning_rate": 9.680270098274403e-06, "loss": 0.5079, "step": 939 }, { "epoch": 0.14, "grad_norm": 1.232740879058838, "learning_rate": 9.67941358241407e-06, "loss": 0.4267, "step": 940 }, { "epoch": 0.14, "grad_norm": 1.1607475280761719, "learning_rate": 9.678555958834762e-06, "loss": 0.4487, "step": 941 }, { "epoch": 0.14, "grad_norm": 3.197089433670044, "learning_rate": 9.677697227739494e-06, "loss": 0.4966, "step": 942 }, { "epoch": 0.14, "grad_norm": 1.3774336576461792, "learning_rate": 9.67683738933155e-06, "loss": 0.4465, "step": 943 }, { "epoch": 0.14, "grad_norm": 1.6603002548217773, "learning_rate": 9.675976443814473e-06, "loss": 0.4891, "step": 944 }, { "epoch": 0.14, "grad_norm": 1.064130187034607, "learning_rate": 9.675114391392065e-06, "loss": 0.5799, "step": 945 }, { "epoch": 0.14, "grad_norm": 1.7056586742401123, "learning_rate": 9.674251232268393e-06, "loss": 0.5122, "step": 946 }, { "epoch": 0.14, "grad_norm": 1.4086663722991943, "learning_rate": 9.673386966647787e-06, "loss": 0.4478, "step": 947 }, { "epoch": 0.14, "grad_norm": 1.6954411268234253, "learning_rate": 9.672521594734838e-06, "loss": 0.4994, "step": 948 }, { "epoch": 0.14, "grad_norm": 1.335860013961792, "learning_rate": 9.671655116734396e-06, "loss": 0.5636, "step": 949 }, { "epoch": 0.14, "grad_norm": 1.4623291492462158, "learning_rate": 9.670787532851575e-06, "loss": 0.4552, "step": 950 }, { "epoch": 0.14, "grad_norm": 1.5913732051849365, "learning_rate": 9.669918843291753e-06, "loss": 0.4445, "step": 951 }, { "epoch": 0.14, "grad_norm": 1.4387308359146118, "learning_rate": 9.669049048260565e-06, "loss": 0.5478, "step": 952 }, { "epoch": 0.14, "grad_norm": 1.4134818315505981, "learning_rate": 9.668178147963915e-06, "loss": 0.4296, "step": 953 }, { "epoch": 0.14, "grad_norm": 1.778599500656128, "learning_rate": 9.667306142607959e-06, "loss": 0.4885, "step": 954 }, { "epoch": 0.14, "grad_norm": 1.4505892992019653, "learning_rate": 9.666433032399123e-06, "loss": 0.489, "step": 955 }, { "epoch": 0.14, "grad_norm": 1.0873271226882935, "learning_rate": 9.665558817544091e-06, "loss": 0.5071, "step": 956 }, { "epoch": 0.14, "grad_norm": 1.6638346910476685, "learning_rate": 9.664683498249807e-06, "loss": 0.4233, "step": 957 }, { "epoch": 0.14, "grad_norm": 1.2786509990692139, "learning_rate": 9.66380707472348e-06, "loss": 0.5252, "step": 958 }, { "epoch": 0.14, "grad_norm": 1.2508333921432495, "learning_rate": 9.662929547172575e-06, "loss": 0.4779, "step": 959 }, { "epoch": 0.14, "grad_norm": 1.7123829126358032, "learning_rate": 9.662050915804826e-06, "loss": 0.4991, "step": 960 }, { "epoch": 0.14, "grad_norm": 1.7954450845718384, "learning_rate": 9.661171180828221e-06, "loss": 0.5137, "step": 961 }, { "epoch": 0.14, "grad_norm": 1.1412769556045532, "learning_rate": 9.660290342451015e-06, "loss": 0.5012, "step": 962 }, { "epoch": 0.14, "grad_norm": 1.111166000366211, "learning_rate": 9.659408400881722e-06, "loss": 0.4466, "step": 963 }, { "epoch": 0.14, "grad_norm": 1.1361223459243774, "learning_rate": 9.658525356329113e-06, "loss": 0.4878, "step": 964 }, { "epoch": 0.14, "grad_norm": 1.0407549142837524, "learning_rate": 9.657641209002226e-06, "loss": 0.5438, "step": 965 }, { "epoch": 0.15, "grad_norm": 1.6471161842346191, "learning_rate": 9.656755959110359e-06, "loss": 0.4638, "step": 966 }, { "epoch": 0.15, "grad_norm": 1.3837443590164185, "learning_rate": 9.655869606863068e-06, "loss": 0.5285, "step": 967 }, { "epoch": 0.15, "grad_norm": 1.358781099319458, "learning_rate": 9.654982152470173e-06, "loss": 0.4302, "step": 968 }, { "epoch": 0.15, "grad_norm": 1.520907998085022, "learning_rate": 9.654093596141753e-06, "loss": 0.4953, "step": 969 }, { "epoch": 0.15, "grad_norm": 2.423729658126831, "learning_rate": 9.65320393808815e-06, "loss": 0.5041, "step": 970 }, { "epoch": 0.15, "grad_norm": 1.531014323234558, "learning_rate": 9.652313178519962e-06, "loss": 0.5721, "step": 971 }, { "epoch": 0.15, "grad_norm": 1.2820430994033813, "learning_rate": 9.651421317648058e-06, "loss": 0.4382, "step": 972 }, { "epoch": 0.15, "grad_norm": 1.2558152675628662, "learning_rate": 9.650528355683553e-06, "loss": 0.5076, "step": 973 }, { "epoch": 0.15, "grad_norm": 1.4162564277648926, "learning_rate": 9.649634292837834e-06, "loss": 0.4944, "step": 974 }, { "epoch": 0.15, "grad_norm": 0.9760634303092957, "learning_rate": 9.648739129322547e-06, "loss": 0.4774, "step": 975 }, { "epoch": 0.15, "grad_norm": 1.5981093645095825, "learning_rate": 9.647842865349595e-06, "loss": 0.5604, "step": 976 }, { "epoch": 0.15, "grad_norm": 1.2841600179672241, "learning_rate": 9.646945501131142e-06, "loss": 0.4629, "step": 977 }, { "epoch": 0.15, "grad_norm": 2.298389196395874, "learning_rate": 9.646047036879614e-06, "loss": 0.5706, "step": 978 }, { "epoch": 0.15, "grad_norm": 2.1233112812042236, "learning_rate": 9.645147472807698e-06, "loss": 0.536, "step": 979 }, { "epoch": 0.15, "grad_norm": 1.1366244554519653, "learning_rate": 9.644246809128342e-06, "loss": 0.4976, "step": 980 }, { "epoch": 0.15, "grad_norm": 1.6734539270401, "learning_rate": 9.643345046054748e-06, "loss": 0.5754, "step": 981 }, { "epoch": 0.15, "grad_norm": 1.2412936687469482, "learning_rate": 9.642442183800387e-06, "loss": 0.4942, "step": 982 }, { "epoch": 0.15, "grad_norm": 1.5640605688095093, "learning_rate": 9.641538222578984e-06, "loss": 0.543, "step": 983 }, { "epoch": 0.15, "grad_norm": 3.2512123584747314, "learning_rate": 9.640633162604528e-06, "loss": 0.4185, "step": 984 }, { "epoch": 0.15, "grad_norm": 1.041841983795166, "learning_rate": 9.639727004091265e-06, "loss": 0.4703, "step": 985 }, { "epoch": 0.15, "grad_norm": 0.986065149307251, "learning_rate": 9.638819747253703e-06, "loss": 0.4335, "step": 986 }, { "epoch": 0.15, "grad_norm": 2.2113263607025146, "learning_rate": 9.63791139230661e-06, "loss": 0.4372, "step": 987 }, { "epoch": 0.15, "grad_norm": 1.4577538967132568, "learning_rate": 9.637001939465012e-06, "loss": 0.4646, "step": 988 }, { "epoch": 0.15, "grad_norm": 1.1862074136734009, "learning_rate": 9.636091388944195e-06, "loss": 0.4675, "step": 989 }, { "epoch": 0.15, "grad_norm": 1.4335650205612183, "learning_rate": 9.63517974095971e-06, "loss": 0.4927, "step": 990 }, { "epoch": 0.15, "grad_norm": 1.5130460262298584, "learning_rate": 9.634266995727364e-06, "loss": 0.5654, "step": 991 }, { "epoch": 0.15, "grad_norm": 1.4740664958953857, "learning_rate": 9.633353153463218e-06, "loss": 0.4672, "step": 992 }, { "epoch": 0.15, "grad_norm": 1.8987693786621094, "learning_rate": 9.632438214383604e-06, "loss": 0.6232, "step": 993 }, { "epoch": 0.15, "grad_norm": 1.3405776023864746, "learning_rate": 9.631522178705106e-06, "loss": 0.5056, "step": 994 }, { "epoch": 0.15, "grad_norm": 1.6325021982192993, "learning_rate": 9.630605046644569e-06, "loss": 0.4756, "step": 995 }, { "epoch": 0.15, "grad_norm": 1.6594470739364624, "learning_rate": 9.629686818419099e-06, "loss": 0.5046, "step": 996 }, { "epoch": 0.15, "grad_norm": 1.4410697221755981, "learning_rate": 9.628767494246059e-06, "loss": 0.4767, "step": 997 }, { "epoch": 0.15, "grad_norm": 1.2786345481872559, "learning_rate": 9.627847074343075e-06, "loss": 0.5079, "step": 998 }, { "epoch": 0.15, "grad_norm": 1.3085969686508179, "learning_rate": 9.62692555892803e-06, "loss": 0.4281, "step": 999 }, { "epoch": 0.15, "grad_norm": 1.6147692203521729, "learning_rate": 9.626002948219067e-06, "loss": 0.447, "step": 1000 }, { "epoch": 0.15, "grad_norm": 1.2450675964355469, "learning_rate": 9.625079242434585e-06, "loss": 0.5054, "step": 1001 }, { "epoch": 0.15, "grad_norm": 1.577627420425415, "learning_rate": 9.62415444179325e-06, "loss": 0.4933, "step": 1002 }, { "epoch": 0.15, "grad_norm": 1.3089332580566406, "learning_rate": 9.623228546513978e-06, "loss": 0.593, "step": 1003 }, { "epoch": 0.15, "grad_norm": 1.5434825420379639, "learning_rate": 9.622301556815953e-06, "loss": 0.4567, "step": 1004 }, { "epoch": 0.15, "grad_norm": 1.3649076223373413, "learning_rate": 9.62137347291861e-06, "loss": 0.4732, "step": 1005 }, { "epoch": 0.15, "grad_norm": 1.3395397663116455, "learning_rate": 9.620444295041649e-06, "loss": 0.5648, "step": 1006 }, { "epoch": 0.15, "grad_norm": 1.3141573667526245, "learning_rate": 9.619514023405023e-06, "loss": 0.4518, "step": 1007 }, { "epoch": 0.15, "grad_norm": 1.7956217527389526, "learning_rate": 9.618582658228951e-06, "loss": 0.4971, "step": 1008 }, { "epoch": 0.15, "grad_norm": 1.0643490552902222, "learning_rate": 9.617650199733909e-06, "loss": 0.5363, "step": 1009 }, { "epoch": 0.15, "grad_norm": 1.1961123943328857, "learning_rate": 9.616716648140625e-06, "loss": 0.4888, "step": 1010 }, { "epoch": 0.15, "grad_norm": 1.2715007066726685, "learning_rate": 9.615782003670095e-06, "loss": 0.5266, "step": 1011 }, { "epoch": 0.15, "grad_norm": 1.7269175052642822, "learning_rate": 9.614846266543568e-06, "loss": 0.5162, "step": 1012 }, { "epoch": 0.15, "grad_norm": 1.3238970041275024, "learning_rate": 9.613909436982553e-06, "loss": 0.7207, "step": 1013 }, { "epoch": 0.15, "grad_norm": 1.1559253931045532, "learning_rate": 9.61297151520882e-06, "loss": 0.4551, "step": 1014 }, { "epoch": 0.15, "grad_norm": 1.3025015592575073, "learning_rate": 9.612032501444392e-06, "loss": 0.4993, "step": 1015 }, { "epoch": 0.15, "grad_norm": 1.2574104070663452, "learning_rate": 9.611092395911556e-06, "loss": 0.4543, "step": 1016 }, { "epoch": 0.15, "grad_norm": 1.7769097089767456, "learning_rate": 9.610151198832857e-06, "loss": 0.5274, "step": 1017 }, { "epoch": 0.15, "grad_norm": 1.7233929634094238, "learning_rate": 9.609208910431096e-06, "loss": 0.5723, "step": 1018 }, { "epoch": 0.15, "grad_norm": 1.6215932369232178, "learning_rate": 9.60826553092933e-06, "loss": 0.4963, "step": 1019 }, { "epoch": 0.15, "grad_norm": 1.1557475328445435, "learning_rate": 9.607321060550881e-06, "loss": 0.4383, "step": 1020 }, { "epoch": 0.15, "grad_norm": 1.3267862796783447, "learning_rate": 9.606375499519325e-06, "loss": 0.4776, "step": 1021 }, { "epoch": 0.15, "grad_norm": 1.7922230958938599, "learning_rate": 9.605428848058497e-06, "loss": 0.5154, "step": 1022 }, { "epoch": 0.15, "grad_norm": 1.5705667734146118, "learning_rate": 9.604481106392489e-06, "loss": 0.5035, "step": 1023 }, { "epoch": 0.15, "grad_norm": 2.5346460342407227, "learning_rate": 9.603532274745651e-06, "loss": 0.5069, "step": 1024 }, { "epoch": 0.15, "grad_norm": 1.2107977867126465, "learning_rate": 9.602582353342595e-06, "loss": 0.4664, "step": 1025 }, { "epoch": 0.15, "grad_norm": 1.4862258434295654, "learning_rate": 9.601631342408186e-06, "loss": 0.5759, "step": 1026 }, { "epoch": 0.15, "grad_norm": 1.3590012788772583, "learning_rate": 9.60067924216755e-06, "loss": 0.5096, "step": 1027 }, { "epoch": 0.15, "grad_norm": 1.3032150268554688, "learning_rate": 9.599726052846069e-06, "loss": 0.5049, "step": 1028 }, { "epoch": 0.15, "grad_norm": 1.3443162441253662, "learning_rate": 9.598771774669384e-06, "loss": 0.5517, "step": 1029 }, { "epoch": 0.15, "grad_norm": 1.350956678390503, "learning_rate": 9.597816407863395e-06, "loss": 0.4993, "step": 1030 }, { "epoch": 0.15, "grad_norm": 1.4182870388031006, "learning_rate": 9.596859952654255e-06, "loss": 0.4752, "step": 1031 }, { "epoch": 0.16, "grad_norm": 1.2767096757888794, "learning_rate": 9.59590240926838e-06, "loss": 0.4455, "step": 1032 }, { "epoch": 0.16, "grad_norm": 1.1314949989318848, "learning_rate": 9.594943777932441e-06, "loss": 0.481, "step": 1033 }, { "epoch": 0.16, "grad_norm": 1.2801506519317627, "learning_rate": 9.593984058873365e-06, "loss": 0.5391, "step": 1034 }, { "epoch": 0.16, "grad_norm": 1.2910921573638916, "learning_rate": 9.59302325231834e-06, "loss": 0.5479, "step": 1035 }, { "epoch": 0.16, "grad_norm": 1.8881524801254272, "learning_rate": 9.592061358494814e-06, "loss": 0.5613, "step": 1036 }, { "epoch": 0.16, "grad_norm": 1.3134090900421143, "learning_rate": 9.59109837763048e-06, "loss": 0.752, "step": 1037 }, { "epoch": 0.16, "grad_norm": 2.268278121948242, "learning_rate": 9.5901343099533e-06, "loss": 0.5065, "step": 1038 }, { "epoch": 0.16, "grad_norm": 1.4738045930862427, "learning_rate": 9.589169155691491e-06, "loss": 0.5475, "step": 1039 }, { "epoch": 0.16, "grad_norm": 1.3642083406448364, "learning_rate": 9.588202915073526e-06, "loss": 0.4013, "step": 1040 }, { "epoch": 0.16, "grad_norm": 1.137466311454773, "learning_rate": 9.587235588328134e-06, "loss": 0.5462, "step": 1041 }, { "epoch": 0.16, "grad_norm": 1.1402629613876343, "learning_rate": 9.586267175684303e-06, "loss": 0.5324, "step": 1042 }, { "epoch": 0.16, "grad_norm": 1.04850435256958, "learning_rate": 9.585297677371277e-06, "loss": 0.5198, "step": 1043 }, { "epoch": 0.16, "grad_norm": 1.503277063369751, "learning_rate": 9.584327093618558e-06, "loss": 0.5298, "step": 1044 }, { "epoch": 0.16, "grad_norm": 3.009143829345703, "learning_rate": 9.583355424655903e-06, "loss": 0.5454, "step": 1045 }, { "epoch": 0.16, "grad_norm": 1.484186053276062, "learning_rate": 9.582382670713328e-06, "loss": 0.5049, "step": 1046 }, { "epoch": 0.16, "grad_norm": 1.9004095792770386, "learning_rate": 9.581408832021106e-06, "loss": 0.5347, "step": 1047 }, { "epoch": 0.16, "grad_norm": 1.4879337549209595, "learning_rate": 9.580433908809763e-06, "loss": 0.4474, "step": 1048 }, { "epoch": 0.16, "grad_norm": 1.1798819303512573, "learning_rate": 9.579457901310086e-06, "loss": 0.4925, "step": 1049 }, { "epoch": 0.16, "grad_norm": 1.1380393505096436, "learning_rate": 9.578480809753118e-06, "loss": 0.4258, "step": 1050 }, { "epoch": 0.16, "grad_norm": 1.1521748304367065, "learning_rate": 9.577502634370157e-06, "loss": 0.7509, "step": 1051 }, { "epoch": 0.16, "grad_norm": 1.2802079916000366, "learning_rate": 9.57652337539276e-06, "loss": 0.5148, "step": 1052 }, { "epoch": 0.16, "grad_norm": 1.0838391780853271, "learning_rate": 9.575543033052736e-06, "loss": 0.4799, "step": 1053 }, { "epoch": 0.16, "grad_norm": 1.2508682012557983, "learning_rate": 9.574561607582156e-06, "loss": 0.7368, "step": 1054 }, { "epoch": 0.16, "grad_norm": 1.251187801361084, "learning_rate": 9.573579099213342e-06, "loss": 0.4855, "step": 1055 }, { "epoch": 0.16, "grad_norm": 1.0946487188339233, "learning_rate": 9.572595508178877e-06, "loss": 0.5072, "step": 1056 }, { "epoch": 0.16, "grad_norm": 1.3954194784164429, "learning_rate": 9.5716108347116e-06, "loss": 0.5313, "step": 1057 }, { "epoch": 0.16, "grad_norm": 1.8760498762130737, "learning_rate": 9.570625079044601e-06, "loss": 0.4883, "step": 1058 }, { "epoch": 0.16, "grad_norm": 1.20220947265625, "learning_rate": 9.569638241411232e-06, "loss": 0.544, "step": 1059 }, { "epoch": 0.16, "grad_norm": 1.0953662395477295, "learning_rate": 9.568650322045098e-06, "loss": 0.5296, "step": 1060 }, { "epoch": 0.16, "grad_norm": 1.3454328775405884, "learning_rate": 9.567661321180064e-06, "loss": 0.4457, "step": 1061 }, { "epoch": 0.16, "grad_norm": 1.3497000932693481, "learning_rate": 9.566671239050243e-06, "loss": 0.4728, "step": 1062 }, { "epoch": 0.16, "grad_norm": 1.0898325443267822, "learning_rate": 9.565680075890011e-06, "loss": 0.7011, "step": 1063 }, { "epoch": 0.16, "grad_norm": 1.382404088973999, "learning_rate": 9.564687831934e-06, "loss": 0.5144, "step": 1064 }, { "epoch": 0.16, "grad_norm": 2.026655435562134, "learning_rate": 9.563694507417094e-06, "loss": 0.5283, "step": 1065 }, { "epoch": 0.16, "grad_norm": 2.827594518661499, "learning_rate": 9.562700102574434e-06, "loss": 0.4706, "step": 1066 }, { "epoch": 0.16, "grad_norm": 1.5561301708221436, "learning_rate": 9.561704617641417e-06, "loss": 0.4857, "step": 1067 }, { "epoch": 0.16, "grad_norm": 1.1721941232681274, "learning_rate": 9.560708052853697e-06, "loss": 0.532, "step": 1068 }, { "epoch": 0.16, "grad_norm": 1.4283208847045898, "learning_rate": 9.559710408447185e-06, "loss": 0.5828, "step": 1069 }, { "epoch": 0.16, "grad_norm": 1.0423637628555298, "learning_rate": 9.55871168465804e-06, "loss": 0.4963, "step": 1070 }, { "epoch": 0.16, "grad_norm": 1.1783908605575562, "learning_rate": 9.557711881722684e-06, "loss": 0.5267, "step": 1071 }, { "epoch": 0.16, "grad_norm": 1.456377387046814, "learning_rate": 9.556710999877795e-06, "loss": 0.4367, "step": 1072 }, { "epoch": 0.16, "grad_norm": 2.0390777587890625, "learning_rate": 9.555709039360297e-06, "loss": 0.4632, "step": 1073 }, { "epoch": 0.16, "grad_norm": 1.4385697841644287, "learning_rate": 9.554706000407382e-06, "loss": 0.5715, "step": 1074 }, { "epoch": 0.16, "grad_norm": 1.2042702436447144, "learning_rate": 9.553701883256488e-06, "loss": 0.5546, "step": 1075 }, { "epoch": 0.16, "grad_norm": 1.586682915687561, "learning_rate": 9.552696688145313e-06, "loss": 0.5478, "step": 1076 }, { "epoch": 0.16, "grad_norm": 1.412071943283081, "learning_rate": 9.551690415311806e-06, "loss": 0.5385, "step": 1077 }, { "epoch": 0.16, "grad_norm": 1.352500081062317, "learning_rate": 9.550683064994176e-06, "loss": 0.4903, "step": 1078 }, { "epoch": 0.16, "grad_norm": 1.1057080030441284, "learning_rate": 9.549674637430884e-06, "loss": 0.5729, "step": 1079 }, { "epoch": 0.16, "grad_norm": 1.1827813386917114, "learning_rate": 9.548665132860648e-06, "loss": 0.5634, "step": 1080 }, { "epoch": 0.16, "grad_norm": 1.2238571643829346, "learning_rate": 9.547654551522436e-06, "loss": 0.5313, "step": 1081 }, { "epoch": 0.16, "grad_norm": 1.779114007949829, "learning_rate": 9.54664289365548e-06, "loss": 0.513, "step": 1082 }, { "epoch": 0.16, "grad_norm": 1.590850830078125, "learning_rate": 9.545630159499254e-06, "loss": 0.4978, "step": 1083 }, { "epoch": 0.16, "grad_norm": 1.5005377531051636, "learning_rate": 9.544616349293503e-06, "loss": 0.4567, "step": 1084 }, { "epoch": 0.16, "grad_norm": 1.1745396852493286, "learning_rate": 9.543601463278212e-06, "loss": 0.5055, "step": 1085 }, { "epoch": 0.16, "grad_norm": 1.7790488004684448, "learning_rate": 9.542585501693628e-06, "loss": 0.5735, "step": 1086 }, { "epoch": 0.16, "grad_norm": 1.140218734741211, "learning_rate": 9.541568464780252e-06, "loss": 0.516, "step": 1087 }, { "epoch": 0.16, "grad_norm": 1.298066258430481, "learning_rate": 9.540550352778838e-06, "loss": 0.5275, "step": 1088 }, { "epoch": 0.16, "grad_norm": 1.6394418478012085, "learning_rate": 9.539531165930396e-06, "loss": 0.5274, "step": 1089 }, { "epoch": 0.16, "grad_norm": 1.4564709663391113, "learning_rate": 9.538510904476188e-06, "loss": 0.5043, "step": 1090 }, { "epoch": 0.16, "grad_norm": 1.511377215385437, "learning_rate": 9.537489568657734e-06, "loss": 0.5217, "step": 1091 }, { "epoch": 0.16, "grad_norm": 1.2558519840240479, "learning_rate": 9.536467158716804e-06, "loss": 0.4947, "step": 1092 }, { "epoch": 0.16, "grad_norm": 1.950581431388855, "learning_rate": 9.535443674895428e-06, "loss": 0.4955, "step": 1093 }, { "epoch": 0.16, "grad_norm": 1.2409355640411377, "learning_rate": 9.534419117435885e-06, "loss": 0.4297, "step": 1094 }, { "epoch": 0.16, "grad_norm": 1.364561915397644, "learning_rate": 9.53339348658071e-06, "loss": 0.5198, "step": 1095 }, { "epoch": 0.16, "grad_norm": 1.4132939577102661, "learning_rate": 9.53236678257269e-06, "loss": 0.5056, "step": 1096 }, { "epoch": 0.16, "grad_norm": 1.2467223405838013, "learning_rate": 9.531339005654873e-06, "loss": 0.4802, "step": 1097 }, { "epoch": 0.16, "grad_norm": 1.4413455724716187, "learning_rate": 9.530310156070554e-06, "loss": 0.4094, "step": 1098 }, { "epoch": 0.17, "grad_norm": 1.209649682044983, "learning_rate": 9.529280234063283e-06, "loss": 0.4243, "step": 1099 }, { "epoch": 0.17, "grad_norm": 1.2318700551986694, "learning_rate": 9.528249239876866e-06, "loss": 0.4953, "step": 1100 }, { "epoch": 0.17, "grad_norm": 1.4685168266296387, "learning_rate": 9.52721717375536e-06, "loss": 0.5431, "step": 1101 }, { "epoch": 0.17, "grad_norm": 1.3112378120422363, "learning_rate": 9.52618403594308e-06, "loss": 0.7902, "step": 1102 }, { "epoch": 0.17, "grad_norm": 1.1623220443725586, "learning_rate": 9.52514982668459e-06, "loss": 0.4379, "step": 1103 }, { "epoch": 0.17, "grad_norm": 1.2196779251098633, "learning_rate": 9.524114546224713e-06, "loss": 0.5389, "step": 1104 }, { "epoch": 0.17, "grad_norm": 1.221953272819519, "learning_rate": 9.523078194808518e-06, "loss": 0.4983, "step": 1105 }, { "epoch": 0.17, "grad_norm": 1.2083933353424072, "learning_rate": 9.522040772681336e-06, "loss": 0.4858, "step": 1106 }, { "epoch": 0.17, "grad_norm": 1.2378337383270264, "learning_rate": 9.521002280088745e-06, "loss": 0.4483, "step": 1107 }, { "epoch": 0.17, "grad_norm": 1.1544294357299805, "learning_rate": 9.519962717276578e-06, "loss": 0.48, "step": 1108 }, { "epoch": 0.17, "grad_norm": 1.23143470287323, "learning_rate": 9.518922084490925e-06, "loss": 0.4768, "step": 1109 }, { "epoch": 0.17, "grad_norm": 1.102584958076477, "learning_rate": 9.517880381978125e-06, "loss": 0.5681, "step": 1110 }, { "epoch": 0.17, "grad_norm": 1.0925602912902832, "learning_rate": 9.516837609984771e-06, "loss": 0.4491, "step": 1111 }, { "epoch": 0.17, "grad_norm": 1.2189912796020508, "learning_rate": 9.515793768757711e-06, "loss": 0.4783, "step": 1112 }, { "epoch": 0.17, "grad_norm": 1.3733057975769043, "learning_rate": 9.514748858544044e-06, "loss": 0.508, "step": 1113 }, { "epoch": 0.17, "grad_norm": 1.1749708652496338, "learning_rate": 9.513702879591124e-06, "loss": 0.4869, "step": 1114 }, { "epoch": 0.17, "grad_norm": 1.2180904150009155, "learning_rate": 9.512655832146556e-06, "loss": 0.4317, "step": 1115 }, { "epoch": 0.17, "grad_norm": 1.4251879453659058, "learning_rate": 9.511607716458198e-06, "loss": 0.4693, "step": 1116 }, { "epoch": 0.17, "grad_norm": 1.1189943552017212, "learning_rate": 9.510558532774163e-06, "loss": 0.5625, "step": 1117 }, { "epoch": 0.17, "grad_norm": 1.5208922624588013, "learning_rate": 9.509508281342816e-06, "loss": 0.5221, "step": 1118 }, { "epoch": 0.17, "grad_norm": 1.6456793546676636, "learning_rate": 9.508456962412775e-06, "loss": 0.5121, "step": 1119 }, { "epoch": 0.17, "grad_norm": 1.3546539545059204, "learning_rate": 9.507404576232908e-06, "loss": 0.5134, "step": 1120 }, { "epoch": 0.17, "grad_norm": 1.3526692390441895, "learning_rate": 9.506351123052339e-06, "loss": 0.7933, "step": 1121 }, { "epoch": 0.17, "grad_norm": 1.5378795862197876, "learning_rate": 9.505296603120443e-06, "loss": 0.4598, "step": 1122 }, { "epoch": 0.17, "grad_norm": 1.3697232007980347, "learning_rate": 9.504241016686847e-06, "loss": 0.5296, "step": 1123 }, { "epoch": 0.17, "grad_norm": 1.5486433506011963, "learning_rate": 9.503184364001432e-06, "loss": 0.551, "step": 1124 }, { "epoch": 0.17, "grad_norm": 1.569339394569397, "learning_rate": 9.50212664531433e-06, "loss": 0.5477, "step": 1125 }, { "epoch": 0.17, "grad_norm": 1.1471455097198486, "learning_rate": 9.50106786087593e-06, "loss": 0.4667, "step": 1126 }, { "epoch": 0.17, "grad_norm": 1.3889061212539673, "learning_rate": 9.500008010936863e-06, "loss": 0.5448, "step": 1127 }, { "epoch": 0.17, "grad_norm": 1.465944766998291, "learning_rate": 9.498947095748023e-06, "loss": 0.5251, "step": 1128 }, { "epoch": 0.17, "grad_norm": 1.2530361413955688, "learning_rate": 9.49788511556055e-06, "loss": 0.495, "step": 1129 }, { "epoch": 0.17, "grad_norm": 1.3327771425247192, "learning_rate": 9.496822070625838e-06, "loss": 0.5069, "step": 1130 }, { "epoch": 0.17, "grad_norm": 1.0233154296875, "learning_rate": 9.495757961195533e-06, "loss": 0.512, "step": 1131 }, { "epoch": 0.17, "grad_norm": 1.1250367164611816, "learning_rate": 9.494692787521531e-06, "loss": 0.5418, "step": 1132 }, { "epoch": 0.17, "grad_norm": 1.2275102138519287, "learning_rate": 9.493626549855986e-06, "loss": 0.5246, "step": 1133 }, { "epoch": 0.17, "grad_norm": 1.171734094619751, "learning_rate": 9.492559248451297e-06, "loss": 0.4485, "step": 1134 }, { "epoch": 0.17, "grad_norm": 1.268871545791626, "learning_rate": 9.491490883560116e-06, "loss": 0.4984, "step": 1135 }, { "epoch": 0.17, "grad_norm": 1.3409264087677002, "learning_rate": 9.49042145543535e-06, "loss": 0.5565, "step": 1136 }, { "epoch": 0.17, "grad_norm": 1.167966604232788, "learning_rate": 9.489350964330157e-06, "loss": 0.7268, "step": 1137 }, { "epoch": 0.17, "grad_norm": 1.1913275718688965, "learning_rate": 9.488279410497944e-06, "loss": 0.535, "step": 1138 }, { "epoch": 0.17, "grad_norm": 1.4697068929672241, "learning_rate": 9.48720679419237e-06, "loss": 0.5803, "step": 1139 }, { "epoch": 0.17, "grad_norm": 1.2905961275100708, "learning_rate": 9.486133115667349e-06, "loss": 0.432, "step": 1140 }, { "epoch": 0.17, "grad_norm": 1.1639035940170288, "learning_rate": 9.485058375177044e-06, "loss": 0.4935, "step": 1141 }, { "epoch": 0.17, "grad_norm": 1.076008915901184, "learning_rate": 9.483982572975868e-06, "loss": 0.4512, "step": 1142 }, { "epoch": 0.17, "grad_norm": 1.6751433610916138, "learning_rate": 9.482905709318486e-06, "loss": 0.5003, "step": 1143 }, { "epoch": 0.17, "grad_norm": 1.3084912300109863, "learning_rate": 9.481827784459818e-06, "loss": 0.4599, "step": 1144 }, { "epoch": 0.17, "grad_norm": 1.2833640575408936, "learning_rate": 9.48074879865503e-06, "loss": 0.7582, "step": 1145 }, { "epoch": 0.17, "grad_norm": 1.192406177520752, "learning_rate": 9.47966875215954e-06, "loss": 0.595, "step": 1146 }, { "epoch": 0.17, "grad_norm": 1.18509840965271, "learning_rate": 9.478587645229023e-06, "loss": 0.4314, "step": 1147 }, { "epoch": 0.17, "grad_norm": 2.311967611312866, "learning_rate": 9.477505478119398e-06, "loss": 0.5729, "step": 1148 }, { "epoch": 0.17, "grad_norm": 1.1746028661727905, "learning_rate": 9.476422251086837e-06, "loss": 0.4415, "step": 1149 }, { "epoch": 0.17, "grad_norm": 1.8263944387435913, "learning_rate": 9.475337964387763e-06, "loss": 0.4768, "step": 1150 }, { "epoch": 0.17, "grad_norm": 1.9906063079833984, "learning_rate": 9.474252618278852e-06, "loss": 0.5618, "step": 1151 }, { "epoch": 0.17, "grad_norm": 1.248396635055542, "learning_rate": 9.473166213017029e-06, "loss": 0.7243, "step": 1152 }, { "epoch": 0.17, "grad_norm": 1.2603514194488525, "learning_rate": 9.472078748859467e-06, "loss": 0.5501, "step": 1153 }, { "epoch": 0.17, "grad_norm": 1.5353198051452637, "learning_rate": 9.470990226063596e-06, "loss": 0.5178, "step": 1154 }, { "epoch": 0.17, "grad_norm": 1.3055553436279297, "learning_rate": 9.469900644887092e-06, "loss": 0.5054, "step": 1155 }, { "epoch": 0.17, "grad_norm": 1.3559901714324951, "learning_rate": 9.468810005587879e-06, "loss": 0.783, "step": 1156 }, { "epoch": 0.17, "grad_norm": 1.9580851793289185, "learning_rate": 9.467718308424139e-06, "loss": 0.4723, "step": 1157 }, { "epoch": 0.17, "grad_norm": 1.7650278806686401, "learning_rate": 9.4666255536543e-06, "loss": 0.4826, "step": 1158 }, { "epoch": 0.17, "grad_norm": 1.1654753684997559, "learning_rate": 9.465531741537041e-06, "loss": 0.4764, "step": 1159 }, { "epoch": 0.17, "grad_norm": 1.643987774848938, "learning_rate": 9.464436872331291e-06, "loss": 0.4287, "step": 1160 }, { "epoch": 0.17, "grad_norm": 1.1221072673797607, "learning_rate": 9.463340946296228e-06, "loss": 0.4248, "step": 1161 }, { "epoch": 0.17, "grad_norm": 1.2623177766799927, "learning_rate": 9.46224396369128e-06, "loss": 0.4697, "step": 1162 }, { "epoch": 0.17, "grad_norm": 1.160628080368042, "learning_rate": 9.461145924776131e-06, "loss": 0.5131, "step": 1163 }, { "epoch": 0.17, "grad_norm": 1.3182622194290161, "learning_rate": 9.46004682981071e-06, "loss": 0.5208, "step": 1164 }, { "epoch": 0.18, "grad_norm": 1.611823558807373, "learning_rate": 9.458946679055192e-06, "loss": 0.4603, "step": 1165 }, { "epoch": 0.18, "grad_norm": 1.7623144388198853, "learning_rate": 9.457845472770012e-06, "loss": 0.5131, "step": 1166 }, { "epoch": 0.18, "grad_norm": 1.0732609033584595, "learning_rate": 9.456743211215846e-06, "loss": 0.4929, "step": 1167 }, { "epoch": 0.18, "grad_norm": 2.118560552597046, "learning_rate": 9.455639894653627e-06, "loss": 0.5348, "step": 1168 }, { "epoch": 0.18, "grad_norm": 4.234200954437256, "learning_rate": 9.45453552334453e-06, "loss": 0.4976, "step": 1169 }, { "epoch": 0.18, "grad_norm": 1.497033715248108, "learning_rate": 9.453430097549986e-06, "loss": 0.477, "step": 1170 }, { "epoch": 0.18, "grad_norm": 1.1614934206008911, "learning_rate": 9.452323617531673e-06, "loss": 0.7552, "step": 1171 }, { "epoch": 0.18, "grad_norm": 1.1549960374832153, "learning_rate": 9.451216083551517e-06, "loss": 0.4581, "step": 1172 }, { "epoch": 0.18, "grad_norm": 1.5629801750183105, "learning_rate": 9.450107495871699e-06, "loss": 0.4972, "step": 1173 }, { "epoch": 0.18, "grad_norm": 1.2822495698928833, "learning_rate": 9.448997854754643e-06, "loss": 0.4531, "step": 1174 }, { "epoch": 0.18, "grad_norm": 1.2013111114501953, "learning_rate": 9.447887160463025e-06, "loss": 0.7286, "step": 1175 }, { "epoch": 0.18, "grad_norm": 1.2211477756500244, "learning_rate": 9.446775413259773e-06, "loss": 0.5047, "step": 1176 }, { "epoch": 0.18, "grad_norm": 1.2512274980545044, "learning_rate": 9.44566261340806e-06, "loss": 0.5104, "step": 1177 }, { "epoch": 0.18, "grad_norm": 1.1586065292358398, "learning_rate": 9.44454876117131e-06, "loss": 0.4641, "step": 1178 }, { "epoch": 0.18, "grad_norm": 1.3367193937301636, "learning_rate": 9.443433856813197e-06, "loss": 0.5036, "step": 1179 }, { "epoch": 0.18, "grad_norm": 1.1449291706085205, "learning_rate": 9.442317900597641e-06, "loss": 0.4813, "step": 1180 }, { "epoch": 0.18, "grad_norm": 1.2034629583358765, "learning_rate": 9.441200892788815e-06, "loss": 0.4443, "step": 1181 }, { "epoch": 0.18, "grad_norm": 1.5503782033920288, "learning_rate": 9.440082833651138e-06, "loss": 0.5155, "step": 1182 }, { "epoch": 0.18, "grad_norm": 1.520485758781433, "learning_rate": 9.438963723449281e-06, "loss": 0.404, "step": 1183 }, { "epoch": 0.18, "grad_norm": 1.2539663314819336, "learning_rate": 9.437843562448158e-06, "loss": 0.4141, "step": 1184 }, { "epoch": 0.18, "grad_norm": 1.0308932065963745, "learning_rate": 9.43672235091294e-06, "loss": 0.4996, "step": 1185 }, { "epoch": 0.18, "grad_norm": 1.2479987144470215, "learning_rate": 9.435600089109038e-06, "loss": 0.5163, "step": 1186 }, { "epoch": 0.18, "grad_norm": 1.3288482427597046, "learning_rate": 9.434476777302119e-06, "loss": 0.4617, "step": 1187 }, { "epoch": 0.18, "grad_norm": 0.9237494468688965, "learning_rate": 9.433352415758093e-06, "loss": 0.5079, "step": 1188 }, { "epoch": 0.18, "grad_norm": 0.9242920875549316, "learning_rate": 9.432227004743122e-06, "loss": 0.4022, "step": 1189 }, { "epoch": 0.18, "grad_norm": 1.3500051498413086, "learning_rate": 9.431100544523614e-06, "loss": 0.5029, "step": 1190 }, { "epoch": 0.18, "grad_norm": 1.0987874269485474, "learning_rate": 9.429973035366228e-06, "loss": 0.7175, "step": 1191 }, { "epoch": 0.18, "grad_norm": 1.329649567604065, "learning_rate": 9.42884447753787e-06, "loss": 0.501, "step": 1192 }, { "epoch": 0.18, "grad_norm": 1.698486566543579, "learning_rate": 9.427714871305694e-06, "loss": 0.4297, "step": 1193 }, { "epoch": 0.18, "grad_norm": 1.226395606994629, "learning_rate": 9.426584216937103e-06, "loss": 0.4872, "step": 1194 }, { "epoch": 0.18, "grad_norm": 1.9135124683380127, "learning_rate": 9.425452514699745e-06, "loss": 0.497, "step": 1195 }, { "epoch": 0.18, "grad_norm": 2.0742132663726807, "learning_rate": 9.42431976486152e-06, "loss": 0.5229, "step": 1196 }, { "epoch": 0.18, "grad_norm": 1.8081194162368774, "learning_rate": 9.423185967690575e-06, "loss": 0.4761, "step": 1197 }, { "epoch": 0.18, "grad_norm": 1.3117706775665283, "learning_rate": 9.422051123455305e-06, "loss": 0.4896, "step": 1198 }, { "epoch": 0.18, "grad_norm": 2.8847389221191406, "learning_rate": 9.420915232424351e-06, "loss": 0.4908, "step": 1199 }, { "epoch": 0.18, "grad_norm": 1.3513840436935425, "learning_rate": 9.419778294866603e-06, "loss": 0.4392, "step": 1200 }, { "epoch": 0.18, "grad_norm": 4.710855960845947, "learning_rate": 9.418640311051199e-06, "loss": 0.5347, "step": 1201 }, { "epoch": 0.18, "grad_norm": 5.699403285980225, "learning_rate": 9.417501281247526e-06, "loss": 0.5761, "step": 1202 }, { "epoch": 0.18, "grad_norm": 1.4317371845245361, "learning_rate": 9.416361205725216e-06, "loss": 0.5027, "step": 1203 }, { "epoch": 0.18, "grad_norm": 1.2519944906234741, "learning_rate": 9.41522008475415e-06, "loss": 0.5126, "step": 1204 }, { "epoch": 0.18, "grad_norm": 5.368774890899658, "learning_rate": 9.414077918604455e-06, "loss": 0.5129, "step": 1205 }, { "epoch": 0.18, "grad_norm": 1.5961239337921143, "learning_rate": 9.412934707546508e-06, "loss": 0.4281, "step": 1206 }, { "epoch": 0.18, "grad_norm": 1.5238826274871826, "learning_rate": 9.411790451850931e-06, "loss": 0.4712, "step": 1207 }, { "epoch": 0.18, "grad_norm": 1.265769362449646, "learning_rate": 9.410645151788595e-06, "loss": 0.413, "step": 1208 }, { "epoch": 0.18, "grad_norm": 1.3130706548690796, "learning_rate": 9.409498807630616e-06, "loss": 0.4936, "step": 1209 }, { "epoch": 0.18, "grad_norm": 1.9677257537841797, "learning_rate": 9.408351419648361e-06, "loss": 0.4981, "step": 1210 }, { "epoch": 0.18, "grad_norm": 1.4011188745498657, "learning_rate": 9.40720298811344e-06, "loss": 0.5046, "step": 1211 }, { "epoch": 0.18, "grad_norm": 1.4806259870529175, "learning_rate": 9.40605351329771e-06, "loss": 0.4702, "step": 1212 }, { "epoch": 0.18, "grad_norm": 1.6751554012298584, "learning_rate": 9.404902995473282e-06, "loss": 0.4577, "step": 1213 }, { "epoch": 0.18, "grad_norm": 1.1122691631317139, "learning_rate": 9.403751434912503e-06, "loss": 0.4637, "step": 1214 }, { "epoch": 0.18, "grad_norm": 1.4670895338058472, "learning_rate": 9.402598831887974e-06, "loss": 0.5562, "step": 1215 }, { "epoch": 0.18, "grad_norm": 1.4195905923843384, "learning_rate": 9.401445186672544e-06, "loss": 0.4615, "step": 1216 }, { "epoch": 0.18, "grad_norm": 2.249227523803711, "learning_rate": 9.400290499539303e-06, "loss": 0.5001, "step": 1217 }, { "epoch": 0.18, "grad_norm": 1.3608214855194092, "learning_rate": 9.399134770761592e-06, "loss": 0.535, "step": 1218 }, { "epoch": 0.18, "grad_norm": 1.1536097526550293, "learning_rate": 9.397978000612998e-06, "loss": 0.4789, "step": 1219 }, { "epoch": 0.18, "grad_norm": 1.48915433883667, "learning_rate": 9.396820189367349e-06, "loss": 0.5624, "step": 1220 }, { "epoch": 0.18, "grad_norm": 2.0173158645629883, "learning_rate": 9.395661337298729e-06, "loss": 0.4372, "step": 1221 }, { "epoch": 0.18, "grad_norm": 1.5651865005493164, "learning_rate": 9.394501444681461e-06, "loss": 0.4583, "step": 1222 }, { "epoch": 0.18, "grad_norm": 1.869188904762268, "learning_rate": 9.393340511790117e-06, "loss": 0.468, "step": 1223 }, { "epoch": 0.18, "grad_norm": 2.167772054672241, "learning_rate": 9.392178538899518e-06, "loss": 0.544, "step": 1224 }, { "epoch": 0.18, "grad_norm": 1.9925974607467651, "learning_rate": 9.391015526284724e-06, "loss": 0.4992, "step": 1225 }, { "epoch": 0.18, "grad_norm": 1.630788803100586, "learning_rate": 9.389851474221047e-06, "loss": 0.4908, "step": 1226 }, { "epoch": 0.18, "grad_norm": 1.5857847929000854, "learning_rate": 9.388686382984047e-06, "loss": 0.5348, "step": 1227 }, { "epoch": 0.18, "grad_norm": 1.6516413688659668, "learning_rate": 9.38752025284952e-06, "loss": 0.4983, "step": 1228 }, { "epoch": 0.18, "grad_norm": 2.2853200435638428, "learning_rate": 9.38635308409352e-06, "loss": 0.5352, "step": 1229 }, { "epoch": 0.18, "grad_norm": 1.3849587440490723, "learning_rate": 9.385184876992337e-06, "loss": 0.5816, "step": 1230 }, { "epoch": 0.18, "grad_norm": 1.25184965133667, "learning_rate": 9.384015631822512e-06, "loss": 0.7378, "step": 1231 }, { "epoch": 0.19, "grad_norm": 1.370384693145752, "learning_rate": 9.382845348860834e-06, "loss": 0.5069, "step": 1232 }, { "epoch": 0.19, "grad_norm": 1.5797200202941895, "learning_rate": 9.38167402838433e-06, "loss": 0.4908, "step": 1233 }, { "epoch": 0.19, "grad_norm": 2.338844060897827, "learning_rate": 9.38050167067028e-06, "loss": 0.5268, "step": 1234 }, { "epoch": 0.19, "grad_norm": 1.7303404808044434, "learning_rate": 9.379328275996206e-06, "loss": 0.4597, "step": 1235 }, { "epoch": 0.19, "grad_norm": 1.3292210102081299, "learning_rate": 9.378153844639875e-06, "loss": 0.5038, "step": 1236 }, { "epoch": 0.19, "grad_norm": 2.1260383129119873, "learning_rate": 9.376978376879302e-06, "loss": 0.5262, "step": 1237 }, { "epoch": 0.19, "grad_norm": 1.6526423692703247, "learning_rate": 9.375801872992744e-06, "loss": 0.5371, "step": 1238 }, { "epoch": 0.19, "grad_norm": 1.0698444843292236, "learning_rate": 9.374624333258707e-06, "loss": 0.436, "step": 1239 }, { "epoch": 0.19, "grad_norm": 1.2051169872283936, "learning_rate": 9.373445757955937e-06, "loss": 0.5069, "step": 1240 }, { "epoch": 0.19, "grad_norm": 1.2426135540008545, "learning_rate": 9.372266147363433e-06, "loss": 0.5182, "step": 1241 }, { "epoch": 0.19, "grad_norm": 1.4318583011627197, "learning_rate": 9.37108550176043e-06, "loss": 0.5107, "step": 1242 }, { "epoch": 0.19, "grad_norm": 1.572123408317566, "learning_rate": 9.369903821426416e-06, "loss": 0.5164, "step": 1243 }, { "epoch": 0.19, "grad_norm": 1.2106688022613525, "learning_rate": 9.368721106641119e-06, "loss": 0.5806, "step": 1244 }, { "epoch": 0.19, "grad_norm": 1.2337315082550049, "learning_rate": 9.367537357684515e-06, "loss": 0.4781, "step": 1245 }, { "epoch": 0.19, "grad_norm": 1.117316484451294, "learning_rate": 9.366352574836819e-06, "loss": 0.4916, "step": 1246 }, { "epoch": 0.19, "grad_norm": 1.096573829650879, "learning_rate": 9.3651667583785e-06, "loss": 0.7108, "step": 1247 }, { "epoch": 0.19, "grad_norm": 1.2300828695297241, "learning_rate": 9.36397990859026e-06, "loss": 0.7726, "step": 1248 }, { "epoch": 0.19, "grad_norm": 1.6415307521820068, "learning_rate": 9.362792025753059e-06, "loss": 0.5115, "step": 1249 }, { "epoch": 0.19, "grad_norm": 1.4282383918762207, "learning_rate": 9.36160311014809e-06, "loss": 0.528, "step": 1250 }, { "epoch": 0.19, "grad_norm": 1.0730310678482056, "learning_rate": 9.360413162056798e-06, "loss": 0.4368, "step": 1251 }, { "epoch": 0.19, "grad_norm": 1.378089189529419, "learning_rate": 9.359222181760869e-06, "loss": 0.4318, "step": 1252 }, { "epoch": 0.19, "grad_norm": 1.9370602369308472, "learning_rate": 9.358030169542235e-06, "loss": 0.5136, "step": 1253 }, { "epoch": 0.19, "grad_norm": 1.2603397369384766, "learning_rate": 9.356837125683065e-06, "loss": 0.4886, "step": 1254 }, { "epoch": 0.19, "grad_norm": 1.2506194114685059, "learning_rate": 9.355643050465786e-06, "loss": 0.4076, "step": 1255 }, { "epoch": 0.19, "grad_norm": 1.1695719957351685, "learning_rate": 9.354447944173059e-06, "loss": 0.514, "step": 1256 }, { "epoch": 0.19, "grad_norm": 1.9852322340011597, "learning_rate": 9.353251807087792e-06, "loss": 0.5416, "step": 1257 }, { "epoch": 0.19, "grad_norm": 1.23360013961792, "learning_rate": 9.352054639493133e-06, "loss": 0.5207, "step": 1258 }, { "epoch": 0.19, "grad_norm": 2.956663131713867, "learning_rate": 9.350856441672484e-06, "loss": 0.5016, "step": 1259 }, { "epoch": 0.19, "grad_norm": 1.2311264276504517, "learning_rate": 9.34965721390948e-06, "loss": 0.5379, "step": 1260 }, { "epoch": 0.19, "grad_norm": 1.4780523777008057, "learning_rate": 9.348456956488005e-06, "loss": 0.4981, "step": 1261 }, { "epoch": 0.19, "grad_norm": 1.2574177980422974, "learning_rate": 9.347255669692186e-06, "loss": 0.4893, "step": 1262 }, { "epoch": 0.19, "grad_norm": 1.9673058986663818, "learning_rate": 9.346053353806394e-06, "loss": 0.5493, "step": 1263 }, { "epoch": 0.19, "grad_norm": 1.6735273599624634, "learning_rate": 9.344850009115244e-06, "loss": 0.4975, "step": 1264 }, { "epoch": 0.19, "grad_norm": 1.4706579446792603, "learning_rate": 9.343645635903595e-06, "loss": 0.5069, "step": 1265 }, { "epoch": 0.19, "grad_norm": 2.2608869075775146, "learning_rate": 9.342440234456546e-06, "loss": 0.4723, "step": 1266 }, { "epoch": 0.19, "grad_norm": 1.3016645908355713, "learning_rate": 9.341233805059442e-06, "loss": 0.4721, "step": 1267 }, { "epoch": 0.19, "grad_norm": 3.38785982131958, "learning_rate": 9.340026347997872e-06, "loss": 0.4529, "step": 1268 }, { "epoch": 0.19, "grad_norm": 1.5146955251693726, "learning_rate": 9.338817863557665e-06, "loss": 0.4616, "step": 1269 }, { "epoch": 0.19, "grad_norm": 1.6683170795440674, "learning_rate": 9.3376083520249e-06, "loss": 0.5192, "step": 1270 }, { "epoch": 0.19, "grad_norm": 1.1773275136947632, "learning_rate": 9.336397813685891e-06, "loss": 0.5357, "step": 1271 }, { "epoch": 0.19, "grad_norm": 1.2701120376586914, "learning_rate": 9.335186248827199e-06, "loss": 0.5437, "step": 1272 }, { "epoch": 0.19, "grad_norm": 1.4681107997894287, "learning_rate": 9.333973657735629e-06, "loss": 0.4844, "step": 1273 }, { "epoch": 0.19, "grad_norm": 1.3323159217834473, "learning_rate": 9.332760040698228e-06, "loss": 0.4865, "step": 1274 }, { "epoch": 0.19, "grad_norm": 1.1208058595657349, "learning_rate": 9.331545398002285e-06, "loss": 0.5362, "step": 1275 }, { "epoch": 0.19, "grad_norm": 1.6296193599700928, "learning_rate": 9.330329729935332e-06, "loss": 0.5674, "step": 1276 }, { "epoch": 0.19, "grad_norm": 1.4864675998687744, "learning_rate": 9.329113036785144e-06, "loss": 0.5033, "step": 1277 }, { "epoch": 0.19, "grad_norm": 1.7556191682815552, "learning_rate": 9.32789531883974e-06, "loss": 0.5469, "step": 1278 }, { "epoch": 0.19, "grad_norm": 1.4174091815948486, "learning_rate": 9.326676576387377e-06, "loss": 0.4931, "step": 1279 }, { "epoch": 0.19, "grad_norm": 1.476854920387268, "learning_rate": 9.325456809716562e-06, "loss": 0.51, "step": 1280 }, { "epoch": 0.19, "grad_norm": 2.197284698486328, "learning_rate": 9.324236019116038e-06, "loss": 0.5118, "step": 1281 }, { "epoch": 0.19, "grad_norm": 1.5956568717956543, "learning_rate": 9.323014204874793e-06, "loss": 0.5409, "step": 1282 }, { "epoch": 0.19, "grad_norm": 1.9362907409667969, "learning_rate": 9.321791367282058e-06, "loss": 0.534, "step": 1283 }, { "epoch": 0.19, "grad_norm": 1.1768243312835693, "learning_rate": 9.320567506627302e-06, "loss": 0.7302, "step": 1284 }, { "epoch": 0.19, "grad_norm": 1.1992579698562622, "learning_rate": 9.319342623200245e-06, "loss": 0.4654, "step": 1285 }, { "epoch": 0.19, "grad_norm": 1.362775444984436, "learning_rate": 9.318116717290839e-06, "loss": 0.4945, "step": 1286 }, { "epoch": 0.19, "grad_norm": 1.3753780126571655, "learning_rate": 9.316889789189285e-06, "loss": 0.5007, "step": 1287 }, { "epoch": 0.19, "grad_norm": 2.2647476196289062, "learning_rate": 9.315661839186022e-06, "loss": 0.5155, "step": 1288 }, { "epoch": 0.19, "grad_norm": 1.6366498470306396, "learning_rate": 9.314432867571732e-06, "loss": 0.6639, "step": 1289 }, { "epoch": 0.19, "grad_norm": 1.4834133386611938, "learning_rate": 9.31320287463734e-06, "loss": 0.4945, "step": 1290 }, { "epoch": 0.19, "grad_norm": 1.4808955192565918, "learning_rate": 9.311971860674014e-06, "loss": 0.5142, "step": 1291 }, { "epoch": 0.19, "grad_norm": 1.2593339681625366, "learning_rate": 9.31073982597316e-06, "loss": 0.4587, "step": 1292 }, { "epoch": 0.19, "grad_norm": 1.3227390050888062, "learning_rate": 9.309506770826427e-06, "loss": 0.4794, "step": 1293 }, { "epoch": 0.19, "grad_norm": 0.9995209574699402, "learning_rate": 9.30827269552571e-06, "loss": 0.4846, "step": 1294 }, { "epoch": 0.19, "grad_norm": 1.1189805269241333, "learning_rate": 9.307037600363133e-06, "loss": 0.5159, "step": 1295 }, { "epoch": 0.19, "grad_norm": 1.6143929958343506, "learning_rate": 9.305801485631076e-06, "loss": 0.5625, "step": 1296 }, { "epoch": 0.19, "grad_norm": 1.8278288841247559, "learning_rate": 9.304564351622156e-06, "loss": 0.4238, "step": 1297 }, { "epoch": 0.19, "grad_norm": 1.8142731189727783, "learning_rate": 9.303326198629225e-06, "loss": 0.5252, "step": 1298 }, { "epoch": 0.2, "grad_norm": 1.5728756189346313, "learning_rate": 9.302087026945382e-06, "loss": 0.5045, "step": 1299 }, { "epoch": 0.2, "grad_norm": 1.8894942998886108, "learning_rate": 9.300846836863968e-06, "loss": 0.507, "step": 1300 }, { "epoch": 0.2, "grad_norm": 1.0666238069534302, "learning_rate": 9.29960562867856e-06, "loss": 0.4562, "step": 1301 }, { "epoch": 0.2, "grad_norm": 1.1832621097564697, "learning_rate": 9.29836340268298e-06, "loss": 0.5243, "step": 1302 }, { "epoch": 0.2, "grad_norm": 1.2326772212982178, "learning_rate": 9.297120159171292e-06, "loss": 0.5409, "step": 1303 }, { "epoch": 0.2, "grad_norm": 6.289426803588867, "learning_rate": 9.295875898437795e-06, "loss": 0.4201, "step": 1304 }, { "epoch": 0.2, "grad_norm": 1.3910667896270752, "learning_rate": 9.294630620777035e-06, "loss": 0.4671, "step": 1305 }, { "epoch": 0.2, "grad_norm": 1.5416640043258667, "learning_rate": 9.293384326483797e-06, "loss": 0.4706, "step": 1306 }, { "epoch": 0.2, "grad_norm": 1.0053426027297974, "learning_rate": 9.292137015853104e-06, "loss": 0.5028, "step": 1307 }, { "epoch": 0.2, "grad_norm": 1.2320058345794678, "learning_rate": 9.290888689180223e-06, "loss": 0.4489, "step": 1308 }, { "epoch": 0.2, "grad_norm": 1.535919189453125, "learning_rate": 9.289639346760659e-06, "loss": 0.458, "step": 1309 }, { "epoch": 0.2, "grad_norm": 1.295182704925537, "learning_rate": 9.288388988890157e-06, "loss": 0.5342, "step": 1310 }, { "epoch": 0.2, "grad_norm": 1.360221266746521, "learning_rate": 9.287137615864709e-06, "loss": 0.4593, "step": 1311 }, { "epoch": 0.2, "grad_norm": 1.647106409072876, "learning_rate": 9.285885227980538e-06, "loss": 0.4776, "step": 1312 }, { "epoch": 0.2, "grad_norm": 1.3525912761688232, "learning_rate": 9.284631825534113e-06, "loss": 0.4091, "step": 1313 }, { "epoch": 0.2, "grad_norm": 5.457178115844727, "learning_rate": 9.283377408822141e-06, "loss": 0.4999, "step": 1314 }, { "epoch": 0.2, "grad_norm": 1.0735502243041992, "learning_rate": 9.28212197814157e-06, "loss": 0.5105, "step": 1315 }, { "epoch": 0.2, "grad_norm": 1.3504583835601807, "learning_rate": 9.280865533789588e-06, "loss": 0.51, "step": 1316 }, { "epoch": 0.2, "grad_norm": 1.5052297115325928, "learning_rate": 9.279608076063622e-06, "loss": 0.4064, "step": 1317 }, { "epoch": 0.2, "grad_norm": 1.1619502305984497, "learning_rate": 9.27834960526134e-06, "loss": 0.4504, "step": 1318 }, { "epoch": 0.2, "grad_norm": 1.1141386032104492, "learning_rate": 9.277090121680649e-06, "loss": 0.454, "step": 1319 }, { "epoch": 0.2, "grad_norm": 1.7364346981048584, "learning_rate": 9.275829625619698e-06, "loss": 0.469, "step": 1320 }, { "epoch": 0.2, "grad_norm": 1.1218852996826172, "learning_rate": 9.274568117376869e-06, "loss": 0.4219, "step": 1321 }, { "epoch": 0.2, "grad_norm": 1.4196604490280151, "learning_rate": 9.273305597250797e-06, "loss": 0.4862, "step": 1322 }, { "epoch": 0.2, "grad_norm": 1.8877168893814087, "learning_rate": 9.272042065540338e-06, "loss": 0.5287, "step": 1323 }, { "epoch": 0.2, "grad_norm": 1.5872368812561035, "learning_rate": 9.270777522544605e-06, "loss": 0.5397, "step": 1324 }, { "epoch": 0.2, "grad_norm": 1.2747817039489746, "learning_rate": 9.269511968562939e-06, "loss": 0.5429, "step": 1325 }, { "epoch": 0.2, "grad_norm": 2.0624544620513916, "learning_rate": 9.268245403894925e-06, "loss": 0.4953, "step": 1326 }, { "epoch": 0.2, "grad_norm": 1.742588758468628, "learning_rate": 9.266977828840387e-06, "loss": 0.5197, "step": 1327 }, { "epoch": 0.2, "grad_norm": 1.699676513671875, "learning_rate": 9.265709243699388e-06, "loss": 0.5266, "step": 1328 }, { "epoch": 0.2, "grad_norm": 1.5803371667861938, "learning_rate": 9.264439648772229e-06, "loss": 0.4527, "step": 1329 }, { "epoch": 0.2, "grad_norm": 1.1347670555114746, "learning_rate": 9.263169044359449e-06, "loss": 0.4867, "step": 1330 }, { "epoch": 0.2, "grad_norm": 1.9661810398101807, "learning_rate": 9.261897430761831e-06, "loss": 0.4601, "step": 1331 }, { "epoch": 0.2, "grad_norm": 1.1647062301635742, "learning_rate": 9.26062480828039e-06, "loss": 0.422, "step": 1332 }, { "epoch": 0.2, "grad_norm": 2.4176998138427734, "learning_rate": 9.259351177216386e-06, "loss": 0.4775, "step": 1333 }, { "epoch": 0.2, "grad_norm": 1.3011219501495361, "learning_rate": 9.258076537871317e-06, "loss": 0.4376, "step": 1334 }, { "epoch": 0.2, "grad_norm": 1.0373284816741943, "learning_rate": 9.256800890546913e-06, "loss": 0.49, "step": 1335 }, { "epoch": 0.2, "grad_norm": 12.967611312866211, "learning_rate": 9.25552423554515e-06, "loss": 0.526, "step": 1336 }, { "epoch": 0.2, "grad_norm": 1.3000510931015015, "learning_rate": 9.25424657316824e-06, "loss": 0.4703, "step": 1337 }, { "epoch": 0.2, "grad_norm": 1.2541106939315796, "learning_rate": 9.252967903718634e-06, "loss": 0.7687, "step": 1338 }, { "epoch": 0.2, "grad_norm": 1.775075912475586, "learning_rate": 9.25168822749902e-06, "loss": 0.5345, "step": 1339 }, { "epoch": 0.2, "grad_norm": 1.779845118522644, "learning_rate": 9.250407544812325e-06, "loss": 0.5893, "step": 1340 }, { "epoch": 0.2, "grad_norm": 1.4448015689849854, "learning_rate": 9.249125855961714e-06, "loss": 0.5797, "step": 1341 }, { "epoch": 0.2, "grad_norm": 1.1932169198989868, "learning_rate": 9.247843161250593e-06, "loss": 0.7553, "step": 1342 }, { "epoch": 0.2, "grad_norm": 1.2341424226760864, "learning_rate": 9.246559460982603e-06, "loss": 0.4662, "step": 1343 }, { "epoch": 0.2, "grad_norm": 1.3548763990402222, "learning_rate": 9.245274755461621e-06, "loss": 0.4564, "step": 1344 }, { "epoch": 0.2, "grad_norm": 1.3674252033233643, "learning_rate": 9.243989044991768e-06, "loss": 0.4881, "step": 1345 }, { "epoch": 0.2, "grad_norm": 1.4099191427230835, "learning_rate": 9.242702329877397e-06, "loss": 0.502, "step": 1346 }, { "epoch": 0.2, "grad_norm": 1.4730761051177979, "learning_rate": 9.241414610423104e-06, "loss": 0.5205, "step": 1347 }, { "epoch": 0.2, "grad_norm": 1.0915255546569824, "learning_rate": 9.240125886933718e-06, "loss": 0.5243, "step": 1348 }, { "epoch": 0.2, "grad_norm": 1.2928593158721924, "learning_rate": 9.23883615971431e-06, "loss": 0.4774, "step": 1349 }, { "epoch": 0.2, "grad_norm": 1.3105278015136719, "learning_rate": 9.237545429070185e-06, "loss": 0.5228, "step": 1350 }, { "epoch": 0.2, "grad_norm": 1.940337061882019, "learning_rate": 9.236253695306888e-06, "loss": 0.5105, "step": 1351 }, { "epoch": 0.2, "grad_norm": 1.4019906520843506, "learning_rate": 9.234960958730199e-06, "loss": 0.557, "step": 1352 }, { "epoch": 0.2, "grad_norm": 1.2202776670455933, "learning_rate": 9.233667219646139e-06, "loss": 0.3752, "step": 1353 }, { "epoch": 0.2, "grad_norm": 1.7862985134124756, "learning_rate": 9.23237247836096e-06, "loss": 0.4363, "step": 1354 }, { "epoch": 0.2, "grad_norm": 1.3797708749771118, "learning_rate": 9.231076735181162e-06, "loss": 0.5225, "step": 1355 }, { "epoch": 0.2, "grad_norm": 1.2592912912368774, "learning_rate": 9.22977999041347e-06, "loss": 0.531, "step": 1356 }, { "epoch": 0.2, "grad_norm": 1.5218796730041504, "learning_rate": 9.228482244364854e-06, "loss": 0.4646, "step": 1357 }, { "epoch": 0.2, "grad_norm": 1.8996858596801758, "learning_rate": 9.227183497342519e-06, "loss": 0.5279, "step": 1358 }, { "epoch": 0.2, "grad_norm": 1.5077890157699585, "learning_rate": 9.225883749653905e-06, "loss": 0.4083, "step": 1359 }, { "epoch": 0.2, "grad_norm": 1.652261734008789, "learning_rate": 9.224583001606691e-06, "loss": 0.4948, "step": 1360 }, { "epoch": 0.2, "grad_norm": 1.4621385335922241, "learning_rate": 9.223281253508796e-06, "loss": 0.4881, "step": 1361 }, { "epoch": 0.2, "grad_norm": 1.9133710861206055, "learning_rate": 9.221978505668365e-06, "loss": 0.5564, "step": 1362 }, { "epoch": 0.2, "grad_norm": 1.407110333442688, "learning_rate": 9.220674758393792e-06, "loss": 0.7772, "step": 1363 }, { "epoch": 0.2, "grad_norm": 1.5948580503463745, "learning_rate": 9.219370011993703e-06, "loss": 0.5407, "step": 1364 }, { "epoch": 0.21, "grad_norm": 1.3786765336990356, "learning_rate": 9.218064266776958e-06, "loss": 0.4741, "step": 1365 }, { "epoch": 0.21, "grad_norm": 2.1102304458618164, "learning_rate": 9.216757523052652e-06, "loss": 0.481, "step": 1366 }, { "epoch": 0.21, "grad_norm": 0.9956469535827637, "learning_rate": 9.215449781130127e-06, "loss": 0.5117, "step": 1367 }, { "epoch": 0.21, "grad_norm": 1.2550601959228516, "learning_rate": 9.214141041318947e-06, "loss": 0.4672, "step": 1368 }, { "epoch": 0.21, "grad_norm": 1.1329981088638306, "learning_rate": 9.212831303928924e-06, "loss": 0.501, "step": 1369 }, { "epoch": 0.21, "grad_norm": 1.1386281251907349, "learning_rate": 9.211520569270098e-06, "loss": 0.5274, "step": 1370 }, { "epoch": 0.21, "grad_norm": 1.257060170173645, "learning_rate": 9.210208837652749e-06, "loss": 0.4476, "step": 1371 }, { "epoch": 0.21, "grad_norm": 1.5168932676315308, "learning_rate": 9.208896109387395e-06, "loss": 0.5308, "step": 1372 }, { "epoch": 0.21, "grad_norm": 1.420587182044983, "learning_rate": 9.207582384784786e-06, "loss": 0.498, "step": 1373 }, { "epoch": 0.21, "grad_norm": 1.295494556427002, "learning_rate": 9.206267664155906e-06, "loss": 0.5401, "step": 1374 }, { "epoch": 0.21, "grad_norm": 1.2664117813110352, "learning_rate": 9.204951947811984e-06, "loss": 0.5471, "step": 1375 }, { "epoch": 0.21, "grad_norm": 1.680850863456726, "learning_rate": 9.203635236064473e-06, "loss": 0.4997, "step": 1376 }, { "epoch": 0.21, "grad_norm": 1.5012105703353882, "learning_rate": 9.20231752922507e-06, "loss": 0.5269, "step": 1377 }, { "epoch": 0.21, "grad_norm": 1.3785831928253174, "learning_rate": 9.200998827605705e-06, "loss": 0.5324, "step": 1378 }, { "epoch": 0.21, "grad_norm": 1.8686798810958862, "learning_rate": 9.199679131518542e-06, "loss": 0.5054, "step": 1379 }, { "epoch": 0.21, "grad_norm": 1.8026667833328247, "learning_rate": 9.198358441275983e-06, "loss": 0.5369, "step": 1380 }, { "epoch": 0.21, "grad_norm": 1.3467440605163574, "learning_rate": 9.197036757190665e-06, "loss": 0.495, "step": 1381 }, { "epoch": 0.21, "grad_norm": 1.315015435218811, "learning_rate": 9.195714079575458e-06, "loss": 0.4438, "step": 1382 }, { "epoch": 0.21, "grad_norm": 1.511101484298706, "learning_rate": 9.194390408743467e-06, "loss": 0.5486, "step": 1383 }, { "epoch": 0.21, "grad_norm": 1.3511011600494385, "learning_rate": 9.193065745008036e-06, "loss": 0.5205, "step": 1384 }, { "epoch": 0.21, "grad_norm": 1.2649646997451782, "learning_rate": 9.191740088682742e-06, "loss": 0.5196, "step": 1385 }, { "epoch": 0.21, "grad_norm": 1.468040108680725, "learning_rate": 9.190413440081395e-06, "loss": 0.4041, "step": 1386 }, { "epoch": 0.21, "grad_norm": 1.7558338642120361, "learning_rate": 9.189085799518043e-06, "loss": 0.5582, "step": 1387 }, { "epoch": 0.21, "grad_norm": 1.9079216718673706, "learning_rate": 9.187757167306967e-06, "loss": 0.464, "step": 1388 }, { "epoch": 0.21, "grad_norm": 1.414036512374878, "learning_rate": 9.186427543762681e-06, "loss": 0.4413, "step": 1389 }, { "epoch": 0.21, "grad_norm": 1.1422568559646606, "learning_rate": 9.18509692919994e-06, "loss": 0.4694, "step": 1390 }, { "epoch": 0.21, "grad_norm": 1.615176796913147, "learning_rate": 9.183765323933727e-06, "loss": 0.4764, "step": 1391 }, { "epoch": 0.21, "grad_norm": 1.2072899341583252, "learning_rate": 9.18243272827926e-06, "loss": 0.4885, "step": 1392 }, { "epoch": 0.21, "grad_norm": 1.5427318811416626, "learning_rate": 9.181099142551998e-06, "loss": 0.5493, "step": 1393 }, { "epoch": 0.21, "grad_norm": 1.3966646194458008, "learning_rate": 9.179764567067624e-06, "loss": 0.4482, "step": 1394 }, { "epoch": 0.21, "grad_norm": 1.747755765914917, "learning_rate": 9.178429002142066e-06, "loss": 0.4664, "step": 1395 }, { "epoch": 0.21, "grad_norm": 1.1826988458633423, "learning_rate": 9.17709244809148e-06, "loss": 0.5185, "step": 1396 }, { "epoch": 0.21, "grad_norm": 1.2410372495651245, "learning_rate": 9.175754905232258e-06, "loss": 0.4228, "step": 1397 }, { "epoch": 0.21, "grad_norm": 1.4162592887878418, "learning_rate": 9.17441637388102e-06, "loss": 0.5296, "step": 1398 }, { "epoch": 0.21, "grad_norm": 1.3091577291488647, "learning_rate": 9.173076854354634e-06, "loss": 0.7691, "step": 1399 }, { "epoch": 0.21, "grad_norm": 1.8163206577301025, "learning_rate": 9.171736346970188e-06, "loss": 0.5567, "step": 1400 }, { "epoch": 0.21, "grad_norm": 1.824869155883789, "learning_rate": 9.17039485204501e-06, "loss": 0.5643, "step": 1401 }, { "epoch": 0.21, "grad_norm": 1.613119125366211, "learning_rate": 9.169052369896663e-06, "loss": 0.4852, "step": 1402 }, { "epoch": 0.21, "grad_norm": 1.5966309309005737, "learning_rate": 9.16770890084294e-06, "loss": 0.566, "step": 1403 }, { "epoch": 0.21, "grad_norm": 1.2169650793075562, "learning_rate": 9.166364445201868e-06, "loss": 0.5479, "step": 1404 }, { "epoch": 0.21, "grad_norm": 8.085444450378418, "learning_rate": 9.165019003291712e-06, "loss": 0.4727, "step": 1405 }, { "epoch": 0.21, "grad_norm": 1.551830768585205, "learning_rate": 9.163672575430965e-06, "loss": 0.4982, "step": 1406 }, { "epoch": 0.21, "grad_norm": 1.2466245889663696, "learning_rate": 9.162325161938356e-06, "loss": 0.5024, "step": 1407 }, { "epoch": 0.21, "grad_norm": 1.9863706827163696, "learning_rate": 9.16097676313285e-06, "loss": 0.5466, "step": 1408 }, { "epoch": 0.21, "grad_norm": 1.4476068019866943, "learning_rate": 9.15962737933364e-06, "loss": 0.4867, "step": 1409 }, { "epoch": 0.21, "grad_norm": 1.8685214519500732, "learning_rate": 9.158277010860152e-06, "loss": 0.5115, "step": 1410 }, { "epoch": 0.21, "grad_norm": 1.4079750776290894, "learning_rate": 9.156925658032052e-06, "loss": 0.5162, "step": 1411 }, { "epoch": 0.21, "grad_norm": 1.2916966676712036, "learning_rate": 9.155573321169234e-06, "loss": 0.5534, "step": 1412 }, { "epoch": 0.21, "grad_norm": 6.001343250274658, "learning_rate": 9.154220000591821e-06, "loss": 0.5254, "step": 1413 }, { "epoch": 0.21, "grad_norm": 1.5755019187927246, "learning_rate": 9.152865696620178e-06, "loss": 0.516, "step": 1414 }, { "epoch": 0.21, "grad_norm": 1.3150081634521484, "learning_rate": 9.151510409574897e-06, "loss": 0.3969, "step": 1415 }, { "epoch": 0.21, "grad_norm": 1.3972889184951782, "learning_rate": 9.150154139776806e-06, "loss": 0.4023, "step": 1416 }, { "epoch": 0.21, "grad_norm": 1.2097139358520508, "learning_rate": 9.148796887546959e-06, "loss": 0.702, "step": 1417 }, { "epoch": 0.21, "grad_norm": 1.5896549224853516, "learning_rate": 9.14743865320665e-06, "loss": 0.5476, "step": 1418 }, { "epoch": 0.21, "grad_norm": 1.2264232635498047, "learning_rate": 9.146079437077401e-06, "loss": 0.4555, "step": 1419 }, { "epoch": 0.21, "grad_norm": 1.3413817882537842, "learning_rate": 9.144719239480972e-06, "loss": 0.492, "step": 1420 }, { "epoch": 0.21, "grad_norm": 1.2424649000167847, "learning_rate": 9.143358060739347e-06, "loss": 0.4216, "step": 1421 }, { "epoch": 0.21, "grad_norm": 1.212012767791748, "learning_rate": 9.141995901174749e-06, "loss": 0.4632, "step": 1422 }, { "epoch": 0.21, "grad_norm": 1.4117058515548706, "learning_rate": 9.14063276110963e-06, "loss": 0.582, "step": 1423 }, { "epoch": 0.21, "grad_norm": 1.203729510307312, "learning_rate": 9.139268640866673e-06, "loss": 0.5246, "step": 1424 }, { "epoch": 0.21, "grad_norm": 1.5801626443862915, "learning_rate": 9.137903540768798e-06, "loss": 0.3934, "step": 1425 }, { "epoch": 0.21, "grad_norm": 1.9516838788986206, "learning_rate": 9.13653746113915e-06, "loss": 0.5738, "step": 1426 }, { "epoch": 0.21, "grad_norm": 2.207852840423584, "learning_rate": 9.135170402301117e-06, "loss": 0.4891, "step": 1427 }, { "epoch": 0.21, "grad_norm": 2.2673068046569824, "learning_rate": 9.133802364578304e-06, "loss": 0.3717, "step": 1428 }, { "epoch": 0.21, "grad_norm": 1.5565625429153442, "learning_rate": 9.132433348294559e-06, "loss": 0.4972, "step": 1429 }, { "epoch": 0.21, "grad_norm": 1.3802217245101929, "learning_rate": 9.131063353773956e-06, "loss": 0.4976, "step": 1430 }, { "epoch": 0.21, "grad_norm": 1.6573764085769653, "learning_rate": 9.129692381340807e-06, "loss": 0.5211, "step": 1431 }, { "epoch": 0.22, "grad_norm": 1.3873991966247559, "learning_rate": 9.128320431319644e-06, "loss": 0.762, "step": 1432 }, { "epoch": 0.22, "grad_norm": 1.6951541900634766, "learning_rate": 9.126947504035244e-06, "loss": 0.6044, "step": 1433 }, { "epoch": 0.22, "grad_norm": 0.9741880893707275, "learning_rate": 9.125573599812604e-06, "loss": 0.4519, "step": 1434 }, { "epoch": 0.22, "grad_norm": 1.4493855237960815, "learning_rate": 9.124198718976959e-06, "loss": 0.467, "step": 1435 }, { "epoch": 0.22, "grad_norm": 1.883093237876892, "learning_rate": 9.122822861853774e-06, "loss": 0.5384, "step": 1436 }, { "epoch": 0.22, "grad_norm": 1.4158921241760254, "learning_rate": 9.121446028768744e-06, "loss": 0.4714, "step": 1437 }, { "epoch": 0.22, "grad_norm": 1.9915369749069214, "learning_rate": 9.120068220047793e-06, "loss": 0.5238, "step": 1438 }, { "epoch": 0.22, "grad_norm": 1.721173644065857, "learning_rate": 9.11868943601708e-06, "loss": 0.573, "step": 1439 }, { "epoch": 0.22, "grad_norm": 1.4377111196517944, "learning_rate": 9.117309677002994e-06, "loss": 0.5056, "step": 1440 }, { "epoch": 0.22, "grad_norm": 1.8448742628097534, "learning_rate": 9.115928943332152e-06, "loss": 0.5586, "step": 1441 }, { "epoch": 0.22, "grad_norm": 1.9944732189178467, "learning_rate": 9.114547235331405e-06, "loss": 0.5255, "step": 1442 }, { "epoch": 0.22, "grad_norm": 1.3214634656906128, "learning_rate": 9.113164553327835e-06, "loss": 0.4753, "step": 1443 }, { "epoch": 0.22, "grad_norm": 1.8402554988861084, "learning_rate": 9.11178089764875e-06, "loss": 0.5461, "step": 1444 }, { "epoch": 0.22, "grad_norm": 2.358320713043213, "learning_rate": 9.110396268621692e-06, "loss": 0.4769, "step": 1445 }, { "epoch": 0.22, "grad_norm": 1.2255218029022217, "learning_rate": 9.109010666574434e-06, "loss": 0.5129, "step": 1446 }, { "epoch": 0.22, "grad_norm": 1.2574726343154907, "learning_rate": 9.107624091834976e-06, "loss": 0.5243, "step": 1447 }, { "epoch": 0.22, "grad_norm": 1.1801012754440308, "learning_rate": 9.106236544731553e-06, "loss": 0.4509, "step": 1448 }, { "epoch": 0.22, "grad_norm": 1.771304726600647, "learning_rate": 9.104848025592627e-06, "loss": 0.5541, "step": 1449 }, { "epoch": 0.22, "grad_norm": 1.22213876247406, "learning_rate": 9.103458534746889e-06, "loss": 0.5334, "step": 1450 }, { "epoch": 0.22, "grad_norm": 1.5363541841506958, "learning_rate": 9.102068072523264e-06, "loss": 0.4589, "step": 1451 }, { "epoch": 0.22, "grad_norm": 1.4193543195724487, "learning_rate": 9.100676639250903e-06, "loss": 0.5408, "step": 1452 }, { "epoch": 0.22, "grad_norm": 1.922810673713684, "learning_rate": 9.099284235259189e-06, "loss": 0.5513, "step": 1453 }, { "epoch": 0.22, "grad_norm": 1.2797250747680664, "learning_rate": 9.097890860877732e-06, "loss": 0.5631, "step": 1454 }, { "epoch": 0.22, "grad_norm": 1.2629057168960571, "learning_rate": 9.09649651643638e-06, "loss": 0.5381, "step": 1455 }, { "epoch": 0.22, "grad_norm": 1.4896997213363647, "learning_rate": 9.095101202265198e-06, "loss": 0.5068, "step": 1456 }, { "epoch": 0.22, "grad_norm": 1.2805030345916748, "learning_rate": 9.093704918694491e-06, "loss": 0.516, "step": 1457 }, { "epoch": 0.22, "grad_norm": 1.3690824508666992, "learning_rate": 9.09230766605479e-06, "loss": 0.5196, "step": 1458 }, { "epoch": 0.22, "grad_norm": 1.5626314878463745, "learning_rate": 9.090909444676852e-06, "loss": 0.5148, "step": 1459 }, { "epoch": 0.22, "grad_norm": 1.4239894151687622, "learning_rate": 9.08951025489167e-06, "loss": 0.4972, "step": 1460 }, { "epoch": 0.22, "grad_norm": 1.0845838785171509, "learning_rate": 9.088110097030461e-06, "loss": 0.4953, "step": 1461 }, { "epoch": 0.22, "grad_norm": 1.2264304161071777, "learning_rate": 9.086708971424671e-06, "loss": 0.4702, "step": 1462 }, { "epoch": 0.22, "grad_norm": 1.5998855829238892, "learning_rate": 9.085306878405981e-06, "loss": 0.4192, "step": 1463 }, { "epoch": 0.22, "grad_norm": 1.3591853380203247, "learning_rate": 9.083903818306294e-06, "loss": 0.4439, "step": 1464 }, { "epoch": 0.22, "grad_norm": 1.1679404973983765, "learning_rate": 9.082499791457744e-06, "loss": 0.4796, "step": 1465 }, { "epoch": 0.22, "grad_norm": 2.609133005142212, "learning_rate": 9.081094798192698e-06, "loss": 0.4777, "step": 1466 }, { "epoch": 0.22, "grad_norm": 1.7777076959609985, "learning_rate": 9.079688838843746e-06, "loss": 0.5494, "step": 1467 }, { "epoch": 0.22, "grad_norm": 1.4202969074249268, "learning_rate": 9.07828191374371e-06, "loss": 0.5509, "step": 1468 }, { "epoch": 0.22, "grad_norm": 1.3699451684951782, "learning_rate": 9.076874023225638e-06, "loss": 0.4984, "step": 1469 }, { "epoch": 0.22, "grad_norm": 1.5167728662490845, "learning_rate": 9.075465167622812e-06, "loss": 0.455, "step": 1470 }, { "epoch": 0.22, "grad_norm": 1.5176506042480469, "learning_rate": 9.074055347268735e-06, "loss": 0.5085, "step": 1471 }, { "epoch": 0.22, "grad_norm": 1.2663694620132446, "learning_rate": 9.072644562497145e-06, "loss": 0.7456, "step": 1472 }, { "epoch": 0.22, "grad_norm": 1.5289884805679321, "learning_rate": 9.071232813642005e-06, "loss": 0.431, "step": 1473 }, { "epoch": 0.22, "grad_norm": 1.3170506954193115, "learning_rate": 9.069820101037506e-06, "loss": 0.469, "step": 1474 }, { "epoch": 0.22, "grad_norm": 1.3868224620819092, "learning_rate": 9.068406425018066e-06, "loss": 0.4751, "step": 1475 }, { "epoch": 0.22, "grad_norm": 1.3275597095489502, "learning_rate": 9.066991785918334e-06, "loss": 0.7999, "step": 1476 }, { "epoch": 0.22, "grad_norm": 1.4730627536773682, "learning_rate": 9.065576184073188e-06, "loss": 0.421, "step": 1477 }, { "epoch": 0.22, "grad_norm": 3.0905580520629883, "learning_rate": 9.06415961981773e-06, "loss": 0.4615, "step": 1478 }, { "epoch": 0.22, "grad_norm": 3.3525044918060303, "learning_rate": 9.062742093487291e-06, "loss": 0.5189, "step": 1479 }, { "epoch": 0.22, "grad_norm": 1.6397210359573364, "learning_rate": 9.061323605417431e-06, "loss": 0.5048, "step": 1480 }, { "epoch": 0.22, "grad_norm": 1.7136646509170532, "learning_rate": 9.059904155943938e-06, "loss": 0.5349, "step": 1481 }, { "epoch": 0.22, "grad_norm": 1.1565234661102295, "learning_rate": 9.058483745402826e-06, "loss": 0.4944, "step": 1482 }, { "epoch": 0.22, "grad_norm": 1.6962825059890747, "learning_rate": 9.057062374130334e-06, "loss": 0.5046, "step": 1483 }, { "epoch": 0.22, "grad_norm": 1.5992798805236816, "learning_rate": 9.055640042462937e-06, "loss": 0.5351, "step": 1484 }, { "epoch": 0.22, "grad_norm": 1.4304951429367065, "learning_rate": 9.054216750737328e-06, "loss": 0.4699, "step": 1485 }, { "epoch": 0.22, "grad_norm": 1.179754614830017, "learning_rate": 9.052792499290432e-06, "loss": 0.502, "step": 1486 }, { "epoch": 0.22, "grad_norm": 1.253466248512268, "learning_rate": 9.0513672884594e-06, "loss": 0.5399, "step": 1487 }, { "epoch": 0.22, "grad_norm": 1.8464590311050415, "learning_rate": 9.04994111858161e-06, "loss": 0.5413, "step": 1488 }, { "epoch": 0.22, "grad_norm": 1.2213335037231445, "learning_rate": 9.04851398999467e-06, "loss": 0.4941, "step": 1489 }, { "epoch": 0.22, "grad_norm": 1.3137873411178589, "learning_rate": 9.04708590303641e-06, "loss": 0.4811, "step": 1490 }, { "epoch": 0.22, "grad_norm": 2.0453479290008545, "learning_rate": 9.04565685804489e-06, "loss": 0.474, "step": 1491 }, { "epoch": 0.22, "grad_norm": 1.4711477756500244, "learning_rate": 9.044226855358395e-06, "loss": 0.5417, "step": 1492 }, { "epoch": 0.22, "grad_norm": 1.364737629890442, "learning_rate": 9.04279589531544e-06, "loss": 0.5468, "step": 1493 }, { "epoch": 0.22, "grad_norm": 1.7261228561401367, "learning_rate": 9.041363978254763e-06, "loss": 0.505, "step": 1494 }, { "epoch": 0.22, "grad_norm": 1.1853407621383667, "learning_rate": 9.039931104515329e-06, "loss": 0.5024, "step": 1495 }, { "epoch": 0.22, "grad_norm": 1.405864953994751, "learning_rate": 9.038497274436332e-06, "loss": 0.4447, "step": 1496 }, { "epoch": 0.22, "grad_norm": 1.3793002367019653, "learning_rate": 9.03706248835719e-06, "loss": 0.4367, "step": 1497 }, { "epoch": 0.23, "grad_norm": 1.7423254251480103, "learning_rate": 9.035626746617547e-06, "loss": 0.5472, "step": 1498 }, { "epoch": 0.23, "grad_norm": 1.3293627500534058, "learning_rate": 9.034190049557277e-06, "loss": 0.5233, "step": 1499 }, { "epoch": 0.23, "grad_norm": 1.1597017049789429, "learning_rate": 9.032752397516475e-06, "loss": 0.7209, "step": 1500 }, { "epoch": 0.23, "grad_norm": 1.176792860031128, "learning_rate": 9.031313790835466e-06, "loss": 0.7556, "step": 1501 }, { "epoch": 0.23, "grad_norm": 1.0905896425247192, "learning_rate": 9.029874229854796e-06, "loss": 0.4304, "step": 1502 }, { "epoch": 0.23, "grad_norm": 1.2614291906356812, "learning_rate": 9.028433714915247e-06, "loss": 0.493, "step": 1503 }, { "epoch": 0.23, "grad_norm": 1.4750750064849854, "learning_rate": 9.026992246357815e-06, "loss": 0.495, "step": 1504 }, { "epoch": 0.23, "grad_norm": 1.2208752632141113, "learning_rate": 9.025549824523729e-06, "loss": 0.5107, "step": 1505 }, { "epoch": 0.23, "grad_norm": 1.4027036428451538, "learning_rate": 9.02410644975444e-06, "loss": 0.4752, "step": 1506 }, { "epoch": 0.23, "grad_norm": 1.6230242252349854, "learning_rate": 9.022662122391628e-06, "loss": 0.5344, "step": 1507 }, { "epoch": 0.23, "grad_norm": 1.6272026300430298, "learning_rate": 9.021216842777196e-06, "loss": 0.5768, "step": 1508 }, { "epoch": 0.23, "grad_norm": 1.3401442766189575, "learning_rate": 9.019770611253272e-06, "loss": 0.4776, "step": 1509 }, { "epoch": 0.23, "grad_norm": 1.3829748630523682, "learning_rate": 9.018323428162211e-06, "loss": 0.4826, "step": 1510 }, { "epoch": 0.23, "grad_norm": 1.4779058694839478, "learning_rate": 9.016875293846597e-06, "loss": 0.5802, "step": 1511 }, { "epoch": 0.23, "grad_norm": 1.4576680660247803, "learning_rate": 9.015426208649227e-06, "loss": 0.6092, "step": 1512 }, { "epoch": 0.23, "grad_norm": 1.3589873313903809, "learning_rate": 9.013976172913136e-06, "loss": 0.4545, "step": 1513 }, { "epoch": 0.23, "grad_norm": 1.1501303911209106, "learning_rate": 9.01252518698158e-06, "loss": 0.5768, "step": 1514 }, { "epoch": 0.23, "grad_norm": 1.3729209899902344, "learning_rate": 9.011073251198038e-06, "loss": 0.4805, "step": 1515 }, { "epoch": 0.23, "grad_norm": 1.663916826248169, "learning_rate": 9.009620365906211e-06, "loss": 0.4799, "step": 1516 }, { "epoch": 0.23, "grad_norm": 1.1153957843780518, "learning_rate": 9.008166531450031e-06, "loss": 0.4631, "step": 1517 }, { "epoch": 0.23, "grad_norm": 1.3980190753936768, "learning_rate": 9.006711748173655e-06, "loss": 0.5846, "step": 1518 }, { "epoch": 0.23, "grad_norm": 1.1886534690856934, "learning_rate": 9.005256016421458e-06, "loss": 0.4632, "step": 1519 }, { "epoch": 0.23, "grad_norm": 1.132794976234436, "learning_rate": 9.003799336538046e-06, "loss": 0.4376, "step": 1520 }, { "epoch": 0.23, "grad_norm": 1.3086563348770142, "learning_rate": 9.002341708868245e-06, "loss": 0.5059, "step": 1521 }, { "epoch": 0.23, "grad_norm": 1.0948108434677124, "learning_rate": 9.000883133757108e-06, "loss": 0.4724, "step": 1522 }, { "epoch": 0.23, "grad_norm": 1.2592203617095947, "learning_rate": 8.99942361154991e-06, "loss": 0.4847, "step": 1523 }, { "epoch": 0.23, "grad_norm": 1.323347568511963, "learning_rate": 8.997963142592154e-06, "loss": 0.4946, "step": 1524 }, { "epoch": 0.23, "grad_norm": 1.2209198474884033, "learning_rate": 8.996501727229565e-06, "loss": 0.4785, "step": 1525 }, { "epoch": 0.23, "grad_norm": 1.2434808015823364, "learning_rate": 8.99503936580809e-06, "loss": 0.5325, "step": 1526 }, { "epoch": 0.23, "grad_norm": 1.6342787742614746, "learning_rate": 8.9935760586739e-06, "loss": 0.521, "step": 1527 }, { "epoch": 0.23, "grad_norm": 1.1825730800628662, "learning_rate": 8.992111806173396e-06, "loss": 0.4924, "step": 1528 }, { "epoch": 0.23, "grad_norm": 1.271693229675293, "learning_rate": 8.990646608653196e-06, "loss": 0.5287, "step": 1529 }, { "epoch": 0.23, "grad_norm": 1.629255771636963, "learning_rate": 8.989180466460143e-06, "loss": 0.4289, "step": 1530 }, { "epoch": 0.23, "grad_norm": 1.406409502029419, "learning_rate": 8.987713379941307e-06, "loss": 0.5511, "step": 1531 }, { "epoch": 0.23, "grad_norm": 1.1274566650390625, "learning_rate": 8.98624534944398e-06, "loss": 0.4786, "step": 1532 }, { "epoch": 0.23, "grad_norm": 1.322790503501892, "learning_rate": 8.984776375315674e-06, "loss": 0.4587, "step": 1533 }, { "epoch": 0.23, "grad_norm": 1.3124428987503052, "learning_rate": 8.983306457904128e-06, "loss": 0.5573, "step": 1534 }, { "epoch": 0.23, "grad_norm": 1.169740915298462, "learning_rate": 8.981835597557304e-06, "loss": 0.4858, "step": 1535 }, { "epoch": 0.23, "grad_norm": 1.1487231254577637, "learning_rate": 8.980363794623385e-06, "loss": 0.5556, "step": 1536 }, { "epoch": 0.23, "grad_norm": 1.1511965990066528, "learning_rate": 8.978891049450783e-06, "loss": 0.5035, "step": 1537 }, { "epoch": 0.23, "grad_norm": 1.263709306716919, "learning_rate": 8.977417362388125e-06, "loss": 0.5063, "step": 1538 }, { "epoch": 0.23, "grad_norm": 1.145020842552185, "learning_rate": 8.975942733784266e-06, "loss": 0.4598, "step": 1539 }, { "epoch": 0.23, "grad_norm": 1.6309690475463867, "learning_rate": 8.97446716398828e-06, "loss": 0.5967, "step": 1540 }, { "epoch": 0.23, "grad_norm": 1.172707200050354, "learning_rate": 8.972990653349472e-06, "loss": 0.4645, "step": 1541 }, { "epoch": 0.23, "grad_norm": 1.2477456331253052, "learning_rate": 8.971513202217359e-06, "loss": 0.4493, "step": 1542 }, { "epoch": 0.23, "grad_norm": 1.2188640832901, "learning_rate": 8.970034810941688e-06, "loss": 0.4385, "step": 1543 }, { "epoch": 0.23, "grad_norm": 1.3819398880004883, "learning_rate": 8.968555479872428e-06, "loss": 0.4387, "step": 1544 }, { "epoch": 0.23, "grad_norm": 1.0953062772750854, "learning_rate": 8.967075209359766e-06, "loss": 0.4225, "step": 1545 }, { "epoch": 0.23, "grad_norm": 1.51253080368042, "learning_rate": 8.965593999754116e-06, "loss": 0.487, "step": 1546 }, { "epoch": 0.23, "grad_norm": 1.4766221046447754, "learning_rate": 8.96411185140611e-06, "loss": 0.4743, "step": 1547 }, { "epoch": 0.23, "grad_norm": 1.2382136583328247, "learning_rate": 8.962628764666609e-06, "loss": 0.4488, "step": 1548 }, { "epoch": 0.23, "grad_norm": 1.2977021932601929, "learning_rate": 8.961144739886688e-06, "loss": 0.542, "step": 1549 }, { "epoch": 0.23, "grad_norm": 1.450998306274414, "learning_rate": 8.95965977741765e-06, "loss": 0.4925, "step": 1550 }, { "epoch": 0.23, "grad_norm": 1.464455246925354, "learning_rate": 8.958173877611017e-06, "loss": 0.5056, "step": 1551 }, { "epoch": 0.23, "grad_norm": 1.343989372253418, "learning_rate": 8.956687040818534e-06, "loss": 0.5152, "step": 1552 }, { "epoch": 0.23, "grad_norm": 1.1734529733657837, "learning_rate": 8.955199267392168e-06, "loss": 0.5233, "step": 1553 }, { "epoch": 0.23, "grad_norm": 1.2089344263076782, "learning_rate": 8.953710557684108e-06, "loss": 0.4738, "step": 1554 }, { "epoch": 0.23, "grad_norm": 2.0730361938476562, "learning_rate": 8.952220912046763e-06, "loss": 0.4497, "step": 1555 }, { "epoch": 0.23, "grad_norm": 1.2555333375930786, "learning_rate": 8.950730330832762e-06, "loss": 0.549, "step": 1556 }, { "epoch": 0.23, "grad_norm": 1.2275817394256592, "learning_rate": 8.949238814394963e-06, "loss": 0.4767, "step": 1557 }, { "epoch": 0.23, "grad_norm": 1.6300101280212402, "learning_rate": 8.947746363086438e-06, "loss": 0.535, "step": 1558 }, { "epoch": 0.23, "grad_norm": 1.033401608467102, "learning_rate": 8.946252977260481e-06, "loss": 0.472, "step": 1559 }, { "epoch": 0.23, "grad_norm": 1.2731515169143677, "learning_rate": 8.944758657270614e-06, "loss": 0.4515, "step": 1560 }, { "epoch": 0.23, "grad_norm": 1.2072734832763672, "learning_rate": 8.943263403470568e-06, "loss": 0.7559, "step": 1561 }, { "epoch": 0.23, "grad_norm": 1.321234941482544, "learning_rate": 8.941767216214307e-06, "loss": 0.494, "step": 1562 }, { "epoch": 0.23, "grad_norm": 1.1344738006591797, "learning_rate": 8.94027009585601e-06, "loss": 0.437, "step": 1563 }, { "epoch": 0.23, "grad_norm": 2.7256853580474854, "learning_rate": 8.938772042750078e-06, "loss": 0.516, "step": 1564 }, { "epoch": 0.24, "grad_norm": 1.1759955883026123, "learning_rate": 8.937273057251133e-06, "loss": 0.4805, "step": 1565 }, { "epoch": 0.24, "grad_norm": 1.281517505645752, "learning_rate": 8.935773139714015e-06, "loss": 0.5228, "step": 1566 }, { "epoch": 0.24, "grad_norm": 1.3872240781784058, "learning_rate": 8.93427229049379e-06, "loss": 0.5227, "step": 1567 }, { "epoch": 0.24, "grad_norm": 1.4572972059249878, "learning_rate": 8.93277050994574e-06, "loss": 0.586, "step": 1568 }, { "epoch": 0.24, "grad_norm": 1.2820254564285278, "learning_rate": 8.931267798425372e-06, "loss": 0.5329, "step": 1569 }, { "epoch": 0.24, "grad_norm": 1.1274217367172241, "learning_rate": 8.929764156288406e-06, "loss": 0.4945, "step": 1570 }, { "epoch": 0.24, "grad_norm": 1.3592357635498047, "learning_rate": 8.92825958389079e-06, "loss": 0.4679, "step": 1571 }, { "epoch": 0.24, "grad_norm": 1.6889357566833496, "learning_rate": 8.926754081588688e-06, "loss": 0.5515, "step": 1572 }, { "epoch": 0.24, "grad_norm": 1.310085415840149, "learning_rate": 8.925247649738487e-06, "loss": 0.5641, "step": 1573 }, { "epoch": 0.24, "grad_norm": 1.4057384729385376, "learning_rate": 8.923740288696789e-06, "loss": 0.4974, "step": 1574 }, { "epoch": 0.24, "grad_norm": 0.9802756905555725, "learning_rate": 8.92223199882042e-06, "loss": 0.498, "step": 1575 }, { "epoch": 0.24, "grad_norm": 1.6362472772598267, "learning_rate": 8.920722780466426e-06, "loss": 0.4616, "step": 1576 }, { "epoch": 0.24, "grad_norm": 1.7217309474945068, "learning_rate": 8.919212633992071e-06, "loss": 0.5601, "step": 1577 }, { "epoch": 0.24, "grad_norm": 3.0508744716644287, "learning_rate": 8.917701559754842e-06, "loss": 0.4678, "step": 1578 }, { "epoch": 0.24, "grad_norm": 1.2764782905578613, "learning_rate": 8.916189558112441e-06, "loss": 0.5774, "step": 1579 }, { "epoch": 0.24, "grad_norm": 1.5062477588653564, "learning_rate": 8.914676629422789e-06, "loss": 0.5082, "step": 1580 }, { "epoch": 0.24, "grad_norm": 1.1655126810073853, "learning_rate": 8.913162774044034e-06, "loss": 0.5247, "step": 1581 }, { "epoch": 0.24, "grad_norm": 1.2395036220550537, "learning_rate": 8.911647992334536e-06, "loss": 0.53, "step": 1582 }, { "epoch": 0.24, "grad_norm": 1.0939944982528687, "learning_rate": 8.910132284652876e-06, "loss": 0.4756, "step": 1583 }, { "epoch": 0.24, "grad_norm": 1.3615303039550781, "learning_rate": 8.908615651357858e-06, "loss": 0.5366, "step": 1584 }, { "epoch": 0.24, "grad_norm": 2.0214338302612305, "learning_rate": 8.9070980928085e-06, "loss": 0.468, "step": 1585 }, { "epoch": 0.24, "grad_norm": 0.9926596283912659, "learning_rate": 8.905579609364041e-06, "loss": 0.4863, "step": 1586 }, { "epoch": 0.24, "grad_norm": 1.951778531074524, "learning_rate": 8.904060201383939e-06, "loss": 0.5584, "step": 1587 }, { "epoch": 0.24, "grad_norm": 1.3072185516357422, "learning_rate": 8.90253986922787e-06, "loss": 0.5023, "step": 1588 }, { "epoch": 0.24, "grad_norm": 1.0316944122314453, "learning_rate": 8.901018613255734e-06, "loss": 0.4647, "step": 1589 }, { "epoch": 0.24, "grad_norm": 1.2968617677688599, "learning_rate": 8.899496433827642e-06, "loss": 0.4517, "step": 1590 }, { "epoch": 0.24, "grad_norm": 1.1546916961669922, "learning_rate": 8.897973331303927e-06, "loss": 0.4798, "step": 1591 }, { "epoch": 0.24, "grad_norm": 1.1318193674087524, "learning_rate": 8.89644930604514e-06, "loss": 0.5235, "step": 1592 }, { "epoch": 0.24, "grad_norm": 1.0957199335098267, "learning_rate": 8.894924358412052e-06, "loss": 0.5055, "step": 1593 }, { "epoch": 0.24, "grad_norm": 1.8878700733184814, "learning_rate": 8.89339848876565e-06, "loss": 0.4782, "step": 1594 }, { "epoch": 0.24, "grad_norm": 1.3746544122695923, "learning_rate": 8.891871697467143e-06, "loss": 0.4951, "step": 1595 }, { "epoch": 0.24, "grad_norm": 1.0943771600723267, "learning_rate": 8.890343984877954e-06, "loss": 0.4556, "step": 1596 }, { "epoch": 0.24, "grad_norm": 1.0642361640930176, "learning_rate": 8.888815351359727e-06, "loss": 0.4188, "step": 1597 }, { "epoch": 0.24, "grad_norm": 1.4579145908355713, "learning_rate": 8.887285797274321e-06, "loss": 0.5269, "step": 1598 }, { "epoch": 0.24, "grad_norm": 1.7712332010269165, "learning_rate": 8.885755322983816e-06, "loss": 0.5158, "step": 1599 }, { "epoch": 0.24, "grad_norm": 1.4247585535049438, "learning_rate": 8.884223928850509e-06, "loss": 0.6188, "step": 1600 }, { "epoch": 0.24, "grad_norm": 1.2528126239776611, "learning_rate": 8.882691615236912e-06, "loss": 0.4965, "step": 1601 }, { "epoch": 0.24, "grad_norm": 1.3131414651870728, "learning_rate": 8.88115838250576e-06, "loss": 0.5164, "step": 1602 }, { "epoch": 0.24, "grad_norm": 1.2317723035812378, "learning_rate": 8.87962423102e-06, "loss": 0.53, "step": 1603 }, { "epoch": 0.24, "grad_norm": 1.3225411176681519, "learning_rate": 8.8780891611428e-06, "loss": 0.4498, "step": 1604 }, { "epoch": 0.24, "grad_norm": 1.2516804933547974, "learning_rate": 8.876553173237544e-06, "loss": 0.4869, "step": 1605 }, { "epoch": 0.24, "grad_norm": 1.3150806427001953, "learning_rate": 8.875016267667833e-06, "loss": 0.4835, "step": 1606 }, { "epoch": 0.24, "grad_norm": 1.2279926538467407, "learning_rate": 8.873478444797489e-06, "loss": 0.5072, "step": 1607 }, { "epoch": 0.24, "grad_norm": 1.1826846599578857, "learning_rate": 8.871939704990547e-06, "loss": 0.747, "step": 1608 }, { "epoch": 0.24, "grad_norm": 1.6216799020767212, "learning_rate": 8.870400048611259e-06, "loss": 0.5356, "step": 1609 }, { "epoch": 0.24, "grad_norm": 1.123616099357605, "learning_rate": 8.868859476024095e-06, "loss": 0.4932, "step": 1610 }, { "epoch": 0.24, "grad_norm": 1.2493823766708374, "learning_rate": 8.867317987593744e-06, "loss": 0.4754, "step": 1611 }, { "epoch": 0.24, "grad_norm": 1.2676923274993896, "learning_rate": 8.865775583685107e-06, "loss": 0.4785, "step": 1612 }, { "epoch": 0.24, "grad_norm": 1.377173900604248, "learning_rate": 8.864232264663306e-06, "loss": 0.5366, "step": 1613 }, { "epoch": 0.24, "grad_norm": 1.3904030323028564, "learning_rate": 8.86268803089368e-06, "loss": 0.5057, "step": 1614 }, { "epoch": 0.24, "grad_norm": 1.3298366069793701, "learning_rate": 8.86114288274178e-06, "loss": 0.4936, "step": 1615 }, { "epoch": 0.24, "grad_norm": 0.9336588978767395, "learning_rate": 8.859596820573378e-06, "loss": 0.4968, "step": 1616 }, { "epoch": 0.24, "grad_norm": 1.3151936531066895, "learning_rate": 8.858049844754457e-06, "loss": 0.4819, "step": 1617 }, { "epoch": 0.24, "grad_norm": 1.0574445724487305, "learning_rate": 8.856501955651225e-06, "loss": 0.6883, "step": 1618 }, { "epoch": 0.24, "grad_norm": 1.2610663175582886, "learning_rate": 8.854953153630097e-06, "loss": 0.5513, "step": 1619 }, { "epoch": 0.24, "grad_norm": 1.187889575958252, "learning_rate": 8.853403439057711e-06, "loss": 0.465, "step": 1620 }, { "epoch": 0.24, "grad_norm": 1.0730599164962769, "learning_rate": 8.851852812300915e-06, "loss": 0.5026, "step": 1621 }, { "epoch": 0.24, "grad_norm": 1.1953935623168945, "learning_rate": 8.850301273726781e-06, "loss": 0.4767, "step": 1622 }, { "epoch": 0.24, "grad_norm": 1.5309538841247559, "learning_rate": 8.848748823702587e-06, "loss": 0.5296, "step": 1623 }, { "epoch": 0.24, "grad_norm": 1.2406142950057983, "learning_rate": 8.847195462595833e-06, "loss": 0.3941, "step": 1624 }, { "epoch": 0.24, "grad_norm": 1.0086910724639893, "learning_rate": 8.845641190774232e-06, "loss": 0.4535, "step": 1625 }, { "epoch": 0.24, "grad_norm": 1.2979360818862915, "learning_rate": 8.844086008605718e-06, "loss": 0.5389, "step": 1626 }, { "epoch": 0.24, "grad_norm": 1.400368571281433, "learning_rate": 8.842529916458436e-06, "loss": 0.57, "step": 1627 }, { "epoch": 0.24, "grad_norm": 2.010911703109741, "learning_rate": 8.840972914700743e-06, "loss": 0.5396, "step": 1628 }, { "epoch": 0.24, "grad_norm": 1.5906423330307007, "learning_rate": 8.839415003701217e-06, "loss": 0.4404, "step": 1629 }, { "epoch": 0.24, "grad_norm": 1.2552416324615479, "learning_rate": 8.83785618382865e-06, "loss": 0.4468, "step": 1630 }, { "epoch": 0.25, "grad_norm": 1.9371377229690552, "learning_rate": 8.83629645545205e-06, "loss": 0.53, "step": 1631 }, { "epoch": 0.25, "grad_norm": 1.3290977478027344, "learning_rate": 8.834735818940635e-06, "loss": 0.5582, "step": 1632 }, { "epoch": 0.25, "grad_norm": 1.2685633897781372, "learning_rate": 8.833174274663844e-06, "loss": 0.5059, "step": 1633 }, { "epoch": 0.25, "grad_norm": 1.1407675743103027, "learning_rate": 8.831611822991329e-06, "loss": 0.5373, "step": 1634 }, { "epoch": 0.25, "grad_norm": 2.3898067474365234, "learning_rate": 8.830048464292955e-06, "loss": 0.5284, "step": 1635 }, { "epoch": 0.25, "grad_norm": 1.3072093725204468, "learning_rate": 8.828484198938801e-06, "loss": 0.4654, "step": 1636 }, { "epoch": 0.25, "grad_norm": 1.6623979806900024, "learning_rate": 8.826919027299168e-06, "loss": 0.4343, "step": 1637 }, { "epoch": 0.25, "grad_norm": 1.672965407371521, "learning_rate": 8.825352949744561e-06, "loss": 0.4863, "step": 1638 }, { "epoch": 0.25, "grad_norm": 2.0410172939300537, "learning_rate": 8.823785966645708e-06, "loss": 0.4946, "step": 1639 }, { "epoch": 0.25, "grad_norm": 1.253017783164978, "learning_rate": 8.822218078373545e-06, "loss": 0.7457, "step": 1640 }, { "epoch": 0.25, "grad_norm": 1.1323047876358032, "learning_rate": 8.82064928529923e-06, "loss": 0.4709, "step": 1641 }, { "epoch": 0.25, "grad_norm": 1.2584056854248047, "learning_rate": 8.819079587794125e-06, "loss": 0.7958, "step": 1642 }, { "epoch": 0.25, "grad_norm": 1.1814364194869995, "learning_rate": 8.817508986229815e-06, "loss": 0.5686, "step": 1643 }, { "epoch": 0.25, "grad_norm": 1.306287169456482, "learning_rate": 8.815937480978094e-06, "loss": 0.5049, "step": 1644 }, { "epoch": 0.25, "grad_norm": 1.3196191787719727, "learning_rate": 8.814365072410972e-06, "loss": 0.5608, "step": 1645 }, { "epoch": 0.25, "grad_norm": 1.6722790002822876, "learning_rate": 8.812791760900672e-06, "loss": 0.4632, "step": 1646 }, { "epoch": 0.25, "grad_norm": 1.3454930782318115, "learning_rate": 8.811217546819632e-06, "loss": 0.5744, "step": 1647 }, { "epoch": 0.25, "grad_norm": 1.3194915056228638, "learning_rate": 8.809642430540502e-06, "loss": 0.4435, "step": 1648 }, { "epoch": 0.25, "grad_norm": 1.119567632675171, "learning_rate": 8.808066412436148e-06, "loss": 0.5059, "step": 1649 }, { "epoch": 0.25, "grad_norm": 1.154075026512146, "learning_rate": 8.806489492879644e-06, "loss": 0.4528, "step": 1650 }, { "epoch": 0.25, "grad_norm": 1.0891308784484863, "learning_rate": 8.804911672244283e-06, "loss": 0.5089, "step": 1651 }, { "epoch": 0.25, "grad_norm": 1.2002102136611938, "learning_rate": 8.803332950903571e-06, "loss": 0.4853, "step": 1652 }, { "epoch": 0.25, "grad_norm": 1.1503517627716064, "learning_rate": 8.801753329231223e-06, "loss": 0.7503, "step": 1653 }, { "epoch": 0.25, "grad_norm": 1.175329566001892, "learning_rate": 8.800172807601173e-06, "loss": 0.5084, "step": 1654 }, { "epoch": 0.25, "grad_norm": 1.2300455570220947, "learning_rate": 8.79859138638756e-06, "loss": 0.4935, "step": 1655 }, { "epoch": 0.25, "grad_norm": 1.1432667970657349, "learning_rate": 8.797009065964748e-06, "loss": 0.5167, "step": 1656 }, { "epoch": 0.25, "grad_norm": 1.3957804441452026, "learning_rate": 8.7954258467073e-06, "loss": 0.5229, "step": 1657 }, { "epoch": 0.25, "grad_norm": 1.4486842155456543, "learning_rate": 8.793841728990001e-06, "loss": 0.4977, "step": 1658 }, { "epoch": 0.25, "grad_norm": 1.2814310789108276, "learning_rate": 8.792256713187847e-06, "loss": 0.7204, "step": 1659 }, { "epoch": 0.25, "grad_norm": 1.1406025886535645, "learning_rate": 8.790670799676045e-06, "loss": 0.521, "step": 1660 }, { "epoch": 0.25, "grad_norm": 1.0182340145111084, "learning_rate": 8.789083988830014e-06, "loss": 0.5127, "step": 1661 }, { "epoch": 0.25, "grad_norm": 9.213183403015137, "learning_rate": 8.787496281025387e-06, "loss": 0.4614, "step": 1662 }, { "epoch": 0.25, "grad_norm": 1.4325767755508423, "learning_rate": 8.78590767663801e-06, "loss": 0.5339, "step": 1663 }, { "epoch": 0.25, "grad_norm": 1.538521409034729, "learning_rate": 8.784318176043942e-06, "loss": 0.5152, "step": 1664 }, { "epoch": 0.25, "grad_norm": 1.1601104736328125, "learning_rate": 8.782727779619449e-06, "loss": 0.5538, "step": 1665 }, { "epoch": 0.25, "grad_norm": 1.2010310888290405, "learning_rate": 8.781136487741014e-06, "loss": 0.4333, "step": 1666 }, { "epoch": 0.25, "grad_norm": 1.0538578033447266, "learning_rate": 8.779544300785328e-06, "loss": 0.443, "step": 1667 }, { "epoch": 0.25, "grad_norm": 1.1116390228271484, "learning_rate": 8.777951219129301e-06, "loss": 0.5059, "step": 1668 }, { "epoch": 0.25, "grad_norm": 1.2641290426254272, "learning_rate": 8.776357243150044e-06, "loss": 0.4479, "step": 1669 }, { "epoch": 0.25, "grad_norm": 1.3175050020217896, "learning_rate": 8.774762373224892e-06, "loss": 0.5137, "step": 1670 }, { "epoch": 0.25, "grad_norm": 1.0069398880004883, "learning_rate": 8.773166609731383e-06, "loss": 0.4574, "step": 1671 }, { "epoch": 0.25, "grad_norm": 1.3714003562927246, "learning_rate": 8.771569953047267e-06, "loss": 0.4549, "step": 1672 }, { "epoch": 0.25, "grad_norm": 1.203713297843933, "learning_rate": 8.769972403550509e-06, "loss": 0.4581, "step": 1673 }, { "epoch": 0.25, "grad_norm": 1.2208400964736938, "learning_rate": 8.768373961619283e-06, "loss": 0.5216, "step": 1674 }, { "epoch": 0.25, "grad_norm": 1.0285223722457886, "learning_rate": 8.766774627631977e-06, "loss": 0.5417, "step": 1675 }, { "epoch": 0.25, "grad_norm": 1.4010896682739258, "learning_rate": 8.765174401967187e-06, "loss": 0.5199, "step": 1676 }, { "epoch": 0.25, "grad_norm": 1.4558022022247314, "learning_rate": 8.76357328500372e-06, "loss": 0.4548, "step": 1677 }, { "epoch": 0.25, "grad_norm": 1.7143518924713135, "learning_rate": 8.761971277120595e-06, "loss": 0.4876, "step": 1678 }, { "epoch": 0.25, "grad_norm": 1.1323012113571167, "learning_rate": 8.760368378697046e-06, "loss": 0.5095, "step": 1679 }, { "epoch": 0.25, "grad_norm": 1.9020435810089111, "learning_rate": 8.75876459011251e-06, "loss": 0.5512, "step": 1680 }, { "epoch": 0.25, "grad_norm": 1.0554178953170776, "learning_rate": 8.757159911746638e-06, "loss": 0.7268, "step": 1681 }, { "epoch": 0.25, "grad_norm": 1.2868213653564453, "learning_rate": 8.755554343979296e-06, "loss": 0.5318, "step": 1682 }, { "epoch": 0.25, "grad_norm": 1.3874928951263428, "learning_rate": 8.753947887190555e-06, "loss": 0.5033, "step": 1683 }, { "epoch": 0.25, "grad_norm": 1.2353838682174683, "learning_rate": 8.752340541760698e-06, "loss": 0.4917, "step": 1684 }, { "epoch": 0.25, "grad_norm": 1.4494608640670776, "learning_rate": 8.750732308070218e-06, "loss": 0.4315, "step": 1685 }, { "epoch": 0.25, "grad_norm": 1.3529667854309082, "learning_rate": 8.74912318649982e-06, "loss": 0.5022, "step": 1686 }, { "epoch": 0.25, "grad_norm": 1.4571505784988403, "learning_rate": 8.747513177430418e-06, "loss": 0.5095, "step": 1687 }, { "epoch": 0.25, "grad_norm": 0.9888219833374023, "learning_rate": 8.745902281243135e-06, "loss": 0.4502, "step": 1688 }, { "epoch": 0.25, "grad_norm": 1.7767515182495117, "learning_rate": 8.744290498319306e-06, "loss": 0.4814, "step": 1689 }, { "epoch": 0.25, "grad_norm": 1.156506061553955, "learning_rate": 8.742677829040477e-06, "loss": 0.4528, "step": 1690 }, { "epoch": 0.25, "grad_norm": 1.2259541749954224, "learning_rate": 8.741064273788399e-06, "loss": 0.4603, "step": 1691 }, { "epoch": 0.25, "grad_norm": 1.4126949310302734, "learning_rate": 8.739449832945036e-06, "loss": 0.5281, "step": 1692 }, { "epoch": 0.25, "grad_norm": 1.101479411125183, "learning_rate": 8.737834506892564e-06, "loss": 0.5075, "step": 1693 }, { "epoch": 0.25, "grad_norm": 1.946187973022461, "learning_rate": 8.736218296013362e-06, "loss": 0.476, "step": 1694 }, { "epoch": 0.25, "grad_norm": 0.9933519959449768, "learning_rate": 8.734601200690024e-06, "loss": 0.4927, "step": 1695 }, { "epoch": 0.25, "grad_norm": 4.829152584075928, "learning_rate": 8.732983221305351e-06, "loss": 0.531, "step": 1696 }, { "epoch": 0.25, "grad_norm": 1.2000317573547363, "learning_rate": 8.731364358242355e-06, "loss": 0.4216, "step": 1697 }, { "epoch": 0.26, "grad_norm": 1.3115845918655396, "learning_rate": 8.729744611884257e-06, "loss": 0.4775, "step": 1698 }, { "epoch": 0.26, "grad_norm": 1.2795155048370361, "learning_rate": 8.728123982614485e-06, "loss": 0.4996, "step": 1699 }, { "epoch": 0.26, "grad_norm": 1.1035679578781128, "learning_rate": 8.726502470816676e-06, "loss": 0.4784, "step": 1700 }, { "epoch": 0.26, "grad_norm": 1.3001277446746826, "learning_rate": 8.72488007687468e-06, "loss": 0.5331, "step": 1701 }, { "epoch": 0.26, "grad_norm": 1.1205244064331055, "learning_rate": 8.723256801172549e-06, "loss": 0.5091, "step": 1702 }, { "epoch": 0.26, "grad_norm": 1.199263334274292, "learning_rate": 8.721632644094548e-06, "loss": 0.5206, "step": 1703 }, { "epoch": 0.26, "grad_norm": 1.1348521709442139, "learning_rate": 8.720007606025155e-06, "loss": 0.4654, "step": 1704 }, { "epoch": 0.26, "grad_norm": 1.0742484331130981, "learning_rate": 8.718381687349048e-06, "loss": 0.471, "step": 1705 }, { "epoch": 0.26, "grad_norm": 1.3442060947418213, "learning_rate": 8.71675488845112e-06, "loss": 0.5006, "step": 1706 }, { "epoch": 0.26, "grad_norm": 1.805105209350586, "learning_rate": 8.715127209716467e-06, "loss": 0.4817, "step": 1707 }, { "epoch": 0.26, "grad_norm": 1.1800180673599243, "learning_rate": 8.713498651530394e-06, "loss": 0.3987, "step": 1708 }, { "epoch": 0.26, "grad_norm": 1.2714184522628784, "learning_rate": 8.711869214278423e-06, "loss": 0.5452, "step": 1709 }, { "epoch": 0.26, "grad_norm": 1.0865641832351685, "learning_rate": 8.71023889834627e-06, "loss": 0.4367, "step": 1710 }, { "epoch": 0.26, "grad_norm": 1.3198803663253784, "learning_rate": 8.708607704119872e-06, "loss": 0.5406, "step": 1711 }, { "epoch": 0.26, "grad_norm": 1.3801788091659546, "learning_rate": 8.706975631985364e-06, "loss": 0.4805, "step": 1712 }, { "epoch": 0.26, "grad_norm": 5.456722736358643, "learning_rate": 8.705342682329096e-06, "loss": 0.5013, "step": 1713 }, { "epoch": 0.26, "grad_norm": 1.4021308422088623, "learning_rate": 8.703708855537622e-06, "loss": 0.4999, "step": 1714 }, { "epoch": 0.26, "grad_norm": 1.238990306854248, "learning_rate": 8.7020741519977e-06, "loss": 0.5248, "step": 1715 }, { "epoch": 0.26, "grad_norm": 1.3142966032028198, "learning_rate": 8.700438572096308e-06, "loss": 0.4823, "step": 1716 }, { "epoch": 0.26, "grad_norm": 1.2345528602600098, "learning_rate": 8.698802116220617e-06, "loss": 0.5296, "step": 1717 }, { "epoch": 0.26, "grad_norm": 1.1867798566818237, "learning_rate": 8.697164784758015e-06, "loss": 0.4489, "step": 1718 }, { "epoch": 0.26, "grad_norm": 1.117111086845398, "learning_rate": 8.69552657809609e-06, "loss": 0.489, "step": 1719 }, { "epoch": 0.26, "grad_norm": 1.1692793369293213, "learning_rate": 8.693887496622645e-06, "loss": 0.5524, "step": 1720 }, { "epoch": 0.26, "grad_norm": 1.5464122295379639, "learning_rate": 8.692247540725684e-06, "loss": 0.5018, "step": 1721 }, { "epoch": 0.26, "grad_norm": 1.3381637334823608, "learning_rate": 8.690606710793423e-06, "loss": 0.6431, "step": 1722 }, { "epoch": 0.26, "grad_norm": 1.1021716594696045, "learning_rate": 8.688965007214278e-06, "loss": 0.4426, "step": 1723 }, { "epoch": 0.26, "grad_norm": 1.391553521156311, "learning_rate": 8.68732243037688e-06, "loss": 0.5169, "step": 1724 }, { "epoch": 0.26, "grad_norm": 1.0991548299789429, "learning_rate": 8.68567898067006e-06, "loss": 0.4661, "step": 1725 }, { "epoch": 0.26, "grad_norm": 1.0855486392974854, "learning_rate": 8.68403465848286e-06, "loss": 0.5055, "step": 1726 }, { "epoch": 0.26, "grad_norm": 1.7018256187438965, "learning_rate": 8.682389464204526e-06, "loss": 0.4781, "step": 1727 }, { "epoch": 0.26, "grad_norm": 1.281346082687378, "learning_rate": 8.680743398224511e-06, "loss": 0.5194, "step": 1728 }, { "epoch": 0.26, "grad_norm": 1.1084767580032349, "learning_rate": 8.679096460932477e-06, "loss": 0.4084, "step": 1729 }, { "epoch": 0.26, "grad_norm": 1.2479565143585205, "learning_rate": 8.677448652718286e-06, "loss": 0.5458, "step": 1730 }, { "epoch": 0.26, "grad_norm": 1.1851096153259277, "learning_rate": 8.675799973972012e-06, "loss": 0.5264, "step": 1731 }, { "epoch": 0.26, "grad_norm": 1.4452933073043823, "learning_rate": 8.67415042508393e-06, "loss": 0.5056, "step": 1732 }, { "epoch": 0.26, "grad_norm": 1.1086198091506958, "learning_rate": 8.67250000644453e-06, "loss": 0.4645, "step": 1733 }, { "epoch": 0.26, "grad_norm": 1.1011584997177124, "learning_rate": 8.6708487184445e-06, "loss": 0.4245, "step": 1734 }, { "epoch": 0.26, "grad_norm": 1.1628658771514893, "learning_rate": 8.669196561474735e-06, "loss": 0.4671, "step": 1735 }, { "epoch": 0.26, "grad_norm": 1.4795658588409424, "learning_rate": 8.667543535926335e-06, "loss": 0.5363, "step": 1736 }, { "epoch": 0.26, "grad_norm": 1.690724492073059, "learning_rate": 8.665889642190608e-06, "loss": 0.5528, "step": 1737 }, { "epoch": 0.26, "grad_norm": 1.2710955142974854, "learning_rate": 8.664234880659068e-06, "loss": 0.4517, "step": 1738 }, { "epoch": 0.26, "grad_norm": 1.2356282472610474, "learning_rate": 8.662579251723431e-06, "loss": 0.4435, "step": 1739 }, { "epoch": 0.26, "grad_norm": 1.2734869718551636, "learning_rate": 8.660922755775622e-06, "loss": 0.5413, "step": 1740 }, { "epoch": 0.26, "grad_norm": 1.0245107412338257, "learning_rate": 8.659265393207769e-06, "loss": 0.5186, "step": 1741 }, { "epoch": 0.26, "grad_norm": 0.9782384634017944, "learning_rate": 8.657607164412208e-06, "loss": 0.5668, "step": 1742 }, { "epoch": 0.26, "grad_norm": 1.9042954444885254, "learning_rate": 8.655948069781475e-06, "loss": 0.5343, "step": 1743 }, { "epoch": 0.26, "grad_norm": 1.3951473236083984, "learning_rate": 8.654288109708314e-06, "loss": 0.4977, "step": 1744 }, { "epoch": 0.26, "grad_norm": 1.164420247077942, "learning_rate": 8.652627284585674e-06, "loss": 0.5053, "step": 1745 }, { "epoch": 0.26, "grad_norm": 1.0424221754074097, "learning_rate": 8.650965594806707e-06, "loss": 0.479, "step": 1746 }, { "epoch": 0.26, "grad_norm": 1.1918065547943115, "learning_rate": 8.649303040764774e-06, "loss": 0.4752, "step": 1747 }, { "epoch": 0.26, "grad_norm": 1.3025072813034058, "learning_rate": 8.647639622853436e-06, "loss": 0.4596, "step": 1748 }, { "epoch": 0.26, "grad_norm": 1.3952810764312744, "learning_rate": 8.645975341466462e-06, "loss": 0.4274, "step": 1749 }, { "epoch": 0.26, "grad_norm": 1.2973756790161133, "learning_rate": 8.64431019699782e-06, "loss": 0.4883, "step": 1750 }, { "epoch": 0.26, "grad_norm": 2.3372161388397217, "learning_rate": 8.64264418984169e-06, "loss": 0.4486, "step": 1751 }, { "epoch": 0.26, "grad_norm": 1.9278141260147095, "learning_rate": 8.64097732039245e-06, "loss": 0.4501, "step": 1752 }, { "epoch": 0.26, "grad_norm": 0.9674215912818909, "learning_rate": 8.639309589044684e-06, "loss": 0.4902, "step": 1753 }, { "epoch": 0.26, "grad_norm": 1.1989529132843018, "learning_rate": 8.637640996193179e-06, "loss": 0.5274, "step": 1754 }, { "epoch": 0.26, "grad_norm": 1.468495488166809, "learning_rate": 8.635971542232933e-06, "loss": 0.5508, "step": 1755 }, { "epoch": 0.26, "grad_norm": 1.2584160566329956, "learning_rate": 8.634301227559136e-06, "loss": 0.5459, "step": 1756 }, { "epoch": 0.26, "grad_norm": 2.8068995475769043, "learning_rate": 8.63263005256719e-06, "loss": 0.5069, "step": 1757 }, { "epoch": 0.26, "grad_norm": 1.4589884281158447, "learning_rate": 8.630958017652698e-06, "loss": 0.5349, "step": 1758 }, { "epoch": 0.26, "grad_norm": 0.9942153692245483, "learning_rate": 8.629285123211467e-06, "loss": 0.4938, "step": 1759 }, { "epoch": 0.26, "grad_norm": 1.3090524673461914, "learning_rate": 8.627611369639508e-06, "loss": 0.5356, "step": 1760 }, { "epoch": 0.26, "grad_norm": 1.095207929611206, "learning_rate": 8.625936757333036e-06, "loss": 0.5324, "step": 1761 }, { "epoch": 0.26, "grad_norm": 1.3164137601852417, "learning_rate": 8.624261286688467e-06, "loss": 0.4951, "step": 1762 }, { "epoch": 0.26, "grad_norm": 1.305249810218811, "learning_rate": 8.62258495810242e-06, "loss": 0.4703, "step": 1763 }, { "epoch": 0.26, "grad_norm": 1.1537212133407593, "learning_rate": 8.62090777197172e-06, "loss": 0.5461, "step": 1764 }, { "epoch": 0.27, "grad_norm": 1.062306523323059, "learning_rate": 8.619229728693393e-06, "loss": 0.4629, "step": 1765 }, { "epoch": 0.27, "grad_norm": 1.2568604946136475, "learning_rate": 8.617550828664669e-06, "loss": 0.4878, "step": 1766 }, { "epoch": 0.27, "grad_norm": 1.4391775131225586, "learning_rate": 8.61587107228298e-06, "loss": 0.561, "step": 1767 }, { "epoch": 0.27, "grad_norm": 1.0327045917510986, "learning_rate": 8.614190459945959e-06, "loss": 0.4183, "step": 1768 }, { "epoch": 0.27, "grad_norm": 1.0879237651824951, "learning_rate": 8.612508992051447e-06, "loss": 0.5135, "step": 1769 }, { "epoch": 0.27, "grad_norm": 1.6293491125106812, "learning_rate": 8.61082666899748e-06, "loss": 0.5283, "step": 1770 }, { "epoch": 0.27, "grad_norm": 1.5126861333847046, "learning_rate": 8.609143491182303e-06, "loss": 0.5466, "step": 1771 }, { "epoch": 0.27, "grad_norm": 1.369160771369934, "learning_rate": 8.607459459004362e-06, "loss": 0.4379, "step": 1772 }, { "epoch": 0.27, "grad_norm": 1.1224613189697266, "learning_rate": 8.605774572862301e-06, "loss": 0.5629, "step": 1773 }, { "epoch": 0.27, "grad_norm": 1.195077896118164, "learning_rate": 8.604088833154971e-06, "loss": 0.4859, "step": 1774 }, { "epoch": 0.27, "grad_norm": 1.8223968744277954, "learning_rate": 8.602402240281422e-06, "loss": 0.5257, "step": 1775 }, { "epoch": 0.27, "grad_norm": 1.0969518423080444, "learning_rate": 8.600714794640909e-06, "loss": 0.5308, "step": 1776 }, { "epoch": 0.27, "grad_norm": 1.0337743759155273, "learning_rate": 8.599026496632886e-06, "loss": 0.507, "step": 1777 }, { "epoch": 0.27, "grad_norm": 1.1853848695755005, "learning_rate": 8.597337346657008e-06, "loss": 0.4546, "step": 1778 }, { "epoch": 0.27, "grad_norm": 1.50640869140625, "learning_rate": 8.595647345113139e-06, "loss": 0.4373, "step": 1779 }, { "epoch": 0.27, "grad_norm": 1.3875010013580322, "learning_rate": 8.593956492401332e-06, "loss": 0.484, "step": 1780 }, { "epoch": 0.27, "grad_norm": 1.9373008012771606, "learning_rate": 8.592264788921854e-06, "loss": 0.4709, "step": 1781 }, { "epoch": 0.27, "grad_norm": 1.7656848430633545, "learning_rate": 8.590572235075167e-06, "loss": 0.4914, "step": 1782 }, { "epoch": 0.27, "grad_norm": 1.36020028591156, "learning_rate": 8.588878831261934e-06, "loss": 0.6022, "step": 1783 }, { "epoch": 0.27, "grad_norm": 1.3311443328857422, "learning_rate": 8.587184577883018e-06, "loss": 0.4867, "step": 1784 }, { "epoch": 0.27, "grad_norm": 1.203980565071106, "learning_rate": 8.585489475339492e-06, "loss": 0.597, "step": 1785 }, { "epoch": 0.27, "grad_norm": 1.1267902851104736, "learning_rate": 8.58379352403262e-06, "loss": 0.5356, "step": 1786 }, { "epoch": 0.27, "grad_norm": 1.2129740715026855, "learning_rate": 8.58209672436387e-06, "loss": 0.5314, "step": 1787 }, { "epoch": 0.27, "grad_norm": 1.3386986255645752, "learning_rate": 8.580399076734913e-06, "loss": 0.5455, "step": 1788 }, { "epoch": 0.27, "grad_norm": 1.2364221811294556, "learning_rate": 8.578700581547619e-06, "loss": 0.5748, "step": 1789 }, { "epoch": 0.27, "grad_norm": 1.325402855873108, "learning_rate": 8.577001239204056e-06, "loss": 0.5373, "step": 1790 }, { "epoch": 0.27, "grad_norm": 1.2534407377243042, "learning_rate": 8.5753010501065e-06, "loss": 0.7235, "step": 1791 }, { "epoch": 0.27, "grad_norm": 1.7110689878463745, "learning_rate": 8.57360001465742e-06, "loss": 0.5422, "step": 1792 }, { "epoch": 0.27, "grad_norm": 1.3705350160598755, "learning_rate": 8.571898133259487e-06, "loss": 0.4613, "step": 1793 }, { "epoch": 0.27, "grad_norm": 1.8931374549865723, "learning_rate": 8.570195406315575e-06, "loss": 0.4725, "step": 1794 }, { "epoch": 0.27, "grad_norm": 1.161275863647461, "learning_rate": 8.568491834228757e-06, "loss": 0.5381, "step": 1795 }, { "epoch": 0.27, "grad_norm": 1.3843941688537598, "learning_rate": 8.566787417402302e-06, "loss": 0.5473, "step": 1796 }, { "epoch": 0.27, "grad_norm": 1.2373884916305542, "learning_rate": 8.565082156239689e-06, "loss": 0.7462, "step": 1797 }, { "epoch": 0.27, "grad_norm": 1.32167387008667, "learning_rate": 8.563376051144582e-06, "loss": 0.4861, "step": 1798 }, { "epoch": 0.27, "grad_norm": 1.6419718265533447, "learning_rate": 8.561669102520861e-06, "loss": 0.5908, "step": 1799 }, { "epoch": 0.27, "grad_norm": 1.1422637701034546, "learning_rate": 8.559961310772595e-06, "loss": 0.5847, "step": 1800 }, { "epoch": 0.27, "grad_norm": 1.2339184284210205, "learning_rate": 8.558252676304052e-06, "loss": 0.4714, "step": 1801 }, { "epoch": 0.27, "grad_norm": 1.278952956199646, "learning_rate": 8.55654319951971e-06, "loss": 0.4448, "step": 1802 }, { "epoch": 0.27, "grad_norm": 1.1036887168884277, "learning_rate": 8.554832880824233e-06, "loss": 0.6182, "step": 1803 }, { "epoch": 0.27, "grad_norm": 1.8207663297653198, "learning_rate": 8.553121720622494e-06, "loss": 0.5917, "step": 1804 }, { "epoch": 0.27, "grad_norm": 0.9759678840637207, "learning_rate": 8.551409719319561e-06, "loss": 0.4505, "step": 1805 }, { "epoch": 0.27, "grad_norm": 1.396270513534546, "learning_rate": 8.549696877320702e-06, "loss": 0.5383, "step": 1806 }, { "epoch": 0.27, "grad_norm": 1.2752735614776611, "learning_rate": 8.547983195031383e-06, "loss": 0.4756, "step": 1807 }, { "epoch": 0.27, "grad_norm": 1.604784607887268, "learning_rate": 8.546268672857272e-06, "loss": 0.46, "step": 1808 }, { "epoch": 0.27, "grad_norm": 1.433003544807434, "learning_rate": 8.54455331120423e-06, "loss": 0.5214, "step": 1809 }, { "epoch": 0.27, "grad_norm": 1.702668309211731, "learning_rate": 8.542837110478324e-06, "loss": 0.46, "step": 1810 }, { "epoch": 0.27, "grad_norm": 2.726297616958618, "learning_rate": 8.541120071085815e-06, "loss": 0.4521, "step": 1811 }, { "epoch": 0.27, "grad_norm": 1.1569803953170776, "learning_rate": 8.539402193433164e-06, "loss": 0.5308, "step": 1812 }, { "epoch": 0.27, "grad_norm": 1.4935014247894287, "learning_rate": 8.537683477927027e-06, "loss": 0.4586, "step": 1813 }, { "epoch": 0.27, "grad_norm": 1.1115319728851318, "learning_rate": 8.535963924974266e-06, "loss": 0.6012, "step": 1814 }, { "epoch": 0.27, "grad_norm": 1.121034860610962, "learning_rate": 8.534243534981935e-06, "loss": 0.7007, "step": 1815 }, { "epoch": 0.27, "grad_norm": 1.3732863664627075, "learning_rate": 8.532522308357285e-06, "loss": 0.487, "step": 1816 }, { "epoch": 0.27, "grad_norm": 1.3671280145645142, "learning_rate": 8.530800245507771e-06, "loss": 0.4651, "step": 1817 }, { "epoch": 0.27, "grad_norm": 1.4016063213348389, "learning_rate": 8.529077346841042e-06, "loss": 0.4759, "step": 1818 }, { "epoch": 0.27, "grad_norm": 1.124016523361206, "learning_rate": 8.527353612764944e-06, "loss": 0.5562, "step": 1819 }, { "epoch": 0.27, "grad_norm": 1.2907793521881104, "learning_rate": 8.525629043687523e-06, "loss": 0.4691, "step": 1820 }, { "epoch": 0.27, "grad_norm": 1.1048409938812256, "learning_rate": 8.523903640017024e-06, "loss": 0.7104, "step": 1821 }, { "epoch": 0.27, "grad_norm": 1.5539138317108154, "learning_rate": 8.522177402161885e-06, "loss": 0.5488, "step": 1822 }, { "epoch": 0.27, "grad_norm": 1.2843046188354492, "learning_rate": 8.520450330530746e-06, "loss": 0.46, "step": 1823 }, { "epoch": 0.27, "grad_norm": 1.490628957748413, "learning_rate": 8.51872242553244e-06, "loss": 0.466, "step": 1824 }, { "epoch": 0.27, "grad_norm": 3.0982017517089844, "learning_rate": 8.516993687576002e-06, "loss": 0.4952, "step": 1825 }, { "epoch": 0.27, "grad_norm": 1.2346765995025635, "learning_rate": 8.515264117070663e-06, "loss": 0.5031, "step": 1826 }, { "epoch": 0.27, "grad_norm": 1.0154247283935547, "learning_rate": 8.513533714425846e-06, "loss": 0.509, "step": 1827 }, { "epoch": 0.27, "grad_norm": 1.1156938076019287, "learning_rate": 8.511802480051179e-06, "loss": 0.5305, "step": 1828 }, { "epoch": 0.27, "grad_norm": 2.2779903411865234, "learning_rate": 8.510070414356478e-06, "loss": 0.4957, "step": 1829 }, { "epoch": 0.27, "grad_norm": 1.160202145576477, "learning_rate": 8.508337517751765e-06, "loss": 0.6011, "step": 1830 }, { "epoch": 0.28, "grad_norm": 1.5046281814575195, "learning_rate": 8.506603790647252e-06, "loss": 0.5378, "step": 1831 }, { "epoch": 0.28, "grad_norm": 1.0389491319656372, "learning_rate": 8.504869233453352e-06, "loss": 0.4597, "step": 1832 }, { "epoch": 0.28, "grad_norm": 3.1847829818725586, "learning_rate": 8.503133846580671e-06, "loss": 0.5386, "step": 1833 }, { "epoch": 0.28, "grad_norm": 1.4522218704223633, "learning_rate": 8.501397630440012e-06, "loss": 0.4733, "step": 1834 }, { "epoch": 0.28, "grad_norm": 3.311455011367798, "learning_rate": 8.499660585442376e-06, "loss": 0.4219, "step": 1835 }, { "epoch": 0.28, "grad_norm": 1.175516128540039, "learning_rate": 8.49792271199896e-06, "loss": 0.4488, "step": 1836 }, { "epoch": 0.28, "grad_norm": 1.6306215524673462, "learning_rate": 8.496184010521155e-06, "loss": 0.4911, "step": 1837 }, { "epoch": 0.28, "grad_norm": 1.453374981880188, "learning_rate": 8.494444481420552e-06, "loss": 0.4328, "step": 1838 }, { "epoch": 0.28, "grad_norm": 1.184086561203003, "learning_rate": 8.492704125108933e-06, "loss": 0.5331, "step": 1839 }, { "epoch": 0.28, "grad_norm": 1.0157546997070312, "learning_rate": 8.490962941998278e-06, "loss": 0.5354, "step": 1840 }, { "epoch": 0.28, "grad_norm": 1.3027619123458862, "learning_rate": 8.489220932500765e-06, "loss": 0.5216, "step": 1841 }, { "epoch": 0.28, "grad_norm": 2.4813003540039062, "learning_rate": 8.487478097028764e-06, "loss": 0.5243, "step": 1842 }, { "epoch": 0.28, "grad_norm": 1.0874325037002563, "learning_rate": 8.485734435994841e-06, "loss": 0.5137, "step": 1843 }, { "epoch": 0.28, "grad_norm": 1.132187843322754, "learning_rate": 8.483989949811761e-06, "loss": 0.5169, "step": 1844 }, { "epoch": 0.28, "grad_norm": 1.1240030527114868, "learning_rate": 8.482244638892482e-06, "loss": 0.4857, "step": 1845 }, { "epoch": 0.28, "grad_norm": 1.845409870147705, "learning_rate": 8.480498503650153e-06, "loss": 0.4535, "step": 1846 }, { "epoch": 0.28, "grad_norm": 3.815434694290161, "learning_rate": 8.478751544498127e-06, "loss": 0.528, "step": 1847 }, { "epoch": 0.28, "grad_norm": 1.7755560874938965, "learning_rate": 8.477003761849946e-06, "loss": 0.42, "step": 1848 }, { "epoch": 0.28, "grad_norm": 1.1641900539398193, "learning_rate": 8.475255156119346e-06, "loss": 0.5065, "step": 1849 }, { "epoch": 0.28, "grad_norm": 1.2794562578201294, "learning_rate": 8.473505727720262e-06, "loss": 0.5214, "step": 1850 }, { "epoch": 0.28, "grad_norm": 1.2628008127212524, "learning_rate": 8.471755477066819e-06, "loss": 0.4772, "step": 1851 }, { "epoch": 0.28, "grad_norm": 1.6093729734420776, "learning_rate": 8.470004404573344e-06, "loss": 0.5109, "step": 1852 }, { "epoch": 0.28, "grad_norm": 2.130239486694336, "learning_rate": 8.46825251065435e-06, "loss": 0.5474, "step": 1853 }, { "epoch": 0.28, "grad_norm": 1.4419996738433838, "learning_rate": 8.46649979572455e-06, "loss": 0.5386, "step": 1854 }, { "epoch": 0.28, "grad_norm": 1.8350971937179565, "learning_rate": 8.464746260198851e-06, "loss": 0.4697, "step": 1855 }, { "epoch": 0.28, "grad_norm": 1.856454849243164, "learning_rate": 8.46299190449235e-06, "loss": 0.5458, "step": 1856 }, { "epoch": 0.28, "grad_norm": 1.3240686655044556, "learning_rate": 8.461236729020344e-06, "loss": 0.5598, "step": 1857 }, { "epoch": 0.28, "grad_norm": 1.6181443929672241, "learning_rate": 8.459480734198317e-06, "loss": 0.5221, "step": 1858 }, { "epoch": 0.28, "grad_norm": 1.5366764068603516, "learning_rate": 8.457723920441957e-06, "loss": 0.4798, "step": 1859 }, { "epoch": 0.28, "grad_norm": 1.2230762243270874, "learning_rate": 8.455966288167136e-06, "loss": 0.449, "step": 1860 }, { "epoch": 0.28, "grad_norm": 1.1380709409713745, "learning_rate": 8.454207837789928e-06, "loss": 0.4756, "step": 1861 }, { "epoch": 0.28, "grad_norm": 1.4271767139434814, "learning_rate": 8.452448569726588e-06, "loss": 0.5119, "step": 1862 }, { "epoch": 0.28, "grad_norm": 2.1696677207946777, "learning_rate": 8.450688484393583e-06, "loss": 0.5435, "step": 1863 }, { "epoch": 0.28, "grad_norm": 1.50642991065979, "learning_rate": 8.448927582207556e-06, "loss": 0.4556, "step": 1864 }, { "epoch": 0.28, "grad_norm": 0.9275834560394287, "learning_rate": 8.447165863585355e-06, "loss": 0.4603, "step": 1865 }, { "epoch": 0.28, "grad_norm": 1.5822758674621582, "learning_rate": 8.445403328944017e-06, "loss": 0.4966, "step": 1866 }, { "epoch": 0.28, "grad_norm": 1.3134077787399292, "learning_rate": 8.44363997870077e-06, "loss": 0.4937, "step": 1867 }, { "epoch": 0.28, "grad_norm": 1.6626968383789062, "learning_rate": 8.441875813273038e-06, "loss": 0.484, "step": 1868 }, { "epoch": 0.28, "grad_norm": 1.2603470087051392, "learning_rate": 8.440110833078438e-06, "loss": 0.4733, "step": 1869 }, { "epoch": 0.28, "grad_norm": 1.1893633604049683, "learning_rate": 8.43834503853478e-06, "loss": 0.5267, "step": 1870 }, { "epoch": 0.28, "grad_norm": 1.2884622812271118, "learning_rate": 8.436578430060064e-06, "loss": 0.4538, "step": 1871 }, { "epoch": 0.28, "grad_norm": 1.1201016902923584, "learning_rate": 8.434811008072486e-06, "loss": 0.4418, "step": 1872 }, { "epoch": 0.28, "grad_norm": 1.1839452981948853, "learning_rate": 8.433042772990432e-06, "loss": 0.5146, "step": 1873 }, { "epoch": 0.28, "grad_norm": 1.2857584953308105, "learning_rate": 8.431273725232485e-06, "loss": 0.5913, "step": 1874 }, { "epoch": 0.28, "grad_norm": 2.3923704624176025, "learning_rate": 8.429503865217412e-06, "loss": 0.5421, "step": 1875 }, { "epoch": 0.28, "grad_norm": 1.2206439971923828, "learning_rate": 8.427733193364182e-06, "loss": 0.5228, "step": 1876 }, { "epoch": 0.28, "grad_norm": 2.003872871398926, "learning_rate": 8.425961710091948e-06, "loss": 0.4457, "step": 1877 }, { "epoch": 0.28, "grad_norm": 1.7398998737335205, "learning_rate": 8.424189415820063e-06, "loss": 0.5522, "step": 1878 }, { "epoch": 0.28, "grad_norm": 1.4015898704528809, "learning_rate": 8.422416310968061e-06, "loss": 0.5486, "step": 1879 }, { "epoch": 0.28, "grad_norm": 1.460644006729126, "learning_rate": 8.42064239595568e-06, "loss": 0.5097, "step": 1880 }, { "epoch": 0.28, "grad_norm": 1.13155996799469, "learning_rate": 8.418867671202844e-06, "loss": 0.5568, "step": 1881 }, { "epoch": 0.28, "grad_norm": 1.6741536855697632, "learning_rate": 8.417092137129665e-06, "loss": 0.5186, "step": 1882 }, { "epoch": 0.28, "grad_norm": 1.6674911975860596, "learning_rate": 8.415315794156456e-06, "loss": 0.5457, "step": 1883 }, { "epoch": 0.28, "grad_norm": 1.202759027481079, "learning_rate": 8.413538642703708e-06, "loss": 0.4743, "step": 1884 }, { "epoch": 0.28, "grad_norm": 2.000026226043701, "learning_rate": 8.41176068319212e-06, "loss": 0.3912, "step": 1885 }, { "epoch": 0.28, "grad_norm": 1.8508400917053223, "learning_rate": 8.409981916042572e-06, "loss": 0.5299, "step": 1886 }, { "epoch": 0.28, "grad_norm": 2.2995707988739014, "learning_rate": 8.408202341676132e-06, "loss": 0.4457, "step": 1887 }, { "epoch": 0.28, "grad_norm": 1.1639851331710815, "learning_rate": 8.406421960514068e-06, "loss": 0.5468, "step": 1888 }, { "epoch": 0.28, "grad_norm": 2.1399474143981934, "learning_rate": 8.404640772977834e-06, "loss": 0.4943, "step": 1889 }, { "epoch": 0.28, "grad_norm": 1.4472756385803223, "learning_rate": 8.402858779489075e-06, "loss": 0.5013, "step": 1890 }, { "epoch": 0.28, "grad_norm": 1.54185950756073, "learning_rate": 8.40107598046963e-06, "loss": 0.5519, "step": 1891 }, { "epoch": 0.28, "grad_norm": 1.537032961845398, "learning_rate": 8.399292376341523e-06, "loss": 0.5393, "step": 1892 }, { "epoch": 0.28, "grad_norm": 1.4813849925994873, "learning_rate": 8.397507967526975e-06, "loss": 0.4698, "step": 1893 }, { "epoch": 0.28, "grad_norm": 1.0915172100067139, "learning_rate": 8.395722754448392e-06, "loss": 0.5659, "step": 1894 }, { "epoch": 0.28, "grad_norm": 1.4095959663391113, "learning_rate": 8.393936737528375e-06, "loss": 0.538, "step": 1895 }, { "epoch": 0.28, "grad_norm": 1.1190086603164673, "learning_rate": 8.39214991718971e-06, "loss": 0.484, "step": 1896 }, { "epoch": 0.28, "grad_norm": 1.209825038909912, "learning_rate": 8.390362293855378e-06, "loss": 0.4455, "step": 1897 }, { "epoch": 0.29, "grad_norm": 1.5133507251739502, "learning_rate": 8.388573867948548e-06, "loss": 0.5094, "step": 1898 }, { "epoch": 0.29, "grad_norm": 1.9851418733596802, "learning_rate": 8.386784639892582e-06, "loss": 0.4873, "step": 1899 }, { "epoch": 0.29, "grad_norm": 1.1684080362319946, "learning_rate": 8.384994610111026e-06, "loss": 0.4859, "step": 1900 }, { "epoch": 0.29, "grad_norm": 1.3624770641326904, "learning_rate": 8.383203779027617e-06, "loss": 0.5113, "step": 1901 }, { "epoch": 0.29, "grad_norm": 1.1727055311203003, "learning_rate": 8.38141214706629e-06, "loss": 0.4302, "step": 1902 }, { "epoch": 0.29, "grad_norm": 1.1491889953613281, "learning_rate": 8.37961971465116e-06, "loss": 0.7054, "step": 1903 }, { "epoch": 0.29, "grad_norm": 1.3534843921661377, "learning_rate": 8.377826482206532e-06, "loss": 0.5399, "step": 1904 }, { "epoch": 0.29, "grad_norm": 1.0166674852371216, "learning_rate": 8.376032450156908e-06, "loss": 0.5363, "step": 1905 }, { "epoch": 0.29, "grad_norm": 0.9673303961753845, "learning_rate": 8.374237618926971e-06, "loss": 0.485, "step": 1906 }, { "epoch": 0.29, "grad_norm": 1.177343487739563, "learning_rate": 8.372441988941595e-06, "loss": 0.5119, "step": 1907 }, { "epoch": 0.29, "grad_norm": 1.251994252204895, "learning_rate": 8.370645560625852e-06, "loss": 0.5149, "step": 1908 }, { "epoch": 0.29, "grad_norm": 1.1830923557281494, "learning_rate": 8.368848334404987e-06, "loss": 0.5408, "step": 1909 }, { "epoch": 0.29, "grad_norm": 1.299050211906433, "learning_rate": 8.367050310704448e-06, "loss": 0.553, "step": 1910 }, { "epoch": 0.29, "grad_norm": 1.5261235237121582, "learning_rate": 8.365251489949866e-06, "loss": 0.5239, "step": 1911 }, { "epoch": 0.29, "grad_norm": 1.386844277381897, "learning_rate": 8.363451872567057e-06, "loss": 0.4999, "step": 1912 }, { "epoch": 0.29, "grad_norm": 1.240612506866455, "learning_rate": 8.361651458982034e-06, "loss": 0.5384, "step": 1913 }, { "epoch": 0.29, "grad_norm": 1.2166568040847778, "learning_rate": 8.359850249620993e-06, "loss": 0.4394, "step": 1914 }, { "epoch": 0.29, "grad_norm": 0.9181901812553406, "learning_rate": 8.358048244910319e-06, "loss": 0.4511, "step": 1915 }, { "epoch": 0.29, "grad_norm": 1.0706591606140137, "learning_rate": 8.356245445276585e-06, "loss": 0.4606, "step": 1916 }, { "epoch": 0.29, "grad_norm": 1.561891794204712, "learning_rate": 8.354441851146552e-06, "loss": 0.5185, "step": 1917 }, { "epoch": 0.29, "grad_norm": 1.1502307653427124, "learning_rate": 8.352637462947173e-06, "loss": 0.4809, "step": 1918 }, { "epoch": 0.29, "grad_norm": 1.3651647567749023, "learning_rate": 8.350832281105584e-06, "loss": 0.4797, "step": 1919 }, { "epoch": 0.29, "grad_norm": 1.1955329179763794, "learning_rate": 8.349026306049113e-06, "loss": 0.4295, "step": 1920 }, { "epoch": 0.29, "grad_norm": 1.1710273027420044, "learning_rate": 8.347219538205269e-06, "loss": 0.6109, "step": 1921 }, { "epoch": 0.29, "grad_norm": 1.2807750701904297, "learning_rate": 8.345411978001757e-06, "loss": 0.4668, "step": 1922 }, { "epoch": 0.29, "grad_norm": 1.2019319534301758, "learning_rate": 8.343603625866464e-06, "loss": 0.3966, "step": 1923 }, { "epoch": 0.29, "grad_norm": 0.9180043935775757, "learning_rate": 8.341794482227467e-06, "loss": 0.4997, "step": 1924 }, { "epoch": 0.29, "grad_norm": 1.079102873802185, "learning_rate": 8.339984547513031e-06, "loss": 0.5135, "step": 1925 }, { "epoch": 0.29, "grad_norm": 1.4213789701461792, "learning_rate": 8.338173822151607e-06, "loss": 0.4391, "step": 1926 }, { "epoch": 0.29, "grad_norm": 2.609724521636963, "learning_rate": 8.33636230657183e-06, "loss": 0.5019, "step": 1927 }, { "epoch": 0.29, "grad_norm": 1.1786974668502808, "learning_rate": 8.334550001202526e-06, "loss": 0.5293, "step": 1928 }, { "epoch": 0.29, "grad_norm": 1.3104584217071533, "learning_rate": 8.33273690647271e-06, "loss": 0.4824, "step": 1929 }, { "epoch": 0.29, "grad_norm": 1.108588457107544, "learning_rate": 8.330923022811576e-06, "loss": 0.4754, "step": 1930 }, { "epoch": 0.29, "grad_norm": 1.8859634399414062, "learning_rate": 8.329108350648516e-06, "loss": 0.499, "step": 1931 }, { "epoch": 0.29, "grad_norm": 1.16884446144104, "learning_rate": 8.327292890413096e-06, "loss": 0.5189, "step": 1932 }, { "epoch": 0.29, "grad_norm": 1.2545523643493652, "learning_rate": 8.32547664253508e-06, "loss": 0.7638, "step": 1933 }, { "epoch": 0.29, "grad_norm": 1.4165433645248413, "learning_rate": 8.32365960744441e-06, "loss": 0.4778, "step": 1934 }, { "epoch": 0.29, "grad_norm": 1.135283350944519, "learning_rate": 8.321841785571221e-06, "loss": 0.4739, "step": 1935 }, { "epoch": 0.29, "grad_norm": 1.974640965461731, "learning_rate": 8.320023177345827e-06, "loss": 0.446, "step": 1936 }, { "epoch": 0.29, "grad_norm": 1.2100675106048584, "learning_rate": 8.318203783198734e-06, "loss": 0.5493, "step": 1937 }, { "epoch": 0.29, "grad_norm": 1.096863865852356, "learning_rate": 8.316383603560634e-06, "loss": 0.4739, "step": 1938 }, { "epoch": 0.29, "grad_norm": 1.4050514698028564, "learning_rate": 8.314562638862401e-06, "loss": 0.5371, "step": 1939 }, { "epoch": 0.29, "grad_norm": 1.0857101678848267, "learning_rate": 8.312740889535096e-06, "loss": 0.535, "step": 1940 }, { "epoch": 0.29, "grad_norm": 1.1772516965866089, "learning_rate": 8.31091835600997e-06, "loss": 0.5147, "step": 1941 }, { "epoch": 0.29, "grad_norm": 1.190107822418213, "learning_rate": 8.309095038718453e-06, "loss": 0.4333, "step": 1942 }, { "epoch": 0.29, "grad_norm": 1.2564631700515747, "learning_rate": 8.307270938092166e-06, "loss": 0.3932, "step": 1943 }, { "epoch": 0.29, "grad_norm": 1.537312388420105, "learning_rate": 8.305446054562912e-06, "loss": 0.4745, "step": 1944 }, { "epoch": 0.29, "grad_norm": 1.2092684507369995, "learning_rate": 8.303620388562681e-06, "loss": 0.5261, "step": 1945 }, { "epoch": 0.29, "grad_norm": 1.3127723932266235, "learning_rate": 8.301793940523648e-06, "loss": 0.509, "step": 1946 }, { "epoch": 0.29, "grad_norm": 1.5921860933303833, "learning_rate": 8.299966710878173e-06, "loss": 0.5275, "step": 1947 }, { "epoch": 0.29, "grad_norm": 1.5688190460205078, "learning_rate": 8.298138700058802e-06, "loss": 0.5336, "step": 1948 }, { "epoch": 0.29, "grad_norm": 1.135748028755188, "learning_rate": 8.296309908498264e-06, "loss": 0.4315, "step": 1949 }, { "epoch": 0.29, "grad_norm": 6.903907775878906, "learning_rate": 8.294480336629474e-06, "loss": 0.4973, "step": 1950 }, { "epoch": 0.29, "grad_norm": 1.1105375289916992, "learning_rate": 8.292649984885528e-06, "loss": 0.4767, "step": 1951 }, { "epoch": 0.29, "grad_norm": 1.2655508518218994, "learning_rate": 8.290818853699716e-06, "loss": 0.4376, "step": 1952 }, { "epoch": 0.29, "grad_norm": 1.015041470527649, "learning_rate": 8.288986943505502e-06, "loss": 0.4573, "step": 1953 }, { "epoch": 0.29, "grad_norm": 1.1466478109359741, "learning_rate": 8.287154254736543e-06, "loss": 0.49, "step": 1954 }, { "epoch": 0.29, "grad_norm": 1.0488026142120361, "learning_rate": 8.285320787826672e-06, "loss": 0.4476, "step": 1955 }, { "epoch": 0.29, "grad_norm": 1.3885747194290161, "learning_rate": 8.283486543209913e-06, "loss": 0.5068, "step": 1956 }, { "epoch": 0.29, "grad_norm": 1.0620408058166504, "learning_rate": 8.281651521320471e-06, "loss": 0.4961, "step": 1957 }, { "epoch": 0.29, "grad_norm": 1.0496207475662231, "learning_rate": 8.279815722592738e-06, "loss": 0.4892, "step": 1958 }, { "epoch": 0.29, "grad_norm": 1.0363785028457642, "learning_rate": 8.27797914746128e-06, "loss": 0.4472, "step": 1959 }, { "epoch": 0.29, "grad_norm": 1.1560288667678833, "learning_rate": 8.276141796360866e-06, "loss": 0.5154, "step": 1960 }, { "epoch": 0.29, "grad_norm": 1.4130613803863525, "learning_rate": 8.274303669726427e-06, "loss": 0.4986, "step": 1961 }, { "epoch": 0.29, "grad_norm": 1.6201586723327637, "learning_rate": 8.27246476799309e-06, "loss": 0.485, "step": 1962 }, { "epoch": 0.29, "grad_norm": 1.5504785776138306, "learning_rate": 8.270625091596164e-06, "loss": 0.4858, "step": 1963 }, { "epoch": 0.3, "grad_norm": 1.1347355842590332, "learning_rate": 8.268784640971143e-06, "loss": 0.5957, "step": 1964 }, { "epoch": 0.3, "grad_norm": 1.3641228675842285, "learning_rate": 8.266943416553698e-06, "loss": 0.4786, "step": 1965 }, { "epoch": 0.3, "grad_norm": 1.3737993240356445, "learning_rate": 8.265101418779688e-06, "loss": 0.5264, "step": 1966 }, { "epoch": 0.3, "grad_norm": 1.2479106187820435, "learning_rate": 8.263258648085155e-06, "loss": 0.4123, "step": 1967 }, { "epoch": 0.3, "grad_norm": 1.1285394430160522, "learning_rate": 8.261415104906321e-06, "loss": 0.5308, "step": 1968 }, { "epoch": 0.3, "grad_norm": 1.5396361351013184, "learning_rate": 8.259570789679593e-06, "loss": 0.5734, "step": 1969 }, { "epoch": 0.3, "grad_norm": 1.2463819980621338, "learning_rate": 8.257725702841562e-06, "loss": 0.4786, "step": 1970 }, { "epoch": 0.3, "grad_norm": 1.1001089811325073, "learning_rate": 8.255879844829e-06, "loss": 0.4073, "step": 1971 }, { "epoch": 0.3, "grad_norm": 1.2612745761871338, "learning_rate": 8.25403321607886e-06, "loss": 0.585, "step": 1972 }, { "epoch": 0.3, "grad_norm": 1.2218334674835205, "learning_rate": 8.25218581702828e-06, "loss": 0.5076, "step": 1973 }, { "epoch": 0.3, "grad_norm": 1.0995893478393555, "learning_rate": 8.25033764811458e-06, "loss": 0.6166, "step": 1974 }, { "epoch": 0.3, "grad_norm": 1.2076212167739868, "learning_rate": 8.248488709775262e-06, "loss": 0.5223, "step": 1975 }, { "epoch": 0.3, "grad_norm": 1.3749693632125854, "learning_rate": 8.24663900244801e-06, "loss": 0.4695, "step": 1976 }, { "epoch": 0.3, "grad_norm": 1.3231396675109863, "learning_rate": 8.24478852657069e-06, "loss": 0.5207, "step": 1977 }, { "epoch": 0.3, "grad_norm": 1.4865174293518066, "learning_rate": 8.242937282581348e-06, "loss": 0.5298, "step": 1978 }, { "epoch": 0.3, "grad_norm": 1.1652485132217407, "learning_rate": 8.241085270918215e-06, "loss": 0.4751, "step": 1979 }, { "epoch": 0.3, "grad_norm": 1.2248640060424805, "learning_rate": 8.239232492019702e-06, "loss": 0.5126, "step": 1980 }, { "epoch": 0.3, "grad_norm": 1.3462159633636475, "learning_rate": 8.237378946324404e-06, "loss": 0.3781, "step": 1981 }, { "epoch": 0.3, "grad_norm": 1.5596877336502075, "learning_rate": 8.235524634271095e-06, "loss": 0.5904, "step": 1982 }, { "epoch": 0.3, "grad_norm": 1.3013298511505127, "learning_rate": 8.23366955629873e-06, "loss": 0.5511, "step": 1983 }, { "epoch": 0.3, "grad_norm": 0.9796875715255737, "learning_rate": 8.231813712846445e-06, "loss": 0.4779, "step": 1984 }, { "epoch": 0.3, "grad_norm": 1.0370749235153198, "learning_rate": 8.22995710435356e-06, "loss": 0.5068, "step": 1985 }, { "epoch": 0.3, "grad_norm": 1.2275376319885254, "learning_rate": 8.228099731259575e-06, "loss": 0.4281, "step": 1986 }, { "epoch": 0.3, "grad_norm": 1.218667984008789, "learning_rate": 8.22624159400417e-06, "loss": 0.4093, "step": 1987 }, { "epoch": 0.3, "grad_norm": 1.2674225568771362, "learning_rate": 8.224382693027207e-06, "loss": 0.4551, "step": 1988 }, { "epoch": 0.3, "grad_norm": 1.2037386894226074, "learning_rate": 8.22252302876873e-06, "loss": 0.7418, "step": 1989 }, { "epoch": 0.3, "grad_norm": 1.297209620475769, "learning_rate": 8.220662601668956e-06, "loss": 0.7763, "step": 1990 }, { "epoch": 0.3, "grad_norm": 1.2291944026947021, "learning_rate": 8.218801412168297e-06, "loss": 0.4719, "step": 1991 }, { "epoch": 0.3, "grad_norm": 1.3401703834533691, "learning_rate": 8.21693946070733e-06, "loss": 0.4874, "step": 1992 }, { "epoch": 0.3, "grad_norm": 1.5138051509857178, "learning_rate": 8.215076747726821e-06, "loss": 0.5606, "step": 1993 }, { "epoch": 0.3, "grad_norm": 1.2484278678894043, "learning_rate": 8.213213273667718e-06, "loss": 0.531, "step": 1994 }, { "epoch": 0.3, "grad_norm": 1.243335247039795, "learning_rate": 8.211349038971142e-06, "loss": 0.5057, "step": 1995 }, { "epoch": 0.3, "grad_norm": 2.6998138427734375, "learning_rate": 8.2094840440784e-06, "loss": 0.5295, "step": 1996 }, { "epoch": 0.3, "grad_norm": 1.0841480493545532, "learning_rate": 8.207618289430975e-06, "loss": 0.7269, "step": 1997 }, { "epoch": 0.3, "grad_norm": 1.1211984157562256, "learning_rate": 8.205751775470535e-06, "loss": 0.5746, "step": 1998 }, { "epoch": 0.3, "grad_norm": 1.1235861778259277, "learning_rate": 8.203884502638921e-06, "loss": 0.5152, "step": 1999 }, { "epoch": 0.3, "grad_norm": 1.3188871145248413, "learning_rate": 8.202016471378161e-06, "loss": 0.5214, "step": 2000 }, { "epoch": 0.3, "grad_norm": 1.13889741897583, "learning_rate": 8.200147682130452e-06, "loss": 0.4957, "step": 2001 }, { "epoch": 0.3, "grad_norm": 1.2605868577957153, "learning_rate": 8.198278135338185e-06, "loss": 0.4674, "step": 2002 }, { "epoch": 0.3, "grad_norm": 1.1909898519515991, "learning_rate": 8.196407831443917e-06, "loss": 0.7533, "step": 2003 }, { "epoch": 0.3, "grad_norm": 1.4430145025253296, "learning_rate": 8.194536770890392e-06, "loss": 0.5178, "step": 2004 }, { "epoch": 0.3, "grad_norm": 1.2613211870193481, "learning_rate": 8.192664954120528e-06, "loss": 0.7722, "step": 2005 }, { "epoch": 0.3, "grad_norm": 1.424066424369812, "learning_rate": 8.190792381577429e-06, "loss": 0.4916, "step": 2006 }, { "epoch": 0.3, "grad_norm": 2.099472761154175, "learning_rate": 8.18891905370437e-06, "loss": 0.4521, "step": 2007 }, { "epoch": 0.3, "grad_norm": 1.1689382791519165, "learning_rate": 8.187044970944811e-06, "loss": 0.4915, "step": 2008 }, { "epoch": 0.3, "grad_norm": 1.7302566766738892, "learning_rate": 8.185170133742386e-06, "loss": 0.5364, "step": 2009 }, { "epoch": 0.3, "grad_norm": 1.588390588760376, "learning_rate": 8.183294542540912e-06, "loss": 0.5495, "step": 2010 }, { "epoch": 0.3, "grad_norm": 1.2839229106903076, "learning_rate": 8.181418197784379e-06, "loss": 0.4921, "step": 2011 }, { "epoch": 0.3, "grad_norm": 1.1171602010726929, "learning_rate": 8.179541099916961e-06, "loss": 0.519, "step": 2012 }, { "epoch": 0.3, "grad_norm": 1.3931806087493896, "learning_rate": 8.177663249383008e-06, "loss": 0.4772, "step": 2013 }, { "epoch": 0.3, "grad_norm": 1.2090240716934204, "learning_rate": 8.175784646627048e-06, "loss": 0.4913, "step": 2014 }, { "epoch": 0.3, "grad_norm": 1.2156192064285278, "learning_rate": 8.173905292093784e-06, "loss": 0.5254, "step": 2015 }, { "epoch": 0.3, "grad_norm": 1.1069704294204712, "learning_rate": 8.172025186228107e-06, "loss": 0.4436, "step": 2016 }, { "epoch": 0.3, "grad_norm": 1.9179656505584717, "learning_rate": 8.170144329475071e-06, "loss": 0.4127, "step": 2017 }, { "epoch": 0.3, "grad_norm": 1.827163815498352, "learning_rate": 8.168262722279921e-06, "loss": 0.488, "step": 2018 }, { "epoch": 0.3, "grad_norm": 3.474024772644043, "learning_rate": 8.166380365088074e-06, "loss": 0.4667, "step": 2019 }, { "epoch": 0.3, "grad_norm": 1.0523464679718018, "learning_rate": 8.164497258345121e-06, "loss": 0.4496, "step": 2020 }, { "epoch": 0.3, "grad_norm": 0.927301824092865, "learning_rate": 8.162613402496839e-06, "loss": 0.4455, "step": 2021 }, { "epoch": 0.3, "grad_norm": 0.8985947966575623, "learning_rate": 8.160728797989175e-06, "loss": 0.4797, "step": 2022 }, { "epoch": 0.3, "grad_norm": 1.1926826238632202, "learning_rate": 8.158843445268256e-06, "loss": 0.4958, "step": 2023 }, { "epoch": 0.3, "grad_norm": 1.2098259925842285, "learning_rate": 8.156957344780386e-06, "loss": 0.4883, "step": 2024 }, { "epoch": 0.3, "grad_norm": 0.9990213513374329, "learning_rate": 8.155070496972049e-06, "loss": 0.4671, "step": 2025 }, { "epoch": 0.3, "grad_norm": 1.102433204650879, "learning_rate": 8.153182902289898e-06, "loss": 0.6047, "step": 2026 }, { "epoch": 0.3, "grad_norm": 0.9220840930938721, "learning_rate": 8.15129456118077e-06, "loss": 0.4521, "step": 2027 }, { "epoch": 0.3, "grad_norm": 0.9543277025222778, "learning_rate": 8.149405474091678e-06, "loss": 0.4507, "step": 2028 }, { "epoch": 0.3, "grad_norm": 1.2449274063110352, "learning_rate": 8.147515641469808e-06, "loss": 0.4809, "step": 2029 }, { "epoch": 0.3, "grad_norm": 1.2774995565414429, "learning_rate": 8.145625063762525e-06, "loss": 0.4271, "step": 2030 }, { "epoch": 0.31, "grad_norm": 1.6342476606369019, "learning_rate": 8.14373374141737e-06, "loss": 0.4453, "step": 2031 }, { "epoch": 0.31, "grad_norm": 1.3144367933273315, "learning_rate": 8.14184167488206e-06, "loss": 0.5272, "step": 2032 }, { "epoch": 0.31, "grad_norm": 1.1556551456451416, "learning_rate": 8.139948864604488e-06, "loss": 0.5636, "step": 2033 }, { "epoch": 0.31, "grad_norm": 1.2491849660873413, "learning_rate": 8.138055311032724e-06, "loss": 0.5708, "step": 2034 }, { "epoch": 0.31, "grad_norm": 0.9644768238067627, "learning_rate": 8.136161014615013e-06, "loss": 0.5298, "step": 2035 }, { "epoch": 0.31, "grad_norm": 1.666027307510376, "learning_rate": 8.134265975799775e-06, "loss": 0.4759, "step": 2036 }, { "epoch": 0.31, "grad_norm": 1.203344702720642, "learning_rate": 8.13237019503561e-06, "loss": 0.4501, "step": 2037 }, { "epoch": 0.31, "grad_norm": 1.1133407354354858, "learning_rate": 8.13047367277129e-06, "loss": 0.4824, "step": 2038 }, { "epoch": 0.31, "grad_norm": 1.1694674491882324, "learning_rate": 8.128576409455759e-06, "loss": 0.5623, "step": 2039 }, { "epoch": 0.31, "grad_norm": 0.9835247993469238, "learning_rate": 8.126678405538143e-06, "loss": 0.4928, "step": 2040 }, { "epoch": 0.31, "grad_norm": 1.141662836074829, "learning_rate": 8.12477966146774e-06, "loss": 0.4402, "step": 2041 }, { "epoch": 0.31, "grad_norm": 1.1275469064712524, "learning_rate": 8.122880177694028e-06, "loss": 0.4873, "step": 2042 }, { "epoch": 0.31, "grad_norm": 1.0972232818603516, "learning_rate": 8.120979954666652e-06, "loss": 0.529, "step": 2043 }, { "epoch": 0.31, "grad_norm": 1.115716576576233, "learning_rate": 8.119078992835439e-06, "loss": 0.5495, "step": 2044 }, { "epoch": 0.31, "grad_norm": 0.9632494449615479, "learning_rate": 8.117177292650384e-06, "loss": 0.4597, "step": 2045 }, { "epoch": 0.31, "grad_norm": 1.2525250911712646, "learning_rate": 8.115274854561663e-06, "loss": 0.5372, "step": 2046 }, { "epoch": 0.31, "grad_norm": 1.444968819618225, "learning_rate": 8.113371679019625e-06, "loss": 0.5026, "step": 2047 }, { "epoch": 0.31, "grad_norm": 1.352868914604187, "learning_rate": 8.111467766474793e-06, "loss": 0.5152, "step": 2048 }, { "epoch": 0.31, "grad_norm": 1.2257227897644043, "learning_rate": 8.109563117377865e-06, "loss": 0.4106, "step": 2049 }, { "epoch": 0.31, "grad_norm": 1.4239375591278076, "learning_rate": 8.10765773217971e-06, "loss": 0.4377, "step": 2050 }, { "epoch": 0.31, "grad_norm": 0.9464716911315918, "learning_rate": 8.105751611331377e-06, "loss": 0.5037, "step": 2051 }, { "epoch": 0.31, "grad_norm": 1.2867170572280884, "learning_rate": 8.103844755284086e-06, "loss": 0.5031, "step": 2052 }, { "epoch": 0.31, "grad_norm": 1.6044527292251587, "learning_rate": 8.10193716448923e-06, "loss": 0.5349, "step": 2053 }, { "epoch": 0.31, "grad_norm": 1.1767786741256714, "learning_rate": 8.100028839398377e-06, "loss": 0.4551, "step": 2054 }, { "epoch": 0.31, "grad_norm": 1.510029911994934, "learning_rate": 8.098119780463271e-06, "loss": 0.5114, "step": 2055 }, { "epoch": 0.31, "grad_norm": 1.069268822669983, "learning_rate": 8.096209988135828e-06, "loss": 0.4903, "step": 2056 }, { "epoch": 0.31, "grad_norm": 1.0878169536590576, "learning_rate": 8.094299462868134e-06, "loss": 0.4271, "step": 2057 }, { "epoch": 0.31, "grad_norm": 0.9312405586242676, "learning_rate": 8.092388205112455e-06, "loss": 0.4553, "step": 2058 }, { "epoch": 0.31, "grad_norm": 1.3616746664047241, "learning_rate": 8.090476215321226e-06, "loss": 0.526, "step": 2059 }, { "epoch": 0.31, "grad_norm": 1.2547234296798706, "learning_rate": 8.088563493947058e-06, "loss": 0.4836, "step": 2060 }, { "epoch": 0.31, "grad_norm": 1.2528437376022339, "learning_rate": 8.08665004144273e-06, "loss": 0.7502, "step": 2061 }, { "epoch": 0.31, "grad_norm": 1.171919345855713, "learning_rate": 8.0847358582612e-06, "loss": 0.4756, "step": 2062 }, { "epoch": 0.31, "grad_norm": 1.236702561378479, "learning_rate": 8.082820944855601e-06, "loss": 0.5488, "step": 2063 }, { "epoch": 0.31, "grad_norm": 1.115695834159851, "learning_rate": 8.080905301679229e-06, "loss": 0.4556, "step": 2064 }, { "epoch": 0.31, "grad_norm": 1.2175546884536743, "learning_rate": 8.07898892918556e-06, "loss": 0.5684, "step": 2065 }, { "epoch": 0.31, "grad_norm": 1.3064260482788086, "learning_rate": 8.077071827828242e-06, "loss": 0.4806, "step": 2066 }, { "epoch": 0.31, "grad_norm": 1.2762062549591064, "learning_rate": 8.075153998061094e-06, "loss": 0.4916, "step": 2067 }, { "epoch": 0.31, "grad_norm": 1.0802243947982788, "learning_rate": 8.073235440338107e-06, "loss": 0.4595, "step": 2068 }, { "epoch": 0.31, "grad_norm": 1.3013075590133667, "learning_rate": 8.071316155113446e-06, "loss": 0.5146, "step": 2069 }, { "epoch": 0.31, "grad_norm": 1.374537467956543, "learning_rate": 8.069396142841451e-06, "loss": 0.5154, "step": 2070 }, { "epoch": 0.31, "grad_norm": 0.9903571009635925, "learning_rate": 8.067475403976625e-06, "loss": 0.4853, "step": 2071 }, { "epoch": 0.31, "grad_norm": 1.3156918287277222, "learning_rate": 8.065553938973652e-06, "loss": 0.4832, "step": 2072 }, { "epoch": 0.31, "grad_norm": 1.020645260810852, "learning_rate": 8.063631748287381e-06, "loss": 0.5421, "step": 2073 }, { "epoch": 0.31, "grad_norm": 1.1490780115127563, "learning_rate": 8.061708832372841e-06, "loss": 0.4307, "step": 2074 }, { "epoch": 0.31, "grad_norm": 1.022618293762207, "learning_rate": 8.059785191685227e-06, "loss": 0.4546, "step": 2075 }, { "epoch": 0.31, "grad_norm": 1.2480998039245605, "learning_rate": 8.057860826679906e-06, "loss": 0.476, "step": 2076 }, { "epoch": 0.31, "grad_norm": 1.0840753316879272, "learning_rate": 8.055935737812415e-06, "loss": 0.4371, "step": 2077 }, { "epoch": 0.31, "grad_norm": 1.0213220119476318, "learning_rate": 8.054009925538467e-06, "loss": 0.5193, "step": 2078 }, { "epoch": 0.31, "grad_norm": 0.8913905620574951, "learning_rate": 8.052083390313943e-06, "loss": 0.4438, "step": 2079 }, { "epoch": 0.31, "grad_norm": 1.4895384311676025, "learning_rate": 8.050156132594896e-06, "loss": 0.5652, "step": 2080 }, { "epoch": 0.31, "grad_norm": 1.1531882286071777, "learning_rate": 8.048228152837548e-06, "loss": 0.673, "step": 2081 }, { "epoch": 0.31, "grad_norm": 1.5834559202194214, "learning_rate": 8.046299451498297e-06, "loss": 0.5335, "step": 2082 }, { "epoch": 0.31, "grad_norm": 1.1809049844741821, "learning_rate": 8.044370029033706e-06, "loss": 0.5483, "step": 2083 }, { "epoch": 0.31, "grad_norm": 1.12418532371521, "learning_rate": 8.04243988590051e-06, "loss": 0.4372, "step": 2084 }, { "epoch": 0.31, "grad_norm": 1.1342620849609375, "learning_rate": 8.04050902255562e-06, "loss": 0.5819, "step": 2085 }, { "epoch": 0.31, "grad_norm": 1.0337637662887573, "learning_rate": 8.03857743945611e-06, "loss": 0.5414, "step": 2086 }, { "epoch": 0.31, "grad_norm": 2.6836044788360596, "learning_rate": 8.03664513705923e-06, "loss": 0.5336, "step": 2087 }, { "epoch": 0.31, "grad_norm": 1.3142539262771606, "learning_rate": 8.034712115822396e-06, "loss": 0.5321, "step": 2088 }, { "epoch": 0.31, "grad_norm": 2.035740375518799, "learning_rate": 8.032778376203199e-06, "loss": 0.4638, "step": 2089 }, { "epoch": 0.31, "grad_norm": 1.1390225887298584, "learning_rate": 8.030843918659394e-06, "loss": 0.5515, "step": 2090 }, { "epoch": 0.31, "grad_norm": 1.1451482772827148, "learning_rate": 8.02890874364891e-06, "loss": 0.5005, "step": 2091 }, { "epoch": 0.31, "grad_norm": 0.9907151460647583, "learning_rate": 8.026972851629846e-06, "loss": 0.5136, "step": 2092 }, { "epoch": 0.31, "grad_norm": 1.1745476722717285, "learning_rate": 8.025036243060469e-06, "loss": 0.5133, "step": 2093 }, { "epoch": 0.31, "grad_norm": 1.2070096731185913, "learning_rate": 8.023098918399218e-06, "loss": 0.5583, "step": 2094 }, { "epoch": 0.31, "grad_norm": 3.1691508293151855, "learning_rate": 8.021160878104697e-06, "loss": 0.4951, "step": 2095 }, { "epoch": 0.31, "grad_norm": 1.1442681550979614, "learning_rate": 8.019222122635684e-06, "loss": 0.4845, "step": 2096 }, { "epoch": 0.32, "grad_norm": 1.2114425897598267, "learning_rate": 8.017282652451127e-06, "loss": 0.7276, "step": 2097 }, { "epoch": 0.32, "grad_norm": 0.9875019788742065, "learning_rate": 8.015342468010134e-06, "loss": 0.4733, "step": 2098 }, { "epoch": 0.32, "grad_norm": 1.0987014770507812, "learning_rate": 8.013401569771995e-06, "loss": 0.7191, "step": 2099 }, { "epoch": 0.32, "grad_norm": 1.1527906656265259, "learning_rate": 8.011459958196161e-06, "loss": 0.5405, "step": 2100 }, { "epoch": 0.32, "grad_norm": 1.0302931070327759, "learning_rate": 8.009517633742255e-06, "loss": 0.4408, "step": 2101 }, { "epoch": 0.32, "grad_norm": 1.442868709564209, "learning_rate": 8.007574596870062e-06, "loss": 0.5379, "step": 2102 }, { "epoch": 0.32, "grad_norm": 1.1473788022994995, "learning_rate": 8.00563084803955e-06, "loss": 0.4983, "step": 2103 }, { "epoch": 0.32, "grad_norm": 1.8804830312728882, "learning_rate": 8.003686387710839e-06, "loss": 0.4919, "step": 2104 }, { "epoch": 0.32, "grad_norm": 1.2875146865844727, "learning_rate": 8.001741216344228e-06, "loss": 0.48, "step": 2105 }, { "epoch": 0.32, "grad_norm": 1.3805054426193237, "learning_rate": 7.999795334400182e-06, "loss": 0.5131, "step": 2106 }, { "epoch": 0.32, "grad_norm": 1.3824795484542847, "learning_rate": 7.99784874233933e-06, "loss": 0.4934, "step": 2107 }, { "epoch": 0.32, "grad_norm": 1.0566229820251465, "learning_rate": 7.995901440622477e-06, "loss": 0.467, "step": 2108 }, { "epoch": 0.32, "grad_norm": 1.561585545539856, "learning_rate": 7.993953429710593e-06, "loss": 0.5445, "step": 2109 }, { "epoch": 0.32, "grad_norm": 0.9966574311256409, "learning_rate": 7.99200471006481e-06, "loss": 0.5054, "step": 2110 }, { "epoch": 0.32, "grad_norm": 1.1356797218322754, "learning_rate": 7.990055282146431e-06, "loss": 0.4496, "step": 2111 }, { "epoch": 0.32, "grad_norm": 1.1753194332122803, "learning_rate": 7.988105146416934e-06, "loss": 0.4899, "step": 2112 }, { "epoch": 0.32, "grad_norm": 1.0233427286148071, "learning_rate": 7.986154303337954e-06, "loss": 0.5456, "step": 2113 }, { "epoch": 0.32, "grad_norm": 1.0685334205627441, "learning_rate": 7.9842027533713e-06, "loss": 0.4637, "step": 2114 }, { "epoch": 0.32, "grad_norm": 5.330377101898193, "learning_rate": 7.982250496978948e-06, "loss": 0.4814, "step": 2115 }, { "epoch": 0.32, "grad_norm": 1.2139972448349, "learning_rate": 7.980297534623034e-06, "loss": 0.4795, "step": 2116 }, { "epoch": 0.32, "grad_norm": 1.2589304447174072, "learning_rate": 7.978343866765871e-06, "loss": 0.4819, "step": 2117 }, { "epoch": 0.32, "grad_norm": 1.298529028892517, "learning_rate": 7.976389493869934e-06, "loss": 0.5254, "step": 2118 }, { "epoch": 0.32, "grad_norm": 1.256259560585022, "learning_rate": 7.974434416397866e-06, "loss": 0.5052, "step": 2119 }, { "epoch": 0.32, "grad_norm": 1.1280410289764404, "learning_rate": 7.972478634812475e-06, "loss": 0.4747, "step": 2120 }, { "epoch": 0.32, "grad_norm": 1.1959552764892578, "learning_rate": 7.970522149576738e-06, "loss": 0.4265, "step": 2121 }, { "epoch": 0.32, "grad_norm": 1.210274338722229, "learning_rate": 7.968564961153796e-06, "loss": 0.4905, "step": 2122 }, { "epoch": 0.32, "grad_norm": 1.1900238990783691, "learning_rate": 7.966607070006961e-06, "loss": 0.5336, "step": 2123 }, { "epoch": 0.32, "grad_norm": 1.0472794771194458, "learning_rate": 7.964648476599706e-06, "loss": 0.4809, "step": 2124 }, { "epoch": 0.32, "grad_norm": 1.6986685991287231, "learning_rate": 7.962689181395672e-06, "loss": 0.4875, "step": 2125 }, { "epoch": 0.32, "grad_norm": 1.175195336341858, "learning_rate": 7.96072918485867e-06, "loss": 0.506, "step": 2126 }, { "epoch": 0.32, "grad_norm": 1.0602697134017944, "learning_rate": 7.958768487452674e-06, "loss": 0.4628, "step": 2127 }, { "epoch": 0.32, "grad_norm": 1.3154258728027344, "learning_rate": 7.95680708964182e-06, "loss": 0.4641, "step": 2128 }, { "epoch": 0.32, "grad_norm": 0.9934854507446289, "learning_rate": 7.954844991890414e-06, "loss": 0.4385, "step": 2129 }, { "epoch": 0.32, "grad_norm": 1.163089394569397, "learning_rate": 7.95288219466293e-06, "loss": 0.4287, "step": 2130 }, { "epoch": 0.32, "grad_norm": 1.4262349605560303, "learning_rate": 7.950918698424001e-06, "loss": 0.5537, "step": 2131 }, { "epoch": 0.32, "grad_norm": 1.3569245338439941, "learning_rate": 7.948954503638434e-06, "loss": 0.5064, "step": 2132 }, { "epoch": 0.32, "grad_norm": 0.981486976146698, "learning_rate": 7.946989610771194e-06, "loss": 0.5616, "step": 2133 }, { "epoch": 0.32, "grad_norm": 1.1674177646636963, "learning_rate": 7.945024020287415e-06, "loss": 0.4781, "step": 2134 }, { "epoch": 0.32, "grad_norm": 1.1776032447814941, "learning_rate": 7.94305773265239e-06, "loss": 0.405, "step": 2135 }, { "epoch": 0.32, "grad_norm": 1.195563793182373, "learning_rate": 7.94109074833159e-06, "loss": 0.5004, "step": 2136 }, { "epoch": 0.32, "grad_norm": 1.298108696937561, "learning_rate": 7.939123067790638e-06, "loss": 0.4951, "step": 2137 }, { "epoch": 0.32, "grad_norm": 0.9715425372123718, "learning_rate": 7.937154691495328e-06, "loss": 0.4578, "step": 2138 }, { "epoch": 0.32, "grad_norm": 1.336392879486084, "learning_rate": 7.935185619911619e-06, "loss": 0.4879, "step": 2139 }, { "epoch": 0.32, "grad_norm": 2.1268808841705322, "learning_rate": 7.933215853505627e-06, "loss": 0.5792, "step": 2140 }, { "epoch": 0.32, "grad_norm": 1.3532581329345703, "learning_rate": 7.931245392743647e-06, "loss": 0.4884, "step": 2141 }, { "epoch": 0.32, "grad_norm": 1.308308482170105, "learning_rate": 7.929274238092125e-06, "loss": 0.537, "step": 2142 }, { "epoch": 0.32, "grad_norm": 1.3148084878921509, "learning_rate": 7.927302390017677e-06, "loss": 0.4958, "step": 2143 }, { "epoch": 0.32, "grad_norm": 1.8922958374023438, "learning_rate": 7.925329848987082e-06, "loss": 0.5444, "step": 2144 }, { "epoch": 0.32, "grad_norm": 1.16143798828125, "learning_rate": 7.923356615467284e-06, "loss": 0.7253, "step": 2145 }, { "epoch": 0.32, "grad_norm": 14.530494689941406, "learning_rate": 7.92138268992539e-06, "loss": 0.4342, "step": 2146 }, { "epoch": 0.32, "grad_norm": 1.3731937408447266, "learning_rate": 7.919408072828671e-06, "loss": 0.5578, "step": 2147 }, { "epoch": 0.32, "grad_norm": 1.3664823770523071, "learning_rate": 7.917432764644564e-06, "loss": 0.44, "step": 2148 }, { "epoch": 0.32, "grad_norm": 1.161461591720581, "learning_rate": 7.915456765840664e-06, "loss": 0.4579, "step": 2149 }, { "epoch": 0.32, "grad_norm": 1.3094139099121094, "learning_rate": 7.913480076884733e-06, "loss": 0.4058, "step": 2150 }, { "epoch": 0.32, "grad_norm": 1.2091833353042603, "learning_rate": 7.9115026982447e-06, "loss": 0.491, "step": 2151 }, { "epoch": 0.32, "grad_norm": 1.2222604751586914, "learning_rate": 7.90952463038865e-06, "loss": 0.4788, "step": 2152 }, { "epoch": 0.32, "grad_norm": 1.0304089784622192, "learning_rate": 7.907545873784836e-06, "loss": 0.4464, "step": 2153 }, { "epoch": 0.32, "grad_norm": 1.3110706806182861, "learning_rate": 7.905566428901672e-06, "loss": 0.4966, "step": 2154 }, { "epoch": 0.32, "grad_norm": 1.2206004858016968, "learning_rate": 7.903586296207737e-06, "loss": 0.5468, "step": 2155 }, { "epoch": 0.32, "grad_norm": 1.1762847900390625, "learning_rate": 7.90160547617177e-06, "loss": 0.4954, "step": 2156 }, { "epoch": 0.32, "grad_norm": 1.4130455255508423, "learning_rate": 7.899623969262676e-06, "loss": 0.5095, "step": 2157 }, { "epoch": 0.32, "grad_norm": 0.9625163674354553, "learning_rate": 7.897641775949518e-06, "loss": 0.4442, "step": 2158 }, { "epoch": 0.32, "grad_norm": 1.1521462202072144, "learning_rate": 7.895658896701526e-06, "loss": 0.4909, "step": 2159 }, { "epoch": 0.32, "grad_norm": 1.2206379175186157, "learning_rate": 7.893675331988093e-06, "loss": 0.4818, "step": 2160 }, { "epoch": 0.32, "grad_norm": 0.9184541702270508, "learning_rate": 7.891691082278768e-06, "loss": 0.4291, "step": 2161 }, { "epoch": 0.32, "grad_norm": 1.3819621801376343, "learning_rate": 7.889706148043266e-06, "loss": 0.4408, "step": 2162 }, { "epoch": 0.32, "grad_norm": 1.0412794351577759, "learning_rate": 7.887720529751467e-06, "loss": 0.5057, "step": 2163 }, { "epoch": 0.33, "grad_norm": 1.0557228326797485, "learning_rate": 7.885734227873409e-06, "loss": 0.378, "step": 2164 }, { "epoch": 0.33, "grad_norm": 0.9863585233688354, "learning_rate": 7.883747242879292e-06, "loss": 0.4056, "step": 2165 }, { "epoch": 0.33, "grad_norm": 1.209446907043457, "learning_rate": 7.881759575239481e-06, "loss": 0.6954, "step": 2166 }, { "epoch": 0.33, "grad_norm": 1.1340231895446777, "learning_rate": 7.879771225424496e-06, "loss": 0.4605, "step": 2167 }, { "epoch": 0.33, "grad_norm": 1.0566279888153076, "learning_rate": 7.877782193905025e-06, "loss": 0.3266, "step": 2168 }, { "epoch": 0.33, "grad_norm": 1.5567615032196045, "learning_rate": 7.875792481151916e-06, "loss": 0.4661, "step": 2169 }, { "epoch": 0.33, "grad_norm": 1.4272849559783936, "learning_rate": 7.873802087636175e-06, "loss": 0.573, "step": 2170 }, { "epoch": 0.33, "grad_norm": 0.9611587524414062, "learning_rate": 7.871811013828973e-06, "loss": 0.4815, "step": 2171 }, { "epoch": 0.33, "grad_norm": 0.929174542427063, "learning_rate": 7.869819260201643e-06, "loss": 0.5579, "step": 2172 }, { "epoch": 0.33, "grad_norm": 3.3906450271606445, "learning_rate": 7.86782682722567e-06, "loss": 0.4263, "step": 2173 }, { "epoch": 0.33, "grad_norm": 1.8235715627670288, "learning_rate": 7.865833715372711e-06, "loss": 0.5087, "step": 2174 }, { "epoch": 0.33, "grad_norm": 1.1825190782546997, "learning_rate": 7.863839925114578e-06, "loss": 0.5598, "step": 2175 }, { "epoch": 0.33, "grad_norm": 1.0850032567977905, "learning_rate": 7.861845456923244e-06, "loss": 0.5027, "step": 2176 }, { "epoch": 0.33, "grad_norm": 1.4379758834838867, "learning_rate": 7.859850311270844e-06, "loss": 0.5321, "step": 2177 }, { "epoch": 0.33, "grad_norm": 1.434238076210022, "learning_rate": 7.85785448862967e-06, "loss": 0.4733, "step": 2178 }, { "epoch": 0.33, "grad_norm": 1.2935452461242676, "learning_rate": 7.85585798947218e-06, "loss": 0.4662, "step": 2179 }, { "epoch": 0.33, "grad_norm": 1.1454720497131348, "learning_rate": 7.853860814270987e-06, "loss": 0.4978, "step": 2180 }, { "epoch": 0.33, "grad_norm": 1.1206538677215576, "learning_rate": 7.851862963498864e-06, "loss": 0.7277, "step": 2181 }, { "epoch": 0.33, "grad_norm": 0.9836922883987427, "learning_rate": 7.84986443762875e-06, "loss": 0.5018, "step": 2182 }, { "epoch": 0.33, "grad_norm": 1.1256648302078247, "learning_rate": 7.847865237133735e-06, "loss": 0.4764, "step": 2183 }, { "epoch": 0.33, "grad_norm": 0.9991900324821472, "learning_rate": 7.845865362487077e-06, "loss": 0.5655, "step": 2184 }, { "epoch": 0.33, "grad_norm": 1.748929738998413, "learning_rate": 7.843864814162185e-06, "loss": 0.4788, "step": 2185 }, { "epoch": 0.33, "grad_norm": 1.2661168575286865, "learning_rate": 7.84186359263264e-06, "loss": 0.4659, "step": 2186 }, { "epoch": 0.33, "grad_norm": 1.009270191192627, "learning_rate": 7.839861698372166e-06, "loss": 0.4491, "step": 2187 }, { "epoch": 0.33, "grad_norm": 1.288762092590332, "learning_rate": 7.837859131854662e-06, "loss": 0.5808, "step": 2188 }, { "epoch": 0.33, "grad_norm": 1.1171178817749023, "learning_rate": 7.835855893554174e-06, "loss": 0.5209, "step": 2189 }, { "epoch": 0.33, "grad_norm": 1.0718928575515747, "learning_rate": 7.833851983944916e-06, "loss": 0.7357, "step": 2190 }, { "epoch": 0.33, "grad_norm": 1.1815677881240845, "learning_rate": 7.831847403501253e-06, "loss": 0.4564, "step": 2191 }, { "epoch": 0.33, "grad_norm": 1.0319461822509766, "learning_rate": 7.829842152697717e-06, "loss": 0.5659, "step": 2192 }, { "epoch": 0.33, "grad_norm": 1.4734992980957031, "learning_rate": 7.827836232008992e-06, "loss": 0.4751, "step": 2193 }, { "epoch": 0.33, "grad_norm": 1.5290164947509766, "learning_rate": 7.825829641909923e-06, "loss": 0.486, "step": 2194 }, { "epoch": 0.33, "grad_norm": 1.3249040842056274, "learning_rate": 7.823822382875514e-06, "loss": 0.4432, "step": 2195 }, { "epoch": 0.33, "grad_norm": 1.249887228012085, "learning_rate": 7.821814455380927e-06, "loss": 0.545, "step": 2196 }, { "epoch": 0.33, "grad_norm": 1.0664182901382446, "learning_rate": 7.819805859901483e-06, "loss": 0.5005, "step": 2197 }, { "epoch": 0.33, "grad_norm": 1.5329055786132812, "learning_rate": 7.817796596912658e-06, "loss": 0.4651, "step": 2198 }, { "epoch": 0.33, "grad_norm": 1.237580418586731, "learning_rate": 7.81578666689009e-06, "loss": 0.4688, "step": 2199 }, { "epoch": 0.33, "grad_norm": 1.2695503234863281, "learning_rate": 7.813776070309573e-06, "loss": 0.4802, "step": 2200 }, { "epoch": 0.33, "grad_norm": 0.9284994006156921, "learning_rate": 7.811764807647059e-06, "loss": 0.4008, "step": 2201 }, { "epoch": 0.33, "grad_norm": 1.1379517316818237, "learning_rate": 7.809752879378655e-06, "loss": 0.5092, "step": 2202 }, { "epoch": 0.33, "grad_norm": 1.165494441986084, "learning_rate": 7.807740285980633e-06, "loss": 0.5142, "step": 2203 }, { "epoch": 0.33, "grad_norm": 1.3199994564056396, "learning_rate": 7.805727027929413e-06, "loss": 0.5617, "step": 2204 }, { "epoch": 0.33, "grad_norm": 0.9599787592887878, "learning_rate": 7.80371310570158e-06, "loss": 0.4746, "step": 2205 }, { "epoch": 0.33, "grad_norm": 1.1813738346099854, "learning_rate": 7.801698519773872e-06, "loss": 0.5487, "step": 2206 }, { "epoch": 0.33, "grad_norm": 1.1649616956710815, "learning_rate": 7.799683270623186e-06, "loss": 0.6057, "step": 2207 }, { "epoch": 0.33, "grad_norm": 1.125325083732605, "learning_rate": 7.797667358726576e-06, "loss": 0.5048, "step": 2208 }, { "epoch": 0.33, "grad_norm": 1.1859546899795532, "learning_rate": 7.795650784561249e-06, "loss": 0.4402, "step": 2209 }, { "epoch": 0.33, "grad_norm": 1.3071953058242798, "learning_rate": 7.793633548604573e-06, "loss": 0.4946, "step": 2210 }, { "epoch": 0.33, "grad_norm": 1.038037896156311, "learning_rate": 7.791615651334075e-06, "loss": 0.5111, "step": 2211 }, { "epoch": 0.33, "grad_norm": 1.3134137392044067, "learning_rate": 7.789597093227432e-06, "loss": 0.4861, "step": 2212 }, { "epoch": 0.33, "grad_norm": 1.0159581899642944, "learning_rate": 7.78757787476248e-06, "loss": 0.5533, "step": 2213 }, { "epoch": 0.33, "grad_norm": 1.2850741147994995, "learning_rate": 7.785557996417214e-06, "loss": 0.4993, "step": 2214 }, { "epoch": 0.33, "grad_norm": 1.1775296926498413, "learning_rate": 7.783537458669783e-06, "loss": 0.4898, "step": 2215 }, { "epoch": 0.33, "grad_norm": 1.1524779796600342, "learning_rate": 7.781516261998492e-06, "loss": 0.5201, "step": 2216 }, { "epoch": 0.33, "grad_norm": 1.0705476999282837, "learning_rate": 7.779494406881801e-06, "loss": 0.5508, "step": 2217 }, { "epoch": 0.33, "grad_norm": 1.1263540983200073, "learning_rate": 7.777471893798328e-06, "loss": 0.5743, "step": 2218 }, { "epoch": 0.33, "grad_norm": 1.279028296470642, "learning_rate": 7.775448723226846e-06, "loss": 0.436, "step": 2219 }, { "epoch": 0.33, "grad_norm": 1.2280296087265015, "learning_rate": 7.773424895646284e-06, "loss": 0.7223, "step": 2220 }, { "epoch": 0.33, "grad_norm": 1.0446377992630005, "learning_rate": 7.771400411535726e-06, "loss": 0.4623, "step": 2221 }, { "epoch": 0.33, "grad_norm": 1.0466601848602295, "learning_rate": 7.769375271374408e-06, "loss": 0.5052, "step": 2222 }, { "epoch": 0.33, "grad_norm": 1.3218660354614258, "learning_rate": 7.76734947564173e-06, "loss": 0.5796, "step": 2223 }, { "epoch": 0.33, "grad_norm": 1.1179909706115723, "learning_rate": 7.765323024817237e-06, "loss": 0.4901, "step": 2224 }, { "epoch": 0.33, "grad_norm": 1.085776925086975, "learning_rate": 7.763295919380637e-06, "loss": 0.5403, "step": 2225 }, { "epoch": 0.33, "grad_norm": 1.0542304515838623, "learning_rate": 7.761268159811792e-06, "loss": 0.4998, "step": 2226 }, { "epoch": 0.33, "grad_norm": 1.2892329692840576, "learning_rate": 7.759239746590712e-06, "loss": 0.5387, "step": 2227 }, { "epoch": 0.33, "grad_norm": 1.162429928779602, "learning_rate": 7.757210680197569e-06, "loss": 0.4395, "step": 2228 }, { "epoch": 0.33, "grad_norm": 0.9679303169250488, "learning_rate": 7.755180961112686e-06, "loss": 0.4638, "step": 2229 }, { "epoch": 0.33, "grad_norm": 1.1792943477630615, "learning_rate": 7.753150589816543e-06, "loss": 0.4993, "step": 2230 }, { "epoch": 0.34, "grad_norm": 1.8477814197540283, "learning_rate": 7.751119566789772e-06, "loss": 0.7687, "step": 2231 }, { "epoch": 0.34, "grad_norm": 1.2205191850662231, "learning_rate": 7.749087892513161e-06, "loss": 0.5074, "step": 2232 }, { "epoch": 0.34, "grad_norm": 1.0045220851898193, "learning_rate": 7.747055567467652e-06, "loss": 0.5226, "step": 2233 }, { "epoch": 0.34, "grad_norm": 1.2944978475570679, "learning_rate": 7.74502259213434e-06, "loss": 0.5322, "step": 2234 }, { "epoch": 0.34, "grad_norm": 1.4726482629776, "learning_rate": 7.742988966994474e-06, "loss": 0.4649, "step": 2235 }, { "epoch": 0.34, "grad_norm": 1.174717903137207, "learning_rate": 7.740954692529457e-06, "loss": 0.4586, "step": 2236 }, { "epoch": 0.34, "grad_norm": 1.3838640451431274, "learning_rate": 7.738919769220847e-06, "loss": 0.5052, "step": 2237 }, { "epoch": 0.34, "grad_norm": 1.141700029373169, "learning_rate": 7.736884197550353e-06, "loss": 0.4307, "step": 2238 }, { "epoch": 0.34, "grad_norm": 0.9732778072357178, "learning_rate": 7.734847977999843e-06, "loss": 0.5878, "step": 2239 }, { "epoch": 0.34, "grad_norm": 1.1884546279907227, "learning_rate": 7.732811111051329e-06, "loss": 0.5845, "step": 2240 }, { "epoch": 0.34, "grad_norm": 1.2718251943588257, "learning_rate": 7.730773597186986e-06, "loss": 0.6009, "step": 2241 }, { "epoch": 0.34, "grad_norm": 1.1417548656463623, "learning_rate": 7.728735436889137e-06, "loss": 0.5156, "step": 2242 }, { "epoch": 0.34, "grad_norm": 1.2829365730285645, "learning_rate": 7.726696630640255e-06, "loss": 0.5272, "step": 2243 }, { "epoch": 0.34, "grad_norm": 1.2968498468399048, "learning_rate": 7.724657178922976e-06, "loss": 0.7753, "step": 2244 }, { "epoch": 0.34, "grad_norm": 1.2758896350860596, "learning_rate": 7.722617082220081e-06, "loss": 0.7191, "step": 2245 }, { "epoch": 0.34, "grad_norm": 1.1681660413742065, "learning_rate": 7.7205763410145e-06, "loss": 0.5409, "step": 2246 }, { "epoch": 0.34, "grad_norm": 1.176934003829956, "learning_rate": 7.718534955789327e-06, "loss": 0.5273, "step": 2247 }, { "epoch": 0.34, "grad_norm": 2.1676130294799805, "learning_rate": 7.716492927027799e-06, "loss": 0.5061, "step": 2248 }, { "epoch": 0.34, "grad_norm": 1.420136570930481, "learning_rate": 7.714450255213311e-06, "loss": 0.5233, "step": 2249 }, { "epoch": 0.34, "grad_norm": 1.3646793365478516, "learning_rate": 7.712406940829404e-06, "loss": 0.5695, "step": 2250 }, { "epoch": 0.34, "grad_norm": 1.2433483600616455, "learning_rate": 7.710362984359781e-06, "loss": 0.4638, "step": 2251 }, { "epoch": 0.34, "grad_norm": 1.6944663524627686, "learning_rate": 7.708318386288283e-06, "loss": 0.4733, "step": 2252 }, { "epoch": 0.34, "grad_norm": 1.1279878616333008, "learning_rate": 7.706273147098917e-06, "loss": 0.4848, "step": 2253 }, { "epoch": 0.34, "grad_norm": 1.2804617881774902, "learning_rate": 7.704227267275834e-06, "loss": 0.468, "step": 2254 }, { "epoch": 0.34, "grad_norm": 1.4686388969421387, "learning_rate": 7.70218074730334e-06, "loss": 0.4479, "step": 2255 }, { "epoch": 0.34, "grad_norm": 1.5951213836669922, "learning_rate": 7.700133587665885e-06, "loss": 0.4506, "step": 2256 }, { "epoch": 0.34, "grad_norm": 1.2267330884933472, "learning_rate": 7.698085788848083e-06, "loss": 0.5296, "step": 2257 }, { "epoch": 0.34, "grad_norm": 1.2052555084228516, "learning_rate": 7.696037351334689e-06, "loss": 0.698, "step": 2258 }, { "epoch": 0.34, "grad_norm": 1.265661358833313, "learning_rate": 7.693988275610613e-06, "loss": 0.5089, "step": 2259 }, { "epoch": 0.34, "grad_norm": 1.1408991813659668, "learning_rate": 7.691938562160916e-06, "loss": 0.4558, "step": 2260 }, { "epoch": 0.34, "grad_norm": 1.0576363801956177, "learning_rate": 7.68988821147081e-06, "loss": 0.5017, "step": 2261 }, { "epoch": 0.34, "grad_norm": 1.085523009300232, "learning_rate": 7.687837224025656e-06, "loss": 0.4441, "step": 2262 }, { "epoch": 0.34, "grad_norm": 1.1537896394729614, "learning_rate": 7.685785600310972e-06, "loss": 0.5111, "step": 2263 }, { "epoch": 0.34, "grad_norm": 1.1371105909347534, "learning_rate": 7.683733340812416e-06, "loss": 0.6189, "step": 2264 }, { "epoch": 0.34, "grad_norm": 1.3544899225234985, "learning_rate": 7.681680446015805e-06, "loss": 0.5428, "step": 2265 }, { "epoch": 0.34, "grad_norm": 1.2619630098342896, "learning_rate": 7.679626916407105e-06, "loss": 0.4357, "step": 2266 }, { "epoch": 0.34, "grad_norm": 1.2554347515106201, "learning_rate": 7.67757275247243e-06, "loss": 0.5197, "step": 2267 }, { "epoch": 0.34, "grad_norm": 1.1970428228378296, "learning_rate": 7.675517954698044e-06, "loss": 0.4866, "step": 2268 }, { "epoch": 0.34, "grad_norm": 1.310687780380249, "learning_rate": 7.673462523570368e-06, "loss": 0.7786, "step": 2269 }, { "epoch": 0.34, "grad_norm": 1.1235849857330322, "learning_rate": 7.67140645957596e-06, "loss": 0.5153, "step": 2270 }, { "epoch": 0.34, "grad_norm": 1.1677203178405762, "learning_rate": 7.669349763201536e-06, "loss": 0.4646, "step": 2271 }, { "epoch": 0.34, "grad_norm": 1.1846681833267212, "learning_rate": 7.667292434933966e-06, "loss": 0.5816, "step": 2272 }, { "epoch": 0.34, "grad_norm": 1.110307216644287, "learning_rate": 7.665234475260262e-06, "loss": 0.5509, "step": 2273 }, { "epoch": 0.34, "grad_norm": 1.49363374710083, "learning_rate": 7.663175884667585e-06, "loss": 0.4565, "step": 2274 }, { "epoch": 0.34, "grad_norm": 1.0901762247085571, "learning_rate": 7.66111666364325e-06, "loss": 0.5168, "step": 2275 }, { "epoch": 0.34, "grad_norm": 1.3715050220489502, "learning_rate": 7.659056812674722e-06, "loss": 0.4773, "step": 2276 }, { "epoch": 0.34, "grad_norm": 1.3269152641296387, "learning_rate": 7.656996332249607e-06, "loss": 0.4499, "step": 2277 }, { "epoch": 0.34, "grad_norm": 1.788150668144226, "learning_rate": 7.65493522285567e-06, "loss": 0.5121, "step": 2278 }, { "epoch": 0.34, "grad_norm": 1.1623916625976562, "learning_rate": 7.65287348498082e-06, "loss": 0.4291, "step": 2279 }, { "epoch": 0.34, "grad_norm": 1.049874186515808, "learning_rate": 7.650811119113112e-06, "loss": 0.5, "step": 2280 }, { "epoch": 0.34, "grad_norm": 1.1606426239013672, "learning_rate": 7.648748125740757e-06, "loss": 0.4667, "step": 2281 }, { "epoch": 0.34, "grad_norm": 1.4565225839614868, "learning_rate": 7.646684505352108e-06, "loss": 0.4762, "step": 2282 }, { "epoch": 0.34, "grad_norm": 1.2967579364776611, "learning_rate": 7.64462025843567e-06, "loss": 0.5007, "step": 2283 }, { "epoch": 0.34, "grad_norm": 1.8304908275604248, "learning_rate": 7.642555385480095e-06, "loss": 0.4886, "step": 2284 }, { "epoch": 0.34, "grad_norm": 1.0585025548934937, "learning_rate": 7.64048988697418e-06, "loss": 0.4708, "step": 2285 }, { "epoch": 0.34, "grad_norm": 1.189233660697937, "learning_rate": 7.638423763406878e-06, "loss": 0.5038, "step": 2286 }, { "epoch": 0.34, "grad_norm": 0.9521053433418274, "learning_rate": 7.636357015267284e-06, "loss": 0.4388, "step": 2287 }, { "epoch": 0.34, "grad_norm": 1.127618670463562, "learning_rate": 7.634289643044643e-06, "loss": 0.5644, "step": 2288 }, { "epoch": 0.34, "grad_norm": 1.2888423204421997, "learning_rate": 7.632221647228345e-06, "loss": 0.5278, "step": 2289 }, { "epoch": 0.34, "grad_norm": 1.2612581253051758, "learning_rate": 7.63015302830793e-06, "loss": 0.5327, "step": 2290 }, { "epoch": 0.34, "grad_norm": 1.212496042251587, "learning_rate": 7.628083786773084e-06, "loss": 0.5285, "step": 2291 }, { "epoch": 0.34, "grad_norm": 1.1320723295211792, "learning_rate": 7.626013923113644e-06, "loss": 0.5135, "step": 2292 }, { "epoch": 0.34, "grad_norm": 2.288585662841797, "learning_rate": 7.623943437819592e-06, "loss": 0.4282, "step": 2293 }, { "epoch": 0.34, "grad_norm": 1.2322803735733032, "learning_rate": 7.621872331381058e-06, "loss": 0.4161, "step": 2294 }, { "epoch": 0.34, "grad_norm": 1.5459561347961426, "learning_rate": 7.619800604288312e-06, "loss": 0.505, "step": 2295 }, { "epoch": 0.34, "grad_norm": 1.293083906173706, "learning_rate": 7.61772825703178e-06, "loss": 0.4851, "step": 2296 }, { "epoch": 0.35, "grad_norm": 1.2305504083633423, "learning_rate": 7.615655290102033e-06, "loss": 0.4627, "step": 2297 }, { "epoch": 0.35, "grad_norm": 1.3995587825775146, "learning_rate": 7.613581703989787e-06, "loss": 0.4632, "step": 2298 }, { "epoch": 0.35, "grad_norm": 0.9297808408737183, "learning_rate": 7.611507499185905e-06, "loss": 0.5453, "step": 2299 }, { "epoch": 0.35, "grad_norm": 1.0712443590164185, "learning_rate": 7.609432676181396e-06, "loss": 0.4792, "step": 2300 }, { "epoch": 0.35, "grad_norm": 1.5953797101974487, "learning_rate": 7.607357235467417e-06, "loss": 0.4449, "step": 2301 }, { "epoch": 0.35, "grad_norm": 1.0564831495285034, "learning_rate": 7.605281177535264e-06, "loss": 0.4744, "step": 2302 }, { "epoch": 0.35, "grad_norm": 2.0492446422576904, "learning_rate": 7.603204502876393e-06, "loss": 0.4552, "step": 2303 }, { "epoch": 0.35, "grad_norm": 1.3362518548965454, "learning_rate": 7.601127211982393e-06, "loss": 0.7425, "step": 2304 }, { "epoch": 0.35, "grad_norm": 1.6845834255218506, "learning_rate": 7.5990493053450076e-06, "loss": 0.5328, "step": 2305 }, { "epoch": 0.35, "grad_norm": 1.2669556140899658, "learning_rate": 7.596970783456119e-06, "loss": 0.4768, "step": 2306 }, { "epoch": 0.35, "grad_norm": 1.1972705125808716, "learning_rate": 7.5948916468077606e-06, "loss": 0.5606, "step": 2307 }, { "epoch": 0.35, "grad_norm": 1.1981420516967773, "learning_rate": 7.592811895892108e-06, "loss": 0.5574, "step": 2308 }, { "epoch": 0.35, "grad_norm": 2.279768466949463, "learning_rate": 7.590731531201485e-06, "loss": 0.4346, "step": 2309 }, { "epoch": 0.35, "grad_norm": 1.405651330947876, "learning_rate": 7.588650553228358e-06, "loss": 0.4514, "step": 2310 }, { "epoch": 0.35, "grad_norm": 1.165381669998169, "learning_rate": 7.586568962465341e-06, "loss": 0.4657, "step": 2311 }, { "epoch": 0.35, "grad_norm": 1.2791225910186768, "learning_rate": 7.584486759405191e-06, "loss": 0.5215, "step": 2312 }, { "epoch": 0.35, "grad_norm": 1.1655889749526978, "learning_rate": 7.582403944540808e-06, "loss": 0.4861, "step": 2313 }, { "epoch": 0.35, "grad_norm": 1.1470558643341064, "learning_rate": 7.580320518365245e-06, "loss": 0.5067, "step": 2314 }, { "epoch": 0.35, "grad_norm": 1.3758248090744019, "learning_rate": 7.57823648137169e-06, "loss": 0.4633, "step": 2315 }, { "epoch": 0.35, "grad_norm": 1.2103304862976074, "learning_rate": 7.5761518340534825e-06, "loss": 0.5177, "step": 2316 }, { "epoch": 0.35, "grad_norm": 1.2083553075790405, "learning_rate": 7.5740665769041026e-06, "loss": 0.4592, "step": 2317 }, { "epoch": 0.35, "grad_norm": 1.515596866607666, "learning_rate": 7.5719807104171775e-06, "loss": 0.5478, "step": 2318 }, { "epoch": 0.35, "grad_norm": 1.1872565746307373, "learning_rate": 7.569894235086472e-06, "loss": 0.7649, "step": 2319 }, { "epoch": 0.35, "grad_norm": 0.9914900660514832, "learning_rate": 7.567807151405908e-06, "loss": 0.5373, "step": 2320 }, { "epoch": 0.35, "grad_norm": 1.125916838645935, "learning_rate": 7.565719459869538e-06, "loss": 0.4117, "step": 2321 }, { "epoch": 0.35, "grad_norm": 1.269258975982666, "learning_rate": 7.5636311609715665e-06, "loss": 0.4437, "step": 2322 }, { "epoch": 0.35, "grad_norm": 1.0164706707000732, "learning_rate": 7.561542255206338e-06, "loss": 0.4621, "step": 2323 }, { "epoch": 0.35, "grad_norm": 1.018634557723999, "learning_rate": 7.559452743068342e-06, "loss": 0.4841, "step": 2324 }, { "epoch": 0.35, "grad_norm": 1.9982727766036987, "learning_rate": 7.557362625052211e-06, "loss": 0.526, "step": 2325 }, { "epoch": 0.35, "grad_norm": 1.0808093547821045, "learning_rate": 7.555271901652724e-06, "loss": 0.4813, "step": 2326 }, { "epoch": 0.35, "grad_norm": 0.9977470636367798, "learning_rate": 7.553180573364799e-06, "loss": 0.405, "step": 2327 }, { "epoch": 0.35, "grad_norm": 1.1710679531097412, "learning_rate": 7.551088640683498e-06, "loss": 0.5108, "step": 2328 }, { "epoch": 0.35, "grad_norm": 1.2961822748184204, "learning_rate": 7.548996104104029e-06, "loss": 0.4261, "step": 2329 }, { "epoch": 0.35, "grad_norm": 1.3440345525741577, "learning_rate": 7.546902964121739e-06, "loss": 0.4846, "step": 2330 }, { "epoch": 0.35, "grad_norm": 1.5996930599212646, "learning_rate": 7.544809221232119e-06, "loss": 0.5026, "step": 2331 }, { "epoch": 0.35, "grad_norm": 1.1689379215240479, "learning_rate": 7.542714875930807e-06, "loss": 0.5269, "step": 2332 }, { "epoch": 0.35, "grad_norm": 1.1690027713775635, "learning_rate": 7.5406199287135775e-06, "loss": 0.5112, "step": 2333 }, { "epoch": 0.35, "grad_norm": 1.3654327392578125, "learning_rate": 7.5385243800763505e-06, "loss": 0.4951, "step": 2334 }, { "epoch": 0.35, "grad_norm": 0.9143530130386353, "learning_rate": 7.536428230515188e-06, "loss": 0.4618, "step": 2335 }, { "epoch": 0.35, "grad_norm": 1.2324697971343994, "learning_rate": 7.5343314805262945e-06, "loss": 0.4592, "step": 2336 }, { "epoch": 0.35, "grad_norm": 1.2644633054733276, "learning_rate": 7.532234130606015e-06, "loss": 0.5339, "step": 2337 }, { "epoch": 0.35, "grad_norm": 0.9662632346153259, "learning_rate": 7.53013618125084e-06, "loss": 0.4944, "step": 2338 }, { "epoch": 0.35, "grad_norm": 1.149612545967102, "learning_rate": 7.528037632957399e-06, "loss": 0.4569, "step": 2339 }, { "epoch": 0.35, "grad_norm": 1.333944320678711, "learning_rate": 7.525938486222464e-06, "loss": 0.5103, "step": 2340 }, { "epoch": 0.35, "grad_norm": 1.1814922094345093, "learning_rate": 7.523838741542948e-06, "loss": 0.4724, "step": 2341 }, { "epoch": 0.35, "grad_norm": 1.375030517578125, "learning_rate": 7.5217383994159054e-06, "loss": 0.5235, "step": 2342 }, { "epoch": 0.35, "grad_norm": 1.2129656076431274, "learning_rate": 7.519637460338536e-06, "loss": 0.4493, "step": 2343 }, { "epoch": 0.35, "grad_norm": 1.2607988119125366, "learning_rate": 7.517535924808174e-06, "loss": 0.5003, "step": 2344 }, { "epoch": 0.35, "grad_norm": 1.3778417110443115, "learning_rate": 7.515433793322302e-06, "loss": 0.4932, "step": 2345 }, { "epoch": 0.35, "grad_norm": 1.177797794342041, "learning_rate": 7.513331066378538e-06, "loss": 0.44, "step": 2346 }, { "epoch": 0.35, "grad_norm": 1.2130974531173706, "learning_rate": 7.511227744474645e-06, "loss": 0.7262, "step": 2347 }, { "epoch": 0.35, "grad_norm": 1.3081746101379395, "learning_rate": 7.509123828108523e-06, "loss": 0.5285, "step": 2348 }, { "epoch": 0.35, "grad_norm": 0.9822036027908325, "learning_rate": 7.507019317778216e-06, "loss": 0.5316, "step": 2349 }, { "epoch": 0.35, "grad_norm": 1.3572824001312256, "learning_rate": 7.5049142139819066e-06, "loss": 0.5215, "step": 2350 }, { "epoch": 0.35, "grad_norm": 1.5319981575012207, "learning_rate": 7.50280851721792e-06, "loss": 0.4929, "step": 2351 }, { "epoch": 0.35, "grad_norm": 1.1541012525558472, "learning_rate": 7.500702227984718e-06, "loss": 0.4074, "step": 2352 }, { "epoch": 0.35, "grad_norm": 1.4199727773666382, "learning_rate": 7.498595346780908e-06, "loss": 0.6001, "step": 2353 }, { "epoch": 0.35, "grad_norm": 1.2331675291061401, "learning_rate": 7.496487874105234e-06, "loss": 0.7444, "step": 2354 }, { "epoch": 0.35, "grad_norm": 1.7406742572784424, "learning_rate": 7.494379810456578e-06, "loss": 0.4724, "step": 2355 }, { "epoch": 0.35, "grad_norm": 1.1169239282608032, "learning_rate": 7.4922711563339675e-06, "loss": 0.4978, "step": 2356 }, { "epoch": 0.35, "grad_norm": 1.0675419569015503, "learning_rate": 7.490161912236564e-06, "loss": 0.4761, "step": 2357 }, { "epoch": 0.35, "grad_norm": 1.1829025745391846, "learning_rate": 7.488052078663674e-06, "loss": 0.4315, "step": 2358 }, { "epoch": 0.35, "grad_norm": 1.5400534868240356, "learning_rate": 7.485941656114742e-06, "loss": 0.4882, "step": 2359 }, { "epoch": 0.35, "grad_norm": 1.3704111576080322, "learning_rate": 7.483830645089348e-06, "loss": 0.5097, "step": 2360 }, { "epoch": 0.35, "grad_norm": 1.0602749586105347, "learning_rate": 7.481719046087216e-06, "loss": 0.4836, "step": 2361 }, { "epoch": 0.35, "grad_norm": 1.05294668674469, "learning_rate": 7.479606859608205e-06, "loss": 0.4452, "step": 2362 }, { "epoch": 0.35, "grad_norm": 1.067036747932434, "learning_rate": 7.477494086152317e-06, "loss": 0.5096, "step": 2363 }, { "epoch": 0.36, "grad_norm": 1.2591038942337036, "learning_rate": 7.475380726219692e-06, "loss": 0.4904, "step": 2364 }, { "epoch": 0.36, "grad_norm": 1.274048924446106, "learning_rate": 7.473266780310608e-06, "loss": 0.5249, "step": 2365 }, { "epoch": 0.36, "grad_norm": 2.171163320541382, "learning_rate": 7.471152248925484e-06, "loss": 0.4984, "step": 2366 }, { "epoch": 0.36, "grad_norm": 1.118028998374939, "learning_rate": 7.4690371325648714e-06, "loss": 0.4919, "step": 2367 }, { "epoch": 0.36, "grad_norm": 1.261716365814209, "learning_rate": 7.466921431729467e-06, "loss": 0.5258, "step": 2368 }, { "epoch": 0.36, "grad_norm": 1.1559228897094727, "learning_rate": 7.464805146920101e-06, "loss": 0.5567, "step": 2369 }, { "epoch": 0.36, "grad_norm": 1.2705867290496826, "learning_rate": 7.462688278637746e-06, "loss": 0.6991, "step": 2370 }, { "epoch": 0.36, "grad_norm": 1.4116555452346802, "learning_rate": 7.460570827383511e-06, "loss": 0.5064, "step": 2371 }, { "epoch": 0.36, "grad_norm": 1.138493299484253, "learning_rate": 7.4584527936586435e-06, "loss": 0.4557, "step": 2372 }, { "epoch": 0.36, "grad_norm": 1.1655514240264893, "learning_rate": 7.456334177964525e-06, "loss": 0.5073, "step": 2373 }, { "epoch": 0.36, "grad_norm": 1.180747151374817, "learning_rate": 7.454214980802678e-06, "loss": 0.5154, "step": 2374 }, { "epoch": 0.36, "grad_norm": 1.0649062395095825, "learning_rate": 7.452095202674766e-06, "loss": 0.7057, "step": 2375 }, { "epoch": 0.36, "grad_norm": 1.1900917291641235, "learning_rate": 7.4499748440825845e-06, "loss": 0.4514, "step": 2376 }, { "epoch": 0.36, "grad_norm": 1.3165297508239746, "learning_rate": 7.447853905528069e-06, "loss": 0.5298, "step": 2377 }, { "epoch": 0.36, "grad_norm": 1.172811508178711, "learning_rate": 7.445732387513292e-06, "loss": 0.4641, "step": 2378 }, { "epoch": 0.36, "grad_norm": 1.3886696100234985, "learning_rate": 7.443610290540463e-06, "loss": 0.4847, "step": 2379 }, { "epoch": 0.36, "grad_norm": 1.308902382850647, "learning_rate": 7.441487615111926e-06, "loss": 0.5185, "step": 2380 }, { "epoch": 0.36, "grad_norm": 1.126732587814331, "learning_rate": 7.439364361730167e-06, "loss": 0.5166, "step": 2381 }, { "epoch": 0.36, "grad_norm": 1.1466495990753174, "learning_rate": 7.437240530897807e-06, "loss": 0.4682, "step": 2382 }, { "epoch": 0.36, "grad_norm": 1.2001398801803589, "learning_rate": 7.435116123117601e-06, "loss": 0.4165, "step": 2383 }, { "epoch": 0.36, "grad_norm": 1.5163896083831787, "learning_rate": 7.432991138892444e-06, "loss": 0.4487, "step": 2384 }, { "epoch": 0.36, "grad_norm": 1.2925935983657837, "learning_rate": 7.430865578725362e-06, "loss": 0.5253, "step": 2385 }, { "epoch": 0.36, "grad_norm": 1.2742618322372437, "learning_rate": 7.428739443119528e-06, "loss": 0.5373, "step": 2386 }, { "epoch": 0.36, "grad_norm": 1.1887495517730713, "learning_rate": 7.42661273257824e-06, "loss": 0.4553, "step": 2387 }, { "epoch": 0.36, "grad_norm": 1.075544834136963, "learning_rate": 7.424485447604937e-06, "loss": 0.5171, "step": 2388 }, { "epoch": 0.36, "grad_norm": 1.3111194372177124, "learning_rate": 7.422357588703195e-06, "loss": 0.5729, "step": 2389 }, { "epoch": 0.36, "grad_norm": 1.130771517753601, "learning_rate": 7.420229156376725e-06, "loss": 0.5585, "step": 2390 }, { "epoch": 0.36, "grad_norm": 1.1632000207901, "learning_rate": 7.4181001511293694e-06, "loss": 0.7556, "step": 2391 }, { "epoch": 0.36, "grad_norm": 1.170804738998413, "learning_rate": 7.415970573465115e-06, "loss": 0.4953, "step": 2392 }, { "epoch": 0.36, "grad_norm": 1.3445535898208618, "learning_rate": 7.413840423888075e-06, "loss": 0.4706, "step": 2393 }, { "epoch": 0.36, "grad_norm": 1.4086800813674927, "learning_rate": 7.4117097029025055e-06, "loss": 0.5624, "step": 2394 }, { "epoch": 0.36, "grad_norm": 1.258203148841858, "learning_rate": 7.409578411012792e-06, "loss": 0.3901, "step": 2395 }, { "epoch": 0.36, "grad_norm": 1.5873332023620605, "learning_rate": 7.407446548723458e-06, "loss": 0.5578, "step": 2396 }, { "epoch": 0.36, "grad_norm": 1.507365345954895, "learning_rate": 7.405314116539164e-06, "loss": 0.5091, "step": 2397 }, { "epoch": 0.36, "grad_norm": 1.3260647058486938, "learning_rate": 7.4031811149647015e-06, "loss": 0.4689, "step": 2398 }, { "epoch": 0.36, "grad_norm": 1.0883283615112305, "learning_rate": 7.4010475445049964e-06, "loss": 0.4692, "step": 2399 }, { "epoch": 0.36, "grad_norm": 0.990013062953949, "learning_rate": 7.398913405665115e-06, "loss": 0.4425, "step": 2400 }, { "epoch": 0.36, "grad_norm": 0.9790709614753723, "learning_rate": 7.396778698950251e-06, "loss": 0.4364, "step": 2401 }, { "epoch": 0.36, "grad_norm": 1.916588306427002, "learning_rate": 7.394643424865737e-06, "loss": 0.5376, "step": 2402 }, { "epoch": 0.36, "grad_norm": 1.4331011772155762, "learning_rate": 7.392507583917041e-06, "loss": 0.5338, "step": 2403 }, { "epoch": 0.36, "grad_norm": 1.3341747522354126, "learning_rate": 7.3903711766097604e-06, "loss": 0.4834, "step": 2404 }, { "epoch": 0.36, "grad_norm": 1.211679458618164, "learning_rate": 7.38823420344963e-06, "loss": 0.4817, "step": 2405 }, { "epoch": 0.36, "grad_norm": 1.322950839996338, "learning_rate": 7.386096664942518e-06, "loss": 0.5533, "step": 2406 }, { "epoch": 0.36, "grad_norm": 1.041101336479187, "learning_rate": 7.383958561594426e-06, "loss": 0.4398, "step": 2407 }, { "epoch": 0.36, "grad_norm": 1.0740821361541748, "learning_rate": 7.381819893911491e-06, "loss": 0.4837, "step": 2408 }, { "epoch": 0.36, "grad_norm": 1.3479503393173218, "learning_rate": 7.3796806623999785e-06, "loss": 0.5192, "step": 2409 }, { "epoch": 0.36, "grad_norm": 0.9638794660568237, "learning_rate": 7.377540867566295e-06, "loss": 0.4454, "step": 2410 }, { "epoch": 0.36, "grad_norm": 1.1737377643585205, "learning_rate": 7.375400509916975e-06, "loss": 0.4855, "step": 2411 }, { "epoch": 0.36, "grad_norm": 1.1129244565963745, "learning_rate": 7.373259589958688e-06, "loss": 0.4327, "step": 2412 }, { "epoch": 0.36, "grad_norm": 1.6409835815429688, "learning_rate": 7.371118108198234e-06, "loss": 0.5428, "step": 2413 }, { "epoch": 0.36, "grad_norm": 1.0059683322906494, "learning_rate": 7.368976065142552e-06, "loss": 0.5121, "step": 2414 }, { "epoch": 0.36, "grad_norm": 1.0730644464492798, "learning_rate": 7.366833461298709e-06, "loss": 0.4927, "step": 2415 }, { "epoch": 0.36, "grad_norm": 1.1968027353286743, "learning_rate": 7.364690297173904e-06, "loss": 0.4649, "step": 2416 }, { "epoch": 0.36, "grad_norm": 1.0667130947113037, "learning_rate": 7.362546573275473e-06, "loss": 0.4873, "step": 2417 }, { "epoch": 0.36, "grad_norm": 1.0361626148223877, "learning_rate": 7.360402290110882e-06, "loss": 0.5351, "step": 2418 }, { "epoch": 0.36, "grad_norm": 1.0851088762283325, "learning_rate": 7.358257448187726e-06, "loss": 0.5204, "step": 2419 }, { "epoch": 0.36, "grad_norm": 1.2031375169754028, "learning_rate": 7.356112048013741e-06, "loss": 0.5598, "step": 2420 }, { "epoch": 0.36, "grad_norm": 1.1849137544631958, "learning_rate": 7.353966090096786e-06, "loss": 0.5661, "step": 2421 }, { "epoch": 0.36, "grad_norm": 1.3773994445800781, "learning_rate": 7.3518195749448565e-06, "loss": 0.488, "step": 2422 }, { "epoch": 0.36, "grad_norm": 1.1725542545318604, "learning_rate": 7.349672503066081e-06, "loss": 0.473, "step": 2423 }, { "epoch": 0.36, "grad_norm": 0.9716424345970154, "learning_rate": 7.347524874968716e-06, "loss": 0.5508, "step": 2424 }, { "epoch": 0.36, "grad_norm": 1.3296419382095337, "learning_rate": 7.3453766911611525e-06, "loss": 0.4652, "step": 2425 }, { "epoch": 0.36, "grad_norm": 1.1514360904693604, "learning_rate": 7.3432279521519145e-06, "loss": 0.5362, "step": 2426 }, { "epoch": 0.36, "grad_norm": 1.358127474784851, "learning_rate": 7.341078658449653e-06, "loss": 0.5299, "step": 2427 }, { "epoch": 0.36, "grad_norm": 1.4640774726867676, "learning_rate": 7.338928810563152e-06, "loss": 0.4963, "step": 2428 }, { "epoch": 0.36, "grad_norm": 1.060956358909607, "learning_rate": 7.336778409001329e-06, "loss": 0.4862, "step": 2429 }, { "epoch": 0.37, "grad_norm": 1.335572361946106, "learning_rate": 7.334627454273229e-06, "loss": 0.4995, "step": 2430 }, { "epoch": 0.37, "grad_norm": 1.306188941001892, "learning_rate": 7.332475946888031e-06, "loss": 0.436, "step": 2431 }, { "epoch": 0.37, "grad_norm": 1.0307508707046509, "learning_rate": 7.330323887355044e-06, "loss": 0.473, "step": 2432 }, { "epoch": 0.37, "grad_norm": 1.1737627983093262, "learning_rate": 7.328171276183708e-06, "loss": 0.4895, "step": 2433 }, { "epoch": 0.37, "grad_norm": 1.3251450061798096, "learning_rate": 7.32601811388359e-06, "loss": 0.524, "step": 2434 }, { "epoch": 0.37, "grad_norm": 1.2297146320343018, "learning_rate": 7.323864400964391e-06, "loss": 0.4189, "step": 2435 }, { "epoch": 0.37, "grad_norm": 1.121243953704834, "learning_rate": 7.321710137935944e-06, "loss": 0.5215, "step": 2436 }, { "epoch": 0.37, "grad_norm": 1.0677365064620972, "learning_rate": 7.319555325308207e-06, "loss": 0.4899, "step": 2437 }, { "epoch": 0.37, "grad_norm": 1.1077566146850586, "learning_rate": 7.317399963591276e-06, "loss": 0.4941, "step": 2438 }, { "epoch": 0.37, "grad_norm": 1.3357861042022705, "learning_rate": 7.315244053295366e-06, "loss": 0.4981, "step": 2439 }, { "epoch": 0.37, "grad_norm": 1.1655590534210205, "learning_rate": 7.313087594930831e-06, "loss": 0.4745, "step": 2440 }, { "epoch": 0.37, "grad_norm": 1.287194013595581, "learning_rate": 7.310930589008151e-06, "loss": 0.4955, "step": 2441 }, { "epoch": 0.37, "grad_norm": 1.0656728744506836, "learning_rate": 7.308773036037935e-06, "loss": 0.5382, "step": 2442 }, { "epoch": 0.37, "grad_norm": 1.1567842960357666, "learning_rate": 7.306614936530926e-06, "loss": 0.5506, "step": 2443 }, { "epoch": 0.37, "grad_norm": 1.0315790176391602, "learning_rate": 7.304456290997991e-06, "loss": 0.5264, "step": 2444 }, { "epoch": 0.37, "grad_norm": 1.4906525611877441, "learning_rate": 7.302297099950127e-06, "loss": 0.5527, "step": 2445 }, { "epoch": 0.37, "grad_norm": 1.1220321655273438, "learning_rate": 7.300137363898464e-06, "loss": 0.535, "step": 2446 }, { "epoch": 0.37, "grad_norm": 1.4053677320480347, "learning_rate": 7.2979770833542565e-06, "loss": 0.5336, "step": 2447 }, { "epoch": 0.37, "grad_norm": 1.7675046920776367, "learning_rate": 7.295816258828892e-06, "loss": 0.4648, "step": 2448 }, { "epoch": 0.37, "grad_norm": 1.578696846961975, "learning_rate": 7.2936548908338825e-06, "loss": 0.5681, "step": 2449 }, { "epoch": 0.37, "grad_norm": 1.0829180479049683, "learning_rate": 7.291492979880872e-06, "loss": 0.4789, "step": 2450 }, { "epoch": 0.37, "grad_norm": 1.321234107017517, "learning_rate": 7.289330526481633e-06, "loss": 0.473, "step": 2451 }, { "epoch": 0.37, "grad_norm": 1.2954373359680176, "learning_rate": 7.287167531148061e-06, "loss": 0.5483, "step": 2452 }, { "epoch": 0.37, "grad_norm": 1.2952629327774048, "learning_rate": 7.285003994392188e-06, "loss": 0.5967, "step": 2453 }, { "epoch": 0.37, "grad_norm": 1.1178083419799805, "learning_rate": 7.28283991672617e-06, "loss": 0.5076, "step": 2454 }, { "epoch": 0.37, "grad_norm": 1.0261896848678589, "learning_rate": 7.2806752986622895e-06, "loss": 0.494, "step": 2455 }, { "epoch": 0.37, "grad_norm": 1.1954913139343262, "learning_rate": 7.278510140712961e-06, "loss": 0.4968, "step": 2456 }, { "epoch": 0.37, "grad_norm": 1.426167607307434, "learning_rate": 7.2763444433907216e-06, "loss": 0.5461, "step": 2457 }, { "epoch": 0.37, "grad_norm": 1.0826224088668823, "learning_rate": 7.2741782072082405e-06, "loss": 0.5252, "step": 2458 }, { "epoch": 0.37, "grad_norm": 1.0436322689056396, "learning_rate": 7.272011432678313e-06, "loss": 0.49, "step": 2459 }, { "epoch": 0.37, "grad_norm": 0.9330019950866699, "learning_rate": 7.2698441203138615e-06, "loss": 0.4974, "step": 2460 }, { "epoch": 0.37, "grad_norm": 1.1117441654205322, "learning_rate": 7.267676270627937e-06, "loss": 0.5182, "step": 2461 }, { "epoch": 0.37, "grad_norm": 1.266863465309143, "learning_rate": 7.2655078841337155e-06, "loss": 0.4894, "step": 2462 }, { "epoch": 0.37, "grad_norm": 1.2446571588516235, "learning_rate": 7.263338961344499e-06, "loss": 0.559, "step": 2463 }, { "epoch": 0.37, "grad_norm": 1.1283422708511353, "learning_rate": 7.261169502773724e-06, "loss": 0.5363, "step": 2464 }, { "epoch": 0.37, "grad_norm": 1.2128459215164185, "learning_rate": 7.2589995089349454e-06, "loss": 0.5203, "step": 2465 }, { "epoch": 0.37, "grad_norm": 1.1437820196151733, "learning_rate": 7.256828980341847e-06, "loss": 0.5704, "step": 2466 }, { "epoch": 0.37, "grad_norm": 1.3910884857177734, "learning_rate": 7.2546579175082425e-06, "loss": 0.7331, "step": 2467 }, { "epoch": 0.37, "grad_norm": 1.3426591157913208, "learning_rate": 7.252486320948068e-06, "loss": 0.5515, "step": 2468 }, { "epoch": 0.37, "grad_norm": 1.4219684600830078, "learning_rate": 7.2503141911753885e-06, "loss": 0.5338, "step": 2469 }, { "epoch": 0.37, "grad_norm": 1.1007901430130005, "learning_rate": 7.2481415287043934e-06, "loss": 0.4452, "step": 2470 }, { "epoch": 0.37, "grad_norm": 1.0186700820922852, "learning_rate": 7.2459683340494e-06, "loss": 0.4018, "step": 2471 }, { "epoch": 0.37, "grad_norm": 1.2065024375915527, "learning_rate": 7.2437946077248514e-06, "loss": 0.5367, "step": 2472 }, { "epoch": 0.37, "grad_norm": 1.1917107105255127, "learning_rate": 7.241620350245314e-06, "loss": 0.5045, "step": 2473 }, { "epoch": 0.37, "grad_norm": 1.2023261785507202, "learning_rate": 7.239445562125484e-06, "loss": 0.4811, "step": 2474 }, { "epoch": 0.37, "grad_norm": 1.3488636016845703, "learning_rate": 7.237270243880179e-06, "loss": 0.4902, "step": 2475 }, { "epoch": 0.37, "grad_norm": 1.1748993396759033, "learning_rate": 7.235094396024345e-06, "loss": 0.4816, "step": 2476 }, { "epoch": 0.37, "grad_norm": 1.3056575059890747, "learning_rate": 7.232918019073054e-06, "loss": 0.5389, "step": 2477 }, { "epoch": 0.37, "grad_norm": 0.9377543926239014, "learning_rate": 7.230741113541499e-06, "loss": 0.5285, "step": 2478 }, { "epoch": 0.37, "grad_norm": 1.202358603477478, "learning_rate": 7.228563679945004e-06, "loss": 0.5242, "step": 2479 }, { "epoch": 0.37, "grad_norm": 1.5620733499526978, "learning_rate": 7.226385718799011e-06, "loss": 0.4562, "step": 2480 }, { "epoch": 0.37, "grad_norm": 1.1945312023162842, "learning_rate": 7.224207230619095e-06, "loss": 0.7539, "step": 2481 }, { "epoch": 0.37, "grad_norm": 2.5172181129455566, "learning_rate": 7.2220282159209485e-06, "loss": 0.5821, "step": 2482 }, { "epoch": 0.37, "grad_norm": 1.0261980295181274, "learning_rate": 7.2198486752203924e-06, "loss": 0.4019, "step": 2483 }, { "epoch": 0.37, "grad_norm": 1.8482762575149536, "learning_rate": 7.217668609033372e-06, "loss": 0.5207, "step": 2484 }, { "epoch": 0.37, "grad_norm": 1.149757981300354, "learning_rate": 7.2154880178759556e-06, "loss": 0.5146, "step": 2485 }, { "epoch": 0.37, "grad_norm": 1.048001766204834, "learning_rate": 7.213306902264338e-06, "loss": 0.4859, "step": 2486 }, { "epoch": 0.37, "grad_norm": 1.1024492979049683, "learning_rate": 7.211125262714835e-06, "loss": 0.5133, "step": 2487 }, { "epoch": 0.37, "grad_norm": 1.2751622200012207, "learning_rate": 7.208943099743888e-06, "loss": 0.4766, "step": 2488 }, { "epoch": 0.37, "grad_norm": 0.9778058528900146, "learning_rate": 7.2067604138680636e-06, "loss": 0.4185, "step": 2489 }, { "epoch": 0.37, "grad_norm": 1.1546621322631836, "learning_rate": 7.204577205604051e-06, "loss": 0.4372, "step": 2490 }, { "epoch": 0.37, "grad_norm": 1.113701581954956, "learning_rate": 7.202393475468663e-06, "loss": 0.5082, "step": 2491 }, { "epoch": 0.37, "grad_norm": 1.795711636543274, "learning_rate": 7.200209223978836e-06, "loss": 0.5171, "step": 2492 }, { "epoch": 0.37, "grad_norm": 1.2841954231262207, "learning_rate": 7.1980244516516295e-06, "loss": 0.7769, "step": 2493 }, { "epoch": 0.37, "grad_norm": 1.2679105997085571, "learning_rate": 7.195839159004227e-06, "loss": 0.5307, "step": 2494 }, { "epoch": 0.37, "grad_norm": 1.266610860824585, "learning_rate": 7.1936533465539335e-06, "loss": 0.4609, "step": 2495 }, { "epoch": 0.37, "grad_norm": 1.1428779363632202, "learning_rate": 7.19146701481818e-06, "loss": 0.4725, "step": 2496 }, { "epoch": 0.38, "grad_norm": 1.387901782989502, "learning_rate": 7.189280164314517e-06, "loss": 0.5132, "step": 2497 }, { "epoch": 0.38, "grad_norm": 1.1613855361938477, "learning_rate": 7.187092795560623e-06, "loss": 0.5228, "step": 2498 }, { "epoch": 0.38, "grad_norm": 0.9894325137138367, "learning_rate": 7.184904909074293e-06, "loss": 0.4547, "step": 2499 }, { "epoch": 0.38, "grad_norm": 1.1657034158706665, "learning_rate": 7.182716505373447e-06, "loss": 0.6131, "step": 2500 }, { "epoch": 0.38, "grad_norm": 1.2603416442871094, "learning_rate": 7.1805275849761295e-06, "loss": 0.4302, "step": 2501 }, { "epoch": 0.38, "grad_norm": 1.2744890451431274, "learning_rate": 7.178338148400502e-06, "loss": 0.4565, "step": 2502 }, { "epoch": 0.38, "grad_norm": 4.735744953155518, "learning_rate": 7.176148196164856e-06, "loss": 0.5061, "step": 2503 }, { "epoch": 0.38, "grad_norm": 1.0928226709365845, "learning_rate": 7.173957728787602e-06, "loss": 0.4743, "step": 2504 }, { "epoch": 0.38, "grad_norm": 1.651412844657898, "learning_rate": 7.171766746787265e-06, "loss": 0.5055, "step": 2505 }, { "epoch": 0.38, "grad_norm": 1.1537952423095703, "learning_rate": 7.169575250682503e-06, "loss": 0.5047, "step": 2506 }, { "epoch": 0.38, "grad_norm": 1.0984512567520142, "learning_rate": 7.167383240992089e-06, "loss": 0.4792, "step": 2507 }, { "epoch": 0.38, "grad_norm": 2.177757740020752, "learning_rate": 7.16519071823492e-06, "loss": 0.533, "step": 2508 }, { "epoch": 0.38, "grad_norm": 2.0074334144592285, "learning_rate": 7.162997682930013e-06, "loss": 0.4183, "step": 2509 }, { "epoch": 0.38, "grad_norm": 1.1932638883590698, "learning_rate": 7.16080413559651e-06, "loss": 0.5224, "step": 2510 }, { "epoch": 0.38, "grad_norm": 1.2943356037139893, "learning_rate": 7.158610076753667e-06, "loss": 0.4846, "step": 2511 }, { "epoch": 0.38, "grad_norm": 1.1069107055664062, "learning_rate": 7.156415506920869e-06, "loss": 0.491, "step": 2512 }, { "epoch": 0.38, "grad_norm": 1.1888152360916138, "learning_rate": 7.154220426617615e-06, "loss": 0.4863, "step": 2513 }, { "epoch": 0.38, "grad_norm": 1.1516929864883423, "learning_rate": 7.1520248363635315e-06, "loss": 0.4285, "step": 2514 }, { "epoch": 0.38, "grad_norm": 1.2728084325790405, "learning_rate": 7.149828736678361e-06, "loss": 0.5544, "step": 2515 }, { "epoch": 0.38, "grad_norm": 1.1440449953079224, "learning_rate": 7.1476321280819695e-06, "loss": 0.7311, "step": 2516 }, { "epoch": 0.38, "grad_norm": 1.015945315361023, "learning_rate": 7.1454350110943394e-06, "loss": 0.4668, "step": 2517 }, { "epoch": 0.38, "grad_norm": 1.1348774433135986, "learning_rate": 7.143237386235577e-06, "loss": 0.5255, "step": 2518 }, { "epoch": 0.38, "grad_norm": 1.1823277473449707, "learning_rate": 7.141039254025908e-06, "loss": 0.5086, "step": 2519 }, { "epoch": 0.38, "grad_norm": 1.2702834606170654, "learning_rate": 7.138840614985681e-06, "loss": 0.5083, "step": 2520 }, { "epoch": 0.38, "grad_norm": 1.0547765493392944, "learning_rate": 7.136641469635357e-06, "loss": 0.5376, "step": 2521 }, { "epoch": 0.38, "grad_norm": 1.7083618640899658, "learning_rate": 7.134441818495526e-06, "loss": 0.6146, "step": 2522 }, { "epoch": 0.38, "grad_norm": 1.226755976676941, "learning_rate": 7.132241662086889e-06, "loss": 0.5788, "step": 2523 }, { "epoch": 0.38, "grad_norm": 1.2971011400222778, "learning_rate": 7.1300410009302726e-06, "loss": 0.7519, "step": 2524 }, { "epoch": 0.38, "grad_norm": 1.1183007955551147, "learning_rate": 7.127839835546623e-06, "loss": 0.5054, "step": 2525 }, { "epoch": 0.38, "grad_norm": 1.608896017074585, "learning_rate": 7.125638166457e-06, "loss": 0.4295, "step": 2526 }, { "epoch": 0.38, "grad_norm": 1.218650460243225, "learning_rate": 7.123435994182592e-06, "loss": 0.4764, "step": 2527 }, { "epoch": 0.38, "grad_norm": 1.526525855064392, "learning_rate": 7.121233319244697e-06, "loss": 0.5302, "step": 2528 }, { "epoch": 0.38, "grad_norm": 1.5094964504241943, "learning_rate": 7.119030142164737e-06, "loss": 0.5887, "step": 2529 }, { "epoch": 0.38, "grad_norm": 0.8832220435142517, "learning_rate": 7.1168264634642504e-06, "loss": 0.4995, "step": 2530 }, { "epoch": 0.38, "grad_norm": 1.0475434064865112, "learning_rate": 7.114622283664898e-06, "loss": 0.4439, "step": 2531 }, { "epoch": 0.38, "grad_norm": 1.4920374155044556, "learning_rate": 7.112417603288457e-06, "loss": 0.4285, "step": 2532 }, { "epoch": 0.38, "grad_norm": 1.3315271139144897, "learning_rate": 7.1102124228568235e-06, "loss": 0.5073, "step": 2533 }, { "epoch": 0.38, "grad_norm": 1.104947566986084, "learning_rate": 7.108006742892012e-06, "loss": 0.5779, "step": 2534 }, { "epoch": 0.38, "grad_norm": 1.1845771074295044, "learning_rate": 7.105800563916152e-06, "loss": 0.5445, "step": 2535 }, { "epoch": 0.38, "grad_norm": 1.22757887840271, "learning_rate": 7.103593886451495e-06, "loss": 0.4873, "step": 2536 }, { "epoch": 0.38, "grad_norm": 1.0719635486602783, "learning_rate": 7.101386711020412e-06, "loss": 0.5397, "step": 2537 }, { "epoch": 0.38, "grad_norm": 1.5885210037231445, "learning_rate": 7.099179038145387e-06, "loss": 0.5245, "step": 2538 }, { "epoch": 0.38, "grad_norm": 1.0632641315460205, "learning_rate": 7.096970868349026e-06, "loss": 0.5317, "step": 2539 }, { "epoch": 0.38, "grad_norm": 1.5145437717437744, "learning_rate": 7.094762202154048e-06, "loss": 0.4955, "step": 2540 }, { "epoch": 0.38, "grad_norm": 1.6295673847198486, "learning_rate": 7.092553040083297e-06, "loss": 0.5603, "step": 2541 }, { "epoch": 0.38, "grad_norm": 1.1986677646636963, "learning_rate": 7.090343382659726e-06, "loss": 0.4366, "step": 2542 }, { "epoch": 0.38, "grad_norm": 1.0923601388931274, "learning_rate": 7.0881332304064085e-06, "loss": 0.488, "step": 2543 }, { "epoch": 0.38, "grad_norm": 1.5465080738067627, "learning_rate": 7.085922583846539e-06, "loss": 0.5231, "step": 2544 }, { "epoch": 0.38, "grad_norm": 0.9320787191390991, "learning_rate": 7.083711443503423e-06, "loss": 0.5805, "step": 2545 }, { "epoch": 0.38, "grad_norm": 1.050771951675415, "learning_rate": 7.081499809900485e-06, "loss": 0.4886, "step": 2546 }, { "epoch": 0.38, "grad_norm": 1.5025771856307983, "learning_rate": 7.079287683561269e-06, "loss": 0.5568, "step": 2547 }, { "epoch": 0.38, "grad_norm": 1.2791694402694702, "learning_rate": 7.0770750650094335e-06, "loss": 0.5148, "step": 2548 }, { "epoch": 0.38, "grad_norm": 1.433935523033142, "learning_rate": 7.074861954768751e-06, "loss": 0.5009, "step": 2549 }, { "epoch": 0.38, "grad_norm": 1.1651890277862549, "learning_rate": 7.072648353363114e-06, "loss": 0.4837, "step": 2550 }, { "epoch": 0.38, "grad_norm": 1.1719008684158325, "learning_rate": 7.07043426131653e-06, "loss": 0.7405, "step": 2551 }, { "epoch": 0.38, "grad_norm": 1.181447982788086, "learning_rate": 7.0682196791531235e-06, "loss": 0.4975, "step": 2552 }, { "epoch": 0.38, "grad_norm": 1.1872624158859253, "learning_rate": 7.066004607397133e-06, "loss": 0.5224, "step": 2553 }, { "epoch": 0.38, "grad_norm": 0.9311281442642212, "learning_rate": 7.0637890465729165e-06, "loss": 0.4848, "step": 2554 }, { "epoch": 0.38, "grad_norm": 1.1117304563522339, "learning_rate": 7.061572997204943e-06, "loss": 0.4611, "step": 2555 }, { "epoch": 0.38, "grad_norm": 0.9852153658866882, "learning_rate": 7.0593564598177996e-06, "loss": 0.5012, "step": 2556 }, { "epoch": 0.38, "grad_norm": 0.9981886744499207, "learning_rate": 7.057139434936191e-06, "loss": 0.5284, "step": 2557 }, { "epoch": 0.38, "grad_norm": 0.8315958380699158, "learning_rate": 7.054921923084933e-06, "loss": 0.4339, "step": 2558 }, { "epoch": 0.38, "grad_norm": 0.9134000539779663, "learning_rate": 7.052703924788961e-06, "loss": 0.3928, "step": 2559 }, { "epoch": 0.38, "grad_norm": 1.1832629442214966, "learning_rate": 7.050485440573321e-06, "loss": 0.4998, "step": 2560 }, { "epoch": 0.38, "grad_norm": 1.2731720209121704, "learning_rate": 7.048266470963179e-06, "loss": 0.5448, "step": 2561 }, { "epoch": 0.38, "grad_norm": 1.01315438747406, "learning_rate": 7.046047016483811e-06, "loss": 0.555, "step": 2562 }, { "epoch": 0.39, "grad_norm": 1.8067876100540161, "learning_rate": 7.043827077660612e-06, "loss": 0.5498, "step": 2563 }, { "epoch": 0.39, "grad_norm": 1.0476452112197876, "learning_rate": 7.0416066550190885e-06, "loss": 0.5509, "step": 2564 }, { "epoch": 0.39, "grad_norm": 1.4896997213363647, "learning_rate": 7.039385749084864e-06, "loss": 0.5043, "step": 2565 }, { "epoch": 0.39, "grad_norm": 1.192434549331665, "learning_rate": 7.037164360383675e-06, "loss": 0.4897, "step": 2566 }, { "epoch": 0.39, "grad_norm": 1.3139485120773315, "learning_rate": 7.034942489441371e-06, "loss": 0.464, "step": 2567 }, { "epoch": 0.39, "grad_norm": 1.471852421760559, "learning_rate": 7.0327201367839195e-06, "loss": 0.4384, "step": 2568 }, { "epoch": 0.39, "grad_norm": 1.6360249519348145, "learning_rate": 7.030497302937396e-06, "loss": 0.5377, "step": 2569 }, { "epoch": 0.39, "grad_norm": 0.9720149040222168, "learning_rate": 7.028273988427999e-06, "loss": 0.4919, "step": 2570 }, { "epoch": 0.39, "grad_norm": 1.0384178161621094, "learning_rate": 7.026050193782032e-06, "loss": 0.4515, "step": 2571 }, { "epoch": 0.39, "grad_norm": 1.2431949377059937, "learning_rate": 7.023825919525916e-06, "loss": 0.4927, "step": 2572 }, { "epoch": 0.39, "grad_norm": 1.1680080890655518, "learning_rate": 7.021601166186185e-06, "loss": 0.5352, "step": 2573 }, { "epoch": 0.39, "grad_norm": 1.007080078125, "learning_rate": 7.019375934289487e-06, "loss": 0.4258, "step": 2574 }, { "epoch": 0.39, "grad_norm": 1.486417293548584, "learning_rate": 7.017150224362581e-06, "loss": 0.535, "step": 2575 }, { "epoch": 0.39, "grad_norm": 1.1656811237335205, "learning_rate": 7.014924036932346e-06, "loss": 0.7046, "step": 2576 }, { "epoch": 0.39, "grad_norm": 0.956391453742981, "learning_rate": 7.012697372525763e-06, "loss": 0.5374, "step": 2577 }, { "epoch": 0.39, "grad_norm": 1.4497857093811035, "learning_rate": 7.010470231669935e-06, "loss": 0.4865, "step": 2578 }, { "epoch": 0.39, "grad_norm": 1.2053182125091553, "learning_rate": 7.008242614892074e-06, "loss": 0.4493, "step": 2579 }, { "epoch": 0.39, "grad_norm": 1.2586092948913574, "learning_rate": 7.006014522719503e-06, "loss": 0.5148, "step": 2580 }, { "epoch": 0.39, "grad_norm": 1.1904820203781128, "learning_rate": 7.003785955679665e-06, "loss": 0.4119, "step": 2581 }, { "epoch": 0.39, "grad_norm": 1.048203945159912, "learning_rate": 7.001556914300108e-06, "loss": 0.479, "step": 2582 }, { "epoch": 0.39, "grad_norm": 1.8071250915527344, "learning_rate": 6.999327399108493e-06, "loss": 0.5381, "step": 2583 }, { "epoch": 0.39, "grad_norm": 1.4186617136001587, "learning_rate": 6.997097410632596e-06, "loss": 0.5996, "step": 2584 }, { "epoch": 0.39, "grad_norm": 1.3127832412719727, "learning_rate": 6.994866949400304e-06, "loss": 0.5074, "step": 2585 }, { "epoch": 0.39, "grad_norm": 1.1566685438156128, "learning_rate": 6.992636015939614e-06, "loss": 0.7379, "step": 2586 }, { "epoch": 0.39, "grad_norm": 1.2715259790420532, "learning_rate": 6.99040461077864e-06, "loss": 0.4919, "step": 2587 }, { "epoch": 0.39, "grad_norm": 1.2222356796264648, "learning_rate": 6.988172734445601e-06, "loss": 0.7127, "step": 2588 }, { "epoch": 0.39, "grad_norm": 1.1123685836791992, "learning_rate": 6.9859403874688314e-06, "loss": 0.4595, "step": 2589 }, { "epoch": 0.39, "grad_norm": 0.9489760994911194, "learning_rate": 6.983707570376776e-06, "loss": 0.5332, "step": 2590 }, { "epoch": 0.39, "grad_norm": 1.3199228048324585, "learning_rate": 6.981474283697992e-06, "loss": 0.5422, "step": 2591 }, { "epoch": 0.39, "grad_norm": 1.06222665309906, "learning_rate": 6.979240527961147e-06, "loss": 0.4567, "step": 2592 }, { "epoch": 0.39, "grad_norm": 1.3298444747924805, "learning_rate": 6.97700630369502e-06, "loss": 0.4894, "step": 2593 }, { "epoch": 0.39, "grad_norm": 1.3808202743530273, "learning_rate": 6.974771611428499e-06, "loss": 0.5222, "step": 2594 }, { "epoch": 0.39, "grad_norm": 1.1748343706130981, "learning_rate": 6.972536451690585e-06, "loss": 0.51, "step": 2595 }, { "epoch": 0.39, "grad_norm": 1.213172435760498, "learning_rate": 6.970300825010389e-06, "loss": 0.5037, "step": 2596 }, { "epoch": 0.39, "grad_norm": 1.2747524976730347, "learning_rate": 6.96806473191713e-06, "loss": 0.5674, "step": 2597 }, { "epoch": 0.39, "grad_norm": 1.2266918420791626, "learning_rate": 6.965828172940144e-06, "loss": 0.4323, "step": 2598 }, { "epoch": 0.39, "grad_norm": 1.2881782054901123, "learning_rate": 6.963591148608873e-06, "loss": 0.466, "step": 2599 }, { "epoch": 0.39, "grad_norm": 1.3592441082000732, "learning_rate": 6.961353659452867e-06, "loss": 0.5156, "step": 2600 }, { "epoch": 0.39, "grad_norm": 1.0860192775726318, "learning_rate": 6.959115706001788e-06, "loss": 0.4796, "step": 2601 }, { "epoch": 0.39, "grad_norm": 1.4323259592056274, "learning_rate": 6.956877288785409e-06, "loss": 0.5757, "step": 2602 }, { "epoch": 0.39, "grad_norm": 1.2906956672668457, "learning_rate": 6.954638408333611e-06, "loss": 0.5457, "step": 2603 }, { "epoch": 0.39, "grad_norm": 1.0619722604751587, "learning_rate": 6.9523990651763896e-06, "loss": 0.4803, "step": 2604 }, { "epoch": 0.39, "grad_norm": 0.9378067851066589, "learning_rate": 6.950159259843843e-06, "loss": 0.5023, "step": 2605 }, { "epoch": 0.39, "grad_norm": 1.1604187488555908, "learning_rate": 6.947918992866183e-06, "loss": 0.5164, "step": 2606 }, { "epoch": 0.39, "grad_norm": 1.2483552694320679, "learning_rate": 6.9456782647737275e-06, "loss": 0.4645, "step": 2607 }, { "epoch": 0.39, "grad_norm": 1.2411531209945679, "learning_rate": 6.943437076096906e-06, "loss": 0.5147, "step": 2608 }, { "epoch": 0.39, "grad_norm": 1.132265329360962, "learning_rate": 6.941195427366259e-06, "loss": 0.6897, "step": 2609 }, { "epoch": 0.39, "grad_norm": 2.2823474407196045, "learning_rate": 6.9389533191124315e-06, "loss": 0.5734, "step": 2610 }, { "epoch": 0.39, "grad_norm": 1.1742489337921143, "learning_rate": 6.93671075186618e-06, "loss": 0.5288, "step": 2611 }, { "epoch": 0.39, "grad_norm": 1.1575881242752075, "learning_rate": 6.934467726158368e-06, "loss": 0.5065, "step": 2612 }, { "epoch": 0.39, "grad_norm": 2.587110757827759, "learning_rate": 6.9322242425199714e-06, "loss": 0.4566, "step": 2613 }, { "epoch": 0.39, "grad_norm": 1.0271474123001099, "learning_rate": 6.929980301482067e-06, "loss": 0.5134, "step": 2614 }, { "epoch": 0.39, "grad_norm": 4.362549304962158, "learning_rate": 6.927735903575847e-06, "loss": 0.5778, "step": 2615 }, { "epoch": 0.39, "grad_norm": 1.1498677730560303, "learning_rate": 6.925491049332611e-06, "loss": 0.4899, "step": 2616 }, { "epoch": 0.39, "grad_norm": 1.3179620504379272, "learning_rate": 6.923245739283761e-06, "loss": 0.4131, "step": 2617 }, { "epoch": 0.39, "grad_norm": 1.2327708005905151, "learning_rate": 6.920999973960813e-06, "loss": 0.482, "step": 2618 }, { "epoch": 0.39, "grad_norm": 1.312416672706604, "learning_rate": 6.918753753895388e-06, "loss": 0.4815, "step": 2619 }, { "epoch": 0.39, "grad_norm": 1.0674726963043213, "learning_rate": 6.916507079619217e-06, "loss": 0.4661, "step": 2620 }, { "epoch": 0.39, "grad_norm": 1.0832544565200806, "learning_rate": 6.914259951664133e-06, "loss": 0.4467, "step": 2621 }, { "epoch": 0.39, "grad_norm": 0.9856125116348267, "learning_rate": 6.912012370562082e-06, "loss": 0.4434, "step": 2622 }, { "epoch": 0.39, "grad_norm": 1.3579851388931274, "learning_rate": 6.909764336845116e-06, "loss": 0.5176, "step": 2623 }, { "epoch": 0.39, "grad_norm": 1.1029176712036133, "learning_rate": 6.907515851045392e-06, "loss": 0.7626, "step": 2624 }, { "epoch": 0.39, "grad_norm": 1.3324695825576782, "learning_rate": 6.905266913695178e-06, "loss": 0.5673, "step": 2625 }, { "epoch": 0.39, "grad_norm": 1.1059333086013794, "learning_rate": 6.903017525326843e-06, "loss": 0.4618, "step": 2626 }, { "epoch": 0.39, "grad_norm": 1.3278623819351196, "learning_rate": 6.900767686472869e-06, "loss": 0.4252, "step": 2627 }, { "epoch": 0.39, "grad_norm": 1.3049159049987793, "learning_rate": 6.8985173976658395e-06, "loss": 0.422, "step": 2628 }, { "epoch": 0.39, "grad_norm": 1.2037442922592163, "learning_rate": 6.896266659438449e-06, "loss": 0.7781, "step": 2629 }, { "epoch": 0.4, "grad_norm": 1.2831134796142578, "learning_rate": 6.894015472323494e-06, "loss": 0.4569, "step": 2630 }, { "epoch": 0.4, "grad_norm": 1.4358597993850708, "learning_rate": 6.891763836853881e-06, "loss": 0.7616, "step": 2631 }, { "epoch": 0.4, "grad_norm": 1.1380627155303955, "learning_rate": 6.889511753562621e-06, "loss": 0.4587, "step": 2632 }, { "epoch": 0.4, "grad_norm": 1.156644582748413, "learning_rate": 6.887259222982829e-06, "loss": 0.4952, "step": 2633 }, { "epoch": 0.4, "grad_norm": 1.2608035802841187, "learning_rate": 6.885006245647732e-06, "loss": 0.4751, "step": 2634 }, { "epoch": 0.4, "grad_norm": 1.0896732807159424, "learning_rate": 6.882752822090656e-06, "loss": 0.733, "step": 2635 }, { "epoch": 0.4, "grad_norm": 1.1672205924987793, "learning_rate": 6.880498952845035e-06, "loss": 0.4585, "step": 2636 }, { "epoch": 0.4, "grad_norm": 0.9744054675102234, "learning_rate": 6.87824463844441e-06, "loss": 0.4698, "step": 2637 }, { "epoch": 0.4, "grad_norm": 1.2952197790145874, "learning_rate": 6.8759898794224265e-06, "loss": 0.4832, "step": 2638 }, { "epoch": 0.4, "grad_norm": 1.387797474861145, "learning_rate": 6.873734676312835e-06, "loss": 0.4766, "step": 2639 }, { "epoch": 0.4, "grad_norm": 2.381112813949585, "learning_rate": 6.871479029649489e-06, "loss": 0.4907, "step": 2640 }, { "epoch": 0.4, "grad_norm": 1.333020806312561, "learning_rate": 6.86922293996635e-06, "loss": 0.404, "step": 2641 }, { "epoch": 0.4, "grad_norm": 1.1023417711257935, "learning_rate": 6.866966407797488e-06, "loss": 0.423, "step": 2642 }, { "epoch": 0.4, "grad_norm": 1.1553367376327515, "learning_rate": 6.864709433677066e-06, "loss": 0.4224, "step": 2643 }, { "epoch": 0.4, "grad_norm": 1.0478266477584839, "learning_rate": 6.862452018139364e-06, "loss": 0.5675, "step": 2644 }, { "epoch": 0.4, "grad_norm": 1.0767780542373657, "learning_rate": 6.860194161718759e-06, "loss": 0.4546, "step": 2645 }, { "epoch": 0.4, "grad_norm": 1.5877269506454468, "learning_rate": 6.857935864949736e-06, "loss": 0.499, "step": 2646 }, { "epoch": 0.4, "grad_norm": 1.2132903337478638, "learning_rate": 6.855677128366881e-06, "loss": 0.6996, "step": 2647 }, { "epoch": 0.4, "grad_norm": 1.1469851732254028, "learning_rate": 6.8534179525048895e-06, "loss": 0.51, "step": 2648 }, { "epoch": 0.4, "grad_norm": 1.1460444927215576, "learning_rate": 6.851158337898554e-06, "loss": 0.7116, "step": 2649 }, { "epoch": 0.4, "grad_norm": 1.6478062868118286, "learning_rate": 6.8488982850827765e-06, "loss": 0.4854, "step": 2650 }, { "epoch": 0.4, "grad_norm": 1.3372451066970825, "learning_rate": 6.84663779459256e-06, "loss": 0.4358, "step": 2651 }, { "epoch": 0.4, "grad_norm": 1.1348235607147217, "learning_rate": 6.844376866963012e-06, "loss": 0.4689, "step": 2652 }, { "epoch": 0.4, "grad_norm": 1.0781553983688354, "learning_rate": 6.842115502729343e-06, "loss": 0.4579, "step": 2653 }, { "epoch": 0.4, "grad_norm": 1.0913130044937134, "learning_rate": 6.839853702426868e-06, "loss": 0.5046, "step": 2654 }, { "epoch": 0.4, "grad_norm": 1.0607496500015259, "learning_rate": 6.8375914665910026e-06, "loss": 0.4385, "step": 2655 }, { "epoch": 0.4, "grad_norm": 1.2446117401123047, "learning_rate": 6.835328795757268e-06, "loss": 0.4702, "step": 2656 }, { "epoch": 0.4, "grad_norm": 1.0997004508972168, "learning_rate": 6.833065690461289e-06, "loss": 0.6912, "step": 2657 }, { "epoch": 0.4, "grad_norm": 1.5115889310836792, "learning_rate": 6.830802151238789e-06, "loss": 0.4537, "step": 2658 }, { "epoch": 0.4, "grad_norm": 1.3228070735931396, "learning_rate": 6.828538178625599e-06, "loss": 0.4686, "step": 2659 }, { "epoch": 0.4, "grad_norm": 1.6103317737579346, "learning_rate": 6.826273773157652e-06, "loss": 0.4032, "step": 2660 }, { "epoch": 0.4, "grad_norm": 1.9849212169647217, "learning_rate": 6.8240089353709785e-06, "loss": 0.4397, "step": 2661 }, { "epoch": 0.4, "grad_norm": 1.1904457807540894, "learning_rate": 6.821743665801716e-06, "loss": 0.5028, "step": 2662 }, { "epoch": 0.4, "grad_norm": 1.1972668170928955, "learning_rate": 6.819477964986105e-06, "loss": 0.5628, "step": 2663 }, { "epoch": 0.4, "grad_norm": 1.036836862564087, "learning_rate": 6.817211833460484e-06, "loss": 0.4484, "step": 2664 }, { "epoch": 0.4, "grad_norm": 1.1562243700027466, "learning_rate": 6.8149452717612965e-06, "loss": 0.4437, "step": 2665 }, { "epoch": 0.4, "grad_norm": 1.3040190935134888, "learning_rate": 6.812678280425088e-06, "loss": 0.4469, "step": 2666 }, { "epoch": 0.4, "grad_norm": 1.0271475315093994, "learning_rate": 6.8104108599885024e-06, "loss": 0.5354, "step": 2667 }, { "epoch": 0.4, "grad_norm": 1.2686121463775635, "learning_rate": 6.8081430109882884e-06, "loss": 0.4906, "step": 2668 }, { "epoch": 0.4, "grad_norm": 1.4196857213974, "learning_rate": 6.805874733961295e-06, "loss": 0.4953, "step": 2669 }, { "epoch": 0.4, "grad_norm": 1.215727686882019, "learning_rate": 6.803606029444474e-06, "loss": 0.4901, "step": 2670 }, { "epoch": 0.4, "grad_norm": 0.9130131602287292, "learning_rate": 6.801336897974876e-06, "loss": 0.4252, "step": 2671 }, { "epoch": 0.4, "grad_norm": 1.2764467000961304, "learning_rate": 6.7990673400896546e-06, "loss": 0.4926, "step": 2672 }, { "epoch": 0.4, "grad_norm": 0.9966050386428833, "learning_rate": 6.796797356326063e-06, "loss": 0.5202, "step": 2673 }, { "epoch": 0.4, "grad_norm": 1.791323184967041, "learning_rate": 6.794526947221454e-06, "loss": 0.4348, "step": 2674 }, { "epoch": 0.4, "grad_norm": 1.4642338752746582, "learning_rate": 6.792256113313283e-06, "loss": 0.4946, "step": 2675 }, { "epoch": 0.4, "grad_norm": 1.311772108078003, "learning_rate": 6.78998485513911e-06, "loss": 0.5022, "step": 2676 }, { "epoch": 0.4, "grad_norm": 1.2438421249389648, "learning_rate": 6.787713173236588e-06, "loss": 0.5158, "step": 2677 }, { "epoch": 0.4, "grad_norm": 1.1874605417251587, "learning_rate": 6.7854410681434744e-06, "loss": 0.4321, "step": 2678 }, { "epoch": 0.4, "grad_norm": 1.1800390481948853, "learning_rate": 6.783168540397625e-06, "loss": 0.525, "step": 2679 }, { "epoch": 0.4, "grad_norm": 1.3806171417236328, "learning_rate": 6.780895590536995e-06, "loss": 0.4656, "step": 2680 }, { "epoch": 0.4, "grad_norm": 1.1860095262527466, "learning_rate": 6.778622219099644e-06, "loss": 0.492, "step": 2681 }, { "epoch": 0.4, "grad_norm": 2.3472959995269775, "learning_rate": 6.776348426623729e-06, "loss": 0.5696, "step": 2682 }, { "epoch": 0.4, "grad_norm": 1.0659173727035522, "learning_rate": 6.774074213647504e-06, "loss": 0.4537, "step": 2683 }, { "epoch": 0.4, "grad_norm": 1.089341402053833, "learning_rate": 6.771799580709324e-06, "loss": 0.7334, "step": 2684 }, { "epoch": 0.4, "grad_norm": 1.2253655195236206, "learning_rate": 6.769524528347647e-06, "loss": 0.4306, "step": 2685 }, { "epoch": 0.4, "grad_norm": 1.123187780380249, "learning_rate": 6.767249057101024e-06, "loss": 0.4887, "step": 2686 }, { "epoch": 0.4, "grad_norm": 1.2544349431991577, "learning_rate": 6.764973167508112e-06, "loss": 0.5478, "step": 2687 }, { "epoch": 0.4, "grad_norm": 1.4535837173461914, "learning_rate": 6.762696860107662e-06, "loss": 0.5129, "step": 2688 }, { "epoch": 0.4, "grad_norm": 1.0361723899841309, "learning_rate": 6.760420135438524e-06, "loss": 0.4549, "step": 2689 }, { "epoch": 0.4, "grad_norm": 1.2509015798568726, "learning_rate": 6.75814299403965e-06, "loss": 0.5044, "step": 2690 }, { "epoch": 0.4, "grad_norm": 1.0334950685501099, "learning_rate": 6.755865436450088e-06, "loss": 0.5147, "step": 2691 }, { "epoch": 0.4, "grad_norm": 1.2523243427276611, "learning_rate": 6.753587463208987e-06, "loss": 0.4522, "step": 2692 }, { "epoch": 0.4, "grad_norm": 5.11843204498291, "learning_rate": 6.751309074855592e-06, "loss": 0.4433, "step": 2693 }, { "epoch": 0.4, "grad_norm": 1.1233527660369873, "learning_rate": 6.7490302719292454e-06, "loss": 0.4377, "step": 2694 }, { "epoch": 0.4, "grad_norm": 1.1944859027862549, "learning_rate": 6.746751054969391e-06, "loss": 0.5092, "step": 2695 }, { "epoch": 0.4, "grad_norm": 1.2870608568191528, "learning_rate": 6.74447142451557e-06, "loss": 0.5784, "step": 2696 }, { "epoch": 0.41, "grad_norm": 1.2784851789474487, "learning_rate": 6.7421913811074194e-06, "loss": 0.5306, "step": 2697 }, { "epoch": 0.41, "grad_norm": 1.634955644607544, "learning_rate": 6.7399109252846736e-06, "loss": 0.4868, "step": 2698 }, { "epoch": 0.41, "grad_norm": 1.1835778951644897, "learning_rate": 6.737630057587169e-06, "loss": 0.4728, "step": 2699 }, { "epoch": 0.41, "grad_norm": 1.0456523895263672, "learning_rate": 6.735348778554835e-06, "loss": 0.4611, "step": 2700 }, { "epoch": 0.41, "grad_norm": 1.5209437608718872, "learning_rate": 6.733067088727699e-06, "loss": 0.4847, "step": 2701 }, { "epoch": 0.41, "grad_norm": 1.2233363389968872, "learning_rate": 6.730784988645889e-06, "loss": 0.7385, "step": 2702 }, { "epoch": 0.41, "grad_norm": 1.2726454734802246, "learning_rate": 6.728502478849625e-06, "loss": 0.5502, "step": 2703 }, { "epoch": 0.41, "grad_norm": 1.2411980628967285, "learning_rate": 6.726219559879229e-06, "loss": 0.5597, "step": 2704 }, { "epoch": 0.41, "grad_norm": 1.4455366134643555, "learning_rate": 6.7239362322751166e-06, "loss": 0.4865, "step": 2705 }, { "epoch": 0.41, "grad_norm": 1.2974337339401245, "learning_rate": 6.721652496577801e-06, "loss": 0.5142, "step": 2706 }, { "epoch": 0.41, "grad_norm": 1.6667962074279785, "learning_rate": 6.719368353327892e-06, "loss": 0.4956, "step": 2707 }, { "epoch": 0.41, "grad_norm": 1.9136743545532227, "learning_rate": 6.717083803066097e-06, "loss": 0.4835, "step": 2708 }, { "epoch": 0.41, "grad_norm": 1.841286301612854, "learning_rate": 6.7147988463332184e-06, "loss": 0.4577, "step": 2709 }, { "epoch": 0.41, "grad_norm": 1.150018572807312, "learning_rate": 6.712513483670153e-06, "loss": 0.5422, "step": 2710 }, { "epoch": 0.41, "grad_norm": 1.093744158744812, "learning_rate": 6.7102277156179e-06, "loss": 0.4894, "step": 2711 }, { "epoch": 0.41, "grad_norm": 1.289700984954834, "learning_rate": 6.707941542717547e-06, "loss": 0.5025, "step": 2712 }, { "epoch": 0.41, "grad_norm": 1.1125048398971558, "learning_rate": 6.705654965510282e-06, "loss": 0.5271, "step": 2713 }, { "epoch": 0.41, "grad_norm": 1.0024508237838745, "learning_rate": 6.703367984537388e-06, "loss": 0.456, "step": 2714 }, { "epoch": 0.41, "grad_norm": 1.5084142684936523, "learning_rate": 6.701080600340242e-06, "loss": 0.4989, "step": 2715 }, { "epoch": 0.41, "grad_norm": 1.1610472202301025, "learning_rate": 6.698792813460318e-06, "loss": 0.5296, "step": 2716 }, { "epoch": 0.41, "grad_norm": 1.17644464969635, "learning_rate": 6.696504624439185e-06, "loss": 0.411, "step": 2717 }, { "epoch": 0.41, "grad_norm": 1.0806481838226318, "learning_rate": 6.694216033818508e-06, "loss": 0.512, "step": 2718 }, { "epoch": 0.41, "grad_norm": 0.9766600728034973, "learning_rate": 6.691927042140044e-06, "loss": 0.5699, "step": 2719 }, { "epoch": 0.41, "grad_norm": 1.1394506692886353, "learning_rate": 6.68963764994565e-06, "loss": 0.4906, "step": 2720 }, { "epoch": 0.41, "grad_norm": 1.9973180294036865, "learning_rate": 6.6873478577772745e-06, "loss": 0.4385, "step": 2721 }, { "epoch": 0.41, "grad_norm": 1.1225661039352417, "learning_rate": 6.685057666176958e-06, "loss": 0.5084, "step": 2722 }, { "epoch": 0.41, "grad_norm": 1.164766788482666, "learning_rate": 6.682767075686842e-06, "loss": 0.4785, "step": 2723 }, { "epoch": 0.41, "grad_norm": 0.957314133644104, "learning_rate": 6.680476086849157e-06, "loss": 0.4566, "step": 2724 }, { "epoch": 0.41, "grad_norm": 1.4181411266326904, "learning_rate": 6.67818470020623e-06, "loss": 0.4555, "step": 2725 }, { "epoch": 0.41, "grad_norm": 1.538000464439392, "learning_rate": 6.675892916300486e-06, "loss": 0.4772, "step": 2726 }, { "epoch": 0.41, "grad_norm": 1.1655789613723755, "learning_rate": 6.6736007356744345e-06, "loss": 0.7598, "step": 2727 }, { "epoch": 0.41, "grad_norm": 1.1762261390686035, "learning_rate": 6.671308158870687e-06, "loss": 0.5133, "step": 2728 }, { "epoch": 0.41, "grad_norm": 1.146488070487976, "learning_rate": 6.669015186431946e-06, "loss": 0.5476, "step": 2729 }, { "epoch": 0.41, "grad_norm": 1.25919508934021, "learning_rate": 6.666721818901009e-06, "loss": 0.4468, "step": 2730 }, { "epoch": 0.41, "grad_norm": 1.4992557764053345, "learning_rate": 6.664428056820764e-06, "loss": 0.5291, "step": 2731 }, { "epoch": 0.41, "grad_norm": 1.2393901348114014, "learning_rate": 6.662133900734197e-06, "loss": 0.4611, "step": 2732 }, { "epoch": 0.41, "grad_norm": 0.9934820532798767, "learning_rate": 6.6598393511843824e-06, "loss": 0.4825, "step": 2733 }, { "epoch": 0.41, "grad_norm": 1.0421110391616821, "learning_rate": 6.657544408714491e-06, "loss": 0.4591, "step": 2734 }, { "epoch": 0.41, "grad_norm": 1.1274240016937256, "learning_rate": 6.655249073867784e-06, "loss": 0.5063, "step": 2735 }, { "epoch": 0.41, "grad_norm": 1.3505820035934448, "learning_rate": 6.652953347187619e-06, "loss": 0.4675, "step": 2736 }, { "epoch": 0.41, "grad_norm": 1.2992101907730103, "learning_rate": 6.650657229217445e-06, "loss": 0.4257, "step": 2737 }, { "epoch": 0.41, "grad_norm": 1.0296076536178589, "learning_rate": 6.648360720500802e-06, "loss": 0.5308, "step": 2738 }, { "epoch": 0.41, "grad_norm": 1.2620244026184082, "learning_rate": 6.646063821581322e-06, "loss": 0.5272, "step": 2739 }, { "epoch": 0.41, "grad_norm": 1.0654797554016113, "learning_rate": 6.643766533002734e-06, "loss": 0.4721, "step": 2740 }, { "epoch": 0.41, "grad_norm": 1.2296738624572754, "learning_rate": 6.641468855308853e-06, "loss": 0.4822, "step": 2741 }, { "epoch": 0.41, "grad_norm": 3.4417684078216553, "learning_rate": 6.63917078904359e-06, "loss": 0.5607, "step": 2742 }, { "epoch": 0.41, "grad_norm": 1.0598477125167847, "learning_rate": 6.63687233475095e-06, "loss": 0.4245, "step": 2743 }, { "epoch": 0.41, "grad_norm": 1.25343918800354, "learning_rate": 6.634573492975026e-06, "loss": 0.5748, "step": 2744 }, { "epoch": 0.41, "grad_norm": 1.2369585037231445, "learning_rate": 6.6322742642600025e-06, "loss": 0.4855, "step": 2745 }, { "epoch": 0.41, "grad_norm": 1.3864095211029053, "learning_rate": 6.629974649150157e-06, "loss": 0.4262, "step": 2746 }, { "epoch": 0.41, "grad_norm": 1.8581187725067139, "learning_rate": 6.627674648189858e-06, "loss": 0.5963, "step": 2747 }, { "epoch": 0.41, "grad_norm": 1.0904508829116821, "learning_rate": 6.625374261923567e-06, "loss": 0.4614, "step": 2748 }, { "epoch": 0.41, "grad_norm": 0.9969479441642761, "learning_rate": 6.623073490895837e-06, "loss": 0.4639, "step": 2749 }, { "epoch": 0.41, "grad_norm": 1.7094823122024536, "learning_rate": 6.6207723356513085e-06, "loss": 0.4606, "step": 2750 }, { "epoch": 0.41, "grad_norm": 1.1547105312347412, "learning_rate": 6.618470796734714e-06, "loss": 0.5286, "step": 2751 }, { "epoch": 0.41, "grad_norm": 1.516243577003479, "learning_rate": 6.6161688746908805e-06, "loss": 0.4678, "step": 2752 }, { "epoch": 0.41, "grad_norm": 1.4585853815078735, "learning_rate": 6.613866570064718e-06, "loss": 0.4609, "step": 2753 }, { "epoch": 0.41, "grad_norm": 1.4547789096832275, "learning_rate": 6.611563883401239e-06, "loss": 0.4924, "step": 2754 }, { "epoch": 0.41, "grad_norm": 1.6725918054580688, "learning_rate": 6.609260815245535e-06, "loss": 0.4606, "step": 2755 }, { "epoch": 0.41, "grad_norm": 0.9406384229660034, "learning_rate": 6.606957366142795e-06, "loss": 0.4869, "step": 2756 }, { "epoch": 0.41, "grad_norm": 1.139182686805725, "learning_rate": 6.604653536638293e-06, "loss": 0.4527, "step": 2757 }, { "epoch": 0.41, "grad_norm": 1.125524640083313, "learning_rate": 6.602349327277397e-06, "loss": 0.5155, "step": 2758 }, { "epoch": 0.41, "grad_norm": 1.2805061340332031, "learning_rate": 6.600044738605562e-06, "loss": 0.509, "step": 2759 }, { "epoch": 0.41, "grad_norm": 1.302076816558838, "learning_rate": 6.597739771168336e-06, "loss": 0.5533, "step": 2760 }, { "epoch": 0.41, "grad_norm": 1.1589107513427734, "learning_rate": 6.595434425511353e-06, "loss": 0.5124, "step": 2761 }, { "epoch": 0.41, "grad_norm": 1.2349034547805786, "learning_rate": 6.5931287021803415e-06, "loss": 0.4993, "step": 2762 }, { "epoch": 0.42, "grad_norm": 2.2814769744873047, "learning_rate": 6.590822601721115e-06, "loss": 0.5181, "step": 2763 }, { "epoch": 0.42, "grad_norm": 1.7399579286575317, "learning_rate": 6.588516124679575e-06, "loss": 0.4977, "step": 2764 }, { "epoch": 0.42, "grad_norm": 1.2305867671966553, "learning_rate": 6.586209271601718e-06, "loss": 0.5363, "step": 2765 }, { "epoch": 0.42, "grad_norm": 2.088927984237671, "learning_rate": 6.583902043033625e-06, "loss": 0.5111, "step": 2766 }, { "epoch": 0.42, "grad_norm": 1.6300827264785767, "learning_rate": 6.581594439521468e-06, "loss": 0.5449, "step": 2767 }, { "epoch": 0.42, "grad_norm": 1.5139299631118774, "learning_rate": 6.579286461611506e-06, "loss": 0.4892, "step": 2768 }, { "epoch": 0.42, "grad_norm": 1.272863507270813, "learning_rate": 6.576978109850088e-06, "loss": 0.4345, "step": 2769 }, { "epoch": 0.42, "grad_norm": 0.9169899225234985, "learning_rate": 6.574669384783652e-06, "loss": 0.4681, "step": 2770 }, { "epoch": 0.42, "grad_norm": 1.293682336807251, "learning_rate": 6.572360286958721e-06, "loss": 0.443, "step": 2771 }, { "epoch": 0.42, "grad_norm": 0.9407062530517578, "learning_rate": 6.570050816921911e-06, "loss": 0.4921, "step": 2772 }, { "epoch": 0.42, "grad_norm": 1.215090274810791, "learning_rate": 6.567740975219923e-06, "loss": 0.5357, "step": 2773 }, { "epoch": 0.42, "grad_norm": 1.199171781539917, "learning_rate": 6.565430762399546e-06, "loss": 0.4866, "step": 2774 }, { "epoch": 0.42, "grad_norm": 1.287514567375183, "learning_rate": 6.563120179007659e-06, "loss": 0.4963, "step": 2775 }, { "epoch": 0.42, "grad_norm": 1.262190341949463, "learning_rate": 6.560809225591227e-06, "loss": 0.4825, "step": 2776 }, { "epoch": 0.42, "grad_norm": 1.3877153396606445, "learning_rate": 6.5584979026973014e-06, "loss": 0.4245, "step": 2777 }, { "epoch": 0.42, "grad_norm": 1.1295543909072876, "learning_rate": 6.556186210873025e-06, "loss": 0.46, "step": 2778 }, { "epoch": 0.42, "grad_norm": 1.171504259109497, "learning_rate": 6.5538741506656236e-06, "loss": 0.5521, "step": 2779 }, { "epoch": 0.42, "grad_norm": 1.1827741861343384, "learning_rate": 6.551561722622414e-06, "loss": 0.458, "step": 2780 }, { "epoch": 0.42, "grad_norm": 1.2089564800262451, "learning_rate": 6.549248927290796e-06, "loss": 0.494, "step": 2781 }, { "epoch": 0.42, "grad_norm": 0.9680263996124268, "learning_rate": 6.54693576521826e-06, "loss": 0.5198, "step": 2782 }, { "epoch": 0.42, "grad_norm": 1.0616562366485596, "learning_rate": 6.544622236952382e-06, "loss": 0.4511, "step": 2783 }, { "epoch": 0.42, "grad_norm": 1.1551731824874878, "learning_rate": 6.542308343040823e-06, "loss": 0.4877, "step": 2784 }, { "epoch": 0.42, "grad_norm": 1.2622674703598022, "learning_rate": 6.539994084031334e-06, "loss": 0.5496, "step": 2785 }, { "epoch": 0.42, "grad_norm": 1.3150635957717896, "learning_rate": 6.537679460471747e-06, "loss": 0.4589, "step": 2786 }, { "epoch": 0.42, "grad_norm": 1.4043787717819214, "learning_rate": 6.535364472909989e-06, "loss": 0.4624, "step": 2787 }, { "epoch": 0.42, "grad_norm": 1.0349596738815308, "learning_rate": 6.533049121894064e-06, "loss": 0.5382, "step": 2788 }, { "epoch": 0.42, "grad_norm": 1.3926440477371216, "learning_rate": 6.530733407972067e-06, "loss": 0.5078, "step": 2789 }, { "epoch": 0.42, "grad_norm": 1.0460243225097656, "learning_rate": 6.528417331692177e-06, "loss": 0.5303, "step": 2790 }, { "epoch": 0.42, "grad_norm": 1.3015083074569702, "learning_rate": 6.526100893602661e-06, "loss": 0.527, "step": 2791 }, { "epoch": 0.42, "grad_norm": 1.1964820623397827, "learning_rate": 6.5237840942518706e-06, "loss": 0.5796, "step": 2792 }, { "epoch": 0.42, "grad_norm": 1.1439011096954346, "learning_rate": 6.521466934188241e-06, "loss": 0.5077, "step": 2793 }, { "epoch": 0.42, "grad_norm": 1.454410433769226, "learning_rate": 6.519149413960295e-06, "loss": 0.5117, "step": 2794 }, { "epoch": 0.42, "grad_norm": 1.5252925157546997, "learning_rate": 6.51683153411664e-06, "loss": 0.5166, "step": 2795 }, { "epoch": 0.42, "grad_norm": 1.222077488899231, "learning_rate": 6.51451329520597e-06, "loss": 0.488, "step": 2796 }, { "epoch": 0.42, "grad_norm": 1.571738839149475, "learning_rate": 6.5121946977770585e-06, "loss": 0.4235, "step": 2797 }, { "epoch": 0.42, "grad_norm": 1.2988219261169434, "learning_rate": 6.509875742378774e-06, "loss": 0.4667, "step": 2798 }, { "epoch": 0.42, "grad_norm": 1.1797325611114502, "learning_rate": 6.5075564295600594e-06, "loss": 0.4364, "step": 2799 }, { "epoch": 0.42, "grad_norm": 1.3390249013900757, "learning_rate": 6.505236759869947e-06, "loss": 0.5336, "step": 2800 }, { "epoch": 0.42, "grad_norm": 1.5081958770751953, "learning_rate": 6.502916733857554e-06, "loss": 0.579, "step": 2801 }, { "epoch": 0.42, "grad_norm": 1.1190965175628662, "learning_rate": 6.50059635207208e-06, "loss": 0.7456, "step": 2802 }, { "epoch": 0.42, "grad_norm": 1.2793587446212769, "learning_rate": 6.49827561506281e-06, "loss": 0.4872, "step": 2803 }, { "epoch": 0.42, "grad_norm": 2.109591484069824, "learning_rate": 6.4959545233791155e-06, "loss": 0.4986, "step": 2804 }, { "epoch": 0.42, "grad_norm": 7.487966537475586, "learning_rate": 6.493633077570446e-06, "loss": 0.5747, "step": 2805 }, { "epoch": 0.42, "grad_norm": 1.1666991710662842, "learning_rate": 6.491311278186339e-06, "loss": 0.5079, "step": 2806 }, { "epoch": 0.42, "grad_norm": 1.5527349710464478, "learning_rate": 6.488989125776417e-06, "loss": 0.5476, "step": 2807 }, { "epoch": 0.42, "grad_norm": 1.283318281173706, "learning_rate": 6.48666662089038e-06, "loss": 0.5143, "step": 2808 }, { "epoch": 0.42, "grad_norm": 0.9861534833908081, "learning_rate": 6.484343764078019e-06, "loss": 0.3968, "step": 2809 }, { "epoch": 0.42, "grad_norm": 1.464884877204895, "learning_rate": 6.482020555889206e-06, "loss": 0.4364, "step": 2810 }, { "epoch": 0.42, "grad_norm": 1.18137788772583, "learning_rate": 6.4796969968738895e-06, "loss": 0.6728, "step": 2811 }, { "epoch": 0.42, "grad_norm": 1.28562331199646, "learning_rate": 6.47737308758211e-06, "loss": 0.4821, "step": 2812 }, { "epoch": 0.42, "grad_norm": 1.2555696964263916, "learning_rate": 6.475048828563988e-06, "loss": 0.5301, "step": 2813 }, { "epoch": 0.42, "grad_norm": 1.1530907154083252, "learning_rate": 6.4727242203697215e-06, "loss": 0.4759, "step": 2814 }, { "epoch": 0.42, "grad_norm": 1.1397368907928467, "learning_rate": 6.470399263549603e-06, "loss": 0.4207, "step": 2815 }, { "epoch": 0.42, "grad_norm": 1.8993405103683472, "learning_rate": 6.4680739586539956e-06, "loss": 0.5347, "step": 2816 }, { "epoch": 0.42, "grad_norm": 1.2700008153915405, "learning_rate": 6.46574830623335e-06, "loss": 0.5112, "step": 2817 }, { "epoch": 0.42, "grad_norm": 2.070422887802124, "learning_rate": 6.463422306838199e-06, "loss": 0.4695, "step": 2818 }, { "epoch": 0.42, "grad_norm": 1.162853479385376, "learning_rate": 6.461095961019158e-06, "loss": 0.4382, "step": 2819 }, { "epoch": 0.42, "grad_norm": 1.4043551683425903, "learning_rate": 6.4587692693269235e-06, "loss": 0.5039, "step": 2820 }, { "epoch": 0.42, "grad_norm": 1.3432457447052002, "learning_rate": 6.4564422323122735e-06, "loss": 0.4878, "step": 2821 }, { "epoch": 0.42, "grad_norm": 1.1154043674468994, "learning_rate": 6.45411485052607e-06, "loss": 0.4303, "step": 2822 }, { "epoch": 0.42, "grad_norm": 0.9243492484092712, "learning_rate": 6.451787124519251e-06, "loss": 0.4315, "step": 2823 }, { "epoch": 0.42, "grad_norm": 1.0391420125961304, "learning_rate": 6.449459054842844e-06, "loss": 0.4833, "step": 2824 }, { "epoch": 0.42, "grad_norm": 2.8093206882476807, "learning_rate": 6.447130642047949e-06, "loss": 0.4591, "step": 2825 }, { "epoch": 0.42, "grad_norm": 1.2664036750793457, "learning_rate": 6.444801886685758e-06, "loss": 0.4559, "step": 2826 }, { "epoch": 0.42, "grad_norm": 1.3063035011291504, "learning_rate": 6.442472789307533e-06, "loss": 0.4569, "step": 2827 }, { "epoch": 0.42, "grad_norm": 1.666603446006775, "learning_rate": 6.440143350464625e-06, "loss": 0.4885, "step": 2828 }, { "epoch": 0.42, "grad_norm": 1.1084660291671753, "learning_rate": 6.437813570708463e-06, "loss": 0.5282, "step": 2829 }, { "epoch": 0.43, "grad_norm": 0.9977172017097473, "learning_rate": 6.435483450590554e-06, "loss": 0.4758, "step": 2830 }, { "epoch": 0.43, "grad_norm": 1.2249486446380615, "learning_rate": 6.433152990662489e-06, "loss": 0.5552, "step": 2831 }, { "epoch": 0.43, "grad_norm": 1.0490338802337646, "learning_rate": 6.43082219147594e-06, "loss": 0.5409, "step": 2832 }, { "epoch": 0.43, "grad_norm": 1.2402088642120361, "learning_rate": 6.428491053582657e-06, "loss": 0.5124, "step": 2833 }, { "epoch": 0.43, "grad_norm": 1.1610558032989502, "learning_rate": 6.42615957753447e-06, "loss": 0.4656, "step": 2834 }, { "epoch": 0.43, "grad_norm": 1.4092549085617065, "learning_rate": 6.423827763883294e-06, "loss": 0.4502, "step": 2835 }, { "epoch": 0.43, "grad_norm": 1.261022925376892, "learning_rate": 6.421495613181112e-06, "loss": 0.4852, "step": 2836 }, { "epoch": 0.43, "grad_norm": 1.4547542333602905, "learning_rate": 6.419163125980004e-06, "loss": 0.5383, "step": 2837 }, { "epoch": 0.43, "grad_norm": 1.05756676197052, "learning_rate": 6.416830302832114e-06, "loss": 0.4846, "step": 2838 }, { "epoch": 0.43, "grad_norm": 1.2158459424972534, "learning_rate": 6.414497144289675e-06, "loss": 0.5066, "step": 2839 }, { "epoch": 0.43, "grad_norm": 1.3533935546875, "learning_rate": 6.4121636509049965e-06, "loss": 0.5912, "step": 2840 }, { "epoch": 0.43, "grad_norm": 1.07439386844635, "learning_rate": 6.409829823230467e-06, "loss": 0.4837, "step": 2841 }, { "epoch": 0.43, "grad_norm": 1.5885714292526245, "learning_rate": 6.407495661818551e-06, "loss": 0.5301, "step": 2842 }, { "epoch": 0.43, "grad_norm": 1.1729614734649658, "learning_rate": 6.405161167221799e-06, "loss": 0.4379, "step": 2843 }, { "epoch": 0.43, "grad_norm": 1.296102523803711, "learning_rate": 6.402826339992836e-06, "loss": 0.4592, "step": 2844 }, { "epoch": 0.43, "grad_norm": 1.1483319997787476, "learning_rate": 6.400491180684365e-06, "loss": 0.5129, "step": 2845 }, { "epoch": 0.43, "grad_norm": 1.377109169960022, "learning_rate": 6.3981556898491705e-06, "loss": 0.5082, "step": 2846 }, { "epoch": 0.43, "grad_norm": 1.1392940282821655, "learning_rate": 6.395819868040114e-06, "loss": 0.6993, "step": 2847 }, { "epoch": 0.43, "grad_norm": 1.1160516738891602, "learning_rate": 6.393483715810134e-06, "loss": 0.5, "step": 2848 }, { "epoch": 0.43, "grad_norm": 1.2092007398605347, "learning_rate": 6.391147233712248e-06, "loss": 0.609, "step": 2849 }, { "epoch": 0.43, "grad_norm": 1.104915738105774, "learning_rate": 6.388810422299555e-06, "loss": 0.468, "step": 2850 }, { "epoch": 0.43, "grad_norm": 1.092905044555664, "learning_rate": 6.386473282125227e-06, "loss": 0.5625, "step": 2851 }, { "epoch": 0.43, "grad_norm": 1.3058072328567505, "learning_rate": 6.384135813742516e-06, "loss": 0.5991, "step": 2852 }, { "epoch": 0.43, "grad_norm": 1.2503905296325684, "learning_rate": 6.381798017704752e-06, "loss": 0.4721, "step": 2853 }, { "epoch": 0.43, "grad_norm": 1.1018201112747192, "learning_rate": 6.379459894565343e-06, "loss": 0.4996, "step": 2854 }, { "epoch": 0.43, "grad_norm": 1.190382719039917, "learning_rate": 6.377121444877773e-06, "loss": 0.5035, "step": 2855 }, { "epoch": 0.43, "grad_norm": 1.134190559387207, "learning_rate": 6.374782669195603e-06, "loss": 0.4705, "step": 2856 }, { "epoch": 0.43, "grad_norm": 1.01921546459198, "learning_rate": 6.372443568072473e-06, "loss": 0.4759, "step": 2857 }, { "epoch": 0.43, "grad_norm": 1.0406519174575806, "learning_rate": 6.370104142062098e-06, "loss": 0.4525, "step": 2858 }, { "epoch": 0.43, "grad_norm": 2.363454818725586, "learning_rate": 6.367764391718272e-06, "loss": 0.5068, "step": 2859 }, { "epoch": 0.43, "grad_norm": 1.3144031763076782, "learning_rate": 6.3654243175948646e-06, "loss": 0.5376, "step": 2860 }, { "epoch": 0.43, "grad_norm": 2.801135778427124, "learning_rate": 6.363083920245824e-06, "loss": 0.4809, "step": 2861 }, { "epoch": 0.43, "grad_norm": 1.8509459495544434, "learning_rate": 6.36074320022517e-06, "loss": 0.5393, "step": 2862 }, { "epoch": 0.43, "grad_norm": 2.264789342880249, "learning_rate": 6.358402158087003e-06, "loss": 0.4704, "step": 2863 }, { "epoch": 0.43, "grad_norm": 1.006108045578003, "learning_rate": 6.3560607943855e-06, "loss": 0.4958, "step": 2864 }, { "epoch": 0.43, "grad_norm": 1.4761955738067627, "learning_rate": 6.353719109674912e-06, "loss": 0.5008, "step": 2865 }, { "epoch": 0.43, "grad_norm": 1.0426969528198242, "learning_rate": 6.3513771045095655e-06, "loss": 0.4828, "step": 2866 }, { "epoch": 0.43, "grad_norm": 1.4481950998306274, "learning_rate": 6.349034779443865e-06, "loss": 0.5089, "step": 2867 }, { "epoch": 0.43, "grad_norm": 1.4570685625076294, "learning_rate": 6.3466921350322894e-06, "loss": 0.523, "step": 2868 }, { "epoch": 0.43, "grad_norm": 1.1630808115005493, "learning_rate": 6.3443491718293945e-06, "loss": 0.4273, "step": 2869 }, { "epoch": 0.43, "grad_norm": 1.372111439704895, "learning_rate": 6.342005890389811e-06, "loss": 0.523, "step": 2870 }, { "epoch": 0.43, "grad_norm": 1.1525236368179321, "learning_rate": 6.339662291268243e-06, "loss": 0.4946, "step": 2871 }, { "epoch": 0.43, "grad_norm": 1.21110999584198, "learning_rate": 6.337318375019472e-06, "loss": 0.4574, "step": 2872 }, { "epoch": 0.43, "grad_norm": 1.243999719619751, "learning_rate": 6.334974142198354e-06, "loss": 0.462, "step": 2873 }, { "epoch": 0.43, "grad_norm": 1.2084983587265015, "learning_rate": 6.332629593359821e-06, "loss": 0.5471, "step": 2874 }, { "epoch": 0.43, "grad_norm": 1.374376654624939, "learning_rate": 6.330284729058874e-06, "loss": 0.5077, "step": 2875 }, { "epoch": 0.43, "grad_norm": 1.3097164630889893, "learning_rate": 6.327939549850602e-06, "loss": 0.4607, "step": 2876 }, { "epoch": 0.43, "grad_norm": 1.2327874898910522, "learning_rate": 6.325594056290152e-06, "loss": 0.4877, "step": 2877 }, { "epoch": 0.43, "grad_norm": 1.1704338788986206, "learning_rate": 6.3232482489327565e-06, "loss": 0.7294, "step": 2878 }, { "epoch": 0.43, "grad_norm": 1.2593003511428833, "learning_rate": 6.320902128333719e-06, "loss": 0.441, "step": 2879 }, { "epoch": 0.43, "grad_norm": 1.448976993560791, "learning_rate": 6.318555695048418e-06, "loss": 0.4938, "step": 2880 }, { "epoch": 0.43, "grad_norm": 1.3523097038269043, "learning_rate": 6.316208949632303e-06, "loss": 0.4881, "step": 2881 }, { "epoch": 0.43, "grad_norm": 1.8407741785049438, "learning_rate": 6.3138618926409025e-06, "loss": 0.5045, "step": 2882 }, { "epoch": 0.43, "grad_norm": 1.2034162282943726, "learning_rate": 6.311514524629815e-06, "loss": 0.5221, "step": 2883 }, { "epoch": 0.43, "grad_norm": 1.1467328071594238, "learning_rate": 6.309166846154713e-06, "loss": 0.4936, "step": 2884 }, { "epoch": 0.43, "grad_norm": 1.1720234155654907, "learning_rate": 6.306818857771342e-06, "loss": 0.4253, "step": 2885 }, { "epoch": 0.43, "grad_norm": 1.2059468030929565, "learning_rate": 6.304470560035522e-06, "loss": 0.4306, "step": 2886 }, { "epoch": 0.43, "grad_norm": 1.1724234819412231, "learning_rate": 6.30212195350315e-06, "loss": 0.47, "step": 2887 }, { "epoch": 0.43, "grad_norm": 1.2857730388641357, "learning_rate": 6.299773038730189e-06, "loss": 0.7763, "step": 2888 }, { "epoch": 0.43, "grad_norm": 1.229141354560852, "learning_rate": 6.297423816272679e-06, "loss": 0.5614, "step": 2889 }, { "epoch": 0.43, "grad_norm": 1.1511833667755127, "learning_rate": 6.295074286686731e-06, "loss": 0.5018, "step": 2890 }, { "epoch": 0.43, "grad_norm": 2.1465353965759277, "learning_rate": 6.292724450528531e-06, "loss": 0.4339, "step": 2891 }, { "epoch": 0.43, "grad_norm": 1.44948148727417, "learning_rate": 6.2903743083543346e-06, "loss": 0.556, "step": 2892 }, { "epoch": 0.43, "grad_norm": 1.2006003856658936, "learning_rate": 6.288023860720475e-06, "loss": 0.4677, "step": 2893 }, { "epoch": 0.43, "grad_norm": 1.224477767944336, "learning_rate": 6.285673108183352e-06, "loss": 0.4706, "step": 2894 }, { "epoch": 0.43, "grad_norm": 1.1882917881011963, "learning_rate": 6.283322051299439e-06, "loss": 0.4753, "step": 2895 }, { "epoch": 0.44, "grad_norm": 1.2375613451004028, "learning_rate": 6.280970690625283e-06, "loss": 0.5001, "step": 2896 }, { "epoch": 0.44, "grad_norm": 1.2688437700271606, "learning_rate": 6.278619026717503e-06, "loss": 0.4738, "step": 2897 }, { "epoch": 0.44, "grad_norm": 1.1559423208236694, "learning_rate": 6.2762670601327875e-06, "loss": 0.5465, "step": 2898 }, { "epoch": 0.44, "grad_norm": 1.0194697380065918, "learning_rate": 6.273914791427902e-06, "loss": 0.4817, "step": 2899 }, { "epoch": 0.44, "grad_norm": 1.9100512266159058, "learning_rate": 6.271562221159676e-06, "loss": 0.5052, "step": 2900 }, { "epoch": 0.44, "grad_norm": 1.0242302417755127, "learning_rate": 6.269209349885014e-06, "loss": 0.437, "step": 2901 }, { "epoch": 0.44, "grad_norm": 0.963222086429596, "learning_rate": 6.266856178160892e-06, "loss": 0.4459, "step": 2902 }, { "epoch": 0.44, "grad_norm": 1.9001191854476929, "learning_rate": 6.264502706544358e-06, "loss": 0.4388, "step": 2903 }, { "epoch": 0.44, "grad_norm": 1.255056381225586, "learning_rate": 6.26214893559253e-06, "loss": 0.4723, "step": 2904 }, { "epoch": 0.44, "grad_norm": 1.269897222518921, "learning_rate": 6.259794865862596e-06, "loss": 0.4865, "step": 2905 }, { "epoch": 0.44, "grad_norm": 1.2330756187438965, "learning_rate": 6.257440497911816e-06, "loss": 0.5229, "step": 2906 }, { "epoch": 0.44, "grad_norm": 1.1151847839355469, "learning_rate": 6.255085832297522e-06, "loss": 0.5348, "step": 2907 }, { "epoch": 0.44, "grad_norm": 0.9478161334991455, "learning_rate": 6.25273086957711e-06, "loss": 0.527, "step": 2908 }, { "epoch": 0.44, "grad_norm": 1.0779303312301636, "learning_rate": 6.250375610308054e-06, "loss": 0.415, "step": 2909 }, { "epoch": 0.44, "grad_norm": 1.1524690389633179, "learning_rate": 6.248020055047894e-06, "loss": 0.4844, "step": 2910 }, { "epoch": 0.44, "grad_norm": 1.69286048412323, "learning_rate": 6.245664204354242e-06, "loss": 0.4971, "step": 2911 }, { "epoch": 0.44, "grad_norm": 1.3470284938812256, "learning_rate": 6.24330805878478e-06, "loss": 0.5216, "step": 2912 }, { "epoch": 0.44, "grad_norm": 1.3499020338058472, "learning_rate": 6.2409516188972574e-06, "loss": 0.5187, "step": 2913 }, { "epoch": 0.44, "grad_norm": 1.2884074449539185, "learning_rate": 6.238594885249493e-06, "loss": 0.5275, "step": 2914 }, { "epoch": 0.44, "grad_norm": 1.2713905572891235, "learning_rate": 6.236237858399381e-06, "loss": 0.5496, "step": 2915 }, { "epoch": 0.44, "grad_norm": 1.7055341005325317, "learning_rate": 6.233880538904878e-06, "loss": 0.4493, "step": 2916 }, { "epoch": 0.44, "grad_norm": 1.7935304641723633, "learning_rate": 6.231522927324014e-06, "loss": 0.5026, "step": 2917 }, { "epoch": 0.44, "grad_norm": 1.1148346662521362, "learning_rate": 6.229165024214886e-06, "loss": 0.4465, "step": 2918 }, { "epoch": 0.44, "grad_norm": 1.105339527130127, "learning_rate": 6.226806830135663e-06, "loss": 0.4869, "step": 2919 }, { "epoch": 0.44, "grad_norm": 1.2866768836975098, "learning_rate": 6.224448345644578e-06, "loss": 0.5495, "step": 2920 }, { "epoch": 0.44, "grad_norm": 1.1522936820983887, "learning_rate": 6.222089571299937e-06, "loss": 0.5092, "step": 2921 }, { "epoch": 0.44, "grad_norm": 1.5693994760513306, "learning_rate": 6.2197305076601145e-06, "loss": 0.5289, "step": 2922 }, { "epoch": 0.44, "grad_norm": 1.0902310609817505, "learning_rate": 6.217371155283551e-06, "loss": 0.414, "step": 2923 }, { "epoch": 0.44, "grad_norm": 1.2906107902526855, "learning_rate": 6.215011514728755e-06, "loss": 0.4336, "step": 2924 }, { "epoch": 0.44, "grad_norm": 1.1654329299926758, "learning_rate": 6.212651586554306e-06, "loss": 0.4301, "step": 2925 }, { "epoch": 0.44, "grad_norm": 1.0865427255630493, "learning_rate": 6.210291371318852e-06, "loss": 0.7187, "step": 2926 }, { "epoch": 0.44, "grad_norm": 1.077204704284668, "learning_rate": 6.207930869581105e-06, "loss": 0.4559, "step": 2927 }, { "epoch": 0.44, "grad_norm": 1.5914697647094727, "learning_rate": 6.205570081899847e-06, "loss": 0.5435, "step": 2928 }, { "epoch": 0.44, "grad_norm": 1.3542559146881104, "learning_rate": 6.203209008833929e-06, "loss": 0.4568, "step": 2929 }, { "epoch": 0.44, "grad_norm": 1.3706738948822021, "learning_rate": 6.200847650942269e-06, "loss": 0.4497, "step": 2930 }, { "epoch": 0.44, "grad_norm": 1.2702538967132568, "learning_rate": 6.1984860087838495e-06, "loss": 0.4911, "step": 2931 }, { "epoch": 0.44, "grad_norm": 1.0025957822799683, "learning_rate": 6.196124082917725e-06, "loss": 0.522, "step": 2932 }, { "epoch": 0.44, "grad_norm": 1.2045303583145142, "learning_rate": 6.1937618739030125e-06, "loss": 0.4958, "step": 2933 }, { "epoch": 0.44, "grad_norm": 1.1826896667480469, "learning_rate": 6.1913993822988995e-06, "loss": 0.4892, "step": 2934 }, { "epoch": 0.44, "grad_norm": 1.0695865154266357, "learning_rate": 6.189036608664639e-06, "loss": 0.5095, "step": 2935 }, { "epoch": 0.44, "grad_norm": 1.2960065603256226, "learning_rate": 6.186673553559551e-06, "loss": 0.4147, "step": 2936 }, { "epoch": 0.44, "grad_norm": 0.980527400970459, "learning_rate": 6.184310217543022e-06, "loss": 0.4703, "step": 2937 }, { "epoch": 0.44, "grad_norm": 1.3020110130310059, "learning_rate": 6.181946601174506e-06, "loss": 0.483, "step": 2938 }, { "epoch": 0.44, "grad_norm": 1.216688632965088, "learning_rate": 6.179582705013519e-06, "loss": 0.4949, "step": 2939 }, { "epoch": 0.44, "grad_norm": 1.3407970666885376, "learning_rate": 6.177218529619651e-06, "loss": 0.6105, "step": 2940 }, { "epoch": 0.44, "grad_norm": 1.3226866722106934, "learning_rate": 6.174854075552551e-06, "loss": 0.4388, "step": 2941 }, { "epoch": 0.44, "grad_norm": 1.5360090732574463, "learning_rate": 6.172489343371937e-06, "loss": 0.5226, "step": 2942 }, { "epoch": 0.44, "grad_norm": 1.1453746557235718, "learning_rate": 6.1701243336375935e-06, "loss": 0.4306, "step": 2943 }, { "epoch": 0.44, "grad_norm": 1.3106669187545776, "learning_rate": 6.16775904690937e-06, "loss": 0.4496, "step": 2944 }, { "epoch": 0.44, "grad_norm": 1.0482372045516968, "learning_rate": 6.165393483747179e-06, "loss": 0.6873, "step": 2945 }, { "epoch": 0.44, "grad_norm": 1.1464961767196655, "learning_rate": 6.163027644711003e-06, "loss": 0.4741, "step": 2946 }, { "epoch": 0.44, "grad_norm": 1.141156554222107, "learning_rate": 6.160661530360887e-06, "loss": 0.4671, "step": 2947 }, { "epoch": 0.44, "grad_norm": 1.2645882368087769, "learning_rate": 6.1582951412569415e-06, "loss": 0.5576, "step": 2948 }, { "epoch": 0.44, "grad_norm": 1.3339455127716064, "learning_rate": 6.155928477959342e-06, "loss": 0.5059, "step": 2949 }, { "epoch": 0.44, "grad_norm": 1.3964478969573975, "learning_rate": 6.15356154102833e-06, "loss": 0.4949, "step": 2950 }, { "epoch": 0.44, "grad_norm": 1.21611750125885, "learning_rate": 6.15119433102421e-06, "loss": 0.7223, "step": 2951 }, { "epoch": 0.44, "grad_norm": 1.2504955530166626, "learning_rate": 6.1488268485073536e-06, "loss": 0.4943, "step": 2952 }, { "epoch": 0.44, "grad_norm": 0.9775145053863525, "learning_rate": 6.1464590940381914e-06, "loss": 0.4663, "step": 2953 }, { "epoch": 0.44, "grad_norm": 1.1536489725112915, "learning_rate": 6.144091068177229e-06, "loss": 0.5004, "step": 2954 }, { "epoch": 0.44, "grad_norm": 0.9440672993659973, "learning_rate": 6.141722771485024e-06, "loss": 0.4531, "step": 2955 }, { "epoch": 0.44, "grad_norm": 1.2881379127502441, "learning_rate": 6.139354204522206e-06, "loss": 0.4661, "step": 2956 }, { "epoch": 0.44, "grad_norm": 1.4262531995773315, "learning_rate": 6.136985367849467e-06, "loss": 0.5198, "step": 2957 }, { "epoch": 0.44, "grad_norm": 1.1946192979812622, "learning_rate": 6.13461626202756e-06, "loss": 0.515, "step": 2958 }, { "epoch": 0.44, "grad_norm": 1.0820807218551636, "learning_rate": 6.132246887617306e-06, "loss": 0.4456, "step": 2959 }, { "epoch": 0.44, "grad_norm": 1.8387771844863892, "learning_rate": 6.129877245179587e-06, "loss": 0.4806, "step": 2960 }, { "epoch": 0.44, "grad_norm": 1.341299295425415, "learning_rate": 6.127507335275348e-06, "loss": 0.5026, "step": 2961 }, { "epoch": 0.44, "grad_norm": 1.8815211057662964, "learning_rate": 6.125137158465598e-06, "loss": 0.4356, "step": 2962 }, { "epoch": 0.45, "grad_norm": 1.1739252805709839, "learning_rate": 6.12276671531141e-06, "loss": 0.5358, "step": 2963 }, { "epoch": 0.45, "grad_norm": 1.71811044216156, "learning_rate": 6.1203960063739185e-06, "loss": 0.4661, "step": 2964 }, { "epoch": 0.45, "grad_norm": 1.085331916809082, "learning_rate": 6.1180250322143255e-06, "loss": 0.4963, "step": 2965 }, { "epoch": 0.45, "grad_norm": 1.2105048894882202, "learning_rate": 6.11565379339389e-06, "loss": 0.4651, "step": 2966 }, { "epoch": 0.45, "grad_norm": 1.7322025299072266, "learning_rate": 6.113282290473933e-06, "loss": 0.5487, "step": 2967 }, { "epoch": 0.45, "grad_norm": 1.6754015684127808, "learning_rate": 6.110910524015844e-06, "loss": 0.4303, "step": 2968 }, { "epoch": 0.45, "grad_norm": 1.1219193935394287, "learning_rate": 6.108538494581071e-06, "loss": 0.575, "step": 2969 }, { "epoch": 0.45, "grad_norm": 1.2933688163757324, "learning_rate": 6.106166202731123e-06, "loss": 0.4463, "step": 2970 }, { "epoch": 0.45, "grad_norm": 1.549291729927063, "learning_rate": 6.103793649027577e-06, "loss": 0.4677, "step": 2971 }, { "epoch": 0.45, "grad_norm": 0.9744592308998108, "learning_rate": 6.101420834032067e-06, "loss": 0.4865, "step": 2972 }, { "epoch": 0.45, "grad_norm": 1.2420077323913574, "learning_rate": 6.099047758306288e-06, "loss": 0.5183, "step": 2973 }, { "epoch": 0.45, "grad_norm": 1.4548691511154175, "learning_rate": 6.096674422411999e-06, "loss": 0.502, "step": 2974 }, { "epoch": 0.45, "grad_norm": 1.161047339439392, "learning_rate": 6.094300826911021e-06, "loss": 0.5404, "step": 2975 }, { "epoch": 0.45, "grad_norm": 1.2225277423858643, "learning_rate": 6.091926972365234e-06, "loss": 0.4259, "step": 2976 }, { "epoch": 0.45, "grad_norm": 1.5755575895309448, "learning_rate": 6.089552859336585e-06, "loss": 0.5374, "step": 2977 }, { "epoch": 0.45, "grad_norm": 1.231740951538086, "learning_rate": 6.087178488387074e-06, "loss": 0.4306, "step": 2978 }, { "epoch": 0.45, "grad_norm": 0.9833521842956543, "learning_rate": 6.08480386007877e-06, "loss": 0.4609, "step": 2979 }, { "epoch": 0.45, "grad_norm": 1.1585111618041992, "learning_rate": 6.0824289749737955e-06, "loss": 0.5594, "step": 2980 }, { "epoch": 0.45, "grad_norm": 1.1429738998413086, "learning_rate": 6.080053833634338e-06, "loss": 0.4582, "step": 2981 }, { "epoch": 0.45, "grad_norm": 1.2051212787628174, "learning_rate": 6.077678436622647e-06, "loss": 0.4478, "step": 2982 }, { "epoch": 0.45, "grad_norm": 2.0198261737823486, "learning_rate": 6.07530278450103e-06, "loss": 0.5053, "step": 2983 }, { "epoch": 0.45, "grad_norm": 1.0845160484313965, "learning_rate": 6.0729268778318566e-06, "loss": 0.455, "step": 2984 }, { "epoch": 0.45, "grad_norm": 2.132758617401123, "learning_rate": 6.070550717177553e-06, "loss": 0.4236, "step": 2985 }, { "epoch": 0.45, "grad_norm": 1.1096158027648926, "learning_rate": 6.068174303100609e-06, "loss": 0.4998, "step": 2986 }, { "epoch": 0.45, "grad_norm": 1.0602790117263794, "learning_rate": 6.065797636163575e-06, "loss": 0.4658, "step": 2987 }, { "epoch": 0.45, "grad_norm": 1.1686153411865234, "learning_rate": 6.063420716929058e-06, "loss": 0.5128, "step": 2988 }, { "epoch": 0.45, "grad_norm": 1.0737571716308594, "learning_rate": 6.061043545959727e-06, "loss": 0.4663, "step": 2989 }, { "epoch": 0.45, "grad_norm": 1.2813528776168823, "learning_rate": 6.058666123818311e-06, "loss": 0.4672, "step": 2990 }, { "epoch": 0.45, "grad_norm": 1.0652520656585693, "learning_rate": 6.056288451067598e-06, "loss": 0.4984, "step": 2991 }, { "epoch": 0.45, "grad_norm": 1.1073585748672485, "learning_rate": 6.053910528270431e-06, "loss": 0.4173, "step": 2992 }, { "epoch": 0.45, "grad_norm": 1.3517332077026367, "learning_rate": 6.05153235598972e-06, "loss": 0.4531, "step": 2993 }, { "epoch": 0.45, "grad_norm": 1.268057107925415, "learning_rate": 6.049153934788429e-06, "loss": 0.487, "step": 2994 }, { "epoch": 0.45, "grad_norm": 1.7082663774490356, "learning_rate": 6.0467752652295805e-06, "loss": 0.4786, "step": 2995 }, { "epoch": 0.45, "grad_norm": 7.809239387512207, "learning_rate": 6.04439634787626e-06, "loss": 0.5474, "step": 2996 }, { "epoch": 0.45, "grad_norm": 0.937484085559845, "learning_rate": 6.042017183291607e-06, "loss": 0.4323, "step": 2997 }, { "epoch": 0.45, "grad_norm": 1.1984167098999023, "learning_rate": 6.039637772038822e-06, "loss": 0.5353, "step": 2998 }, { "epoch": 0.45, "grad_norm": 1.0035550594329834, "learning_rate": 6.037258114681164e-06, "loss": 0.5329, "step": 2999 }, { "epoch": 0.45, "grad_norm": 1.3097360134124756, "learning_rate": 6.034878211781949e-06, "loss": 0.5408, "step": 3000 }, { "epoch": 0.45, "grad_norm": 1.326319694519043, "learning_rate": 6.032498063904551e-06, "loss": 0.4577, "step": 3001 }, { "epoch": 0.45, "grad_norm": 1.3727524280548096, "learning_rate": 6.030117671612405e-06, "loss": 0.4829, "step": 3002 }, { "epoch": 0.45, "grad_norm": 1.0442899465560913, "learning_rate": 6.027737035469e-06, "loss": 0.4511, "step": 3003 }, { "epoch": 0.45, "grad_norm": 1.0916804075241089, "learning_rate": 6.025356156037885e-06, "loss": 0.4555, "step": 3004 }, { "epoch": 0.45, "grad_norm": 1.2329779863357544, "learning_rate": 6.022975033882665e-06, "loss": 0.4698, "step": 3005 }, { "epoch": 0.45, "grad_norm": 1.1550095081329346, "learning_rate": 6.020593669567005e-06, "loss": 0.4984, "step": 3006 }, { "epoch": 0.45, "grad_norm": 1.3852863311767578, "learning_rate": 6.018212063654625e-06, "loss": 0.4538, "step": 3007 }, { "epoch": 0.45, "grad_norm": 1.2281633615493774, "learning_rate": 6.0158302167093016e-06, "loss": 0.5386, "step": 3008 }, { "epoch": 0.45, "grad_norm": 1.2104997634887695, "learning_rate": 6.013448129294871e-06, "loss": 0.4701, "step": 3009 }, { "epoch": 0.45, "grad_norm": 1.3287744522094727, "learning_rate": 6.011065801975225e-06, "loss": 0.5325, "step": 3010 }, { "epoch": 0.45, "grad_norm": 1.4532477855682373, "learning_rate": 6.008683235314313e-06, "loss": 0.4569, "step": 3011 }, { "epoch": 0.45, "grad_norm": 1.5832446813583374, "learning_rate": 6.00630042987614e-06, "loss": 0.4305, "step": 3012 }, { "epoch": 0.45, "grad_norm": 1.2821835279464722, "learning_rate": 6.003917386224766e-06, "loss": 0.4979, "step": 3013 }, { "epoch": 0.45, "grad_norm": 1.3541514873504639, "learning_rate": 6.001534104924311e-06, "loss": 0.473, "step": 3014 }, { "epoch": 0.45, "grad_norm": 1.1539386510849, "learning_rate": 5.999150586538951e-06, "loss": 0.4512, "step": 3015 }, { "epoch": 0.45, "grad_norm": 1.1373835802078247, "learning_rate": 5.996766831632913e-06, "loss": 0.4138, "step": 3016 }, { "epoch": 0.45, "grad_norm": 1.7057373523712158, "learning_rate": 5.994382840770488e-06, "loss": 0.5049, "step": 3017 }, { "epoch": 0.45, "grad_norm": 1.2030099630355835, "learning_rate": 5.991998614516014e-06, "loss": 0.5252, "step": 3018 }, { "epoch": 0.45, "grad_norm": 1.108441710472107, "learning_rate": 5.989614153433891e-06, "loss": 0.497, "step": 3019 }, { "epoch": 0.45, "grad_norm": 1.189691424369812, "learning_rate": 5.987229458088573e-06, "loss": 0.5317, "step": 3020 }, { "epoch": 0.45, "grad_norm": 2.0542805194854736, "learning_rate": 5.984844529044571e-06, "loss": 0.4801, "step": 3021 }, { "epoch": 0.45, "grad_norm": 1.6749800443649292, "learning_rate": 5.982459366866448e-06, "loss": 0.6046, "step": 3022 }, { "epoch": 0.45, "grad_norm": 1.3288999795913696, "learning_rate": 5.980073972118822e-06, "loss": 0.524, "step": 3023 }, { "epoch": 0.45, "grad_norm": 1.4265140295028687, "learning_rate": 5.9776883453663705e-06, "loss": 0.4869, "step": 3024 }, { "epoch": 0.45, "grad_norm": 1.4167271852493286, "learning_rate": 5.975302487173822e-06, "loss": 0.5415, "step": 3025 }, { "epoch": 0.45, "grad_norm": 1.8778550624847412, "learning_rate": 5.972916398105961e-06, "loss": 0.5285, "step": 3026 }, { "epoch": 0.45, "grad_norm": 2.8822927474975586, "learning_rate": 5.9705300787276275e-06, "loss": 0.4943, "step": 3027 }, { "epoch": 0.45, "grad_norm": 1.1201188564300537, "learning_rate": 5.968143529603715e-06, "loss": 0.5165, "step": 3028 }, { "epoch": 0.46, "grad_norm": 1.0840659141540527, "learning_rate": 5.965756751299171e-06, "loss": 0.5015, "step": 3029 }, { "epoch": 0.46, "grad_norm": 1.0811399221420288, "learning_rate": 5.963369744378998e-06, "loss": 0.4619, "step": 3030 }, { "epoch": 0.46, "grad_norm": 1.5396666526794434, "learning_rate": 5.960982509408252e-06, "loss": 0.4604, "step": 3031 }, { "epoch": 0.46, "grad_norm": 1.2230006456375122, "learning_rate": 5.958595046952045e-06, "loss": 0.4869, "step": 3032 }, { "epoch": 0.46, "grad_norm": 1.26640784740448, "learning_rate": 5.9562073575755396e-06, "loss": 0.7552, "step": 3033 }, { "epoch": 0.46, "grad_norm": 1.1033046245574951, "learning_rate": 5.953819441843954e-06, "loss": 0.517, "step": 3034 }, { "epoch": 0.46, "grad_norm": 1.1347073316574097, "learning_rate": 5.951431300322562e-06, "loss": 0.4934, "step": 3035 }, { "epoch": 0.46, "grad_norm": 1.2986516952514648, "learning_rate": 5.949042933576685e-06, "loss": 0.5235, "step": 3036 }, { "epoch": 0.46, "grad_norm": 1.458986520767212, "learning_rate": 5.946654342171704e-06, "loss": 0.529, "step": 3037 }, { "epoch": 0.46, "grad_norm": 1.2042032480239868, "learning_rate": 5.944265526673051e-06, "loss": 0.4925, "step": 3038 }, { "epoch": 0.46, "grad_norm": 1.48712956905365, "learning_rate": 5.941876487646208e-06, "loss": 0.4925, "step": 3039 }, { "epoch": 0.46, "grad_norm": 1.1551117897033691, "learning_rate": 5.939487225656715e-06, "loss": 0.4674, "step": 3040 }, { "epoch": 0.46, "grad_norm": 1.0075507164001465, "learning_rate": 5.93709774127016e-06, "loss": 0.498, "step": 3041 }, { "epoch": 0.46, "grad_norm": 1.206187129020691, "learning_rate": 5.934708035052187e-06, "loss": 0.4144, "step": 3042 }, { "epoch": 0.46, "grad_norm": 1.3571557998657227, "learning_rate": 5.932318107568492e-06, "loss": 0.5048, "step": 3043 }, { "epoch": 0.46, "grad_norm": 1.0991460084915161, "learning_rate": 5.929927959384823e-06, "loss": 0.4103, "step": 3044 }, { "epoch": 0.46, "grad_norm": 1.211140751838684, "learning_rate": 5.92753759106698e-06, "loss": 0.4554, "step": 3045 }, { "epoch": 0.46, "grad_norm": 1.1542829275131226, "learning_rate": 5.925147003180813e-06, "loss": 0.502, "step": 3046 }, { "epoch": 0.46, "grad_norm": 1.2582054138183594, "learning_rate": 5.922756196292228e-06, "loss": 0.4275, "step": 3047 }, { "epoch": 0.46, "grad_norm": 1.1027077436447144, "learning_rate": 5.920365170967181e-06, "loss": 0.5079, "step": 3048 }, { "epoch": 0.46, "grad_norm": 1.6469972133636475, "learning_rate": 5.917973927771678e-06, "loss": 0.3805, "step": 3049 }, { "epoch": 0.46, "grad_norm": 1.2168751955032349, "learning_rate": 5.915582467271782e-06, "loss": 0.5214, "step": 3050 }, { "epoch": 0.46, "grad_norm": 1.1210044622421265, "learning_rate": 5.913190790033603e-06, "loss": 0.5578, "step": 3051 }, { "epoch": 0.46, "grad_norm": 0.8982027769088745, "learning_rate": 5.910798896623299e-06, "loss": 0.4868, "step": 3052 }, { "epoch": 0.46, "grad_norm": 1.7258987426757812, "learning_rate": 5.908406787607084e-06, "loss": 0.5153, "step": 3053 }, { "epoch": 0.46, "grad_norm": 1.156590223312378, "learning_rate": 5.9060144635512265e-06, "loss": 0.5591, "step": 3054 }, { "epoch": 0.46, "grad_norm": 1.1264199018478394, "learning_rate": 5.9036219250220384e-06, "loss": 0.4901, "step": 3055 }, { "epoch": 0.46, "grad_norm": 1.4003937244415283, "learning_rate": 5.901229172585886e-06, "loss": 0.5209, "step": 3056 }, { "epoch": 0.46, "grad_norm": 1.30907142162323, "learning_rate": 5.898836206809188e-06, "loss": 0.4607, "step": 3057 }, { "epoch": 0.46, "grad_norm": 1.2964352369308472, "learning_rate": 5.8964430282584075e-06, "loss": 0.5009, "step": 3058 }, { "epoch": 0.46, "grad_norm": 1.4150831699371338, "learning_rate": 5.894049637500063e-06, "loss": 0.527, "step": 3059 }, { "epoch": 0.46, "grad_norm": 1.1798698902130127, "learning_rate": 5.891656035100725e-06, "loss": 0.5717, "step": 3060 }, { "epoch": 0.46, "grad_norm": 1.156648874282837, "learning_rate": 5.889262221627008e-06, "loss": 0.5303, "step": 3061 }, { "epoch": 0.46, "grad_norm": 1.1456459760665894, "learning_rate": 5.886868197645582e-06, "loss": 0.5117, "step": 3062 }, { "epoch": 0.46, "grad_norm": 1.1616138219833374, "learning_rate": 5.884473963723164e-06, "loss": 0.4618, "step": 3063 }, { "epoch": 0.46, "grad_norm": 1.2833778858184814, "learning_rate": 5.88207952042652e-06, "loss": 0.5777, "step": 3064 }, { "epoch": 0.46, "grad_norm": 1.4560751914978027, "learning_rate": 5.879684868322468e-06, "loss": 0.4417, "step": 3065 }, { "epoch": 0.46, "grad_norm": 1.0401986837387085, "learning_rate": 5.877290007977874e-06, "loss": 0.4588, "step": 3066 }, { "epoch": 0.46, "grad_norm": 1.4571610689163208, "learning_rate": 5.874894939959653e-06, "loss": 0.4729, "step": 3067 }, { "epoch": 0.46, "grad_norm": 1.1372096538543701, "learning_rate": 5.872499664834771e-06, "loss": 0.5274, "step": 3068 }, { "epoch": 0.46, "grad_norm": 1.767797589302063, "learning_rate": 5.870104183170241e-06, "loss": 0.395, "step": 3069 }, { "epoch": 0.46, "grad_norm": 0.9964312314987183, "learning_rate": 5.867708495533124e-06, "loss": 0.4232, "step": 3070 }, { "epoch": 0.46, "grad_norm": 1.5640658140182495, "learning_rate": 5.865312602490534e-06, "loss": 0.4685, "step": 3071 }, { "epoch": 0.46, "grad_norm": 1.28498113155365, "learning_rate": 5.862916504609629e-06, "loss": 0.502, "step": 3072 }, { "epoch": 0.46, "grad_norm": 1.3529459238052368, "learning_rate": 5.860520202457619e-06, "loss": 0.4963, "step": 3073 }, { "epoch": 0.46, "grad_norm": 1.3455439805984497, "learning_rate": 5.8581236966017604e-06, "loss": 0.4567, "step": 3074 }, { "epoch": 0.46, "grad_norm": 1.7041393518447876, "learning_rate": 5.855726987609359e-06, "loss": 0.5273, "step": 3075 }, { "epoch": 0.46, "grad_norm": 1.1123799085617065, "learning_rate": 5.853330076047764e-06, "loss": 0.5188, "step": 3076 }, { "epoch": 0.46, "grad_norm": 1.1459486484527588, "learning_rate": 5.850932962484381e-06, "loss": 0.5645, "step": 3077 }, { "epoch": 0.46, "grad_norm": 1.173500418663025, "learning_rate": 5.848535647486659e-06, "loss": 0.7263, "step": 3078 }, { "epoch": 0.46, "grad_norm": 1.405639886856079, "learning_rate": 5.846138131622091e-06, "loss": 0.5055, "step": 3079 }, { "epoch": 0.46, "grad_norm": 1.0466376543045044, "learning_rate": 5.843740415458223e-06, "loss": 0.4332, "step": 3080 }, { "epoch": 0.46, "grad_norm": 1.1875375509262085, "learning_rate": 5.841342499562648e-06, "loss": 0.4438, "step": 3081 }, { "epoch": 0.46, "grad_norm": 1.0863856077194214, "learning_rate": 5.838944384503004e-06, "loss": 0.4542, "step": 3082 }, { "epoch": 0.46, "grad_norm": 1.2641842365264893, "learning_rate": 5.836546070846975e-06, "loss": 0.4774, "step": 3083 }, { "epoch": 0.46, "grad_norm": 1.0826693773269653, "learning_rate": 5.834147559162297e-06, "loss": 0.4499, "step": 3084 }, { "epoch": 0.46, "grad_norm": 1.1156246662139893, "learning_rate": 5.831748850016747e-06, "loss": 0.4395, "step": 3085 }, { "epoch": 0.46, "grad_norm": 1.2858483791351318, "learning_rate": 5.8293499439781535e-06, "loss": 0.5009, "step": 3086 }, { "epoch": 0.46, "grad_norm": 1.146988868713379, "learning_rate": 5.8269508416143896e-06, "loss": 0.5189, "step": 3087 }, { "epoch": 0.46, "grad_norm": 1.0172054767608643, "learning_rate": 5.824551543493375e-06, "loss": 0.4662, "step": 3088 }, { "epoch": 0.46, "grad_norm": 1.3438165187835693, "learning_rate": 5.822152050183075e-06, "loss": 0.5412, "step": 3089 }, { "epoch": 0.46, "grad_norm": 1.2884100675582886, "learning_rate": 5.819752362251503e-06, "loss": 0.5109, "step": 3090 }, { "epoch": 0.46, "grad_norm": 1.0640515089035034, "learning_rate": 5.817352480266716e-06, "loss": 0.4401, "step": 3091 }, { "epoch": 0.46, "grad_norm": 1.2397414445877075, "learning_rate": 5.81495240479682e-06, "loss": 0.4973, "step": 3092 }, { "epoch": 0.46, "grad_norm": 0.9101976156234741, "learning_rate": 5.812552136409962e-06, "loss": 0.4021, "step": 3093 }, { "epoch": 0.46, "grad_norm": 1.0127918720245361, "learning_rate": 5.8101516756743416e-06, "loss": 0.4782, "step": 3094 }, { "epoch": 0.46, "grad_norm": 0.9942687749862671, "learning_rate": 5.807751023158198e-06, "loss": 0.5217, "step": 3095 }, { "epoch": 0.47, "grad_norm": 1.146541714668274, "learning_rate": 5.805350179429817e-06, "loss": 0.4593, "step": 3096 }, { "epoch": 0.47, "grad_norm": 0.9706078171730042, "learning_rate": 5.802949145057533e-06, "loss": 0.4278, "step": 3097 }, { "epoch": 0.47, "grad_norm": 1.430678129196167, "learning_rate": 5.8005479206097205e-06, "loss": 0.4875, "step": 3098 }, { "epoch": 0.47, "grad_norm": 1.1152024269104004, "learning_rate": 5.798146506654803e-06, "loss": 0.5567, "step": 3099 }, { "epoch": 0.47, "grad_norm": 1.0496234893798828, "learning_rate": 5.795744903761248e-06, "loss": 0.5102, "step": 3100 }, { "epoch": 0.47, "grad_norm": 1.2265183925628662, "learning_rate": 5.793343112497565e-06, "loss": 0.5704, "step": 3101 }, { "epoch": 0.47, "grad_norm": 1.5844172239303589, "learning_rate": 5.790941133432312e-06, "loss": 0.5734, "step": 3102 }, { "epoch": 0.47, "grad_norm": 1.2870718240737915, "learning_rate": 5.788538967134089e-06, "loss": 0.5148, "step": 3103 }, { "epoch": 0.47, "grad_norm": 1.0792601108551025, "learning_rate": 5.786136614171542e-06, "loss": 0.4506, "step": 3104 }, { "epoch": 0.47, "grad_norm": 1.0265151262283325, "learning_rate": 5.783734075113359e-06, "loss": 0.4878, "step": 3105 }, { "epoch": 0.47, "grad_norm": 1.1982957124710083, "learning_rate": 5.781331350528273e-06, "loss": 0.529, "step": 3106 }, { "epoch": 0.47, "grad_norm": 1.4145230054855347, "learning_rate": 5.778928440985063e-06, "loss": 0.4654, "step": 3107 }, { "epoch": 0.47, "grad_norm": 0.8862770199775696, "learning_rate": 5.7765253470525485e-06, "loss": 0.3946, "step": 3108 }, { "epoch": 0.47, "grad_norm": 1.5056788921356201, "learning_rate": 5.774122069299593e-06, "loss": 0.4703, "step": 3109 }, { "epoch": 0.47, "grad_norm": 1.2388718128204346, "learning_rate": 5.771718608295108e-06, "loss": 0.5301, "step": 3110 }, { "epoch": 0.47, "grad_norm": 1.0011428594589233, "learning_rate": 5.769314964608042e-06, "loss": 0.4899, "step": 3111 }, { "epoch": 0.47, "grad_norm": 1.6309524774551392, "learning_rate": 5.76691113880739e-06, "loss": 0.4731, "step": 3112 }, { "epoch": 0.47, "grad_norm": 1.3838847875595093, "learning_rate": 5.764507131462192e-06, "loss": 0.4843, "step": 3113 }, { "epoch": 0.47, "grad_norm": 1.3226677179336548, "learning_rate": 5.762102943141526e-06, "loss": 0.4717, "step": 3114 }, { "epoch": 0.47, "grad_norm": 1.1113101243972778, "learning_rate": 5.759698574414519e-06, "loss": 0.415, "step": 3115 }, { "epoch": 0.47, "grad_norm": 1.1686478853225708, "learning_rate": 5.757294025850335e-06, "loss": 0.505, "step": 3116 }, { "epoch": 0.47, "grad_norm": 1.3191648721694946, "learning_rate": 5.754889298018183e-06, "loss": 0.5174, "step": 3117 }, { "epoch": 0.47, "grad_norm": 1.0135573148727417, "learning_rate": 5.752484391487314e-06, "loss": 0.4631, "step": 3118 }, { "epoch": 0.47, "grad_norm": 1.301613211631775, "learning_rate": 5.750079306827023e-06, "loss": 0.5032, "step": 3119 }, { "epoch": 0.47, "grad_norm": 1.1891447305679321, "learning_rate": 5.7476740446066445e-06, "loss": 0.4451, "step": 3120 }, { "epoch": 0.47, "grad_norm": 1.1347460746765137, "learning_rate": 5.745268605395559e-06, "loss": 0.5509, "step": 3121 }, { "epoch": 0.47, "grad_norm": 1.2802799940109253, "learning_rate": 5.7428629897631835e-06, "loss": 0.3836, "step": 3122 }, { "epoch": 0.47, "grad_norm": 0.9295225143432617, "learning_rate": 5.7404571982789825e-06, "loss": 0.5411, "step": 3123 }, { "epoch": 0.47, "grad_norm": 1.2083383798599243, "learning_rate": 5.738051231512455e-06, "loss": 0.5503, "step": 3124 }, { "epoch": 0.47, "grad_norm": 1.0514038801193237, "learning_rate": 5.735645090033148e-06, "loss": 0.4926, "step": 3125 }, { "epoch": 0.47, "grad_norm": 1.1569474935531616, "learning_rate": 5.733238774410647e-06, "loss": 0.7512, "step": 3126 }, { "epoch": 0.47, "grad_norm": 1.1107672452926636, "learning_rate": 5.73083228521458e-06, "loss": 0.4981, "step": 3127 }, { "epoch": 0.47, "grad_norm": 4.305000305175781, "learning_rate": 5.728425623014616e-06, "loss": 0.4473, "step": 3128 }, { "epoch": 0.47, "grad_norm": 1.3644262552261353, "learning_rate": 5.726018788380464e-06, "loss": 0.4383, "step": 3129 }, { "epoch": 0.47, "grad_norm": 1.1130545139312744, "learning_rate": 5.723611781881871e-06, "loss": 0.4744, "step": 3130 }, { "epoch": 0.47, "grad_norm": 1.3046289682388306, "learning_rate": 5.721204604088631e-06, "loss": 0.5386, "step": 3131 }, { "epoch": 0.47, "grad_norm": 1.141234278678894, "learning_rate": 5.718797255570575e-06, "loss": 0.3921, "step": 3132 }, { "epoch": 0.47, "grad_norm": 1.5886038541793823, "learning_rate": 5.716389736897574e-06, "loss": 0.4991, "step": 3133 }, { "epoch": 0.47, "grad_norm": 1.1847152709960938, "learning_rate": 5.71398204863954e-06, "loss": 0.4734, "step": 3134 }, { "epoch": 0.47, "grad_norm": 1.2741498947143555, "learning_rate": 5.711574191366427e-06, "loss": 0.531, "step": 3135 }, { "epoch": 0.47, "grad_norm": 1.3374698162078857, "learning_rate": 5.7091661656482245e-06, "loss": 0.5333, "step": 3136 }, { "epoch": 0.47, "grad_norm": 1.2637327909469604, "learning_rate": 5.7067579720549645e-06, "loss": 0.5546, "step": 3137 }, { "epoch": 0.47, "grad_norm": 1.2235645055770874, "learning_rate": 5.704349611156722e-06, "loss": 0.5401, "step": 3138 }, { "epoch": 0.47, "grad_norm": 1.1621063947677612, "learning_rate": 5.701941083523606e-06, "loss": 0.7122, "step": 3139 }, { "epoch": 0.47, "grad_norm": 1.4476344585418701, "learning_rate": 5.699532389725767e-06, "loss": 0.5368, "step": 3140 }, { "epoch": 0.47, "grad_norm": 1.0802921056747437, "learning_rate": 5.697123530333397e-06, "loss": 0.444, "step": 3141 }, { "epoch": 0.47, "grad_norm": 1.1702913045883179, "learning_rate": 5.694714505916723e-06, "loss": 0.5425, "step": 3142 }, { "epoch": 0.47, "grad_norm": 1.2525584697723389, "learning_rate": 5.692305317046016e-06, "loss": 0.5125, "step": 3143 }, { "epoch": 0.47, "grad_norm": 1.2203823328018188, "learning_rate": 5.689895964291583e-06, "loss": 0.5212, "step": 3144 }, { "epoch": 0.47, "grad_norm": 1.7791777849197388, "learning_rate": 5.687486448223769e-06, "loss": 0.5334, "step": 3145 }, { "epoch": 0.47, "grad_norm": 1.0659807920455933, "learning_rate": 5.68507676941296e-06, "loss": 0.5541, "step": 3146 }, { "epoch": 0.47, "grad_norm": 1.046075463294983, "learning_rate": 5.682666928429579e-06, "loss": 0.5287, "step": 3147 }, { "epoch": 0.47, "grad_norm": 1.1436103582382202, "learning_rate": 5.680256925844085e-06, "loss": 0.5532, "step": 3148 }, { "epoch": 0.47, "grad_norm": 1.331254482269287, "learning_rate": 5.677846762226982e-06, "loss": 0.5105, "step": 3149 }, { "epoch": 0.47, "grad_norm": 1.154320478439331, "learning_rate": 5.675436438148807e-06, "loss": 0.4701, "step": 3150 }, { "epoch": 0.47, "grad_norm": 1.5169874429702759, "learning_rate": 5.673025954180136e-06, "loss": 0.5417, "step": 3151 }, { "epoch": 0.47, "grad_norm": 1.0817384719848633, "learning_rate": 5.670615310891583e-06, "loss": 0.7082, "step": 3152 }, { "epoch": 0.47, "grad_norm": 1.173013687133789, "learning_rate": 5.668204508853798e-06, "loss": 0.5054, "step": 3153 }, { "epoch": 0.47, "grad_norm": 1.0546667575836182, "learning_rate": 5.6657935486374735e-06, "loss": 0.4797, "step": 3154 }, { "epoch": 0.47, "grad_norm": 1.0742802619934082, "learning_rate": 5.6633824308133344e-06, "loss": 0.5398, "step": 3155 }, { "epoch": 0.47, "grad_norm": 1.0713385343551636, "learning_rate": 5.660971155952145e-06, "loss": 0.4902, "step": 3156 }, { "epoch": 0.47, "grad_norm": 0.9844189286231995, "learning_rate": 5.658559724624706e-06, "loss": 0.4998, "step": 3157 }, { "epoch": 0.47, "grad_norm": 1.2776498794555664, "learning_rate": 5.6561481374018565e-06, "loss": 0.4344, "step": 3158 }, { "epoch": 0.47, "grad_norm": 1.0970460176467896, "learning_rate": 5.653736394854471e-06, "loss": 0.4289, "step": 3159 }, { "epoch": 0.47, "grad_norm": 1.2882370948791504, "learning_rate": 5.651324497553462e-06, "loss": 0.5102, "step": 3160 }, { "epoch": 0.47, "grad_norm": 1.1346570253372192, "learning_rate": 5.648912446069777e-06, "loss": 0.4637, "step": 3161 }, { "epoch": 0.47, "grad_norm": 0.9017711281776428, "learning_rate": 5.646500240974402e-06, "loss": 0.4241, "step": 3162 }, { "epoch": 0.48, "grad_norm": 1.1187090873718262, "learning_rate": 5.644087882838358e-06, "loss": 0.4143, "step": 3163 }, { "epoch": 0.48, "grad_norm": 1.028951644897461, "learning_rate": 5.6416753722327025e-06, "loss": 0.4188, "step": 3164 }, { "epoch": 0.48, "grad_norm": 1.0165709257125854, "learning_rate": 5.63926270972853e-06, "loss": 0.5038, "step": 3165 }, { "epoch": 0.48, "grad_norm": 1.7018747329711914, "learning_rate": 5.63684989589697e-06, "loss": 0.483, "step": 3166 }, { "epoch": 0.48, "grad_norm": 1.3539785146713257, "learning_rate": 5.6344369313091865e-06, "loss": 0.4698, "step": 3167 }, { "epoch": 0.48, "grad_norm": 1.0567227602005005, "learning_rate": 5.632023816536382e-06, "loss": 0.412, "step": 3168 }, { "epoch": 0.48, "grad_norm": 1.0809803009033203, "learning_rate": 5.629610552149795e-06, "loss": 0.4982, "step": 3169 }, { "epoch": 0.48, "grad_norm": 0.9439632296562195, "learning_rate": 5.627197138720695e-06, "loss": 0.4712, "step": 3170 }, { "epoch": 0.48, "grad_norm": 1.1647520065307617, "learning_rate": 5.624783576820389e-06, "loss": 0.5222, "step": 3171 }, { "epoch": 0.48, "grad_norm": 1.3737409114837646, "learning_rate": 5.622369867020222e-06, "loss": 0.4616, "step": 3172 }, { "epoch": 0.48, "grad_norm": 1.0644726753234863, "learning_rate": 5.61995600989157e-06, "loss": 0.4336, "step": 3173 }, { "epoch": 0.48, "grad_norm": 1.6514581441879272, "learning_rate": 5.617542006005847e-06, "loss": 0.457, "step": 3174 }, { "epoch": 0.48, "grad_norm": 1.31319260597229, "learning_rate": 5.615127855934499e-06, "loss": 0.4774, "step": 3175 }, { "epoch": 0.48, "grad_norm": 1.0026030540466309, "learning_rate": 5.612713560249008e-06, "loss": 0.4732, "step": 3176 }, { "epoch": 0.48, "grad_norm": 1.2406973838806152, "learning_rate": 5.61029911952089e-06, "loss": 0.4916, "step": 3177 }, { "epoch": 0.48, "grad_norm": 1.1871927976608276, "learning_rate": 5.607884534321696e-06, "loss": 0.4932, "step": 3178 }, { "epoch": 0.48, "grad_norm": 1.0607866048812866, "learning_rate": 5.605469805223011e-06, "loss": 0.7104, "step": 3179 }, { "epoch": 0.48, "grad_norm": 1.0615935325622559, "learning_rate": 5.603054932796453e-06, "loss": 0.4896, "step": 3180 }, { "epoch": 0.48, "grad_norm": 2.4914462566375732, "learning_rate": 5.600639917613675e-06, "loss": 0.5477, "step": 3181 }, { "epoch": 0.48, "grad_norm": 1.2144057750701904, "learning_rate": 5.5982247602463655e-06, "loss": 0.7385, "step": 3182 }, { "epoch": 0.48, "grad_norm": 1.2106635570526123, "learning_rate": 5.595809461266244e-06, "loss": 0.4565, "step": 3183 }, { "epoch": 0.48, "grad_norm": 1.506065845489502, "learning_rate": 5.5933940212450635e-06, "loss": 0.4793, "step": 3184 }, { "epoch": 0.48, "grad_norm": 1.106290578842163, "learning_rate": 5.590978440754611e-06, "loss": 0.5187, "step": 3185 }, { "epoch": 0.48, "grad_norm": 1.2596805095672607, "learning_rate": 5.588562720366707e-06, "loss": 0.489, "step": 3186 }, { "epoch": 0.48, "grad_norm": 1.1612333059310913, "learning_rate": 5.586146860653205e-06, "loss": 0.4616, "step": 3187 }, { "epoch": 0.48, "grad_norm": 1.2441935539245605, "learning_rate": 5.583730862185993e-06, "loss": 0.4955, "step": 3188 }, { "epoch": 0.48, "grad_norm": 0.9418556094169617, "learning_rate": 5.5813147255369894e-06, "loss": 0.5015, "step": 3189 }, { "epoch": 0.48, "grad_norm": 1.3563354015350342, "learning_rate": 5.578898451278145e-06, "loss": 0.434, "step": 3190 }, { "epoch": 0.48, "grad_norm": 1.0446207523345947, "learning_rate": 5.576482039981448e-06, "loss": 0.4664, "step": 3191 }, { "epoch": 0.48, "grad_norm": 1.4765170812606812, "learning_rate": 5.574065492218911e-06, "loss": 0.5089, "step": 3192 }, { "epoch": 0.48, "grad_norm": 1.0821244716644287, "learning_rate": 5.571648808562587e-06, "loss": 0.5649, "step": 3193 }, { "epoch": 0.48, "grad_norm": 1.1727516651153564, "learning_rate": 5.569231989584555e-06, "loss": 0.4803, "step": 3194 }, { "epoch": 0.48, "grad_norm": 1.4345725774765015, "learning_rate": 5.566815035856932e-06, "loss": 0.441, "step": 3195 }, { "epoch": 0.48, "grad_norm": 1.1327749490737915, "learning_rate": 5.564397947951859e-06, "loss": 0.5542, "step": 3196 }, { "epoch": 0.48, "grad_norm": 1.1379189491271973, "learning_rate": 5.5619807264415185e-06, "loss": 0.4434, "step": 3197 }, { "epoch": 0.48, "grad_norm": 3.0638251304626465, "learning_rate": 5.559563371898114e-06, "loss": 0.5492, "step": 3198 }, { "epoch": 0.48, "grad_norm": 1.7784322500228882, "learning_rate": 5.557145884893889e-06, "loss": 0.4736, "step": 3199 }, { "epoch": 0.48, "grad_norm": 1.187110185623169, "learning_rate": 5.554728266001115e-06, "loss": 0.5182, "step": 3200 }, { "epoch": 0.48, "grad_norm": 1.2611700296401978, "learning_rate": 5.5523105157920956e-06, "loss": 0.4896, "step": 3201 }, { "epoch": 0.48, "grad_norm": 1.0774576663970947, "learning_rate": 5.549892634839163e-06, "loss": 0.3947, "step": 3202 }, { "epoch": 0.48, "grad_norm": 1.448086142539978, "learning_rate": 5.547474623714684e-06, "loss": 0.4961, "step": 3203 }, { "epoch": 0.48, "grad_norm": 1.1503797769546509, "learning_rate": 5.545056482991051e-06, "loss": 0.4995, "step": 3204 }, { "epoch": 0.48, "grad_norm": 1.3355693817138672, "learning_rate": 5.542638213240697e-06, "loss": 0.7543, "step": 3205 }, { "epoch": 0.48, "grad_norm": 0.9726278781890869, "learning_rate": 5.540219815036072e-06, "loss": 0.4568, "step": 3206 }, { "epoch": 0.48, "grad_norm": 5.191850185394287, "learning_rate": 5.537801288949669e-06, "loss": 0.5908, "step": 3207 }, { "epoch": 0.48, "grad_norm": 1.2598779201507568, "learning_rate": 5.535382635554003e-06, "loss": 0.4745, "step": 3208 }, { "epoch": 0.48, "grad_norm": 1.0314522981643677, "learning_rate": 5.53296385542162e-06, "loss": 0.4637, "step": 3209 }, { "epoch": 0.48, "grad_norm": 1.2778258323669434, "learning_rate": 5.530544949125102e-06, "loss": 0.4966, "step": 3210 }, { "epoch": 0.48, "grad_norm": 1.1816476583480835, "learning_rate": 5.5281259172370536e-06, "loss": 0.5115, "step": 3211 }, { "epoch": 0.48, "grad_norm": 1.177533507347107, "learning_rate": 5.525706760330115e-06, "loss": 0.4846, "step": 3212 }, { "epoch": 0.48, "grad_norm": 1.1883729696273804, "learning_rate": 5.523287478976952e-06, "loss": 0.5658, "step": 3213 }, { "epoch": 0.48, "grad_norm": 1.401513695716858, "learning_rate": 5.520868073750261e-06, "loss": 0.4951, "step": 3214 }, { "epoch": 0.48, "grad_norm": 1.1854640245437622, "learning_rate": 5.518448545222765e-06, "loss": 0.4838, "step": 3215 }, { "epoch": 0.48, "grad_norm": 1.0419055223464966, "learning_rate": 5.516028893967223e-06, "loss": 0.4668, "step": 3216 }, { "epoch": 0.48, "grad_norm": 1.127765417098999, "learning_rate": 5.513609120556418e-06, "loss": 0.5631, "step": 3217 }, { "epoch": 0.48, "grad_norm": 1.0539871454238892, "learning_rate": 5.5111892255631635e-06, "loss": 0.5508, "step": 3218 }, { "epoch": 0.48, "grad_norm": 1.0308043956756592, "learning_rate": 5.508769209560302e-06, "loss": 0.5093, "step": 3219 }, { "epoch": 0.48, "grad_norm": 1.1723167896270752, "learning_rate": 5.506349073120699e-06, "loss": 0.4766, "step": 3220 }, { "epoch": 0.48, "grad_norm": 1.1259280443191528, "learning_rate": 5.503928816817259e-06, "loss": 0.5117, "step": 3221 }, { "epoch": 0.48, "grad_norm": 1.3914029598236084, "learning_rate": 5.501508441222906e-06, "loss": 0.5744, "step": 3222 }, { "epoch": 0.48, "grad_norm": 1.273533821105957, "learning_rate": 5.499087946910598e-06, "loss": 0.5117, "step": 3223 }, { "epoch": 0.48, "grad_norm": 1.069236159324646, "learning_rate": 5.496667334453317e-06, "loss": 0.449, "step": 3224 }, { "epoch": 0.48, "grad_norm": 1.0477991104125977, "learning_rate": 5.494246604424076e-06, "loss": 0.513, "step": 3225 }, { "epoch": 0.48, "grad_norm": 2.624959945678711, "learning_rate": 5.491825757395912e-06, "loss": 0.4184, "step": 3226 }, { "epoch": 0.48, "grad_norm": 1.3862584829330444, "learning_rate": 5.489404793941895e-06, "loss": 0.5306, "step": 3227 }, { "epoch": 0.48, "grad_norm": 0.9921375513076782, "learning_rate": 5.486983714635118e-06, "loss": 0.4605, "step": 3228 }, { "epoch": 0.49, "grad_norm": 1.252490758895874, "learning_rate": 5.484562520048703e-06, "loss": 0.4841, "step": 3229 }, { "epoch": 0.49, "grad_norm": 1.0789257287979126, "learning_rate": 5.4821412107558005e-06, "loss": 0.7226, "step": 3230 }, { "epoch": 0.49, "grad_norm": 1.216723918914795, "learning_rate": 5.479719787329586e-06, "loss": 0.4508, "step": 3231 }, { "epoch": 0.49, "grad_norm": 1.2362562417984009, "learning_rate": 5.477298250343264e-06, "loss": 0.4828, "step": 3232 }, { "epoch": 0.49, "grad_norm": 1.1183929443359375, "learning_rate": 5.474876600370064e-06, "loss": 0.5802, "step": 3233 }, { "epoch": 0.49, "grad_norm": 1.0720916986465454, "learning_rate": 5.472454837983244e-06, "loss": 0.4988, "step": 3234 }, { "epoch": 0.49, "grad_norm": 1.2028006315231323, "learning_rate": 5.470032963756087e-06, "loss": 0.4357, "step": 3235 }, { "epoch": 0.49, "grad_norm": 1.320252776145935, "learning_rate": 5.467610978261906e-06, "loss": 0.5295, "step": 3236 }, { "epoch": 0.49, "grad_norm": 1.4041774272918701, "learning_rate": 5.465188882074035e-06, "loss": 0.5354, "step": 3237 }, { "epoch": 0.49, "grad_norm": 1.5241059064865112, "learning_rate": 5.462766675765836e-06, "loss": 0.7661, "step": 3238 }, { "epoch": 0.49, "grad_norm": 1.0939191579818726, "learning_rate": 5.460344359910701e-06, "loss": 0.7362, "step": 3239 }, { "epoch": 0.49, "grad_norm": 1.0333927869796753, "learning_rate": 5.457921935082042e-06, "loss": 0.4464, "step": 3240 }, { "epoch": 0.49, "grad_norm": 1.2486315965652466, "learning_rate": 5.455499401853301e-06, "loss": 0.5011, "step": 3241 }, { "epoch": 0.49, "grad_norm": 1.0440833568572998, "learning_rate": 5.453076760797945e-06, "loss": 0.6777, "step": 3242 }, { "epoch": 0.49, "grad_norm": 1.1860581636428833, "learning_rate": 5.4506540124894646e-06, "loss": 0.5181, "step": 3243 }, { "epoch": 0.49, "grad_norm": 1.381235122680664, "learning_rate": 5.448231157501377e-06, "loss": 0.5306, "step": 3244 }, { "epoch": 0.49, "grad_norm": 2.726951837539673, "learning_rate": 5.445808196407225e-06, "loss": 0.4303, "step": 3245 }, { "epoch": 0.49, "grad_norm": 1.395195484161377, "learning_rate": 5.4433851297805764e-06, "loss": 0.513, "step": 3246 }, { "epoch": 0.49, "grad_norm": 1.1024912595748901, "learning_rate": 5.440961958195025e-06, "loss": 0.4509, "step": 3247 }, { "epoch": 0.49, "grad_norm": 1.2169878482818604, "learning_rate": 5.438538682224185e-06, "loss": 0.5734, "step": 3248 }, { "epoch": 0.49, "grad_norm": 1.1685113906860352, "learning_rate": 5.436115302441702e-06, "loss": 0.5539, "step": 3249 }, { "epoch": 0.49, "grad_norm": 1.2967984676361084, "learning_rate": 5.43369181942124e-06, "loss": 0.5197, "step": 3250 }, { "epoch": 0.49, "grad_norm": 1.8628281354904175, "learning_rate": 5.431268233736491e-06, "loss": 0.4689, "step": 3251 }, { "epoch": 0.49, "grad_norm": 1.1300841569900513, "learning_rate": 5.42884454596117e-06, "loss": 0.5022, "step": 3252 }, { "epoch": 0.49, "grad_norm": 1.192416787147522, "learning_rate": 5.426420756669019e-06, "loss": 0.431, "step": 3253 }, { "epoch": 0.49, "grad_norm": 1.1452993154525757, "learning_rate": 5.4239968664338e-06, "loss": 0.4819, "step": 3254 }, { "epoch": 0.49, "grad_norm": 2.3079752922058105, "learning_rate": 5.421572875829299e-06, "loss": 0.4566, "step": 3255 }, { "epoch": 0.49, "grad_norm": 1.0287383794784546, "learning_rate": 5.41914878542933e-06, "loss": 0.4605, "step": 3256 }, { "epoch": 0.49, "grad_norm": 1.0646920204162598, "learning_rate": 5.416724595807726e-06, "loss": 0.5043, "step": 3257 }, { "epoch": 0.49, "grad_norm": 1.3952635526657104, "learning_rate": 5.414300307538347e-06, "loss": 0.4577, "step": 3258 }, { "epoch": 0.49, "grad_norm": 1.002762794494629, "learning_rate": 5.4118759211950726e-06, "loss": 0.4335, "step": 3259 }, { "epoch": 0.49, "grad_norm": 1.3637272119522095, "learning_rate": 5.4094514373518104e-06, "loss": 0.4495, "step": 3260 }, { "epoch": 0.49, "grad_norm": 1.5010416507720947, "learning_rate": 5.407026856582487e-06, "loss": 0.503, "step": 3261 }, { "epoch": 0.49, "grad_norm": 1.3251659870147705, "learning_rate": 5.404602179461054e-06, "loss": 0.5088, "step": 3262 }, { "epoch": 0.49, "grad_norm": 0.991156816482544, "learning_rate": 5.402177406561483e-06, "loss": 0.4204, "step": 3263 }, { "epoch": 0.49, "grad_norm": 1.18622624874115, "learning_rate": 5.399752538457773e-06, "loss": 0.5022, "step": 3264 }, { "epoch": 0.49, "grad_norm": 0.8869053721427917, "learning_rate": 5.397327575723942e-06, "loss": 0.4098, "step": 3265 }, { "epoch": 0.49, "grad_norm": 3.252054452896118, "learning_rate": 5.394902518934031e-06, "loss": 0.4141, "step": 3266 }, { "epoch": 0.49, "grad_norm": 1.6473338603973389, "learning_rate": 5.392477368662107e-06, "loss": 0.5297, "step": 3267 }, { "epoch": 0.49, "grad_norm": 1.4954925775527954, "learning_rate": 5.390052125482252e-06, "loss": 0.4166, "step": 3268 }, { "epoch": 0.49, "grad_norm": 1.440039038658142, "learning_rate": 5.387626789968574e-06, "loss": 0.5075, "step": 3269 }, { "epoch": 0.49, "grad_norm": 1.2910395860671997, "learning_rate": 5.385201362695204e-06, "loss": 0.5226, "step": 3270 }, { "epoch": 0.49, "grad_norm": 1.2577542066574097, "learning_rate": 5.382775844236293e-06, "loss": 0.5547, "step": 3271 }, { "epoch": 0.49, "grad_norm": 1.5738580226898193, "learning_rate": 5.380350235166014e-06, "loss": 0.4989, "step": 3272 }, { "epoch": 0.49, "grad_norm": 2.73419189453125, "learning_rate": 5.377924536058563e-06, "loss": 0.4952, "step": 3273 }, { "epoch": 0.49, "grad_norm": 1.4711889028549194, "learning_rate": 5.375498747488153e-06, "loss": 0.549, "step": 3274 }, { "epoch": 0.49, "grad_norm": 1.5643610954284668, "learning_rate": 5.373072870029022e-06, "loss": 0.4213, "step": 3275 }, { "epoch": 0.49, "grad_norm": 1.2577757835388184, "learning_rate": 5.370646904255428e-06, "loss": 0.5097, "step": 3276 }, { "epoch": 0.49, "grad_norm": 1.1954238414764404, "learning_rate": 5.36822085074165e-06, "loss": 0.4949, "step": 3277 }, { "epoch": 0.49, "grad_norm": 1.2206816673278809, "learning_rate": 5.365794710061987e-06, "loss": 0.501, "step": 3278 }, { "epoch": 0.49, "grad_norm": 2.1890809535980225, "learning_rate": 5.363368482790762e-06, "loss": 0.5144, "step": 3279 }, { "epoch": 0.49, "grad_norm": 0.9233826994895935, "learning_rate": 5.360942169502312e-06, "loss": 0.3282, "step": 3280 }, { "epoch": 0.49, "grad_norm": 1.42556893825531, "learning_rate": 5.358515770770999e-06, "loss": 0.5034, "step": 3281 }, { "epoch": 0.49, "grad_norm": 1.181225299835205, "learning_rate": 5.356089287171206e-06, "loss": 0.48, "step": 3282 }, { "epoch": 0.49, "grad_norm": 1.4185588359832764, "learning_rate": 5.353662719277334e-06, "loss": 0.4667, "step": 3283 }, { "epoch": 0.49, "grad_norm": 1.1369937658309937, "learning_rate": 5.351236067663803e-06, "loss": 0.5286, "step": 3284 }, { "epoch": 0.49, "grad_norm": 1.0206960439682007, "learning_rate": 5.348809332905056e-06, "loss": 0.489, "step": 3285 }, { "epoch": 0.49, "grad_norm": 1.0093897581100464, "learning_rate": 5.346382515575553e-06, "loss": 0.3924, "step": 3286 }, { "epoch": 0.49, "grad_norm": 1.5897568464279175, "learning_rate": 5.343955616249773e-06, "loss": 0.5617, "step": 3287 }, { "epoch": 0.49, "grad_norm": 1.0807329416275024, "learning_rate": 5.341528635502219e-06, "loss": 0.4598, "step": 3288 }, { "epoch": 0.49, "grad_norm": 1.0786889791488647, "learning_rate": 5.3391015739074085e-06, "loss": 0.4416, "step": 3289 }, { "epoch": 0.49, "grad_norm": 1.221483826637268, "learning_rate": 5.336674432039879e-06, "loss": 0.3989, "step": 3290 }, { "epoch": 0.49, "grad_norm": 1.5441621541976929, "learning_rate": 5.334247210474189e-06, "loss": 0.4934, "step": 3291 }, { "epoch": 0.49, "grad_norm": 1.3572899103164673, "learning_rate": 5.3318199097849146e-06, "loss": 0.5295, "step": 3292 }, { "epoch": 0.49, "grad_norm": 1.166922688484192, "learning_rate": 5.329392530546649e-06, "loss": 0.4666, "step": 3293 }, { "epoch": 0.49, "grad_norm": 1.126949667930603, "learning_rate": 5.326965073334007e-06, "loss": 0.4689, "step": 3294 }, { "epoch": 0.49, "grad_norm": 1.102970004081726, "learning_rate": 5.324537538721621e-06, "loss": 0.4647, "step": 3295 }, { "epoch": 0.5, "grad_norm": 1.1349563598632812, "learning_rate": 5.322109927284142e-06, "loss": 0.4906, "step": 3296 }, { "epoch": 0.5, "grad_norm": 1.5907330513000488, "learning_rate": 5.3196822395962355e-06, "loss": 0.4788, "step": 3297 }, { "epoch": 0.5, "grad_norm": 1.3248251676559448, "learning_rate": 5.31725447623259e-06, "loss": 0.4938, "step": 3298 }, { "epoch": 0.5, "grad_norm": 1.0506895780563354, "learning_rate": 5.314826637767909e-06, "loss": 0.4459, "step": 3299 }, { "epoch": 0.5, "grad_norm": 0.9344661831855774, "learning_rate": 5.312398724776915e-06, "loss": 0.4511, "step": 3300 }, { "epoch": 0.5, "grad_norm": 1.0634862184524536, "learning_rate": 5.309970737834349e-06, "loss": 0.4879, "step": 3301 }, { "epoch": 0.5, "grad_norm": 2.0666134357452393, "learning_rate": 5.3075426775149665e-06, "loss": 0.4749, "step": 3302 }, { "epoch": 0.5, "grad_norm": 1.0756012201309204, "learning_rate": 5.305114544393544e-06, "loss": 0.4731, "step": 3303 }, { "epoch": 0.5, "grad_norm": 1.6900900602340698, "learning_rate": 5.3026863390448705e-06, "loss": 0.4122, "step": 3304 }, { "epoch": 0.5, "grad_norm": 1.0492682456970215, "learning_rate": 5.300258062043758e-06, "loss": 0.4259, "step": 3305 }, { "epoch": 0.5, "grad_norm": 1.6407910585403442, "learning_rate": 5.2978297139650314e-06, "loss": 0.5335, "step": 3306 }, { "epoch": 0.5, "grad_norm": 1.2775744199752808, "learning_rate": 5.2954012953835334e-06, "loss": 0.4818, "step": 3307 }, { "epoch": 0.5, "grad_norm": 1.0554813146591187, "learning_rate": 5.292972806874123e-06, "loss": 0.4688, "step": 3308 }, { "epoch": 0.5, "grad_norm": 1.0909781455993652, "learning_rate": 5.290544249011678e-06, "loss": 0.5397, "step": 3309 }, { "epoch": 0.5, "grad_norm": 1.2453224658966064, "learning_rate": 5.28811562237109e-06, "loss": 0.5061, "step": 3310 }, { "epoch": 0.5, "grad_norm": 1.1922261714935303, "learning_rate": 5.285686927527266e-06, "loss": 0.4378, "step": 3311 }, { "epoch": 0.5, "grad_norm": 1.5897789001464844, "learning_rate": 5.283258165055134e-06, "loss": 0.5092, "step": 3312 }, { "epoch": 0.5, "grad_norm": 1.4284685850143433, "learning_rate": 5.280829335529632e-06, "loss": 0.5741, "step": 3313 }, { "epoch": 0.5, "grad_norm": 1.1164036989212036, "learning_rate": 5.278400439525718e-06, "loss": 0.5106, "step": 3314 }, { "epoch": 0.5, "grad_norm": 1.1330314874649048, "learning_rate": 5.275971477618366e-06, "loss": 0.481, "step": 3315 }, { "epoch": 0.5, "grad_norm": 1.4097540378570557, "learning_rate": 5.273542450382561e-06, "loss": 0.5324, "step": 3316 }, { "epoch": 0.5, "grad_norm": 1.3085863590240479, "learning_rate": 5.271113358393309e-06, "loss": 0.4981, "step": 3317 }, { "epoch": 0.5, "grad_norm": 1.1918026208877563, "learning_rate": 5.268684202225628e-06, "loss": 0.3708, "step": 3318 }, { "epoch": 0.5, "grad_norm": 1.2656362056732178, "learning_rate": 5.266254982454553e-06, "loss": 0.4651, "step": 3319 }, { "epoch": 0.5, "grad_norm": 0.9938597679138184, "learning_rate": 5.2638256996551305e-06, "loss": 0.5067, "step": 3320 }, { "epoch": 0.5, "grad_norm": 1.0554054975509644, "learning_rate": 5.261396354402428e-06, "loss": 0.4444, "step": 3321 }, { "epoch": 0.5, "grad_norm": 1.153174877166748, "learning_rate": 5.258966947271524e-06, "loss": 0.5202, "step": 3322 }, { "epoch": 0.5, "grad_norm": 1.0484617948532104, "learning_rate": 5.256537478837509e-06, "loss": 0.4369, "step": 3323 }, { "epoch": 0.5, "grad_norm": 1.1989870071411133, "learning_rate": 5.254107949675493e-06, "loss": 0.5609, "step": 3324 }, { "epoch": 0.5, "grad_norm": 1.179878830909729, "learning_rate": 5.251678360360599e-06, "loss": 0.4549, "step": 3325 }, { "epoch": 0.5, "grad_norm": 1.0727622509002686, "learning_rate": 5.249248711467963e-06, "loss": 0.5064, "step": 3326 }, { "epoch": 0.5, "grad_norm": 1.2015424966812134, "learning_rate": 5.246819003572735e-06, "loss": 0.5101, "step": 3327 }, { "epoch": 0.5, "grad_norm": 1.6457115411758423, "learning_rate": 5.2443892372500805e-06, "loss": 0.52, "step": 3328 }, { "epoch": 0.5, "grad_norm": 1.1334882974624634, "learning_rate": 5.241959413075178e-06, "loss": 0.5223, "step": 3329 }, { "epoch": 0.5, "grad_norm": 1.3221731185913086, "learning_rate": 5.23952953162322e-06, "loss": 0.5313, "step": 3330 }, { "epoch": 0.5, "grad_norm": 1.2550333738327026, "learning_rate": 5.237099593469411e-06, "loss": 0.411, "step": 3331 }, { "epoch": 0.5, "grad_norm": 1.1247316598892212, "learning_rate": 5.234669599188972e-06, "loss": 0.4852, "step": 3332 }, { "epoch": 0.5, "grad_norm": 1.118021011352539, "learning_rate": 5.232239549357134e-06, "loss": 0.486, "step": 3333 }, { "epoch": 0.5, "grad_norm": 1.1222916841506958, "learning_rate": 5.2298094445491444e-06, "loss": 0.4911, "step": 3334 }, { "epoch": 0.5, "grad_norm": 2.1550419330596924, "learning_rate": 5.227379285340259e-06, "loss": 0.4948, "step": 3335 }, { "epoch": 0.5, "grad_norm": 1.1062003374099731, "learning_rate": 5.2249490723057525e-06, "loss": 0.4559, "step": 3336 }, { "epoch": 0.5, "grad_norm": 1.57857346534729, "learning_rate": 5.222518806020907e-06, "loss": 0.5575, "step": 3337 }, { "epoch": 0.5, "grad_norm": 1.0208569765090942, "learning_rate": 5.22008848706102e-06, "loss": 0.3877, "step": 3338 }, { "epoch": 0.5, "grad_norm": 1.2534452676773071, "learning_rate": 5.217658116001404e-06, "loss": 0.4271, "step": 3339 }, { "epoch": 0.5, "grad_norm": 1.0769176483154297, "learning_rate": 5.2152276934173755e-06, "loss": 0.498, "step": 3340 }, { "epoch": 0.5, "grad_norm": 1.1140764951705933, "learning_rate": 5.21279721988427e-06, "loss": 0.5491, "step": 3341 }, { "epoch": 0.5, "grad_norm": 0.993719756603241, "learning_rate": 5.210366695977435e-06, "loss": 0.508, "step": 3342 }, { "epoch": 0.5, "grad_norm": 1.2207010984420776, "learning_rate": 5.2079361222722255e-06, "loss": 0.4941, "step": 3343 }, { "epoch": 0.5, "grad_norm": 1.4777445793151855, "learning_rate": 5.205505499344015e-06, "loss": 0.4402, "step": 3344 }, { "epoch": 0.5, "grad_norm": 1.1361373662948608, "learning_rate": 5.203074827768183e-06, "loss": 0.7119, "step": 3345 }, { "epoch": 0.5, "grad_norm": 1.2459959983825684, "learning_rate": 5.200644108120122e-06, "loss": 0.4314, "step": 3346 }, { "epoch": 0.5, "grad_norm": 0.9985036849975586, "learning_rate": 5.198213340975237e-06, "loss": 0.3691, "step": 3347 }, { "epoch": 0.5, "grad_norm": 1.188429832458496, "learning_rate": 5.19578252690894e-06, "loss": 0.423, "step": 3348 }, { "epoch": 0.5, "grad_norm": 1.0827018022537231, "learning_rate": 5.193351666496664e-06, "loss": 0.448, "step": 3349 }, { "epoch": 0.5, "grad_norm": 1.41739821434021, "learning_rate": 5.1909207603138414e-06, "loss": 0.4785, "step": 3350 }, { "epoch": 0.5, "grad_norm": 1.0901010036468506, "learning_rate": 5.188489808935924e-06, "loss": 0.4333, "step": 3351 }, { "epoch": 0.5, "grad_norm": 0.9926981925964355, "learning_rate": 5.186058812938367e-06, "loss": 0.4224, "step": 3352 }, { "epoch": 0.5, "grad_norm": 1.271761417388916, "learning_rate": 5.183627772896642e-06, "loss": 0.7468, "step": 3353 }, { "epoch": 0.5, "grad_norm": 1.2164143323898315, "learning_rate": 5.181196689386229e-06, "loss": 0.4511, "step": 3354 }, { "epoch": 0.5, "grad_norm": 1.1882277727127075, "learning_rate": 5.17876556298262e-06, "loss": 0.6348, "step": 3355 }, { "epoch": 0.5, "grad_norm": 1.3073846101760864, "learning_rate": 5.176334394261312e-06, "loss": 0.4612, "step": 3356 }, { "epoch": 0.5, "grad_norm": 1.311224102973938, "learning_rate": 5.17390318379782e-06, "loss": 0.5513, "step": 3357 }, { "epoch": 0.5, "grad_norm": 1.3019897937774658, "learning_rate": 5.171471932167661e-06, "loss": 0.4669, "step": 3358 }, { "epoch": 0.5, "grad_norm": 1.6847288608551025, "learning_rate": 5.169040639946364e-06, "loss": 0.4608, "step": 3359 }, { "epoch": 0.5, "grad_norm": 1.1239396333694458, "learning_rate": 5.166609307709473e-06, "loss": 0.4765, "step": 3360 }, { "epoch": 0.5, "grad_norm": 2.5860402584075928, "learning_rate": 5.164177936032534e-06, "loss": 0.4848, "step": 3361 }, { "epoch": 0.51, "grad_norm": 1.0436036586761475, "learning_rate": 5.161746525491107e-06, "loss": 0.4539, "step": 3362 }, { "epoch": 0.51, "grad_norm": 2.014350414276123, "learning_rate": 5.15931507666076e-06, "loss": 0.4608, "step": 3363 }, { "epoch": 0.51, "grad_norm": 0.9498105645179749, "learning_rate": 5.156883590117068e-06, "loss": 0.4579, "step": 3364 }, { "epoch": 0.51, "grad_norm": 1.083359718322754, "learning_rate": 5.154452066435617e-06, "loss": 0.4154, "step": 3365 }, { "epoch": 0.51, "grad_norm": 1.0878249406814575, "learning_rate": 5.152020506192003e-06, "loss": 0.4856, "step": 3366 }, { "epoch": 0.51, "grad_norm": 1.3011797666549683, "learning_rate": 5.14958890996183e-06, "loss": 0.555, "step": 3367 }, { "epoch": 0.51, "grad_norm": 1.329506754875183, "learning_rate": 5.147157278320708e-06, "loss": 0.464, "step": 3368 }, { "epoch": 0.51, "grad_norm": 1.0036444664001465, "learning_rate": 5.144725611844258e-06, "loss": 0.4979, "step": 3369 }, { "epoch": 0.51, "grad_norm": 1.102198839187622, "learning_rate": 5.142293911108107e-06, "loss": 0.5277, "step": 3370 }, { "epoch": 0.51, "grad_norm": 1.8411537408828735, "learning_rate": 5.139862176687891e-06, "loss": 0.4999, "step": 3371 }, { "epoch": 0.51, "grad_norm": 1.4733136892318726, "learning_rate": 5.137430409159256e-06, "loss": 0.6005, "step": 3372 }, { "epoch": 0.51, "grad_norm": 1.3064475059509277, "learning_rate": 5.1349986090978555e-06, "loss": 0.4983, "step": 3373 }, { "epoch": 0.51, "grad_norm": 1.140031099319458, "learning_rate": 5.132566777079347e-06, "loss": 0.4195, "step": 3374 }, { "epoch": 0.51, "grad_norm": 1.4108858108520508, "learning_rate": 5.130134913679401e-06, "loss": 0.4505, "step": 3375 }, { "epoch": 0.51, "grad_norm": 1.176177740097046, "learning_rate": 5.127703019473686e-06, "loss": 0.4506, "step": 3376 }, { "epoch": 0.51, "grad_norm": 1.176599144935608, "learning_rate": 5.125271095037893e-06, "loss": 0.5537, "step": 3377 }, { "epoch": 0.51, "grad_norm": 2.091614246368408, "learning_rate": 5.122839140947704e-06, "loss": 0.5119, "step": 3378 }, { "epoch": 0.51, "grad_norm": 1.346314787864685, "learning_rate": 5.12040715777882e-06, "loss": 0.4975, "step": 3379 }, { "epoch": 0.51, "grad_norm": 1.190236210823059, "learning_rate": 5.117975146106942e-06, "loss": 0.5212, "step": 3380 }, { "epoch": 0.51, "grad_norm": 1.0255742073059082, "learning_rate": 5.115543106507782e-06, "loss": 0.5432, "step": 3381 }, { "epoch": 0.51, "grad_norm": 3.01247239112854, "learning_rate": 5.113111039557055e-06, "loss": 0.5348, "step": 3382 }, { "epoch": 0.51, "grad_norm": 1.2388349771499634, "learning_rate": 5.110678945830485e-06, "loss": 0.4606, "step": 3383 }, { "epoch": 0.51, "grad_norm": 1.3442964553833008, "learning_rate": 5.108246825903801e-06, "loss": 0.4286, "step": 3384 }, { "epoch": 0.51, "grad_norm": 1.3450828790664673, "learning_rate": 5.105814680352739e-06, "loss": 0.4496, "step": 3385 }, { "epoch": 0.51, "grad_norm": 1.1357978582382202, "learning_rate": 5.1033825097530395e-06, "loss": 0.4511, "step": 3386 }, { "epoch": 0.51, "grad_norm": 1.1578055620193481, "learning_rate": 5.100950314680453e-06, "loss": 0.5063, "step": 3387 }, { "epoch": 0.51, "grad_norm": 1.1497042179107666, "learning_rate": 5.098518095710731e-06, "loss": 0.5036, "step": 3388 }, { "epoch": 0.51, "grad_norm": 0.9730983972549438, "learning_rate": 5.096085853419631e-06, "loss": 0.4561, "step": 3389 }, { "epoch": 0.51, "grad_norm": 1.2415331602096558, "learning_rate": 5.093653588382922e-06, "loss": 0.3822, "step": 3390 }, { "epoch": 0.51, "grad_norm": 1.2298643589019775, "learning_rate": 5.09122130117637e-06, "loss": 0.4845, "step": 3391 }, { "epoch": 0.51, "grad_norm": 1.1223949193954468, "learning_rate": 5.0887889923757525e-06, "loss": 0.5165, "step": 3392 }, { "epoch": 0.51, "grad_norm": 1.2427784204483032, "learning_rate": 5.0863566625568495e-06, "loss": 0.5117, "step": 3393 }, { "epoch": 0.51, "grad_norm": 1.1558433771133423, "learning_rate": 5.083924312295445e-06, "loss": 0.4449, "step": 3394 }, { "epoch": 0.51, "grad_norm": 1.3681328296661377, "learning_rate": 5.0814919421673315e-06, "loss": 0.5394, "step": 3395 }, { "epoch": 0.51, "grad_norm": 1.1817728281021118, "learning_rate": 5.079059552748302e-06, "loss": 0.5556, "step": 3396 }, { "epoch": 0.51, "grad_norm": 1.0209228992462158, "learning_rate": 5.076627144614158e-06, "loss": 0.5045, "step": 3397 }, { "epoch": 0.51, "grad_norm": 1.1451740264892578, "learning_rate": 5.074194718340701e-06, "loss": 0.5361, "step": 3398 }, { "epoch": 0.51, "grad_norm": 1.2611885070800781, "learning_rate": 5.071762274503741e-06, "loss": 0.5416, "step": 3399 }, { "epoch": 0.51, "grad_norm": 1.15174400806427, "learning_rate": 5.0693298136790895e-06, "loss": 0.4912, "step": 3400 }, { "epoch": 0.51, "grad_norm": 1.2037911415100098, "learning_rate": 5.066897336442563e-06, "loss": 0.478, "step": 3401 }, { "epoch": 0.51, "grad_norm": 1.3022481203079224, "learning_rate": 5.064464843369983e-06, "loss": 0.4752, "step": 3402 }, { "epoch": 0.51, "grad_norm": 1.1625765562057495, "learning_rate": 5.062032335037171e-06, "loss": 0.4291, "step": 3403 }, { "epoch": 0.51, "grad_norm": 1.1053334474563599, "learning_rate": 5.0595998120199565e-06, "loss": 0.5378, "step": 3404 }, { "epoch": 0.51, "grad_norm": 3.9309451580047607, "learning_rate": 5.0571672748941705e-06, "loss": 0.4159, "step": 3405 }, { "epoch": 0.51, "grad_norm": 0.8985252976417542, "learning_rate": 5.054734724235647e-06, "loss": 0.5444, "step": 3406 }, { "epoch": 0.51, "grad_norm": 1.0588891506195068, "learning_rate": 5.052302160620223e-06, "loss": 0.5184, "step": 3407 }, { "epoch": 0.51, "grad_norm": 1.1812750101089478, "learning_rate": 5.049869584623741e-06, "loss": 0.5143, "step": 3408 }, { "epoch": 0.51, "grad_norm": 1.1682325601577759, "learning_rate": 5.047436996822043e-06, "loss": 0.4546, "step": 3409 }, { "epoch": 0.51, "grad_norm": 1.3839659690856934, "learning_rate": 5.045004397790974e-06, "loss": 0.5046, "step": 3410 }, { "epoch": 0.51, "grad_norm": 3.1041460037231445, "learning_rate": 5.042571788106389e-06, "loss": 0.4513, "step": 3411 }, { "epoch": 0.51, "grad_norm": 1.3956973552703857, "learning_rate": 5.040139168344135e-06, "loss": 0.5242, "step": 3412 }, { "epoch": 0.51, "grad_norm": 1.3066279888153076, "learning_rate": 5.037706539080066e-06, "loss": 0.4719, "step": 3413 }, { "epoch": 0.51, "grad_norm": 1.1511774063110352, "learning_rate": 5.035273900890039e-06, "loss": 0.5329, "step": 3414 }, { "epoch": 0.51, "grad_norm": 1.331955909729004, "learning_rate": 5.032841254349911e-06, "loss": 0.4879, "step": 3415 }, { "epoch": 0.51, "grad_norm": 1.342289924621582, "learning_rate": 5.030408600035546e-06, "loss": 0.5216, "step": 3416 }, { "epoch": 0.51, "grad_norm": 1.21343195438385, "learning_rate": 5.0279759385228045e-06, "loss": 0.4979, "step": 3417 }, { "epoch": 0.51, "grad_norm": 1.0862404108047485, "learning_rate": 5.025543270387548e-06, "loss": 0.4755, "step": 3418 }, { "epoch": 0.51, "grad_norm": 1.2327709197998047, "learning_rate": 5.0231105962056445e-06, "loss": 0.476, "step": 3419 }, { "epoch": 0.51, "grad_norm": 1.2072091102600098, "learning_rate": 5.020677916552959e-06, "loss": 0.519, "step": 3420 }, { "epoch": 0.51, "grad_norm": 1.1662375926971436, "learning_rate": 5.018245232005362e-06, "loss": 0.4204, "step": 3421 }, { "epoch": 0.51, "grad_norm": 1.1298887729644775, "learning_rate": 5.015812543138722e-06, "loss": 0.5009, "step": 3422 }, { "epoch": 0.51, "grad_norm": 2.290724277496338, "learning_rate": 5.01337985052891e-06, "loss": 0.4887, "step": 3423 }, { "epoch": 0.51, "grad_norm": 1.1294199228286743, "learning_rate": 5.0109471547517954e-06, "loss": 0.4554, "step": 3424 }, { "epoch": 0.51, "grad_norm": 1.2139389514923096, "learning_rate": 5.00851445638325e-06, "loss": 0.5582, "step": 3425 }, { "epoch": 0.51, "grad_norm": 1.0723471641540527, "learning_rate": 5.0060817559991484e-06, "loss": 0.4559, "step": 3426 }, { "epoch": 0.51, "grad_norm": 1.1624053716659546, "learning_rate": 5.003649054175362e-06, "loss": 0.4624, "step": 3427 }, { "epoch": 0.51, "grad_norm": 1.1333459615707397, "learning_rate": 5.001216351487767e-06, "loss": 0.4751, "step": 3428 }, { "epoch": 0.52, "grad_norm": 1.1817954778671265, "learning_rate": 4.998783648512235e-06, "loss": 0.4898, "step": 3429 }, { "epoch": 0.52, "grad_norm": 1.190962314605713, "learning_rate": 4.996350945824639e-06, "loss": 0.7157, "step": 3430 }, { "epoch": 0.52, "grad_norm": 1.230159878730774, "learning_rate": 4.993918244000854e-06, "loss": 0.5502, "step": 3431 }, { "epoch": 0.52, "grad_norm": 1.3026634454727173, "learning_rate": 4.991485543616752e-06, "loss": 0.5093, "step": 3432 }, { "epoch": 0.52, "grad_norm": 1.2684193849563599, "learning_rate": 4.989052845248208e-06, "loss": 0.5458, "step": 3433 }, { "epoch": 0.52, "grad_norm": 1.1095526218414307, "learning_rate": 4.9866201494710934e-06, "loss": 0.4385, "step": 3434 }, { "epoch": 0.52, "grad_norm": 1.0677263736724854, "learning_rate": 4.984187456861278e-06, "loss": 0.4768, "step": 3435 }, { "epoch": 0.52, "grad_norm": 1.1651583909988403, "learning_rate": 4.981754767994638e-06, "loss": 0.4116, "step": 3436 }, { "epoch": 0.52, "grad_norm": 1.1154922246932983, "learning_rate": 4.979322083447042e-06, "loss": 0.7044, "step": 3437 }, { "epoch": 0.52, "grad_norm": 1.2067959308624268, "learning_rate": 4.976889403794357e-06, "loss": 0.5392, "step": 3438 }, { "epoch": 0.52, "grad_norm": 1.0054973363876343, "learning_rate": 4.9744567296124536e-06, "loss": 0.4521, "step": 3439 }, { "epoch": 0.52, "grad_norm": 1.2160848379135132, "learning_rate": 4.972024061477197e-06, "loss": 0.503, "step": 3440 }, { "epoch": 0.52, "grad_norm": 1.0490987300872803, "learning_rate": 4.969591399964456e-06, "loss": 0.426, "step": 3441 }, { "epoch": 0.52, "grad_norm": 1.0148017406463623, "learning_rate": 4.96715874565009e-06, "loss": 0.406, "step": 3442 }, { "epoch": 0.52, "grad_norm": 1.7598140239715576, "learning_rate": 4.9647260991099635e-06, "loss": 0.3616, "step": 3443 }, { "epoch": 0.52, "grad_norm": 1.1331292390823364, "learning_rate": 4.9622934609199365e-06, "loss": 0.5442, "step": 3444 }, { "epoch": 0.52, "grad_norm": 1.4782936573028564, "learning_rate": 4.959860831655867e-06, "loss": 0.4195, "step": 3445 }, { "epoch": 0.52, "grad_norm": 1.1827671527862549, "learning_rate": 4.957428211893611e-06, "loss": 0.4514, "step": 3446 }, { "epoch": 0.52, "grad_norm": 1.2197039127349854, "learning_rate": 4.954995602209025e-06, "loss": 0.56, "step": 3447 }, { "epoch": 0.52, "grad_norm": 1.0631505250930786, "learning_rate": 4.952563003177959e-06, "loss": 0.4673, "step": 3448 }, { "epoch": 0.52, "grad_norm": 3.5028839111328125, "learning_rate": 4.950130415376261e-06, "loss": 0.4396, "step": 3449 }, { "epoch": 0.52, "grad_norm": 1.0940803289413452, "learning_rate": 4.947697839379778e-06, "loss": 0.4867, "step": 3450 }, { "epoch": 0.52, "grad_norm": 1.0782634019851685, "learning_rate": 4.945265275764355e-06, "loss": 0.71, "step": 3451 }, { "epoch": 0.52, "grad_norm": 1.2094477415084839, "learning_rate": 4.942832725105831e-06, "loss": 0.5308, "step": 3452 }, { "epoch": 0.52, "grad_norm": 1.003982663154602, "learning_rate": 4.940400187980045e-06, "loss": 0.4768, "step": 3453 }, { "epoch": 0.52, "grad_norm": 1.3522142171859741, "learning_rate": 4.937967664962831e-06, "loss": 0.4611, "step": 3454 }, { "epoch": 0.52, "grad_norm": 1.0758353471755981, "learning_rate": 4.93553515663002e-06, "loss": 0.4605, "step": 3455 }, { "epoch": 0.52, "grad_norm": 1.5786727666854858, "learning_rate": 4.9331026635574394e-06, "loss": 0.5637, "step": 3456 }, { "epoch": 0.52, "grad_norm": 1.1676371097564697, "learning_rate": 4.93067018632091e-06, "loss": 0.4666, "step": 3457 }, { "epoch": 0.52, "grad_norm": 1.247012972831726, "learning_rate": 4.9282377254962606e-06, "loss": 0.4591, "step": 3458 }, { "epoch": 0.52, "grad_norm": 1.2800718545913696, "learning_rate": 4.9258052816593e-06, "loss": 0.5288, "step": 3459 }, { "epoch": 0.52, "grad_norm": 1.1697471141815186, "learning_rate": 4.9233728553858435e-06, "loss": 0.5184, "step": 3460 }, { "epoch": 0.52, "grad_norm": 1.1005394458770752, "learning_rate": 4.920940447251699e-06, "loss": 0.476, "step": 3461 }, { "epoch": 0.52, "grad_norm": 1.1144843101501465, "learning_rate": 4.91850805783267e-06, "loss": 0.7172, "step": 3462 }, { "epoch": 0.52, "grad_norm": 1.099844217300415, "learning_rate": 4.916075687704556e-06, "loss": 0.4335, "step": 3463 }, { "epoch": 0.52, "grad_norm": 1.2536487579345703, "learning_rate": 4.913643337443152e-06, "loss": 0.4521, "step": 3464 }, { "epoch": 0.52, "grad_norm": 1.2279983758926392, "learning_rate": 4.911211007624249e-06, "loss": 0.5544, "step": 3465 }, { "epoch": 0.52, "grad_norm": 1.2447254657745361, "learning_rate": 4.908778698823633e-06, "loss": 0.5641, "step": 3466 }, { "epoch": 0.52, "grad_norm": 1.139061689376831, "learning_rate": 4.906346411617081e-06, "loss": 0.5012, "step": 3467 }, { "epoch": 0.52, "grad_norm": 1.9369977712631226, "learning_rate": 4.903914146580368e-06, "loss": 0.4932, "step": 3468 }, { "epoch": 0.52, "grad_norm": 1.233665943145752, "learning_rate": 4.90148190428927e-06, "loss": 0.5032, "step": 3469 }, { "epoch": 0.52, "grad_norm": 1.332816481590271, "learning_rate": 4.899049685319549e-06, "loss": 0.5446, "step": 3470 }, { "epoch": 0.52, "grad_norm": 1.191532850265503, "learning_rate": 4.896617490246961e-06, "loss": 0.5352, "step": 3471 }, { "epoch": 0.52, "grad_norm": 1.116807460784912, "learning_rate": 4.894185319647263e-06, "loss": 0.5442, "step": 3472 }, { "epoch": 0.52, "grad_norm": 1.3383234739303589, "learning_rate": 4.8917531740962e-06, "loss": 0.5163, "step": 3473 }, { "epoch": 0.52, "grad_norm": 1.0684109926223755, "learning_rate": 4.889321054169516e-06, "loss": 0.5367, "step": 3474 }, { "epoch": 0.52, "grad_norm": 1.2958228588104248, "learning_rate": 4.886888960442946e-06, "loss": 0.4858, "step": 3475 }, { "epoch": 0.52, "grad_norm": 1.4313045740127563, "learning_rate": 4.884456893492219e-06, "loss": 0.4277, "step": 3476 }, { "epoch": 0.52, "grad_norm": 1.2733209133148193, "learning_rate": 4.8820248538930585e-06, "loss": 0.4612, "step": 3477 }, { "epoch": 0.52, "grad_norm": 1.0284154415130615, "learning_rate": 4.879592842221182e-06, "loss": 0.4354, "step": 3478 }, { "epoch": 0.52, "grad_norm": 1.280469536781311, "learning_rate": 4.877160859052296e-06, "loss": 0.5007, "step": 3479 }, { "epoch": 0.52, "grad_norm": 1.1222357749938965, "learning_rate": 4.874728904962108e-06, "loss": 0.4673, "step": 3480 }, { "epoch": 0.52, "grad_norm": 2.4909884929656982, "learning_rate": 4.872296980526313e-06, "loss": 0.5773, "step": 3481 }, { "epoch": 0.52, "grad_norm": 1.371192455291748, "learning_rate": 4.869865086320601e-06, "loss": 0.5272, "step": 3482 }, { "epoch": 0.52, "grad_norm": 1.037643551826477, "learning_rate": 4.867433222920654e-06, "loss": 0.429, "step": 3483 }, { "epoch": 0.52, "grad_norm": 1.0049642324447632, "learning_rate": 4.865001390902146e-06, "loss": 0.4684, "step": 3484 }, { "epoch": 0.52, "grad_norm": 0.9959530830383301, "learning_rate": 4.862569590840744e-06, "loss": 0.5013, "step": 3485 }, { "epoch": 0.52, "grad_norm": 1.1869828701019287, "learning_rate": 4.86013782331211e-06, "loss": 0.4882, "step": 3486 }, { "epoch": 0.52, "grad_norm": 1.5453261137008667, "learning_rate": 4.857706088891895e-06, "loss": 0.4816, "step": 3487 }, { "epoch": 0.52, "grad_norm": 1.3182859420776367, "learning_rate": 4.8552743881557446e-06, "loss": 0.4774, "step": 3488 }, { "epoch": 0.52, "grad_norm": 1.0813233852386475, "learning_rate": 4.852842721679293e-06, "loss": 0.425, "step": 3489 }, { "epoch": 0.52, "grad_norm": 1.1741604804992676, "learning_rate": 4.85041109003817e-06, "loss": 0.5124, "step": 3490 }, { "epoch": 0.52, "grad_norm": 1.3147807121276855, "learning_rate": 4.847979493807997e-06, "loss": 0.4201, "step": 3491 }, { "epoch": 0.52, "grad_norm": 1.0672461986541748, "learning_rate": 4.845547933564383e-06, "loss": 0.4984, "step": 3492 }, { "epoch": 0.52, "grad_norm": 1.0833066701889038, "learning_rate": 4.843116409882933e-06, "loss": 0.4124, "step": 3493 }, { "epoch": 0.52, "grad_norm": 1.137689232826233, "learning_rate": 4.8406849233392415e-06, "loss": 0.5448, "step": 3494 }, { "epoch": 0.53, "grad_norm": 1.7022743225097656, "learning_rate": 4.838253474508895e-06, "loss": 0.446, "step": 3495 }, { "epoch": 0.53, "grad_norm": 7.440685272216797, "learning_rate": 4.835822063967468e-06, "loss": 0.4922, "step": 3496 }, { "epoch": 0.53, "grad_norm": 1.250968337059021, "learning_rate": 4.83339069229053e-06, "loss": 0.5865, "step": 3497 }, { "epoch": 0.53, "grad_norm": 1.3129407167434692, "learning_rate": 4.8309593600536375e-06, "loss": 0.4682, "step": 3498 }, { "epoch": 0.53, "grad_norm": 1.0191835165023804, "learning_rate": 4.828528067832342e-06, "loss": 0.4595, "step": 3499 }, { "epoch": 0.53, "grad_norm": 1.298872470855713, "learning_rate": 4.826096816202182e-06, "loss": 0.4747, "step": 3500 }, { "epoch": 0.53, "grad_norm": 1.017991542816162, "learning_rate": 4.823665605738688e-06, "loss": 0.498, "step": 3501 }, { "epoch": 0.53, "grad_norm": 1.1385610103607178, "learning_rate": 4.821234437017381e-06, "loss": 0.4656, "step": 3502 }, { "epoch": 0.53, "grad_norm": 1.5791891813278198, "learning_rate": 4.818803310613772e-06, "loss": 0.4766, "step": 3503 }, { "epoch": 0.53, "grad_norm": 1.116694450378418, "learning_rate": 4.8163722271033595e-06, "loss": 0.5312, "step": 3504 }, { "epoch": 0.53, "grad_norm": 1.7104437351226807, "learning_rate": 4.8139411870616346e-06, "loss": 0.4281, "step": 3505 }, { "epoch": 0.53, "grad_norm": 1.0868955850601196, "learning_rate": 4.811510191064078e-06, "loss": 0.4836, "step": 3506 }, { "epoch": 0.53, "grad_norm": 1.5014092922210693, "learning_rate": 4.80907923968616e-06, "loss": 0.4691, "step": 3507 }, { "epoch": 0.53, "grad_norm": 1.2242456674575806, "learning_rate": 4.806648333503338e-06, "loss": 0.7263, "step": 3508 }, { "epoch": 0.53, "grad_norm": 1.528248906135559, "learning_rate": 4.804217473091061e-06, "loss": 0.5835, "step": 3509 }, { "epoch": 0.53, "grad_norm": 1.250257134437561, "learning_rate": 4.801786659024766e-06, "loss": 0.4547, "step": 3510 }, { "epoch": 0.53, "grad_norm": 12.093595504760742, "learning_rate": 4.79935589187988e-06, "loss": 0.5058, "step": 3511 }, { "epoch": 0.53, "grad_norm": 1.1947084665298462, "learning_rate": 4.796925172231818e-06, "loss": 0.4814, "step": 3512 }, { "epoch": 0.53, "grad_norm": 1.2125682830810547, "learning_rate": 4.794494500655986e-06, "loss": 0.554, "step": 3513 }, { "epoch": 0.53, "grad_norm": 1.9836944341659546, "learning_rate": 4.792063877727775e-06, "loss": 0.5485, "step": 3514 }, { "epoch": 0.53, "grad_norm": 1.1509732007980347, "learning_rate": 4.789633304022567e-06, "loss": 0.4829, "step": 3515 }, { "epoch": 0.53, "grad_norm": 1.0168710947036743, "learning_rate": 4.787202780115732e-06, "loss": 0.4315, "step": 3516 }, { "epoch": 0.53, "grad_norm": 1.1699135303497314, "learning_rate": 4.784772306582627e-06, "loss": 0.5113, "step": 3517 }, { "epoch": 0.53, "grad_norm": 1.0404609441757202, "learning_rate": 4.782341883998598e-06, "loss": 0.4655, "step": 3518 }, { "epoch": 0.53, "grad_norm": 1.1448034048080444, "learning_rate": 4.779911512938981e-06, "loss": 0.4723, "step": 3519 }, { "epoch": 0.53, "grad_norm": 1.1361477375030518, "learning_rate": 4.777481193979094e-06, "loss": 0.4789, "step": 3520 }, { "epoch": 0.53, "grad_norm": 1.103965163230896, "learning_rate": 4.77505092769425e-06, "loss": 0.5015, "step": 3521 }, { "epoch": 0.53, "grad_norm": 1.3568943738937378, "learning_rate": 4.772620714659742e-06, "loss": 0.5504, "step": 3522 }, { "epoch": 0.53, "grad_norm": 1.628006935119629, "learning_rate": 4.770190555450858e-06, "loss": 0.5321, "step": 3523 }, { "epoch": 0.53, "grad_norm": 1.2272204160690308, "learning_rate": 4.767760450642867e-06, "loss": 0.519, "step": 3524 }, { "epoch": 0.53, "grad_norm": 1.2578778266906738, "learning_rate": 4.76533040081103e-06, "loss": 0.4652, "step": 3525 }, { "epoch": 0.53, "grad_norm": 1.1661291122436523, "learning_rate": 4.76290040653059e-06, "loss": 0.4954, "step": 3526 }, { "epoch": 0.53, "grad_norm": 1.0790332555770874, "learning_rate": 4.760470468376782e-06, "loss": 0.4759, "step": 3527 }, { "epoch": 0.53, "grad_norm": 1.2862943410873413, "learning_rate": 4.758040586924823e-06, "loss": 0.5321, "step": 3528 }, { "epoch": 0.53, "grad_norm": 1.0065627098083496, "learning_rate": 4.755610762749921e-06, "loss": 0.5121, "step": 3529 }, { "epoch": 0.53, "grad_norm": 1.3652070760726929, "learning_rate": 4.753180996427267e-06, "loss": 0.5096, "step": 3530 }, { "epoch": 0.53, "grad_norm": 1.1209098100662231, "learning_rate": 4.750751288532039e-06, "loss": 0.5013, "step": 3531 }, { "epoch": 0.53, "grad_norm": 1.5435248613357544, "learning_rate": 4.748321639639404e-06, "loss": 0.5767, "step": 3532 }, { "epoch": 0.53, "grad_norm": 1.4026020765304565, "learning_rate": 4.74589205032451e-06, "loss": 0.4585, "step": 3533 }, { "epoch": 0.53, "grad_norm": 1.7875721454620361, "learning_rate": 4.743462521162494e-06, "loss": 0.4789, "step": 3534 }, { "epoch": 0.53, "grad_norm": 1.506030559539795, "learning_rate": 4.741033052728477e-06, "loss": 0.562, "step": 3535 }, { "epoch": 0.53, "grad_norm": 1.0264211893081665, "learning_rate": 4.7386036455975735e-06, "loss": 0.5293, "step": 3536 }, { "epoch": 0.53, "grad_norm": 1.532416582107544, "learning_rate": 4.73617430034487e-06, "loss": 0.4417, "step": 3537 }, { "epoch": 0.53, "grad_norm": 1.4207884073257446, "learning_rate": 4.733745017545449e-06, "loss": 0.5148, "step": 3538 }, { "epoch": 0.53, "grad_norm": 1.014062523841858, "learning_rate": 4.731315797774373e-06, "loss": 0.4606, "step": 3539 }, { "epoch": 0.53, "grad_norm": 1.1288114786148071, "learning_rate": 4.728886641606692e-06, "loss": 0.4475, "step": 3540 }, { "epoch": 0.53, "grad_norm": 1.294737696647644, "learning_rate": 4.72645754961744e-06, "loss": 0.4583, "step": 3541 }, { "epoch": 0.53, "grad_norm": 1.1224690675735474, "learning_rate": 4.7240285223816365e-06, "loss": 0.4301, "step": 3542 }, { "epoch": 0.53, "grad_norm": 1.1659363508224487, "learning_rate": 4.721599560474283e-06, "loss": 0.5081, "step": 3543 }, { "epoch": 0.53, "grad_norm": 1.3095179796218872, "learning_rate": 4.719170664470371e-06, "loss": 0.4441, "step": 3544 }, { "epoch": 0.53, "grad_norm": 1.2829519510269165, "learning_rate": 4.716741834944869e-06, "loss": 0.5031, "step": 3545 }, { "epoch": 0.53, "grad_norm": 1.2787617444992065, "learning_rate": 4.714313072472734e-06, "loss": 0.4777, "step": 3546 }, { "epoch": 0.53, "grad_norm": 1.2246277332305908, "learning_rate": 4.711884377628911e-06, "loss": 0.4702, "step": 3547 }, { "epoch": 0.53, "grad_norm": 1.108328938484192, "learning_rate": 4.709455750988323e-06, "loss": 0.4661, "step": 3548 }, { "epoch": 0.53, "grad_norm": 1.0831904411315918, "learning_rate": 4.707027193125877e-06, "loss": 0.5022, "step": 3549 }, { "epoch": 0.53, "grad_norm": 1.270954966545105, "learning_rate": 4.704598704616468e-06, "loss": 0.4758, "step": 3550 }, { "epoch": 0.53, "grad_norm": 1.4035723209381104, "learning_rate": 4.70217028603497e-06, "loss": 0.4989, "step": 3551 }, { "epoch": 0.53, "grad_norm": 1.2531236410140991, "learning_rate": 4.6997419379562435e-06, "loss": 0.494, "step": 3552 }, { "epoch": 0.53, "grad_norm": 1.4511617422103882, "learning_rate": 4.69731366095513e-06, "loss": 0.5742, "step": 3553 }, { "epoch": 0.53, "grad_norm": 1.3694677352905273, "learning_rate": 4.694885455606459e-06, "loss": 0.5173, "step": 3554 }, { "epoch": 0.53, "grad_norm": 1.0637125968933105, "learning_rate": 4.692457322485035e-06, "loss": 0.4845, "step": 3555 }, { "epoch": 0.53, "grad_norm": 1.0653237104415894, "learning_rate": 4.690029262165654e-06, "loss": 0.4756, "step": 3556 }, { "epoch": 0.53, "grad_norm": 1.1557172536849976, "learning_rate": 4.687601275223085e-06, "loss": 0.4428, "step": 3557 }, { "epoch": 0.53, "grad_norm": 1.246232271194458, "learning_rate": 4.6851733622320915e-06, "loss": 0.5385, "step": 3558 }, { "epoch": 0.53, "grad_norm": 1.0658373832702637, "learning_rate": 4.682745523767411e-06, "loss": 0.458, "step": 3559 }, { "epoch": 0.53, "grad_norm": 1.1435796022415161, "learning_rate": 4.680317760403765e-06, "loss": 0.4492, "step": 3560 }, { "epoch": 0.53, "grad_norm": 1.1855359077453613, "learning_rate": 4.677890072715861e-06, "loss": 0.5416, "step": 3561 }, { "epoch": 0.54, "grad_norm": 1.6149908304214478, "learning_rate": 4.67546246127838e-06, "loss": 0.5788, "step": 3562 }, { "epoch": 0.54, "grad_norm": 1.02857506275177, "learning_rate": 4.673034926665994e-06, "loss": 0.4432, "step": 3563 }, { "epoch": 0.54, "grad_norm": 1.2041804790496826, "learning_rate": 4.670607469453353e-06, "loss": 0.4629, "step": 3564 }, { "epoch": 0.54, "grad_norm": 0.9935782551765442, "learning_rate": 4.668180090215088e-06, "loss": 0.4936, "step": 3565 }, { "epoch": 0.54, "grad_norm": 1.1399400234222412, "learning_rate": 4.665752789525813e-06, "loss": 0.481, "step": 3566 }, { "epoch": 0.54, "grad_norm": 1.0705718994140625, "learning_rate": 4.663325567960123e-06, "loss": 0.4093, "step": 3567 }, { "epoch": 0.54, "grad_norm": 3.2244465351104736, "learning_rate": 4.660898426092592e-06, "loss": 0.4861, "step": 3568 }, { "epoch": 0.54, "grad_norm": 1.0842188596725464, "learning_rate": 4.658471364497781e-06, "loss": 0.4313, "step": 3569 }, { "epoch": 0.54, "grad_norm": 1.1175960302352905, "learning_rate": 4.656044383750227e-06, "loss": 0.4992, "step": 3570 }, { "epoch": 0.54, "grad_norm": 1.2547987699508667, "learning_rate": 4.653617484424448e-06, "loss": 0.4793, "step": 3571 }, { "epoch": 0.54, "grad_norm": 1.268438458442688, "learning_rate": 4.6511906670949445e-06, "loss": 0.7493, "step": 3572 }, { "epoch": 0.54, "grad_norm": 2.419137477874756, "learning_rate": 4.648763932336199e-06, "loss": 0.5489, "step": 3573 }, { "epoch": 0.54, "grad_norm": 1.1346991062164307, "learning_rate": 4.646337280722668e-06, "loss": 0.5071, "step": 3574 }, { "epoch": 0.54, "grad_norm": 1.3695330619812012, "learning_rate": 4.643910712828796e-06, "loss": 0.5055, "step": 3575 }, { "epoch": 0.54, "grad_norm": 1.1132367849349976, "learning_rate": 4.641484229229003e-06, "loss": 0.4944, "step": 3576 }, { "epoch": 0.54, "grad_norm": 1.1831494569778442, "learning_rate": 4.639057830497691e-06, "loss": 0.4516, "step": 3577 }, { "epoch": 0.54, "grad_norm": 1.1174263954162598, "learning_rate": 4.6366315172092405e-06, "loss": 0.5122, "step": 3578 }, { "epoch": 0.54, "grad_norm": 0.9551548957824707, "learning_rate": 4.634205289938013e-06, "loss": 0.3948, "step": 3579 }, { "epoch": 0.54, "grad_norm": 1.0749961137771606, "learning_rate": 4.631779149258351e-06, "loss": 0.4772, "step": 3580 }, { "epoch": 0.54, "grad_norm": 1.3080700635910034, "learning_rate": 4.629353095744573e-06, "loss": 0.4447, "step": 3581 }, { "epoch": 0.54, "grad_norm": 1.1417372226715088, "learning_rate": 4.6269271299709794e-06, "loss": 0.4288, "step": 3582 }, { "epoch": 0.54, "grad_norm": 1.0118499994277954, "learning_rate": 4.624501252511848e-06, "loss": 0.524, "step": 3583 }, { "epoch": 0.54, "grad_norm": 1.8071208000183105, "learning_rate": 4.622075463941438e-06, "loss": 0.4893, "step": 3584 }, { "epoch": 0.54, "grad_norm": 1.1442811489105225, "learning_rate": 4.619649764833987e-06, "loss": 0.4961, "step": 3585 }, { "epoch": 0.54, "grad_norm": 1.847528338432312, "learning_rate": 4.617224155763709e-06, "loss": 0.502, "step": 3586 }, { "epoch": 0.54, "grad_norm": 1.2129961252212524, "learning_rate": 4.614798637304798e-06, "loss": 0.5204, "step": 3587 }, { "epoch": 0.54, "grad_norm": 1.4450534582138062, "learning_rate": 4.612373210031428e-06, "loss": 0.5544, "step": 3588 }, { "epoch": 0.54, "grad_norm": 1.640673279762268, "learning_rate": 4.609947874517751e-06, "loss": 0.4679, "step": 3589 }, { "epoch": 0.54, "grad_norm": 1.0793776512145996, "learning_rate": 4.607522631337895e-06, "loss": 0.5212, "step": 3590 }, { "epoch": 0.54, "grad_norm": 1.0915944576263428, "learning_rate": 4.6050974810659684e-06, "loss": 0.4187, "step": 3591 }, { "epoch": 0.54, "grad_norm": 1.300745964050293, "learning_rate": 4.602672424276059e-06, "loss": 0.5294, "step": 3592 }, { "epoch": 0.54, "grad_norm": 1.1799927949905396, "learning_rate": 4.600247461542228e-06, "loss": 0.4089, "step": 3593 }, { "epoch": 0.54, "grad_norm": 1.2411447763442993, "learning_rate": 4.597822593438519e-06, "loss": 0.5297, "step": 3594 }, { "epoch": 0.54, "grad_norm": 1.3749088048934937, "learning_rate": 4.595397820538949e-06, "loss": 0.5035, "step": 3595 }, { "epoch": 0.54, "grad_norm": 1.6185815334320068, "learning_rate": 4.592973143417515e-06, "loss": 0.4598, "step": 3596 }, { "epoch": 0.54, "grad_norm": 1.1664786338806152, "learning_rate": 4.59054856264819e-06, "loss": 0.4945, "step": 3597 }, { "epoch": 0.54, "grad_norm": 1.136676549911499, "learning_rate": 4.588124078804929e-06, "loss": 0.4799, "step": 3598 }, { "epoch": 0.54, "grad_norm": 1.2024986743927002, "learning_rate": 4.585699692461655e-06, "loss": 0.725, "step": 3599 }, { "epoch": 0.54, "grad_norm": 1.1182255744934082, "learning_rate": 4.5832754041922764e-06, "loss": 0.4639, "step": 3600 }, { "epoch": 0.54, "grad_norm": 1.210958480834961, "learning_rate": 4.580851214570672e-06, "loss": 0.7094, "step": 3601 }, { "epoch": 0.54, "grad_norm": 1.213483214378357, "learning_rate": 4.5784271241707015e-06, "loss": 0.7145, "step": 3602 }, { "epoch": 0.54, "grad_norm": 0.9992721080780029, "learning_rate": 4.576003133566202e-06, "loss": 0.5303, "step": 3603 }, { "epoch": 0.54, "grad_norm": 1.048439621925354, "learning_rate": 4.573579243330982e-06, "loss": 0.4146, "step": 3604 }, { "epoch": 0.54, "grad_norm": 1.1947957277297974, "learning_rate": 4.5711554540388305e-06, "loss": 0.535, "step": 3605 }, { "epoch": 0.54, "grad_norm": 1.3493037223815918, "learning_rate": 4.568731766263511e-06, "loss": 0.5535, "step": 3606 }, { "epoch": 0.54, "grad_norm": 1.739652395248413, "learning_rate": 4.566308180578763e-06, "loss": 0.5098, "step": 3607 }, { "epoch": 0.54, "grad_norm": 1.5355932712554932, "learning_rate": 4.563884697558301e-06, "loss": 0.5371, "step": 3608 }, { "epoch": 0.54, "grad_norm": 1.1120346784591675, "learning_rate": 4.561461317775817e-06, "loss": 0.5314, "step": 3609 }, { "epoch": 0.54, "grad_norm": 1.2004469633102417, "learning_rate": 4.559038041804979e-06, "loss": 0.5151, "step": 3610 }, { "epoch": 0.54, "grad_norm": 1.56168532371521, "learning_rate": 4.556614870219426e-06, "loss": 0.4356, "step": 3611 }, { "epoch": 0.54, "grad_norm": 1.262870192527771, "learning_rate": 4.554191803592778e-06, "loss": 0.5134, "step": 3612 }, { "epoch": 0.54, "grad_norm": 1.0542292594909668, "learning_rate": 4.551768842498623e-06, "loss": 0.4473, "step": 3613 }, { "epoch": 0.54, "grad_norm": 1.189273715019226, "learning_rate": 4.549345987510537e-06, "loss": 0.4702, "step": 3614 }, { "epoch": 0.54, "grad_norm": 1.4278892278671265, "learning_rate": 4.546923239202057e-06, "loss": 0.521, "step": 3615 }, { "epoch": 0.54, "grad_norm": 1.0720741748809814, "learning_rate": 4.5445005981467e-06, "loss": 0.4754, "step": 3616 }, { "epoch": 0.54, "grad_norm": 1.127603530883789, "learning_rate": 4.542078064917959e-06, "loss": 0.5522, "step": 3617 }, { "epoch": 0.54, "grad_norm": 1.3337557315826416, "learning_rate": 4.539655640089301e-06, "loss": 0.4835, "step": 3618 }, { "epoch": 0.54, "grad_norm": 1.2786260843276978, "learning_rate": 4.5372333242341645e-06, "loss": 0.4767, "step": 3619 }, { "epoch": 0.54, "grad_norm": 0.98737633228302, "learning_rate": 4.534811117925967e-06, "loss": 0.3938, "step": 3620 }, { "epoch": 0.54, "grad_norm": 1.1462465524673462, "learning_rate": 4.532389021738095e-06, "loss": 0.5035, "step": 3621 }, { "epoch": 0.54, "grad_norm": 1.2107495069503784, "learning_rate": 4.529967036243914e-06, "loss": 0.4561, "step": 3622 }, { "epoch": 0.54, "grad_norm": 1.6230634450912476, "learning_rate": 4.527545162016758e-06, "loss": 0.5575, "step": 3623 }, { "epoch": 0.54, "grad_norm": 1.2815349102020264, "learning_rate": 4.525123399629936e-06, "loss": 0.4834, "step": 3624 }, { "epoch": 0.54, "grad_norm": 1.1893008947372437, "learning_rate": 4.522701749656736e-06, "loss": 0.4216, "step": 3625 }, { "epoch": 0.54, "grad_norm": 5.893732070922852, "learning_rate": 4.520280212670414e-06, "loss": 0.4792, "step": 3626 }, { "epoch": 0.54, "grad_norm": 1.1114970445632935, "learning_rate": 4.517858789244202e-06, "loss": 0.5512, "step": 3627 }, { "epoch": 0.54, "grad_norm": 1.2407941818237305, "learning_rate": 4.515437479951299e-06, "loss": 0.4854, "step": 3628 }, { "epoch": 0.55, "grad_norm": 1.1229435205459595, "learning_rate": 4.513016285364884e-06, "loss": 0.4757, "step": 3629 }, { "epoch": 0.55, "grad_norm": 1.135744571685791, "learning_rate": 4.510595206058107e-06, "loss": 0.447, "step": 3630 }, { "epoch": 0.55, "grad_norm": 1.033900499343872, "learning_rate": 4.508174242604089e-06, "loss": 0.5266, "step": 3631 }, { "epoch": 0.55, "grad_norm": 1.4774315357208252, "learning_rate": 4.505753395575926e-06, "loss": 0.4592, "step": 3632 }, { "epoch": 0.55, "grad_norm": 0.9841536283493042, "learning_rate": 4.503332665546684e-06, "loss": 0.4002, "step": 3633 }, { "epoch": 0.55, "grad_norm": 1.0459158420562744, "learning_rate": 4.500912053089404e-06, "loss": 0.4481, "step": 3634 }, { "epoch": 0.55, "grad_norm": 1.2268215417861938, "learning_rate": 4.498491558777094e-06, "loss": 0.521, "step": 3635 }, { "epoch": 0.55, "grad_norm": 1.0198802947998047, "learning_rate": 4.496071183182742e-06, "loss": 0.4364, "step": 3636 }, { "epoch": 0.55, "grad_norm": 1.6091887950897217, "learning_rate": 4.4936509268793014e-06, "loss": 0.3938, "step": 3637 }, { "epoch": 0.55, "grad_norm": 1.0250533819198608, "learning_rate": 4.4912307904397e-06, "loss": 0.4909, "step": 3638 }, { "epoch": 0.55, "grad_norm": 1.136307954788208, "learning_rate": 4.488810774436838e-06, "loss": 0.5282, "step": 3639 }, { "epoch": 0.55, "grad_norm": 1.0428262948989868, "learning_rate": 4.486390879443583e-06, "loss": 0.4659, "step": 3640 }, { "epoch": 0.55, "grad_norm": 1.4315192699432373, "learning_rate": 4.483971106032778e-06, "loss": 0.5348, "step": 3641 }, { "epoch": 0.55, "grad_norm": 1.0431160926818848, "learning_rate": 4.4815514547772365e-06, "loss": 0.441, "step": 3642 }, { "epoch": 0.55, "grad_norm": 1.188273549079895, "learning_rate": 4.479131926249743e-06, "loss": 0.4603, "step": 3643 }, { "epoch": 0.55, "grad_norm": 1.6332486867904663, "learning_rate": 4.47671252102305e-06, "loss": 0.5581, "step": 3644 }, { "epoch": 0.55, "grad_norm": 1.320725917816162, "learning_rate": 4.474293239669886e-06, "loss": 0.499, "step": 3645 }, { "epoch": 0.55, "grad_norm": 1.222602128982544, "learning_rate": 4.4718740827629456e-06, "loss": 0.4477, "step": 3646 }, { "epoch": 0.55, "grad_norm": 1.8398605585098267, "learning_rate": 4.469455050874898e-06, "loss": 0.5221, "step": 3647 }, { "epoch": 0.55, "grad_norm": 1.1197032928466797, "learning_rate": 4.467036144578381e-06, "loss": 0.5172, "step": 3648 }, { "epoch": 0.55, "grad_norm": 1.119875431060791, "learning_rate": 4.464617364445999e-06, "loss": 0.5096, "step": 3649 }, { "epoch": 0.55, "grad_norm": 1.0345312356948853, "learning_rate": 4.462198711050333e-06, "loss": 0.4448, "step": 3650 }, { "epoch": 0.55, "grad_norm": 0.9011099934577942, "learning_rate": 4.45978018496393e-06, "loss": 0.424, "step": 3651 }, { "epoch": 0.55, "grad_norm": 1.2938312292099, "learning_rate": 4.457361786759306e-06, "loss": 0.5596, "step": 3652 }, { "epoch": 0.55, "grad_norm": 1.6118963956832886, "learning_rate": 4.45494351700895e-06, "loss": 0.5091, "step": 3653 }, { "epoch": 0.55, "grad_norm": 0.9616488218307495, "learning_rate": 4.452525376285319e-06, "loss": 0.4448, "step": 3654 }, { "epoch": 0.55, "grad_norm": 0.9920531511306763, "learning_rate": 4.450107365160839e-06, "loss": 0.4472, "step": 3655 }, { "epoch": 0.55, "grad_norm": 1.2984455823898315, "learning_rate": 4.447689484207907e-06, "loss": 0.4653, "step": 3656 }, { "epoch": 0.55, "grad_norm": 1.3160547018051147, "learning_rate": 4.445271733998886e-06, "loss": 0.5561, "step": 3657 }, { "epoch": 0.55, "grad_norm": 1.0955770015716553, "learning_rate": 4.442854115106112e-06, "loss": 0.4497, "step": 3658 }, { "epoch": 0.55, "grad_norm": 1.0406646728515625, "learning_rate": 4.440436628101887e-06, "loss": 0.4045, "step": 3659 }, { "epoch": 0.55, "grad_norm": 1.1905605792999268, "learning_rate": 4.438019273558484e-06, "loss": 0.5073, "step": 3660 }, { "epoch": 0.55, "grad_norm": 0.8000094294548035, "learning_rate": 4.435602052048142e-06, "loss": 0.4572, "step": 3661 }, { "epoch": 0.55, "grad_norm": 1.4513583183288574, "learning_rate": 4.43318496414307e-06, "loss": 0.5863, "step": 3662 }, { "epoch": 0.55, "grad_norm": 1.1681630611419678, "learning_rate": 4.4307680104154466e-06, "loss": 0.5318, "step": 3663 }, { "epoch": 0.55, "grad_norm": 1.3543179035186768, "learning_rate": 4.428351191437416e-06, "loss": 0.5359, "step": 3664 }, { "epoch": 0.55, "grad_norm": 2.1085519790649414, "learning_rate": 4.425934507781091e-06, "loss": 0.5284, "step": 3665 }, { "epoch": 0.55, "grad_norm": 1.148541808128357, "learning_rate": 4.423517960018555e-06, "loss": 0.7266, "step": 3666 }, { "epoch": 0.55, "grad_norm": 1.1677597761154175, "learning_rate": 4.421101548721857e-06, "loss": 0.4773, "step": 3667 }, { "epoch": 0.55, "grad_norm": 1.5128037929534912, "learning_rate": 4.418685274463012e-06, "loss": 0.5059, "step": 3668 }, { "epoch": 0.55, "grad_norm": 1.0811731815338135, "learning_rate": 4.4162691378140085e-06, "loss": 0.4853, "step": 3669 }, { "epoch": 0.55, "grad_norm": 1.2893410921096802, "learning_rate": 4.413853139346796e-06, "loss": 0.436, "step": 3670 }, { "epoch": 0.55, "grad_norm": 1.9406447410583496, "learning_rate": 4.411437279633295e-06, "loss": 0.4431, "step": 3671 }, { "epoch": 0.55, "grad_norm": 1.1295605897903442, "learning_rate": 4.409021559245391e-06, "loss": 0.5055, "step": 3672 }, { "epoch": 0.55, "grad_norm": 1.1899131536483765, "learning_rate": 4.406605978754939e-06, "loss": 0.4408, "step": 3673 }, { "epoch": 0.55, "grad_norm": 1.213832139968872, "learning_rate": 4.404190538733758e-06, "loss": 0.5337, "step": 3674 }, { "epoch": 0.55, "grad_norm": 1.1128865480422974, "learning_rate": 4.401775239753635e-06, "loss": 0.4279, "step": 3675 }, { "epoch": 0.55, "grad_norm": 1.7852513790130615, "learning_rate": 4.399360082386326e-06, "loss": 0.4903, "step": 3676 }, { "epoch": 0.55, "grad_norm": 1.1025017499923706, "learning_rate": 4.39694506720355e-06, "loss": 0.5044, "step": 3677 }, { "epoch": 0.55, "grad_norm": 1.1413848400115967, "learning_rate": 4.394530194776992e-06, "loss": 0.4312, "step": 3678 }, { "epoch": 0.55, "grad_norm": 1.0688077211380005, "learning_rate": 4.392115465678305e-06, "loss": 0.4692, "step": 3679 }, { "epoch": 0.55, "grad_norm": 1.1997781991958618, "learning_rate": 4.389700880479112e-06, "loss": 0.7489, "step": 3680 }, { "epoch": 0.55, "grad_norm": 1.283198356628418, "learning_rate": 4.387286439750994e-06, "loss": 0.4556, "step": 3681 }, { "epoch": 0.55, "grad_norm": 1.0923196077346802, "learning_rate": 4.384872144065503e-06, "loss": 0.5612, "step": 3682 }, { "epoch": 0.55, "grad_norm": 1.455020546913147, "learning_rate": 4.3824579939941545e-06, "loss": 0.5287, "step": 3683 }, { "epoch": 0.55, "grad_norm": 1.1079142093658447, "learning_rate": 4.3800439901084305e-06, "loss": 0.4673, "step": 3684 }, { "epoch": 0.55, "grad_norm": 1.433843731880188, "learning_rate": 4.377630132979779e-06, "loss": 0.5465, "step": 3685 }, { "epoch": 0.55, "grad_norm": 1.5118646621704102, "learning_rate": 4.375216423179612e-06, "loss": 0.5298, "step": 3686 }, { "epoch": 0.55, "grad_norm": 1.15177583694458, "learning_rate": 4.372802861279307e-06, "loss": 0.5211, "step": 3687 }, { "epoch": 0.55, "grad_norm": 1.577227234840393, "learning_rate": 4.370389447850208e-06, "loss": 0.4709, "step": 3688 }, { "epoch": 0.55, "grad_norm": 1.0096338987350464, "learning_rate": 4.36797618346362e-06, "loss": 0.4617, "step": 3689 }, { "epoch": 0.55, "grad_norm": 1.547852873802185, "learning_rate": 4.365563068690816e-06, "loss": 0.4767, "step": 3690 }, { "epoch": 0.55, "grad_norm": 1.195367693901062, "learning_rate": 4.363150104103031e-06, "loss": 0.3843, "step": 3691 }, { "epoch": 0.55, "grad_norm": 1.3660132884979248, "learning_rate": 4.3607372902714715e-06, "loss": 0.4216, "step": 3692 }, { "epoch": 0.55, "grad_norm": 1.2622636556625366, "learning_rate": 4.358324627767298e-06, "loss": 0.507, "step": 3693 }, { "epoch": 0.55, "grad_norm": 1.0991339683532715, "learning_rate": 4.355912117161643e-06, "loss": 0.4848, "step": 3694 }, { "epoch": 0.56, "grad_norm": 1.0179349184036255, "learning_rate": 4.3534997590256e-06, "loss": 0.4723, "step": 3695 }, { "epoch": 0.56, "grad_norm": 1.2452044486999512, "learning_rate": 4.351087553930224e-06, "loss": 0.5091, "step": 3696 }, { "epoch": 0.56, "grad_norm": 1.5781954526901245, "learning_rate": 4.3486755024465395e-06, "loss": 0.4452, "step": 3697 }, { "epoch": 0.56, "grad_norm": 1.4991416931152344, "learning_rate": 4.346263605145531e-06, "loss": 0.5019, "step": 3698 }, { "epoch": 0.56, "grad_norm": 1.1843693256378174, "learning_rate": 4.343851862598144e-06, "loss": 0.4739, "step": 3699 }, { "epoch": 0.56, "grad_norm": 1.182368516921997, "learning_rate": 4.341440275375296e-06, "loss": 0.4991, "step": 3700 }, { "epoch": 0.56, "grad_norm": 1.6146924495697021, "learning_rate": 4.339028844047858e-06, "loss": 0.5267, "step": 3701 }, { "epoch": 0.56, "grad_norm": 1.739141821861267, "learning_rate": 4.3366175691866655e-06, "loss": 0.5604, "step": 3702 }, { "epoch": 0.56, "grad_norm": 1.3058322668075562, "learning_rate": 4.334206451362526e-06, "loss": 0.5607, "step": 3703 }, { "epoch": 0.56, "grad_norm": 1.1822038888931274, "learning_rate": 4.331795491146201e-06, "loss": 0.7124, "step": 3704 }, { "epoch": 0.56, "grad_norm": 1.2677783966064453, "learning_rate": 4.329384689108419e-06, "loss": 0.493, "step": 3705 }, { "epoch": 0.56, "grad_norm": 0.9963549375534058, "learning_rate": 4.326974045819865e-06, "loss": 0.4845, "step": 3706 }, { "epoch": 0.56, "grad_norm": 1.3612446784973145, "learning_rate": 4.324563561851195e-06, "loss": 0.4544, "step": 3707 }, { "epoch": 0.56, "grad_norm": 1.2747682332992554, "learning_rate": 4.322153237773019e-06, "loss": 0.5111, "step": 3708 }, { "epoch": 0.56, "grad_norm": 1.0786359310150146, "learning_rate": 4.319743074155916e-06, "loss": 0.4744, "step": 3709 }, { "epoch": 0.56, "grad_norm": 1.7064590454101562, "learning_rate": 4.3173330715704245e-06, "loss": 0.5379, "step": 3710 }, { "epoch": 0.56, "grad_norm": 1.129787564277649, "learning_rate": 4.314923230587043e-06, "loss": 0.5786, "step": 3711 }, { "epoch": 0.56, "grad_norm": 1.3340507745742798, "learning_rate": 4.312513551776233e-06, "loss": 0.5401, "step": 3712 }, { "epoch": 0.56, "grad_norm": 1.280794620513916, "learning_rate": 4.310104035708418e-06, "loss": 0.4322, "step": 3713 }, { "epoch": 0.56, "grad_norm": 1.0990432500839233, "learning_rate": 4.307694682953984e-06, "loss": 0.381, "step": 3714 }, { "epoch": 0.56, "grad_norm": 1.4362716674804688, "learning_rate": 4.305285494083277e-06, "loss": 0.4475, "step": 3715 }, { "epoch": 0.56, "grad_norm": 1.1383593082427979, "learning_rate": 4.302876469666604e-06, "loss": 0.4699, "step": 3716 }, { "epoch": 0.56, "grad_norm": 1.2737904787063599, "learning_rate": 4.300467610274234e-06, "loss": 0.4901, "step": 3717 }, { "epoch": 0.56, "grad_norm": 1.3057793378829956, "learning_rate": 4.298058916476397e-06, "loss": 0.5408, "step": 3718 }, { "epoch": 0.56, "grad_norm": 1.175628900527954, "learning_rate": 4.29565038884328e-06, "loss": 0.4824, "step": 3719 }, { "epoch": 0.56, "grad_norm": 1.1385914087295532, "learning_rate": 4.293242027945036e-06, "loss": 0.4196, "step": 3720 }, { "epoch": 0.56, "grad_norm": 1.1534909009933472, "learning_rate": 4.290833834351778e-06, "loss": 0.4306, "step": 3721 }, { "epoch": 0.56, "grad_norm": 1.3732469081878662, "learning_rate": 4.2884258086335755e-06, "loss": 0.4866, "step": 3722 }, { "epoch": 0.56, "grad_norm": 4.449821472167969, "learning_rate": 4.286017951360461e-06, "loss": 0.4501, "step": 3723 }, { "epoch": 0.56, "grad_norm": 1.3180469274520874, "learning_rate": 4.283610263102427e-06, "loss": 0.448, "step": 3724 }, { "epoch": 0.56, "grad_norm": 1.6037380695343018, "learning_rate": 4.281202744429426e-06, "loss": 0.4724, "step": 3725 }, { "epoch": 0.56, "grad_norm": 1.27145516872406, "learning_rate": 4.278795395911369e-06, "loss": 0.4445, "step": 3726 }, { "epoch": 0.56, "grad_norm": 1.290566325187683, "learning_rate": 4.276388218118129e-06, "loss": 0.4416, "step": 3727 }, { "epoch": 0.56, "grad_norm": 1.027188777923584, "learning_rate": 4.2739812116195375e-06, "loss": 0.7004, "step": 3728 }, { "epoch": 0.56, "grad_norm": 1.2497864961624146, "learning_rate": 4.271574376985386e-06, "loss": 0.457, "step": 3729 }, { "epoch": 0.56, "grad_norm": 1.2710908651351929, "learning_rate": 4.269167714785421e-06, "loss": 0.454, "step": 3730 }, { "epoch": 0.56, "grad_norm": 1.293735384941101, "learning_rate": 4.266761225589354e-06, "loss": 0.4252, "step": 3731 }, { "epoch": 0.56, "grad_norm": 1.2464838027954102, "learning_rate": 4.264354909966854e-06, "loss": 0.7505, "step": 3732 }, { "epoch": 0.56, "grad_norm": 1.3137589693069458, "learning_rate": 4.261948768487547e-06, "loss": 0.7461, "step": 3733 }, { "epoch": 0.56, "grad_norm": 1.3090187311172485, "learning_rate": 4.259542801721021e-06, "loss": 0.5008, "step": 3734 }, { "epoch": 0.56, "grad_norm": 1.149020791053772, "learning_rate": 4.2571370102368165e-06, "loss": 0.4693, "step": 3735 }, { "epoch": 0.56, "grad_norm": 1.0610713958740234, "learning_rate": 4.254731394604441e-06, "loss": 0.4691, "step": 3736 }, { "epoch": 0.56, "grad_norm": 1.2423663139343262, "learning_rate": 4.252325955393355e-06, "loss": 0.4404, "step": 3737 }, { "epoch": 0.56, "grad_norm": 1.0023943185806274, "learning_rate": 4.249920693172978e-06, "loss": 0.4745, "step": 3738 }, { "epoch": 0.56, "grad_norm": 1.2744433879852295, "learning_rate": 4.2475156085126865e-06, "loss": 0.3994, "step": 3739 }, { "epoch": 0.56, "grad_norm": 1.4508423805236816, "learning_rate": 4.245110701981818e-06, "loss": 0.57, "step": 3740 }, { "epoch": 0.56, "grad_norm": 1.3708081245422363, "learning_rate": 4.242705974149666e-06, "loss": 0.4709, "step": 3741 }, { "epoch": 0.56, "grad_norm": 1.1977401971817017, "learning_rate": 4.240301425585483e-06, "loss": 0.7371, "step": 3742 }, { "epoch": 0.56, "grad_norm": 1.0612579584121704, "learning_rate": 4.237897056858475e-06, "loss": 0.5102, "step": 3743 }, { "epoch": 0.56, "grad_norm": 1.415611982345581, "learning_rate": 4.23549286853781e-06, "loss": 0.4894, "step": 3744 }, { "epoch": 0.56, "grad_norm": 1.3083995580673218, "learning_rate": 4.233088861192611e-06, "loss": 0.4616, "step": 3745 }, { "epoch": 0.56, "grad_norm": 1.3276913166046143, "learning_rate": 4.2306850353919594e-06, "loss": 0.535, "step": 3746 }, { "epoch": 0.56, "grad_norm": 1.437239408493042, "learning_rate": 4.228281391704894e-06, "loss": 0.4756, "step": 3747 }, { "epoch": 0.56, "grad_norm": 1.1742632389068604, "learning_rate": 4.225877930700408e-06, "loss": 0.4667, "step": 3748 }, { "epoch": 0.56, "grad_norm": 2.050743818283081, "learning_rate": 4.223474652947453e-06, "loss": 0.518, "step": 3749 }, { "epoch": 0.56, "grad_norm": 1.4384630918502808, "learning_rate": 4.221071559014939e-06, "loss": 0.4914, "step": 3750 }, { "epoch": 0.56, "grad_norm": 2.496979236602783, "learning_rate": 4.218668649471728e-06, "loss": 0.4825, "step": 3751 }, { "epoch": 0.56, "grad_norm": 1.224778652191162, "learning_rate": 4.216265924886642e-06, "loss": 0.53, "step": 3752 }, { "epoch": 0.56, "grad_norm": 1.179530382156372, "learning_rate": 4.213863385828459e-06, "loss": 0.5163, "step": 3753 }, { "epoch": 0.56, "grad_norm": 1.0671130418777466, "learning_rate": 4.211461032865914e-06, "loss": 0.4409, "step": 3754 }, { "epoch": 0.56, "grad_norm": 1.2371846437454224, "learning_rate": 4.2090588665676905e-06, "loss": 0.5597, "step": 3755 }, { "epoch": 0.56, "grad_norm": 1.282738447189331, "learning_rate": 4.206656887502437e-06, "loss": 0.4713, "step": 3756 }, { "epoch": 0.56, "grad_norm": 1.3211393356323242, "learning_rate": 4.204255096238753e-06, "loss": 0.5242, "step": 3757 }, { "epoch": 0.56, "grad_norm": 1.1330759525299072, "learning_rate": 4.201853493345198e-06, "loss": 0.4711, "step": 3758 }, { "epoch": 0.56, "grad_norm": 1.2359209060668945, "learning_rate": 4.199452079390281e-06, "loss": 0.4796, "step": 3759 }, { "epoch": 0.56, "grad_norm": 2.097174882888794, "learning_rate": 4.197050854942469e-06, "loss": 0.514, "step": 3760 }, { "epoch": 0.56, "grad_norm": 1.626908540725708, "learning_rate": 4.194649820570184e-06, "loss": 0.3924, "step": 3761 }, { "epoch": 0.57, "grad_norm": 1.7967915534973145, "learning_rate": 4.192248976841804e-06, "loss": 0.4839, "step": 3762 }, { "epoch": 0.57, "grad_norm": 1.0979981422424316, "learning_rate": 4.18984832432566e-06, "loss": 0.4343, "step": 3763 }, { "epoch": 0.57, "grad_norm": 2.311352252960205, "learning_rate": 4.187447863590039e-06, "loss": 0.4615, "step": 3764 }, { "epoch": 0.57, "grad_norm": 1.2724932432174683, "learning_rate": 4.185047595203183e-06, "loss": 0.488, "step": 3765 }, { "epoch": 0.57, "grad_norm": 1.527817726135254, "learning_rate": 4.182647519733286e-06, "loss": 0.4974, "step": 3766 }, { "epoch": 0.57, "grad_norm": 0.9097291231155396, "learning_rate": 4.1802476377484994e-06, "loss": 0.4547, "step": 3767 }, { "epoch": 0.57, "grad_norm": 1.1336126327514648, "learning_rate": 4.177847949816927e-06, "loss": 0.5074, "step": 3768 }, { "epoch": 0.57, "grad_norm": 1.7430492639541626, "learning_rate": 4.175448456506626e-06, "loss": 0.5746, "step": 3769 }, { "epoch": 0.57, "grad_norm": 1.1747572422027588, "learning_rate": 4.17304915838561e-06, "loss": 0.4614, "step": 3770 }, { "epoch": 0.57, "grad_norm": 1.1794378757476807, "learning_rate": 4.170650056021847e-06, "loss": 0.7305, "step": 3771 }, { "epoch": 0.57, "grad_norm": 1.5288079977035522, "learning_rate": 4.168251149983255e-06, "loss": 0.5947, "step": 3772 }, { "epoch": 0.57, "grad_norm": 1.1395277976989746, "learning_rate": 4.165852440837705e-06, "loss": 0.4103, "step": 3773 }, { "epoch": 0.57, "grad_norm": 1.1309075355529785, "learning_rate": 4.163453929153026e-06, "loss": 0.5938, "step": 3774 }, { "epoch": 0.57, "grad_norm": 1.291271448135376, "learning_rate": 4.161055615496999e-06, "loss": 0.5206, "step": 3775 }, { "epoch": 0.57, "grad_norm": 1.3064205646514893, "learning_rate": 4.158657500437353e-06, "loss": 0.5336, "step": 3776 }, { "epoch": 0.57, "grad_norm": 1.4791362285614014, "learning_rate": 4.156259584541778e-06, "loss": 0.5406, "step": 3777 }, { "epoch": 0.57, "grad_norm": 1.0422056913375854, "learning_rate": 4.153861868377912e-06, "loss": 0.4539, "step": 3778 }, { "epoch": 0.57, "grad_norm": 0.9899629354476929, "learning_rate": 4.151464352513344e-06, "loss": 0.4819, "step": 3779 }, { "epoch": 0.57, "grad_norm": 1.2186529636383057, "learning_rate": 4.149067037515618e-06, "loss": 0.4604, "step": 3780 }, { "epoch": 0.57, "grad_norm": 1.1050870418548584, "learning_rate": 4.146669923952236e-06, "loss": 0.5351, "step": 3781 }, { "epoch": 0.57, "grad_norm": 1.4484353065490723, "learning_rate": 4.1442730123906425e-06, "loss": 0.4729, "step": 3782 }, { "epoch": 0.57, "grad_norm": 1.2669978141784668, "learning_rate": 4.141876303398241e-06, "loss": 0.5135, "step": 3783 }, { "epoch": 0.57, "grad_norm": 1.1426337957382202, "learning_rate": 4.1394797975423825e-06, "loss": 0.4736, "step": 3784 }, { "epoch": 0.57, "grad_norm": 1.1588364839553833, "learning_rate": 4.137083495390372e-06, "loss": 0.488, "step": 3785 }, { "epoch": 0.57, "grad_norm": 1.1204323768615723, "learning_rate": 4.134687397509468e-06, "loss": 0.4408, "step": 3786 }, { "epoch": 0.57, "grad_norm": 1.4077496528625488, "learning_rate": 4.132291504466878e-06, "loss": 0.4709, "step": 3787 }, { "epoch": 0.57, "grad_norm": 1.1349586248397827, "learning_rate": 4.129895816829761e-06, "loss": 0.5364, "step": 3788 }, { "epoch": 0.57, "grad_norm": 1.1601486206054688, "learning_rate": 4.127500335165231e-06, "loss": 0.4267, "step": 3789 }, { "epoch": 0.57, "grad_norm": 1.3625330924987793, "learning_rate": 4.125105060040348e-06, "loss": 0.3917, "step": 3790 }, { "epoch": 0.57, "grad_norm": 1.7813637256622314, "learning_rate": 4.122709992022127e-06, "loss": 0.5249, "step": 3791 }, { "epoch": 0.57, "grad_norm": 1.164196491241455, "learning_rate": 4.1203151316775324e-06, "loss": 0.4869, "step": 3792 }, { "epoch": 0.57, "grad_norm": 0.9772469401359558, "learning_rate": 4.1179204795734805e-06, "loss": 0.4176, "step": 3793 }, { "epoch": 0.57, "grad_norm": 1.3416606187820435, "learning_rate": 4.1155260362768364e-06, "loss": 0.549, "step": 3794 }, { "epoch": 0.57, "grad_norm": 1.3768384456634521, "learning_rate": 4.11313180235442e-06, "loss": 0.5217, "step": 3795 }, { "epoch": 0.57, "grad_norm": 1.0291115045547485, "learning_rate": 4.110737778372993e-06, "loss": 0.5177, "step": 3796 }, { "epoch": 0.57, "grad_norm": 1.1956169605255127, "learning_rate": 4.108343964899277e-06, "loss": 0.7549, "step": 3797 }, { "epoch": 0.57, "grad_norm": 1.2919418811798096, "learning_rate": 4.105950362499939e-06, "loss": 0.5256, "step": 3798 }, { "epoch": 0.57, "grad_norm": 1.2070749998092651, "learning_rate": 4.103556971741596e-06, "loss": 0.5089, "step": 3799 }, { "epoch": 0.57, "grad_norm": 1.095445156097412, "learning_rate": 4.101163793190815e-06, "loss": 0.5483, "step": 3800 }, { "epoch": 0.57, "grad_norm": 1.1706151962280273, "learning_rate": 4.098770827414116e-06, "loss": 0.474, "step": 3801 }, { "epoch": 0.57, "grad_norm": 1.2283607721328735, "learning_rate": 4.096378074977962e-06, "loss": 0.4645, "step": 3802 }, { "epoch": 0.57, "grad_norm": 1.4041193723678589, "learning_rate": 4.093985536448774e-06, "loss": 0.4927, "step": 3803 }, { "epoch": 0.57, "grad_norm": 1.5160404443740845, "learning_rate": 4.091593212392916e-06, "loss": 0.4446, "step": 3804 }, { "epoch": 0.57, "grad_norm": 1.8295387029647827, "learning_rate": 4.0892011033767035e-06, "loss": 0.4743, "step": 3805 }, { "epoch": 0.57, "grad_norm": 1.3235379457473755, "learning_rate": 4.086809209966399e-06, "loss": 0.5237, "step": 3806 }, { "epoch": 0.57, "grad_norm": 1.2407703399658203, "learning_rate": 4.0844175327282195e-06, "loss": 0.4945, "step": 3807 }, { "epoch": 0.57, "grad_norm": 2.3160972595214844, "learning_rate": 4.0820260722283226e-06, "loss": 0.5041, "step": 3808 }, { "epoch": 0.57, "grad_norm": 1.1638071537017822, "learning_rate": 4.079634829032821e-06, "loss": 0.5415, "step": 3809 }, { "epoch": 0.57, "grad_norm": 1.4323960542678833, "learning_rate": 4.077243803707774e-06, "loss": 0.5239, "step": 3810 }, { "epoch": 0.57, "grad_norm": 2.7615249156951904, "learning_rate": 4.07485299681919e-06, "loss": 0.5304, "step": 3811 }, { "epoch": 0.57, "grad_norm": 1.0409661531448364, "learning_rate": 4.072462408933023e-06, "loss": 0.3707, "step": 3812 }, { "epoch": 0.57, "grad_norm": 1.2795569896697998, "learning_rate": 4.0700720406151784e-06, "loss": 0.4627, "step": 3813 }, { "epoch": 0.57, "grad_norm": 1.1956254243850708, "learning_rate": 4.067681892431509e-06, "loss": 0.5335, "step": 3814 }, { "epoch": 0.57, "grad_norm": 1.3183609247207642, "learning_rate": 4.065291964947814e-06, "loss": 0.532, "step": 3815 }, { "epoch": 0.57, "grad_norm": 1.2110507488250732, "learning_rate": 4.062902258729842e-06, "loss": 0.4625, "step": 3816 }, { "epoch": 0.57, "grad_norm": 1.1990821361541748, "learning_rate": 4.060512774343287e-06, "loss": 0.526, "step": 3817 }, { "epoch": 0.57, "grad_norm": 1.0676536560058594, "learning_rate": 4.058123512353793e-06, "loss": 0.5655, "step": 3818 }, { "epoch": 0.57, "grad_norm": 1.0226037502288818, "learning_rate": 4.0557344733269505e-06, "loss": 0.4839, "step": 3819 }, { "epoch": 0.57, "grad_norm": 1.0306028127670288, "learning_rate": 4.053345657828298e-06, "loss": 0.4936, "step": 3820 }, { "epoch": 0.57, "grad_norm": 1.1601213216781616, "learning_rate": 4.050957066423316e-06, "loss": 0.499, "step": 3821 }, { "epoch": 0.57, "grad_norm": 1.028503656387329, "learning_rate": 4.048568699677441e-06, "loss": 0.4125, "step": 3822 }, { "epoch": 0.57, "grad_norm": 1.154378890991211, "learning_rate": 4.0461805581560475e-06, "loss": 0.4909, "step": 3823 }, { "epoch": 0.57, "grad_norm": 1.630055546760559, "learning_rate": 4.043792642424461e-06, "loss": 0.4678, "step": 3824 }, { "epoch": 0.57, "grad_norm": 1.1997991800308228, "learning_rate": 4.041404953047957e-06, "loss": 0.4327, "step": 3825 }, { "epoch": 0.57, "grad_norm": 1.4280496835708618, "learning_rate": 4.0390174905917486e-06, "loss": 0.4758, "step": 3826 }, { "epoch": 0.57, "grad_norm": 1.3068969249725342, "learning_rate": 4.036630255621004e-06, "loss": 0.552, "step": 3827 }, { "epoch": 0.58, "grad_norm": 1.2542644739151, "learning_rate": 4.0342432487008304e-06, "loss": 0.4774, "step": 3828 }, { "epoch": 0.58, "grad_norm": 1.1051125526428223, "learning_rate": 4.031856470396286e-06, "loss": 0.747, "step": 3829 }, { "epoch": 0.58, "grad_norm": 1.0640491247177124, "learning_rate": 4.029469921272373e-06, "loss": 0.4634, "step": 3830 }, { "epoch": 0.58, "grad_norm": 1.2452778816223145, "learning_rate": 4.02708360189404e-06, "loss": 0.4644, "step": 3831 }, { "epoch": 0.58, "grad_norm": 1.2548110485076904, "learning_rate": 4.02469751282618e-06, "loss": 0.4659, "step": 3832 }, { "epoch": 0.58, "grad_norm": 1.231063961982727, "learning_rate": 4.022311654633631e-06, "loss": 0.4726, "step": 3833 }, { "epoch": 0.58, "grad_norm": 1.13233482837677, "learning_rate": 4.01992602788118e-06, "loss": 0.4521, "step": 3834 }, { "epoch": 0.58, "grad_norm": 1.1370478868484497, "learning_rate": 4.017540633133553e-06, "loss": 0.7297, "step": 3835 }, { "epoch": 0.58, "grad_norm": 1.1067222356796265, "learning_rate": 4.01515547095543e-06, "loss": 0.5043, "step": 3836 }, { "epoch": 0.58, "grad_norm": 1.1008124351501465, "learning_rate": 4.0127705419114275e-06, "loss": 0.4947, "step": 3837 }, { "epoch": 0.58, "grad_norm": 1.1569074392318726, "learning_rate": 4.0103858465661105e-06, "loss": 0.4921, "step": 3838 }, { "epoch": 0.58, "grad_norm": 1.1858386993408203, "learning_rate": 4.0080013854839885e-06, "loss": 0.5245, "step": 3839 }, { "epoch": 0.58, "grad_norm": 1.1594393253326416, "learning_rate": 4.005617159229515e-06, "loss": 0.5132, "step": 3840 }, { "epoch": 0.58, "grad_norm": 1.4084268808364868, "learning_rate": 4.003233168367088e-06, "loss": 0.5153, "step": 3841 }, { "epoch": 0.58, "grad_norm": 1.6241073608398438, "learning_rate": 4.000849413461051e-06, "loss": 0.4668, "step": 3842 }, { "epoch": 0.58, "grad_norm": 1.374433994293213, "learning_rate": 3.9984658950756896e-06, "loss": 0.5176, "step": 3843 }, { "epoch": 0.58, "grad_norm": 1.3854119777679443, "learning_rate": 3.996082613775236e-06, "loss": 0.4723, "step": 3844 }, { "epoch": 0.58, "grad_norm": 1.1149280071258545, "learning_rate": 3.993699570123864e-06, "loss": 0.5087, "step": 3845 }, { "epoch": 0.58, "grad_norm": 1.2765733003616333, "learning_rate": 3.991316764685687e-06, "loss": 0.5019, "step": 3846 }, { "epoch": 0.58, "grad_norm": 1.1273398399353027, "learning_rate": 3.988934198024775e-06, "loss": 0.4681, "step": 3847 }, { "epoch": 0.58, "grad_norm": 1.0477094650268555, "learning_rate": 3.986551870705129e-06, "loss": 0.5066, "step": 3848 }, { "epoch": 0.58, "grad_norm": 1.0005667209625244, "learning_rate": 3.9841697832907e-06, "loss": 0.4607, "step": 3849 }, { "epoch": 0.58, "grad_norm": 1.2232699394226074, "learning_rate": 3.981787936345377e-06, "loss": 0.494, "step": 3850 }, { "epoch": 0.58, "grad_norm": 1.3173962831497192, "learning_rate": 3.979406330432996e-06, "loss": 0.4337, "step": 3851 }, { "epoch": 0.58, "grad_norm": 1.1510789394378662, "learning_rate": 3.977024966117336e-06, "loss": 0.4342, "step": 3852 }, { "epoch": 0.58, "grad_norm": 1.803697109222412, "learning_rate": 3.974643843962116e-06, "loss": 0.4797, "step": 3853 }, { "epoch": 0.58, "grad_norm": 1.19402015209198, "learning_rate": 3.972262964531001e-06, "loss": 0.4549, "step": 3854 }, { "epoch": 0.58, "grad_norm": 1.0802239179611206, "learning_rate": 3.969882328387596e-06, "loss": 0.442, "step": 3855 }, { "epoch": 0.58, "grad_norm": 1.1645846366882324, "learning_rate": 3.96750193609545e-06, "loss": 0.5167, "step": 3856 }, { "epoch": 0.58, "grad_norm": 1.1026060581207275, "learning_rate": 3.965121788218054e-06, "loss": 0.4762, "step": 3857 }, { "epoch": 0.58, "grad_norm": 1.2105247974395752, "learning_rate": 3.962741885318836e-06, "loss": 0.5095, "step": 3858 }, { "epoch": 0.58, "grad_norm": 1.1187512874603271, "learning_rate": 3.960362227961178e-06, "loss": 0.5248, "step": 3859 }, { "epoch": 0.58, "grad_norm": 1.2189611196517944, "learning_rate": 3.9579828167083935e-06, "loss": 0.4208, "step": 3860 }, { "epoch": 0.58, "grad_norm": 1.2718775272369385, "learning_rate": 3.955603652123741e-06, "loss": 0.4929, "step": 3861 }, { "epoch": 0.58, "grad_norm": 1.0455647706985474, "learning_rate": 3.95322473477042e-06, "loss": 0.7122, "step": 3862 }, { "epoch": 0.58, "grad_norm": 1.4590368270874023, "learning_rate": 3.950846065211573e-06, "loss": 0.4954, "step": 3863 }, { "epoch": 0.58, "grad_norm": 1.1627411842346191, "learning_rate": 3.948467644010282e-06, "loss": 0.7091, "step": 3864 }, { "epoch": 0.58, "grad_norm": 0.9864482283592224, "learning_rate": 3.9460894717295705e-06, "loss": 0.4903, "step": 3865 }, { "epoch": 0.58, "grad_norm": 1.421152949333191, "learning_rate": 3.9437115489324045e-06, "loss": 0.4877, "step": 3866 }, { "epoch": 0.58, "grad_norm": 1.5340853929519653, "learning_rate": 3.94133387618169e-06, "loss": 0.4958, "step": 3867 }, { "epoch": 0.58, "grad_norm": 1.3412729501724243, "learning_rate": 3.9389564540402736e-06, "loss": 0.5276, "step": 3868 }, { "epoch": 0.58, "grad_norm": 1.5760350227355957, "learning_rate": 3.9365792830709425e-06, "loss": 0.6006, "step": 3869 }, { "epoch": 0.58, "grad_norm": 1.5686756372451782, "learning_rate": 3.934202363836426e-06, "loss": 0.5551, "step": 3870 }, { "epoch": 0.58, "grad_norm": 1.1461549997329712, "learning_rate": 3.931825696899391e-06, "loss": 0.4741, "step": 3871 }, { "epoch": 0.58, "grad_norm": 1.1561541557312012, "learning_rate": 3.929449282822448e-06, "loss": 0.4918, "step": 3872 }, { "epoch": 0.58, "grad_norm": 1.1444692611694336, "learning_rate": 3.927073122168145e-06, "loss": 0.4791, "step": 3873 }, { "epoch": 0.58, "grad_norm": 1.0400179624557495, "learning_rate": 3.924697215498971e-06, "loss": 0.4925, "step": 3874 }, { "epoch": 0.58, "grad_norm": 1.2642179727554321, "learning_rate": 3.922321563377354e-06, "loss": 0.4767, "step": 3875 }, { "epoch": 0.58, "grad_norm": 1.0196568965911865, "learning_rate": 3.919946166365664e-06, "loss": 0.5069, "step": 3876 }, { "epoch": 0.58, "grad_norm": 1.0619981288909912, "learning_rate": 3.917571025026207e-06, "loss": 0.4387, "step": 3877 }, { "epoch": 0.58, "grad_norm": 1.1912646293640137, "learning_rate": 3.915196139921233e-06, "loss": 0.5106, "step": 3878 }, { "epoch": 0.58, "grad_norm": 1.1002869606018066, "learning_rate": 3.912821511612927e-06, "loss": 0.4682, "step": 3879 }, { "epoch": 0.58, "grad_norm": 1.4695113897323608, "learning_rate": 3.910447140663416e-06, "loss": 0.5065, "step": 3880 }, { "epoch": 0.58, "grad_norm": 1.4904026985168457, "learning_rate": 3.908073027634765e-06, "loss": 0.4749, "step": 3881 }, { "epoch": 0.58, "grad_norm": 1.602019190788269, "learning_rate": 3.90569917308898e-06, "loss": 0.4797, "step": 3882 }, { "epoch": 0.58, "grad_norm": 1.048354983329773, "learning_rate": 3.903325577588002e-06, "loss": 0.4629, "step": 3883 }, { "epoch": 0.58, "grad_norm": 1.3910847902297974, "learning_rate": 3.9009522416937135e-06, "loss": 0.4448, "step": 3884 }, { "epoch": 0.58, "grad_norm": 1.2084648609161377, "learning_rate": 3.898579165967934e-06, "loss": 0.4793, "step": 3885 }, { "epoch": 0.58, "grad_norm": 1.616073489189148, "learning_rate": 3.896206350972424e-06, "loss": 0.549, "step": 3886 }, { "epoch": 0.58, "grad_norm": 1.3312240839004517, "learning_rate": 3.8938337972688775e-06, "loss": 0.4652, "step": 3887 }, { "epoch": 0.58, "grad_norm": 1.0773589611053467, "learning_rate": 3.891461505418931e-06, "loss": 0.428, "step": 3888 }, { "epoch": 0.58, "grad_norm": 1.1401599645614624, "learning_rate": 3.889089475984159e-06, "loss": 0.525, "step": 3889 }, { "epoch": 0.58, "grad_norm": 1.1828844547271729, "learning_rate": 3.8867177095260695e-06, "loss": 0.4987, "step": 3890 }, { "epoch": 0.58, "grad_norm": 1.0951250791549683, "learning_rate": 3.8843462066061126e-06, "loss": 0.4749, "step": 3891 }, { "epoch": 0.58, "grad_norm": 1.238510012626648, "learning_rate": 3.881974967785675e-06, "loss": 0.4967, "step": 3892 }, { "epoch": 0.58, "grad_norm": 1.08229398727417, "learning_rate": 3.879603993626081e-06, "loss": 0.4522, "step": 3893 }, { "epoch": 0.58, "grad_norm": 1.1158554553985596, "learning_rate": 3.877233284688591e-06, "loss": 0.5297, "step": 3894 }, { "epoch": 0.59, "grad_norm": 1.116819977760315, "learning_rate": 3.874862841534403e-06, "loss": 0.499, "step": 3895 }, { "epoch": 0.59, "grad_norm": 1.375423550605774, "learning_rate": 3.872492664724654e-06, "loss": 0.5113, "step": 3896 }, { "epoch": 0.59, "grad_norm": 1.0239863395690918, "learning_rate": 3.870122754820415e-06, "loss": 0.6737, "step": 3897 }, { "epoch": 0.59, "grad_norm": 1.258987307548523, "learning_rate": 3.867753112382696e-06, "loss": 0.549, "step": 3898 }, { "epoch": 0.59, "grad_norm": 1.263578176498413, "learning_rate": 3.865383737972441e-06, "loss": 0.4924, "step": 3899 }, { "epoch": 0.59, "grad_norm": 1.169535517692566, "learning_rate": 3.863014632150535e-06, "loss": 0.4995, "step": 3900 }, { "epoch": 0.59, "grad_norm": 1.2995307445526123, "learning_rate": 3.860645795477795e-06, "loss": 0.5488, "step": 3901 }, { "epoch": 0.59, "grad_norm": 1.4899741411209106, "learning_rate": 3.8582772285149765e-06, "loss": 0.5531, "step": 3902 }, { "epoch": 0.59, "grad_norm": 1.1487253904342651, "learning_rate": 3.855908931822772e-06, "loss": 0.4893, "step": 3903 }, { "epoch": 0.59, "grad_norm": 1.1734052896499634, "learning_rate": 3.8535409059618085e-06, "loss": 0.5228, "step": 3904 }, { "epoch": 0.59, "grad_norm": 1.0161182880401611, "learning_rate": 3.851173151492649e-06, "loss": 0.5619, "step": 3905 }, { "epoch": 0.59, "grad_norm": 1.154305338859558, "learning_rate": 3.848805668975791e-06, "loss": 0.7728, "step": 3906 }, { "epoch": 0.59, "grad_norm": 1.5940622091293335, "learning_rate": 3.846438458971672e-06, "loss": 0.4525, "step": 3907 }, { "epoch": 0.59, "grad_norm": 1.275012493133545, "learning_rate": 3.844071522040659e-06, "loss": 0.4728, "step": 3908 }, { "epoch": 0.59, "grad_norm": 0.9856143593788147, "learning_rate": 3.84170485874306e-06, "loss": 0.41, "step": 3909 }, { "epoch": 0.59, "grad_norm": 0.9249933958053589, "learning_rate": 3.8393384696391155e-06, "loss": 0.4828, "step": 3910 }, { "epoch": 0.59, "grad_norm": 1.2578086853027344, "learning_rate": 3.836972355289e-06, "loss": 0.4167, "step": 3911 }, { "epoch": 0.59, "grad_norm": 1.2180150747299194, "learning_rate": 3.834606516252823e-06, "loss": 0.4615, "step": 3912 }, { "epoch": 0.59, "grad_norm": 2.1003973484039307, "learning_rate": 3.832240953090631e-06, "loss": 0.471, "step": 3913 }, { "epoch": 0.59, "grad_norm": 1.1133140325546265, "learning_rate": 3.829875666362406e-06, "loss": 0.7322, "step": 3914 }, { "epoch": 0.59, "grad_norm": 1.244620680809021, "learning_rate": 3.827510656628064e-06, "loss": 0.444, "step": 3915 }, { "epoch": 0.59, "grad_norm": 1.4511991739273071, "learning_rate": 3.82514592444745e-06, "loss": 0.487, "step": 3916 }, { "epoch": 0.59, "grad_norm": 1.5115967988967896, "learning_rate": 3.82278147038035e-06, "loss": 0.4815, "step": 3917 }, { "epoch": 0.59, "grad_norm": 1.024938941001892, "learning_rate": 3.820417294986482e-06, "loss": 0.4131, "step": 3918 }, { "epoch": 0.59, "grad_norm": 1.0323221683502197, "learning_rate": 3.818053398825496e-06, "loss": 0.5051, "step": 3919 }, { "epoch": 0.59, "grad_norm": 2.0843191146850586, "learning_rate": 3.815689782456979e-06, "loss": 0.4785, "step": 3920 }, { "epoch": 0.59, "grad_norm": 1.3037725687026978, "learning_rate": 3.8133264464404495e-06, "loss": 0.5292, "step": 3921 }, { "epoch": 0.59, "grad_norm": 1.3312273025512695, "learning_rate": 3.8109633913353627e-06, "loss": 0.4452, "step": 3922 }, { "epoch": 0.59, "grad_norm": 1.314328908920288, "learning_rate": 3.8086006177011026e-06, "loss": 0.4705, "step": 3923 }, { "epoch": 0.59, "grad_norm": 1.2481919527053833, "learning_rate": 3.806238126096988e-06, "loss": 0.4885, "step": 3924 }, { "epoch": 0.59, "grad_norm": 1.8505432605743408, "learning_rate": 3.803875917082276e-06, "loss": 0.4558, "step": 3925 }, { "epoch": 0.59, "grad_norm": 1.012825846672058, "learning_rate": 3.801513991216151e-06, "loss": 0.4377, "step": 3926 }, { "epoch": 0.59, "grad_norm": 1.23249351978302, "learning_rate": 3.799152349057733e-06, "loss": 0.4879, "step": 3927 }, { "epoch": 0.59, "grad_norm": 1.1257227659225464, "learning_rate": 3.796790991166072e-06, "loss": 0.5374, "step": 3928 }, { "epoch": 0.59, "grad_norm": 1.104799747467041, "learning_rate": 3.7944299181001544e-06, "loss": 0.4404, "step": 3929 }, { "epoch": 0.59, "grad_norm": 1.05022394657135, "learning_rate": 3.7920691304188973e-06, "loss": 0.4516, "step": 3930 }, { "epoch": 0.59, "grad_norm": 1.2300362586975098, "learning_rate": 3.7897086286811503e-06, "loss": 0.7274, "step": 3931 }, { "epoch": 0.59, "grad_norm": 1.3635200262069702, "learning_rate": 3.787348413445695e-06, "loss": 0.4526, "step": 3932 }, { "epoch": 0.59, "grad_norm": 1.2535990476608276, "learning_rate": 3.784988485271247e-06, "loss": 0.4408, "step": 3933 }, { "epoch": 0.59, "grad_norm": 1.356583595275879, "learning_rate": 3.782628844716451e-06, "loss": 0.5828, "step": 3934 }, { "epoch": 0.59, "grad_norm": 1.2249317169189453, "learning_rate": 3.780269492339888e-06, "loss": 0.4044, "step": 3935 }, { "epoch": 0.59, "grad_norm": 1.5791181325912476, "learning_rate": 3.777910428700062e-06, "loss": 0.4355, "step": 3936 }, { "epoch": 0.59, "grad_norm": 1.0605343580245972, "learning_rate": 3.7755516543554223e-06, "loss": 0.5404, "step": 3937 }, { "epoch": 0.59, "grad_norm": 1.1640173196792603, "learning_rate": 3.7731931698643377e-06, "loss": 0.4794, "step": 3938 }, { "epoch": 0.59, "grad_norm": 1.8369359970092773, "learning_rate": 3.7708349757851147e-06, "loss": 0.5137, "step": 3939 }, { "epoch": 0.59, "grad_norm": 1.152345895767212, "learning_rate": 3.768477072675988e-06, "loss": 0.5086, "step": 3940 }, { "epoch": 0.59, "grad_norm": 1.2136256694793701, "learning_rate": 3.7661194610951235e-06, "loss": 0.4418, "step": 3941 }, { "epoch": 0.59, "grad_norm": 1.138782024383545, "learning_rate": 3.763762141600621e-06, "loss": 0.5033, "step": 3942 }, { "epoch": 0.59, "grad_norm": 1.315133810043335, "learning_rate": 3.7614051147505083e-06, "loss": 0.725, "step": 3943 }, { "epoch": 0.59, "grad_norm": 1.293424367904663, "learning_rate": 3.759048381102745e-06, "loss": 0.4557, "step": 3944 }, { "epoch": 0.59, "grad_norm": 1.475818157196045, "learning_rate": 3.756691941215222e-06, "loss": 0.4415, "step": 3945 }, { "epoch": 0.59, "grad_norm": 1.17885422706604, "learning_rate": 3.7543357956457586e-06, "loss": 0.4805, "step": 3946 }, { "epoch": 0.59, "grad_norm": 0.945575475692749, "learning_rate": 3.751979944952106e-06, "loss": 0.5232, "step": 3947 }, { "epoch": 0.59, "grad_norm": 1.2284419536590576, "learning_rate": 3.749624389691947e-06, "loss": 0.7786, "step": 3948 }, { "epoch": 0.59, "grad_norm": 0.9564995169639587, "learning_rate": 3.7472691304228907e-06, "loss": 0.4312, "step": 3949 }, { "epoch": 0.59, "grad_norm": 1.1624871492385864, "learning_rate": 3.7449141677024793e-06, "loss": 0.4833, "step": 3950 }, { "epoch": 0.59, "grad_norm": 1.7031782865524292, "learning_rate": 3.7425595020881844e-06, "loss": 0.4512, "step": 3951 }, { "epoch": 0.59, "grad_norm": 1.3860348463058472, "learning_rate": 3.7402051341374044e-06, "loss": 0.4207, "step": 3952 }, { "epoch": 0.59, "grad_norm": 1.7508814334869385, "learning_rate": 3.7378510644074717e-06, "loss": 0.4434, "step": 3953 }, { "epoch": 0.59, "grad_norm": 1.3682966232299805, "learning_rate": 3.7354972934556435e-06, "loss": 0.5217, "step": 3954 }, { "epoch": 0.59, "grad_norm": 1.503952980041504, "learning_rate": 3.7331438218391097e-06, "loss": 0.4955, "step": 3955 }, { "epoch": 0.59, "grad_norm": 0.962443470954895, "learning_rate": 3.730790650114988e-06, "loss": 0.4432, "step": 3956 }, { "epoch": 0.59, "grad_norm": 1.0997023582458496, "learning_rate": 3.728437778840327e-06, "loss": 0.7121, "step": 3957 }, { "epoch": 0.59, "grad_norm": 1.2674771547317505, "learning_rate": 3.7260852085720987e-06, "loss": 0.4345, "step": 3958 }, { "epoch": 0.59, "grad_norm": 1.094803810119629, "learning_rate": 3.7237329398672116e-06, "loss": 0.4344, "step": 3959 }, { "epoch": 0.59, "grad_norm": 1.173957347869873, "learning_rate": 3.7213809732824973e-06, "loss": 0.4637, "step": 3960 }, { "epoch": 0.6, "grad_norm": 1.391635775566101, "learning_rate": 3.7190293093747174e-06, "loss": 0.4612, "step": 3961 }, { "epoch": 0.6, "grad_norm": 1.4216790199279785, "learning_rate": 3.7166779487005623e-06, "loss": 0.4467, "step": 3962 }, { "epoch": 0.6, "grad_norm": 1.3889753818511963, "learning_rate": 3.71432689181665e-06, "loss": 0.484, "step": 3963 }, { "epoch": 0.6, "grad_norm": 1.1406309604644775, "learning_rate": 3.7119761392795265e-06, "loss": 0.4638, "step": 3964 }, { "epoch": 0.6, "grad_norm": 1.0480157136917114, "learning_rate": 3.7096256916456663e-06, "loss": 0.5225, "step": 3965 }, { "epoch": 0.6, "grad_norm": 1.4036260843276978, "learning_rate": 3.7072755494714714e-06, "loss": 0.5541, "step": 3966 }, { "epoch": 0.6, "grad_norm": 1.4621012210845947, "learning_rate": 3.704925713313271e-06, "loss": 0.7148, "step": 3967 }, { "epoch": 0.6, "grad_norm": 1.307979702949524, "learning_rate": 3.7025761837273235e-06, "loss": 0.45, "step": 3968 }, { "epoch": 0.6, "grad_norm": 1.1669434309005737, "learning_rate": 3.7002269612698118e-06, "loss": 0.444, "step": 3969 }, { "epoch": 0.6, "grad_norm": 1.5586612224578857, "learning_rate": 3.6978780464968504e-06, "loss": 0.406, "step": 3970 }, { "epoch": 0.6, "grad_norm": 1.208022952079773, "learning_rate": 3.695529439964478e-06, "loss": 0.4382, "step": 3971 }, { "epoch": 0.6, "grad_norm": 1.012624979019165, "learning_rate": 3.6931811422286596e-06, "loss": 0.4641, "step": 3972 }, { "epoch": 0.6, "grad_norm": 1.2322474718093872, "learning_rate": 3.690833153845289e-06, "loss": 0.4763, "step": 3973 }, { "epoch": 0.6, "grad_norm": 1.0939228534698486, "learning_rate": 3.688485475370187e-06, "loss": 0.5051, "step": 3974 }, { "epoch": 0.6, "grad_norm": 1.2920796871185303, "learning_rate": 3.6861381073590984e-06, "loss": 0.4724, "step": 3975 }, { "epoch": 0.6, "grad_norm": 1.5666229724884033, "learning_rate": 3.683791050367699e-06, "loss": 0.5168, "step": 3976 }, { "epoch": 0.6, "grad_norm": 1.049272060394287, "learning_rate": 3.6814443049515846e-06, "loss": 0.4168, "step": 3977 }, { "epoch": 0.6, "grad_norm": 1.1028687953948975, "learning_rate": 3.679097871666283e-06, "loss": 0.4386, "step": 3978 }, { "epoch": 0.6, "grad_norm": 1.190873384475708, "learning_rate": 3.6767517510672456e-06, "loss": 0.4798, "step": 3979 }, { "epoch": 0.6, "grad_norm": 1.1972253322601318, "learning_rate": 3.67440594370985e-06, "loss": 0.501, "step": 3980 }, { "epoch": 0.6, "grad_norm": 1.0737251043319702, "learning_rate": 3.672060450149401e-06, "loss": 0.4456, "step": 3981 }, { "epoch": 0.6, "grad_norm": 1.2812182903289795, "learning_rate": 3.669715270941126e-06, "loss": 0.5027, "step": 3982 }, { "epoch": 0.6, "grad_norm": 0.9650639295578003, "learning_rate": 3.667370406640182e-06, "loss": 0.3835, "step": 3983 }, { "epoch": 0.6, "grad_norm": 1.0944472551345825, "learning_rate": 3.6650258578016474e-06, "loss": 0.5167, "step": 3984 }, { "epoch": 0.6, "grad_norm": 1.0437852144241333, "learning_rate": 3.6626816249805295e-06, "loss": 0.4318, "step": 3985 }, { "epoch": 0.6, "grad_norm": 1.3374766111373901, "learning_rate": 3.660337708731758e-06, "loss": 0.4599, "step": 3986 }, { "epoch": 0.6, "grad_norm": 1.0762027502059937, "learning_rate": 3.6579941096101908e-06, "loss": 0.5042, "step": 3987 }, { "epoch": 0.6, "grad_norm": 1.0871272087097168, "learning_rate": 3.655650828170607e-06, "loss": 0.4515, "step": 3988 }, { "epoch": 0.6, "grad_norm": 1.1560770273208618, "learning_rate": 3.6533078649677122e-06, "loss": 0.5301, "step": 3989 }, { "epoch": 0.6, "grad_norm": 2.2571401596069336, "learning_rate": 3.6509652205561376e-06, "loss": 0.4563, "step": 3990 }, { "epoch": 0.6, "grad_norm": 1.1264859437942505, "learning_rate": 3.6486228954904353e-06, "loss": 0.5153, "step": 3991 }, { "epoch": 0.6, "grad_norm": 1.1578168869018555, "learning_rate": 3.646280890325089e-06, "loss": 0.4127, "step": 3992 }, { "epoch": 0.6, "grad_norm": 1.1922775506973267, "learning_rate": 3.643939205614502e-06, "loss": 0.4424, "step": 3993 }, { "epoch": 0.6, "grad_norm": 1.2284735441207886, "learning_rate": 3.6415978419129983e-06, "loss": 0.462, "step": 3994 }, { "epoch": 0.6, "grad_norm": 1.475010633468628, "learning_rate": 3.639256799774832e-06, "loss": 0.4566, "step": 3995 }, { "epoch": 0.6, "grad_norm": 1.2555471658706665, "learning_rate": 3.636916079754178e-06, "loss": 0.447, "step": 3996 }, { "epoch": 0.6, "grad_norm": 1.0604768991470337, "learning_rate": 3.634575682405136e-06, "loss": 0.5231, "step": 3997 }, { "epoch": 0.6, "grad_norm": 1.248558521270752, "learning_rate": 3.632235608281729e-06, "loss": 0.4045, "step": 3998 }, { "epoch": 0.6, "grad_norm": 1.157265305519104, "learning_rate": 3.6298958579379034e-06, "loss": 0.4442, "step": 3999 }, { "epoch": 0.6, "grad_norm": 1.255542278289795, "learning_rate": 3.62755643192753e-06, "loss": 0.4746, "step": 4000 }, { "epoch": 0.6, "grad_norm": 1.3020254373550415, "learning_rate": 3.6252173308043997e-06, "loss": 0.505, "step": 4001 }, { "epoch": 0.6, "grad_norm": 1.1597963571548462, "learning_rate": 3.622878555122228e-06, "loss": 0.5477, "step": 4002 }, { "epoch": 0.6, "grad_norm": 1.382543683052063, "learning_rate": 3.6205401054346567e-06, "loss": 0.4707, "step": 4003 }, { "epoch": 0.6, "grad_norm": 1.0588041543960571, "learning_rate": 3.6182019822952474e-06, "loss": 0.5921, "step": 4004 }, { "epoch": 0.6, "grad_norm": 1.1271412372589111, "learning_rate": 3.615864186257485e-06, "loss": 0.4466, "step": 4005 }, { "epoch": 0.6, "grad_norm": 3.1589460372924805, "learning_rate": 3.6135267178747746e-06, "loss": 0.4175, "step": 4006 }, { "epoch": 0.6, "grad_norm": 1.313869595527649, "learning_rate": 3.6111895777004463e-06, "loss": 0.5132, "step": 4007 }, { "epoch": 0.6, "grad_norm": 1.028083324432373, "learning_rate": 3.6088527662877526e-06, "loss": 0.4807, "step": 4008 }, { "epoch": 0.6, "grad_norm": 1.2099393606185913, "learning_rate": 3.6065162841898683e-06, "loss": 0.5307, "step": 4009 }, { "epoch": 0.6, "grad_norm": 1.1820998191833496, "learning_rate": 3.604180131959888e-06, "loss": 0.5318, "step": 4010 }, { "epoch": 0.6, "grad_norm": 1.037348747253418, "learning_rate": 3.6018443101508303e-06, "loss": 0.4897, "step": 4011 }, { "epoch": 0.6, "grad_norm": 1.2332555055618286, "learning_rate": 3.599508819315636e-06, "loss": 0.5444, "step": 4012 }, { "epoch": 0.6, "grad_norm": 1.0406962633132935, "learning_rate": 3.597173660007165e-06, "loss": 0.5207, "step": 4013 }, { "epoch": 0.6, "grad_norm": 1.1360505819320679, "learning_rate": 3.5948388327782015e-06, "loss": 0.4086, "step": 4014 }, { "epoch": 0.6, "grad_norm": 1.2744619846343994, "learning_rate": 3.5925043381814495e-06, "loss": 0.5085, "step": 4015 }, { "epoch": 0.6, "grad_norm": 3.2571609020233154, "learning_rate": 3.5901701767695347e-06, "loss": 0.421, "step": 4016 }, { "epoch": 0.6, "grad_norm": 1.3451341390609741, "learning_rate": 3.587836349095005e-06, "loss": 0.4349, "step": 4017 }, { "epoch": 0.6, "grad_norm": 1.240207314491272, "learning_rate": 3.5855028557103253e-06, "loss": 0.5108, "step": 4018 }, { "epoch": 0.6, "grad_norm": 1.4963494539260864, "learning_rate": 3.583169697167887e-06, "loss": 0.4525, "step": 4019 }, { "epoch": 0.6, "grad_norm": 1.2685552835464478, "learning_rate": 3.580836874019998e-06, "loss": 0.4974, "step": 4020 }, { "epoch": 0.6, "grad_norm": 1.2959842681884766, "learning_rate": 3.5785043868188883e-06, "loss": 0.5041, "step": 4021 }, { "epoch": 0.6, "grad_norm": 1.1403322219848633, "learning_rate": 3.5761722361167095e-06, "loss": 0.4426, "step": 4022 }, { "epoch": 0.6, "grad_norm": 1.2480156421661377, "learning_rate": 3.573840422465531e-06, "loss": 0.487, "step": 4023 }, { "epoch": 0.6, "grad_norm": 1.145241379737854, "learning_rate": 3.5715089464173446e-06, "loss": 0.4993, "step": 4024 }, { "epoch": 0.6, "grad_norm": 1.3552428483963013, "learning_rate": 3.56917780852406e-06, "loss": 0.5104, "step": 4025 }, { "epoch": 0.6, "grad_norm": 1.642072081565857, "learning_rate": 3.5668470093375113e-06, "loss": 0.4841, "step": 4026 }, { "epoch": 0.6, "grad_norm": 1.1877130270004272, "learning_rate": 3.564516549409447e-06, "loss": 0.5048, "step": 4027 }, { "epoch": 0.61, "grad_norm": 1.0061366558074951, "learning_rate": 3.5621864292915383e-06, "loss": 0.4923, "step": 4028 }, { "epoch": 0.61, "grad_norm": 1.3551526069641113, "learning_rate": 3.559856649535376e-06, "loss": 0.5354, "step": 4029 }, { "epoch": 0.61, "grad_norm": 1.4710549116134644, "learning_rate": 3.5575272106924685e-06, "loss": 0.4733, "step": 4030 }, { "epoch": 0.61, "grad_norm": 1.1924155950546265, "learning_rate": 3.5551981133142442e-06, "loss": 0.4402, "step": 4031 }, { "epoch": 0.61, "grad_norm": 1.527850866317749, "learning_rate": 3.5528693579520523e-06, "loss": 0.4803, "step": 4032 }, { "epoch": 0.61, "grad_norm": 1.5509775876998901, "learning_rate": 3.5505409451571592e-06, "loss": 0.5686, "step": 4033 }, { "epoch": 0.61, "grad_norm": 1.901955485343933, "learning_rate": 3.5482128754807517e-06, "loss": 0.4824, "step": 4034 }, { "epoch": 0.61, "grad_norm": 1.35624361038208, "learning_rate": 3.545885149473933e-06, "loss": 0.4943, "step": 4035 }, { "epoch": 0.61, "grad_norm": 4.186854839324951, "learning_rate": 3.5435577676877277e-06, "loss": 0.4298, "step": 4036 }, { "epoch": 0.61, "grad_norm": 1.0778859853744507, "learning_rate": 3.5412307306730777e-06, "loss": 0.434, "step": 4037 }, { "epoch": 0.61, "grad_norm": 1.5404746532440186, "learning_rate": 3.5389040389808427e-06, "loss": 0.4563, "step": 4038 }, { "epoch": 0.61, "grad_norm": 0.9880332946777344, "learning_rate": 3.536577693161801e-06, "loss": 0.5201, "step": 4039 }, { "epoch": 0.61, "grad_norm": 1.2401305437088013, "learning_rate": 3.5342516937666514e-06, "loss": 0.4546, "step": 4040 }, { "epoch": 0.61, "grad_norm": 1.0337995290756226, "learning_rate": 3.5319260413460053e-06, "loss": 0.4099, "step": 4041 }, { "epoch": 0.61, "grad_norm": 1.1469115018844604, "learning_rate": 3.5296007364503994e-06, "loss": 0.533, "step": 4042 }, { "epoch": 0.61, "grad_norm": 1.3092752695083618, "learning_rate": 3.5272757796302793e-06, "loss": 0.463, "step": 4043 }, { "epoch": 0.61, "grad_norm": 1.0672253370285034, "learning_rate": 3.524951171436015e-06, "loss": 0.4025, "step": 4044 }, { "epoch": 0.61, "grad_norm": 1.310982584953308, "learning_rate": 3.5226269124178923e-06, "loss": 0.464, "step": 4045 }, { "epoch": 0.61, "grad_norm": 1.3986499309539795, "learning_rate": 3.520303003126113e-06, "loss": 0.5286, "step": 4046 }, { "epoch": 0.61, "grad_norm": 1.1325291395187378, "learning_rate": 3.517979444110796e-06, "loss": 0.4431, "step": 4047 }, { "epoch": 0.61, "grad_norm": 1.1676157712936401, "learning_rate": 3.5156562359219808e-06, "loss": 0.5602, "step": 4048 }, { "epoch": 0.61, "grad_norm": 1.032056450843811, "learning_rate": 3.5133333791096204e-06, "loss": 0.4254, "step": 4049 }, { "epoch": 0.61, "grad_norm": 1.2060147523880005, "learning_rate": 3.5110108742235847e-06, "loss": 0.533, "step": 4050 }, { "epoch": 0.61, "grad_norm": 1.289845585823059, "learning_rate": 3.508688721813661e-06, "loss": 0.7149, "step": 4051 }, { "epoch": 0.61, "grad_norm": 1.0141443014144897, "learning_rate": 3.506366922429555e-06, "loss": 0.501, "step": 4052 }, { "epoch": 0.61, "grad_norm": 1.0876169204711914, "learning_rate": 3.5040454766208854e-06, "loss": 0.4696, "step": 4053 }, { "epoch": 0.61, "grad_norm": 1.0904936790466309, "learning_rate": 3.5017243849371914e-06, "loss": 0.4473, "step": 4054 }, { "epoch": 0.61, "grad_norm": 1.0631402730941772, "learning_rate": 3.4994036479279226e-06, "loss": 0.477, "step": 4055 }, { "epoch": 0.61, "grad_norm": 1.1845799684524536, "learning_rate": 3.4970832661424485e-06, "loss": 0.4957, "step": 4056 }, { "epoch": 0.61, "grad_norm": 1.0628514289855957, "learning_rate": 3.4947632401300556e-06, "loss": 0.4686, "step": 4057 }, { "epoch": 0.61, "grad_norm": 1.0868594646453857, "learning_rate": 3.4924435704399414e-06, "loss": 0.4908, "step": 4058 }, { "epoch": 0.61, "grad_norm": 1.3703351020812988, "learning_rate": 3.4901242576212276e-06, "loss": 0.4806, "step": 4059 }, { "epoch": 0.61, "grad_norm": 1.2583544254302979, "learning_rate": 3.487805302222942e-06, "loss": 0.5795, "step": 4060 }, { "epoch": 0.61, "grad_norm": 1.3480901718139648, "learning_rate": 3.4854867047940324e-06, "loss": 0.4464, "step": 4061 }, { "epoch": 0.61, "grad_norm": 0.977742075920105, "learning_rate": 3.4831684658833607e-06, "loss": 0.4493, "step": 4062 }, { "epoch": 0.61, "grad_norm": 1.3852417469024658, "learning_rate": 3.480850586039707e-06, "loss": 0.5087, "step": 4063 }, { "epoch": 0.61, "grad_norm": 1.0262526273727417, "learning_rate": 3.4785330658117606e-06, "loss": 0.4086, "step": 4064 }, { "epoch": 0.61, "grad_norm": 1.3300448656082153, "learning_rate": 3.4762159057481315e-06, "loss": 0.5143, "step": 4065 }, { "epoch": 0.61, "grad_norm": 1.3457499742507935, "learning_rate": 3.473899106397341e-06, "loss": 0.5434, "step": 4066 }, { "epoch": 0.61, "grad_norm": 1.2651559114456177, "learning_rate": 3.471582668307825e-06, "loss": 0.5043, "step": 4067 }, { "epoch": 0.61, "grad_norm": 1.1883511543273926, "learning_rate": 3.4692665920279355e-06, "loss": 0.4262, "step": 4068 }, { "epoch": 0.61, "grad_norm": 1.1819840669631958, "learning_rate": 3.466950878105937e-06, "loss": 0.5271, "step": 4069 }, { "epoch": 0.61, "grad_norm": 2.8352346420288086, "learning_rate": 3.4646355270900117e-06, "loss": 0.5559, "step": 4070 }, { "epoch": 0.61, "grad_norm": 1.3015486001968384, "learning_rate": 3.4623205395282533e-06, "loss": 0.5221, "step": 4071 }, { "epoch": 0.61, "grad_norm": 1.1253987550735474, "learning_rate": 3.4600059159686684e-06, "loss": 0.422, "step": 4072 }, { "epoch": 0.61, "grad_norm": 1.0635873079299927, "learning_rate": 3.457691656959179e-06, "loss": 0.3941, "step": 4073 }, { "epoch": 0.61, "grad_norm": 1.2900766134262085, "learning_rate": 3.4553777630476205e-06, "loss": 0.4899, "step": 4074 }, { "epoch": 0.61, "grad_norm": 1.5372850894927979, "learning_rate": 3.4530642347817416e-06, "loss": 0.4533, "step": 4075 }, { "epoch": 0.61, "grad_norm": 1.7968394756317139, "learning_rate": 3.4507510727092055e-06, "loss": 0.4263, "step": 4076 }, { "epoch": 0.61, "grad_norm": 1.2981948852539062, "learning_rate": 3.4484382773775884e-06, "loss": 0.4355, "step": 4077 }, { "epoch": 0.61, "grad_norm": 1.338004231452942, "learning_rate": 3.4461258493343773e-06, "loss": 0.4682, "step": 4078 }, { "epoch": 0.61, "grad_norm": 1.423414707183838, "learning_rate": 3.4438137891269773e-06, "loss": 0.5372, "step": 4079 }, { "epoch": 0.61, "grad_norm": 1.1704978942871094, "learning_rate": 3.4415020973026985e-06, "loss": 0.4991, "step": 4080 }, { "epoch": 0.61, "grad_norm": 1.1359728574752808, "learning_rate": 3.4391907744087737e-06, "loss": 0.4081, "step": 4081 }, { "epoch": 0.61, "grad_norm": 1.2219703197479248, "learning_rate": 3.4368798209923416e-06, "loss": 0.4721, "step": 4082 }, { "epoch": 0.61, "grad_norm": 1.0463135242462158, "learning_rate": 3.434569237600455e-06, "loss": 0.5328, "step": 4083 }, { "epoch": 0.61, "grad_norm": 1.3211647272109985, "learning_rate": 3.4322590247800786e-06, "loss": 0.4916, "step": 4084 }, { "epoch": 0.61, "grad_norm": 1.1584094762802124, "learning_rate": 3.4299491830780907e-06, "loss": 0.5235, "step": 4085 }, { "epoch": 0.61, "grad_norm": 1.1380895376205444, "learning_rate": 3.42763971304128e-06, "loss": 0.4737, "step": 4086 }, { "epoch": 0.61, "grad_norm": 1.145498514175415, "learning_rate": 3.4253306152163503e-06, "loss": 0.505, "step": 4087 }, { "epoch": 0.61, "grad_norm": 1.185123324394226, "learning_rate": 3.423021890149913e-06, "loss": 0.4853, "step": 4088 }, { "epoch": 0.61, "grad_norm": 1.6866155862808228, "learning_rate": 3.420713538388495e-06, "loss": 0.4934, "step": 4089 }, { "epoch": 0.61, "grad_norm": 1.1872615814208984, "learning_rate": 3.4184055604785334e-06, "loss": 0.4384, "step": 4090 }, { "epoch": 0.61, "grad_norm": 1.3099006414413452, "learning_rate": 3.416097956966375e-06, "loss": 0.4641, "step": 4091 }, { "epoch": 0.61, "grad_norm": 1.246157169342041, "learning_rate": 3.4137907283982817e-06, "loss": 0.4992, "step": 4092 }, { "epoch": 0.61, "grad_norm": 1.1663645505905151, "learning_rate": 3.411483875320425e-06, "loss": 0.5388, "step": 4093 }, { "epoch": 0.61, "grad_norm": 1.2319798469543457, "learning_rate": 3.4091773982788867e-06, "loss": 0.4707, "step": 4094 }, { "epoch": 0.62, "grad_norm": 1.0129140615463257, "learning_rate": 3.4068712978196593e-06, "loss": 0.5338, "step": 4095 }, { "epoch": 0.62, "grad_norm": 1.376854658126831, "learning_rate": 3.4045655744886476e-06, "loss": 0.5723, "step": 4096 }, { "epoch": 0.62, "grad_norm": 1.3021920919418335, "learning_rate": 3.4022602288316654e-06, "loss": 0.5114, "step": 4097 }, { "epoch": 0.62, "grad_norm": 1.140984058380127, "learning_rate": 3.39995526139444e-06, "loss": 0.4293, "step": 4098 }, { "epoch": 0.62, "grad_norm": 1.1959365606307983, "learning_rate": 3.3976506727226054e-06, "loss": 0.4729, "step": 4099 }, { "epoch": 0.62, "grad_norm": 1.119282603263855, "learning_rate": 3.3953464633617085e-06, "loss": 0.509, "step": 4100 }, { "epoch": 0.62, "grad_norm": 1.127223014831543, "learning_rate": 3.3930426338572066e-06, "loss": 0.4846, "step": 4101 }, { "epoch": 0.62, "grad_norm": 1.512105107307434, "learning_rate": 3.3907391847544656e-06, "loss": 0.5075, "step": 4102 }, { "epoch": 0.62, "grad_norm": 1.1964913606643677, "learning_rate": 3.388436116598761e-06, "loss": 0.4897, "step": 4103 }, { "epoch": 0.62, "grad_norm": 1.5570793151855469, "learning_rate": 3.386133429935281e-06, "loss": 0.5006, "step": 4104 }, { "epoch": 0.62, "grad_norm": 1.047523856163025, "learning_rate": 3.3838311253091215e-06, "loss": 0.5446, "step": 4105 }, { "epoch": 0.62, "grad_norm": 0.9818440675735474, "learning_rate": 3.381529203265287e-06, "loss": 0.4266, "step": 4106 }, { "epoch": 0.62, "grad_norm": 1.4128086566925049, "learning_rate": 3.379227664348693e-06, "loss": 0.4838, "step": 4107 }, { "epoch": 0.62, "grad_norm": 1.1358486413955688, "learning_rate": 3.376926509104165e-06, "loss": 0.3933, "step": 4108 }, { "epoch": 0.62, "grad_norm": 1.2130259275436401, "learning_rate": 3.3746257380764335e-06, "loss": 0.4983, "step": 4109 }, { "epoch": 0.62, "grad_norm": 1.1324176788330078, "learning_rate": 3.372325351810144e-06, "loss": 0.5273, "step": 4110 }, { "epoch": 0.62, "grad_norm": 1.1993681192398071, "learning_rate": 3.370025350849846e-06, "loss": 0.4769, "step": 4111 }, { "epoch": 0.62, "grad_norm": 1.093234658241272, "learning_rate": 3.3677257357400004e-06, "loss": 0.5331, "step": 4112 }, { "epoch": 0.62, "grad_norm": 1.137793779373169, "learning_rate": 3.3654265070249766e-06, "loss": 0.4607, "step": 4113 }, { "epoch": 0.62, "grad_norm": 1.0670733451843262, "learning_rate": 3.363127665249051e-06, "loss": 0.4908, "step": 4114 }, { "epoch": 0.62, "grad_norm": 1.1469556093215942, "learning_rate": 3.36082921095641e-06, "loss": 0.5064, "step": 4115 }, { "epoch": 0.62, "grad_norm": 1.0189350843429565, "learning_rate": 3.3585311446911484e-06, "loss": 0.4728, "step": 4116 }, { "epoch": 0.62, "grad_norm": 1.3671777248382568, "learning_rate": 3.3562334669972684e-06, "loss": 0.4943, "step": 4117 }, { "epoch": 0.62, "grad_norm": 1.1329355239868164, "learning_rate": 3.353936178418679e-06, "loss": 0.4881, "step": 4118 }, { "epoch": 0.62, "grad_norm": 1.1395536661148071, "learning_rate": 3.3516392794992002e-06, "loss": 0.5151, "step": 4119 }, { "epoch": 0.62, "grad_norm": 1.2958918809890747, "learning_rate": 3.349342770782557e-06, "loss": 0.4913, "step": 4120 }, { "epoch": 0.62, "grad_norm": 1.0422203540802002, "learning_rate": 3.3470466528123822e-06, "loss": 0.4213, "step": 4121 }, { "epoch": 0.62, "grad_norm": 1.043195128440857, "learning_rate": 3.3447509261322174e-06, "loss": 0.4498, "step": 4122 }, { "epoch": 0.62, "grad_norm": 1.5076898336410522, "learning_rate": 3.3424555912855116e-06, "loss": 0.4507, "step": 4123 }, { "epoch": 0.62, "grad_norm": 1.1307185888290405, "learning_rate": 3.34016064881562e-06, "loss": 0.5125, "step": 4124 }, { "epoch": 0.62, "grad_norm": 1.6974482536315918, "learning_rate": 3.3378660992658042e-06, "loss": 0.5163, "step": 4125 }, { "epoch": 0.62, "grad_norm": 1.336675763130188, "learning_rate": 3.3355719431792367e-06, "loss": 0.4444, "step": 4126 }, { "epoch": 0.62, "grad_norm": 1.282526969909668, "learning_rate": 3.333278181098992e-06, "loss": 0.5093, "step": 4127 }, { "epoch": 0.62, "grad_norm": 1.281994342803955, "learning_rate": 3.3309848135680544e-06, "loss": 0.4505, "step": 4128 }, { "epoch": 0.62, "grad_norm": 1.696917176246643, "learning_rate": 3.3286918411293143e-06, "loss": 0.4927, "step": 4129 }, { "epoch": 0.62, "grad_norm": 1.1016616821289062, "learning_rate": 3.326399264325567e-06, "loss": 0.4368, "step": 4130 }, { "epoch": 0.62, "grad_norm": 1.1072361469268799, "learning_rate": 3.324107083699516e-06, "loss": 0.4482, "step": 4131 }, { "epoch": 0.62, "grad_norm": 1.0585745573043823, "learning_rate": 3.321815299793771e-06, "loss": 0.4973, "step": 4132 }, { "epoch": 0.62, "grad_norm": 1.291398048400879, "learning_rate": 3.319523913150845e-06, "loss": 0.4767, "step": 4133 }, { "epoch": 0.62, "grad_norm": 1.1092900037765503, "learning_rate": 3.3172329243131606e-06, "loss": 0.7319, "step": 4134 }, { "epoch": 0.62, "grad_norm": 1.1119740009307861, "learning_rate": 3.3149423338230437e-06, "loss": 0.505, "step": 4135 }, { "epoch": 0.62, "grad_norm": 1.1445205211639404, "learning_rate": 3.3126521422227264e-06, "loss": 0.4644, "step": 4136 }, { "epoch": 0.62, "grad_norm": 1.1706405878067017, "learning_rate": 3.3103623500543504e-06, "loss": 0.4365, "step": 4137 }, { "epoch": 0.62, "grad_norm": 1.186727523803711, "learning_rate": 3.308072957859956e-06, "loss": 0.4779, "step": 4138 }, { "epoch": 0.62, "grad_norm": 1.4218862056732178, "learning_rate": 3.305783966181493e-06, "loss": 0.5094, "step": 4139 }, { "epoch": 0.62, "grad_norm": 1.1862359046936035, "learning_rate": 3.3034953755608155e-06, "loss": 0.7217, "step": 4140 }, { "epoch": 0.62, "grad_norm": 1.248087763786316, "learning_rate": 3.3012071865396835e-06, "loss": 0.4712, "step": 4141 }, { "epoch": 0.62, "grad_norm": 1.3224226236343384, "learning_rate": 3.2989193996597604e-06, "loss": 0.4744, "step": 4142 }, { "epoch": 0.62, "grad_norm": 1.354246973991394, "learning_rate": 3.296632015462614e-06, "loss": 0.48, "step": 4143 }, { "epoch": 0.62, "grad_norm": 1.019948959350586, "learning_rate": 3.2943450344897206e-06, "loss": 0.4772, "step": 4144 }, { "epoch": 0.62, "grad_norm": 1.1704723834991455, "learning_rate": 3.2920584572824556e-06, "loss": 0.5243, "step": 4145 }, { "epoch": 0.62, "grad_norm": 1.1846938133239746, "learning_rate": 3.289772284382102e-06, "loss": 0.4544, "step": 4146 }, { "epoch": 0.62, "grad_norm": 1.2185473442077637, "learning_rate": 3.2874865163298466e-06, "loss": 0.4294, "step": 4147 }, { "epoch": 0.62, "grad_norm": 1.3734337091445923, "learning_rate": 3.285201153666783e-06, "loss": 0.4359, "step": 4148 }, { "epoch": 0.62, "grad_norm": 1.2729363441467285, "learning_rate": 3.282916196933904e-06, "loss": 0.731, "step": 4149 }, { "epoch": 0.62, "grad_norm": 1.3835129737854004, "learning_rate": 3.280631646672109e-06, "loss": 0.5361, "step": 4150 }, { "epoch": 0.62, "grad_norm": 1.1486760377883911, "learning_rate": 3.2783475034222007e-06, "loss": 0.3885, "step": 4151 }, { "epoch": 0.62, "grad_norm": 1.1091580390930176, "learning_rate": 3.276063767724885e-06, "loss": 0.4656, "step": 4152 }, { "epoch": 0.62, "grad_norm": 1.3121721744537354, "learning_rate": 3.273780440120773e-06, "loss": 0.4685, "step": 4153 }, { "epoch": 0.62, "grad_norm": 1.1016768217086792, "learning_rate": 3.2714975211503765e-06, "loss": 0.7243, "step": 4154 }, { "epoch": 0.62, "grad_norm": 1.3690907955169678, "learning_rate": 3.269215011354113e-06, "loss": 0.4837, "step": 4155 }, { "epoch": 0.62, "grad_norm": 1.2794692516326904, "learning_rate": 3.2669329112723025e-06, "loss": 0.4761, "step": 4156 }, { "epoch": 0.62, "grad_norm": 1.177574872970581, "learning_rate": 3.264651221445168e-06, "loss": 0.4693, "step": 4157 }, { "epoch": 0.62, "grad_norm": 1.3265407085418701, "learning_rate": 3.262369942412832e-06, "loss": 0.4411, "step": 4158 }, { "epoch": 0.62, "grad_norm": 2.0271427631378174, "learning_rate": 3.2600890747153264e-06, "loss": 0.4996, "step": 4159 }, { "epoch": 0.62, "grad_norm": 1.2384170293807983, "learning_rate": 3.257808618892581e-06, "loss": 0.468, "step": 4160 }, { "epoch": 0.63, "grad_norm": 1.3693101406097412, "learning_rate": 3.2555285754844313e-06, "loss": 0.5083, "step": 4161 }, { "epoch": 0.63, "grad_norm": 1.0791184902191162, "learning_rate": 3.2532489450306093e-06, "loss": 0.4465, "step": 4162 }, { "epoch": 0.63, "grad_norm": 1.2745674848556519, "learning_rate": 3.2509697280707554e-06, "loss": 0.4445, "step": 4163 }, { "epoch": 0.63, "grad_norm": 1.0671727657318115, "learning_rate": 3.2486909251444097e-06, "loss": 0.4558, "step": 4164 }, { "epoch": 0.63, "grad_norm": 1.2931402921676636, "learning_rate": 3.2464125367910137e-06, "loss": 0.4804, "step": 4165 }, { "epoch": 0.63, "grad_norm": 1.1181102991104126, "learning_rate": 3.2441345635499125e-06, "loss": 0.4688, "step": 4166 }, { "epoch": 0.63, "grad_norm": 1.120694875717163, "learning_rate": 3.2418570059603516e-06, "loss": 0.7203, "step": 4167 }, { "epoch": 0.63, "grad_norm": 1.0244213342666626, "learning_rate": 3.2395798645614775e-06, "loss": 0.6767, "step": 4168 }, { "epoch": 0.63, "grad_norm": 1.1293777227401733, "learning_rate": 3.2373031398923394e-06, "loss": 0.5484, "step": 4169 }, { "epoch": 0.63, "grad_norm": 1.5233601331710815, "learning_rate": 3.235026832491889e-06, "loss": 0.5499, "step": 4170 }, { "epoch": 0.63, "grad_norm": 0.9597512483596802, "learning_rate": 3.2327509428989755e-06, "loss": 0.4261, "step": 4171 }, { "epoch": 0.63, "grad_norm": 1.1787257194519043, "learning_rate": 3.230475471652354e-06, "loss": 0.7308, "step": 4172 }, { "epoch": 0.63, "grad_norm": 1.6041277647018433, "learning_rate": 3.2282004192906774e-06, "loss": 0.4538, "step": 4173 }, { "epoch": 0.63, "grad_norm": 1.3071924448013306, "learning_rate": 3.2259257863524983e-06, "loss": 0.516, "step": 4174 }, { "epoch": 0.63, "grad_norm": 1.1007215976715088, "learning_rate": 3.223651573376273e-06, "loss": 0.4808, "step": 4175 }, { "epoch": 0.63, "grad_norm": 1.139639139175415, "learning_rate": 3.2213777809003567e-06, "loss": 0.4874, "step": 4176 }, { "epoch": 0.63, "grad_norm": 1.2017183303833008, "learning_rate": 3.2191044094630064e-06, "loss": 0.4345, "step": 4177 }, { "epoch": 0.63, "grad_norm": 0.9593103528022766, "learning_rate": 3.2168314596023774e-06, "loss": 0.3868, "step": 4178 }, { "epoch": 0.63, "grad_norm": 1.3177942037582397, "learning_rate": 3.214558931856528e-06, "loss": 0.4377, "step": 4179 }, { "epoch": 0.63, "grad_norm": 1.098758339881897, "learning_rate": 3.212286826763412e-06, "loss": 0.4631, "step": 4180 }, { "epoch": 0.63, "grad_norm": 1.7599034309387207, "learning_rate": 3.21001514486089e-06, "loss": 0.4296, "step": 4181 }, { "epoch": 0.63, "grad_norm": 1.205664873123169, "learning_rate": 3.2077438866867163e-06, "loss": 0.49, "step": 4182 }, { "epoch": 0.63, "grad_norm": 1.2711565494537354, "learning_rate": 3.205473052778547e-06, "loss": 0.5107, "step": 4183 }, { "epoch": 0.63, "grad_norm": 1.0717415809631348, "learning_rate": 3.2032026436739393e-06, "loss": 0.4909, "step": 4184 }, { "epoch": 0.63, "grad_norm": 1.148633599281311, "learning_rate": 3.2009326599103467e-06, "loss": 0.478, "step": 4185 }, { "epoch": 0.63, "grad_norm": 1.136269211769104, "learning_rate": 3.1986631020251257e-06, "loss": 0.532, "step": 4186 }, { "epoch": 0.63, "grad_norm": 1.3343985080718994, "learning_rate": 3.1963939705555274e-06, "loss": 0.46, "step": 4187 }, { "epoch": 0.63, "grad_norm": 1.5305792093276978, "learning_rate": 3.194125266038706e-06, "loss": 0.4772, "step": 4188 }, { "epoch": 0.63, "grad_norm": 1.1033653020858765, "learning_rate": 3.1918569890117137e-06, "loss": 0.398, "step": 4189 }, { "epoch": 0.63, "grad_norm": 1.269152283668518, "learning_rate": 3.1895891400114997e-06, "loss": 0.4822, "step": 4190 }, { "epoch": 0.63, "grad_norm": 1.2628434896469116, "learning_rate": 3.1873217195749144e-06, "loss": 0.4573, "step": 4191 }, { "epoch": 0.63, "grad_norm": 1.8306210041046143, "learning_rate": 3.185054728238704e-06, "loss": 0.4465, "step": 4192 }, { "epoch": 0.63, "grad_norm": 1.174871802330017, "learning_rate": 3.1827881665395167e-06, "loss": 0.4439, "step": 4193 }, { "epoch": 0.63, "grad_norm": 1.552293062210083, "learning_rate": 3.180522035013896e-06, "loss": 0.4659, "step": 4194 }, { "epoch": 0.63, "grad_norm": 1.295082688331604, "learning_rate": 3.178256334198284e-06, "loss": 0.4341, "step": 4195 }, { "epoch": 0.63, "grad_norm": 1.3406304121017456, "learning_rate": 3.1759910646290227e-06, "loss": 0.4939, "step": 4196 }, { "epoch": 0.63, "grad_norm": 1.4305996894836426, "learning_rate": 3.1737262268423495e-06, "loss": 0.5117, "step": 4197 }, { "epoch": 0.63, "grad_norm": 1.1710951328277588, "learning_rate": 3.1714618213744025e-06, "loss": 0.7252, "step": 4198 }, { "epoch": 0.63, "grad_norm": 1.306310772895813, "learning_rate": 3.1691978487612127e-06, "loss": 0.5568, "step": 4199 }, { "epoch": 0.63, "grad_norm": 1.1800537109375, "learning_rate": 3.1669343095387138e-06, "loss": 0.5477, "step": 4200 }, { "epoch": 0.63, "grad_norm": 1.1271058320999146, "learning_rate": 3.164671204242734e-06, "loss": 0.4482, "step": 4201 }, { "epoch": 0.63, "grad_norm": 1.028165340423584, "learning_rate": 3.162408533409e-06, "loss": 0.708, "step": 4202 }, { "epoch": 0.63, "grad_norm": 1.0884407758712769, "learning_rate": 3.1601462975731336e-06, "loss": 0.4912, "step": 4203 }, { "epoch": 0.63, "grad_norm": 1.0936771631240845, "learning_rate": 3.157884497270658e-06, "loss": 0.4525, "step": 4204 }, { "epoch": 0.63, "grad_norm": 1.1014317274093628, "learning_rate": 3.155623133036989e-06, "loss": 0.6975, "step": 4205 }, { "epoch": 0.63, "grad_norm": 1.103590965270996, "learning_rate": 3.153362205407441e-06, "loss": 0.4535, "step": 4206 }, { "epoch": 0.63, "grad_norm": 1.4193551540374756, "learning_rate": 3.1511017149172247e-06, "loss": 0.4711, "step": 4207 }, { "epoch": 0.63, "grad_norm": 1.1436809301376343, "learning_rate": 3.1488416621014474e-06, "loss": 0.7078, "step": 4208 }, { "epoch": 0.63, "grad_norm": 1.293149709701538, "learning_rate": 3.146582047495112e-06, "loss": 0.4592, "step": 4209 }, { "epoch": 0.63, "grad_norm": 1.1240979433059692, "learning_rate": 3.144322871633121e-06, "loss": 0.3952, "step": 4210 }, { "epoch": 0.63, "grad_norm": 1.2994401454925537, "learning_rate": 3.1420641350502667e-06, "loss": 0.4376, "step": 4211 }, { "epoch": 0.63, "grad_norm": 1.0715842247009277, "learning_rate": 3.1398058382812435e-06, "loss": 0.5216, "step": 4212 }, { "epoch": 0.63, "grad_norm": 1.066828966140747, "learning_rate": 3.1375479818606387e-06, "loss": 0.4438, "step": 4213 }, { "epoch": 0.63, "grad_norm": 1.3278961181640625, "learning_rate": 3.135290566322934e-06, "loss": 0.4908, "step": 4214 }, { "epoch": 0.63, "grad_norm": 1.2020598649978638, "learning_rate": 3.1330335922025148e-06, "loss": 0.4276, "step": 4215 }, { "epoch": 0.63, "grad_norm": 1.5571986436843872, "learning_rate": 3.1307770600336496e-06, "loss": 0.5002, "step": 4216 }, { "epoch": 0.63, "grad_norm": 1.296955943107605, "learning_rate": 3.1285209703505126e-06, "loss": 0.5448, "step": 4217 }, { "epoch": 0.63, "grad_norm": 1.3728821277618408, "learning_rate": 3.126265323687167e-06, "loss": 0.4652, "step": 4218 }, { "epoch": 0.63, "grad_norm": 1.046671748161316, "learning_rate": 3.1240101205775748e-06, "loss": 0.4815, "step": 4219 }, { "epoch": 0.63, "grad_norm": 1.135014295578003, "learning_rate": 3.1217553615555906e-06, "loss": 0.4863, "step": 4220 }, { "epoch": 0.63, "grad_norm": 1.1618201732635498, "learning_rate": 3.119501047154966e-06, "loss": 0.524, "step": 4221 }, { "epoch": 0.63, "grad_norm": 1.2910598516464233, "learning_rate": 3.117247177909346e-06, "loss": 0.5278, "step": 4222 }, { "epoch": 0.63, "grad_norm": 1.5397508144378662, "learning_rate": 3.1149937543522702e-06, "loss": 0.4889, "step": 4223 }, { "epoch": 0.63, "grad_norm": 1.414420485496521, "learning_rate": 3.1127407770171726e-06, "loss": 0.4236, "step": 4224 }, { "epoch": 0.63, "grad_norm": 1.0627793073654175, "learning_rate": 3.11048824643738e-06, "loss": 0.4783, "step": 4225 }, { "epoch": 0.63, "grad_norm": 1.3977885246276855, "learning_rate": 3.10823616314612e-06, "loss": 0.5168, "step": 4226 }, { "epoch": 0.63, "grad_norm": 1.2551242113113403, "learning_rate": 3.1059845276765077e-06, "loss": 0.5435, "step": 4227 }, { "epoch": 0.64, "grad_norm": 1.2751466035842896, "learning_rate": 3.1037333405615535e-06, "loss": 0.5542, "step": 4228 }, { "epoch": 0.64, "grad_norm": 1.1057524681091309, "learning_rate": 3.101482602334162e-06, "loss": 0.5997, "step": 4229 }, { "epoch": 0.64, "grad_norm": 1.1933939456939697, "learning_rate": 3.099232313527133e-06, "loss": 0.4648, "step": 4230 }, { "epoch": 0.64, "grad_norm": 1.2310982942581177, "learning_rate": 3.096982474673158e-06, "loss": 0.4542, "step": 4231 }, { "epoch": 0.64, "grad_norm": 1.2274880409240723, "learning_rate": 3.094733086304824e-06, "loss": 0.4855, "step": 4232 }, { "epoch": 0.64, "grad_norm": 1.9544779062271118, "learning_rate": 3.092484148954609e-06, "loss": 0.4735, "step": 4233 }, { "epoch": 0.64, "grad_norm": 1.1469415426254272, "learning_rate": 3.090235663154885e-06, "loss": 0.4422, "step": 4234 }, { "epoch": 0.64, "grad_norm": 1.0784449577331543, "learning_rate": 3.0879876294379197e-06, "loss": 0.4602, "step": 4235 }, { "epoch": 0.64, "grad_norm": 1.0755500793457031, "learning_rate": 3.0857400483358676e-06, "loss": 0.4629, "step": 4236 }, { "epoch": 0.64, "grad_norm": 1.2463923692703247, "learning_rate": 3.083492920380784e-06, "loss": 0.6592, "step": 4237 }, { "epoch": 0.64, "grad_norm": 1.144013524055481, "learning_rate": 3.0812462461046116e-06, "loss": 0.5038, "step": 4238 }, { "epoch": 0.64, "grad_norm": 1.1738659143447876, "learning_rate": 3.0790000260391884e-06, "loss": 0.5529, "step": 4239 }, { "epoch": 0.64, "grad_norm": 1.0922801494598389, "learning_rate": 3.0767542607162405e-06, "loss": 0.4525, "step": 4240 }, { "epoch": 0.64, "grad_norm": 1.2859838008880615, "learning_rate": 3.074508950667391e-06, "loss": 0.5142, "step": 4241 }, { "epoch": 0.64, "grad_norm": 1.864901065826416, "learning_rate": 3.0722640964241534e-06, "loss": 0.5023, "step": 4242 }, { "epoch": 0.64, "grad_norm": 1.2572612762451172, "learning_rate": 3.0700196985179343e-06, "loss": 0.46, "step": 4243 }, { "epoch": 0.64, "grad_norm": 1.09493088722229, "learning_rate": 3.067775757480031e-06, "loss": 0.4511, "step": 4244 }, { "epoch": 0.64, "grad_norm": 1.0377367734909058, "learning_rate": 3.0655322738416325e-06, "loss": 0.358, "step": 4245 }, { "epoch": 0.64, "grad_norm": 1.162153959274292, "learning_rate": 3.063289248133822e-06, "loss": 0.6798, "step": 4246 }, { "epoch": 0.64, "grad_norm": 1.6267106533050537, "learning_rate": 3.0610466808875693e-06, "loss": 0.4563, "step": 4247 }, { "epoch": 0.64, "grad_norm": 1.1950501203536987, "learning_rate": 3.058804572633742e-06, "loss": 0.4957, "step": 4248 }, { "epoch": 0.64, "grad_norm": 1.5286623239517212, "learning_rate": 3.0565629239030946e-06, "loss": 0.4834, "step": 4249 }, { "epoch": 0.64, "grad_norm": 0.9686097502708435, "learning_rate": 3.0543217352262746e-06, "loss": 0.4386, "step": 4250 }, { "epoch": 0.64, "grad_norm": 1.4213485717773438, "learning_rate": 3.052081007133819e-06, "loss": 0.4501, "step": 4251 }, { "epoch": 0.64, "grad_norm": 1.3817428350448608, "learning_rate": 3.0498407401561586e-06, "loss": 0.4718, "step": 4252 }, { "epoch": 0.64, "grad_norm": 1.385147213935852, "learning_rate": 3.0476009348236117e-06, "loss": 0.4152, "step": 4253 }, { "epoch": 0.64, "grad_norm": 1.3299559354782104, "learning_rate": 3.0453615916663894e-06, "loss": 0.4627, "step": 4254 }, { "epoch": 0.64, "grad_norm": 1.016465187072754, "learning_rate": 3.0431227112145933e-06, "loss": 0.4805, "step": 4255 }, { "epoch": 0.64, "grad_norm": 1.6399199962615967, "learning_rate": 3.040884293998215e-06, "loss": 0.4054, "step": 4256 }, { "epoch": 0.64, "grad_norm": 1.054490566253662, "learning_rate": 3.0386463405471362e-06, "loss": 0.4361, "step": 4257 }, { "epoch": 0.64, "grad_norm": 1.1235274076461792, "learning_rate": 3.0364088513911283e-06, "loss": 0.4829, "step": 4258 }, { "epoch": 0.64, "grad_norm": 1.092665672302246, "learning_rate": 3.0341718270598557e-06, "loss": 0.512, "step": 4259 }, { "epoch": 0.64, "grad_norm": 1.1625473499298096, "learning_rate": 3.0319352680828696e-06, "loss": 0.4885, "step": 4260 }, { "epoch": 0.64, "grad_norm": 1.3811845779418945, "learning_rate": 3.0296991749896133e-06, "loss": 0.4575, "step": 4261 }, { "epoch": 0.64, "grad_norm": 1.3010365962982178, "learning_rate": 3.027463548309416e-06, "loss": 0.4904, "step": 4262 }, { "epoch": 0.64, "grad_norm": 1.4706380367279053, "learning_rate": 3.0252283885715024e-06, "loss": 0.4959, "step": 4263 }, { "epoch": 0.64, "grad_norm": 1.3928755521774292, "learning_rate": 3.0229936963049824e-06, "loss": 0.4657, "step": 4264 }, { "epoch": 0.64, "grad_norm": 1.387969732284546, "learning_rate": 3.0207594720388546e-06, "loss": 0.4672, "step": 4265 }, { "epoch": 0.64, "grad_norm": 1.2878376245498657, "learning_rate": 3.018525716302009e-06, "loss": 0.5284, "step": 4266 }, { "epoch": 0.64, "grad_norm": 1.231919765472412, "learning_rate": 3.0162924296232254e-06, "loss": 0.5271, "step": 4267 }, { "epoch": 0.64, "grad_norm": 1.1553974151611328, "learning_rate": 3.0140596125311707e-06, "loss": 0.4308, "step": 4268 }, { "epoch": 0.64, "grad_norm": 1.336836576461792, "learning_rate": 3.011827265554401e-06, "loss": 0.4435, "step": 4269 }, { "epoch": 0.64, "grad_norm": 1.1282306909561157, "learning_rate": 3.0095953892213615e-06, "loss": 0.5339, "step": 4270 }, { "epoch": 0.64, "grad_norm": 1.1024484634399414, "learning_rate": 3.0073639840603863e-06, "loss": 0.4086, "step": 4271 }, { "epoch": 0.64, "grad_norm": 1.4389541149139404, "learning_rate": 3.0051330505996973e-06, "loss": 0.5749, "step": 4272 }, { "epoch": 0.64, "grad_norm": 1.9937409162521362, "learning_rate": 3.002902589367405e-06, "loss": 0.4622, "step": 4273 }, { "epoch": 0.64, "grad_norm": 1.3017104864120483, "learning_rate": 3.0006726008915077e-06, "loss": 0.5086, "step": 4274 }, { "epoch": 0.64, "grad_norm": 1.111936330795288, "learning_rate": 2.9984430856998934e-06, "loss": 0.4967, "step": 4275 }, { "epoch": 0.64, "grad_norm": 1.7595922946929932, "learning_rate": 2.9962140443203363e-06, "loss": 0.5353, "step": 4276 }, { "epoch": 0.64, "grad_norm": 1.3905385732650757, "learning_rate": 2.9939854772804977e-06, "loss": 0.4603, "step": 4277 }, { "epoch": 0.64, "grad_norm": 1.2593666315078735, "learning_rate": 2.991757385107929e-06, "loss": 0.4742, "step": 4278 }, { "epoch": 0.64, "grad_norm": 1.0843298435211182, "learning_rate": 2.989529768330068e-06, "loss": 0.6977, "step": 4279 }, { "epoch": 0.64, "grad_norm": 1.2482932806015015, "learning_rate": 2.98730262747424e-06, "loss": 0.4295, "step": 4280 }, { "epoch": 0.64, "grad_norm": 1.2098227739334106, "learning_rate": 2.985075963067656e-06, "loss": 0.4863, "step": 4281 }, { "epoch": 0.64, "grad_norm": 1.428836703300476, "learning_rate": 2.9828497756374184e-06, "loss": 0.6144, "step": 4282 }, { "epoch": 0.64, "grad_norm": 1.4185266494750977, "learning_rate": 2.9806240657105144e-06, "loss": 0.569, "step": 4283 }, { "epoch": 0.64, "grad_norm": 1.6222589015960693, "learning_rate": 2.9783988338138157e-06, "loss": 0.5338, "step": 4284 }, { "epoch": 0.64, "grad_norm": 1.4907513856887817, "learning_rate": 2.9761740804740856e-06, "loss": 0.551, "step": 4285 }, { "epoch": 0.64, "grad_norm": 1.1559386253356934, "learning_rate": 2.9739498062179696e-06, "loss": 0.4267, "step": 4286 }, { "epoch": 0.64, "grad_norm": 1.0403388738632202, "learning_rate": 2.971726011572002e-06, "loss": 0.4613, "step": 4287 }, { "epoch": 0.64, "grad_norm": 1.3988362550735474, "learning_rate": 2.969502697062605e-06, "loss": 0.5032, "step": 4288 }, { "epoch": 0.64, "grad_norm": 1.1970796585083008, "learning_rate": 2.967279863216084e-06, "loss": 0.462, "step": 4289 }, { "epoch": 0.64, "grad_norm": 1.2645912170410156, "learning_rate": 2.9650575105586314e-06, "loss": 0.4046, "step": 4290 }, { "epoch": 0.64, "grad_norm": 1.247699499130249, "learning_rate": 2.9628356396163283e-06, "loss": 0.4786, "step": 4291 }, { "epoch": 0.64, "grad_norm": 1.2457770109176636, "learning_rate": 2.9606142509151364e-06, "loss": 0.529, "step": 4292 }, { "epoch": 0.64, "grad_norm": 1.389303207397461, "learning_rate": 2.9583933449809128e-06, "loss": 0.4621, "step": 4293 }, { "epoch": 0.65, "grad_norm": 1.3317210674285889, "learning_rate": 2.95617292233939e-06, "loss": 0.5397, "step": 4294 }, { "epoch": 0.65, "grad_norm": 1.21831476688385, "learning_rate": 2.95395298351619e-06, "loss": 0.4527, "step": 4295 }, { "epoch": 0.65, "grad_norm": 1.3685903549194336, "learning_rate": 2.9517335290368227e-06, "loss": 0.4705, "step": 4296 }, { "epoch": 0.65, "grad_norm": 1.3793829679489136, "learning_rate": 2.94951455942668e-06, "loss": 0.5702, "step": 4297 }, { "epoch": 0.65, "grad_norm": 1.2195662260055542, "learning_rate": 2.9472960752110404e-06, "loss": 0.46, "step": 4298 }, { "epoch": 0.65, "grad_norm": 1.3776253461837769, "learning_rate": 2.9450780769150675e-06, "loss": 0.4964, "step": 4299 }, { "epoch": 0.65, "grad_norm": 1.1256458759307861, "learning_rate": 2.94286056506381e-06, "loss": 0.7464, "step": 4300 }, { "epoch": 0.65, "grad_norm": 1.2781198024749756, "learning_rate": 2.940643540182202e-06, "loss": 0.5041, "step": 4301 }, { "epoch": 0.65, "grad_norm": 1.1652072668075562, "learning_rate": 2.9384270027950594e-06, "loss": 0.4858, "step": 4302 }, { "epoch": 0.65, "grad_norm": 1.0374200344085693, "learning_rate": 2.9362109534270844e-06, "loss": 0.4742, "step": 4303 }, { "epoch": 0.65, "grad_norm": 1.7010226249694824, "learning_rate": 2.9339953926028664e-06, "loss": 0.4477, "step": 4304 }, { "epoch": 0.65, "grad_norm": 3.4754645824432373, "learning_rate": 2.9317803208468777e-06, "loss": 0.429, "step": 4305 }, { "epoch": 0.65, "grad_norm": 1.4287759065628052, "learning_rate": 2.929565738683471e-06, "loss": 0.5257, "step": 4306 }, { "epoch": 0.65, "grad_norm": 1.6684770584106445, "learning_rate": 2.9273516466368877e-06, "loss": 0.493, "step": 4307 }, { "epoch": 0.65, "grad_norm": 1.4855554103851318, "learning_rate": 2.925138045231251e-06, "loss": 0.4392, "step": 4308 }, { "epoch": 0.65, "grad_norm": 1.2371604442596436, "learning_rate": 2.9229249349905686e-06, "loss": 0.6805, "step": 4309 }, { "epoch": 0.65, "grad_norm": 1.1810919046401978, "learning_rate": 2.9207123164387318e-06, "loss": 0.5548, "step": 4310 }, { "epoch": 0.65, "grad_norm": 1.3399872779846191, "learning_rate": 2.9185001900995168e-06, "loss": 0.4448, "step": 4311 }, { "epoch": 0.65, "grad_norm": 1.3611321449279785, "learning_rate": 2.916288556496579e-06, "loss": 0.425, "step": 4312 }, { "epoch": 0.65, "grad_norm": 1.050117015838623, "learning_rate": 2.9140774161534633e-06, "loss": 0.4937, "step": 4313 }, { "epoch": 0.65, "grad_norm": 1.2953977584838867, "learning_rate": 2.911866769593592e-06, "loss": 0.5104, "step": 4314 }, { "epoch": 0.65, "grad_norm": 1.237457036972046, "learning_rate": 2.909656617340275e-06, "loss": 0.7762, "step": 4315 }, { "epoch": 0.65, "grad_norm": 1.2500289678573608, "learning_rate": 2.9074469599167045e-06, "loss": 0.4972, "step": 4316 }, { "epoch": 0.65, "grad_norm": 1.1386971473693848, "learning_rate": 2.9052377978459512e-06, "loss": 0.5145, "step": 4317 }, { "epoch": 0.65, "grad_norm": 2.105661630630493, "learning_rate": 2.9030291316509756e-06, "loss": 0.5074, "step": 4318 }, { "epoch": 0.65, "grad_norm": 1.2458120584487915, "learning_rate": 2.9008209618546134e-06, "loss": 0.4374, "step": 4319 }, { "epoch": 0.65, "grad_norm": 1.4121533632278442, "learning_rate": 2.89861328897959e-06, "loss": 0.5083, "step": 4320 }, { "epoch": 0.65, "grad_norm": 1.8877933025360107, "learning_rate": 2.8964061135485076e-06, "loss": 0.5366, "step": 4321 }, { "epoch": 0.65, "grad_norm": 1.3019108772277832, "learning_rate": 2.894199436083851e-06, "loss": 0.4896, "step": 4322 }, { "epoch": 0.65, "grad_norm": 1.4432741403579712, "learning_rate": 2.8919932571079925e-06, "loss": 0.4716, "step": 4323 }, { "epoch": 0.65, "grad_norm": 1.1405900716781616, "learning_rate": 2.8897875771431778e-06, "loss": 0.5017, "step": 4324 }, { "epoch": 0.65, "grad_norm": 1.0690838098526, "learning_rate": 2.8875823967115426e-06, "loss": 0.4731, "step": 4325 }, { "epoch": 0.65, "grad_norm": 1.6502783298492432, "learning_rate": 2.8853777163351027e-06, "loss": 0.5136, "step": 4326 }, { "epoch": 0.65, "grad_norm": 1.2432841062545776, "learning_rate": 2.88317353653575e-06, "loss": 0.4772, "step": 4327 }, { "epoch": 0.65, "grad_norm": 1.2803298234939575, "learning_rate": 2.880969857835266e-06, "loss": 0.5227, "step": 4328 }, { "epoch": 0.65, "grad_norm": 1.6145129203796387, "learning_rate": 2.878766680755304e-06, "loss": 0.4354, "step": 4329 }, { "epoch": 0.65, "grad_norm": 1.177534580230713, "learning_rate": 2.8765640058174096e-06, "loss": 0.5125, "step": 4330 }, { "epoch": 0.65, "grad_norm": 1.4912227392196655, "learning_rate": 2.874361833542999e-06, "loss": 0.5359, "step": 4331 }, { "epoch": 0.65, "grad_norm": 1.3847614526748657, "learning_rate": 2.8721601644533787e-06, "loss": 0.5338, "step": 4332 }, { "epoch": 0.65, "grad_norm": 1.241571068763733, "learning_rate": 2.8699589990697274e-06, "loss": 0.5052, "step": 4333 }, { "epoch": 0.65, "grad_norm": 1.3024877309799194, "learning_rate": 2.8677583379131124e-06, "loss": 0.514, "step": 4334 }, { "epoch": 0.65, "grad_norm": 1.0328795909881592, "learning_rate": 2.8655581815044775e-06, "loss": 0.4145, "step": 4335 }, { "epoch": 0.65, "grad_norm": 0.9815438985824585, "learning_rate": 2.8633585303646414e-06, "loss": 0.5076, "step": 4336 }, { "epoch": 0.65, "grad_norm": 1.427781105041504, "learning_rate": 2.8611593850143197e-06, "loss": 0.4375, "step": 4337 }, { "epoch": 0.65, "grad_norm": 1.2854487895965576, "learning_rate": 2.8589607459740924e-06, "loss": 0.4607, "step": 4338 }, { "epoch": 0.65, "grad_norm": 1.2772749662399292, "learning_rate": 2.8567626137644232e-06, "loss": 0.4638, "step": 4339 }, { "epoch": 0.65, "grad_norm": 1.0269016027450562, "learning_rate": 2.854564988905663e-06, "loss": 0.4989, "step": 4340 }, { "epoch": 0.65, "grad_norm": 1.651062250137329, "learning_rate": 2.8523678719180325e-06, "loss": 0.4453, "step": 4341 }, { "epoch": 0.65, "grad_norm": 1.13922119140625, "learning_rate": 2.8501712633216403e-06, "loss": 0.4138, "step": 4342 }, { "epoch": 0.65, "grad_norm": 1.243543267250061, "learning_rate": 2.8479751636364693e-06, "loss": 0.409, "step": 4343 }, { "epoch": 0.65, "grad_norm": 1.1351475715637207, "learning_rate": 2.845779573382387e-06, "loss": 0.4892, "step": 4344 }, { "epoch": 0.65, "grad_norm": 1.6363146305084229, "learning_rate": 2.8435844930791325e-06, "loss": 0.5051, "step": 4345 }, { "epoch": 0.65, "grad_norm": 1.1675444841384888, "learning_rate": 2.841389923246335e-06, "loss": 0.4466, "step": 4346 }, { "epoch": 0.65, "grad_norm": 1.2183853387832642, "learning_rate": 2.839195864403491e-06, "loss": 0.4387, "step": 4347 }, { "epoch": 0.65, "grad_norm": 1.0989991426467896, "learning_rate": 2.8370023170699856e-06, "loss": 0.4219, "step": 4348 }, { "epoch": 0.65, "grad_norm": 1.1316732168197632, "learning_rate": 2.834809281765081e-06, "loss": 0.4999, "step": 4349 }, { "epoch": 0.65, "grad_norm": 0.9538774490356445, "learning_rate": 2.8326167590079123e-06, "loss": 0.5522, "step": 4350 }, { "epoch": 0.65, "grad_norm": 1.0305209159851074, "learning_rate": 2.830424749317498e-06, "loss": 0.4779, "step": 4351 }, { "epoch": 0.65, "grad_norm": 1.2298933267593384, "learning_rate": 2.8282332532127367e-06, "loss": 0.4593, "step": 4352 }, { "epoch": 0.65, "grad_norm": 1.146284818649292, "learning_rate": 2.8260422712124e-06, "loss": 0.4899, "step": 4353 }, { "epoch": 0.65, "grad_norm": 1.1725326776504517, "learning_rate": 2.8238518038351448e-06, "loss": 0.4939, "step": 4354 }, { "epoch": 0.65, "grad_norm": 1.0730575323104858, "learning_rate": 2.821661851599498e-06, "loss": 0.4614, "step": 4355 }, { "epoch": 0.65, "grad_norm": 1.3861353397369385, "learning_rate": 2.819472415023874e-06, "loss": 0.4523, "step": 4356 }, { "epoch": 0.65, "grad_norm": 1.2037205696105957, "learning_rate": 2.8172834946265547e-06, "loss": 0.4772, "step": 4357 }, { "epoch": 0.65, "grad_norm": 1.0792938470840454, "learning_rate": 2.81509509092571e-06, "loss": 0.5545, "step": 4358 }, { "epoch": 0.65, "grad_norm": 1.1097036600112915, "learning_rate": 2.8129072044393785e-06, "loss": 0.5019, "step": 4359 }, { "epoch": 0.65, "grad_norm": 1.500617265701294, "learning_rate": 2.8107198356854827e-06, "loss": 0.4782, "step": 4360 }, { "epoch": 0.66, "grad_norm": 1.1075726747512817, "learning_rate": 2.8085329851818215e-06, "loss": 0.484, "step": 4361 }, { "epoch": 0.66, "grad_norm": 1.5261400938034058, "learning_rate": 2.806346653446067e-06, "loss": 0.5261, "step": 4362 }, { "epoch": 0.66, "grad_norm": 1.7961992025375366, "learning_rate": 2.8041608409957753e-06, "loss": 0.4765, "step": 4363 }, { "epoch": 0.66, "grad_norm": 1.3696656227111816, "learning_rate": 2.8019755483483735e-06, "loss": 0.4789, "step": 4364 }, { "epoch": 0.66, "grad_norm": 1.6909167766571045, "learning_rate": 2.799790776021165e-06, "loss": 0.4513, "step": 4365 }, { "epoch": 0.66, "grad_norm": 1.3224612474441528, "learning_rate": 2.7976065245313388e-06, "loss": 0.7606, "step": 4366 }, { "epoch": 0.66, "grad_norm": 1.2582327127456665, "learning_rate": 2.79542279439595e-06, "loss": 0.5131, "step": 4367 }, { "epoch": 0.66, "grad_norm": 0.944237470626831, "learning_rate": 2.7932395861319385e-06, "loss": 0.4383, "step": 4368 }, { "epoch": 0.66, "grad_norm": 1.1584080457687378, "learning_rate": 2.7910569002561137e-06, "loss": 0.5274, "step": 4369 }, { "epoch": 0.66, "grad_norm": 1.3260369300842285, "learning_rate": 2.7888747372851666e-06, "loss": 0.5233, "step": 4370 }, { "epoch": 0.66, "grad_norm": 1.0793379545211792, "learning_rate": 2.7866930977356644e-06, "loss": 0.7113, "step": 4371 }, { "epoch": 0.66, "grad_norm": 1.8942729234695435, "learning_rate": 2.7845119821240453e-06, "loss": 0.4736, "step": 4372 }, { "epoch": 0.66, "grad_norm": 1.1258329153060913, "learning_rate": 2.7823313909666295e-06, "loss": 0.4314, "step": 4373 }, { "epoch": 0.66, "grad_norm": 1.1188169717788696, "learning_rate": 2.780151324779608e-06, "loss": 0.5066, "step": 4374 }, { "epoch": 0.66, "grad_norm": 1.386891484260559, "learning_rate": 2.7779717840790532e-06, "loss": 0.477, "step": 4375 }, { "epoch": 0.66, "grad_norm": 1.2304670810699463, "learning_rate": 2.7757927693809074e-06, "loss": 0.5187, "step": 4376 }, { "epoch": 0.66, "grad_norm": 1.1947076320648193, "learning_rate": 2.77361428120099e-06, "loss": 0.5132, "step": 4377 }, { "epoch": 0.66, "grad_norm": 1.3714967966079712, "learning_rate": 2.7714363200549986e-06, "loss": 0.4785, "step": 4378 }, { "epoch": 0.66, "grad_norm": 1.0661033391952515, "learning_rate": 2.769258886458501e-06, "loss": 0.4767, "step": 4379 }, { "epoch": 0.66, "grad_norm": 1.3623988628387451, "learning_rate": 2.767081980926948e-06, "loss": 0.526, "step": 4380 }, { "epoch": 0.66, "grad_norm": 1.0739184617996216, "learning_rate": 2.764905603975655e-06, "loss": 0.6062, "step": 4381 }, { "epoch": 0.66, "grad_norm": 1.1054322719573975, "learning_rate": 2.7627297561198206e-06, "loss": 0.4517, "step": 4382 }, { "epoch": 0.66, "grad_norm": 1.5669161081314087, "learning_rate": 2.7605544378745176e-06, "loss": 0.4119, "step": 4383 }, { "epoch": 0.66, "grad_norm": 1.1882458925247192, "learning_rate": 2.7583796497546855e-06, "loss": 0.5024, "step": 4384 }, { "epoch": 0.66, "grad_norm": 1.5631872415542603, "learning_rate": 2.75620539227515e-06, "loss": 0.4906, "step": 4385 }, { "epoch": 0.66, "grad_norm": 1.1725471019744873, "learning_rate": 2.7540316659506e-06, "loss": 0.4619, "step": 4386 }, { "epoch": 0.66, "grad_norm": 1.1428018808364868, "learning_rate": 2.7518584712956074e-06, "loss": 0.4924, "step": 4387 }, { "epoch": 0.66, "grad_norm": 1.2666014432907104, "learning_rate": 2.7496858088246144e-06, "loss": 0.4469, "step": 4388 }, { "epoch": 0.66, "grad_norm": 1.224772334098816, "learning_rate": 2.7475136790519334e-06, "loss": 0.4856, "step": 4389 }, { "epoch": 0.66, "grad_norm": 1.0979218482971191, "learning_rate": 2.74534208249176e-06, "loss": 0.4863, "step": 4390 }, { "epoch": 0.66, "grad_norm": 1.2831681966781616, "learning_rate": 2.743171019658154e-06, "loss": 0.4751, "step": 4391 }, { "epoch": 0.66, "grad_norm": 1.0731337070465088, "learning_rate": 2.741000491065056e-06, "loss": 0.5095, "step": 4392 }, { "epoch": 0.66, "grad_norm": 1.1334022283554077, "learning_rate": 2.7388304972262776e-06, "loss": 0.4689, "step": 4393 }, { "epoch": 0.66, "grad_norm": 1.2210475206375122, "learning_rate": 2.7366610386555005e-06, "loss": 0.4455, "step": 4394 }, { "epoch": 0.66, "grad_norm": 1.1216636896133423, "learning_rate": 2.734492115866287e-06, "loss": 0.499, "step": 4395 }, { "epoch": 0.66, "grad_norm": 1.6001759767532349, "learning_rate": 2.732323729372064e-06, "loss": 0.4915, "step": 4396 }, { "epoch": 0.66, "grad_norm": 1.2502681016921997, "learning_rate": 2.7301558796861393e-06, "loss": 0.5051, "step": 4397 }, { "epoch": 0.66, "grad_norm": 1.0631980895996094, "learning_rate": 2.7279885673216876e-06, "loss": 0.4526, "step": 4398 }, { "epoch": 0.66, "grad_norm": 1.1641143560409546, "learning_rate": 2.7258217927917607e-06, "loss": 0.5185, "step": 4399 }, { "epoch": 0.66, "grad_norm": 1.1054258346557617, "learning_rate": 2.7236555566092814e-06, "loss": 0.5211, "step": 4400 }, { "epoch": 0.66, "grad_norm": 1.09732985496521, "learning_rate": 2.721489859287041e-06, "loss": 0.4038, "step": 4401 }, { "epoch": 0.66, "grad_norm": 1.4220330715179443, "learning_rate": 2.719324701337713e-06, "loss": 0.5414, "step": 4402 }, { "epoch": 0.66, "grad_norm": 1.7818074226379395, "learning_rate": 2.71716008327383e-06, "loss": 0.5174, "step": 4403 }, { "epoch": 0.66, "grad_norm": 1.1221779584884644, "learning_rate": 2.7149960056078123e-06, "loss": 0.7155, "step": 4404 }, { "epoch": 0.66, "grad_norm": 1.4356663227081299, "learning_rate": 2.7128324688519404e-06, "loss": 0.4837, "step": 4405 }, { "epoch": 0.66, "grad_norm": 1.2198671102523804, "learning_rate": 2.710669473518369e-06, "loss": 0.4363, "step": 4406 }, { "epoch": 0.66, "grad_norm": 1.0920742750167847, "learning_rate": 2.7085070201191297e-06, "loss": 0.4192, "step": 4407 }, { "epoch": 0.66, "grad_norm": 1.3358449935913086, "learning_rate": 2.7063451091661187e-06, "loss": 0.4792, "step": 4408 }, { "epoch": 0.66, "grad_norm": 1.528233528137207, "learning_rate": 2.70418374117111e-06, "loss": 0.4866, "step": 4409 }, { "epoch": 0.66, "grad_norm": 1.1718482971191406, "learning_rate": 2.702022916645744e-06, "loss": 0.4695, "step": 4410 }, { "epoch": 0.66, "grad_norm": 1.1615207195281982, "learning_rate": 2.699862636101538e-06, "loss": 0.6692, "step": 4411 }, { "epoch": 0.66, "grad_norm": 1.111992597579956, "learning_rate": 2.6977029000498754e-06, "loss": 0.4458, "step": 4412 }, { "epoch": 0.66, "grad_norm": 1.5165174007415771, "learning_rate": 2.6955437090020108e-06, "loss": 0.4348, "step": 4413 }, { "epoch": 0.66, "grad_norm": 1.2378970384597778, "learning_rate": 2.6933850634690746e-06, "loss": 0.4524, "step": 4414 }, { "epoch": 0.66, "grad_norm": 1.1144176721572876, "learning_rate": 2.6912269639620637e-06, "loss": 0.4119, "step": 4415 }, { "epoch": 0.66, "grad_norm": 1.1413673162460327, "learning_rate": 2.68906941099185e-06, "loss": 0.5383, "step": 4416 }, { "epoch": 0.66, "grad_norm": 1.429449200630188, "learning_rate": 2.686912405069171e-06, "loss": 0.4639, "step": 4417 }, { "epoch": 0.66, "grad_norm": 1.1269845962524414, "learning_rate": 2.6847559467046346e-06, "loss": 0.4898, "step": 4418 }, { "epoch": 0.66, "grad_norm": 1.0366387367248535, "learning_rate": 2.682600036408727e-06, "loss": 0.554, "step": 4419 }, { "epoch": 0.66, "grad_norm": 1.3324127197265625, "learning_rate": 2.6804446746917924e-06, "loss": 0.4951, "step": 4420 }, { "epoch": 0.66, "grad_norm": 1.3374673128128052, "learning_rate": 2.678289862064058e-06, "loss": 0.4614, "step": 4421 }, { "epoch": 0.66, "grad_norm": 1.3322069644927979, "learning_rate": 2.67613559903561e-06, "loss": 0.4361, "step": 4422 }, { "epoch": 0.66, "grad_norm": 1.315969467163086, "learning_rate": 2.673981886116413e-06, "loss": 0.4818, "step": 4423 }, { "epoch": 0.66, "grad_norm": 1.1858863830566406, "learning_rate": 2.6718287238162963e-06, "loss": 0.661, "step": 4424 }, { "epoch": 0.66, "grad_norm": 1.1255645751953125, "learning_rate": 2.6696761126449557e-06, "loss": 0.4409, "step": 4425 }, { "epoch": 0.66, "grad_norm": 1.1325016021728516, "learning_rate": 2.6675240531119695e-06, "loss": 0.4377, "step": 4426 }, { "epoch": 0.67, "grad_norm": 1.2445045709609985, "learning_rate": 2.665372545726771e-06, "loss": 0.4852, "step": 4427 }, { "epoch": 0.67, "grad_norm": 3.4755325317382812, "learning_rate": 2.6632215909986725e-06, "loss": 0.4754, "step": 4428 }, { "epoch": 0.67, "grad_norm": 1.430940866470337, "learning_rate": 2.6610711894368503e-06, "loss": 0.479, "step": 4429 }, { "epoch": 0.67, "grad_norm": 1.1327118873596191, "learning_rate": 2.658921341550349e-06, "loss": 0.4319, "step": 4430 }, { "epoch": 0.67, "grad_norm": 1.1873689889907837, "learning_rate": 2.6567720478480876e-06, "loss": 0.4398, "step": 4431 }, { "epoch": 0.67, "grad_norm": 1.2421411275863647, "learning_rate": 2.6546233088388475e-06, "loss": 0.5502, "step": 4432 }, { "epoch": 0.67, "grad_norm": 1.1103107929229736, "learning_rate": 2.6524751250312863e-06, "loss": 0.4934, "step": 4433 }, { "epoch": 0.67, "grad_norm": 1.182761788368225, "learning_rate": 2.6503274969339204e-06, "loss": 0.489, "step": 4434 }, { "epoch": 0.67, "grad_norm": 1.2781084775924683, "learning_rate": 2.6481804250551456e-06, "loss": 0.4415, "step": 4435 }, { "epoch": 0.67, "grad_norm": 1.2689567804336548, "learning_rate": 2.6460339099032173e-06, "loss": 0.4818, "step": 4436 }, { "epoch": 0.67, "grad_norm": 1.2082895040512085, "learning_rate": 2.643887951986259e-06, "loss": 0.4827, "step": 4437 }, { "epoch": 0.67, "grad_norm": 1.1180554628372192, "learning_rate": 2.641742551812274e-06, "loss": 0.4715, "step": 4438 }, { "epoch": 0.67, "grad_norm": 1.2797601222991943, "learning_rate": 2.639597709889119e-06, "loss": 0.4791, "step": 4439 }, { "epoch": 0.67, "grad_norm": 1.4495826959609985, "learning_rate": 2.637453426724528e-06, "loss": 0.4877, "step": 4440 }, { "epoch": 0.67, "grad_norm": 1.0823191404342651, "learning_rate": 2.6353097028260975e-06, "loss": 0.4687, "step": 4441 }, { "epoch": 0.67, "grad_norm": 1.024369716644287, "learning_rate": 2.633166538701293e-06, "loss": 0.4884, "step": 4442 }, { "epoch": 0.67, "grad_norm": 1.084964394569397, "learning_rate": 2.6310239348574494e-06, "loss": 0.5093, "step": 4443 }, { "epoch": 0.67, "grad_norm": 1.260676622390747, "learning_rate": 2.6288818918017667e-06, "loss": 0.5171, "step": 4444 }, { "epoch": 0.67, "grad_norm": 1.1463185548782349, "learning_rate": 2.6267404100413153e-06, "loss": 0.4922, "step": 4445 }, { "epoch": 0.67, "grad_norm": 1.294204592704773, "learning_rate": 2.6245994900830265e-06, "loss": 0.5078, "step": 4446 }, { "epoch": 0.67, "grad_norm": 1.174198865890503, "learning_rate": 2.6224591324337074e-06, "loss": 0.4773, "step": 4447 }, { "epoch": 0.67, "grad_norm": 1.3419381380081177, "learning_rate": 2.6203193376000223e-06, "loss": 0.5447, "step": 4448 }, { "epoch": 0.67, "grad_norm": 1.3408390283584595, "learning_rate": 2.6181801060885105e-06, "loss": 0.5278, "step": 4449 }, { "epoch": 0.67, "grad_norm": 1.2337749004364014, "learning_rate": 2.6160414384055753e-06, "loss": 0.44, "step": 4450 }, { "epoch": 0.67, "grad_norm": 1.1671326160430908, "learning_rate": 2.6139033350574826e-06, "loss": 0.4295, "step": 4451 }, { "epoch": 0.67, "grad_norm": 1.174163818359375, "learning_rate": 2.6117657965503712e-06, "loss": 0.4239, "step": 4452 }, { "epoch": 0.67, "grad_norm": 1.26812744140625, "learning_rate": 2.609628823390242e-06, "loss": 0.4964, "step": 4453 }, { "epoch": 0.67, "grad_norm": 1.3181856870651245, "learning_rate": 2.6074924160829606e-06, "loss": 0.5524, "step": 4454 }, { "epoch": 0.67, "grad_norm": 1.3127559423446655, "learning_rate": 2.6053565751342647e-06, "loss": 0.5653, "step": 4455 }, { "epoch": 0.67, "grad_norm": 1.1104439496994019, "learning_rate": 2.6032213010497505e-06, "loss": 0.4776, "step": 4456 }, { "epoch": 0.67, "grad_norm": 1.171593427658081, "learning_rate": 2.601086594334888e-06, "loss": 0.4264, "step": 4457 }, { "epoch": 0.67, "grad_norm": 1.397636890411377, "learning_rate": 2.598952455495005e-06, "loss": 0.5107, "step": 4458 }, { "epoch": 0.67, "grad_norm": 1.2165426015853882, "learning_rate": 2.5968188850352993e-06, "loss": 0.4644, "step": 4459 }, { "epoch": 0.67, "grad_norm": 1.1400967836380005, "learning_rate": 2.594685883460837e-06, "loss": 0.7496, "step": 4460 }, { "epoch": 0.67, "grad_norm": 1.169296145439148, "learning_rate": 2.5925534512765416e-06, "loss": 0.4667, "step": 4461 }, { "epoch": 0.67, "grad_norm": 1.4805126190185547, "learning_rate": 2.59042158898721e-06, "loss": 0.5285, "step": 4462 }, { "epoch": 0.67, "grad_norm": 1.1365644931793213, "learning_rate": 2.5882902970974953e-06, "loss": 0.4513, "step": 4463 }, { "epoch": 0.67, "grad_norm": 1.0924394130706787, "learning_rate": 2.5861595761119263e-06, "loss": 0.4764, "step": 4464 }, { "epoch": 0.67, "grad_norm": 1.1970175504684448, "learning_rate": 2.584029426534888e-06, "loss": 0.4955, "step": 4465 }, { "epoch": 0.67, "grad_norm": 1.3345998525619507, "learning_rate": 2.5818998488706314e-06, "loss": 0.5098, "step": 4466 }, { "epoch": 0.67, "grad_norm": 1.069901943206787, "learning_rate": 2.5797708436232782e-06, "loss": 0.4568, "step": 4467 }, { "epoch": 0.67, "grad_norm": 1.240655541419983, "learning_rate": 2.577642411296806e-06, "loss": 0.5298, "step": 4468 }, { "epoch": 0.67, "grad_norm": 1.1069252490997314, "learning_rate": 2.5755145523950655e-06, "loss": 0.477, "step": 4469 }, { "epoch": 0.67, "grad_norm": 1.4988102912902832, "learning_rate": 2.5733872674217617e-06, "loss": 0.4423, "step": 4470 }, { "epoch": 0.67, "grad_norm": 1.0992438793182373, "learning_rate": 2.571260556880473e-06, "loss": 0.4279, "step": 4471 }, { "epoch": 0.67, "grad_norm": 1.092806339263916, "learning_rate": 2.5691344212746385e-06, "loss": 0.4766, "step": 4472 }, { "epoch": 0.67, "grad_norm": 1.4390416145324707, "learning_rate": 2.567008861107558e-06, "loss": 0.5118, "step": 4473 }, { "epoch": 0.67, "grad_norm": 1.396329402923584, "learning_rate": 2.564883876882401e-06, "loss": 0.4288, "step": 4474 }, { "epoch": 0.67, "grad_norm": 1.63970947265625, "learning_rate": 2.5627594691021938e-06, "loss": 0.4822, "step": 4475 }, { "epoch": 0.67, "grad_norm": 1.1133519411087036, "learning_rate": 2.5606356382698338e-06, "loss": 0.6723, "step": 4476 }, { "epoch": 0.67, "grad_norm": 1.084214687347412, "learning_rate": 2.5585123848880744e-06, "loss": 0.4621, "step": 4477 }, { "epoch": 0.67, "grad_norm": 1.411730408668518, "learning_rate": 2.556389709459539e-06, "loss": 0.4904, "step": 4478 }, { "epoch": 0.67, "grad_norm": 1.2579894065856934, "learning_rate": 2.5542676124867103e-06, "loss": 0.5293, "step": 4479 }, { "epoch": 0.67, "grad_norm": 1.3337349891662598, "learning_rate": 2.5521460944719323e-06, "loss": 0.5246, "step": 4480 }, { "epoch": 0.67, "grad_norm": 1.219702124595642, "learning_rate": 2.5500251559174155e-06, "loss": 0.5114, "step": 4481 }, { "epoch": 0.67, "grad_norm": 1.2530955076217651, "learning_rate": 2.5479047973252345e-06, "loss": 0.4313, "step": 4482 }, { "epoch": 0.67, "grad_norm": 1.9919633865356445, "learning_rate": 2.545785019197322e-06, "loss": 0.5303, "step": 4483 }, { "epoch": 0.67, "grad_norm": 1.1250789165496826, "learning_rate": 2.5436658220354778e-06, "loss": 0.4378, "step": 4484 }, { "epoch": 0.67, "grad_norm": 1.0301718711853027, "learning_rate": 2.541547206341358e-06, "loss": 0.4741, "step": 4485 }, { "epoch": 0.67, "grad_norm": 1.1107115745544434, "learning_rate": 2.5394291726164898e-06, "loss": 0.4216, "step": 4486 }, { "epoch": 0.67, "grad_norm": 1.2508615255355835, "learning_rate": 2.537311721362254e-06, "loss": 0.5541, "step": 4487 }, { "epoch": 0.67, "grad_norm": 1.3302743434906006, "learning_rate": 2.5351948530799004e-06, "loss": 0.4841, "step": 4488 }, { "epoch": 0.67, "grad_norm": 1.1754323244094849, "learning_rate": 2.5330785682705344e-06, "loss": 0.5323, "step": 4489 }, { "epoch": 0.67, "grad_norm": 1.6173943281173706, "learning_rate": 2.5309628674351306e-06, "loss": 0.4497, "step": 4490 }, { "epoch": 0.67, "grad_norm": 1.002780556678772, "learning_rate": 2.528847751074519e-06, "loss": 0.4931, "step": 4491 }, { "epoch": 0.67, "grad_norm": 1.1622984409332275, "learning_rate": 2.52673321968939e-06, "loss": 0.4074, "step": 4492 }, { "epoch": 0.67, "grad_norm": 1.164125680923462, "learning_rate": 2.524619273780308e-06, "loss": 0.4912, "step": 4493 }, { "epoch": 0.68, "grad_norm": 1.2061957120895386, "learning_rate": 2.5225059138476845e-06, "loss": 0.4274, "step": 4494 }, { "epoch": 0.68, "grad_norm": 2.1983964443206787, "learning_rate": 2.5203931403917957e-06, "loss": 0.4865, "step": 4495 }, { "epoch": 0.68, "grad_norm": 1.2377198934555054, "learning_rate": 2.518280953912787e-06, "loss": 0.7692, "step": 4496 }, { "epoch": 0.68, "grad_norm": 1.2041871547698975, "learning_rate": 2.516169354910653e-06, "loss": 0.431, "step": 4497 }, { "epoch": 0.68, "grad_norm": 1.1862761974334717, "learning_rate": 2.5140583438852595e-06, "loss": 0.5575, "step": 4498 }, { "epoch": 0.68, "grad_norm": 1.2143268585205078, "learning_rate": 2.511947921336325e-06, "loss": 0.4343, "step": 4499 }, { "epoch": 0.68, "grad_norm": 1.4053406715393066, "learning_rate": 2.5098380877634366e-06, "loss": 0.5301, "step": 4500 }, { "epoch": 0.68, "grad_norm": 1.1707050800323486, "learning_rate": 2.507728843666034e-06, "loss": 0.4987, "step": 4501 }, { "epoch": 0.68, "grad_norm": 1.1562824249267578, "learning_rate": 2.5056201895434242e-06, "loss": 0.5468, "step": 4502 }, { "epoch": 0.68, "grad_norm": 1.136733055114746, "learning_rate": 2.503512125894768e-06, "loss": 0.7239, "step": 4503 }, { "epoch": 0.68, "grad_norm": 1.3816301822662354, "learning_rate": 2.5014046532190916e-06, "loss": 0.4807, "step": 4504 }, { "epoch": 0.68, "grad_norm": 1.0648558139801025, "learning_rate": 2.499297772015282e-06, "loss": 0.7235, "step": 4505 }, { "epoch": 0.68, "grad_norm": 1.2665293216705322, "learning_rate": 2.49719148278208e-06, "loss": 0.509, "step": 4506 }, { "epoch": 0.68, "grad_norm": 1.1460983753204346, "learning_rate": 2.4950857860180943e-06, "loss": 0.5393, "step": 4507 }, { "epoch": 0.68, "grad_norm": 1.2337592840194702, "learning_rate": 2.492980682221786e-06, "loss": 0.4593, "step": 4508 }, { "epoch": 0.68, "grad_norm": 1.2374987602233887, "learning_rate": 2.4908761718914783e-06, "loss": 0.5276, "step": 4509 }, { "epoch": 0.68, "grad_norm": 1.2156091928482056, "learning_rate": 2.4887722555253574e-06, "loss": 0.5049, "step": 4510 }, { "epoch": 0.68, "grad_norm": 1.167044758796692, "learning_rate": 2.4866689336214623e-06, "loss": 0.4667, "step": 4511 }, { "epoch": 0.68, "grad_norm": 1.0633870363235474, "learning_rate": 2.4845662066777e-06, "loss": 0.5137, "step": 4512 }, { "epoch": 0.68, "grad_norm": 1.399523377418518, "learning_rate": 2.482464075191826e-06, "loss": 0.4909, "step": 4513 }, { "epoch": 0.68, "grad_norm": 1.029323935508728, "learning_rate": 2.4803625396614643e-06, "loss": 0.4595, "step": 4514 }, { "epoch": 0.68, "grad_norm": 1.0131299495697021, "learning_rate": 2.4782616005840954e-06, "loss": 0.4017, "step": 4515 }, { "epoch": 0.68, "grad_norm": 1.1188395023345947, "learning_rate": 2.4761612584570526e-06, "loss": 0.4363, "step": 4516 }, { "epoch": 0.68, "grad_norm": 1.1317822933197021, "learning_rate": 2.4740615137775375e-06, "loss": 0.4796, "step": 4517 }, { "epoch": 0.68, "grad_norm": 2.4839677810668945, "learning_rate": 2.471962367042601e-06, "loss": 0.5425, "step": 4518 }, { "epoch": 0.68, "grad_norm": 1.0781315565109253, "learning_rate": 2.46986381874916e-06, "loss": 0.5027, "step": 4519 }, { "epoch": 0.68, "grad_norm": 1.4309639930725098, "learning_rate": 2.4677658693939864e-06, "loss": 0.4317, "step": 4520 }, { "epoch": 0.68, "grad_norm": 4.478209018707275, "learning_rate": 2.4656685194737068e-06, "loss": 0.4688, "step": 4521 }, { "epoch": 0.68, "grad_norm": 1.1992387771606445, "learning_rate": 2.4635717694848145e-06, "loss": 0.6895, "step": 4522 }, { "epoch": 0.68, "grad_norm": 1.163556694984436, "learning_rate": 2.461475619923651e-06, "loss": 0.7259, "step": 4523 }, { "epoch": 0.68, "grad_norm": 1.3663398027420044, "learning_rate": 2.4593800712864254e-06, "loss": 0.4612, "step": 4524 }, { "epoch": 0.68, "grad_norm": 1.0881351232528687, "learning_rate": 2.4572851240691945e-06, "loss": 0.4902, "step": 4525 }, { "epoch": 0.68, "grad_norm": 0.9543591141700745, "learning_rate": 2.4551907787678807e-06, "loss": 0.5476, "step": 4526 }, { "epoch": 0.68, "grad_norm": 3.0198097229003906, "learning_rate": 2.453097035878263e-06, "loss": 0.5229, "step": 4527 }, { "epoch": 0.68, "grad_norm": 1.2076133489608765, "learning_rate": 2.451003895895972e-06, "loss": 0.5151, "step": 4528 }, { "epoch": 0.68, "grad_norm": 1.1211137771606445, "learning_rate": 2.448911359316503e-06, "loss": 0.4396, "step": 4529 }, { "epoch": 0.68, "grad_norm": 1.2243396043777466, "learning_rate": 2.4468194266352014e-06, "loss": 0.4899, "step": 4530 }, { "epoch": 0.68, "grad_norm": 1.0684202909469604, "learning_rate": 2.4447280983472773e-06, "loss": 0.4557, "step": 4531 }, { "epoch": 0.68, "grad_norm": 1.0011248588562012, "learning_rate": 2.4426373749477904e-06, "loss": 0.5012, "step": 4532 }, { "epoch": 0.68, "grad_norm": 1.7836642265319824, "learning_rate": 2.4405472569316597e-06, "loss": 0.5013, "step": 4533 }, { "epoch": 0.68, "grad_norm": 1.0713618993759155, "learning_rate": 2.438457744793665e-06, "loss": 0.4153, "step": 4534 }, { "epoch": 0.68, "grad_norm": 2.7535080909729004, "learning_rate": 2.436368839028435e-06, "loss": 0.4434, "step": 4535 }, { "epoch": 0.68, "grad_norm": 1.3453892469406128, "learning_rate": 2.434280540130464e-06, "loss": 0.53, "step": 4536 }, { "epoch": 0.68, "grad_norm": 1.1930243968963623, "learning_rate": 2.4321928485940937e-06, "loss": 0.4056, "step": 4537 }, { "epoch": 0.68, "grad_norm": 1.3182735443115234, "learning_rate": 2.4301057649135276e-06, "loss": 0.4292, "step": 4538 }, { "epoch": 0.68, "grad_norm": 1.6718130111694336, "learning_rate": 2.4280192895828254e-06, "loss": 0.5345, "step": 4539 }, { "epoch": 0.68, "grad_norm": 1.2998096942901611, "learning_rate": 2.4259334230958983e-06, "loss": 0.4391, "step": 4540 }, { "epoch": 0.68, "grad_norm": 1.432340383529663, "learning_rate": 2.423848165946519e-06, "loss": 0.4675, "step": 4541 }, { "epoch": 0.68, "grad_norm": 3.532484769821167, "learning_rate": 2.4217635186283103e-06, "loss": 0.512, "step": 4542 }, { "epoch": 0.68, "grad_norm": 1.2997404336929321, "learning_rate": 2.419679481634757e-06, "loss": 0.4879, "step": 4543 }, { "epoch": 0.68, "grad_norm": 2.072624683380127, "learning_rate": 2.4175960554591936e-06, "loss": 0.5794, "step": 4544 }, { "epoch": 0.68, "grad_norm": 1.2891952991485596, "learning_rate": 2.4155132405948117e-06, "loss": 0.4936, "step": 4545 }, { "epoch": 0.68, "grad_norm": 1.2014801502227783, "learning_rate": 2.413431037534662e-06, "loss": 0.5306, "step": 4546 }, { "epoch": 0.68, "grad_norm": 1.0859780311584473, "learning_rate": 2.411349446771643e-06, "loss": 0.4286, "step": 4547 }, { "epoch": 0.68, "grad_norm": 2.611074924468994, "learning_rate": 2.4092684687985157e-06, "loss": 0.4365, "step": 4548 }, { "epoch": 0.68, "grad_norm": 1.3071025609970093, "learning_rate": 2.407188104107893e-06, "loss": 0.4707, "step": 4549 }, { "epoch": 0.68, "grad_norm": 1.14737069606781, "learning_rate": 2.4051083531922402e-06, "loss": 0.4165, "step": 4550 }, { "epoch": 0.68, "grad_norm": 1.3672897815704346, "learning_rate": 2.403029216543883e-06, "loss": 0.4741, "step": 4551 }, { "epoch": 0.68, "grad_norm": 1.2727683782577515, "learning_rate": 2.4009506946549937e-06, "loss": 0.4206, "step": 4552 }, { "epoch": 0.68, "grad_norm": 1.089285135269165, "learning_rate": 2.398872788017608e-06, "loss": 0.4659, "step": 4553 }, { "epoch": 0.68, "grad_norm": 1.6022088527679443, "learning_rate": 2.396795497123608e-06, "loss": 0.4856, "step": 4554 }, { "epoch": 0.68, "grad_norm": 1.1792227029800415, "learning_rate": 2.3947188224647373e-06, "loss": 0.4007, "step": 4555 }, { "epoch": 0.68, "grad_norm": 1.1554450988769531, "learning_rate": 2.392642764532588e-06, "loss": 0.4602, "step": 4556 }, { "epoch": 0.68, "grad_norm": 1.2132693529129028, "learning_rate": 2.3905673238186054e-06, "loss": 0.7403, "step": 4557 }, { "epoch": 0.68, "grad_norm": 1.2478724718093872, "learning_rate": 2.388492500814097e-06, "loss": 0.541, "step": 4558 }, { "epoch": 0.68, "grad_norm": 1.0640368461608887, "learning_rate": 2.386418296010212e-06, "loss": 0.4831, "step": 4559 }, { "epoch": 0.68, "grad_norm": 1.3273688554763794, "learning_rate": 2.384344709897967e-06, "loss": 0.5264, "step": 4560 }, { "epoch": 0.69, "grad_norm": 1.4071879386901855, "learning_rate": 2.382271742968221e-06, "loss": 0.5027, "step": 4561 }, { "epoch": 0.69, "grad_norm": 1.8837318420410156, "learning_rate": 2.38019939571169e-06, "loss": 0.4889, "step": 4562 }, { "epoch": 0.69, "grad_norm": 1.269832730293274, "learning_rate": 2.3781276686189453e-06, "loss": 0.4734, "step": 4563 }, { "epoch": 0.69, "grad_norm": 1.2977657318115234, "learning_rate": 2.3760565621804076e-06, "loss": 0.4969, "step": 4564 }, { "epoch": 0.69, "grad_norm": 1.12651526927948, "learning_rate": 2.373986076886356e-06, "loss": 0.5536, "step": 4565 }, { "epoch": 0.69, "grad_norm": 1.314092993736267, "learning_rate": 2.371916213226916e-06, "loss": 0.522, "step": 4566 }, { "epoch": 0.69, "grad_norm": 0.9989048838615417, "learning_rate": 2.3698469716920725e-06, "loss": 0.7221, "step": 4567 }, { "epoch": 0.69, "grad_norm": 1.3975311517715454, "learning_rate": 2.3677783527716587e-06, "loss": 0.5411, "step": 4568 }, { "epoch": 0.69, "grad_norm": 1.3322374820709229, "learning_rate": 2.3657103569553595e-06, "loss": 0.4953, "step": 4569 }, { "epoch": 0.69, "grad_norm": 1.2989569902420044, "learning_rate": 2.3636429847327163e-06, "loss": 0.6931, "step": 4570 }, { "epoch": 0.69, "grad_norm": 2.115281820297241, "learning_rate": 2.361576236593121e-06, "loss": 0.5195, "step": 4571 }, { "epoch": 0.69, "grad_norm": 19.413938522338867, "learning_rate": 2.3595101130258203e-06, "loss": 0.4992, "step": 4572 }, { "epoch": 0.69, "grad_norm": 1.4231164455413818, "learning_rate": 2.3574446145199083e-06, "loss": 0.5016, "step": 4573 }, { "epoch": 0.69, "grad_norm": 1.2013226747512817, "learning_rate": 2.3553797415643315e-06, "loss": 0.4622, "step": 4574 }, { "epoch": 0.69, "grad_norm": 1.5455578565597534, "learning_rate": 2.3533154946478935e-06, "loss": 0.5505, "step": 4575 }, { "epoch": 0.69, "grad_norm": 1.118403434753418, "learning_rate": 2.3512518742592437e-06, "loss": 0.5125, "step": 4576 }, { "epoch": 0.69, "grad_norm": 1.7722214460372925, "learning_rate": 2.3491888808868894e-06, "loss": 0.3277, "step": 4577 }, { "epoch": 0.69, "grad_norm": 1.1262671947479248, "learning_rate": 2.347126515019182e-06, "loss": 0.4425, "step": 4578 }, { "epoch": 0.69, "grad_norm": 1.1805815696716309, "learning_rate": 2.3450647771443317e-06, "loss": 0.4895, "step": 4579 }, { "epoch": 0.69, "grad_norm": 1.349684238433838, "learning_rate": 2.343003667750396e-06, "loss": 0.457, "step": 4580 }, { "epoch": 0.69, "grad_norm": 1.0986727476119995, "learning_rate": 2.3409431873252797e-06, "loss": 0.4353, "step": 4581 }, { "epoch": 0.69, "grad_norm": 4.310070514678955, "learning_rate": 2.338883336356751e-06, "loss": 0.4488, "step": 4582 }, { "epoch": 0.69, "grad_norm": 1.5913963317871094, "learning_rate": 2.3368241153324157e-06, "loss": 0.5482, "step": 4583 }, { "epoch": 0.69, "grad_norm": 1.0154606103897095, "learning_rate": 2.33476552473974e-06, "loss": 0.3783, "step": 4584 }, { "epoch": 0.69, "grad_norm": 1.2265825271606445, "learning_rate": 2.332707565066036e-06, "loss": 0.4493, "step": 4585 }, { "epoch": 0.69, "grad_norm": 1.3515210151672363, "learning_rate": 2.330650236798464e-06, "loss": 0.493, "step": 4586 }, { "epoch": 0.69, "grad_norm": 2.723950147628784, "learning_rate": 2.3285935404240432e-06, "loss": 0.4948, "step": 4587 }, { "epoch": 0.69, "grad_norm": 1.1744364500045776, "learning_rate": 2.3265374764296343e-06, "loss": 0.4841, "step": 4588 }, { "epoch": 0.69, "grad_norm": 1.4553134441375732, "learning_rate": 2.3244820453019566e-06, "loss": 0.4949, "step": 4589 }, { "epoch": 0.69, "grad_norm": 1.8577526807785034, "learning_rate": 2.322427247527571e-06, "loss": 0.4817, "step": 4590 }, { "epoch": 0.69, "grad_norm": 1.4891034364700317, "learning_rate": 2.3203730835928974e-06, "loss": 0.4489, "step": 4591 }, { "epoch": 0.69, "grad_norm": 0.9873102307319641, "learning_rate": 2.3183195539841964e-06, "loss": 0.4276, "step": 4592 }, { "epoch": 0.69, "grad_norm": 1.3218863010406494, "learning_rate": 2.3162666591875852e-06, "loss": 0.4915, "step": 4593 }, { "epoch": 0.69, "grad_norm": 1.3254954814910889, "learning_rate": 2.314214399689031e-06, "loss": 0.4442, "step": 4594 }, { "epoch": 0.69, "grad_norm": 1.2175382375717163, "learning_rate": 2.3121627759743435e-06, "loss": 0.5291, "step": 4595 }, { "epoch": 0.69, "grad_norm": 1.306815266609192, "learning_rate": 2.310111788529192e-06, "loss": 0.5438, "step": 4596 }, { "epoch": 0.69, "grad_norm": 1.9429407119750977, "learning_rate": 2.308061437839087e-06, "loss": 0.4625, "step": 4597 }, { "epoch": 0.69, "grad_norm": 1.3904858827590942, "learning_rate": 2.306011724389389e-06, "loss": 0.4445, "step": 4598 }, { "epoch": 0.69, "grad_norm": 1.1572786569595337, "learning_rate": 2.3039626486653134e-06, "loss": 0.4351, "step": 4599 }, { "epoch": 0.69, "grad_norm": 0.9707667827606201, "learning_rate": 2.3019142111519182e-06, "loss": 0.4582, "step": 4600 }, { "epoch": 0.69, "grad_norm": 1.4729903936386108, "learning_rate": 2.2998664123341163e-06, "loss": 0.4755, "step": 4601 }, { "epoch": 0.69, "grad_norm": 1.08781099319458, "learning_rate": 2.297819252696662e-06, "loss": 0.5086, "step": 4602 }, { "epoch": 0.69, "grad_norm": 1.831851601600647, "learning_rate": 2.2957727327241673e-06, "loss": 0.4738, "step": 4603 }, { "epoch": 0.69, "grad_norm": 1.2914106845855713, "learning_rate": 2.2937268529010836e-06, "loss": 0.4257, "step": 4604 }, { "epoch": 0.69, "grad_norm": 1.2737462520599365, "learning_rate": 2.291681613711717e-06, "loss": 0.5639, "step": 4605 }, { "epoch": 0.69, "grad_norm": 1.2937034368515015, "learning_rate": 2.289637015640222e-06, "loss": 0.4734, "step": 4606 }, { "epoch": 0.69, "grad_norm": 1.5722649097442627, "learning_rate": 2.2875930591705957e-06, "loss": 0.5451, "step": 4607 }, { "epoch": 0.69, "grad_norm": 1.0378526449203491, "learning_rate": 2.2855497447866916e-06, "loss": 0.4919, "step": 4608 }, { "epoch": 0.69, "grad_norm": 1.1945708990097046, "learning_rate": 2.283507072972203e-06, "loss": 0.457, "step": 4609 }, { "epoch": 0.69, "grad_norm": 1.2313100099563599, "learning_rate": 2.2814650442106744e-06, "loss": 0.4597, "step": 4610 }, { "epoch": 0.69, "grad_norm": 1.3549264669418335, "learning_rate": 2.279423658985502e-06, "loss": 0.5275, "step": 4611 }, { "epoch": 0.69, "grad_norm": 1.1420097351074219, "learning_rate": 2.277382917779922e-06, "loss": 0.4444, "step": 4612 }, { "epoch": 0.69, "grad_norm": 1.2881519794464111, "learning_rate": 2.2753428210770256e-06, "loss": 0.5168, "step": 4613 }, { "epoch": 0.69, "grad_norm": 1.0519566535949707, "learning_rate": 2.2733033693597452e-06, "loss": 0.5473, "step": 4614 }, { "epoch": 0.69, "grad_norm": 1.0541282892227173, "learning_rate": 2.2712645631108644e-06, "loss": 0.4976, "step": 4615 }, { "epoch": 0.69, "grad_norm": 1.112925410270691, "learning_rate": 2.2692264028130158e-06, "loss": 0.5477, "step": 4616 }, { "epoch": 0.69, "grad_norm": 1.209088921546936, "learning_rate": 2.2671888889486714e-06, "loss": 0.4605, "step": 4617 }, { "epoch": 0.69, "grad_norm": 1.1109566688537598, "learning_rate": 2.2651520220001594e-06, "loss": 0.3909, "step": 4618 }, { "epoch": 0.69, "grad_norm": 1.3602445125579834, "learning_rate": 2.263115802449647e-06, "loss": 0.5189, "step": 4619 }, { "epoch": 0.69, "grad_norm": 0.9714940190315247, "learning_rate": 2.261080230779155e-06, "loss": 0.4507, "step": 4620 }, { "epoch": 0.69, "grad_norm": 1.1338040828704834, "learning_rate": 2.259045307470544e-06, "loss": 0.386, "step": 4621 }, { "epoch": 0.69, "grad_norm": 1.2064327001571655, "learning_rate": 2.2570110330055284e-06, "loss": 0.5348, "step": 4622 }, { "epoch": 0.69, "grad_norm": 1.097904086112976, "learning_rate": 2.254977407865663e-06, "loss": 0.454, "step": 4623 }, { "epoch": 0.69, "grad_norm": 1.5276329517364502, "learning_rate": 2.2529444325323495e-06, "loss": 0.4208, "step": 4624 }, { "epoch": 0.69, "grad_norm": 1.2882845401763916, "learning_rate": 2.2509121074868405e-06, "loss": 0.5554, "step": 4625 }, { "epoch": 0.69, "grad_norm": 1.0690333843231201, "learning_rate": 2.248880433210229e-06, "loss": 0.4117, "step": 4626 }, { "epoch": 0.7, "grad_norm": 1.2355719804763794, "learning_rate": 2.2468494101834577e-06, "loss": 0.5687, "step": 4627 }, { "epoch": 0.7, "grad_norm": 1.1330041885375977, "learning_rate": 2.244819038887315e-06, "loss": 0.4791, "step": 4628 }, { "epoch": 0.7, "grad_norm": 1.190897822380066, "learning_rate": 2.242789319802432e-06, "loss": 0.5366, "step": 4629 }, { "epoch": 0.7, "grad_norm": 1.163482427597046, "learning_rate": 2.2407602534092896e-06, "loss": 0.4598, "step": 4630 }, { "epoch": 0.7, "grad_norm": 1.2229737043380737, "learning_rate": 2.238731840188209e-06, "loss": 0.4005, "step": 4631 }, { "epoch": 0.7, "grad_norm": 2.1181914806365967, "learning_rate": 2.236704080619363e-06, "loss": 0.4648, "step": 4632 }, { "epoch": 0.7, "grad_norm": 1.177695631980896, "learning_rate": 2.234676975182763e-06, "loss": 0.5558, "step": 4633 }, { "epoch": 0.7, "grad_norm": 1.0820051431655884, "learning_rate": 2.232650524358272e-06, "loss": 0.4164, "step": 4634 }, { "epoch": 0.7, "grad_norm": 1.3153983354568481, "learning_rate": 2.2306247286255938e-06, "loss": 0.7586, "step": 4635 }, { "epoch": 0.7, "grad_norm": 1.4951857328414917, "learning_rate": 2.2285995884642765e-06, "loss": 0.5155, "step": 4636 }, { "epoch": 0.7, "grad_norm": 1.2969073057174683, "learning_rate": 2.2265751043537166e-06, "loss": 0.5287, "step": 4637 }, { "epoch": 0.7, "grad_norm": 2.2480390071868896, "learning_rate": 2.224551276773155e-06, "loss": 0.4532, "step": 4638 }, { "epoch": 0.7, "grad_norm": 1.209214448928833, "learning_rate": 2.222528106201672e-06, "loss": 0.4493, "step": 4639 }, { "epoch": 0.7, "grad_norm": 1.4975048303604126, "learning_rate": 2.2205055931182005e-06, "loss": 0.5202, "step": 4640 }, { "epoch": 0.7, "grad_norm": 1.2027925252914429, "learning_rate": 2.218483738001509e-06, "loss": 0.4024, "step": 4641 }, { "epoch": 0.7, "grad_norm": 2.013226270675659, "learning_rate": 2.2164625413302186e-06, "loss": 0.4849, "step": 4642 }, { "epoch": 0.7, "grad_norm": 1.3002341985702515, "learning_rate": 2.214442003582786e-06, "loss": 0.5284, "step": 4643 }, { "epoch": 0.7, "grad_norm": 1.1675161123275757, "learning_rate": 2.2124221252375215e-06, "loss": 0.4613, "step": 4644 }, { "epoch": 0.7, "grad_norm": 1.4733424186706543, "learning_rate": 2.21040290677257e-06, "loss": 0.4567, "step": 4645 }, { "epoch": 0.7, "grad_norm": 1.65140700340271, "learning_rate": 2.208384348665928e-06, "loss": 0.4157, "step": 4646 }, { "epoch": 0.7, "grad_norm": 1.2032607793807983, "learning_rate": 2.20636645139543e-06, "loss": 0.4577, "step": 4647 }, { "epoch": 0.7, "grad_norm": 1.0570966005325317, "learning_rate": 2.2043492154387518e-06, "loss": 0.4825, "step": 4648 }, { "epoch": 0.7, "grad_norm": 1.1618914604187012, "learning_rate": 2.2023326412734263e-06, "loss": 0.4587, "step": 4649 }, { "epoch": 0.7, "grad_norm": 1.5006239414215088, "learning_rate": 2.200316729376814e-06, "loss": 0.4363, "step": 4650 }, { "epoch": 0.7, "grad_norm": 1.1992019414901733, "learning_rate": 2.198301480226128e-06, "loss": 0.5252, "step": 4651 }, { "epoch": 0.7, "grad_norm": 1.3950185775756836, "learning_rate": 2.1962868942984213e-06, "loss": 0.4428, "step": 4652 }, { "epoch": 0.7, "grad_norm": 1.2559919357299805, "learning_rate": 2.1942729720705876e-06, "loss": 0.5092, "step": 4653 }, { "epoch": 0.7, "grad_norm": 1.8155651092529297, "learning_rate": 2.192259714019369e-06, "loss": 0.5012, "step": 4654 }, { "epoch": 0.7, "grad_norm": 1.1264067888259888, "learning_rate": 2.190247120621345e-06, "loss": 0.4987, "step": 4655 }, { "epoch": 0.7, "grad_norm": 1.5747599601745605, "learning_rate": 2.1882351923529432e-06, "loss": 0.474, "step": 4656 }, { "epoch": 0.7, "grad_norm": 1.2372452020645142, "learning_rate": 2.1862239296904274e-06, "loss": 0.7813, "step": 4657 }, { "epoch": 0.7, "grad_norm": 1.561988353729248, "learning_rate": 2.184213333109911e-06, "loss": 0.4068, "step": 4658 }, { "epoch": 0.7, "grad_norm": 1.2543989419937134, "learning_rate": 2.1822034030873424e-06, "loss": 0.5144, "step": 4659 }, { "epoch": 0.7, "grad_norm": 1.1995855569839478, "learning_rate": 2.1801941400985165e-06, "loss": 0.4845, "step": 4660 }, { "epoch": 0.7, "grad_norm": 2.310561180114746, "learning_rate": 2.178185544619073e-06, "loss": 0.5017, "step": 4661 }, { "epoch": 0.7, "grad_norm": 1.521061897277832, "learning_rate": 2.176177617124485e-06, "loss": 0.4747, "step": 4662 }, { "epoch": 0.7, "grad_norm": 1.1617506742477417, "learning_rate": 2.1741703580900774e-06, "loss": 0.4971, "step": 4663 }, { "epoch": 0.7, "grad_norm": 1.41513991355896, "learning_rate": 2.1721637679910096e-06, "loss": 0.5085, "step": 4664 }, { "epoch": 0.7, "grad_norm": 1.1721171140670776, "learning_rate": 2.1701578473022833e-06, "loss": 0.4624, "step": 4665 }, { "epoch": 0.7, "grad_norm": 1.8693206310272217, "learning_rate": 2.1681525964987477e-06, "loss": 0.4879, "step": 4666 }, { "epoch": 0.7, "grad_norm": 1.4056673049926758, "learning_rate": 2.166148016055085e-06, "loss": 0.4208, "step": 4667 }, { "epoch": 0.7, "grad_norm": 1.1070432662963867, "learning_rate": 2.1641441064458276e-06, "loss": 0.4519, "step": 4668 }, { "epoch": 0.7, "grad_norm": 1.1863269805908203, "learning_rate": 2.1621408681453395e-06, "loss": 0.5622, "step": 4669 }, { "epoch": 0.7, "grad_norm": 1.169938087463379, "learning_rate": 2.1601383016278333e-06, "loss": 0.4858, "step": 4670 }, { "epoch": 0.7, "grad_norm": 1.1228634119033813, "learning_rate": 2.158136407367362e-06, "loss": 0.6874, "step": 4671 }, { "epoch": 0.7, "grad_norm": 1.1891838312149048, "learning_rate": 2.156135185837814e-06, "loss": 0.4521, "step": 4672 }, { "epoch": 0.7, "grad_norm": 1.3369313478469849, "learning_rate": 2.154134637512925e-06, "loss": 0.4444, "step": 4673 }, { "epoch": 0.7, "grad_norm": 1.1019717454910278, "learning_rate": 2.1521347628662655e-06, "loss": 0.6657, "step": 4674 }, { "epoch": 0.7, "grad_norm": 1.1553003787994385, "learning_rate": 2.150135562371252e-06, "loss": 0.4445, "step": 4675 }, { "epoch": 0.7, "grad_norm": 1.3439778089523315, "learning_rate": 2.1481370365011376e-06, "loss": 0.4103, "step": 4676 }, { "epoch": 0.7, "grad_norm": 1.234379529953003, "learning_rate": 2.146139185729015e-06, "loss": 0.475, "step": 4677 }, { "epoch": 0.7, "grad_norm": 1.2516276836395264, "learning_rate": 2.1441420105278223e-06, "loss": 0.443, "step": 4678 }, { "epoch": 0.7, "grad_norm": 1.2507092952728271, "learning_rate": 2.14214551137033e-06, "loss": 0.4826, "step": 4679 }, { "epoch": 0.7, "grad_norm": 1.2427802085876465, "learning_rate": 2.140149688729158e-06, "loss": 0.5236, "step": 4680 }, { "epoch": 0.7, "grad_norm": 1.3469290733337402, "learning_rate": 2.1381545430767565e-06, "loss": 0.4627, "step": 4681 }, { "epoch": 0.7, "grad_norm": 1.5266389846801758, "learning_rate": 2.1361600748854215e-06, "loss": 0.571, "step": 4682 }, { "epoch": 0.7, "grad_norm": 1.1374000310897827, "learning_rate": 2.1341662846272892e-06, "loss": 0.5168, "step": 4683 }, { "epoch": 0.7, "grad_norm": 1.1713087558746338, "learning_rate": 2.1321731727743293e-06, "loss": 0.4274, "step": 4684 }, { "epoch": 0.7, "grad_norm": 1.3883689641952515, "learning_rate": 2.1301807397983587e-06, "loss": 0.4326, "step": 4685 }, { "epoch": 0.7, "grad_norm": 1.1215664148330688, "learning_rate": 2.128188986171026e-06, "loss": 0.443, "step": 4686 }, { "epoch": 0.7, "grad_norm": 1.1160283088684082, "learning_rate": 2.1261979123638253e-06, "loss": 0.465, "step": 4687 }, { "epoch": 0.7, "grad_norm": 1.3646528720855713, "learning_rate": 2.124207518848086e-06, "loss": 0.4737, "step": 4688 }, { "epoch": 0.7, "grad_norm": 1.122957706451416, "learning_rate": 2.122217806094976e-06, "loss": 0.4357, "step": 4689 }, { "epoch": 0.7, "grad_norm": 1.1916556358337402, "learning_rate": 2.120228774575507e-06, "loss": 0.5107, "step": 4690 }, { "epoch": 0.7, "grad_norm": 1.1625488996505737, "learning_rate": 2.1182404247605217e-06, "loss": 0.4185, "step": 4691 }, { "epoch": 0.7, "grad_norm": 1.1169240474700928, "learning_rate": 2.11625275712071e-06, "loss": 0.4006, "step": 4692 }, { "epoch": 0.7, "grad_norm": 0.9134297370910645, "learning_rate": 2.1142657721265926e-06, "loss": 0.4129, "step": 4693 }, { "epoch": 0.71, "grad_norm": 1.0831823348999023, "learning_rate": 2.1122794702485334e-06, "loss": 0.4275, "step": 4694 }, { "epoch": 0.71, "grad_norm": 1.3640660047531128, "learning_rate": 2.110293851956735e-06, "loss": 0.4892, "step": 4695 }, { "epoch": 0.71, "grad_norm": 1.1314868927001953, "learning_rate": 2.108308917721234e-06, "loss": 0.5087, "step": 4696 }, { "epoch": 0.71, "grad_norm": 1.2271218299865723, "learning_rate": 2.1063246680119095e-06, "loss": 0.4536, "step": 4697 }, { "epoch": 0.71, "grad_norm": 1.1407561302185059, "learning_rate": 2.104341103298474e-06, "loss": 0.5175, "step": 4698 }, { "epoch": 0.71, "grad_norm": 1.1657683849334717, "learning_rate": 2.1023582240504836e-06, "loss": 0.454, "step": 4699 }, { "epoch": 0.71, "grad_norm": 1.8028119802474976, "learning_rate": 2.1003760307373273e-06, "loss": 0.4832, "step": 4700 }, { "epoch": 0.71, "grad_norm": 5.016295433044434, "learning_rate": 2.0983945238282316e-06, "loss": 0.5141, "step": 4701 }, { "epoch": 0.71, "grad_norm": 1.1341769695281982, "learning_rate": 2.096413703792266e-06, "loss": 0.4189, "step": 4702 }, { "epoch": 0.71, "grad_norm": 1.1882588863372803, "learning_rate": 2.09443357109833e-06, "loss": 0.5149, "step": 4703 }, { "epoch": 0.71, "grad_norm": 1.3597347736358643, "learning_rate": 2.0924541262151655e-06, "loss": 0.5344, "step": 4704 }, { "epoch": 0.71, "grad_norm": 1.1478551626205444, "learning_rate": 2.0904753696113524e-06, "loss": 0.7551, "step": 4705 }, { "epoch": 0.71, "grad_norm": 1.1705619096755981, "learning_rate": 2.0884973017553012e-06, "loss": 0.4628, "step": 4706 }, { "epoch": 0.71, "grad_norm": 1.3585331439971924, "learning_rate": 2.086519923115268e-06, "loss": 0.553, "step": 4707 }, { "epoch": 0.71, "grad_norm": 1.2325140237808228, "learning_rate": 2.0845432341593375e-06, "loss": 0.4498, "step": 4708 }, { "epoch": 0.71, "grad_norm": 1.1671494245529175, "learning_rate": 2.0825672353554383e-06, "loss": 0.4712, "step": 4709 }, { "epoch": 0.71, "grad_norm": 1.1615618467330933, "learning_rate": 2.0805919271713286e-06, "loss": 0.4507, "step": 4710 }, { "epoch": 0.71, "grad_norm": 1.075870156288147, "learning_rate": 2.078617310074611e-06, "loss": 0.5276, "step": 4711 }, { "epoch": 0.71, "grad_norm": 1.305402159690857, "learning_rate": 2.0766433845327185e-06, "loss": 0.5158, "step": 4712 }, { "epoch": 0.71, "grad_norm": 1.1682208776474, "learning_rate": 2.07467015101292e-06, "loss": 0.4676, "step": 4713 }, { "epoch": 0.71, "grad_norm": 1.1920651197433472, "learning_rate": 2.072697609982326e-06, "loss": 0.4691, "step": 4714 }, { "epoch": 0.71, "grad_norm": 1.5446168184280396, "learning_rate": 2.0707257619078746e-06, "loss": 0.4399, "step": 4715 }, { "epoch": 0.71, "grad_norm": 1.1565879583358765, "learning_rate": 2.0687546072563534e-06, "loss": 0.4477, "step": 4716 }, { "epoch": 0.71, "grad_norm": 1.1200799942016602, "learning_rate": 2.0667841464943735e-06, "loss": 0.498, "step": 4717 }, { "epoch": 0.71, "grad_norm": 1.2714626789093018, "learning_rate": 2.064814380088383e-06, "loss": 0.4771, "step": 4718 }, { "epoch": 0.71, "grad_norm": 1.5849595069885254, "learning_rate": 2.0628453085046737e-06, "loss": 0.5118, "step": 4719 }, { "epoch": 0.71, "grad_norm": 1.4168821573257446, "learning_rate": 2.0608769322093626e-06, "loss": 0.4644, "step": 4720 }, { "epoch": 0.71, "grad_norm": 1.080635666847229, "learning_rate": 2.0589092516684116e-06, "loss": 0.3905, "step": 4721 }, { "epoch": 0.71, "grad_norm": 1.433009386062622, "learning_rate": 2.05694226734761e-06, "loss": 0.4326, "step": 4722 }, { "epoch": 0.71, "grad_norm": 1.1848195791244507, "learning_rate": 2.054975979712588e-06, "loss": 0.5194, "step": 4723 }, { "epoch": 0.71, "grad_norm": 1.594752550125122, "learning_rate": 2.053010389228809e-06, "loss": 0.5155, "step": 4724 }, { "epoch": 0.71, "grad_norm": 1.2151983976364136, "learning_rate": 2.0510454963615672e-06, "loss": 0.5447, "step": 4725 }, { "epoch": 0.71, "grad_norm": 1.4308414459228516, "learning_rate": 2.049081301575999e-06, "loss": 0.4262, "step": 4726 }, { "epoch": 0.71, "grad_norm": 1.205617904663086, "learning_rate": 2.047117805337071e-06, "loss": 0.4897, "step": 4727 }, { "epoch": 0.71, "grad_norm": 1.1787652969360352, "learning_rate": 2.045155008109587e-06, "loss": 0.4994, "step": 4728 }, { "epoch": 0.71, "grad_norm": 1.6606736183166504, "learning_rate": 2.0431929103581832e-06, "loss": 0.5544, "step": 4729 }, { "epoch": 0.71, "grad_norm": 1.0889384746551514, "learning_rate": 2.041231512547328e-06, "loss": 0.6949, "step": 4730 }, { "epoch": 0.71, "grad_norm": 1.2304649353027344, "learning_rate": 2.0392708151413302e-06, "loss": 0.5017, "step": 4731 }, { "epoch": 0.71, "grad_norm": 1.2303887605667114, "learning_rate": 2.037310818604328e-06, "loss": 0.4619, "step": 4732 }, { "epoch": 0.71, "grad_norm": 1.3241920471191406, "learning_rate": 2.0353515234002964e-06, "loss": 0.49, "step": 4733 }, { "epoch": 0.71, "grad_norm": 1.4075112342834473, "learning_rate": 2.0333929299930406e-06, "loss": 0.4559, "step": 4734 }, { "epoch": 0.71, "grad_norm": 1.4750452041625977, "learning_rate": 2.031435038846206e-06, "loss": 0.438, "step": 4735 }, { "epoch": 0.71, "grad_norm": 1.1622450351715088, "learning_rate": 2.0294778504232638e-06, "loss": 0.4955, "step": 4736 }, { "epoch": 0.71, "grad_norm": 1.4220515489578247, "learning_rate": 2.027521365187526e-06, "loss": 0.5276, "step": 4737 }, { "epoch": 0.71, "grad_norm": 1.2884297370910645, "learning_rate": 2.0255655836021353e-06, "loss": 0.3844, "step": 4738 }, { "epoch": 0.71, "grad_norm": 1.2508748769760132, "learning_rate": 2.023610506130066e-06, "loss": 0.494, "step": 4739 }, { "epoch": 0.71, "grad_norm": 1.3375860452651978, "learning_rate": 2.02165613323413e-06, "loss": 0.5005, "step": 4740 }, { "epoch": 0.71, "grad_norm": 1.2492923736572266, "learning_rate": 2.019702465376968e-06, "loss": 0.4968, "step": 4741 }, { "epoch": 0.71, "grad_norm": 1.2031474113464355, "learning_rate": 2.0177495030210546e-06, "loss": 0.4762, "step": 4742 }, { "epoch": 0.71, "grad_norm": 1.2566853761672974, "learning_rate": 2.015797246628702e-06, "loss": 0.4632, "step": 4743 }, { "epoch": 0.71, "grad_norm": 1.423658013343811, "learning_rate": 2.0138456966620467e-06, "loss": 0.514, "step": 4744 }, { "epoch": 0.71, "grad_norm": 2.765317440032959, "learning_rate": 2.0118948535830683e-06, "loss": 0.4782, "step": 4745 }, { "epoch": 0.71, "grad_norm": 1.1975903511047363, "learning_rate": 2.00994471785357e-06, "loss": 0.4729, "step": 4746 }, { "epoch": 0.71, "grad_norm": 1.3081650733947754, "learning_rate": 2.007995289935194e-06, "loss": 0.5103, "step": 4747 }, { "epoch": 0.71, "grad_norm": 1.0716485977172852, "learning_rate": 2.0060465702894092e-06, "loss": 0.4939, "step": 4748 }, { "epoch": 0.71, "grad_norm": 1.1829921007156372, "learning_rate": 2.004098559377522e-06, "loss": 0.4514, "step": 4749 }, { "epoch": 0.71, "grad_norm": 1.2612953186035156, "learning_rate": 2.00215125766067e-06, "loss": 0.5564, "step": 4750 }, { "epoch": 0.71, "grad_norm": 1.0452473163604736, "learning_rate": 2.000204665599819e-06, "loss": 0.4587, "step": 4751 }, { "epoch": 0.71, "grad_norm": 1.5004791021347046, "learning_rate": 1.998258783655774e-06, "loss": 0.483, "step": 4752 }, { "epoch": 0.71, "grad_norm": 1.350020408630371, "learning_rate": 1.9963136122891636e-06, "loss": 0.5299, "step": 4753 }, { "epoch": 0.71, "grad_norm": 1.4525090456008911, "learning_rate": 1.9943691519604523e-06, "loss": 0.4776, "step": 4754 }, { "epoch": 0.71, "grad_norm": 1.1500145196914673, "learning_rate": 1.9924254031299388e-06, "loss": 0.4545, "step": 4755 }, { "epoch": 0.71, "grad_norm": 1.556917667388916, "learning_rate": 1.990482366257747e-06, "loss": 0.4849, "step": 4756 }, { "epoch": 0.71, "grad_norm": 0.9859728813171387, "learning_rate": 1.988540041803841e-06, "loss": 0.3591, "step": 4757 }, { "epoch": 0.71, "grad_norm": 1.4108054637908936, "learning_rate": 1.9865984302280062e-06, "loss": 0.4885, "step": 4758 }, { "epoch": 0.71, "grad_norm": 1.8155641555786133, "learning_rate": 1.984657531989866e-06, "loss": 0.3796, "step": 4759 }, { "epoch": 0.72, "grad_norm": 1.1310828924179077, "learning_rate": 1.982717347548876e-06, "loss": 0.4186, "step": 4760 }, { "epoch": 0.72, "grad_norm": 1.2360540628433228, "learning_rate": 1.980777877364316e-06, "loss": 0.5217, "step": 4761 }, { "epoch": 0.72, "grad_norm": 2.281724214553833, "learning_rate": 1.978839121895304e-06, "loss": 0.5055, "step": 4762 }, { "epoch": 0.72, "grad_norm": 1.155362606048584, "learning_rate": 1.9769010816007828e-06, "loss": 0.4743, "step": 4763 }, { "epoch": 0.72, "grad_norm": 1.090556263923645, "learning_rate": 1.974963756939532e-06, "loss": 0.4486, "step": 4764 }, { "epoch": 0.72, "grad_norm": 1.1051994562149048, "learning_rate": 1.973027148370154e-06, "loss": 0.4203, "step": 4765 }, { "epoch": 0.72, "grad_norm": 1.4186248779296875, "learning_rate": 1.9710912563510915e-06, "loss": 0.4593, "step": 4766 }, { "epoch": 0.72, "grad_norm": 1.2163641452789307, "learning_rate": 1.9691560813406084e-06, "loss": 0.5498, "step": 4767 }, { "epoch": 0.72, "grad_norm": 1.2407538890838623, "learning_rate": 1.967221623796803e-06, "loss": 0.5136, "step": 4768 }, { "epoch": 0.72, "grad_norm": 1.4766398668289185, "learning_rate": 1.9652878841776056e-06, "loss": 0.5112, "step": 4769 }, { "epoch": 0.72, "grad_norm": 1.621812343597412, "learning_rate": 1.963354862940771e-06, "loss": 0.4732, "step": 4770 }, { "epoch": 0.72, "grad_norm": 1.2277134656906128, "learning_rate": 1.96142256054389e-06, "loss": 0.497, "step": 4771 }, { "epoch": 0.72, "grad_norm": 1.1553936004638672, "learning_rate": 1.9594909774443817e-06, "loss": 0.5249, "step": 4772 }, { "epoch": 0.72, "grad_norm": 1.5598292350769043, "learning_rate": 1.9575601140994904e-06, "loss": 0.4351, "step": 4773 }, { "epoch": 0.72, "grad_norm": 1.475538969039917, "learning_rate": 1.955629970966297e-06, "loss": 0.4228, "step": 4774 }, { "epoch": 0.72, "grad_norm": 1.1859060525894165, "learning_rate": 1.9537005485017047e-06, "loss": 0.5115, "step": 4775 }, { "epoch": 0.72, "grad_norm": 1.2574831247329712, "learning_rate": 1.9517718471624534e-06, "loss": 0.4353, "step": 4776 }, { "epoch": 0.72, "grad_norm": 1.329885482788086, "learning_rate": 1.9498438674051057e-06, "loss": 0.5898, "step": 4777 }, { "epoch": 0.72, "grad_norm": 1.1584542989730835, "learning_rate": 1.9479166096860586e-06, "loss": 0.4513, "step": 4778 }, { "epoch": 0.72, "grad_norm": 1.315085768699646, "learning_rate": 1.945990074461535e-06, "loss": 0.49, "step": 4779 }, { "epoch": 0.72, "grad_norm": 1.1355102062225342, "learning_rate": 1.9440642621875868e-06, "loss": 0.4685, "step": 4780 }, { "epoch": 0.72, "grad_norm": 1.0931622982025146, "learning_rate": 1.9421391733200973e-06, "loss": 0.6593, "step": 4781 }, { "epoch": 0.72, "grad_norm": 1.249399185180664, "learning_rate": 1.9402148083147744e-06, "loss": 0.4523, "step": 4782 }, { "epoch": 0.72, "grad_norm": 1.22849702835083, "learning_rate": 1.938291167627159e-06, "loss": 0.5234, "step": 4783 }, { "epoch": 0.72, "grad_norm": 1.2347755432128906, "learning_rate": 1.9363682517126198e-06, "loss": 0.4584, "step": 4784 }, { "epoch": 0.72, "grad_norm": 1.165579915046692, "learning_rate": 1.93444606102635e-06, "loss": 0.4656, "step": 4785 }, { "epoch": 0.72, "grad_norm": 1.0273722410202026, "learning_rate": 1.9325245960233773e-06, "loss": 0.4004, "step": 4786 }, { "epoch": 0.72, "grad_norm": 1.2598834037780762, "learning_rate": 1.9306038571585507e-06, "loss": 0.5276, "step": 4787 }, { "epoch": 0.72, "grad_norm": 1.0569345951080322, "learning_rate": 1.9286838448865543e-06, "loss": 0.3949, "step": 4788 }, { "epoch": 0.72, "grad_norm": 3.5575382709503174, "learning_rate": 1.9267645596618934e-06, "loss": 0.5695, "step": 4789 }, { "epoch": 0.72, "grad_norm": 1.1438852548599243, "learning_rate": 1.9248460019389077e-06, "loss": 0.4598, "step": 4790 }, { "epoch": 0.72, "grad_norm": 1.2272979021072388, "learning_rate": 1.92292817217176e-06, "loss": 0.5552, "step": 4791 }, { "epoch": 0.72, "grad_norm": 1.112324595451355, "learning_rate": 1.921011070814441e-06, "loss": 0.4708, "step": 4792 }, { "epoch": 0.72, "grad_norm": 1.3307558298110962, "learning_rate": 1.9190946983207714e-06, "loss": 0.5122, "step": 4793 }, { "epoch": 0.72, "grad_norm": 1.3866984844207764, "learning_rate": 1.9171790551443988e-06, "loss": 0.3951, "step": 4794 }, { "epoch": 0.72, "grad_norm": 1.2062265872955322, "learning_rate": 1.9152641417387986e-06, "loss": 0.7091, "step": 4795 }, { "epoch": 0.72, "grad_norm": 1.114076018333435, "learning_rate": 1.913349958557272e-06, "loss": 0.384, "step": 4796 }, { "epoch": 0.72, "grad_norm": 1.4052088260650635, "learning_rate": 1.9114365060529443e-06, "loss": 0.4888, "step": 4797 }, { "epoch": 0.72, "grad_norm": 1.3717442750930786, "learning_rate": 1.909523784678776e-06, "loss": 0.5798, "step": 4798 }, { "epoch": 0.72, "grad_norm": 1.2696058750152588, "learning_rate": 1.907611794887546e-06, "loss": 0.4921, "step": 4799 }, { "epoch": 0.72, "grad_norm": 2.2308740615844727, "learning_rate": 1.9057005371318677e-06, "loss": 0.513, "step": 4800 }, { "epoch": 0.72, "grad_norm": 1.0432411432266235, "learning_rate": 1.9037900118641739e-06, "loss": 0.6732, "step": 4801 }, { "epoch": 0.72, "grad_norm": 1.244374394416809, "learning_rate": 1.9018802195367303e-06, "loss": 0.5096, "step": 4802 }, { "epoch": 0.72, "grad_norm": 1.2164244651794434, "learning_rate": 1.8999711606016253e-06, "loss": 0.5618, "step": 4803 }, { "epoch": 0.72, "grad_norm": 1.0989468097686768, "learning_rate": 1.8980628355107705e-06, "loss": 0.4875, "step": 4804 }, { "epoch": 0.72, "grad_norm": 1.1956349611282349, "learning_rate": 1.8961552447159154e-06, "loss": 0.5021, "step": 4805 }, { "epoch": 0.72, "grad_norm": 1.719895601272583, "learning_rate": 1.894248388668623e-06, "loss": 0.4195, "step": 4806 }, { "epoch": 0.72, "grad_norm": 1.5598423480987549, "learning_rate": 1.8923422678202908e-06, "loss": 0.4525, "step": 4807 }, { "epoch": 0.72, "grad_norm": 1.307220458984375, "learning_rate": 1.8904368826221375e-06, "loss": 0.4666, "step": 4808 }, { "epoch": 0.72, "grad_norm": 1.2654500007629395, "learning_rate": 1.8885322335252076e-06, "loss": 0.4527, "step": 4809 }, { "epoch": 0.72, "grad_norm": 1.2438102960586548, "learning_rate": 1.8866283209803766e-06, "loss": 0.5013, "step": 4810 }, { "epoch": 0.72, "grad_norm": 1.0633751153945923, "learning_rate": 1.8847251454383374e-06, "loss": 0.549, "step": 4811 }, { "epoch": 0.72, "grad_norm": 1.5336066484451294, "learning_rate": 1.882822707349618e-06, "loss": 0.4778, "step": 4812 }, { "epoch": 0.72, "grad_norm": 1.3194743394851685, "learning_rate": 1.8809210071645628e-06, "loss": 0.5103, "step": 4813 }, { "epoch": 0.72, "grad_norm": 1.0962669849395752, "learning_rate": 1.879020045333349e-06, "loss": 0.509, "step": 4814 }, { "epoch": 0.72, "grad_norm": 1.2170311212539673, "learning_rate": 1.8771198223059721e-06, "loss": 0.4277, "step": 4815 }, { "epoch": 0.72, "grad_norm": 1.2159992456436157, "learning_rate": 1.875220338532258e-06, "loss": 0.4796, "step": 4816 }, { "epoch": 0.72, "grad_norm": 1.3194276094436646, "learning_rate": 1.8733215944618582e-06, "loss": 0.5293, "step": 4817 }, { "epoch": 0.72, "grad_norm": 1.2866120338439941, "learning_rate": 1.8714235905442418e-06, "loss": 0.397, "step": 4818 }, { "epoch": 0.72, "grad_norm": 1.3590037822723389, "learning_rate": 1.8695263272287124e-06, "loss": 0.5084, "step": 4819 }, { "epoch": 0.72, "grad_norm": 1.1931920051574707, "learning_rate": 1.8676298049643915e-06, "loss": 0.4607, "step": 4820 }, { "epoch": 0.72, "grad_norm": 1.9440938234329224, "learning_rate": 1.8657340242002248e-06, "loss": 0.5378, "step": 4821 }, { "epoch": 0.72, "grad_norm": 1.2042078971862793, "learning_rate": 1.8638389853849887e-06, "loss": 0.4476, "step": 4822 }, { "epoch": 0.72, "grad_norm": 1.454418420791626, "learning_rate": 1.8619446889672771e-06, "loss": 0.4731, "step": 4823 }, { "epoch": 0.72, "grad_norm": 1.166202187538147, "learning_rate": 1.8600511353955143e-06, "loss": 0.5043, "step": 4824 }, { "epoch": 0.72, "grad_norm": 0.990622878074646, "learning_rate": 1.8581583251179413e-06, "loss": 0.4484, "step": 4825 }, { "epoch": 0.72, "grad_norm": 0.9644473195075989, "learning_rate": 1.8562662585826303e-06, "loss": 0.4454, "step": 4826 }, { "epoch": 0.73, "grad_norm": 1.3327714204788208, "learning_rate": 1.8543749362374764e-06, "loss": 0.4731, "step": 4827 }, { "epoch": 0.73, "grad_norm": 1.0897088050842285, "learning_rate": 1.852484358530192e-06, "loss": 0.4177, "step": 4828 }, { "epoch": 0.73, "grad_norm": 1.722696304321289, "learning_rate": 1.850594525908323e-06, "loss": 0.474, "step": 4829 }, { "epoch": 0.73, "grad_norm": 1.286591649055481, "learning_rate": 1.84870543881923e-06, "loss": 0.4887, "step": 4830 }, { "epoch": 0.73, "grad_norm": 1.6081829071044922, "learning_rate": 1.8468170977101036e-06, "loss": 0.5355, "step": 4831 }, { "epoch": 0.73, "grad_norm": 0.9893311858177185, "learning_rate": 1.844929503027954e-06, "loss": 0.3724, "step": 4832 }, { "epoch": 0.73, "grad_norm": 1.1520490646362305, "learning_rate": 1.8430426552196145e-06, "loss": 0.7055, "step": 4833 }, { "epoch": 0.73, "grad_norm": 1.469104528427124, "learning_rate": 1.841156554731746e-06, "loss": 0.4917, "step": 4834 }, { "epoch": 0.73, "grad_norm": 1.0473686456680298, "learning_rate": 1.8392712020108266e-06, "loss": 0.4206, "step": 4835 }, { "epoch": 0.73, "grad_norm": 1.3277462720870972, "learning_rate": 1.8373865975031636e-06, "loss": 0.447, "step": 4836 }, { "epoch": 0.73, "grad_norm": 1.4408676624298096, "learning_rate": 1.8355027416548799e-06, "loss": 0.5206, "step": 4837 }, { "epoch": 0.73, "grad_norm": 1.484357237815857, "learning_rate": 1.8336196349119273e-06, "loss": 0.5272, "step": 4838 }, { "epoch": 0.73, "grad_norm": 1.1274592876434326, "learning_rate": 1.8317372777200798e-06, "loss": 0.4973, "step": 4839 }, { "epoch": 0.73, "grad_norm": 1.4325188398361206, "learning_rate": 1.8298556705249287e-06, "loss": 0.495, "step": 4840 }, { "epoch": 0.73, "grad_norm": 1.196458101272583, "learning_rate": 1.8279748137718951e-06, "loss": 0.4225, "step": 4841 }, { "epoch": 0.73, "grad_norm": 1.3037630319595337, "learning_rate": 1.8260947079062153e-06, "loss": 0.5376, "step": 4842 }, { "epoch": 0.73, "grad_norm": 1.2956267595291138, "learning_rate": 1.824215353372954e-06, "loss": 0.4984, "step": 4843 }, { "epoch": 0.73, "grad_norm": 1.0868397951126099, "learning_rate": 1.8223367506169943e-06, "loss": 0.5011, "step": 4844 }, { "epoch": 0.73, "grad_norm": 1.2210391759872437, "learning_rate": 1.8204589000830403e-06, "loss": 0.4954, "step": 4845 }, { "epoch": 0.73, "grad_norm": 1.6790274381637573, "learning_rate": 1.8185818022156238e-06, "loss": 0.5206, "step": 4846 }, { "epoch": 0.73, "grad_norm": 1.270649790763855, "learning_rate": 1.816705457459091e-06, "loss": 0.4658, "step": 4847 }, { "epoch": 0.73, "grad_norm": 1.4677929878234863, "learning_rate": 1.8148298662576148e-06, "loss": 0.4498, "step": 4848 }, { "epoch": 0.73, "grad_norm": 1.407099723815918, "learning_rate": 1.8129550290551913e-06, "loss": 0.5147, "step": 4849 }, { "epoch": 0.73, "grad_norm": 1.0778216123580933, "learning_rate": 1.8110809462956302e-06, "loss": 0.5081, "step": 4850 }, { "epoch": 0.73, "grad_norm": 1.0993876457214355, "learning_rate": 1.8092076184225727e-06, "loss": 0.4927, "step": 4851 }, { "epoch": 0.73, "grad_norm": 1.0305124521255493, "learning_rate": 1.807335045879472e-06, "loss": 0.4636, "step": 4852 }, { "epoch": 0.73, "grad_norm": 1.2898223400115967, "learning_rate": 1.8054632291096103e-06, "loss": 0.4677, "step": 4853 }, { "epoch": 0.73, "grad_norm": 1.0848816633224487, "learning_rate": 1.8035921685560836e-06, "loss": 0.536, "step": 4854 }, { "epoch": 0.73, "grad_norm": 1.0772778987884521, "learning_rate": 1.8017218646618169e-06, "loss": 0.4899, "step": 4855 }, { "epoch": 0.73, "grad_norm": 3.7446792125701904, "learning_rate": 1.7998523178695493e-06, "loss": 0.4807, "step": 4856 }, { "epoch": 0.73, "grad_norm": 1.0647706985473633, "learning_rate": 1.7979835286218418e-06, "loss": 0.7398, "step": 4857 }, { "epoch": 0.73, "grad_norm": 1.0804579257965088, "learning_rate": 1.796115497361081e-06, "loss": 0.4459, "step": 4858 }, { "epoch": 0.73, "grad_norm": 1.2070261240005493, "learning_rate": 1.7942482245294669e-06, "loss": 0.4838, "step": 4859 }, { "epoch": 0.73, "grad_norm": 1.2448567152023315, "learning_rate": 1.792381710569025e-06, "loss": 0.4547, "step": 4860 }, { "epoch": 0.73, "grad_norm": 1.070936679840088, "learning_rate": 1.7905159559216024e-06, "loss": 0.4998, "step": 4861 }, { "epoch": 0.73, "grad_norm": 1.2654318809509277, "learning_rate": 1.7886509610288595e-06, "loss": 0.4818, "step": 4862 }, { "epoch": 0.73, "grad_norm": 1.2087030410766602, "learning_rate": 1.7867867263322846e-06, "loss": 0.4965, "step": 4863 }, { "epoch": 0.73, "grad_norm": 1.3970595598220825, "learning_rate": 1.7849232522731797e-06, "loss": 0.4525, "step": 4864 }, { "epoch": 0.73, "grad_norm": 1.4244470596313477, "learning_rate": 1.783060539292673e-06, "loss": 0.5025, "step": 4865 }, { "epoch": 0.73, "grad_norm": 1.4043614864349365, "learning_rate": 1.7811985878317057e-06, "loss": 0.512, "step": 4866 }, { "epoch": 0.73, "grad_norm": 1.4233318567276, "learning_rate": 1.7793373983310452e-06, "loss": 0.4745, "step": 4867 }, { "epoch": 0.73, "grad_norm": 1.3138879537582397, "learning_rate": 1.777476971231274e-06, "loss": 0.5114, "step": 4868 }, { "epoch": 0.73, "grad_norm": 1.2186611890792847, "learning_rate": 1.7756173069727943e-06, "loss": 0.5045, "step": 4869 }, { "epoch": 0.73, "grad_norm": 1.5815666913986206, "learning_rate": 1.7737584059958323e-06, "loss": 0.4966, "step": 4870 }, { "epoch": 0.73, "grad_norm": 1.9521244764328003, "learning_rate": 1.7719002687404252e-06, "loss": 0.5144, "step": 4871 }, { "epoch": 0.73, "grad_norm": 1.3908638954162598, "learning_rate": 1.770042895646441e-06, "loss": 0.4159, "step": 4872 }, { "epoch": 0.73, "grad_norm": 1.2474966049194336, "learning_rate": 1.7681862871535577e-06, "loss": 0.4671, "step": 4873 }, { "epoch": 0.73, "grad_norm": 1.593888759613037, "learning_rate": 1.7663304437012725e-06, "loss": 0.474, "step": 4874 }, { "epoch": 0.73, "grad_norm": 1.5083874464035034, "learning_rate": 1.7644753657289071e-06, "loss": 0.4955, "step": 4875 }, { "epoch": 0.73, "grad_norm": 1.3731902837753296, "learning_rate": 1.7626210536755961e-06, "loss": 0.4789, "step": 4876 }, { "epoch": 0.73, "grad_norm": 1.3293275833129883, "learning_rate": 1.7607675079802989e-06, "loss": 0.5386, "step": 4877 }, { "epoch": 0.73, "grad_norm": 1.4650728702545166, "learning_rate": 1.7589147290817859e-06, "loss": 0.4387, "step": 4878 }, { "epoch": 0.73, "grad_norm": 1.3583118915557861, "learning_rate": 1.7570627174186543e-06, "loss": 0.5556, "step": 4879 }, { "epoch": 0.73, "grad_norm": 1.2100166082382202, "learning_rate": 1.7552114734293118e-06, "loss": 0.4673, "step": 4880 }, { "epoch": 0.73, "grad_norm": 1.5093921422958374, "learning_rate": 1.7533609975519916e-06, "loss": 0.487, "step": 4881 }, { "epoch": 0.73, "grad_norm": 1.2962480783462524, "learning_rate": 1.7515112902247383e-06, "loss": 0.5396, "step": 4882 }, { "epoch": 0.73, "grad_norm": 1.4044826030731201, "learning_rate": 1.7496623518854195e-06, "loss": 0.4882, "step": 4883 }, { "epoch": 0.73, "grad_norm": 1.2045128345489502, "learning_rate": 1.747814182971721e-06, "loss": 0.5126, "step": 4884 }, { "epoch": 0.73, "grad_norm": 1.4073457717895508, "learning_rate": 1.745966783921142e-06, "loss": 0.3639, "step": 4885 }, { "epoch": 0.73, "grad_norm": 1.2129772901535034, "learning_rate": 1.7441201551710019e-06, "loss": 0.5356, "step": 4886 }, { "epoch": 0.73, "grad_norm": 1.4491692781448364, "learning_rate": 1.7422742971584395e-06, "loss": 0.4856, "step": 4887 }, { "epoch": 0.73, "grad_norm": 1.2962559461593628, "learning_rate": 1.7404292103204073e-06, "loss": 0.5103, "step": 4888 }, { "epoch": 0.73, "grad_norm": 1.2910059690475464, "learning_rate": 1.7385848950936811e-06, "loss": 0.4717, "step": 4889 }, { "epoch": 0.73, "grad_norm": 1.2693413496017456, "learning_rate": 1.7367413519148462e-06, "loss": 0.4173, "step": 4890 }, { "epoch": 0.73, "grad_norm": 1.1682024002075195, "learning_rate": 1.7348985812203128e-06, "loss": 0.4949, "step": 4891 }, { "epoch": 0.73, "grad_norm": 1.3131310939788818, "learning_rate": 1.7330565834463026e-06, "loss": 0.5102, "step": 4892 }, { "epoch": 0.74, "grad_norm": 1.7007486820220947, "learning_rate": 1.7312153590288567e-06, "loss": 0.4142, "step": 4893 }, { "epoch": 0.74, "grad_norm": 1.0936098098754883, "learning_rate": 1.7293749084038352e-06, "loss": 0.539, "step": 4894 }, { "epoch": 0.74, "grad_norm": 1.5036734342575073, "learning_rate": 1.72753523200691e-06, "loss": 0.5266, "step": 4895 }, { "epoch": 0.74, "grad_norm": 1.4185842275619507, "learning_rate": 1.7256963302735752e-06, "loss": 0.4714, "step": 4896 }, { "epoch": 0.74, "grad_norm": 1.3362966775894165, "learning_rate": 1.7238582036391372e-06, "loss": 0.5234, "step": 4897 }, { "epoch": 0.74, "grad_norm": 1.2962000370025635, "learning_rate": 1.7220208525387193e-06, "loss": 0.4808, "step": 4898 }, { "epoch": 0.74, "grad_norm": 1.7980648279190063, "learning_rate": 1.7201842774072652e-06, "loss": 0.4745, "step": 4899 }, { "epoch": 0.74, "grad_norm": 1.2887043952941895, "learning_rate": 1.7183484786795297e-06, "loss": 0.4162, "step": 4900 }, { "epoch": 0.74, "grad_norm": 2.1728882789611816, "learning_rate": 1.7165134567900887e-06, "loss": 0.5632, "step": 4901 }, { "epoch": 0.74, "grad_norm": 1.0961376428604126, "learning_rate": 1.7146792121733297e-06, "loss": 0.4053, "step": 4902 }, { "epoch": 0.74, "grad_norm": 1.1886131763458252, "learning_rate": 1.71284574526346e-06, "loss": 0.4299, "step": 4903 }, { "epoch": 0.74, "grad_norm": 1.2058991193771362, "learning_rate": 1.7110130564944988e-06, "loss": 0.5317, "step": 4904 }, { "epoch": 0.74, "grad_norm": 1.43122398853302, "learning_rate": 1.709181146300285e-06, "loss": 0.5149, "step": 4905 }, { "epoch": 0.74, "grad_norm": 1.758541464805603, "learning_rate": 1.707350015114473e-06, "loss": 0.4634, "step": 4906 }, { "epoch": 0.74, "grad_norm": 1.1396510601043701, "learning_rate": 1.7055196633705279e-06, "loss": 0.495, "step": 4907 }, { "epoch": 0.74, "grad_norm": 1.664313554763794, "learning_rate": 1.7036900915017379e-06, "loss": 0.4947, "step": 4908 }, { "epoch": 0.74, "grad_norm": 1.2102530002593994, "learning_rate": 1.701861299941198e-06, "loss": 0.4351, "step": 4909 }, { "epoch": 0.74, "grad_norm": 1.0717499256134033, "learning_rate": 1.7000332891218275e-06, "loss": 0.4104, "step": 4910 }, { "epoch": 0.74, "grad_norm": 1.204673409461975, "learning_rate": 1.698206059476354e-06, "loss": 0.4538, "step": 4911 }, { "epoch": 0.74, "grad_norm": 0.9883139133453369, "learning_rate": 1.69637961143732e-06, "loss": 0.4547, "step": 4912 }, { "epoch": 0.74, "grad_norm": 1.2490075826644897, "learning_rate": 1.6945539454370901e-06, "loss": 0.4977, "step": 4913 }, { "epoch": 0.74, "grad_norm": 1.2534494400024414, "learning_rate": 1.6927290619078357e-06, "loss": 0.4011, "step": 4914 }, { "epoch": 0.74, "grad_norm": 1.4756669998168945, "learning_rate": 1.6909049612815475e-06, "loss": 0.4933, "step": 4915 }, { "epoch": 0.74, "grad_norm": 1.116776466369629, "learning_rate": 1.6890816439900314e-06, "loss": 0.4793, "step": 4916 }, { "epoch": 0.74, "grad_norm": 1.6631370782852173, "learning_rate": 1.6872591104649038e-06, "loss": 0.4265, "step": 4917 }, { "epoch": 0.74, "grad_norm": 0.9793209433555603, "learning_rate": 1.6854373611376006e-06, "loss": 0.4387, "step": 4918 }, { "epoch": 0.74, "grad_norm": 1.6379830837249756, "learning_rate": 1.6836163964393664e-06, "loss": 0.4699, "step": 4919 }, { "epoch": 0.74, "grad_norm": 1.3030709028244019, "learning_rate": 1.6817962168012665e-06, "loss": 0.5042, "step": 4920 }, { "epoch": 0.74, "grad_norm": 1.1374515295028687, "learning_rate": 1.6799768226541735e-06, "loss": 0.4054, "step": 4921 }, { "epoch": 0.74, "grad_norm": 1.2225286960601807, "learning_rate": 1.6781582144287811e-06, "loss": 0.7092, "step": 4922 }, { "epoch": 0.74, "grad_norm": 1.1655610799789429, "learning_rate": 1.6763403925555915e-06, "loss": 0.5364, "step": 4923 }, { "epoch": 0.74, "grad_norm": 1.7319560050964355, "learning_rate": 1.674523357464921e-06, "loss": 0.5527, "step": 4924 }, { "epoch": 0.74, "grad_norm": 1.198408603668213, "learning_rate": 1.6727071095869053e-06, "loss": 0.5034, "step": 4925 }, { "epoch": 0.74, "grad_norm": 1.3215759992599487, "learning_rate": 1.670891649351486e-06, "loss": 0.4758, "step": 4926 }, { "epoch": 0.74, "grad_norm": 2.023226737976074, "learning_rate": 1.6690769771884236e-06, "loss": 0.4565, "step": 4927 }, { "epoch": 0.74, "grad_norm": 1.3069088459014893, "learning_rate": 1.6672630935272922e-06, "loss": 0.5582, "step": 4928 }, { "epoch": 0.74, "grad_norm": 1.1080260276794434, "learning_rate": 1.6654499987974742e-06, "loss": 0.4412, "step": 4929 }, { "epoch": 0.74, "grad_norm": 1.3772518634796143, "learning_rate": 1.6636376934281716e-06, "loss": 0.4861, "step": 4930 }, { "epoch": 0.74, "grad_norm": 3.881237506866455, "learning_rate": 1.661826177848394e-06, "loss": 0.4046, "step": 4931 }, { "epoch": 0.74, "grad_norm": 1.4010015726089478, "learning_rate": 1.6600154524869693e-06, "loss": 0.5476, "step": 4932 }, { "epoch": 0.74, "grad_norm": 1.4925771951675415, "learning_rate": 1.658205517772532e-06, "loss": 0.5543, "step": 4933 }, { "epoch": 0.74, "grad_norm": 1.340942144393921, "learning_rate": 1.6563963741335366e-06, "loss": 0.4874, "step": 4934 }, { "epoch": 0.74, "grad_norm": 1.2364331483840942, "learning_rate": 1.6545880219982451e-06, "loss": 0.4777, "step": 4935 }, { "epoch": 0.74, "grad_norm": 1.2626478672027588, "learning_rate": 1.6527804617947323e-06, "loss": 0.4545, "step": 4936 }, { "epoch": 0.74, "grad_norm": 1.3069003820419312, "learning_rate": 1.6509736939508886e-06, "loss": 0.463, "step": 4937 }, { "epoch": 0.74, "grad_norm": 1.0287466049194336, "learning_rate": 1.6491677188944156e-06, "loss": 0.4633, "step": 4938 }, { "epoch": 0.74, "grad_norm": 2.004826545715332, "learning_rate": 1.6473625370528274e-06, "loss": 0.5275, "step": 4939 }, { "epoch": 0.74, "grad_norm": 1.1256396770477295, "learning_rate": 1.645558148853449e-06, "loss": 0.4072, "step": 4940 }, { "epoch": 0.74, "grad_norm": 0.944260835647583, "learning_rate": 1.6437545547234163e-06, "loss": 0.6912, "step": 4941 }, { "epoch": 0.74, "grad_norm": 1.3731597661972046, "learning_rate": 1.641951755089683e-06, "loss": 0.4451, "step": 4942 }, { "epoch": 0.74, "grad_norm": 1.3217201232910156, "learning_rate": 1.6401497503790077e-06, "loss": 0.5152, "step": 4943 }, { "epoch": 0.74, "grad_norm": 1.2205530405044556, "learning_rate": 1.6383485410179667e-06, "loss": 0.4761, "step": 4944 }, { "epoch": 0.74, "grad_norm": 1.2215852737426758, "learning_rate": 1.6365481274329432e-06, "loss": 0.4845, "step": 4945 }, { "epoch": 0.74, "grad_norm": 1.1359233856201172, "learning_rate": 1.6347485100501365e-06, "loss": 0.4812, "step": 4946 }, { "epoch": 0.74, "grad_norm": 1.2966076135635376, "learning_rate": 1.6329496892955544e-06, "loss": 0.4516, "step": 4947 }, { "epoch": 0.74, "grad_norm": 1.0488516092300415, "learning_rate": 1.6311516655950144e-06, "loss": 0.4499, "step": 4948 }, { "epoch": 0.74, "grad_norm": 1.215573787689209, "learning_rate": 1.6293544393741506e-06, "loss": 0.5039, "step": 4949 }, { "epoch": 0.74, "grad_norm": 4.055634498596191, "learning_rate": 1.6275580110584043e-06, "loss": 0.463, "step": 4950 }, { "epoch": 0.74, "grad_norm": 1.4587305784225464, "learning_rate": 1.6257623810730316e-06, "loss": 0.4516, "step": 4951 }, { "epoch": 0.74, "grad_norm": 1.0295759439468384, "learning_rate": 1.623967549843095e-06, "loss": 0.4842, "step": 4952 }, { "epoch": 0.74, "grad_norm": 1.176425814628601, "learning_rate": 1.6221735177934688e-06, "loss": 0.459, "step": 4953 }, { "epoch": 0.74, "grad_norm": 1.2829036712646484, "learning_rate": 1.6203802853488426e-06, "loss": 0.5327, "step": 4954 }, { "epoch": 0.74, "grad_norm": 1.229225516319275, "learning_rate": 1.6185878529337107e-06, "loss": 0.4681, "step": 4955 }, { "epoch": 0.74, "grad_norm": 1.3568006753921509, "learning_rate": 1.616796220972383e-06, "loss": 0.5754, "step": 4956 }, { "epoch": 0.74, "grad_norm": 1.2300307750701904, "learning_rate": 1.6150053898889756e-06, "loss": 0.4385, "step": 4957 }, { "epoch": 0.74, "grad_norm": 1.2373507022857666, "learning_rate": 1.6132153601074202e-06, "loss": 0.4412, "step": 4958 }, { "epoch": 0.74, "grad_norm": 1.3955541849136353, "learning_rate": 1.6114261320514535e-06, "loss": 0.4909, "step": 4959 }, { "epoch": 0.75, "grad_norm": 1.2916877269744873, "learning_rate": 1.6096377061446222e-06, "loss": 0.4682, "step": 4960 }, { "epoch": 0.75, "grad_norm": 1.1291093826293945, "learning_rate": 1.6078500828102917e-06, "loss": 0.5057, "step": 4961 }, { "epoch": 0.75, "grad_norm": 1.1351968050003052, "learning_rate": 1.6060632624716266e-06, "loss": 0.4493, "step": 4962 }, { "epoch": 0.75, "grad_norm": 1.2742259502410889, "learning_rate": 1.6042772455516092e-06, "loss": 0.4801, "step": 4963 }, { "epoch": 0.75, "grad_norm": 1.153572916984558, "learning_rate": 1.6024920324730275e-06, "loss": 0.4367, "step": 4964 }, { "epoch": 0.75, "grad_norm": 1.3633357286453247, "learning_rate": 1.600707623658478e-06, "loss": 0.4696, "step": 4965 }, { "epoch": 0.75, "grad_norm": 1.2841967344284058, "learning_rate": 1.5989240195303723e-06, "loss": 0.4185, "step": 4966 }, { "epoch": 0.75, "grad_norm": 1.2443900108337402, "learning_rate": 1.5971412205109256e-06, "loss": 0.4665, "step": 4967 }, { "epoch": 0.75, "grad_norm": 1.7848454713821411, "learning_rate": 1.595359227022168e-06, "loss": 0.4969, "step": 4968 }, { "epoch": 0.75, "grad_norm": 1.287833571434021, "learning_rate": 1.593578039485933e-06, "loss": 0.4696, "step": 4969 }, { "epoch": 0.75, "grad_norm": 1.1073180437088013, "learning_rate": 1.5917976583238693e-06, "loss": 0.4564, "step": 4970 }, { "epoch": 0.75, "grad_norm": 0.9340315461158752, "learning_rate": 1.5900180839574298e-06, "loss": 0.4573, "step": 4971 }, { "epoch": 0.75, "grad_norm": 1.183265209197998, "learning_rate": 1.5882393168078786e-06, "loss": 0.5445, "step": 4972 }, { "epoch": 0.75, "grad_norm": 1.8171789646148682, "learning_rate": 1.5864613572962912e-06, "loss": 0.5052, "step": 4973 }, { "epoch": 0.75, "grad_norm": 1.1715142726898193, "learning_rate": 1.5846842058435457e-06, "loss": 0.5078, "step": 4974 }, { "epoch": 0.75, "grad_norm": 1.0127100944519043, "learning_rate": 1.582907862870336e-06, "loss": 0.484, "step": 4975 }, { "epoch": 0.75, "grad_norm": 1.0276811122894287, "learning_rate": 1.5811323287971585e-06, "loss": 0.4075, "step": 4976 }, { "epoch": 0.75, "grad_norm": 1.1236668825149536, "learning_rate": 1.5793576040443204e-06, "loss": 0.4846, "step": 4977 }, { "epoch": 0.75, "grad_norm": 1.3794586658477783, "learning_rate": 1.5775836890319407e-06, "loss": 0.4571, "step": 4978 }, { "epoch": 0.75, "grad_norm": 1.2102934122085571, "learning_rate": 1.5758105841799393e-06, "loss": 0.5146, "step": 4979 }, { "epoch": 0.75, "grad_norm": 0.939616858959198, "learning_rate": 1.5740382899080532e-06, "loss": 0.3926, "step": 4980 }, { "epoch": 0.75, "grad_norm": 1.4848772287368774, "learning_rate": 1.5722668066358194e-06, "loss": 0.4556, "step": 4981 }, { "epoch": 0.75, "grad_norm": 1.332842230796814, "learning_rate": 1.5704961347825875e-06, "loss": 0.5388, "step": 4982 }, { "epoch": 0.75, "grad_norm": 0.9185745716094971, "learning_rate": 1.568726274767517e-06, "loss": 0.4337, "step": 4983 }, { "epoch": 0.75, "grad_norm": 1.2037221193313599, "learning_rate": 1.5669572270095674e-06, "loss": 0.7027, "step": 4984 }, { "epoch": 0.75, "grad_norm": 1.0707296133041382, "learning_rate": 1.5651889919275153e-06, "loss": 0.7217, "step": 4985 }, { "epoch": 0.75, "grad_norm": 1.4542914628982544, "learning_rate": 1.5634215699399363e-06, "loss": 0.4379, "step": 4986 }, { "epoch": 0.75, "grad_norm": 1.398055076599121, "learning_rate": 1.5616549614652216e-06, "loss": 0.4584, "step": 4987 }, { "epoch": 0.75, "grad_norm": 1.2065765857696533, "learning_rate": 1.5598891669215638e-06, "loss": 0.4884, "step": 4988 }, { "epoch": 0.75, "grad_norm": 1.1457759141921997, "learning_rate": 1.558124186726963e-06, "loss": 0.4302, "step": 4989 }, { "epoch": 0.75, "grad_norm": 1.4905292987823486, "learning_rate": 1.5563600212992326e-06, "loss": 0.4431, "step": 4990 }, { "epoch": 0.75, "grad_norm": 1.4302992820739746, "learning_rate": 1.5545966710559846e-06, "loss": 0.3836, "step": 4991 }, { "epoch": 0.75, "grad_norm": 1.0632346868515015, "learning_rate": 1.5528341364146465e-06, "loss": 0.4595, "step": 4992 }, { "epoch": 0.75, "grad_norm": 1.387381911277771, "learning_rate": 1.5510724177924447e-06, "loss": 0.5027, "step": 4993 }, { "epoch": 0.75, "grad_norm": 1.3747549057006836, "learning_rate": 1.549311515606418e-06, "loss": 0.4329, "step": 4994 }, { "epoch": 0.75, "grad_norm": 1.1772490739822388, "learning_rate": 1.5475514302734119e-06, "loss": 0.4939, "step": 4995 }, { "epoch": 0.75, "grad_norm": 2.040417194366455, "learning_rate": 1.5457921622100742e-06, "loss": 0.525, "step": 4996 }, { "epoch": 0.75, "grad_norm": 1.062502384185791, "learning_rate": 1.5440337118328646e-06, "loss": 0.4401, "step": 4997 }, { "epoch": 0.75, "grad_norm": 1.3651766777038574, "learning_rate": 1.5422760795580433e-06, "loss": 0.4693, "step": 4998 }, { "epoch": 0.75, "grad_norm": 1.1817954778671265, "learning_rate": 1.5405192658016832e-06, "loss": 0.4407, "step": 4999 }, { "epoch": 0.75, "grad_norm": 1.4865210056304932, "learning_rate": 1.5387632709796596e-06, "loss": 0.406, "step": 5000 }, { "epoch": 0.75, "grad_norm": 1.5371102094650269, "learning_rate": 1.5370080955076521e-06, "loss": 0.5021, "step": 5001 }, { "epoch": 0.75, "grad_norm": 1.490299940109253, "learning_rate": 1.5352537398011525e-06, "loss": 0.5305, "step": 5002 }, { "epoch": 0.75, "grad_norm": 1.1081291437149048, "learning_rate": 1.5335002042754516e-06, "loss": 0.4858, "step": 5003 }, { "epoch": 0.75, "grad_norm": 2.5592548847198486, "learning_rate": 1.5317474893456507e-06, "loss": 0.4757, "step": 5004 }, { "epoch": 0.75, "grad_norm": 1.3614758253097534, "learning_rate": 1.5299955954266583e-06, "loss": 0.4709, "step": 5005 }, { "epoch": 0.75, "grad_norm": 1.3316657543182373, "learning_rate": 1.5282445229331817e-06, "loss": 0.5002, "step": 5006 }, { "epoch": 0.75, "grad_norm": 1.532286524772644, "learning_rate": 1.5264942722797404e-06, "loss": 0.4791, "step": 5007 }, { "epoch": 0.75, "grad_norm": 1.0799715518951416, "learning_rate": 1.524744843880655e-06, "loss": 0.4495, "step": 5008 }, { "epoch": 0.75, "grad_norm": 1.3556913137435913, "learning_rate": 1.5229962381500563e-06, "loss": 0.4645, "step": 5009 }, { "epoch": 0.75, "grad_norm": 1.2008870840072632, "learning_rate": 1.521248455501873e-06, "loss": 0.3911, "step": 5010 }, { "epoch": 0.75, "grad_norm": 1.166757345199585, "learning_rate": 1.5195014963498472e-06, "loss": 0.6598, "step": 5011 }, { "epoch": 0.75, "grad_norm": 1.1075860261917114, "learning_rate": 1.5177553611075207e-06, "loss": 0.4897, "step": 5012 }, { "epoch": 0.75, "grad_norm": 1.5516341924667358, "learning_rate": 1.5160100501882397e-06, "loss": 0.4548, "step": 5013 }, { "epoch": 0.75, "grad_norm": 1.1260886192321777, "learning_rate": 1.5142655640051602e-06, "loss": 0.4929, "step": 5014 }, { "epoch": 0.75, "grad_norm": 1.0762088298797607, "learning_rate": 1.5125219029712363e-06, "loss": 0.4558, "step": 5015 }, { "epoch": 0.75, "grad_norm": 1.5203322172164917, "learning_rate": 1.5107790674992357e-06, "loss": 0.5035, "step": 5016 }, { "epoch": 0.75, "grad_norm": 1.3736004829406738, "learning_rate": 1.5090370580017233e-06, "loss": 0.4695, "step": 5017 }, { "epoch": 0.75, "grad_norm": 1.104666829109192, "learning_rate": 1.5072958748910677e-06, "loss": 0.4128, "step": 5018 }, { "epoch": 0.75, "grad_norm": 1.3316905498504639, "learning_rate": 1.5055555185794496e-06, "loss": 0.4512, "step": 5019 }, { "epoch": 0.75, "grad_norm": 1.1768772602081299, "learning_rate": 1.5038159894788446e-06, "loss": 0.5031, "step": 5020 }, { "epoch": 0.75, "grad_norm": 1.0955071449279785, "learning_rate": 1.5020772880010414e-06, "loss": 0.4211, "step": 5021 }, { "epoch": 0.75, "grad_norm": 1.27809739112854, "learning_rate": 1.5003394145576245e-06, "loss": 0.4914, "step": 5022 }, { "epoch": 0.75, "grad_norm": 1.3918647766113281, "learning_rate": 1.49860236955999e-06, "loss": 0.4435, "step": 5023 }, { "epoch": 0.75, "grad_norm": 1.2173380851745605, "learning_rate": 1.4968661534193308e-06, "loss": 0.5217, "step": 5024 }, { "epoch": 0.75, "grad_norm": 10.548365592956543, "learning_rate": 1.4951307665466502e-06, "loss": 0.5486, "step": 5025 }, { "epoch": 0.75, "grad_norm": 1.3081036806106567, "learning_rate": 1.4933962093527503e-06, "loss": 0.4275, "step": 5026 }, { "epoch": 0.76, "grad_norm": 1.2957119941711426, "learning_rate": 1.4916624822482356e-06, "loss": 0.4506, "step": 5027 }, { "epoch": 0.76, "grad_norm": 1.325648307800293, "learning_rate": 1.489929585643523e-06, "loss": 0.4905, "step": 5028 }, { "epoch": 0.76, "grad_norm": 1.7107242345809937, "learning_rate": 1.4881975199488247e-06, "loss": 0.5315, "step": 5029 }, { "epoch": 0.76, "grad_norm": 1.0236284732818604, "learning_rate": 1.4864662855741552e-06, "loss": 0.4776, "step": 5030 }, { "epoch": 0.76, "grad_norm": 1.0513657331466675, "learning_rate": 1.4847358829293397e-06, "loss": 0.469, "step": 5031 }, { "epoch": 0.76, "grad_norm": 1.1934282779693604, "learning_rate": 1.4830063124239979e-06, "loss": 0.383, "step": 5032 }, { "epoch": 0.76, "grad_norm": 1.1855942010879517, "learning_rate": 1.4812775744675612e-06, "loss": 0.4931, "step": 5033 }, { "epoch": 0.76, "grad_norm": 1.2496418952941895, "learning_rate": 1.4795496694692557e-06, "loss": 0.4956, "step": 5034 }, { "epoch": 0.76, "grad_norm": 1.1431697607040405, "learning_rate": 1.4778225978381173e-06, "loss": 0.4796, "step": 5035 }, { "epoch": 0.76, "grad_norm": 1.7560118436813354, "learning_rate": 1.4760963599829775e-06, "loss": 0.5743, "step": 5036 }, { "epoch": 0.76, "grad_norm": 1.2661824226379395, "learning_rate": 1.4743709563124786e-06, "loss": 0.4191, "step": 5037 }, { "epoch": 0.76, "grad_norm": 0.9728396534919739, "learning_rate": 1.4726463872350577e-06, "loss": 0.5155, "step": 5038 }, { "epoch": 0.76, "grad_norm": 1.4640291929244995, "learning_rate": 1.4709226531589594e-06, "loss": 0.5201, "step": 5039 }, { "epoch": 0.76, "grad_norm": 1.2306228876113892, "learning_rate": 1.4691997544922303e-06, "loss": 0.4182, "step": 5040 }, { "epoch": 0.76, "grad_norm": 1.5740476846694946, "learning_rate": 1.4674776916427163e-06, "loss": 0.532, "step": 5041 }, { "epoch": 0.76, "grad_norm": 1.2833200693130493, "learning_rate": 1.4657564650180667e-06, "loss": 0.4934, "step": 5042 }, { "epoch": 0.76, "grad_norm": 1.4474372863769531, "learning_rate": 1.464036075025735e-06, "loss": 0.5271, "step": 5043 }, { "epoch": 0.76, "grad_norm": 1.3811957836151123, "learning_rate": 1.462316522072973e-06, "loss": 0.4805, "step": 5044 }, { "epoch": 0.76, "grad_norm": 1.17985200881958, "learning_rate": 1.460597806566838e-06, "loss": 0.3594, "step": 5045 }, { "epoch": 0.76, "grad_norm": 1.4918595552444458, "learning_rate": 1.4588799289141859e-06, "loss": 0.4707, "step": 5046 }, { "epoch": 0.76, "grad_norm": 1.3777337074279785, "learning_rate": 1.4571628895216778e-06, "loss": 0.4652, "step": 5047 }, { "epoch": 0.76, "grad_norm": 1.3272305727005005, "learning_rate": 1.4554466887957708e-06, "loss": 0.4344, "step": 5048 }, { "epoch": 0.76, "grad_norm": 1.0759782791137695, "learning_rate": 1.4537313271427295e-06, "loss": 0.4532, "step": 5049 }, { "epoch": 0.76, "grad_norm": 1.1511003971099854, "learning_rate": 1.4520168049686184e-06, "loss": 0.4667, "step": 5050 }, { "epoch": 0.76, "grad_norm": 1.3975605964660645, "learning_rate": 1.4503031226792986e-06, "loss": 0.474, "step": 5051 }, { "epoch": 0.76, "grad_norm": 1.260606288909912, "learning_rate": 1.4485902806804403e-06, "loss": 0.6836, "step": 5052 }, { "epoch": 0.76, "grad_norm": 1.3176772594451904, "learning_rate": 1.446878279377506e-06, "loss": 0.5135, "step": 5053 }, { "epoch": 0.76, "grad_norm": 1.576862096786499, "learning_rate": 1.4451671191757676e-06, "loss": 0.5606, "step": 5054 }, { "epoch": 0.76, "grad_norm": 1.1972805261611938, "learning_rate": 1.4434568004802918e-06, "loss": 0.5084, "step": 5055 }, { "epoch": 0.76, "grad_norm": 1.2950843572616577, "learning_rate": 1.4417473236959478e-06, "loss": 0.4393, "step": 5056 }, { "epoch": 0.76, "grad_norm": 1.2885648012161255, "learning_rate": 1.4400386892274076e-06, "loss": 0.4323, "step": 5057 }, { "epoch": 0.76, "grad_norm": 1.2269790172576904, "learning_rate": 1.4383308974791399e-06, "loss": 0.5029, "step": 5058 }, { "epoch": 0.76, "grad_norm": 1.2813271284103394, "learning_rate": 1.436623948855419e-06, "loss": 0.6993, "step": 5059 }, { "epoch": 0.76, "grad_norm": 1.1397422552108765, "learning_rate": 1.434917843760314e-06, "loss": 0.4637, "step": 5060 }, { "epoch": 0.76, "grad_norm": 1.3925926685333252, "learning_rate": 1.4332125825976984e-06, "loss": 0.4546, "step": 5061 }, { "epoch": 0.76, "grad_norm": 1.36578369140625, "learning_rate": 1.431508165771246e-06, "loss": 0.4793, "step": 5062 }, { "epoch": 0.76, "grad_norm": 2.0728559494018555, "learning_rate": 1.4298045936844262e-06, "loss": 0.489, "step": 5063 }, { "epoch": 0.76, "grad_norm": 1.5578423738479614, "learning_rate": 1.428101866740515e-06, "loss": 0.4808, "step": 5064 }, { "epoch": 0.76, "grad_norm": 3.148526191711426, "learning_rate": 1.4263999853425815e-06, "loss": 0.4565, "step": 5065 }, { "epoch": 0.76, "grad_norm": 1.288733720779419, "learning_rate": 1.424698949893501e-06, "loss": 0.5141, "step": 5066 }, { "epoch": 0.76, "grad_norm": 1.1132296323776245, "learning_rate": 1.422998760795945e-06, "loss": 0.4293, "step": 5067 }, { "epoch": 0.76, "grad_norm": 1.4272493124008179, "learning_rate": 1.4212994184523826e-06, "loss": 0.4937, "step": 5068 }, { "epoch": 0.76, "grad_norm": 2.754197835922241, "learning_rate": 1.4196009232650887e-06, "loss": 0.4566, "step": 5069 }, { "epoch": 0.76, "grad_norm": 1.152745246887207, "learning_rate": 1.4179032756361306e-06, "loss": 0.4926, "step": 5070 }, { "epoch": 0.76, "grad_norm": 3.3471198081970215, "learning_rate": 1.4162064759673804e-06, "loss": 0.5032, "step": 5071 }, { "epoch": 0.76, "grad_norm": 1.511496901512146, "learning_rate": 1.414510524660509e-06, "loss": 0.5233, "step": 5072 }, { "epoch": 0.76, "grad_norm": 1.5123772621154785, "learning_rate": 1.4128154221169815e-06, "loss": 0.4332, "step": 5073 }, { "epoch": 0.76, "grad_norm": 1.3131970167160034, "learning_rate": 1.4111211687380688e-06, "loss": 0.4827, "step": 5074 }, { "epoch": 0.76, "grad_norm": 1.1578214168548584, "learning_rate": 1.4094277649248345e-06, "loss": 0.7149, "step": 5075 }, { "epoch": 0.76, "grad_norm": 1.3462793827056885, "learning_rate": 1.4077352110781473e-06, "loss": 0.4685, "step": 5076 }, { "epoch": 0.76, "grad_norm": 1.3398022651672363, "learning_rate": 1.4060435075986684e-06, "loss": 0.5, "step": 5077 }, { "epoch": 0.76, "grad_norm": 2.545867443084717, "learning_rate": 1.404352654886864e-06, "loss": 0.4815, "step": 5078 }, { "epoch": 0.76, "grad_norm": 1.2382850646972656, "learning_rate": 1.4026626533429938e-06, "loss": 0.4551, "step": 5079 }, { "epoch": 0.76, "grad_norm": 1.6508108377456665, "learning_rate": 1.4009735033671163e-06, "loss": 0.4806, "step": 5080 }, { "epoch": 0.76, "grad_norm": 1.0864062309265137, "learning_rate": 1.3992852053590938e-06, "loss": 0.6714, "step": 5081 }, { "epoch": 0.76, "grad_norm": 1.2951751947402954, "learning_rate": 1.397597759718578e-06, "loss": 0.4476, "step": 5082 }, { "epoch": 0.76, "grad_norm": 1.2119197845458984, "learning_rate": 1.3959111668450305e-06, "loss": 0.4421, "step": 5083 }, { "epoch": 0.76, "grad_norm": 1.223123550415039, "learning_rate": 1.3942254271377004e-06, "loss": 0.5395, "step": 5084 }, { "epoch": 0.76, "grad_norm": 1.3822985887527466, "learning_rate": 1.3925405409956388e-06, "loss": 0.4798, "step": 5085 }, { "epoch": 0.76, "grad_norm": 1.3505154848098755, "learning_rate": 1.3908565088176973e-06, "loss": 0.4798, "step": 5086 }, { "epoch": 0.76, "grad_norm": 1.2840265035629272, "learning_rate": 1.3891733310025202e-06, "loss": 0.4974, "step": 5087 }, { "epoch": 0.76, "grad_norm": 1.2894991636276245, "learning_rate": 1.3874910079485549e-06, "loss": 0.4795, "step": 5088 }, { "epoch": 0.76, "grad_norm": 1.2125660181045532, "learning_rate": 1.3858095400540411e-06, "loss": 0.4715, "step": 5089 }, { "epoch": 0.76, "grad_norm": 7.913717746734619, "learning_rate": 1.3841289277170223e-06, "loss": 0.4746, "step": 5090 }, { "epoch": 0.76, "grad_norm": 1.3537591695785522, "learning_rate": 1.3824491713353332e-06, "loss": 0.4767, "step": 5091 }, { "epoch": 0.76, "grad_norm": 1.4886586666107178, "learning_rate": 1.3807702713066084e-06, "loss": 0.4908, "step": 5092 }, { "epoch": 0.77, "grad_norm": 1.4820754528045654, "learning_rate": 1.379092228028281e-06, "loss": 0.49, "step": 5093 }, { "epoch": 0.77, "grad_norm": 1.8238012790679932, "learning_rate": 1.3774150418975807e-06, "loss": 0.4915, "step": 5094 }, { "epoch": 0.77, "grad_norm": 3.4273457527160645, "learning_rate": 1.3757387133115346e-06, "loss": 0.4412, "step": 5095 }, { "epoch": 0.77, "grad_norm": 1.4087101221084595, "learning_rate": 1.374063242666966e-06, "loss": 0.5228, "step": 5096 }, { "epoch": 0.77, "grad_norm": 1.3729703426361084, "learning_rate": 1.3723886303604922e-06, "loss": 0.5657, "step": 5097 }, { "epoch": 0.77, "grad_norm": 1.347570776939392, "learning_rate": 1.3707148767885348e-06, "loss": 0.5216, "step": 5098 }, { "epoch": 0.77, "grad_norm": 1.3003116846084595, "learning_rate": 1.3690419823473034e-06, "loss": 0.4998, "step": 5099 }, { "epoch": 0.77, "grad_norm": 1.6432799100875854, "learning_rate": 1.3673699474328122e-06, "loss": 0.5032, "step": 5100 }, { "epoch": 0.77, "grad_norm": 1.22113037109375, "learning_rate": 1.3656987724408655e-06, "loss": 0.7045, "step": 5101 }, { "epoch": 0.77, "grad_norm": 1.5287823677062988, "learning_rate": 1.3640284577670692e-06, "loss": 0.5006, "step": 5102 }, { "epoch": 0.77, "grad_norm": 1.7441080808639526, "learning_rate": 1.3623590038068219e-06, "loss": 0.4328, "step": 5103 }, { "epoch": 0.77, "grad_norm": 1.3002272844314575, "learning_rate": 1.3606904109553165e-06, "loss": 0.483, "step": 5104 }, { "epoch": 0.77, "grad_norm": 1.1455419063568115, "learning_rate": 1.3590226796075518e-06, "loss": 0.4671, "step": 5105 }, { "epoch": 0.77, "grad_norm": 1.3176320791244507, "learning_rate": 1.3573558101583106e-06, "loss": 0.4899, "step": 5106 }, { "epoch": 0.77, "grad_norm": 1.1458773612976074, "learning_rate": 1.3556898030021804e-06, "loss": 0.6668, "step": 5107 }, { "epoch": 0.77, "grad_norm": 2.6337485313415527, "learning_rate": 1.3540246585335403e-06, "loss": 0.4388, "step": 5108 }, { "epoch": 0.77, "grad_norm": 1.6392725706100464, "learning_rate": 1.3523603771465648e-06, "loss": 0.4821, "step": 5109 }, { "epoch": 0.77, "grad_norm": 1.6033244132995605, "learning_rate": 1.3506969592352282e-06, "loss": 0.7537, "step": 5110 }, { "epoch": 0.77, "grad_norm": 1.4846642017364502, "learning_rate": 1.349034405193294e-06, "loss": 0.5013, "step": 5111 }, { "epoch": 0.77, "grad_norm": 1.1970728635787964, "learning_rate": 1.3473727154143296e-06, "loss": 0.4561, "step": 5112 }, { "epoch": 0.77, "grad_norm": 1.2336565256118774, "learning_rate": 1.3457118902916883e-06, "loss": 0.4332, "step": 5113 }, { "epoch": 0.77, "grad_norm": 1.5057328939437866, "learning_rate": 1.3440519302185278e-06, "loss": 0.5121, "step": 5114 }, { "epoch": 0.77, "grad_norm": 1.1704418659210205, "learning_rate": 1.3423928355877947e-06, "loss": 0.4609, "step": 5115 }, { "epoch": 0.77, "grad_norm": 1.1870063543319702, "learning_rate": 1.3407346067922294e-06, "loss": 0.4271, "step": 5116 }, { "epoch": 0.77, "grad_norm": 1.8678385019302368, "learning_rate": 1.3390772442243776e-06, "loss": 0.4884, "step": 5117 }, { "epoch": 0.77, "grad_norm": 1.6021320819854736, "learning_rate": 1.3374207482765683e-06, "loss": 0.4898, "step": 5118 }, { "epoch": 0.77, "grad_norm": 1.1966593265533447, "learning_rate": 1.3357651193409333e-06, "loss": 0.6879, "step": 5119 }, { "epoch": 0.77, "grad_norm": 1.4647274017333984, "learning_rate": 1.3341103578093934e-06, "loss": 0.5216, "step": 5120 }, { "epoch": 0.77, "grad_norm": 1.1584670543670654, "learning_rate": 1.3324564640736664e-06, "loss": 0.4391, "step": 5121 }, { "epoch": 0.77, "grad_norm": 1.297419786453247, "learning_rate": 1.3308034385252672e-06, "loss": 0.45, "step": 5122 }, { "epoch": 0.77, "grad_norm": 1.3308727741241455, "learning_rate": 1.3291512815555008e-06, "loss": 0.4632, "step": 5123 }, { "epoch": 0.77, "grad_norm": 1.6983466148376465, "learning_rate": 1.32749999355547e-06, "loss": 0.4857, "step": 5124 }, { "epoch": 0.77, "grad_norm": 1.2715405225753784, "learning_rate": 1.3258495749160694e-06, "loss": 0.515, "step": 5125 }, { "epoch": 0.77, "grad_norm": 1.4013983011245728, "learning_rate": 1.3242000260279909e-06, "loss": 0.5089, "step": 5126 }, { "epoch": 0.77, "grad_norm": 1.0458475351333618, "learning_rate": 1.3225513472817158e-06, "loss": 0.5453, "step": 5127 }, { "epoch": 0.77, "grad_norm": 1.518961787223816, "learning_rate": 1.320903539067524e-06, "loss": 0.4921, "step": 5128 }, { "epoch": 0.77, "grad_norm": 1.151631474494934, "learning_rate": 1.3192566017754894e-06, "loss": 0.4699, "step": 5129 }, { "epoch": 0.77, "grad_norm": 1.5177713632583618, "learning_rate": 1.3176105357954733e-06, "loss": 0.4868, "step": 5130 }, { "epoch": 0.77, "grad_norm": 1.1036796569824219, "learning_rate": 1.3159653415171402e-06, "loss": 0.4539, "step": 5131 }, { "epoch": 0.77, "grad_norm": 1.5872591733932495, "learning_rate": 1.3143210193299405e-06, "loss": 0.5102, "step": 5132 }, { "epoch": 0.77, "grad_norm": 1.296143889427185, "learning_rate": 1.3126775696231204e-06, "loss": 0.5301, "step": 5133 }, { "epoch": 0.77, "grad_norm": 1.2115212678909302, "learning_rate": 1.311034992785723e-06, "loss": 0.4585, "step": 5134 }, { "epoch": 0.77, "grad_norm": 1.181646466255188, "learning_rate": 1.3093932892065787e-06, "loss": 0.4143, "step": 5135 }, { "epoch": 0.77, "grad_norm": 1.7298307418823242, "learning_rate": 1.3077524592743174e-06, "loss": 0.479, "step": 5136 }, { "epoch": 0.77, "grad_norm": 1.415168046951294, "learning_rate": 1.3061125033773564e-06, "loss": 0.4564, "step": 5137 }, { "epoch": 0.77, "grad_norm": 1.2510900497436523, "learning_rate": 1.3044734219039108e-06, "loss": 0.4931, "step": 5138 }, { "epoch": 0.77, "grad_norm": 1.4316002130508423, "learning_rate": 1.3028352152419876e-06, "loss": 0.4286, "step": 5139 }, { "epoch": 0.77, "grad_norm": 1.119730830192566, "learning_rate": 1.3011978837793832e-06, "loss": 0.4256, "step": 5140 }, { "epoch": 0.77, "grad_norm": 1.2308636903762817, "learning_rate": 1.2995614279036933e-06, "loss": 0.4686, "step": 5141 }, { "epoch": 0.77, "grad_norm": 1.2288298606872559, "learning_rate": 1.2979258480022983e-06, "loss": 0.4464, "step": 5142 }, { "epoch": 0.77, "grad_norm": 1.2382113933563232, "learning_rate": 1.2962911444623799e-06, "loss": 0.4392, "step": 5143 }, { "epoch": 0.77, "grad_norm": 1.1770073175430298, "learning_rate": 1.2946573176709055e-06, "loss": 0.4553, "step": 5144 }, { "epoch": 0.77, "grad_norm": 1.2064756155014038, "learning_rate": 1.293024368014636e-06, "loss": 0.5338, "step": 5145 }, { "epoch": 0.77, "grad_norm": 1.2104554176330566, "learning_rate": 1.2913922958801294e-06, "loss": 0.462, "step": 5146 }, { "epoch": 0.77, "grad_norm": 1.163084626197815, "learning_rate": 1.2897611016537304e-06, "loss": 0.4806, "step": 5147 }, { "epoch": 0.77, "grad_norm": 1.2276240587234497, "learning_rate": 1.2881307857215797e-06, "loss": 0.449, "step": 5148 }, { "epoch": 0.77, "grad_norm": 1.597174882888794, "learning_rate": 1.286501348469606e-06, "loss": 0.4934, "step": 5149 }, { "epoch": 0.77, "grad_norm": 1.1726422309875488, "learning_rate": 1.2848727902835351e-06, "loss": 0.4782, "step": 5150 }, { "epoch": 0.77, "grad_norm": 0.9928596615791321, "learning_rate": 1.283245111548882e-06, "loss": 0.5037, "step": 5151 }, { "epoch": 0.77, "grad_norm": 1.1135780811309814, "learning_rate": 1.2816183126509519e-06, "loss": 0.6517, "step": 5152 }, { "epoch": 0.77, "grad_norm": 1.0595273971557617, "learning_rate": 1.279992393974846e-06, "loss": 0.4902, "step": 5153 }, { "epoch": 0.77, "grad_norm": 1.157655954360962, "learning_rate": 1.2783673559054515e-06, "loss": 0.4925, "step": 5154 }, { "epoch": 0.77, "grad_norm": 1.64413321018219, "learning_rate": 1.2767431988274538e-06, "loss": 0.4911, "step": 5155 }, { "epoch": 0.77, "grad_norm": 6.047730445861816, "learning_rate": 1.2751199231253236e-06, "loss": 0.5009, "step": 5156 }, { "epoch": 0.77, "grad_norm": 1.2122552394866943, "learning_rate": 1.2734975291833252e-06, "loss": 0.4823, "step": 5157 }, { "epoch": 0.77, "grad_norm": 1.4129664897918701, "learning_rate": 1.271876017385517e-06, "loss": 0.4746, "step": 5158 }, { "epoch": 0.77, "grad_norm": 1.0376558303833008, "learning_rate": 1.2702553881157438e-06, "loss": 0.7034, "step": 5159 }, { "epoch": 0.78, "grad_norm": 1.3682985305786133, "learning_rate": 1.2686356417576439e-06, "loss": 0.4996, "step": 5160 }, { "epoch": 0.78, "grad_norm": 1.1913741827011108, "learning_rate": 1.2670167786946496e-06, "loss": 0.4927, "step": 5161 }, { "epoch": 0.78, "grad_norm": 1.4525730609893799, "learning_rate": 1.265398799309977e-06, "loss": 0.4417, "step": 5162 }, { "epoch": 0.78, "grad_norm": 1.5202876329421997, "learning_rate": 1.2637817039866402e-06, "loss": 0.4735, "step": 5163 }, { "epoch": 0.78, "grad_norm": 1.8436311483383179, "learning_rate": 1.262165493107438e-06, "loss": 0.545, "step": 5164 }, { "epoch": 0.78, "grad_norm": 1.1948179006576538, "learning_rate": 1.2605501670549653e-06, "loss": 0.5274, "step": 5165 }, { "epoch": 0.78, "grad_norm": 1.508226990699768, "learning_rate": 1.2589357262116019e-06, "loss": 0.4826, "step": 5166 }, { "epoch": 0.78, "grad_norm": 0.9920307993888855, "learning_rate": 1.2573221709595246e-06, "loss": 0.465, "step": 5167 }, { "epoch": 0.78, "grad_norm": 1.8773112297058105, "learning_rate": 1.2557095016806943e-06, "loss": 0.4323, "step": 5168 }, { "epoch": 0.78, "grad_norm": 1.204723834991455, "learning_rate": 1.2540977187568665e-06, "loss": 0.5794, "step": 5169 }, { "epoch": 0.78, "grad_norm": 1.2903342247009277, "learning_rate": 1.2524868225695851e-06, "loss": 0.4626, "step": 5170 }, { "epoch": 0.78, "grad_norm": 1.2884584665298462, "learning_rate": 1.2508768135001808e-06, "loss": 0.4954, "step": 5171 }, { "epoch": 0.78, "grad_norm": 1.189881682395935, "learning_rate": 1.2492676919297837e-06, "loss": 0.4966, "step": 5172 }, { "epoch": 0.78, "grad_norm": 1.1066924333572388, "learning_rate": 1.2476594582393048e-06, "loss": 0.5134, "step": 5173 }, { "epoch": 0.78, "grad_norm": 1.092934250831604, "learning_rate": 1.2460521128094467e-06, "loss": 0.4961, "step": 5174 }, { "epoch": 0.78, "grad_norm": 1.303300142288208, "learning_rate": 1.2444456560207058e-06, "loss": 0.3996, "step": 5175 }, { "epoch": 0.78, "grad_norm": 1.113066554069519, "learning_rate": 1.2428400882533625e-06, "loss": 0.7089, "step": 5176 }, { "epoch": 0.78, "grad_norm": 1.1445443630218506, "learning_rate": 1.2412354098874924e-06, "loss": 0.4279, "step": 5177 }, { "epoch": 0.78, "grad_norm": 1.3877766132354736, "learning_rate": 1.2396316213029553e-06, "loss": 0.46, "step": 5178 }, { "epoch": 0.78, "grad_norm": 1.0957484245300293, "learning_rate": 1.2380287228794058e-06, "loss": 0.4463, "step": 5179 }, { "epoch": 0.78, "grad_norm": 1.0584664344787598, "learning_rate": 1.236426714996281e-06, "loss": 0.5043, "step": 5180 }, { "epoch": 0.78, "grad_norm": 1.432837724685669, "learning_rate": 1.234825598032815e-06, "loss": 0.4318, "step": 5181 }, { "epoch": 0.78, "grad_norm": 1.0642553567886353, "learning_rate": 1.2332253723680231e-06, "loss": 0.4754, "step": 5182 }, { "epoch": 0.78, "grad_norm": 1.098127007484436, "learning_rate": 1.2316260383807161e-06, "loss": 0.475, "step": 5183 }, { "epoch": 0.78, "grad_norm": 1.3100306987762451, "learning_rate": 1.2300275964494918e-06, "loss": 0.4153, "step": 5184 }, { "epoch": 0.78, "grad_norm": 1.408657193183899, "learning_rate": 1.228430046952735e-06, "loss": 0.445, "step": 5185 }, { "epoch": 0.78, "grad_norm": 1.2904014587402344, "learning_rate": 1.2268333902686185e-06, "loss": 0.3493, "step": 5186 }, { "epoch": 0.78, "grad_norm": 1.0180528163909912, "learning_rate": 1.2252376267751093e-06, "loss": 0.4517, "step": 5187 }, { "epoch": 0.78, "grad_norm": 1.1695377826690674, "learning_rate": 1.223642756849956e-06, "loss": 0.7049, "step": 5188 }, { "epoch": 0.78, "grad_norm": 1.12647545337677, "learning_rate": 1.2220487808707016e-06, "loss": 0.4518, "step": 5189 }, { "epoch": 0.78, "grad_norm": 1.2134227752685547, "learning_rate": 1.2204556992146726e-06, "loss": 0.4629, "step": 5190 }, { "epoch": 0.78, "grad_norm": 1.0722935199737549, "learning_rate": 1.2188635122589887e-06, "loss": 0.5073, "step": 5191 }, { "epoch": 0.78, "grad_norm": 1.825192928314209, "learning_rate": 1.2172722203805526e-06, "loss": 0.4887, "step": 5192 }, { "epoch": 0.78, "grad_norm": 1.2410863637924194, "learning_rate": 1.2156818239560604e-06, "loss": 0.5458, "step": 5193 }, { "epoch": 0.78, "grad_norm": 1.6481162309646606, "learning_rate": 1.21409232336199e-06, "loss": 0.5241, "step": 5194 }, { "epoch": 0.78, "grad_norm": 1.2146188020706177, "learning_rate": 1.212503718974613e-06, "loss": 0.582, "step": 5195 }, { "epoch": 0.78, "grad_norm": 1.3334040641784668, "learning_rate": 1.2109160111699875e-06, "loss": 0.3926, "step": 5196 }, { "epoch": 0.78, "grad_norm": 2.437013864517212, "learning_rate": 1.2093292003239565e-06, "loss": 0.4818, "step": 5197 }, { "epoch": 0.78, "grad_norm": 1.0740773677825928, "learning_rate": 1.2077432868121547e-06, "loss": 0.4322, "step": 5198 }, { "epoch": 0.78, "grad_norm": 1.0161254405975342, "learning_rate": 1.2061582710100006e-06, "loss": 0.4346, "step": 5199 }, { "epoch": 0.78, "grad_norm": 1.1999781131744385, "learning_rate": 1.2045741532927014e-06, "loss": 0.4727, "step": 5200 }, { "epoch": 0.78, "grad_norm": 1.9308167695999146, "learning_rate": 1.2029909340352547e-06, "loss": 0.5006, "step": 5201 }, { "epoch": 0.78, "grad_norm": 1.8421958684921265, "learning_rate": 1.2014086136124398e-06, "loss": 0.421, "step": 5202 }, { "epoch": 0.78, "grad_norm": 1.1937922239303589, "learning_rate": 1.1998271923988298e-06, "loss": 0.5151, "step": 5203 }, { "epoch": 0.78, "grad_norm": 1.7359713315963745, "learning_rate": 1.1982466707687784e-06, "loss": 0.5212, "step": 5204 }, { "epoch": 0.78, "grad_norm": 1.291639804840088, "learning_rate": 1.1966670490964306e-06, "loss": 0.4292, "step": 5205 }, { "epoch": 0.78, "grad_norm": 1.2564584016799927, "learning_rate": 1.1950883277557185e-06, "loss": 0.4795, "step": 5206 }, { "epoch": 0.78, "grad_norm": 0.9746639132499695, "learning_rate": 1.1935105071203572e-06, "loss": 0.4119, "step": 5207 }, { "epoch": 0.78, "grad_norm": 1.9037411212921143, "learning_rate": 1.1919335875638543e-06, "loss": 0.5104, "step": 5208 }, { "epoch": 0.78, "grad_norm": 1.3986256122589111, "learning_rate": 1.1903575694594982e-06, "loss": 0.4657, "step": 5209 }, { "epoch": 0.78, "grad_norm": 1.388136625289917, "learning_rate": 1.1887824531803688e-06, "loss": 0.5034, "step": 5210 }, { "epoch": 0.78, "grad_norm": 1.4342368841171265, "learning_rate": 1.1872082390993294e-06, "loss": 0.4836, "step": 5211 }, { "epoch": 0.78, "grad_norm": 1.172695279121399, "learning_rate": 1.185634927589029e-06, "loss": 0.5331, "step": 5212 }, { "epoch": 0.78, "grad_norm": 1.2169365882873535, "learning_rate": 1.184062519021908e-06, "loss": 0.4519, "step": 5213 }, { "epoch": 0.78, "grad_norm": 1.1855236291885376, "learning_rate": 1.1824910137701861e-06, "loss": 0.4072, "step": 5214 }, { "epoch": 0.78, "grad_norm": 1.2773420810699463, "learning_rate": 1.180920412205877e-06, "loss": 0.4331, "step": 5215 }, { "epoch": 0.78, "grad_norm": 1.7530148029327393, "learning_rate": 1.1793507147007716e-06, "loss": 0.4375, "step": 5216 }, { "epoch": 0.78, "grad_norm": 1.8814982175827026, "learning_rate": 1.177781921626454e-06, "loss": 0.3676, "step": 5217 }, { "epoch": 0.78, "grad_norm": 1.735316514968872, "learning_rate": 1.1762140333542932e-06, "loss": 0.5013, "step": 5218 }, { "epoch": 0.78, "grad_norm": 1.2529829740524292, "learning_rate": 1.174647050255439e-06, "loss": 0.5429, "step": 5219 }, { "epoch": 0.78, "grad_norm": 1.3276078701019287, "learning_rate": 1.173080972700834e-06, "loss": 0.4565, "step": 5220 }, { "epoch": 0.78, "grad_norm": 1.1579833030700684, "learning_rate": 1.1715158010611987e-06, "loss": 0.6735, "step": 5221 }, { "epoch": 0.78, "grad_norm": 1.492163062095642, "learning_rate": 1.1699515357070473e-06, "loss": 0.5106, "step": 5222 }, { "epoch": 0.78, "grad_norm": 1.381911039352417, "learning_rate": 1.1683881770086735e-06, "loss": 0.4864, "step": 5223 }, { "epoch": 0.78, "grad_norm": 1.6579194068908691, "learning_rate": 1.166825725336157e-06, "loss": 0.4446, "step": 5224 }, { "epoch": 0.78, "grad_norm": 1.5040885210037231, "learning_rate": 1.1652641810593667e-06, "loss": 0.5337, "step": 5225 }, { "epoch": 0.79, "grad_norm": 1.6347612142562866, "learning_rate": 1.1637035445479517e-06, "loss": 0.4391, "step": 5226 }, { "epoch": 0.79, "grad_norm": 1.255651831626892, "learning_rate": 1.1621438161713494e-06, "loss": 0.5167, "step": 5227 }, { "epoch": 0.79, "grad_norm": 1.351214051246643, "learning_rate": 1.1605849962987836e-06, "loss": 0.5038, "step": 5228 }, { "epoch": 0.79, "grad_norm": 1.484582781791687, "learning_rate": 1.159027085299258e-06, "loss": 0.4511, "step": 5229 }, { "epoch": 0.79, "grad_norm": 1.357679843902588, "learning_rate": 1.157470083541566e-06, "loss": 0.3826, "step": 5230 }, { "epoch": 0.79, "grad_norm": 1.4021155834197998, "learning_rate": 1.1559139913942812e-06, "loss": 0.437, "step": 5231 }, { "epoch": 0.79, "grad_norm": 1.233782172203064, "learning_rate": 1.1543588092257685e-06, "loss": 0.4773, "step": 5232 }, { "epoch": 0.79, "grad_norm": 1.017952799797058, "learning_rate": 1.152804537404169e-06, "loss": 0.3741, "step": 5233 }, { "epoch": 0.79, "grad_norm": 1.8111140727996826, "learning_rate": 1.151251176297416e-06, "loss": 0.4996, "step": 5234 }, { "epoch": 0.79, "grad_norm": 1.2227641344070435, "learning_rate": 1.1496987262732224e-06, "loss": 0.5022, "step": 5235 }, { "epoch": 0.79, "grad_norm": 1.499895691871643, "learning_rate": 1.1481471876990858e-06, "loss": 0.4532, "step": 5236 }, { "epoch": 0.79, "grad_norm": 1.2319769859313965, "learning_rate": 1.146596560942292e-06, "loss": 0.4485, "step": 5237 }, { "epoch": 0.79, "grad_norm": 1.4712387323379517, "learning_rate": 1.1450468463699033e-06, "loss": 0.4691, "step": 5238 }, { "epoch": 0.79, "grad_norm": 1.2902554273605347, "learning_rate": 1.1434980443487764e-06, "loss": 0.4616, "step": 5239 }, { "epoch": 0.79, "grad_norm": 1.6719781160354614, "learning_rate": 1.1419501552455442e-06, "loss": 0.5024, "step": 5240 }, { "epoch": 0.79, "grad_norm": 1.191707730293274, "learning_rate": 1.1404031794266246e-06, "loss": 0.5172, "step": 5241 }, { "epoch": 0.79, "grad_norm": 1.459499478340149, "learning_rate": 1.1388571172582218e-06, "loss": 0.5209, "step": 5242 }, { "epoch": 0.79, "grad_norm": 1.1030367612838745, "learning_rate": 1.137311969106321e-06, "loss": 0.4442, "step": 5243 }, { "epoch": 0.79, "grad_norm": 1.1510344743728638, "learning_rate": 1.1357677353366947e-06, "loss": 0.5088, "step": 5244 }, { "epoch": 0.79, "grad_norm": 1.2425199747085571, "learning_rate": 1.1342244163148935e-06, "loss": 0.4709, "step": 5245 }, { "epoch": 0.79, "grad_norm": 1.4287878274917603, "learning_rate": 1.1326820124062581e-06, "loss": 0.4918, "step": 5246 }, { "epoch": 0.79, "grad_norm": 1.4446824789047241, "learning_rate": 1.1311405239759065e-06, "loss": 0.502, "step": 5247 }, { "epoch": 0.79, "grad_norm": 1.1616995334625244, "learning_rate": 1.1295999513887423e-06, "loss": 0.4763, "step": 5248 }, { "epoch": 0.79, "grad_norm": 1.223138689994812, "learning_rate": 1.1280602950094532e-06, "loss": 0.4585, "step": 5249 }, { "epoch": 0.79, "grad_norm": 1.5374183654785156, "learning_rate": 1.12652155520251e-06, "loss": 0.507, "step": 5250 }, { "epoch": 0.79, "grad_norm": 1.3095018863677979, "learning_rate": 1.1249837323321666e-06, "loss": 0.5671, "step": 5251 }, { "epoch": 0.79, "grad_norm": 1.321160078048706, "learning_rate": 1.1234468267624581e-06, "loss": 0.5072, "step": 5252 }, { "epoch": 0.79, "grad_norm": 1.2105897665023804, "learning_rate": 1.121910838857202e-06, "loss": 0.481, "step": 5253 }, { "epoch": 0.79, "grad_norm": 2.141547441482544, "learning_rate": 1.1203757689800026e-06, "loss": 0.4665, "step": 5254 }, { "epoch": 0.79, "grad_norm": 2.028266668319702, "learning_rate": 1.1188416174942423e-06, "loss": 0.4498, "step": 5255 }, { "epoch": 0.79, "grad_norm": 1.2922582626342773, "learning_rate": 1.11730838476309e-06, "loss": 0.4624, "step": 5256 }, { "epoch": 0.79, "grad_norm": 1.4292535781860352, "learning_rate": 1.115776071149493e-06, "loss": 0.5133, "step": 5257 }, { "epoch": 0.79, "grad_norm": 1.6194052696228027, "learning_rate": 1.1142446770161858e-06, "loss": 0.5838, "step": 5258 }, { "epoch": 0.79, "grad_norm": 1.2409374713897705, "learning_rate": 1.1127142027256809e-06, "loss": 0.4376, "step": 5259 }, { "epoch": 0.79, "grad_norm": 1.1999760866165161, "learning_rate": 1.1111846486402733e-06, "loss": 0.6579, "step": 5260 }, { "epoch": 0.79, "grad_norm": 1.3922451734542847, "learning_rate": 1.1096560151220464e-06, "loss": 0.4843, "step": 5261 }, { "epoch": 0.79, "grad_norm": 1.3055238723754883, "learning_rate": 1.108128302532857e-06, "loss": 0.5611, "step": 5262 }, { "epoch": 0.79, "grad_norm": 1.0832792520523071, "learning_rate": 1.1066015112343503e-06, "loss": 0.5064, "step": 5263 }, { "epoch": 0.79, "grad_norm": 1.127637267112732, "learning_rate": 1.1050756415879504e-06, "loss": 0.4451, "step": 5264 }, { "epoch": 0.79, "grad_norm": 1.11567223072052, "learning_rate": 1.1035506939548618e-06, "loss": 0.3901, "step": 5265 }, { "epoch": 0.79, "grad_norm": 1.2557203769683838, "learning_rate": 1.1020266686960762e-06, "loss": 0.4984, "step": 5266 }, { "epoch": 0.79, "grad_norm": 2.0025510787963867, "learning_rate": 1.1005035661723595e-06, "loss": 0.4237, "step": 5267 }, { "epoch": 0.79, "grad_norm": 1.1509838104248047, "learning_rate": 1.0989813867442677e-06, "loss": 0.4751, "step": 5268 }, { "epoch": 0.79, "grad_norm": 1.3359730243682861, "learning_rate": 1.0974601307721295e-06, "loss": 0.432, "step": 5269 }, { "epoch": 0.79, "grad_norm": 1.2945027351379395, "learning_rate": 1.095939798616063e-06, "loss": 0.4378, "step": 5270 }, { "epoch": 0.79, "grad_norm": 1.2123647928237915, "learning_rate": 1.0944203906359607e-06, "loss": 0.4921, "step": 5271 }, { "epoch": 0.79, "grad_norm": 1.2058935165405273, "learning_rate": 1.092901907191501e-06, "loss": 0.4671, "step": 5272 }, { "epoch": 0.79, "grad_norm": 1.440938115119934, "learning_rate": 1.0913843486421427e-06, "loss": 0.499, "step": 5273 }, { "epoch": 0.79, "grad_norm": 1.6170365810394287, "learning_rate": 1.0898677153471239e-06, "loss": 0.4441, "step": 5274 }, { "epoch": 0.79, "grad_norm": 1.608162760734558, "learning_rate": 1.0883520076654652e-06, "loss": 0.4175, "step": 5275 }, { "epoch": 0.79, "grad_norm": 1.4640309810638428, "learning_rate": 1.086837225955968e-06, "loss": 0.4005, "step": 5276 }, { "epoch": 0.79, "grad_norm": 2.5489931106567383, "learning_rate": 1.085323370577212e-06, "loss": 0.4565, "step": 5277 }, { "epoch": 0.79, "grad_norm": 1.2239753007888794, "learning_rate": 1.0838104418875622e-06, "loss": 0.4984, "step": 5278 }, { "epoch": 0.79, "grad_norm": 1.26395845413208, "learning_rate": 1.0822984402451591e-06, "loss": 0.466, "step": 5279 }, { "epoch": 0.79, "grad_norm": 1.434630036354065, "learning_rate": 1.0807873660079293e-06, "loss": 0.4334, "step": 5280 }, { "epoch": 0.79, "grad_norm": 1.080852746963501, "learning_rate": 1.0792772195335749e-06, "loss": 0.4569, "step": 5281 }, { "epoch": 0.79, "grad_norm": 1.398804783821106, "learning_rate": 1.0777680011795815e-06, "loss": 0.5172, "step": 5282 }, { "epoch": 0.79, "grad_norm": 1.090566873550415, "learning_rate": 1.0762597113032124e-06, "loss": 0.4098, "step": 5283 }, { "epoch": 0.79, "grad_norm": 1.4861781597137451, "learning_rate": 1.0747523502615136e-06, "loss": 0.4358, "step": 5284 }, { "epoch": 0.79, "grad_norm": 1.343839168548584, "learning_rate": 1.073245918411312e-06, "loss": 0.4415, "step": 5285 }, { "epoch": 0.79, "grad_norm": 1.3297418355941772, "learning_rate": 1.0717404161092098e-06, "loss": 0.4822, "step": 5286 }, { "epoch": 0.79, "grad_norm": 1.3794288635253906, "learning_rate": 1.0702358437115945e-06, "loss": 0.4251, "step": 5287 }, { "epoch": 0.79, "grad_norm": 1.5304949283599854, "learning_rate": 1.0687322015746304e-06, "loss": 0.5412, "step": 5288 }, { "epoch": 0.79, "grad_norm": 1.4989720582962036, "learning_rate": 1.06722949005426e-06, "loss": 0.5258, "step": 5289 }, { "epoch": 0.79, "grad_norm": 1.1211397647857666, "learning_rate": 1.065727709506212e-06, "loss": 0.4296, "step": 5290 }, { "epoch": 0.79, "grad_norm": 1.2827403545379639, "learning_rate": 1.0642268602859862e-06, "loss": 0.5522, "step": 5291 }, { "epoch": 0.79, "grad_norm": 1.26280677318573, "learning_rate": 1.0627269427488702e-06, "loss": 0.5154, "step": 5292 }, { "epoch": 0.8, "grad_norm": 1.9662882089614868, "learning_rate": 1.0612279572499234e-06, "loss": 0.4794, "step": 5293 }, { "epoch": 0.8, "grad_norm": 1.2877905368804932, "learning_rate": 1.0597299041439901e-06, "loss": 0.4553, "step": 5294 }, { "epoch": 0.8, "grad_norm": 1.395349144935608, "learning_rate": 1.0582327837856937e-06, "loss": 0.4772, "step": 5295 }, { "epoch": 0.8, "grad_norm": 1.774394154548645, "learning_rate": 1.0567365965294318e-06, "loss": 0.4472, "step": 5296 }, { "epoch": 0.8, "grad_norm": 1.2695887088775635, "learning_rate": 1.0552413427293885e-06, "loss": 0.443, "step": 5297 }, { "epoch": 0.8, "grad_norm": 1.1567492485046387, "learning_rate": 1.053747022739518e-06, "loss": 0.4629, "step": 5298 }, { "epoch": 0.8, "grad_norm": 1.483871579170227, "learning_rate": 1.0522536369135634e-06, "loss": 0.4796, "step": 5299 }, { "epoch": 0.8, "grad_norm": 1.230148434638977, "learning_rate": 1.050761185605038e-06, "loss": 0.507, "step": 5300 }, { "epoch": 0.8, "grad_norm": 1.2418253421783447, "learning_rate": 1.0492696691672382e-06, "loss": 0.5008, "step": 5301 }, { "epoch": 0.8, "grad_norm": 1.1936326026916504, "learning_rate": 1.0477790879532397e-06, "loss": 0.4832, "step": 5302 }, { "epoch": 0.8, "grad_norm": 2.144523859024048, "learning_rate": 1.0462894423158936e-06, "loss": 0.509, "step": 5303 }, { "epoch": 0.8, "grad_norm": 1.3552961349487305, "learning_rate": 1.0448007326078336e-06, "loss": 0.4742, "step": 5304 }, { "epoch": 0.8, "grad_norm": 1.5140776634216309, "learning_rate": 1.0433129591814668e-06, "loss": 0.5296, "step": 5305 }, { "epoch": 0.8, "grad_norm": 1.493344783782959, "learning_rate": 1.0418261223889836e-06, "loss": 0.439, "step": 5306 }, { "epoch": 0.8, "grad_norm": 1.317837119102478, "learning_rate": 1.0403402225823517e-06, "loss": 0.4916, "step": 5307 }, { "epoch": 0.8, "grad_norm": 1.5169870853424072, "learning_rate": 1.038855260113313e-06, "loss": 0.4963, "step": 5308 }, { "epoch": 0.8, "grad_norm": 1.0931382179260254, "learning_rate": 1.0373712353333925e-06, "loss": 0.5053, "step": 5309 }, { "epoch": 0.8, "grad_norm": 2.11301589012146, "learning_rate": 1.0358881485938898e-06, "loss": 0.4174, "step": 5310 }, { "epoch": 0.8, "grad_norm": 1.3517929315567017, "learning_rate": 1.0344060002458862e-06, "loss": 0.6965, "step": 5311 }, { "epoch": 0.8, "grad_norm": 1.3026354312896729, "learning_rate": 1.0329247906402345e-06, "loss": 0.4861, "step": 5312 }, { "epoch": 0.8, "grad_norm": 1.3213437795639038, "learning_rate": 1.0314445201275737e-06, "loss": 0.4561, "step": 5313 }, { "epoch": 0.8, "grad_norm": 1.2695817947387695, "learning_rate": 1.0299651890583128e-06, "loss": 0.4779, "step": 5314 }, { "epoch": 0.8, "grad_norm": 1.2916409969329834, "learning_rate": 1.0284867977826418e-06, "loss": 0.5336, "step": 5315 }, { "epoch": 0.8, "grad_norm": 1.5957245826721191, "learning_rate": 1.0270093466505292e-06, "loss": 0.4678, "step": 5316 }, { "epoch": 0.8, "grad_norm": 1.1746344566345215, "learning_rate": 1.0255328360117207e-06, "loss": 0.4023, "step": 5317 }, { "epoch": 0.8, "grad_norm": 1.7026183605194092, "learning_rate": 1.0240572662157355e-06, "loss": 0.4909, "step": 5318 }, { "epoch": 0.8, "grad_norm": 1.390560269355774, "learning_rate": 1.0225826376118769e-06, "loss": 0.4366, "step": 5319 }, { "epoch": 0.8, "grad_norm": 1.2308005094528198, "learning_rate": 1.0211089505492178e-06, "loss": 0.4791, "step": 5320 }, { "epoch": 0.8, "grad_norm": 1.4054207801818848, "learning_rate": 1.019636205376615e-06, "loss": 0.4628, "step": 5321 }, { "epoch": 0.8, "grad_norm": 1.3277747631072998, "learning_rate": 1.0181644024426968e-06, "loss": 0.4751, "step": 5322 }, { "epoch": 0.8, "grad_norm": 1.4267942905426025, "learning_rate": 1.0166935420958736e-06, "loss": 0.5221, "step": 5323 }, { "epoch": 0.8, "grad_norm": 1.2084177732467651, "learning_rate": 1.0152236246843272e-06, "loss": 0.5193, "step": 5324 }, { "epoch": 0.8, "grad_norm": 1.274044156074524, "learning_rate": 1.013754650556022e-06, "loss": 0.4974, "step": 5325 }, { "epoch": 0.8, "grad_norm": 1.3673888444900513, "learning_rate": 1.0122866200586945e-06, "loss": 0.4979, "step": 5326 }, { "epoch": 0.8, "grad_norm": 1.4806805849075317, "learning_rate": 1.0108195335398568e-06, "loss": 0.5097, "step": 5327 }, { "epoch": 0.8, "grad_norm": 1.4786325693130493, "learning_rate": 1.0093533913468056e-06, "loss": 0.413, "step": 5328 }, { "epoch": 0.8, "grad_norm": 1.6454493999481201, "learning_rate": 1.007888193826606e-06, "loss": 0.4877, "step": 5329 }, { "epoch": 0.8, "grad_norm": 1.3517731428146362, "learning_rate": 1.0064239413261006e-06, "loss": 0.4612, "step": 5330 }, { "epoch": 0.8, "grad_norm": 1.8115017414093018, "learning_rate": 1.0049606341919127e-06, "loss": 0.4967, "step": 5331 }, { "epoch": 0.8, "grad_norm": 1.3283820152282715, "learning_rate": 1.0034982727704363e-06, "loss": 0.4546, "step": 5332 }, { "epoch": 0.8, "grad_norm": 1.0659986734390259, "learning_rate": 1.0020368574078464e-06, "loss": 0.4749, "step": 5333 }, { "epoch": 0.8, "grad_norm": 1.399449348449707, "learning_rate": 1.0005763884500901e-06, "loss": 0.4698, "step": 5334 }, { "epoch": 0.8, "grad_norm": 1.4367395639419556, "learning_rate": 9.99116866242894e-07, "loss": 0.4599, "step": 5335 }, { "epoch": 0.8, "grad_norm": 1.4371423721313477, "learning_rate": 9.976582911317562e-07, "loss": 0.4922, "step": 5336 }, { "epoch": 0.8, "grad_norm": 1.3972607851028442, "learning_rate": 9.96200663461956e-07, "loss": 0.4858, "step": 5337 }, { "epoch": 0.8, "grad_norm": 1.300761103630066, "learning_rate": 9.947439835785421e-07, "loss": 0.5403, "step": 5338 }, { "epoch": 0.8, "grad_norm": 1.2235558032989502, "learning_rate": 9.932882518263454e-07, "loss": 0.4652, "step": 5339 }, { "epoch": 0.8, "grad_norm": 1.3187122344970703, "learning_rate": 9.918334685499686e-07, "loss": 0.4591, "step": 5340 }, { "epoch": 0.8, "grad_norm": 1.1858710050582886, "learning_rate": 9.903796340937893e-07, "loss": 0.6346, "step": 5341 }, { "epoch": 0.8, "grad_norm": 1.7228049039840698, "learning_rate": 9.889267488019645e-07, "loss": 0.4771, "step": 5342 }, { "epoch": 0.8, "grad_norm": 1.376063585281372, "learning_rate": 9.87474813018421e-07, "loss": 0.4724, "step": 5343 }, { "epoch": 0.8, "grad_norm": 1.0260107517242432, "learning_rate": 9.860238270868633e-07, "loss": 0.4935, "step": 5344 }, { "epoch": 0.8, "grad_norm": 1.1788634061813354, "learning_rate": 9.84573791350774e-07, "loss": 0.4943, "step": 5345 }, { "epoch": 0.8, "grad_norm": 1.8191865682601929, "learning_rate": 9.831247061534049e-07, "loss": 0.4974, "step": 5346 }, { "epoch": 0.8, "grad_norm": 1.3451123237609863, "learning_rate": 9.816765718377896e-07, "loss": 0.4378, "step": 5347 }, { "epoch": 0.8, "grad_norm": 1.608346939086914, "learning_rate": 9.80229388746729e-07, "loss": 0.446, "step": 5348 }, { "epoch": 0.8, "grad_norm": 1.3473141193389893, "learning_rate": 9.78783157222805e-07, "loss": 0.5494, "step": 5349 }, { "epoch": 0.8, "grad_norm": 1.2907721996307373, "learning_rate": 9.773378776083736e-07, "loss": 0.4238, "step": 5350 }, { "epoch": 0.8, "grad_norm": 1.2324309349060059, "learning_rate": 9.758935502455607e-07, "loss": 0.5116, "step": 5351 }, { "epoch": 0.8, "grad_norm": 1.1612650156021118, "learning_rate": 9.74450175476273e-07, "loss": 0.4183, "step": 5352 }, { "epoch": 0.8, "grad_norm": 1.0876835584640503, "learning_rate": 9.730077536421862e-07, "loss": 0.437, "step": 5353 }, { "epoch": 0.8, "grad_norm": 1.3827357292175293, "learning_rate": 9.715662850847547e-07, "loss": 0.472, "step": 5354 }, { "epoch": 0.8, "grad_norm": 1.5585404634475708, "learning_rate": 9.701257701452049e-07, "loss": 0.3958, "step": 5355 }, { "epoch": 0.8, "grad_norm": 1.4902106523513794, "learning_rate": 9.686862091645366e-07, "loss": 0.4563, "step": 5356 }, { "epoch": 0.8, "grad_norm": 1.2142107486724854, "learning_rate": 9.672476024835276e-07, "loss": 0.4419, "step": 5357 }, { "epoch": 0.8, "grad_norm": 1.4382320642471313, "learning_rate": 9.658099504427249e-07, "loss": 0.4804, "step": 5358 }, { "epoch": 0.81, "grad_norm": 1.3454498052597046, "learning_rate": 9.643732533824545e-07, "loss": 0.4509, "step": 5359 }, { "epoch": 0.81, "grad_norm": 1.3653641939163208, "learning_rate": 9.62937511642812e-07, "loss": 0.5222, "step": 5360 }, { "epoch": 0.81, "grad_norm": 1.336193323135376, "learning_rate": 9.61502725563669e-07, "loss": 0.4925, "step": 5361 }, { "epoch": 0.81, "grad_norm": 1.1868230104446411, "learning_rate": 9.60068895484672e-07, "loss": 0.3881, "step": 5362 }, { "epoch": 0.81, "grad_norm": 1.2164652347564697, "learning_rate": 9.58636021745238e-07, "loss": 0.4983, "step": 5363 }, { "epoch": 0.81, "grad_norm": 1.2667707204818726, "learning_rate": 9.572041046845604e-07, "loss": 0.4581, "step": 5364 }, { "epoch": 0.81, "grad_norm": 1.1050809621810913, "learning_rate": 9.557731446416046e-07, "loss": 0.3989, "step": 5365 }, { "epoch": 0.81, "grad_norm": 1.0287413597106934, "learning_rate": 9.543431419551108e-07, "loss": 0.4577, "step": 5366 }, { "epoch": 0.81, "grad_norm": 1.0467731952667236, "learning_rate": 9.529140969635914e-07, "loss": 0.4506, "step": 5367 }, { "epoch": 0.81, "grad_norm": 1.5913903713226318, "learning_rate": 9.514860100053308e-07, "loss": 0.4718, "step": 5368 }, { "epoch": 0.81, "grad_norm": 1.4217182397842407, "learning_rate": 9.500588814183909e-07, "loss": 0.5043, "step": 5369 }, { "epoch": 0.81, "grad_norm": 1.3822872638702393, "learning_rate": 9.486327115406013e-07, "loss": 0.4602, "step": 5370 }, { "epoch": 0.81, "grad_norm": 1.409705638885498, "learning_rate": 9.472075007095704e-07, "loss": 0.5028, "step": 5371 }, { "epoch": 0.81, "grad_norm": 1.3629306554794312, "learning_rate": 9.457832492626739e-07, "loss": 0.4993, "step": 5372 }, { "epoch": 0.81, "grad_norm": 1.3706574440002441, "learning_rate": 9.443599575370638e-07, "loss": 0.4667, "step": 5373 }, { "epoch": 0.81, "grad_norm": 1.2921191453933716, "learning_rate": 9.429376258696665e-07, "loss": 0.4992, "step": 5374 }, { "epoch": 0.81, "grad_norm": 1.021911382675171, "learning_rate": 9.415162545971757e-07, "loss": 0.4635, "step": 5375 }, { "epoch": 0.81, "grad_norm": 1.0042957067489624, "learning_rate": 9.400958440560632e-07, "loss": 0.4464, "step": 5376 }, { "epoch": 0.81, "grad_norm": 1.3037315607070923, "learning_rate": 9.386763945825689e-07, "loss": 0.4747, "step": 5377 }, { "epoch": 0.81, "grad_norm": 1.445459008216858, "learning_rate": 9.372579065127102e-07, "loss": 0.5199, "step": 5378 }, { "epoch": 0.81, "grad_norm": 1.4589463472366333, "learning_rate": 9.358403801822724e-07, "loss": 0.5066, "step": 5379 }, { "epoch": 0.81, "grad_norm": 1.067132592201233, "learning_rate": 9.344238159268132e-07, "loss": 0.6965, "step": 5380 }, { "epoch": 0.81, "grad_norm": 1.327163815498352, "learning_rate": 9.330082140816677e-07, "loss": 0.4363, "step": 5381 }, { "epoch": 0.81, "grad_norm": 1.3500516414642334, "learning_rate": 9.315935749819361e-07, "loss": 0.4588, "step": 5382 }, { "epoch": 0.81, "grad_norm": 1.063126564025879, "learning_rate": 9.301798989624961e-07, "loss": 0.4, "step": 5383 }, { "epoch": 0.81, "grad_norm": 1.1582027673721313, "learning_rate": 9.287671863579967e-07, "loss": 0.4444, "step": 5384 }, { "epoch": 0.81, "grad_norm": 1.0456960201263428, "learning_rate": 9.273554375028548e-07, "loss": 0.3859, "step": 5385 }, { "epoch": 0.81, "grad_norm": 1.479244351387024, "learning_rate": 9.259446527312654e-07, "loss": 0.493, "step": 5386 }, { "epoch": 0.81, "grad_norm": 1.5993095636367798, "learning_rate": 9.245348323771886e-07, "loss": 0.4348, "step": 5387 }, { "epoch": 0.81, "grad_norm": 1.6091183423995972, "learning_rate": 9.231259767743622e-07, "loss": 0.4709, "step": 5388 }, { "epoch": 0.81, "grad_norm": 1.368463397026062, "learning_rate": 9.217180862562914e-07, "loss": 0.5189, "step": 5389 }, { "epoch": 0.81, "grad_norm": 1.5544015169143677, "learning_rate": 9.203111611562554e-07, "loss": 0.4433, "step": 5390 }, { "epoch": 0.81, "grad_norm": 1.313355565071106, "learning_rate": 9.189052018073042e-07, "loss": 0.4073, "step": 5391 }, { "epoch": 0.81, "grad_norm": 1.1562832593917847, "learning_rate": 9.175002085422569e-07, "loss": 0.4792, "step": 5392 }, { "epoch": 0.81, "grad_norm": 1.2009384632110596, "learning_rate": 9.160961816937086e-07, "loss": 0.4387, "step": 5393 }, { "epoch": 0.81, "grad_norm": 1.0337849855422974, "learning_rate": 9.146931215940197e-07, "loss": 0.5696, "step": 5394 }, { "epoch": 0.81, "grad_norm": 1.509091854095459, "learning_rate": 9.132910285753294e-07, "loss": 0.4784, "step": 5395 }, { "epoch": 0.81, "grad_norm": 1.2176220417022705, "learning_rate": 9.11889902969541e-07, "loss": 0.4563, "step": 5396 }, { "epoch": 0.81, "grad_norm": 1.391541838645935, "learning_rate": 9.104897451083305e-07, "loss": 0.4322, "step": 5397 }, { "epoch": 0.81, "grad_norm": 1.6072241067886353, "learning_rate": 9.090905553231488e-07, "loss": 0.4896, "step": 5398 }, { "epoch": 0.81, "grad_norm": 1.7802515029907227, "learning_rate": 9.076923339452115e-07, "loss": 0.504, "step": 5399 }, { "epoch": 0.81, "grad_norm": 1.3288367986679077, "learning_rate": 9.062950813055099e-07, "loss": 0.4494, "step": 5400 }, { "epoch": 0.81, "grad_norm": 1.3900622129440308, "learning_rate": 9.048987977348029e-07, "loss": 0.4629, "step": 5401 }, { "epoch": 0.81, "grad_norm": 1.419651746749878, "learning_rate": 9.035034835636225e-07, "loss": 0.4371, "step": 5402 }, { "epoch": 0.81, "grad_norm": 2.162537097930908, "learning_rate": 9.021091391222691e-07, "loss": 0.404, "step": 5403 }, { "epoch": 0.81, "grad_norm": 1.4321773052215576, "learning_rate": 9.007157647408132e-07, "loss": 0.3995, "step": 5404 }, { "epoch": 0.81, "grad_norm": 1.2556853294372559, "learning_rate": 8.993233607490981e-07, "loss": 0.4976, "step": 5405 }, { "epoch": 0.81, "grad_norm": 1.3017536401748657, "learning_rate": 8.979319274767362e-07, "loss": 0.7839, "step": 5406 }, { "epoch": 0.81, "grad_norm": 1.1414854526519775, "learning_rate": 8.965414652531112e-07, "loss": 0.4705, "step": 5407 }, { "epoch": 0.81, "grad_norm": 1.1906046867370605, "learning_rate": 8.951519744073744e-07, "loss": 0.4801, "step": 5408 }, { "epoch": 0.81, "grad_norm": 1.453603744506836, "learning_rate": 8.937634552684471e-07, "loss": 0.4777, "step": 5409 }, { "epoch": 0.81, "grad_norm": 1.4183403253555298, "learning_rate": 8.923759081650247e-07, "loss": 0.5558, "step": 5410 }, { "epoch": 0.81, "grad_norm": 2.3989930152893066, "learning_rate": 8.90989333425567e-07, "loss": 0.425, "step": 5411 }, { "epoch": 0.81, "grad_norm": 1.319345235824585, "learning_rate": 8.896037313783096e-07, "loss": 0.4212, "step": 5412 }, { "epoch": 0.81, "grad_norm": 1.1768808364868164, "learning_rate": 8.88219102351251e-07, "loss": 0.4989, "step": 5413 }, { "epoch": 0.81, "grad_norm": 1.4116730690002441, "learning_rate": 8.868354466721668e-07, "loss": 0.4325, "step": 5414 }, { "epoch": 0.81, "grad_norm": 1.269790768623352, "learning_rate": 8.854527646685962e-07, "loss": 0.4557, "step": 5415 }, { "epoch": 0.81, "grad_norm": 1.388624906539917, "learning_rate": 8.840710566678479e-07, "loss": 0.522, "step": 5416 }, { "epoch": 0.81, "grad_norm": 1.2837817668914795, "learning_rate": 8.826903229970074e-07, "loss": 0.5204, "step": 5417 }, { "epoch": 0.81, "grad_norm": 1.5151586532592773, "learning_rate": 8.813105639829206e-07, "loss": 0.4674, "step": 5418 }, { "epoch": 0.81, "grad_norm": 1.381638765335083, "learning_rate": 8.799317799522089e-07, "loss": 0.4085, "step": 5419 }, { "epoch": 0.81, "grad_norm": 1.433566927909851, "learning_rate": 8.785539712312591e-07, "loss": 0.5167, "step": 5420 }, { "epoch": 0.81, "grad_norm": 1.1543890237808228, "learning_rate": 8.771771381462274e-07, "loss": 0.7441, "step": 5421 }, { "epoch": 0.81, "grad_norm": 1.2211841344833374, "learning_rate": 8.758012810230426e-07, "loss": 0.4622, "step": 5422 }, { "epoch": 0.81, "grad_norm": 2.6210503578186035, "learning_rate": 8.744264001873976e-07, "loss": 0.4465, "step": 5423 }, { "epoch": 0.81, "grad_norm": 1.4163613319396973, "learning_rate": 8.730524959647585e-07, "loss": 0.4726, "step": 5424 }, { "epoch": 0.81, "grad_norm": 1.374395728111267, "learning_rate": 8.716795686803564e-07, "loss": 0.4234, "step": 5425 }, { "epoch": 0.82, "grad_norm": 1.5118792057037354, "learning_rate": 8.703076186591958e-07, "loss": 0.4678, "step": 5426 }, { "epoch": 0.82, "grad_norm": 1.2016195058822632, "learning_rate": 8.68936646226044e-07, "loss": 0.5559, "step": 5427 }, { "epoch": 0.82, "grad_norm": 1.3827694654464722, "learning_rate": 8.675666517054416e-07, "loss": 0.4626, "step": 5428 }, { "epoch": 0.82, "grad_norm": 1.1899564266204834, "learning_rate": 8.661976354216972e-07, "loss": 0.7001, "step": 5429 }, { "epoch": 0.82, "grad_norm": 1.484931230545044, "learning_rate": 8.648295976988846e-07, "loss": 0.4807, "step": 5430 }, { "epoch": 0.82, "grad_norm": 1.2188804149627686, "learning_rate": 8.634625388608498e-07, "loss": 0.4285, "step": 5431 }, { "epoch": 0.82, "grad_norm": 1.3072158098220825, "learning_rate": 8.620964592312048e-07, "loss": 0.4863, "step": 5432 }, { "epoch": 0.82, "grad_norm": 1.8035645484924316, "learning_rate": 8.607313591333288e-07, "loss": 0.5004, "step": 5433 }, { "epoch": 0.82, "grad_norm": 1.550441861152649, "learning_rate": 8.593672388903734e-07, "loss": 0.51, "step": 5434 }, { "epoch": 0.82, "grad_norm": 1.169861912727356, "learning_rate": 8.58004098825253e-07, "loss": 0.5052, "step": 5435 }, { "epoch": 0.82, "grad_norm": 1.0967134237289429, "learning_rate": 8.566419392606546e-07, "loss": 0.4335, "step": 5436 }, { "epoch": 0.82, "grad_norm": 1.9949522018432617, "learning_rate": 8.552807605190288e-07, "loss": 0.4956, "step": 5437 }, { "epoch": 0.82, "grad_norm": 1.391495943069458, "learning_rate": 8.539205629225977e-07, "loss": 0.4553, "step": 5438 }, { "epoch": 0.82, "grad_norm": 1.3766359090805054, "learning_rate": 8.525613467933508e-07, "loss": 0.4517, "step": 5439 }, { "epoch": 0.82, "grad_norm": 1.2282681465148926, "learning_rate": 8.512031124530412e-07, "loss": 0.4676, "step": 5440 }, { "epoch": 0.82, "grad_norm": 1.2108017206192017, "learning_rate": 8.498458602231957e-07, "loss": 0.5134, "step": 5441 }, { "epoch": 0.82, "grad_norm": 1.3098530769348145, "learning_rate": 8.484895904251023e-07, "loss": 0.4927, "step": 5442 }, { "epoch": 0.82, "grad_norm": 1.7792540788650513, "learning_rate": 8.471343033798224e-07, "loss": 0.4633, "step": 5443 }, { "epoch": 0.82, "grad_norm": 1.0609774589538574, "learning_rate": 8.457799994081806e-07, "loss": 0.4576, "step": 5444 }, { "epoch": 0.82, "grad_norm": 1.2607618570327759, "learning_rate": 8.444266788307687e-07, "loss": 0.4593, "step": 5445 }, { "epoch": 0.82, "grad_norm": 1.6578419208526611, "learning_rate": 8.430743419679494e-07, "loss": 0.5118, "step": 5446 }, { "epoch": 0.82, "grad_norm": 1.3808000087738037, "learning_rate": 8.417229891398487e-07, "loss": 0.4925, "step": 5447 }, { "epoch": 0.82, "grad_norm": 1.578214406967163, "learning_rate": 8.403726206663626e-07, "loss": 0.5073, "step": 5448 }, { "epoch": 0.82, "grad_norm": 1.21957528591156, "learning_rate": 8.390232368671508e-07, "loss": 0.4913, "step": 5449 }, { "epoch": 0.82, "grad_norm": 1.3031704425811768, "learning_rate": 8.376748380616434e-07, "loss": 0.4558, "step": 5450 }, { "epoch": 0.82, "grad_norm": 1.2115637063980103, "learning_rate": 8.363274245690362e-07, "loss": 0.4613, "step": 5451 }, { "epoch": 0.82, "grad_norm": 1.2984898090362549, "learning_rate": 8.349809967082889e-07, "loss": 0.4715, "step": 5452 }, { "epoch": 0.82, "grad_norm": 1.7266449928283691, "learning_rate": 8.336355547981328e-07, "loss": 0.5355, "step": 5453 }, { "epoch": 0.82, "grad_norm": 1.432650089263916, "learning_rate": 8.322910991570616e-07, "loss": 0.4532, "step": 5454 }, { "epoch": 0.82, "grad_norm": 1.112542748451233, "learning_rate": 8.309476301033387e-07, "loss": 0.438, "step": 5455 }, { "epoch": 0.82, "grad_norm": 1.1199692487716675, "learning_rate": 8.296051479549899e-07, "loss": 0.5178, "step": 5456 }, { "epoch": 0.82, "grad_norm": 1.5469669103622437, "learning_rate": 8.282636530298133e-07, "loss": 0.5098, "step": 5457 }, { "epoch": 0.82, "grad_norm": 1.3615190982818604, "learning_rate": 8.26923145645368e-07, "loss": 0.4118, "step": 5458 }, { "epoch": 0.82, "grad_norm": 1.0168806314468384, "learning_rate": 8.255836261189798e-07, "loss": 0.4407, "step": 5459 }, { "epoch": 0.82, "grad_norm": 1.4166334867477417, "learning_rate": 8.242450947677455e-07, "loss": 0.4717, "step": 5460 }, { "epoch": 0.82, "grad_norm": 1.216442346572876, "learning_rate": 8.229075519085211e-07, "loss": 0.5094, "step": 5461 }, { "epoch": 0.82, "grad_norm": 1.2472484111785889, "learning_rate": 8.21570997857934e-07, "loss": 0.4641, "step": 5462 }, { "epoch": 0.82, "grad_norm": 1.4769102334976196, "learning_rate": 8.202354329323769e-07, "loss": 0.5228, "step": 5463 }, { "epoch": 0.82, "grad_norm": 1.4270734786987305, "learning_rate": 8.189008574480039e-07, "loss": 0.4981, "step": 5464 }, { "epoch": 0.82, "grad_norm": 1.4587727785110474, "learning_rate": 8.175672717207412e-07, "loss": 0.5142, "step": 5465 }, { "epoch": 0.82, "grad_norm": 1.0867685079574585, "learning_rate": 8.162346760662748e-07, "loss": 0.4701, "step": 5466 }, { "epoch": 0.82, "grad_norm": 1.3626177310943604, "learning_rate": 8.149030708000616e-07, "loss": 0.5169, "step": 5467 }, { "epoch": 0.82, "grad_norm": 1.1789454221725464, "learning_rate": 8.135724562373193e-07, "loss": 0.4389, "step": 5468 }, { "epoch": 0.82, "grad_norm": 1.5703494548797607, "learning_rate": 8.122428326930348e-07, "loss": 0.4841, "step": 5469 }, { "epoch": 0.82, "grad_norm": 1.2458155155181885, "learning_rate": 8.109142004819592e-07, "loss": 0.4166, "step": 5470 }, { "epoch": 0.82, "grad_norm": 1.4882937669754028, "learning_rate": 8.09586559918606e-07, "loss": 0.4618, "step": 5471 }, { "epoch": 0.82, "grad_norm": 1.4707077741622925, "learning_rate": 8.082599113172584e-07, "loss": 0.3893, "step": 5472 }, { "epoch": 0.82, "grad_norm": 1.496580719947815, "learning_rate": 8.069342549919645e-07, "loss": 0.4679, "step": 5473 }, { "epoch": 0.82, "grad_norm": 2.654087781906128, "learning_rate": 8.056095912565337e-07, "loss": 0.4933, "step": 5474 }, { "epoch": 0.82, "grad_norm": 1.2355921268463135, "learning_rate": 8.04285920424544e-07, "loss": 0.4828, "step": 5475 }, { "epoch": 0.82, "grad_norm": 1.1272302865982056, "learning_rate": 8.029632428093359e-07, "loss": 0.4694, "step": 5476 }, { "epoch": 0.82, "grad_norm": 1.0614129304885864, "learning_rate": 8.016415587240178e-07, "loss": 0.6868, "step": 5477 }, { "epoch": 0.82, "grad_norm": 1.163740634918213, "learning_rate": 8.003208684814584e-07, "loss": 0.4451, "step": 5478 }, { "epoch": 0.82, "grad_norm": 2.2546401023864746, "learning_rate": 7.99001172394297e-07, "loss": 0.5083, "step": 5479 }, { "epoch": 0.82, "grad_norm": 1.2100169658660889, "learning_rate": 7.976824707749309e-07, "loss": 0.7271, "step": 5480 }, { "epoch": 0.82, "grad_norm": 1.3941982984542847, "learning_rate": 7.963647639355287e-07, "loss": 0.4639, "step": 5481 }, { "epoch": 0.82, "grad_norm": 1.6242502927780151, "learning_rate": 7.950480521880194e-07, "loss": 0.5017, "step": 5482 }, { "epoch": 0.82, "grad_norm": 1.9351414442062378, "learning_rate": 7.937323358440935e-07, "loss": 0.4341, "step": 5483 }, { "epoch": 0.82, "grad_norm": 1.4482216835021973, "learning_rate": 7.924176152152158e-07, "loss": 0.4257, "step": 5484 }, { "epoch": 0.82, "grad_norm": 1.2307004928588867, "learning_rate": 7.911038906126051e-07, "loss": 0.4758, "step": 5485 }, { "epoch": 0.82, "grad_norm": 1.186149001121521, "learning_rate": 7.89791162347251e-07, "loss": 0.4692, "step": 5486 }, { "epoch": 0.82, "grad_norm": 1.1156235933303833, "learning_rate": 7.884794307299037e-07, "loss": 0.501, "step": 5487 }, { "epoch": 0.82, "grad_norm": 1.4256330728530884, "learning_rate": 7.871686960710773e-07, "loss": 0.481, "step": 5488 }, { "epoch": 0.82, "grad_norm": 1.1183531284332275, "learning_rate": 7.858589586810539e-07, "loss": 0.3827, "step": 5489 }, { "epoch": 0.82, "grad_norm": 1.2699693441390991, "learning_rate": 7.845502188698745e-07, "loss": 0.4369, "step": 5490 }, { "epoch": 0.82, "grad_norm": 1.2360343933105469, "learning_rate": 7.832424769473479e-07, "loss": 0.4111, "step": 5491 }, { "epoch": 0.82, "grad_norm": 1.1051559448242188, "learning_rate": 7.819357332230437e-07, "loss": 0.4135, "step": 5492 }, { "epoch": 0.83, "grad_norm": 2.4672951698303223, "learning_rate": 7.806299880062978e-07, "loss": 0.4612, "step": 5493 }, { "epoch": 0.83, "grad_norm": 1.4671359062194824, "learning_rate": 7.793252416062075e-07, "loss": 0.4843, "step": 5494 }, { "epoch": 0.83, "grad_norm": 1.187828779220581, "learning_rate": 7.780214943316344e-07, "loss": 0.4438, "step": 5495 }, { "epoch": 0.83, "grad_norm": 1.5475678443908691, "learning_rate": 7.767187464912063e-07, "loss": 0.4946, "step": 5496 }, { "epoch": 0.83, "grad_norm": 1.3446955680847168, "learning_rate": 7.754169983933085e-07, "loss": 0.5208, "step": 5497 }, { "epoch": 0.83, "grad_norm": 1.0834232568740845, "learning_rate": 7.741162503460959e-07, "loss": 0.6946, "step": 5498 }, { "epoch": 0.83, "grad_norm": 1.4416255950927734, "learning_rate": 7.728165026574829e-07, "loss": 0.4757, "step": 5499 }, { "epoch": 0.83, "grad_norm": 1.6479456424713135, "learning_rate": 7.715177556351467e-07, "loss": 0.484, "step": 5500 }, { "epoch": 0.83, "grad_norm": 1.5485515594482422, "learning_rate": 7.702200095865315e-07, "loss": 0.4923, "step": 5501 }, { "epoch": 0.83, "grad_norm": 2.179560661315918, "learning_rate": 7.689232648188394e-07, "loss": 0.4821, "step": 5502 }, { "epoch": 0.83, "grad_norm": 1.2888444662094116, "learning_rate": 7.676275216390406e-07, "loss": 0.4527, "step": 5503 }, { "epoch": 0.83, "grad_norm": 1.247420072555542, "learning_rate": 7.663327803538628e-07, "loss": 0.5185, "step": 5504 }, { "epoch": 0.83, "grad_norm": 1.049088954925537, "learning_rate": 7.650390412698011e-07, "loss": 0.4591, "step": 5505 }, { "epoch": 0.83, "grad_norm": 1.0945757627487183, "learning_rate": 7.637463046931132e-07, "loss": 0.7029, "step": 5506 }, { "epoch": 0.83, "grad_norm": 1.5633584260940552, "learning_rate": 7.624545709298153e-07, "loss": 0.4839, "step": 5507 }, { "epoch": 0.83, "grad_norm": 1.8612096309661865, "learning_rate": 7.611638402856908e-07, "loss": 0.4829, "step": 5508 }, { "epoch": 0.83, "grad_norm": 1.0740227699279785, "learning_rate": 7.598741130662817e-07, "loss": 0.6406, "step": 5509 }, { "epoch": 0.83, "grad_norm": 2.2854881286621094, "learning_rate": 7.585853895768974e-07, "loss": 0.4822, "step": 5510 }, { "epoch": 0.83, "grad_norm": 1.238484501838684, "learning_rate": 7.572976701226043e-07, "loss": 0.4454, "step": 5511 }, { "epoch": 0.83, "grad_norm": 1.5960758924484253, "learning_rate": 7.56010955008234e-07, "loss": 0.4987, "step": 5512 }, { "epoch": 0.83, "grad_norm": 1.1547558307647705, "learning_rate": 7.547252445383807e-07, "loss": 0.4664, "step": 5513 }, { "epoch": 0.83, "grad_norm": 1.4669997692108154, "learning_rate": 7.534405390173994e-07, "loss": 0.4562, "step": 5514 }, { "epoch": 0.83, "grad_norm": 1.2875217199325562, "learning_rate": 7.521568387494083e-07, "loss": 0.534, "step": 5515 }, { "epoch": 0.83, "grad_norm": 1.2137233018875122, "learning_rate": 7.508741440382866e-07, "loss": 0.4641, "step": 5516 }, { "epoch": 0.83, "grad_norm": 1.305601954460144, "learning_rate": 7.49592455187676e-07, "loss": 0.498, "step": 5517 }, { "epoch": 0.83, "grad_norm": 1.3424242734909058, "learning_rate": 7.483117725009814e-07, "loss": 0.4685, "step": 5518 }, { "epoch": 0.83, "grad_norm": 1.3110005855560303, "learning_rate": 7.470320962813665e-07, "loss": 0.474, "step": 5519 }, { "epoch": 0.83, "grad_norm": 1.2370771169662476, "learning_rate": 7.457534268317607e-07, "loss": 0.468, "step": 5520 }, { "epoch": 0.83, "grad_norm": 1.2500998973846436, "learning_rate": 7.444757644548505e-07, "loss": 0.5126, "step": 5521 }, { "epoch": 0.83, "grad_norm": 1.3357704877853394, "learning_rate": 7.431991094530883e-07, "loss": 0.4082, "step": 5522 }, { "epoch": 0.83, "grad_norm": 1.1352055072784424, "learning_rate": 7.419234621286853e-07, "loss": 0.4169, "step": 5523 }, { "epoch": 0.83, "grad_norm": 2.101754903793335, "learning_rate": 7.406488227836139e-07, "loss": 0.4668, "step": 5524 }, { "epoch": 0.83, "grad_norm": 1.1308618783950806, "learning_rate": 7.393751917196112e-07, "loss": 0.5213, "step": 5525 }, { "epoch": 0.83, "grad_norm": 1.109596610069275, "learning_rate": 7.381025692381705e-07, "loss": 0.4755, "step": 5526 }, { "epoch": 0.83, "grad_norm": 1.0645970106124878, "learning_rate": 7.368309556405523e-07, "loss": 0.3605, "step": 5527 }, { "epoch": 0.83, "grad_norm": 1.3950494527816772, "learning_rate": 7.355603512277726e-07, "loss": 0.4637, "step": 5528 }, { "epoch": 0.83, "grad_norm": 1.1060194969177246, "learning_rate": 7.342907563006125e-07, "loss": 0.4409, "step": 5529 }, { "epoch": 0.83, "grad_norm": 1.938745379447937, "learning_rate": 7.330221711596136e-07, "loss": 0.4743, "step": 5530 }, { "epoch": 0.83, "grad_norm": 1.4974015951156616, "learning_rate": 7.317545961050748e-07, "loss": 0.4856, "step": 5531 }, { "epoch": 0.83, "grad_norm": 1.934098720550537, "learning_rate": 7.304880314370621e-07, "loss": 0.4704, "step": 5532 }, { "epoch": 0.83, "grad_norm": 1.1952285766601562, "learning_rate": 7.292224774553958e-07, "loss": 0.5063, "step": 5533 }, { "epoch": 0.83, "grad_norm": 1.2336809635162354, "learning_rate": 7.279579344596627e-07, "loss": 0.4752, "step": 5534 }, { "epoch": 0.83, "grad_norm": 1.3128124475479126, "learning_rate": 7.266944027492062e-07, "loss": 0.4985, "step": 5535 }, { "epoch": 0.83, "grad_norm": 1.1560648679733276, "learning_rate": 7.254318826231305e-07, "loss": 0.4696, "step": 5536 }, { "epoch": 0.83, "grad_norm": 1.3627266883850098, "learning_rate": 7.241703743803046e-07, "loss": 0.5159, "step": 5537 }, { "epoch": 0.83, "grad_norm": 1.3858790397644043, "learning_rate": 7.229098783193517e-07, "loss": 0.3966, "step": 5538 }, { "epoch": 0.83, "grad_norm": 1.26559317111969, "learning_rate": 7.216503947386605e-07, "loss": 0.41, "step": 5539 }, { "epoch": 0.83, "grad_norm": 1.3646889925003052, "learning_rate": 7.203919239363794e-07, "loss": 0.4679, "step": 5540 }, { "epoch": 0.83, "grad_norm": 1.4816699028015137, "learning_rate": 7.191344662104127e-07, "loss": 0.4744, "step": 5541 }, { "epoch": 0.83, "grad_norm": 1.3138967752456665, "learning_rate": 7.178780218584308e-07, "loss": 0.5924, "step": 5542 }, { "epoch": 0.83, "grad_norm": 1.4359849691390991, "learning_rate": 7.166225911778591e-07, "loss": 0.4737, "step": 5543 }, { "epoch": 0.83, "grad_norm": 1.2400566339492798, "learning_rate": 7.153681744658874e-07, "loss": 0.3826, "step": 5544 }, { "epoch": 0.83, "grad_norm": 1.3230236768722534, "learning_rate": 7.141147720194619e-07, "loss": 0.5134, "step": 5545 }, { "epoch": 0.83, "grad_norm": 1.2012251615524292, "learning_rate": 7.128623841352916e-07, "loss": 0.4655, "step": 5546 }, { "epoch": 0.83, "grad_norm": 1.2046703100204468, "learning_rate": 7.116110111098435e-07, "loss": 0.4716, "step": 5547 }, { "epoch": 0.83, "grad_norm": 1.283958911895752, "learning_rate": 7.103606532393431e-07, "loss": 0.4476, "step": 5548 }, { "epoch": 0.83, "grad_norm": 1.370582103729248, "learning_rate": 7.091113108197794e-07, "loss": 0.4367, "step": 5549 }, { "epoch": 0.83, "grad_norm": 1.4331903457641602, "learning_rate": 7.078629841468965e-07, "loss": 0.5564, "step": 5550 }, { "epoch": 0.83, "grad_norm": 1.4872709512710571, "learning_rate": 7.066156735162044e-07, "loss": 0.5206, "step": 5551 }, { "epoch": 0.83, "grad_norm": 1.1222434043884277, "learning_rate": 7.05369379222966e-07, "loss": 0.4687, "step": 5552 }, { "epoch": 0.83, "grad_norm": 1.3354703187942505, "learning_rate": 7.041241015622063e-07, "loss": 0.4656, "step": 5553 }, { "epoch": 0.83, "grad_norm": 1.0461957454681396, "learning_rate": 7.028798408287108e-07, "loss": 0.4513, "step": 5554 }, { "epoch": 0.83, "grad_norm": 1.0901970863342285, "learning_rate": 7.016365973170208e-07, "loss": 0.4678, "step": 5555 }, { "epoch": 0.83, "grad_norm": 1.183050274848938, "learning_rate": 7.003943713214422e-07, "loss": 0.4779, "step": 5556 }, { "epoch": 0.83, "grad_norm": 1.6503148078918457, "learning_rate": 6.991531631360338e-07, "loss": 0.48, "step": 5557 }, { "epoch": 0.83, "grad_norm": 1.3751907348632812, "learning_rate": 6.979129730546191e-07, "loss": 0.4848, "step": 5558 }, { "epoch": 0.84, "grad_norm": 1.2722278833389282, "learning_rate": 6.966738013707774e-07, "loss": 0.4835, "step": 5559 }, { "epoch": 0.84, "grad_norm": 1.2095599174499512, "learning_rate": 6.954356483778457e-07, "loss": 0.496, "step": 5560 }, { "epoch": 0.84, "grad_norm": 1.2514269351959229, "learning_rate": 6.941985143689239e-07, "loss": 0.4915, "step": 5561 }, { "epoch": 0.84, "grad_norm": 1.519559621810913, "learning_rate": 6.929623996368673e-07, "loss": 0.4913, "step": 5562 }, { "epoch": 0.84, "grad_norm": 1.3415253162384033, "learning_rate": 6.917273044742928e-07, "loss": 0.5417, "step": 5563 }, { "epoch": 0.84, "grad_norm": 1.2393532991409302, "learning_rate": 6.904932291735739e-07, "loss": 0.3846, "step": 5564 }, { "epoch": 0.84, "grad_norm": 1.28200364112854, "learning_rate": 6.892601740268406e-07, "loss": 0.5389, "step": 5565 }, { "epoch": 0.84, "grad_norm": 2.161771297454834, "learning_rate": 6.880281393259869e-07, "loss": 0.4325, "step": 5566 }, { "epoch": 0.84, "grad_norm": 1.383599042892456, "learning_rate": 6.867971253626599e-07, "loss": 0.4875, "step": 5567 }, { "epoch": 0.84, "grad_norm": 1.0748186111450195, "learning_rate": 6.855671324282697e-07, "loss": 0.483, "step": 5568 }, { "epoch": 0.84, "grad_norm": 1.4131108522415161, "learning_rate": 6.843381608139798e-07, "loss": 0.5034, "step": 5569 }, { "epoch": 0.84, "grad_norm": 1.3720372915267944, "learning_rate": 6.831102108107174e-07, "loss": 0.4411, "step": 5570 }, { "epoch": 0.84, "grad_norm": 1.5093458890914917, "learning_rate": 6.818832827091632e-07, "loss": 0.4962, "step": 5571 }, { "epoch": 0.84, "grad_norm": 1.2651846408843994, "learning_rate": 6.806573767997555e-07, "loss": 0.6764, "step": 5572 }, { "epoch": 0.84, "grad_norm": 1.1637730598449707, "learning_rate": 6.794324933726976e-07, "loss": 0.4759, "step": 5573 }, { "epoch": 0.84, "grad_norm": 1.232790231704712, "learning_rate": 6.782086327179432e-07, "loss": 0.4803, "step": 5574 }, { "epoch": 0.84, "grad_norm": 1.250647783279419, "learning_rate": 6.769857951252078e-07, "loss": 0.453, "step": 5575 }, { "epoch": 0.84, "grad_norm": 1.5991096496582031, "learning_rate": 6.757639808839638e-07, "loss": 0.5229, "step": 5576 }, { "epoch": 0.84, "grad_norm": 1.2101142406463623, "learning_rate": 6.745431902834393e-07, "loss": 0.4507, "step": 5577 }, { "epoch": 0.84, "grad_norm": 1.1539146900177002, "learning_rate": 6.733234236126245e-07, "loss": 0.4546, "step": 5578 }, { "epoch": 0.84, "grad_norm": 1.4602924585342407, "learning_rate": 6.721046811602622e-07, "loss": 0.4765, "step": 5579 }, { "epoch": 0.84, "grad_norm": 1.4124667644500732, "learning_rate": 6.708869632148579e-07, "loss": 0.5029, "step": 5580 }, { "epoch": 0.84, "grad_norm": 1.083106279373169, "learning_rate": 6.696702700646695e-07, "loss": 0.4893, "step": 5581 }, { "epoch": 0.84, "grad_norm": 1.1103501319885254, "learning_rate": 6.684546019977167e-07, "loss": 0.5116, "step": 5582 }, { "epoch": 0.84, "grad_norm": 1.3270694017410278, "learning_rate": 6.672399593017726e-07, "loss": 0.4621, "step": 5583 }, { "epoch": 0.84, "grad_norm": 1.422852873802185, "learning_rate": 6.66026342264371e-07, "loss": 0.4592, "step": 5584 }, { "epoch": 0.84, "grad_norm": 1.2820311784744263, "learning_rate": 6.64813751172802e-07, "loss": 0.5189, "step": 5585 }, { "epoch": 0.84, "grad_norm": 1.293696403503418, "learning_rate": 6.636021863141101e-07, "loss": 0.4139, "step": 5586 }, { "epoch": 0.84, "grad_norm": 1.3182659149169922, "learning_rate": 6.623916479751013e-07, "loss": 0.5091, "step": 5587 }, { "epoch": 0.84, "grad_norm": 1.4764114618301392, "learning_rate": 6.61182136442336e-07, "loss": 0.4691, "step": 5588 }, { "epoch": 0.84, "grad_norm": 1.7051920890808105, "learning_rate": 6.599736520021299e-07, "loss": 0.4701, "step": 5589 }, { "epoch": 0.84, "grad_norm": 1.384182095527649, "learning_rate": 6.587661949405599e-07, "loss": 0.4391, "step": 5590 }, { "epoch": 0.84, "grad_norm": 1.2842036485671997, "learning_rate": 6.575597655434551e-07, "loss": 0.4199, "step": 5591 }, { "epoch": 0.84, "grad_norm": 1.3822736740112305, "learning_rate": 6.563543640964059e-07, "loss": 0.4765, "step": 5592 }, { "epoch": 0.84, "grad_norm": 1.2235318422317505, "learning_rate": 6.551499908847553e-07, "loss": 0.4842, "step": 5593 }, { "epoch": 0.84, "grad_norm": 1.2066402435302734, "learning_rate": 6.539466461936051e-07, "loss": 0.7046, "step": 5594 }, { "epoch": 0.84, "grad_norm": 1.2448214292526245, "learning_rate": 6.527443303078146e-07, "loss": 0.4523, "step": 5595 }, { "epoch": 0.84, "grad_norm": 1.183775782585144, "learning_rate": 6.515430435119957e-07, "loss": 0.4806, "step": 5596 }, { "epoch": 0.84, "grad_norm": 1.310619831085205, "learning_rate": 6.503427860905215e-07, "loss": 0.4665, "step": 5597 }, { "epoch": 0.84, "grad_norm": 0.978628933429718, "learning_rate": 6.491435583275168e-07, "loss": 0.4396, "step": 5598 }, { "epoch": 0.84, "grad_norm": 1.3488281965255737, "learning_rate": 6.479453605068676e-07, "loss": 0.3921, "step": 5599 }, { "epoch": 0.84, "grad_norm": 1.5252355337142944, "learning_rate": 6.467481929122111e-07, "loss": 0.478, "step": 5600 }, { "epoch": 0.84, "grad_norm": 1.4391058683395386, "learning_rate": 6.455520558269423e-07, "loss": 0.5125, "step": 5601 }, { "epoch": 0.84, "grad_norm": 3.317091464996338, "learning_rate": 6.443569495342156e-07, "loss": 0.4394, "step": 5602 }, { "epoch": 0.84, "grad_norm": 1.7683212757110596, "learning_rate": 6.431628743169354e-07, "loss": 0.4267, "step": 5603 }, { "epoch": 0.84, "grad_norm": 1.0848894119262695, "learning_rate": 6.419698304577687e-07, "loss": 0.4032, "step": 5604 }, { "epoch": 0.84, "grad_norm": 1.3365119695663452, "learning_rate": 6.407778182391322e-07, "loss": 0.5073, "step": 5605 }, { "epoch": 0.84, "grad_norm": 2.5799572467803955, "learning_rate": 6.395868379432024e-07, "loss": 0.449, "step": 5606 }, { "epoch": 0.84, "grad_norm": 1.4371402263641357, "learning_rate": 6.383968898519111e-07, "loss": 0.4957, "step": 5607 }, { "epoch": 0.84, "grad_norm": 1.4449572563171387, "learning_rate": 6.372079742469422e-07, "loss": 0.4613, "step": 5608 }, { "epoch": 0.84, "grad_norm": 1.212769627571106, "learning_rate": 6.360200914097408e-07, "loss": 0.3895, "step": 5609 }, { "epoch": 0.84, "grad_norm": 1.371399164199829, "learning_rate": 6.348332416215025e-07, "loss": 0.4101, "step": 5610 }, { "epoch": 0.84, "grad_norm": 1.1417509317398071, "learning_rate": 6.336474251631824e-07, "loss": 0.4595, "step": 5611 }, { "epoch": 0.84, "grad_norm": 1.374954104423523, "learning_rate": 6.324626423154867e-07, "loss": 0.4871, "step": 5612 }, { "epoch": 0.84, "grad_norm": 1.1980615854263306, "learning_rate": 6.312788933588815e-07, "loss": 0.4715, "step": 5613 }, { "epoch": 0.84, "grad_norm": 1.178670048713684, "learning_rate": 6.300961785735849e-07, "loss": 0.5448, "step": 5614 }, { "epoch": 0.84, "grad_norm": 1.1933034658432007, "learning_rate": 6.2891449823957e-07, "loss": 0.464, "step": 5615 }, { "epoch": 0.84, "grad_norm": 1.8134125471115112, "learning_rate": 6.277338526365689e-07, "loss": 0.4968, "step": 5616 }, { "epoch": 0.84, "grad_norm": 1.2870420217514038, "learning_rate": 6.265542420440635e-07, "loss": 0.5408, "step": 5617 }, { "epoch": 0.84, "grad_norm": 1.1043814420700073, "learning_rate": 6.253756667412947e-07, "loss": 0.4393, "step": 5618 }, { "epoch": 0.84, "grad_norm": 1.130846619606018, "learning_rate": 6.241981270072572e-07, "loss": 0.5823, "step": 5619 }, { "epoch": 0.84, "grad_norm": 1.2791954278945923, "learning_rate": 6.230216231206992e-07, "loss": 0.4516, "step": 5620 }, { "epoch": 0.84, "grad_norm": 1.2133784294128418, "learning_rate": 6.218461553601263e-07, "loss": 0.513, "step": 5621 }, { "epoch": 0.84, "grad_norm": 1.2928529977798462, "learning_rate": 6.206717240037946e-07, "loss": 0.4886, "step": 5622 }, { "epoch": 0.84, "grad_norm": 1.3082791566848755, "learning_rate": 6.194983293297207e-07, "loss": 0.5159, "step": 5623 }, { "epoch": 0.84, "grad_norm": 1.2487196922302246, "learning_rate": 6.183259716156703e-07, "loss": 0.4164, "step": 5624 }, { "epoch": 0.84, "grad_norm": 1.1420488357543945, "learning_rate": 6.17154651139168e-07, "loss": 0.4553, "step": 5625 }, { "epoch": 0.85, "grad_norm": 1.7696754932403564, "learning_rate": 6.15984368177489e-07, "loss": 0.4574, "step": 5626 }, { "epoch": 0.85, "grad_norm": 1.290030598640442, "learning_rate": 6.148151230076648e-07, "loss": 0.5086, "step": 5627 }, { "epoch": 0.85, "grad_norm": 1.492785930633545, "learning_rate": 6.136469159064817e-07, "loss": 0.4556, "step": 5628 }, { "epoch": 0.85, "grad_norm": 1.3650782108306885, "learning_rate": 6.124797471504795e-07, "loss": 0.4359, "step": 5629 }, { "epoch": 0.85, "grad_norm": 1.3455979824066162, "learning_rate": 6.113136170159545e-07, "loss": 0.4513, "step": 5630 }, { "epoch": 0.85, "grad_norm": 2.3504185676574707, "learning_rate": 6.10148525778953e-07, "loss": 0.5155, "step": 5631 }, { "epoch": 0.85, "grad_norm": 1.2352229356765747, "learning_rate": 6.089844737152761e-07, "loss": 0.511, "step": 5632 }, { "epoch": 0.85, "grad_norm": 1.0526957511901855, "learning_rate": 6.078214611004835e-07, "loss": 0.4519, "step": 5633 }, { "epoch": 0.85, "grad_norm": 1.4111160039901733, "learning_rate": 6.066594882098831e-07, "loss": 0.5041, "step": 5634 }, { "epoch": 0.85, "grad_norm": 1.57499361038208, "learning_rate": 6.054985553185405e-07, "loss": 0.4562, "step": 5635 }, { "epoch": 0.85, "grad_norm": 1.391884207725525, "learning_rate": 6.043386627012726e-07, "loss": 0.393, "step": 5636 }, { "epoch": 0.85, "grad_norm": 1.1466941833496094, "learning_rate": 6.031798106326525e-07, "loss": 0.5045, "step": 5637 }, { "epoch": 0.85, "grad_norm": 1.2251625061035156, "learning_rate": 6.020219993870057e-07, "loss": 0.4801, "step": 5638 }, { "epoch": 0.85, "grad_norm": 1.1827622652053833, "learning_rate": 6.008652292384081e-07, "loss": 0.4384, "step": 5639 }, { "epoch": 0.85, "grad_norm": 1.3426192998886108, "learning_rate": 5.997095004606973e-07, "loss": 0.4082, "step": 5640 }, { "epoch": 0.85, "grad_norm": 1.3188694715499878, "learning_rate": 5.98554813327456e-07, "loss": 0.4666, "step": 5641 }, { "epoch": 0.85, "grad_norm": 1.1918145418167114, "learning_rate": 5.974011681120256e-07, "loss": 0.4639, "step": 5642 }, { "epoch": 0.85, "grad_norm": 1.300564169883728, "learning_rate": 5.962485650874988e-07, "loss": 0.4344, "step": 5643 }, { "epoch": 0.85, "grad_norm": 1.2044371366500854, "learning_rate": 5.950970045267196e-07, "loss": 0.4752, "step": 5644 }, { "epoch": 0.85, "grad_norm": 1.3299375772476196, "learning_rate": 5.939464867022909e-07, "loss": 0.4035, "step": 5645 }, { "epoch": 0.85, "grad_norm": 1.5482159852981567, "learning_rate": 5.927970118865617e-07, "loss": 0.5083, "step": 5646 }, { "epoch": 0.85, "grad_norm": 1.1124098300933838, "learning_rate": 5.916485803516409e-07, "loss": 0.4652, "step": 5647 }, { "epoch": 0.85, "grad_norm": 1.320040225982666, "learning_rate": 5.905011923693848e-07, "loss": 0.3994, "step": 5648 }, { "epoch": 0.85, "grad_norm": 1.2669583559036255, "learning_rate": 5.89354848211407e-07, "loss": 0.4742, "step": 5649 }, { "epoch": 0.85, "grad_norm": 1.343021273612976, "learning_rate": 5.882095481490701e-07, "loss": 0.5118, "step": 5650 }, { "epoch": 0.85, "grad_norm": 1.5429046154022217, "learning_rate": 5.870652924534925e-07, "loss": 0.5361, "step": 5651 }, { "epoch": 0.85, "grad_norm": 1.209148645401001, "learning_rate": 5.859220813955458e-07, "loss": 0.4825, "step": 5652 }, { "epoch": 0.85, "grad_norm": 1.2484159469604492, "learning_rate": 5.847799152458506e-07, "loss": 0.5376, "step": 5653 }, { "epoch": 0.85, "grad_norm": 1.8383487462997437, "learning_rate": 5.836387942747845e-07, "loss": 0.4529, "step": 5654 }, { "epoch": 0.85, "grad_norm": 1.2842903137207031, "learning_rate": 5.82498718752475e-07, "loss": 0.4721, "step": 5655 }, { "epoch": 0.85, "grad_norm": 1.7362695932388306, "learning_rate": 5.81359688948801e-07, "loss": 0.4419, "step": 5656 }, { "epoch": 0.85, "grad_norm": 2.0088448524475098, "learning_rate": 5.802217051333981e-07, "loss": 0.4484, "step": 5657 }, { "epoch": 0.85, "grad_norm": 1.1446845531463623, "learning_rate": 5.7908476757565e-07, "loss": 0.7165, "step": 5658 }, { "epoch": 0.85, "grad_norm": 1.1953352689743042, "learning_rate": 5.779488765446967e-07, "loss": 0.4262, "step": 5659 }, { "epoch": 0.85, "grad_norm": 1.372582197189331, "learning_rate": 5.768140323094252e-07, "loss": 0.4345, "step": 5660 }, { "epoch": 0.85, "grad_norm": 1.1722561120986938, "learning_rate": 5.7568023513848e-07, "loss": 0.507, "step": 5661 }, { "epoch": 0.85, "grad_norm": 1.2230902910232544, "learning_rate": 5.745474853002564e-07, "loss": 0.4592, "step": 5662 }, { "epoch": 0.85, "grad_norm": 1.2270315885543823, "learning_rate": 5.734157830628984e-07, "loss": 0.4396, "step": 5663 }, { "epoch": 0.85, "grad_norm": 1.346459150314331, "learning_rate": 5.722851286943065e-07, "loss": 0.4226, "step": 5664 }, { "epoch": 0.85, "grad_norm": 1.3676844835281372, "learning_rate": 5.711555224621296e-07, "loss": 0.4068, "step": 5665 }, { "epoch": 0.85, "grad_norm": 1.562192678451538, "learning_rate": 5.700269646337725e-07, "loss": 0.4915, "step": 5666 }, { "epoch": 0.85, "grad_norm": 1.335858941078186, "learning_rate": 5.688994554763871e-07, "loss": 0.5097, "step": 5667 }, { "epoch": 0.85, "grad_norm": 1.4400678873062134, "learning_rate": 5.677729952568794e-07, "loss": 0.4426, "step": 5668 }, { "epoch": 0.85, "grad_norm": 1.0742590427398682, "learning_rate": 5.666475842419089e-07, "loss": 0.6807, "step": 5669 }, { "epoch": 0.85, "grad_norm": 1.3595503568649292, "learning_rate": 5.655232226978824e-07, "loss": 0.4709, "step": 5670 }, { "epoch": 0.85, "grad_norm": 1.4259556531906128, "learning_rate": 5.643999108909631e-07, "loss": 0.4718, "step": 5671 }, { "epoch": 0.85, "grad_norm": 1.505281686782837, "learning_rate": 5.632776490870612e-07, "loss": 0.4746, "step": 5672 }, { "epoch": 0.85, "grad_norm": 1.1748039722442627, "learning_rate": 5.621564375518413e-07, "loss": 0.4834, "step": 5673 }, { "epoch": 0.85, "grad_norm": 1.9819262027740479, "learning_rate": 5.6103627655072e-07, "loss": 0.4871, "step": 5674 }, { "epoch": 0.85, "grad_norm": 1.2755908966064453, "learning_rate": 5.599171663488617e-07, "loss": 0.4446, "step": 5675 }, { "epoch": 0.85, "grad_norm": 1.3469656705856323, "learning_rate": 5.58799107211186e-07, "loss": 0.4964, "step": 5676 }, { "epoch": 0.85, "grad_norm": 1.335910677909851, "learning_rate": 5.576820994023596e-07, "loss": 0.4498, "step": 5677 }, { "epoch": 0.85, "grad_norm": 1.140385627746582, "learning_rate": 5.565661431868046e-07, "loss": 0.3909, "step": 5678 }, { "epoch": 0.85, "grad_norm": 1.0407267808914185, "learning_rate": 5.554512388286915e-07, "loss": 0.6916, "step": 5679 }, { "epoch": 0.85, "grad_norm": 1.2497386932373047, "learning_rate": 5.543373865919411e-07, "loss": 0.4549, "step": 5680 }, { "epoch": 0.85, "grad_norm": 1.372178077697754, "learning_rate": 5.53224586740228e-07, "loss": 0.4775, "step": 5681 }, { "epoch": 0.85, "grad_norm": 1.157373070716858, "learning_rate": 5.521128395369751e-07, "loss": 0.5242, "step": 5682 }, { "epoch": 0.85, "grad_norm": 1.05069899559021, "learning_rate": 5.510021452453579e-07, "loss": 0.3807, "step": 5683 }, { "epoch": 0.85, "grad_norm": 1.6563806533813477, "learning_rate": 5.498925041283027e-07, "loss": 0.4937, "step": 5684 }, { "epoch": 0.85, "grad_norm": 1.5732946395874023, "learning_rate": 5.487839164484831e-07, "loss": 0.4269, "step": 5685 }, { "epoch": 0.85, "grad_norm": 1.2809808254241943, "learning_rate": 5.47676382468329e-07, "loss": 0.4718, "step": 5686 }, { "epoch": 0.85, "grad_norm": 1.3167831897735596, "learning_rate": 5.465699024500148e-07, "loss": 0.5366, "step": 5687 }, { "epoch": 0.85, "grad_norm": 1.4162253141403198, "learning_rate": 5.454644766554712e-07, "loss": 0.493, "step": 5688 }, { "epoch": 0.85, "grad_norm": 1.3992840051651, "learning_rate": 5.443601053463743e-07, "loss": 0.3973, "step": 5689 }, { "epoch": 0.85, "grad_norm": 1.5078604221343994, "learning_rate": 5.432567887841544e-07, "loss": 0.4182, "step": 5690 }, { "epoch": 0.85, "grad_norm": 2.1535189151763916, "learning_rate": 5.4215452722999e-07, "loss": 0.4408, "step": 5691 }, { "epoch": 0.86, "grad_norm": 1.7924646139144897, "learning_rate": 5.410533209448088e-07, "loss": 0.4328, "step": 5692 }, { "epoch": 0.86, "grad_norm": 1.3874784708023071, "learning_rate": 5.39953170189293e-07, "loss": 0.4666, "step": 5693 }, { "epoch": 0.86, "grad_norm": 1.5043389797210693, "learning_rate": 5.388540752238703e-07, "loss": 0.4737, "step": 5694 }, { "epoch": 0.86, "grad_norm": 1.2548701763153076, "learning_rate": 5.377560363087204e-07, "loss": 0.4937, "step": 5695 }, { "epoch": 0.86, "grad_norm": 1.781494140625, "learning_rate": 5.366590537037747e-07, "loss": 0.4539, "step": 5696 }, { "epoch": 0.86, "grad_norm": 1.520639181137085, "learning_rate": 5.355631276687107e-07, "loss": 0.5154, "step": 5697 }, { "epoch": 0.86, "grad_norm": 1.2819455862045288, "learning_rate": 5.344682584629601e-07, "loss": 0.5162, "step": 5698 }, { "epoch": 0.86, "grad_norm": 1.4269957542419434, "learning_rate": 5.333744463456997e-07, "loss": 0.4755, "step": 5699 }, { "epoch": 0.86, "grad_norm": 1.2044777870178223, "learning_rate": 5.322816915758616e-07, "loss": 0.45, "step": 5700 }, { "epoch": 0.86, "grad_norm": 3.0103096961975098, "learning_rate": 5.311899944121218e-07, "loss": 0.4438, "step": 5701 }, { "epoch": 0.86, "grad_norm": 1.7668193578720093, "learning_rate": 5.300993551129108e-07, "loss": 0.4158, "step": 5702 }, { "epoch": 0.86, "grad_norm": 2.4883222579956055, "learning_rate": 5.290097739364064e-07, "loss": 0.4636, "step": 5703 }, { "epoch": 0.86, "grad_norm": 1.309893012046814, "learning_rate": 5.279212511405341e-07, "loss": 0.4105, "step": 5704 }, { "epoch": 0.86, "grad_norm": 1.035967469215393, "learning_rate": 5.268337869829737e-07, "loss": 0.6697, "step": 5705 }, { "epoch": 0.86, "grad_norm": 1.4633333683013916, "learning_rate": 5.257473817211483e-07, "loss": 0.447, "step": 5706 }, { "epoch": 0.86, "grad_norm": 1.1482552289962769, "learning_rate": 5.246620356122378e-07, "loss": 0.5595, "step": 5707 }, { "epoch": 0.86, "grad_norm": 1.4269808530807495, "learning_rate": 5.23577748913165e-07, "loss": 0.4521, "step": 5708 }, { "epoch": 0.86, "grad_norm": 1.1240094900131226, "learning_rate": 5.224945218806032e-07, "loss": 0.5119, "step": 5709 }, { "epoch": 0.86, "grad_norm": 1.2922700643539429, "learning_rate": 5.214123547709777e-07, "loss": 0.5246, "step": 5710 }, { "epoch": 0.86, "grad_norm": 1.0697638988494873, "learning_rate": 5.203312478404598e-07, "loss": 0.4555, "step": 5711 }, { "epoch": 0.86, "grad_norm": 1.5595964193344116, "learning_rate": 5.192512013449719e-07, "loss": 0.4204, "step": 5712 }, { "epoch": 0.86, "grad_norm": 1.5395146608352661, "learning_rate": 5.181722155401831e-07, "loss": 0.5084, "step": 5713 }, { "epoch": 0.86, "grad_norm": 1.1236275434494019, "learning_rate": 5.170942906815152e-07, "loss": 0.3905, "step": 5714 }, { "epoch": 0.86, "grad_norm": 1.198848009109497, "learning_rate": 5.160174270241341e-07, "loss": 0.4133, "step": 5715 }, { "epoch": 0.86, "grad_norm": 1.6784346103668213, "learning_rate": 5.149416248229578e-07, "loss": 0.5472, "step": 5716 }, { "epoch": 0.86, "grad_norm": 1.2322394847869873, "learning_rate": 5.138668843326511e-07, "loss": 0.5456, "step": 5717 }, { "epoch": 0.86, "grad_norm": 1.2565834522247314, "learning_rate": 5.127932058076296e-07, "loss": 0.5268, "step": 5718 }, { "epoch": 0.86, "grad_norm": 1.6430697441101074, "learning_rate": 5.117205895020571e-07, "loss": 0.4461, "step": 5719 }, { "epoch": 0.86, "grad_norm": 1.5419710874557495, "learning_rate": 5.106490356698446e-07, "loss": 0.5133, "step": 5720 }, { "epoch": 0.86, "grad_norm": 1.1569777727127075, "learning_rate": 5.095785445646506e-07, "loss": 0.4984, "step": 5721 }, { "epoch": 0.86, "grad_norm": 1.3441087007522583, "learning_rate": 5.085091164398853e-07, "loss": 0.4528, "step": 5722 }, { "epoch": 0.86, "grad_norm": 1.4583717584609985, "learning_rate": 5.074407515487051e-07, "loss": 0.5314, "step": 5723 }, { "epoch": 0.86, "grad_norm": 1.237492322921753, "learning_rate": 5.063734501440154e-07, "loss": 0.4793, "step": 5724 }, { "epoch": 0.86, "grad_norm": 1.5183963775634766, "learning_rate": 5.053072124784692e-07, "loss": 0.4568, "step": 5725 }, { "epoch": 0.86, "grad_norm": 1.2424224615097046, "learning_rate": 5.042420388044689e-07, "loss": 0.6813, "step": 5726 }, { "epoch": 0.86, "grad_norm": 1.1041728258132935, "learning_rate": 5.031779293741635e-07, "loss": 0.463, "step": 5727 }, { "epoch": 0.86, "grad_norm": 1.9061527252197266, "learning_rate": 5.021148844394502e-07, "loss": 0.4956, "step": 5728 }, { "epoch": 0.86, "grad_norm": 1.4387863874435425, "learning_rate": 5.010529042519779e-07, "loss": 0.541, "step": 5729 }, { "epoch": 0.86, "grad_norm": 1.41838538646698, "learning_rate": 4.999919890631366e-07, "loss": 0.4452, "step": 5730 }, { "epoch": 0.86, "grad_norm": 1.3497596979141235, "learning_rate": 4.989321391240709e-07, "loss": 0.528, "step": 5731 }, { "epoch": 0.86, "grad_norm": 1.354416012763977, "learning_rate": 4.978733546856695e-07, "loss": 0.5492, "step": 5732 }, { "epoch": 0.86, "grad_norm": 1.2838377952575684, "learning_rate": 4.968156359985682e-07, "loss": 0.547, "step": 5733 }, { "epoch": 0.86, "grad_norm": 1.3751649856567383, "learning_rate": 4.957589833131543e-07, "loss": 0.5027, "step": 5734 }, { "epoch": 0.86, "grad_norm": 1.3888509273529053, "learning_rate": 4.947033968795584e-07, "loss": 0.5053, "step": 5735 }, { "epoch": 0.86, "grad_norm": 1.3901509046554565, "learning_rate": 4.936488769476628e-07, "loss": 0.4905, "step": 5736 }, { "epoch": 0.86, "grad_norm": 1.5712753534317017, "learning_rate": 4.925954237670927e-07, "loss": 0.5057, "step": 5737 }, { "epoch": 0.86, "grad_norm": 1.3641784191131592, "learning_rate": 4.915430375872266e-07, "loss": 0.4848, "step": 5738 }, { "epoch": 0.86, "grad_norm": 1.274431824684143, "learning_rate": 4.904917186571839e-07, "loss": 0.4573, "step": 5739 }, { "epoch": 0.86, "grad_norm": 1.0767269134521484, "learning_rate": 4.894414672258369e-07, "loss": 0.3934, "step": 5740 }, { "epoch": 0.86, "grad_norm": 1.161360740661621, "learning_rate": 4.883922835418032e-07, "loss": 0.4495, "step": 5741 }, { "epoch": 0.86, "grad_norm": 1.313768982887268, "learning_rate": 4.873441678534457e-07, "loss": 0.5209, "step": 5742 }, { "epoch": 0.86, "grad_norm": 1.4227392673492432, "learning_rate": 4.862971204088774e-07, "loss": 0.3585, "step": 5743 }, { "epoch": 0.86, "grad_norm": 1.4705276489257812, "learning_rate": 4.852511414559575e-07, "loss": 0.4514, "step": 5744 }, { "epoch": 0.86, "grad_norm": 1.0203375816345215, "learning_rate": 4.842062312422902e-07, "loss": 0.452, "step": 5745 }, { "epoch": 0.86, "grad_norm": 1.277108073234558, "learning_rate": 4.831623900152304e-07, "loss": 0.3797, "step": 5746 }, { "epoch": 0.86, "grad_norm": 1.2291964292526245, "learning_rate": 4.821196180218762e-07, "loss": 0.4947, "step": 5747 }, { "epoch": 0.86, "grad_norm": 1.5811678171157837, "learning_rate": 4.810779155090761e-07, "loss": 0.5445, "step": 5748 }, { "epoch": 0.86, "grad_norm": 1.5446375608444214, "learning_rate": 4.800372827234229e-07, "loss": 0.501, "step": 5749 }, { "epoch": 0.86, "grad_norm": 1.0916460752487183, "learning_rate": 4.789977199112567e-07, "loss": 0.5229, "step": 5750 }, { "epoch": 0.86, "grad_norm": 1.1650772094726562, "learning_rate": 4.779592273186656e-07, "loss": 0.681, "step": 5751 }, { "epoch": 0.86, "grad_norm": 1.1381590366363525, "learning_rate": 4.769218051914825e-07, "loss": 0.5415, "step": 5752 }, { "epoch": 0.86, "grad_norm": 1.21806001663208, "learning_rate": 4.758854537752894e-07, "loss": 0.4321, "step": 5753 }, { "epoch": 0.86, "grad_norm": 1.2906018495559692, "learning_rate": 4.748501733154104e-07, "loss": 0.548, "step": 5754 }, { "epoch": 0.86, "grad_norm": 1.646530032157898, "learning_rate": 4.7381596405692174e-07, "loss": 0.3934, "step": 5755 }, { "epoch": 0.86, "grad_norm": 1.5396677255630493, "learning_rate": 4.72782826244641e-07, "loss": 0.4661, "step": 5756 }, { "epoch": 0.86, "grad_norm": 1.5505884885787964, "learning_rate": 4.717507601231364e-07, "loss": 0.4634, "step": 5757 }, { "epoch": 0.86, "grad_norm": 1.8735148906707764, "learning_rate": 4.707197659367191e-07, "loss": 0.3879, "step": 5758 }, { "epoch": 0.87, "grad_norm": 1.3538057804107666, "learning_rate": 4.6968984392944786e-07, "loss": 0.5376, "step": 5759 }, { "epoch": 0.87, "grad_norm": 1.2998621463775635, "learning_rate": 4.6866099434512815e-07, "loss": 0.4452, "step": 5760 }, { "epoch": 0.87, "grad_norm": 1.6706013679504395, "learning_rate": 4.6763321742731016e-07, "loss": 0.446, "step": 5761 }, { "epoch": 0.87, "grad_norm": 1.94974684715271, "learning_rate": 4.6660651341929155e-07, "loss": 0.5575, "step": 5762 }, { "epoch": 0.87, "grad_norm": 1.3326992988586426, "learning_rate": 4.6558088256411673e-07, "loss": 0.3923, "step": 5763 }, { "epoch": 0.87, "grad_norm": 1.262518048286438, "learning_rate": 4.6455632510457313e-07, "loss": 0.4866, "step": 5764 }, { "epoch": 0.87, "grad_norm": 1.3351033926010132, "learning_rate": 4.6353284128319684e-07, "loss": 0.4752, "step": 5765 }, { "epoch": 0.87, "grad_norm": 1.4883580207824707, "learning_rate": 4.625104313422674e-07, "loss": 0.4808, "step": 5766 }, { "epoch": 0.87, "grad_norm": 1.548270583152771, "learning_rate": 4.6148909552381326e-07, "loss": 0.5456, "step": 5767 }, { "epoch": 0.87, "grad_norm": 1.080400824546814, "learning_rate": 4.604688340696056e-07, "loss": 0.665, "step": 5768 }, { "epoch": 0.87, "grad_norm": 1.4065220355987549, "learning_rate": 4.5944964722116294e-07, "loss": 0.4777, "step": 5769 }, { "epoch": 0.87, "grad_norm": 1.3362189531326294, "learning_rate": 4.584315352197494e-07, "loss": 0.474, "step": 5770 }, { "epoch": 0.87, "grad_norm": 1.2623366117477417, "learning_rate": 4.5741449830637275e-07, "loss": 0.5105, "step": 5771 }, { "epoch": 0.87, "grad_norm": 1.1418867111206055, "learning_rate": 4.563985367217888e-07, "loss": 0.3676, "step": 5772 }, { "epoch": 0.87, "grad_norm": 1.3012250661849976, "learning_rate": 4.5538365070649795e-07, "loss": 0.4636, "step": 5773 }, { "epoch": 0.87, "grad_norm": 1.3862754106521606, "learning_rate": 4.543698405007452e-07, "loss": 0.5443, "step": 5774 }, { "epoch": 0.87, "grad_norm": 1.4745864868164062, "learning_rate": 4.5335710634452246e-07, "loss": 0.3659, "step": 5775 }, { "epoch": 0.87, "grad_norm": 1.1953107118606567, "learning_rate": 4.523454484775647e-07, "loss": 0.5307, "step": 5776 }, { "epoch": 0.87, "grad_norm": 5.738935470581055, "learning_rate": 4.5133486713935416e-07, "loss": 0.5009, "step": 5777 }, { "epoch": 0.87, "grad_norm": 1.1996501684188843, "learning_rate": 4.503253625691167e-07, "loss": 0.4229, "step": 5778 }, { "epoch": 0.87, "grad_norm": 1.258773922920227, "learning_rate": 4.493169350058252e-07, "loss": 0.4398, "step": 5779 }, { "epoch": 0.87, "grad_norm": 1.2255470752716064, "learning_rate": 4.483095846881946e-07, "loss": 0.5191, "step": 5780 }, { "epoch": 0.87, "grad_norm": 1.6604315042495728, "learning_rate": 4.473033118546893e-07, "loss": 0.5391, "step": 5781 }, { "epoch": 0.87, "grad_norm": 1.5097013711929321, "learning_rate": 4.4629811674351373e-07, "loss": 0.5332, "step": 5782 }, { "epoch": 0.87, "grad_norm": 1.1052536964416504, "learning_rate": 4.4529399959261923e-07, "loss": 0.4432, "step": 5783 }, { "epoch": 0.87, "grad_norm": 2.1667540073394775, "learning_rate": 4.442909606397033e-07, "loss": 0.4915, "step": 5784 }, { "epoch": 0.87, "grad_norm": 1.4477158784866333, "learning_rate": 4.4328900012220623e-07, "loss": 0.5023, "step": 5785 }, { "epoch": 0.87, "grad_norm": 1.5051778554916382, "learning_rate": 4.4228811827731586e-07, "loss": 0.4913, "step": 5786 }, { "epoch": 0.87, "grad_norm": 1.7960697412490845, "learning_rate": 4.4128831534196116e-07, "loss": 0.3901, "step": 5787 }, { "epoch": 0.87, "grad_norm": 1.209144949913025, "learning_rate": 4.4028959155281603e-07, "loss": 0.4622, "step": 5788 }, { "epoch": 0.87, "grad_norm": 1.773363709449768, "learning_rate": 4.3929194714630306e-07, "loss": 0.5155, "step": 5789 }, { "epoch": 0.87, "grad_norm": 1.2882311344146729, "learning_rate": 4.382953823585834e-07, "loss": 0.5141, "step": 5790 }, { "epoch": 0.87, "grad_norm": 1.4723049402236938, "learning_rate": 4.3729989742556765e-07, "loss": 0.5029, "step": 5791 }, { "epoch": 0.87, "grad_norm": 1.2167713642120361, "learning_rate": 4.3630549258290745e-07, "loss": 0.4137, "step": 5792 }, { "epoch": 0.87, "grad_norm": 1.370766520500183, "learning_rate": 4.3531216806600105e-07, "loss": 0.4944, "step": 5793 }, { "epoch": 0.87, "grad_norm": 1.2383371591567993, "learning_rate": 4.343199241099905e-07, "loss": 0.4803, "step": 5794 }, { "epoch": 0.87, "grad_norm": 1.270437479019165, "learning_rate": 4.3332876094975775e-07, "loss": 0.4937, "step": 5795 }, { "epoch": 0.87, "grad_norm": 1.8050605058670044, "learning_rate": 4.3233867881993794e-07, "loss": 0.4092, "step": 5796 }, { "epoch": 0.87, "grad_norm": 1.240796685218811, "learning_rate": 4.313496779549015e-07, "loss": 0.4424, "step": 5797 }, { "epoch": 0.87, "grad_norm": 1.1508861780166626, "learning_rate": 4.303617585887687e-07, "loss": 0.4689, "step": 5798 }, { "epoch": 0.87, "grad_norm": 1.3224644660949707, "learning_rate": 4.2937492095540043e-07, "loss": 0.4747, "step": 5799 }, { "epoch": 0.87, "grad_norm": 1.2204903364181519, "learning_rate": 4.2838916528840146e-07, "loss": 0.4862, "step": 5800 }, { "epoch": 0.87, "grad_norm": 1.3218785524368286, "learning_rate": 4.2740449182112344e-07, "loss": 0.4877, "step": 5801 }, { "epoch": 0.87, "grad_norm": 1.4064173698425293, "learning_rate": 4.264209007866588e-07, "loss": 0.5978, "step": 5802 }, { "epoch": 0.87, "grad_norm": 1.3977251052856445, "learning_rate": 4.2543839241784624e-07, "loss": 0.4651, "step": 5803 }, { "epoch": 0.87, "grad_norm": 1.4514684677124023, "learning_rate": 4.2445696694726536e-07, "loss": 0.4635, "step": 5804 }, { "epoch": 0.87, "grad_norm": 1.284026026725769, "learning_rate": 4.234766246072419e-07, "loss": 0.4287, "step": 5805 }, { "epoch": 0.87, "grad_norm": 1.2260769605636597, "learning_rate": 4.224973656298437e-07, "loss": 0.5247, "step": 5806 }, { "epoch": 0.87, "grad_norm": 1.3695334196090698, "learning_rate": 4.2151919024688237e-07, "loss": 0.426, "step": 5807 }, { "epoch": 0.87, "grad_norm": 1.4396092891693115, "learning_rate": 4.20542098689915e-07, "loss": 0.4514, "step": 5808 }, { "epoch": 0.87, "grad_norm": 1.0505397319793701, "learning_rate": 4.1956609119023825e-07, "loss": 0.4064, "step": 5809 }, { "epoch": 0.87, "grad_norm": 1.3564114570617676, "learning_rate": 4.185911679788962e-07, "loss": 0.4614, "step": 5810 }, { "epoch": 0.87, "grad_norm": 1.243330717086792, "learning_rate": 4.176173292866731e-07, "loss": 0.4982, "step": 5811 }, { "epoch": 0.87, "grad_norm": 1.3106462955474854, "learning_rate": 4.1664457534409785e-07, "loss": 0.4388, "step": 5812 }, { "epoch": 0.87, "grad_norm": 1.1562539339065552, "learning_rate": 4.156729063814435e-07, "loss": 0.692, "step": 5813 }, { "epoch": 0.87, "grad_norm": 1.190917730331421, "learning_rate": 4.1470232262872337e-07, "loss": 0.482, "step": 5814 }, { "epoch": 0.87, "grad_norm": 1.2456929683685303, "learning_rate": 4.1373282431569793e-07, "loss": 0.429, "step": 5815 }, { "epoch": 0.87, "grad_norm": 1.6944959163665771, "learning_rate": 4.1276441167186596e-07, "loss": 0.4741, "step": 5816 }, { "epoch": 0.87, "grad_norm": 1.396299123764038, "learning_rate": 4.117970849264741e-07, "loss": 0.4803, "step": 5817 }, { "epoch": 0.87, "grad_norm": 1.1598061323165894, "learning_rate": 4.1083084430850906e-07, "loss": 0.533, "step": 5818 }, { "epoch": 0.87, "grad_norm": 1.275476098060608, "learning_rate": 4.098656900467002e-07, "loss": 0.4487, "step": 5819 }, { "epoch": 0.87, "grad_norm": 1.5112571716308594, "learning_rate": 4.089016223695219e-07, "loss": 0.4617, "step": 5820 }, { "epoch": 0.87, "grad_norm": 1.3385977745056152, "learning_rate": 4.079386415051884e-07, "loss": 0.4253, "step": 5821 }, { "epoch": 0.87, "grad_norm": 2.36067271232605, "learning_rate": 4.069767476816594e-07, "loss": 0.451, "step": 5822 }, { "epoch": 0.87, "grad_norm": 1.8232041597366333, "learning_rate": 4.060159411266362e-07, "loss": 0.3988, "step": 5823 }, { "epoch": 0.87, "grad_norm": 1.217154860496521, "learning_rate": 4.050562220675608e-07, "loss": 0.4455, "step": 5824 }, { "epoch": 0.88, "grad_norm": 1.167877197265625, "learning_rate": 4.040975907316219e-07, "loss": 0.4513, "step": 5825 }, { "epoch": 0.88, "grad_norm": 1.1791692972183228, "learning_rate": 4.031400473457464e-07, "loss": 0.3883, "step": 5826 }, { "epoch": 0.88, "grad_norm": 1.267273187637329, "learning_rate": 4.0218359213660685e-07, "loss": 0.451, "step": 5827 }, { "epoch": 0.88, "grad_norm": 1.701665997505188, "learning_rate": 4.0122822533061657e-07, "loss": 0.432, "step": 5828 }, { "epoch": 0.88, "grad_norm": 0.9967778325080872, "learning_rate": 4.002739471539313e-07, "loss": 0.4031, "step": 5829 }, { "epoch": 0.88, "grad_norm": 1.8835318088531494, "learning_rate": 3.993207578324515e-07, "loss": 0.4626, "step": 5830 }, { "epoch": 0.88, "grad_norm": 1.2900604009628296, "learning_rate": 3.9836865759181444e-07, "loss": 0.4316, "step": 5831 }, { "epoch": 0.88, "grad_norm": 1.0271174907684326, "learning_rate": 3.9741764665740647e-07, "loss": 0.4712, "step": 5832 }, { "epoch": 0.88, "grad_norm": 1.5349581241607666, "learning_rate": 3.964677252543497e-07, "loss": 0.5214, "step": 5833 }, { "epoch": 0.88, "grad_norm": 1.2014700174331665, "learning_rate": 3.9551889360751325e-07, "loss": 0.4572, "step": 5834 }, { "epoch": 0.88, "grad_norm": 1.3585841655731201, "learning_rate": 3.945711519415052e-07, "loss": 0.4724, "step": 5835 }, { "epoch": 0.88, "grad_norm": 3.3137271404266357, "learning_rate": 3.9362450048067544e-07, "loss": 0.5053, "step": 5836 }, { "epoch": 0.88, "grad_norm": 1.1139453649520874, "learning_rate": 3.9267893944911985e-07, "loss": 0.6572, "step": 5837 }, { "epoch": 0.88, "grad_norm": 1.4078222513198853, "learning_rate": 3.917344690706704e-07, "loss": 0.4499, "step": 5838 }, { "epoch": 0.88, "grad_norm": 1.1315357685089111, "learning_rate": 3.9079108956890544e-07, "loss": 0.4063, "step": 5839 }, { "epoch": 0.88, "grad_norm": 1.2483779191970825, "learning_rate": 3.8984880116714364e-07, "loss": 0.4716, "step": 5840 }, { "epoch": 0.88, "grad_norm": 2.022677421569824, "learning_rate": 3.8890760408844374e-07, "loss": 0.5001, "step": 5841 }, { "epoch": 0.88, "grad_norm": 1.215976357460022, "learning_rate": 3.879674985556092e-07, "loss": 0.4649, "step": 5842 }, { "epoch": 0.88, "grad_norm": 1.601202368736267, "learning_rate": 3.870284847911815e-07, "loss": 0.4818, "step": 5843 }, { "epoch": 0.88, "grad_norm": 1.3203068971633911, "learning_rate": 3.8609056301744773e-07, "loss": 0.4388, "step": 5844 }, { "epoch": 0.88, "grad_norm": 1.0847198963165283, "learning_rate": 3.8515373345643315e-07, "loss": 0.4001, "step": 5845 }, { "epoch": 0.88, "grad_norm": 1.4153834581375122, "learning_rate": 3.8421799632990585e-07, "loss": 0.4195, "step": 5846 }, { "epoch": 0.88, "grad_norm": 1.5404630899429321, "learning_rate": 3.832833518593759e-07, "loss": 0.5225, "step": 5847 }, { "epoch": 0.88, "grad_norm": 1.2804975509643555, "learning_rate": 3.823498002660925e-07, "loss": 0.4234, "step": 5848 }, { "epoch": 0.88, "grad_norm": 1.3251495361328125, "learning_rate": 3.814173417710487e-07, "loss": 0.4997, "step": 5849 }, { "epoch": 0.88, "grad_norm": 1.132570743560791, "learning_rate": 3.8048597659497643e-07, "loss": 0.477, "step": 5850 }, { "epoch": 0.88, "grad_norm": 1.2903666496276855, "learning_rate": 3.795557049583526e-07, "loss": 0.5154, "step": 5851 }, { "epoch": 0.88, "grad_norm": 1.2173075675964355, "learning_rate": 3.78626527081391e-07, "loss": 0.4349, "step": 5852 }, { "epoch": 0.88, "grad_norm": 1.2490650415420532, "learning_rate": 3.7769844318404805e-07, "loss": 0.4643, "step": 5853 }, { "epoch": 0.88, "grad_norm": 1.5088294744491577, "learning_rate": 3.767714534860223e-07, "loss": 0.5034, "step": 5854 }, { "epoch": 0.88, "grad_norm": 1.3197709321975708, "learning_rate": 3.7584555820675106e-07, "loss": 0.5341, "step": 5855 }, { "epoch": 0.88, "grad_norm": 1.6537511348724365, "learning_rate": 3.749207575654157e-07, "loss": 0.5204, "step": 5856 }, { "epoch": 0.88, "grad_norm": 1.6351423263549805, "learning_rate": 3.7399705178093493e-07, "loss": 0.458, "step": 5857 }, { "epoch": 0.88, "grad_norm": 1.3672014474868774, "learning_rate": 3.7307444107197113e-07, "loss": 0.4885, "step": 5858 }, { "epoch": 0.88, "grad_norm": 1.1746742725372314, "learning_rate": 3.7215292565692617e-07, "loss": 0.5368, "step": 5859 }, { "epoch": 0.88, "grad_norm": 1.269039273262024, "learning_rate": 3.7123250575394234e-07, "loss": 0.4769, "step": 5860 }, { "epoch": 0.88, "grad_norm": 1.742734670639038, "learning_rate": 3.7031318158090354e-07, "loss": 0.4257, "step": 5861 }, { "epoch": 0.88, "grad_norm": 1.284219741821289, "learning_rate": 3.693949533554314e-07, "loss": 0.4325, "step": 5862 }, { "epoch": 0.88, "grad_norm": 1.232530951499939, "learning_rate": 3.6847782129489527e-07, "loss": 0.4512, "step": 5863 }, { "epoch": 0.88, "grad_norm": 1.260513186454773, "learning_rate": 3.675617856163971e-07, "loss": 0.4413, "step": 5864 }, { "epoch": 0.88, "grad_norm": 1.3149341344833374, "learning_rate": 3.6664684653678226e-07, "loss": 0.4122, "step": 5865 }, { "epoch": 0.88, "grad_norm": 1.056342363357544, "learning_rate": 3.657330042726381e-07, "loss": 0.4751, "step": 5866 }, { "epoch": 0.88, "grad_norm": 1.8077998161315918, "learning_rate": 3.648202590402894e-07, "loss": 0.4763, "step": 5867 }, { "epoch": 0.88, "grad_norm": 1.0109426975250244, "learning_rate": 3.639086110558049e-07, "loss": 0.4536, "step": 5868 }, { "epoch": 0.88, "grad_norm": 1.167965292930603, "learning_rate": 3.629980605349898e-07, "loss": 0.4404, "step": 5869 }, { "epoch": 0.88, "grad_norm": 1.7413363456726074, "learning_rate": 3.6208860769339214e-07, "loss": 0.4378, "step": 5870 }, { "epoch": 0.88, "grad_norm": 1.1455248594284058, "learning_rate": 3.611802527462976e-07, "loss": 0.4013, "step": 5871 }, { "epoch": 0.88, "grad_norm": 1.644896149635315, "learning_rate": 3.6027299590873643e-07, "loss": 0.4173, "step": 5872 }, { "epoch": 0.88, "grad_norm": 1.0388145446777344, "learning_rate": 3.5936683739547284e-07, "loss": 0.4505, "step": 5873 }, { "epoch": 0.88, "grad_norm": 1.2111014127731323, "learning_rate": 3.5846177742101586e-07, "loss": 0.5022, "step": 5874 }, { "epoch": 0.88, "grad_norm": 1.0027049779891968, "learning_rate": 3.5755781619961406e-07, "loss": 0.4065, "step": 5875 }, { "epoch": 0.88, "grad_norm": 1.312615990638733, "learning_rate": 3.5665495394525296e-07, "loss": 0.505, "step": 5876 }, { "epoch": 0.88, "grad_norm": 1.2265894412994385, "learning_rate": 3.557531908716599e-07, "loss": 0.4894, "step": 5877 }, { "epoch": 0.88, "grad_norm": 1.2951987981796265, "learning_rate": 3.5485252719230256e-07, "loss": 0.4558, "step": 5878 }, { "epoch": 0.88, "grad_norm": 1.1706600189208984, "learning_rate": 3.5395296312038643e-07, "loss": 0.3983, "step": 5879 }, { "epoch": 0.88, "grad_norm": 4.470645427703857, "learning_rate": 3.5305449886886e-07, "loss": 0.46, "step": 5880 }, { "epoch": 0.88, "grad_norm": 1.3513381481170654, "learning_rate": 3.5215713465040656e-07, "loss": 0.5996, "step": 5881 }, { "epoch": 0.88, "grad_norm": 1.1464120149612427, "learning_rate": 3.5126087067745395e-07, "loss": 0.4599, "step": 5882 }, { "epoch": 0.88, "grad_norm": 1.9484531879425049, "learning_rate": 3.503657071621658e-07, "loss": 0.4827, "step": 5883 }, { "epoch": 0.88, "grad_norm": 1.4495044946670532, "learning_rate": 3.4947164431644754e-07, "loss": 0.521, "step": 5884 }, { "epoch": 0.88, "grad_norm": 1.064531683921814, "learning_rate": 3.4857868235194383e-07, "loss": 0.6794, "step": 5885 }, { "epoch": 0.88, "grad_norm": 1.2603801488876343, "learning_rate": 3.476868214800372e-07, "loss": 0.4816, "step": 5886 }, { "epoch": 0.88, "grad_norm": 1.169357419013977, "learning_rate": 3.467960619118516e-07, "loss": 0.4027, "step": 5887 }, { "epoch": 0.88, "grad_norm": 1.2400124073028564, "learning_rate": 3.459064038582482e-07, "loss": 0.4961, "step": 5888 }, { "epoch": 0.88, "grad_norm": 1.323620319366455, "learning_rate": 3.4501784752982824e-07, "loss": 0.464, "step": 5889 }, { "epoch": 0.88, "grad_norm": 1.239330530166626, "learning_rate": 3.441303931369339e-07, "loss": 0.4226, "step": 5890 }, { "epoch": 0.88, "grad_norm": 1.496081829071045, "learning_rate": 3.432440408896426e-07, "loss": 0.517, "step": 5891 }, { "epoch": 0.89, "grad_norm": 1.1956486701965332, "learning_rate": 3.423587909977755e-07, "loss": 0.423, "step": 5892 }, { "epoch": 0.89, "grad_norm": 1.5928553342819214, "learning_rate": 3.414746436708882e-07, "loss": 0.48, "step": 5893 }, { "epoch": 0.89, "grad_norm": 1.2423745393753052, "learning_rate": 3.405915991182801e-07, "loss": 0.5044, "step": 5894 }, { "epoch": 0.89, "grad_norm": 1.1283551454544067, "learning_rate": 3.39709657548985e-07, "loss": 0.646, "step": 5895 }, { "epoch": 0.89, "grad_norm": 1.3341530561447144, "learning_rate": 3.388288191717787e-07, "loss": 0.5266, "step": 5896 }, { "epoch": 0.89, "grad_norm": 1.1668083667755127, "learning_rate": 3.379490841951749e-07, "loss": 0.4521, "step": 5897 }, { "epoch": 0.89, "grad_norm": 1.540973424911499, "learning_rate": 3.370704528274249e-07, "loss": 0.4243, "step": 5898 }, { "epoch": 0.89, "grad_norm": 1.4287227392196655, "learning_rate": 3.361929252765217e-07, "loss": 0.5344, "step": 5899 }, { "epoch": 0.89, "grad_norm": 1.1913496255874634, "learning_rate": 3.353165017501936e-07, "loss": 0.4942, "step": 5900 }, { "epoch": 0.89, "grad_norm": 1.4008874893188477, "learning_rate": 3.3444118245590963e-07, "loss": 0.515, "step": 5901 }, { "epoch": 0.89, "grad_norm": 0.9904476404190063, "learning_rate": 3.335669676008774e-07, "loss": 0.7074, "step": 5902 }, { "epoch": 0.89, "grad_norm": 1.499394178390503, "learning_rate": 3.326938573920413e-07, "loss": 0.5832, "step": 5903 }, { "epoch": 0.89, "grad_norm": 1.4861931800842285, "learning_rate": 3.3182185203608663e-07, "loss": 0.5379, "step": 5904 }, { "epoch": 0.89, "grad_norm": 1.7761331796646118, "learning_rate": 3.3095095173943545e-07, "loss": 0.4326, "step": 5905 }, { "epoch": 0.89, "grad_norm": 1.3854326009750366, "learning_rate": 3.3008115670824846e-07, "loss": 0.4489, "step": 5906 }, { "epoch": 0.89, "grad_norm": 1.2052209377288818, "learning_rate": 3.2921246714842704e-07, "loss": 0.4064, "step": 5907 }, { "epoch": 0.89, "grad_norm": 1.3453086614608765, "learning_rate": 3.283448832656061e-07, "loss": 0.4625, "step": 5908 }, { "epoch": 0.89, "grad_norm": 1.2193259000778198, "learning_rate": 3.2747840526516414e-07, "loss": 0.4814, "step": 5909 }, { "epoch": 0.89, "grad_norm": 1.1362065076828003, "learning_rate": 3.2661303335221327e-07, "loss": 0.4381, "step": 5910 }, { "epoch": 0.89, "grad_norm": 1.180698037147522, "learning_rate": 3.2574876773160733e-07, "loss": 0.4372, "step": 5911 }, { "epoch": 0.89, "grad_norm": 1.1688076257705688, "learning_rate": 3.2488560860793603e-07, "loss": 0.4456, "step": 5912 }, { "epoch": 0.89, "grad_norm": 1.2396061420440674, "learning_rate": 3.2402355618552816e-07, "loss": 0.4419, "step": 5913 }, { "epoch": 0.89, "grad_norm": 1.126447081565857, "learning_rate": 3.23162610668451e-07, "loss": 0.4643, "step": 5914 }, { "epoch": 0.89, "grad_norm": 1.663999319076538, "learning_rate": 3.223027722605071e-07, "loss": 0.4092, "step": 5915 }, { "epoch": 0.89, "grad_norm": 1.361820936203003, "learning_rate": 3.214440411652409e-07, "loss": 0.5145, "step": 5916 }, { "epoch": 0.89, "grad_norm": 1.1807781457901, "learning_rate": 3.2058641758593135e-07, "loss": 0.481, "step": 5917 }, { "epoch": 0.89, "grad_norm": 1.2907850742340088, "learning_rate": 3.1972990172559725e-07, "loss": 0.4346, "step": 5918 }, { "epoch": 0.89, "grad_norm": 2.043349504470825, "learning_rate": 3.188744937869953e-07, "loss": 0.3979, "step": 5919 }, { "epoch": 0.89, "grad_norm": 1.232675552368164, "learning_rate": 3.180201939726174e-07, "loss": 0.5062, "step": 5920 }, { "epoch": 0.89, "grad_norm": 1.1126089096069336, "learning_rate": 3.1716700248469677e-07, "loss": 0.4495, "step": 5921 }, { "epoch": 0.89, "grad_norm": 1.2823982238769531, "learning_rate": 3.163149195252002e-07, "loss": 0.4378, "step": 5922 }, { "epoch": 0.89, "grad_norm": 1.194734811782837, "learning_rate": 3.154639452958369e-07, "loss": 0.4953, "step": 5923 }, { "epoch": 0.89, "grad_norm": 1.1489267349243164, "learning_rate": 3.1461407999804903e-07, "loss": 0.4747, "step": 5924 }, { "epoch": 0.89, "grad_norm": 1.2370442152023315, "learning_rate": 3.1376532383301906e-07, "loss": 0.6897, "step": 5925 }, { "epoch": 0.89, "grad_norm": 1.2325271368026733, "learning_rate": 3.129176770016662e-07, "loss": 0.4966, "step": 5926 }, { "epoch": 0.89, "grad_norm": 1.0817829370498657, "learning_rate": 3.12071139704645e-07, "loss": 0.696, "step": 5927 }, { "epoch": 0.89, "grad_norm": 1.5083856582641602, "learning_rate": 3.112257121423512e-07, "loss": 0.4262, "step": 5928 }, { "epoch": 0.89, "grad_norm": 1.5613324642181396, "learning_rate": 3.1038139451491533e-07, "loss": 0.5079, "step": 5929 }, { "epoch": 0.89, "grad_norm": 1.7020124197006226, "learning_rate": 3.095381870222064e-07, "loss": 0.46, "step": 5930 }, { "epoch": 0.89, "grad_norm": 1.4741543531417847, "learning_rate": 3.086960898638297e-07, "loss": 0.4876, "step": 5931 }, { "epoch": 0.89, "grad_norm": 1.1740416288375854, "learning_rate": 3.078551032391269e-07, "loss": 0.4914, "step": 5932 }, { "epoch": 0.89, "grad_norm": 1.6771345138549805, "learning_rate": 3.070152273471788e-07, "loss": 0.4096, "step": 5933 }, { "epoch": 0.89, "grad_norm": 1.572300672531128, "learning_rate": 3.0617646238680187e-07, "loss": 0.396, "step": 5934 }, { "epoch": 0.89, "grad_norm": 1.8540294170379639, "learning_rate": 3.0533880855655064e-07, "loss": 0.5105, "step": 5935 }, { "epoch": 0.89, "grad_norm": 1.275725245475769, "learning_rate": 3.045022660547148e-07, "loss": 0.511, "step": 5936 }, { "epoch": 0.89, "grad_norm": 1.7818326950073242, "learning_rate": 3.036668350793232e-07, "loss": 0.5211, "step": 5937 }, { "epoch": 0.89, "grad_norm": 1.160630702972412, "learning_rate": 3.0283251582814087e-07, "loss": 0.4169, "step": 5938 }, { "epoch": 0.89, "grad_norm": 1.2063020467758179, "learning_rate": 3.019993084986667e-07, "loss": 0.4565, "step": 5939 }, { "epoch": 0.89, "grad_norm": 1.2328261137008667, "learning_rate": 3.011672132881427e-07, "loss": 0.3834, "step": 5940 }, { "epoch": 0.89, "grad_norm": 1.2033635377883911, "learning_rate": 3.0033623039354144e-07, "loss": 0.4579, "step": 5941 }, { "epoch": 0.89, "grad_norm": 1.3433572053909302, "learning_rate": 2.9950636001157607e-07, "loss": 0.5179, "step": 5942 }, { "epoch": 0.89, "grad_norm": 1.3216500282287598, "learning_rate": 2.986776023386939e-07, "loss": 0.5012, "step": 5943 }, { "epoch": 0.89, "grad_norm": 1.2729287147521973, "learning_rate": 2.978499575710797e-07, "loss": 0.4404, "step": 5944 }, { "epoch": 0.89, "grad_norm": 1.2963907718658447, "learning_rate": 2.9702342590465674e-07, "loss": 0.4653, "step": 5945 }, { "epoch": 0.89, "grad_norm": 1.2925209999084473, "learning_rate": 2.9619800753508065e-07, "loss": 0.5225, "step": 5946 }, { "epoch": 0.89, "grad_norm": 1.1697988510131836, "learning_rate": 2.95373702657748e-07, "loss": 0.4605, "step": 5947 }, { "epoch": 0.89, "grad_norm": 1.0020414590835571, "learning_rate": 2.9455051146778766e-07, "loss": 0.5066, "step": 5948 }, { "epoch": 0.89, "grad_norm": 1.2377445697784424, "learning_rate": 2.9372843416006933e-07, "loss": 0.4808, "step": 5949 }, { "epoch": 0.89, "grad_norm": 1.1219159364700317, "learning_rate": 2.929074709291946e-07, "loss": 0.4986, "step": 5950 }, { "epoch": 0.89, "grad_norm": 1.2349863052368164, "learning_rate": 2.920876219695029e-07, "loss": 0.4907, "step": 5951 }, { "epoch": 0.89, "grad_norm": 1.6811262369155884, "learning_rate": 2.9126888747507296e-07, "loss": 0.512, "step": 5952 }, { "epoch": 0.89, "grad_norm": 1.4170417785644531, "learning_rate": 2.9045126763971475e-07, "loss": 0.4539, "step": 5953 }, { "epoch": 0.89, "grad_norm": 1.3134173154830933, "learning_rate": 2.8963476265697786e-07, "loss": 0.4544, "step": 5954 }, { "epoch": 0.89, "grad_norm": 1.143348217010498, "learning_rate": 2.888193727201466e-07, "loss": 0.4226, "step": 5955 }, { "epoch": 0.89, "grad_norm": 1.44021737575531, "learning_rate": 2.8800509802224044e-07, "loss": 0.4101, "step": 5956 }, { "epoch": 0.89, "grad_norm": 1.738464593887329, "learning_rate": 2.8719193875601794e-07, "loss": 0.5115, "step": 5957 }, { "epoch": 0.89, "grad_norm": 1.291473627090454, "learning_rate": 2.86379895113969e-07, "loss": 0.4761, "step": 5958 }, { "epoch": 0.9, "grad_norm": 2.009899139404297, "learning_rate": 2.855689672883238e-07, "loss": 0.4488, "step": 5959 }, { "epoch": 0.9, "grad_norm": 1.5289630889892578, "learning_rate": 2.8475915547104606e-07, "loss": 0.4175, "step": 5960 }, { "epoch": 0.9, "grad_norm": 1.3436003923416138, "learning_rate": 2.839504598538362e-07, "loss": 0.4371, "step": 5961 }, { "epoch": 0.9, "grad_norm": 1.1850850582122803, "learning_rate": 2.8314288062812946e-07, "loss": 0.5262, "step": 5962 }, { "epoch": 0.9, "grad_norm": 1.4265918731689453, "learning_rate": 2.8233641798509794e-07, "loss": 0.5071, "step": 5963 }, { "epoch": 0.9, "grad_norm": 1.2986016273498535, "learning_rate": 2.815310721156489e-07, "loss": 0.4928, "step": 5964 }, { "epoch": 0.9, "grad_norm": 1.5596036911010742, "learning_rate": 2.807268432104243e-07, "loss": 0.5128, "step": 5965 }, { "epoch": 0.9, "grad_norm": 1.4862617254257202, "learning_rate": 2.7992373145980465e-07, "loss": 0.4782, "step": 5966 }, { "epoch": 0.9, "grad_norm": 0.9676753282546997, "learning_rate": 2.7912173705390235e-07, "loss": 0.437, "step": 5967 }, { "epoch": 0.9, "grad_norm": 1.1575238704681396, "learning_rate": 2.78320860182566e-07, "loss": 0.532, "step": 5968 }, { "epoch": 0.9, "grad_norm": 1.4527525901794434, "learning_rate": 2.775211010353834e-07, "loss": 0.5425, "step": 5969 }, { "epoch": 0.9, "grad_norm": 1.6492761373519897, "learning_rate": 2.7672245980167214e-07, "loss": 0.5189, "step": 5970 }, { "epoch": 0.9, "grad_norm": 1.380149483680725, "learning_rate": 2.759249366704897e-07, "loss": 0.489, "step": 5971 }, { "epoch": 0.9, "grad_norm": 1.338646650314331, "learning_rate": 2.7512853183062626e-07, "loss": 0.4993, "step": 5972 }, { "epoch": 0.9, "grad_norm": 1.294735074043274, "learning_rate": 2.743332454706088e-07, "loss": 0.4503, "step": 5973 }, { "epoch": 0.9, "grad_norm": 1.0933321714401245, "learning_rate": 2.735390777786989e-07, "loss": 0.4513, "step": 5974 }, { "epoch": 0.9, "grad_norm": 1.1988582611083984, "learning_rate": 2.7274602894289235e-07, "loss": 0.4822, "step": 5975 }, { "epoch": 0.9, "grad_norm": 1.1465917825698853, "learning_rate": 2.719540991509223e-07, "loss": 0.4386, "step": 5976 }, { "epoch": 0.9, "grad_norm": 1.1032752990722656, "learning_rate": 2.7116328859025487e-07, "loss": 0.467, "step": 5977 }, { "epoch": 0.9, "grad_norm": 1.2715179920196533, "learning_rate": 2.703735974480931e-07, "loss": 0.4773, "step": 5978 }, { "epoch": 0.9, "grad_norm": 1.2273317575454712, "learning_rate": 2.695850259113736e-07, "loss": 0.4428, "step": 5979 }, { "epoch": 0.9, "grad_norm": 1.2789483070373535, "learning_rate": 2.6879757416676756e-07, "loss": 0.5108, "step": 5980 }, { "epoch": 0.9, "grad_norm": 1.1678218841552734, "learning_rate": 2.680112424006831e-07, "loss": 0.4712, "step": 5981 }, { "epoch": 0.9, "grad_norm": 1.4968293905258179, "learning_rate": 2.672260307992608e-07, "loss": 0.5202, "step": 5982 }, { "epoch": 0.9, "grad_norm": 1.3897415399551392, "learning_rate": 2.664419395483786e-07, "loss": 0.4875, "step": 5983 }, { "epoch": 0.9, "grad_norm": 1.3287019729614258, "learning_rate": 2.656589688336475e-07, "loss": 0.4977, "step": 5984 }, { "epoch": 0.9, "grad_norm": 1.3669674396514893, "learning_rate": 2.648771188404131e-07, "loss": 0.4478, "step": 5985 }, { "epoch": 0.9, "grad_norm": 1.3219853639602661, "learning_rate": 2.640963897537574e-07, "loss": 0.4431, "step": 5986 }, { "epoch": 0.9, "grad_norm": 1.2533092498779297, "learning_rate": 2.6331678175849474e-07, "loss": 0.4954, "step": 5987 }, { "epoch": 0.9, "grad_norm": 1.1865092515945435, "learning_rate": 2.6253829503917694e-07, "loss": 0.4363, "step": 5988 }, { "epoch": 0.9, "grad_norm": 1.9052858352661133, "learning_rate": 2.6176092978008713e-07, "loss": 0.4651, "step": 5989 }, { "epoch": 0.9, "grad_norm": 1.1689833402633667, "learning_rate": 2.6098468616524544e-07, "loss": 0.4851, "step": 5990 }, { "epoch": 0.9, "grad_norm": 1.868735671043396, "learning_rate": 2.602095643784058e-07, "loss": 0.4672, "step": 5991 }, { "epoch": 0.9, "grad_norm": 1.3629720211029053, "learning_rate": 2.5943556460305495e-07, "loss": 0.4996, "step": 5992 }, { "epoch": 0.9, "grad_norm": 1.1588342189788818, "learning_rate": 2.586626870224174e-07, "loss": 0.3995, "step": 5993 }, { "epoch": 0.9, "grad_norm": 1.2872159481048584, "learning_rate": 2.578909318194489e-07, "loss": 0.471, "step": 5994 }, { "epoch": 0.9, "grad_norm": 1.7589589357376099, "learning_rate": 2.5712029917684014e-07, "loss": 0.4785, "step": 5995 }, { "epoch": 0.9, "grad_norm": 1.3211679458618164, "learning_rate": 2.563507892770184e-07, "loss": 0.5008, "step": 5996 }, { "epoch": 0.9, "grad_norm": 1.1851335763931274, "learning_rate": 2.5558240230214194e-07, "loss": 0.4585, "step": 5997 }, { "epoch": 0.9, "grad_norm": 1.4799013137817383, "learning_rate": 2.5481513843410576e-07, "loss": 0.5048, "step": 5998 }, { "epoch": 0.9, "grad_norm": 1.3366613388061523, "learning_rate": 2.5404899785453685e-07, "loss": 0.4742, "step": 5999 }, { "epoch": 0.9, "grad_norm": 1.1157755851745605, "learning_rate": 2.5328398074479844e-07, "loss": 0.3852, "step": 6000 }, { "epoch": 0.9, "grad_norm": 6.167869567871094, "learning_rate": 2.525200872859856e-07, "loss": 0.4987, "step": 6001 }, { "epoch": 0.9, "grad_norm": 1.2753958702087402, "learning_rate": 2.517573176589294e-07, "loss": 0.5264, "step": 6002 }, { "epoch": 0.9, "grad_norm": 1.047828197479248, "learning_rate": 2.509956720441936e-07, "loss": 0.4998, "step": 6003 }, { "epoch": 0.9, "grad_norm": 1.3350567817687988, "learning_rate": 2.502351506220757e-07, "loss": 0.4572, "step": 6004 }, { "epoch": 0.9, "grad_norm": 1.327681303024292, "learning_rate": 2.4947575357260945e-07, "loss": 0.4871, "step": 6005 }, { "epoch": 0.9, "grad_norm": 1.1242735385894775, "learning_rate": 2.487174810755577e-07, "loss": 0.6483, "step": 6006 }, { "epoch": 0.9, "grad_norm": 1.1255316734313965, "learning_rate": 2.47960333310423e-07, "loss": 0.5645, "step": 6007 }, { "epoch": 0.9, "grad_norm": 1.1492217779159546, "learning_rate": 2.472043104564381e-07, "loss": 0.4577, "step": 6008 }, { "epoch": 0.9, "grad_norm": 1.0053598880767822, "learning_rate": 2.4644941269256873e-07, "loss": 0.449, "step": 6009 }, { "epoch": 0.9, "grad_norm": 1.183863639831543, "learning_rate": 2.4569564019751633e-07, "loss": 0.6995, "step": 6010 }, { "epoch": 0.9, "grad_norm": 1.3725996017456055, "learning_rate": 2.449429931497155e-07, "loss": 0.4871, "step": 6011 }, { "epoch": 0.9, "grad_norm": 1.2905778884887695, "learning_rate": 2.441914717273342e-07, "loss": 0.4088, "step": 6012 }, { "epoch": 0.9, "grad_norm": 1.265895962715149, "learning_rate": 2.4344107610827283e-07, "loss": 0.5046, "step": 6013 }, { "epoch": 0.9, "grad_norm": 1.151951551437378, "learning_rate": 2.4269180647016776e-07, "loss": 0.4657, "step": 6014 }, { "epoch": 0.9, "grad_norm": 1.2203885316848755, "learning_rate": 2.419436629903865e-07, "loss": 0.4466, "step": 6015 }, { "epoch": 0.9, "grad_norm": 1.210483431816101, "learning_rate": 2.411966458460319e-07, "loss": 0.4268, "step": 6016 }, { "epoch": 0.9, "grad_norm": 1.100608468055725, "learning_rate": 2.404507552139379e-07, "loss": 0.4717, "step": 6017 }, { "epoch": 0.9, "grad_norm": 1.1579445600509644, "learning_rate": 2.397059912706734e-07, "loss": 0.6966, "step": 6018 }, { "epoch": 0.9, "grad_norm": 1.1490697860717773, "learning_rate": 2.389623541925407e-07, "loss": 0.4239, "step": 6019 }, { "epoch": 0.9, "grad_norm": 1.2932358980178833, "learning_rate": 2.3821984415557498e-07, "loss": 0.5073, "step": 6020 }, { "epoch": 0.9, "grad_norm": 1.542092204093933, "learning_rate": 2.3747846133554353e-07, "loss": 0.4456, "step": 6021 }, { "epoch": 0.9, "grad_norm": 1.2141945362091064, "learning_rate": 2.367382059079487e-07, "loss": 0.4884, "step": 6022 }, { "epoch": 0.9, "grad_norm": 1.230737566947937, "learning_rate": 2.359990780480248e-07, "loss": 0.4943, "step": 6023 }, { "epoch": 0.9, "grad_norm": 1.5487433671951294, "learning_rate": 2.352610779307396e-07, "loss": 0.4576, "step": 6024 }, { "epoch": 0.91, "grad_norm": 1.2459936141967773, "learning_rate": 2.345242057307928e-07, "loss": 0.4781, "step": 6025 }, { "epoch": 0.91, "grad_norm": 1.2093324661254883, "learning_rate": 2.337884616226199e-07, "loss": 0.5162, "step": 6026 }, { "epoch": 0.91, "grad_norm": 1.0936589241027832, "learning_rate": 2.3305384578038547e-07, "loss": 0.4352, "step": 6027 }, { "epoch": 0.91, "grad_norm": 0.9890527725219727, "learning_rate": 2.323203583779904e-07, "loss": 0.4869, "step": 6028 }, { "epoch": 0.91, "grad_norm": 1.2007614374160767, "learning_rate": 2.3158799958906741e-07, "loss": 0.4962, "step": 6029 }, { "epoch": 0.91, "grad_norm": 1.1227102279663086, "learning_rate": 2.3085676958698012e-07, "loss": 0.4529, "step": 6030 }, { "epoch": 0.91, "grad_norm": 1.2247613668441772, "learning_rate": 2.301266685448289e-07, "loss": 0.3818, "step": 6031 }, { "epoch": 0.91, "grad_norm": 1.334748387336731, "learning_rate": 2.2939769663544275e-07, "loss": 0.5311, "step": 6032 }, { "epoch": 0.91, "grad_norm": 1.2376986742019653, "learning_rate": 2.2866985403138532e-07, "loss": 0.4557, "step": 6033 }, { "epoch": 0.91, "grad_norm": 1.3811931610107422, "learning_rate": 2.2794314090495372e-07, "loss": 0.47, "step": 6034 }, { "epoch": 0.91, "grad_norm": 1.631800889968872, "learning_rate": 2.2721755742817542e-07, "loss": 0.4212, "step": 6035 }, { "epoch": 0.91, "grad_norm": 1.4630876779556274, "learning_rate": 2.2649310377281353e-07, "loss": 0.4962, "step": 6036 }, { "epoch": 0.91, "grad_norm": 1.34295654296875, "learning_rate": 2.2576978011035978e-07, "loss": 0.5152, "step": 6037 }, { "epoch": 0.91, "grad_norm": 1.7770229578018188, "learning_rate": 2.250475866120433e-07, "loss": 0.4826, "step": 6038 }, { "epoch": 0.91, "grad_norm": 2.241450309753418, "learning_rate": 2.243265234488201e-07, "loss": 0.4647, "step": 6039 }, { "epoch": 0.91, "grad_norm": 1.7549692392349243, "learning_rate": 2.236065907913837e-07, "loss": 0.5115, "step": 6040 }, { "epoch": 0.91, "grad_norm": 3.001791000366211, "learning_rate": 2.2288778881015716e-07, "loss": 0.399, "step": 6041 }, { "epoch": 0.91, "grad_norm": 1.0589327812194824, "learning_rate": 2.2217011767529605e-07, "loss": 0.4976, "step": 6042 }, { "epoch": 0.91, "grad_norm": 1.173000454902649, "learning_rate": 2.2145357755669006e-07, "loss": 0.5037, "step": 6043 }, { "epoch": 0.91, "grad_norm": 2.248868227005005, "learning_rate": 2.2073816862395793e-07, "loss": 0.4696, "step": 6044 }, { "epoch": 0.91, "grad_norm": 1.8161250352859497, "learning_rate": 2.2002389104645472e-07, "loss": 0.4536, "step": 6045 }, { "epoch": 0.91, "grad_norm": 1.557204008102417, "learning_rate": 2.1931074499326355e-07, "loss": 0.4705, "step": 6046 }, { "epoch": 0.91, "grad_norm": 1.938049554824829, "learning_rate": 2.1859873063320214e-07, "loss": 0.5315, "step": 6047 }, { "epoch": 0.91, "grad_norm": 1.4173325300216675, "learning_rate": 2.1788784813482012e-07, "loss": 0.48, "step": 6048 }, { "epoch": 0.91, "grad_norm": 1.037218451499939, "learning_rate": 2.171780976663984e-07, "loss": 0.4373, "step": 6049 }, { "epoch": 0.91, "grad_norm": 1.141210675239563, "learning_rate": 2.1646947939595153e-07, "loss": 0.4946, "step": 6050 }, { "epoch": 0.91, "grad_norm": 1.2426280975341797, "learning_rate": 2.1576199349122307e-07, "loss": 0.4319, "step": 6051 }, { "epoch": 0.91, "grad_norm": 1.513839840888977, "learning_rate": 2.150556401196907e-07, "loss": 0.455, "step": 6052 }, { "epoch": 0.91, "grad_norm": 1.3029088973999023, "learning_rate": 2.1435041944856517e-07, "loss": 0.4662, "step": 6053 }, { "epoch": 0.91, "grad_norm": 1.173945665359497, "learning_rate": 2.1364633164478622e-07, "loss": 0.465, "step": 6054 }, { "epoch": 0.91, "grad_norm": 1.2218071222305298, "learning_rate": 2.129433768750272e-07, "loss": 0.4321, "step": 6055 }, { "epoch": 0.91, "grad_norm": 1.5137618780136108, "learning_rate": 2.1224155530569223e-07, "loss": 0.4549, "step": 6056 }, { "epoch": 0.91, "grad_norm": 1.466481328010559, "learning_rate": 2.11540867102919e-07, "loss": 0.4929, "step": 6057 }, { "epoch": 0.91, "grad_norm": 1.2024606466293335, "learning_rate": 2.1084131243257422e-07, "loss": 0.4559, "step": 6058 }, { "epoch": 0.91, "grad_norm": 2.6641170978546143, "learning_rate": 2.1014289146025823e-07, "loss": 0.4894, "step": 6059 }, { "epoch": 0.91, "grad_norm": 1.3617771863937378, "learning_rate": 2.0944560435130268e-07, "loss": 0.4495, "step": 6060 }, { "epoch": 0.91, "grad_norm": 1.2450300455093384, "learning_rate": 2.0874945127076994e-07, "loss": 0.523, "step": 6061 }, { "epoch": 0.91, "grad_norm": 1.403038501739502, "learning_rate": 2.0805443238345546e-07, "loss": 0.4733, "step": 6062 }, { "epoch": 0.91, "grad_norm": 1.2698363065719604, "learning_rate": 2.0736054785388536e-07, "loss": 0.4973, "step": 6063 }, { "epoch": 0.91, "grad_norm": 1.261439561843872, "learning_rate": 2.0666779784631608e-07, "loss": 0.5089, "step": 6064 }, { "epoch": 0.91, "grad_norm": 1.2117012739181519, "learning_rate": 2.059761825247375e-07, "loss": 0.4416, "step": 6065 }, { "epoch": 0.91, "grad_norm": 1.2227336168289185, "learning_rate": 2.052857020528698e-07, "loss": 0.7251, "step": 6066 }, { "epoch": 0.91, "grad_norm": 1.4409080743789673, "learning_rate": 2.0459635659416498e-07, "loss": 0.4343, "step": 6067 }, { "epoch": 0.91, "grad_norm": 1.1082463264465332, "learning_rate": 2.0390814631180588e-07, "loss": 0.5056, "step": 6068 }, { "epoch": 0.91, "grad_norm": 1.3740967512130737, "learning_rate": 2.0322107136870662e-07, "loss": 0.4653, "step": 6069 }, { "epoch": 0.91, "grad_norm": 1.373706579208374, "learning_rate": 2.0253513192751374e-07, "loss": 0.5361, "step": 6070 }, { "epoch": 0.91, "grad_norm": 1.2820115089416504, "learning_rate": 2.0185032815060235e-07, "loss": 0.4152, "step": 6071 }, { "epoch": 0.91, "grad_norm": 1.2282321453094482, "learning_rate": 2.0116666020008168e-07, "loss": 0.5157, "step": 6072 }, { "epoch": 0.91, "grad_norm": 1.1528432369232178, "learning_rate": 2.0048412823778952e-07, "loss": 0.389, "step": 6073 }, { "epoch": 0.91, "grad_norm": 1.484161376953125, "learning_rate": 1.9980273242529825e-07, "loss": 0.4537, "step": 6074 }, { "epoch": 0.91, "grad_norm": 1.2444125413894653, "learning_rate": 1.9912247292390774e-07, "loss": 0.5152, "step": 6075 }, { "epoch": 0.91, "grad_norm": 1.293257713317871, "learning_rate": 1.9844334989464975e-07, "loss": 0.4775, "step": 6076 }, { "epoch": 0.91, "grad_norm": 1.3392599821090698, "learning_rate": 1.9776536349828846e-07, "loss": 0.5919, "step": 6077 }, { "epoch": 0.91, "grad_norm": 1.252747893333435, "learning_rate": 1.9708851389531714e-07, "loss": 0.4323, "step": 6078 }, { "epoch": 0.91, "grad_norm": 1.1506712436676025, "learning_rate": 1.9641280124596153e-07, "loss": 0.7171, "step": 6079 }, { "epoch": 0.91, "grad_norm": 1.5756139755249023, "learning_rate": 1.9573822571017643e-07, "loss": 0.4942, "step": 6080 }, { "epoch": 0.91, "grad_norm": 1.175487995147705, "learning_rate": 1.9506478744765022e-07, "loss": 0.4364, "step": 6081 }, { "epoch": 0.91, "grad_norm": 0.9507508873939514, "learning_rate": 1.9439248661779987e-07, "loss": 0.6731, "step": 6082 }, { "epoch": 0.91, "grad_norm": 1.3985565900802612, "learning_rate": 1.937213233797719e-07, "loss": 0.4709, "step": 6083 }, { "epoch": 0.91, "grad_norm": 1.4112985134124756, "learning_rate": 1.9305129789244648e-07, "loss": 0.5187, "step": 6084 }, { "epoch": 0.91, "grad_norm": 1.090680718421936, "learning_rate": 1.9238241031443394e-07, "loss": 0.6991, "step": 6085 }, { "epoch": 0.91, "grad_norm": 1.0758627653121948, "learning_rate": 1.9171466080407374e-07, "loss": 0.4303, "step": 6086 }, { "epoch": 0.91, "grad_norm": 1.180816650390625, "learning_rate": 1.9104804951943723e-07, "loss": 0.4865, "step": 6087 }, { "epoch": 0.91, "grad_norm": 2.2770519256591797, "learning_rate": 1.9038257661832483e-07, "loss": 0.4585, "step": 6088 }, { "epoch": 0.91, "grad_norm": 1.1992034912109375, "learning_rate": 1.8971824225826996e-07, "loss": 0.4435, "step": 6089 }, { "epoch": 0.91, "grad_norm": 1.4818551540374756, "learning_rate": 1.8905504659653296e-07, "loss": 0.4973, "step": 6090 }, { "epoch": 0.91, "grad_norm": 1.6872882843017578, "learning_rate": 1.8839298979010823e-07, "loss": 0.6021, "step": 6091 }, { "epoch": 0.92, "grad_norm": 1.3530234098434448, "learning_rate": 1.8773207199571875e-07, "loss": 0.4192, "step": 6092 }, { "epoch": 0.92, "grad_norm": 1.2066092491149902, "learning_rate": 1.8707229336981768e-07, "loss": 0.517, "step": 6093 }, { "epoch": 0.92, "grad_norm": 1.1100459098815918, "learning_rate": 1.8641365406858958e-07, "loss": 0.4074, "step": 6094 }, { "epoch": 0.92, "grad_norm": 1.3713841438293457, "learning_rate": 1.8575615424794747e-07, "loss": 0.5093, "step": 6095 }, { "epoch": 0.92, "grad_norm": 1.2143545150756836, "learning_rate": 1.8509979406353796e-07, "loss": 0.4584, "step": 6096 }, { "epoch": 0.92, "grad_norm": 1.263182520866394, "learning_rate": 1.8444457367073344e-07, "loss": 0.3664, "step": 6097 }, { "epoch": 0.92, "grad_norm": 1.0403507947921753, "learning_rate": 1.8379049322464094e-07, "loss": 0.484, "step": 6098 }, { "epoch": 0.92, "grad_norm": 1.226783037185669, "learning_rate": 1.831375528800944e-07, "loss": 0.4433, "step": 6099 }, { "epoch": 0.92, "grad_norm": 1.224359154701233, "learning_rate": 1.8248575279165847e-07, "loss": 0.7079, "step": 6100 }, { "epoch": 0.92, "grad_norm": 1.5770361423492432, "learning_rate": 1.8183509311363035e-07, "loss": 0.5078, "step": 6101 }, { "epoch": 0.92, "grad_norm": 1.1214983463287354, "learning_rate": 1.8118557400003288e-07, "loss": 0.4791, "step": 6102 }, { "epoch": 0.92, "grad_norm": 1.3320794105529785, "learning_rate": 1.8053719560462312e-07, "loss": 0.4819, "step": 6103 }, { "epoch": 0.92, "grad_norm": 1.0991966724395752, "learning_rate": 1.798899580808855e-07, "loss": 0.4242, "step": 6104 }, { "epoch": 0.92, "grad_norm": 1.29206120967865, "learning_rate": 1.7924386158203576e-07, "loss": 0.5364, "step": 6105 }, { "epoch": 0.92, "grad_norm": 1.4555288553237915, "learning_rate": 1.7859890626101828e-07, "loss": 0.4488, "step": 6106 }, { "epoch": 0.92, "grad_norm": 1.1167752742767334, "learning_rate": 1.7795509227050868e-07, "loss": 0.4605, "step": 6107 }, { "epoch": 0.92, "grad_norm": 1.3553376197814941, "learning_rate": 1.7731241976291226e-07, "loss": 0.4276, "step": 6108 }, { "epoch": 0.92, "grad_norm": 1.2290257215499878, "learning_rate": 1.7667088889036178e-07, "loss": 0.4416, "step": 6109 }, { "epoch": 0.92, "grad_norm": 1.4431174993515015, "learning_rate": 1.7603049980472352e-07, "loss": 0.4941, "step": 6110 }, { "epoch": 0.92, "grad_norm": 1.3475068807601929, "learning_rate": 1.7539125265759016e-07, "loss": 0.4427, "step": 6111 }, { "epoch": 0.92, "grad_norm": 1.4420604705810547, "learning_rate": 1.747531476002856e-07, "loss": 0.4177, "step": 6112 }, { "epoch": 0.92, "grad_norm": 1.2229853868484497, "learning_rate": 1.7411618478386406e-07, "loss": 0.4896, "step": 6113 }, { "epoch": 0.92, "grad_norm": 1.5182392597198486, "learning_rate": 1.7348036435910765e-07, "loss": 0.5305, "step": 6114 }, { "epoch": 0.92, "grad_norm": 1.4087728261947632, "learning_rate": 1.7284568647652934e-07, "loss": 0.4707, "step": 6115 }, { "epoch": 0.92, "grad_norm": 1.2125341892242432, "learning_rate": 1.7221215128637003e-07, "loss": 0.5583, "step": 6116 }, { "epoch": 0.92, "grad_norm": 1.3377796411514282, "learning_rate": 1.715797589386031e-07, "loss": 0.4619, "step": 6117 }, { "epoch": 0.92, "grad_norm": 1.1702831983566284, "learning_rate": 1.709485095829283e-07, "loss": 0.446, "step": 6118 }, { "epoch": 0.92, "grad_norm": 1.5697861909866333, "learning_rate": 1.7031840336877603e-07, "loss": 0.374, "step": 6119 }, { "epoch": 0.92, "grad_norm": 1.1464041471481323, "learning_rate": 1.6968944044530755e-07, "loss": 0.4787, "step": 6120 }, { "epoch": 0.92, "grad_norm": 1.1089611053466797, "learning_rate": 1.6906162096141043e-07, "loss": 0.4599, "step": 6121 }, { "epoch": 0.92, "grad_norm": 1.1633623838424683, "learning_rate": 1.6843494506570413e-07, "loss": 0.4214, "step": 6122 }, { "epoch": 0.92, "grad_norm": 1.0879391431808472, "learning_rate": 1.6780941290653662e-07, "loss": 0.4637, "step": 6123 }, { "epoch": 0.92, "grad_norm": 1.2639684677124023, "learning_rate": 1.6718502463198338e-07, "loss": 0.4651, "step": 6124 }, { "epoch": 0.92, "grad_norm": 1.1799719333648682, "learning_rate": 1.6656178038985282e-07, "loss": 0.4402, "step": 6125 }, { "epoch": 0.92, "grad_norm": 1.1721020936965942, "learning_rate": 1.6593968032767916e-07, "loss": 0.4923, "step": 6126 }, { "epoch": 0.92, "grad_norm": 1.4942673444747925, "learning_rate": 1.6531872459272735e-07, "loss": 0.427, "step": 6127 }, { "epoch": 0.92, "grad_norm": 1.1404526233673096, "learning_rate": 1.6469891333199095e-07, "loss": 0.4539, "step": 6128 }, { "epoch": 0.92, "grad_norm": 1.0387738943099976, "learning_rate": 1.640802466921926e-07, "loss": 0.482, "step": 6129 }, { "epoch": 0.92, "grad_norm": 1.0233893394470215, "learning_rate": 1.6346272481978508e-07, "loss": 0.447, "step": 6130 }, { "epoch": 0.92, "grad_norm": 1.2150996923446655, "learning_rate": 1.6284634786094765e-07, "loss": 0.7426, "step": 6131 }, { "epoch": 0.92, "grad_norm": 1.587141990661621, "learning_rate": 1.6223111596159192e-07, "loss": 0.458, "step": 6132 }, { "epoch": 0.92, "grad_norm": 1.171364426612854, "learning_rate": 1.616170292673558e-07, "loss": 0.4074, "step": 6133 }, { "epoch": 0.92, "grad_norm": 1.3075531721115112, "learning_rate": 1.610040879236069e-07, "loss": 0.4736, "step": 6134 }, { "epoch": 0.92, "grad_norm": 1.2431747913360596, "learning_rate": 1.6039229207544194e-07, "loss": 0.3892, "step": 6135 }, { "epoch": 0.92, "grad_norm": 1.6376389265060425, "learning_rate": 1.597816418676862e-07, "loss": 0.5402, "step": 6136 }, { "epoch": 0.92, "grad_norm": 1.1529837846755981, "learning_rate": 1.59172137444894e-07, "loss": 0.4352, "step": 6137 }, { "epoch": 0.92, "grad_norm": 1.0116592645645142, "learning_rate": 1.5856377895134779e-07, "loss": 0.5617, "step": 6138 }, { "epoch": 0.92, "grad_norm": 1.239894151687622, "learning_rate": 1.5795656653106007e-07, "loss": 0.4831, "step": 6139 }, { "epoch": 0.92, "grad_norm": 1.0880399942398071, "learning_rate": 1.5735050032777033e-07, "loss": 0.4321, "step": 6140 }, { "epoch": 0.92, "grad_norm": 1.0844826698303223, "learning_rate": 1.5674558048494826e-07, "loss": 0.4561, "step": 6141 }, { "epoch": 0.92, "grad_norm": 1.2805031538009644, "learning_rate": 1.5614180714579153e-07, "loss": 0.4471, "step": 6142 }, { "epoch": 0.92, "grad_norm": 2.364750623703003, "learning_rate": 1.5553918045322636e-07, "loss": 0.4223, "step": 6143 }, { "epoch": 0.92, "grad_norm": 1.2886261940002441, "learning_rate": 1.5493770054990753e-07, "loss": 0.5097, "step": 6144 }, { "epoch": 0.92, "grad_norm": 1.2397620677947998, "learning_rate": 1.5433736757821838e-07, "loss": 0.493, "step": 6145 }, { "epoch": 0.92, "grad_norm": 1.150155782699585, "learning_rate": 1.5373818168027076e-07, "loss": 0.4553, "step": 6146 }, { "epoch": 0.92, "grad_norm": 1.2570173740386963, "learning_rate": 1.5314014299790515e-07, "loss": 0.47, "step": 6147 }, { "epoch": 0.92, "grad_norm": 1.6822221279144287, "learning_rate": 1.525432516726899e-07, "loss": 0.4346, "step": 6148 }, { "epoch": 0.92, "grad_norm": 1.2192423343658447, "learning_rate": 1.5194750784592317e-07, "loss": 0.4513, "step": 6149 }, { "epoch": 0.92, "grad_norm": 1.2163164615631104, "learning_rate": 1.513529116586293e-07, "loss": 0.4128, "step": 6150 }, { "epoch": 0.92, "grad_norm": 1.2966856956481934, "learning_rate": 1.5075946325156242e-07, "loss": 0.4986, "step": 6151 }, { "epoch": 0.92, "grad_norm": 1.2877408266067505, "learning_rate": 1.501671627652057e-07, "loss": 0.4598, "step": 6152 }, { "epoch": 0.92, "grad_norm": 1.3641480207443237, "learning_rate": 1.4957601033976755e-07, "loss": 0.5474, "step": 6153 }, { "epoch": 0.92, "grad_norm": 1.2341012954711914, "learning_rate": 1.4898600611518876e-07, "loss": 0.4326, "step": 6154 }, { "epoch": 0.92, "grad_norm": 1.2184312343597412, "learning_rate": 1.4839715023113487e-07, "loss": 0.4938, "step": 6155 }, { "epoch": 0.92, "grad_norm": 2.3659138679504395, "learning_rate": 1.478094428270016e-07, "loss": 0.4685, "step": 6156 }, { "epoch": 0.92, "grad_norm": 1.0522929430007935, "learning_rate": 1.4722288404191098e-07, "loss": 0.3883, "step": 6157 }, { "epoch": 0.93, "grad_norm": 1.1606091260910034, "learning_rate": 1.4663747401471584e-07, "loss": 0.4618, "step": 6158 }, { "epoch": 0.93, "grad_norm": 1.2126615047454834, "learning_rate": 1.4605321288399365e-07, "loss": 0.4498, "step": 6159 }, { "epoch": 0.93, "grad_norm": 1.4075806140899658, "learning_rate": 1.454701007880538e-07, "loss": 0.4207, "step": 6160 }, { "epoch": 0.93, "grad_norm": 1.176819920539856, "learning_rate": 1.4488813786492974e-07, "loss": 0.435, "step": 6161 }, { "epoch": 0.93, "grad_norm": 1.487697958946228, "learning_rate": 1.443073242523846e-07, "loss": 0.4247, "step": 6162 }, { "epoch": 0.93, "grad_norm": 1.4391273260116577, "learning_rate": 1.4372766008791174e-07, "loss": 0.4596, "step": 6163 }, { "epoch": 0.93, "grad_norm": 1.296582818031311, "learning_rate": 1.431491455087286e-07, "loss": 0.4865, "step": 6164 }, { "epoch": 0.93, "grad_norm": 1.141400694847107, "learning_rate": 1.4257178065178233e-07, "loss": 0.4163, "step": 6165 }, { "epoch": 0.93, "grad_norm": 1.2694823741912842, "learning_rate": 1.419955656537486e-07, "loss": 0.4614, "step": 6166 }, { "epoch": 0.93, "grad_norm": 1.8404266834259033, "learning_rate": 1.4142050065102885e-07, "loss": 0.5593, "step": 6167 }, { "epoch": 0.93, "grad_norm": 1.2027010917663574, "learning_rate": 1.4084658577975417e-07, "loss": 0.4064, "step": 6168 }, { "epoch": 0.93, "grad_norm": 1.0978864431381226, "learning_rate": 1.402738211757826e-07, "loss": 0.3996, "step": 6169 }, { "epoch": 0.93, "grad_norm": 1.5224086046218872, "learning_rate": 1.3970220697470005e-07, "loss": 0.4066, "step": 6170 }, { "epoch": 0.93, "grad_norm": 1.3493101596832275, "learning_rate": 1.3913174331181946e-07, "loss": 0.4458, "step": 6171 }, { "epoch": 0.93, "grad_norm": 1.0674264430999756, "learning_rate": 1.3856243032218274e-07, "loss": 0.4558, "step": 6172 }, { "epoch": 0.93, "grad_norm": 1.2305158376693726, "learning_rate": 1.3799426814055827e-07, "loss": 0.4713, "step": 6173 }, { "epoch": 0.93, "grad_norm": 1.2164055109024048, "learning_rate": 1.3742725690144233e-07, "loss": 0.5066, "step": 6174 }, { "epoch": 0.93, "grad_norm": 1.2959429025650024, "learning_rate": 1.3686139673905973e-07, "loss": 0.5081, "step": 6175 }, { "epoch": 0.93, "grad_norm": 1.3977694511413574, "learning_rate": 1.3629668778736117e-07, "loss": 0.4777, "step": 6176 }, { "epoch": 0.93, "grad_norm": 1.967624545097351, "learning_rate": 1.3573313018002466e-07, "loss": 0.465, "step": 6177 }, { "epoch": 0.93, "grad_norm": 1.0886082649230957, "learning_rate": 1.3517072405045796e-07, "loss": 0.4492, "step": 6178 }, { "epoch": 0.93, "grad_norm": 1.1915738582611084, "learning_rate": 1.3460946953179343e-07, "loss": 0.4103, "step": 6179 }, { "epoch": 0.93, "grad_norm": 1.9106593132019043, "learning_rate": 1.3404936675689428e-07, "loss": 0.487, "step": 6180 }, { "epoch": 0.93, "grad_norm": 1.0474212169647217, "learning_rate": 1.3349041585834664e-07, "loss": 0.4253, "step": 6181 }, { "epoch": 0.93, "grad_norm": 1.2600092887878418, "learning_rate": 1.3293261696846804e-07, "loss": 0.4329, "step": 6182 }, { "epoch": 0.93, "grad_norm": 1.3376681804656982, "learning_rate": 1.3237597021930116e-07, "loss": 0.4443, "step": 6183 }, { "epoch": 0.93, "grad_norm": 1.302730917930603, "learning_rate": 1.3182047574261557e-07, "loss": 0.4181, "step": 6184 }, { "epoch": 0.93, "grad_norm": 1.2807424068450928, "learning_rate": 1.312661336699106e-07, "loss": 0.4876, "step": 6185 }, { "epoch": 0.93, "grad_norm": 1.310624599456787, "learning_rate": 1.3071294413240954e-07, "loss": 0.5199, "step": 6186 }, { "epoch": 0.93, "grad_norm": 1.080029010772705, "learning_rate": 1.3016090726106546e-07, "loss": 0.416, "step": 6187 }, { "epoch": 0.93, "grad_norm": 1.4374712705612183, "learning_rate": 1.2961002318655603e-07, "loss": 0.4926, "step": 6188 }, { "epoch": 0.93, "grad_norm": 1.419718861579895, "learning_rate": 1.2906029203928917e-07, "loss": 0.4478, "step": 6189 }, { "epoch": 0.93, "grad_norm": 1.4131076335906982, "learning_rate": 1.2851171394939743e-07, "loss": 0.4959, "step": 6190 }, { "epoch": 0.93, "grad_norm": 1.2670960426330566, "learning_rate": 1.2796428904674085e-07, "loss": 0.5154, "step": 6191 }, { "epoch": 0.93, "grad_norm": 1.2689543962478638, "learning_rate": 1.27418017460908e-07, "loss": 0.4635, "step": 6192 }, { "epoch": 0.93, "grad_norm": 1.507603406906128, "learning_rate": 1.268728993212115e-07, "loss": 0.4867, "step": 6193 }, { "epoch": 0.93, "grad_norm": 1.3655719757080078, "learning_rate": 1.2632893475669428e-07, "loss": 0.4273, "step": 6194 }, { "epoch": 0.93, "grad_norm": 1.172127604484558, "learning_rate": 1.2578612389612333e-07, "loss": 0.5223, "step": 6195 }, { "epoch": 0.93, "grad_norm": 1.1957396268844604, "learning_rate": 1.2524446686799418e-07, "loss": 0.579, "step": 6196 }, { "epoch": 0.93, "grad_norm": 1.1988147497177124, "learning_rate": 1.2470396380052986e-07, "loss": 0.4507, "step": 6197 }, { "epoch": 0.93, "grad_norm": 1.3002946376800537, "learning_rate": 1.2416461482167742e-07, "loss": 0.4919, "step": 6198 }, { "epoch": 0.93, "grad_norm": 1.2496017217636108, "learning_rate": 1.2362642005911418e-07, "loss": 0.3991, "step": 6199 }, { "epoch": 0.93, "grad_norm": 1.345002293586731, "learning_rate": 1.2308937964024105e-07, "loss": 0.4962, "step": 6200 }, { "epoch": 0.93, "grad_norm": 1.1186848878860474, "learning_rate": 1.225534936921885e-07, "loss": 0.4339, "step": 6201 }, { "epoch": 0.93, "grad_norm": 1.198361873626709, "learning_rate": 1.2201876234181232e-07, "loss": 0.4739, "step": 6202 }, { "epoch": 0.93, "grad_norm": 1.2575273513793945, "learning_rate": 1.2148518571569346e-07, "loss": 0.4856, "step": 6203 }, { "epoch": 0.93, "grad_norm": 1.8933255672454834, "learning_rate": 1.2095276394014254e-07, "loss": 0.4785, "step": 6204 }, { "epoch": 0.93, "grad_norm": 1.17628812789917, "learning_rate": 1.2042149714119488e-07, "loss": 0.5209, "step": 6205 }, { "epoch": 0.93, "grad_norm": 1.157539963722229, "learning_rate": 1.1989138544461375e-07, "loss": 0.5748, "step": 6206 }, { "epoch": 0.93, "grad_norm": 1.0880801677703857, "learning_rate": 1.1936242897588656e-07, "loss": 0.4449, "step": 6207 }, { "epoch": 0.93, "grad_norm": 1.1900413036346436, "learning_rate": 1.1883462786022982e-07, "loss": 0.491, "step": 6208 }, { "epoch": 0.93, "grad_norm": 1.22221839427948, "learning_rate": 1.1830798222258577e-07, "loss": 0.5105, "step": 6209 }, { "epoch": 0.93, "grad_norm": 1.6301896572113037, "learning_rate": 1.1778249218762194e-07, "loss": 0.4348, "step": 6210 }, { "epoch": 0.93, "grad_norm": 1.3319669961929321, "learning_rate": 1.1725815787973438e-07, "loss": 0.5298, "step": 6211 }, { "epoch": 0.93, "grad_norm": 1.2360161542892456, "learning_rate": 1.1673497942304324e-07, "loss": 0.4945, "step": 6212 }, { "epoch": 0.93, "grad_norm": 1.3266514539718628, "learning_rate": 1.1621295694139723e-07, "loss": 0.4205, "step": 6213 }, { "epoch": 0.93, "grad_norm": 1.2146342992782593, "learning_rate": 1.1569209055837027e-07, "loss": 0.5872, "step": 6214 }, { "epoch": 0.93, "grad_norm": 1.4834685325622559, "learning_rate": 1.1517238039726153e-07, "loss": 0.4253, "step": 6215 }, { "epoch": 0.93, "grad_norm": 1.2025139331817627, "learning_rate": 1.146538265810998e-07, "loss": 0.6908, "step": 6216 }, { "epoch": 0.93, "grad_norm": 1.4028797149658203, "learning_rate": 1.1413642923263635e-07, "loss": 0.4618, "step": 6217 }, { "epoch": 0.93, "grad_norm": 1.3842943906784058, "learning_rate": 1.1362018847435097e-07, "loss": 0.4549, "step": 6218 }, { "epoch": 0.93, "grad_norm": 1.2109556198120117, "learning_rate": 1.1310510442844924e-07, "loss": 0.4565, "step": 6219 }, { "epoch": 0.93, "grad_norm": 1.3283369541168213, "learning_rate": 1.1259117721686252e-07, "loss": 0.7184, "step": 6220 }, { "epoch": 0.93, "grad_norm": 2.6019065380096436, "learning_rate": 1.1207840696124905e-07, "loss": 0.4558, "step": 6221 }, { "epoch": 0.93, "grad_norm": 1.1543385982513428, "learning_rate": 1.1156679378299174e-07, "loss": 0.6673, "step": 6222 }, { "epoch": 0.93, "grad_norm": 1.7805516719818115, "learning_rate": 1.1105633780320146e-07, "loss": 0.4779, "step": 6223 }, { "epoch": 0.93, "grad_norm": 1.0591888427734375, "learning_rate": 1.105470391427138e-07, "loss": 0.4923, "step": 6224 }, { "epoch": 0.94, "grad_norm": 1.5481915473937988, "learning_rate": 1.1003889792209122e-07, "loss": 0.5468, "step": 6225 }, { "epoch": 0.94, "grad_norm": 1.2516605854034424, "learning_rate": 1.0953191426162136e-07, "loss": 0.4458, "step": 6226 }, { "epoch": 0.94, "grad_norm": 0.9552688002586365, "learning_rate": 1.0902608828131822e-07, "loss": 0.4354, "step": 6227 }, { "epoch": 0.94, "grad_norm": 1.1803569793701172, "learning_rate": 1.085214201009227e-07, "loss": 0.3912, "step": 6228 }, { "epoch": 0.94, "grad_norm": 1.143990159034729, "learning_rate": 1.0801790983989869e-07, "loss": 0.4761, "step": 6229 }, { "epoch": 0.94, "grad_norm": 1.450386881828308, "learning_rate": 1.0751555761744026e-07, "loss": 0.4752, "step": 6230 }, { "epoch": 0.94, "grad_norm": 1.4191539287567139, "learning_rate": 1.0701436355246452e-07, "loss": 0.5359, "step": 6231 }, { "epoch": 0.94, "grad_norm": 2.182546377182007, "learning_rate": 1.0651432776361437e-07, "loss": 0.5284, "step": 6232 }, { "epoch": 0.94, "grad_norm": 1.5335290431976318, "learning_rate": 1.0601545036925953e-07, "loss": 0.5334, "step": 6233 }, { "epoch": 0.94, "grad_norm": 2.5434653759002686, "learning_rate": 1.05517731487495e-07, "loss": 0.4477, "step": 6234 }, { "epoch": 0.94, "grad_norm": 1.5485002994537354, "learning_rate": 1.0502117123614209e-07, "loss": 0.5193, "step": 6235 }, { "epoch": 0.94, "grad_norm": 1.3346433639526367, "learning_rate": 1.0452576973274675e-07, "loss": 0.4523, "step": 6236 }, { "epoch": 0.94, "grad_norm": 1.2815593481063843, "learning_rate": 1.0403152709458186e-07, "loss": 0.4666, "step": 6237 }, { "epoch": 0.94, "grad_norm": 2.395580768585205, "learning_rate": 1.035384434386455e-07, "loss": 0.5446, "step": 6238 }, { "epoch": 0.94, "grad_norm": 1.3128292560577393, "learning_rate": 1.0304651888166039e-07, "loss": 0.4627, "step": 6239 }, { "epoch": 0.94, "grad_norm": 4.29817008972168, "learning_rate": 1.0255575354007619e-07, "loss": 0.4089, "step": 6240 }, { "epoch": 0.94, "grad_norm": 1.2485730648040771, "learning_rate": 1.0206614753006827e-07, "loss": 0.4427, "step": 6241 }, { "epoch": 0.94, "grad_norm": 1.240339994430542, "learning_rate": 1.0157770096753672e-07, "loss": 0.4059, "step": 6242 }, { "epoch": 0.94, "grad_norm": 1.305830955505371, "learning_rate": 1.0109041396810738e-07, "loss": 0.5271, "step": 6243 }, { "epoch": 0.94, "grad_norm": 1.1764887571334839, "learning_rate": 1.0060428664713129e-07, "loss": 0.431, "step": 6244 }, { "epoch": 0.94, "grad_norm": 2.0524744987487793, "learning_rate": 1.0011931911968642e-07, "loss": 0.4727, "step": 6245 }, { "epoch": 0.94, "grad_norm": 1.3958117961883545, "learning_rate": 9.963551150057427e-08, "loss": 0.532, "step": 6246 }, { "epoch": 0.94, "grad_norm": 1.405836820602417, "learning_rate": 9.915286390432266e-08, "loss": 0.4463, "step": 6247 }, { "epoch": 0.94, "grad_norm": 1.397648811340332, "learning_rate": 9.86713764451852e-08, "loss": 0.5433, "step": 6248 }, { "epoch": 0.94, "grad_norm": 2.17073655128479, "learning_rate": 9.819104923714018e-08, "loss": 0.4708, "step": 6249 }, { "epoch": 0.94, "grad_norm": 1.476394534111023, "learning_rate": 9.771188239389162e-08, "loss": 0.5646, "step": 6250 }, { "epoch": 0.94, "grad_norm": 1.2624880075454712, "learning_rate": 9.723387602886769e-08, "loss": 0.4372, "step": 6251 }, { "epoch": 0.94, "grad_norm": 1.0149319171905518, "learning_rate": 9.67570302552251e-08, "loss": 0.4661, "step": 6252 }, { "epoch": 0.94, "grad_norm": 1.220304012298584, "learning_rate": 9.628134518584131e-08, "loss": 0.4173, "step": 6253 }, { "epoch": 0.94, "grad_norm": 1.5626509189605713, "learning_rate": 9.580682093332295e-08, "loss": 0.4672, "step": 6254 }, { "epoch": 0.94, "grad_norm": 1.6026262044906616, "learning_rate": 9.533345761000012e-08, "loss": 0.4062, "step": 6255 }, { "epoch": 0.94, "grad_norm": 1.4582202434539795, "learning_rate": 9.486125532792656e-08, "loss": 0.4988, "step": 6256 }, { "epoch": 0.94, "grad_norm": 1.3062713146209717, "learning_rate": 9.439021419888505e-08, "loss": 0.4743, "step": 6257 }, { "epoch": 0.94, "grad_norm": 1.4589028358459473, "learning_rate": 9.392033433437975e-08, "loss": 0.4691, "step": 6258 }, { "epoch": 0.94, "grad_norm": 1.1508336067199707, "learning_rate": 9.345161584564221e-08, "loss": 0.3981, "step": 6259 }, { "epoch": 0.94, "grad_norm": 1.3657234907150269, "learning_rate": 9.29840588436276e-08, "loss": 0.4825, "step": 6260 }, { "epoch": 0.94, "grad_norm": 1.164241909980774, "learning_rate": 9.251766343901735e-08, "loss": 0.4697, "step": 6261 }, { "epoch": 0.94, "grad_norm": 1.2961406707763672, "learning_rate": 9.205242974221707e-08, "loss": 0.4398, "step": 6262 }, { "epoch": 0.94, "grad_norm": 1.0145031213760376, "learning_rate": 9.158835786335806e-08, "loss": 0.6509, "step": 6263 }, { "epoch": 0.94, "grad_norm": 1.4878363609313965, "learning_rate": 9.112544791229638e-08, "loss": 0.4688, "step": 6264 }, { "epoch": 0.94, "grad_norm": 1.5373011827468872, "learning_rate": 9.066369999861213e-08, "loss": 0.497, "step": 6265 }, { "epoch": 0.94, "grad_norm": 1.190170407295227, "learning_rate": 9.020311423161177e-08, "loss": 0.5127, "step": 6266 }, { "epoch": 0.94, "grad_norm": 1.4805817604064941, "learning_rate": 8.974369072032584e-08, "loss": 0.4442, "step": 6267 }, { "epoch": 0.94, "grad_norm": 1.152275800704956, "learning_rate": 8.928542957350961e-08, "loss": 0.7147, "step": 6268 }, { "epoch": 0.94, "grad_norm": 1.1782182455062866, "learning_rate": 8.882833089964349e-08, "loss": 0.4699, "step": 6269 }, { "epoch": 0.94, "grad_norm": 1.2063273191452026, "learning_rate": 8.83723948069326e-08, "loss": 0.4514, "step": 6270 }, { "epoch": 0.94, "grad_norm": 1.2408361434936523, "learning_rate": 8.791762140330784e-08, "loss": 0.4805, "step": 6271 }, { "epoch": 0.94, "grad_norm": 1.1432504653930664, "learning_rate": 8.746401079642252e-08, "loss": 0.5029, "step": 6272 }, { "epoch": 0.94, "grad_norm": 1.3265061378479004, "learning_rate": 8.701156309365688e-08, "loss": 0.4767, "step": 6273 }, { "epoch": 0.94, "grad_norm": 1.0427541732788086, "learning_rate": 8.656027840211578e-08, "loss": 0.4882, "step": 6274 }, { "epoch": 0.94, "grad_norm": 1.3221508264541626, "learning_rate": 8.611015682862711e-08, "loss": 0.4964, "step": 6275 }, { "epoch": 0.94, "grad_norm": 1.1509898900985718, "learning_rate": 8.566119847974508e-08, "loss": 0.467, "step": 6276 }, { "epoch": 0.94, "grad_norm": 1.6717634201049805, "learning_rate": 8.521340346174689e-08, "loss": 0.5222, "step": 6277 }, { "epoch": 0.94, "grad_norm": 1.1427887678146362, "learning_rate": 8.47667718806372e-08, "loss": 0.4664, "step": 6278 }, { "epoch": 0.94, "grad_norm": 1.5466383695602417, "learning_rate": 8.432130384214143e-08, "loss": 0.5347, "step": 6279 }, { "epoch": 0.94, "grad_norm": 1.1461542844772339, "learning_rate": 8.387699945171245e-08, "loss": 0.4746, "step": 6280 }, { "epoch": 0.94, "grad_norm": 1.2600769996643066, "learning_rate": 8.343385881452726e-08, "loss": 0.4583, "step": 6281 }, { "epoch": 0.94, "grad_norm": 2.6727895736694336, "learning_rate": 8.29918820354858e-08, "loss": 0.4728, "step": 6282 }, { "epoch": 0.94, "grad_norm": 1.3913127183914185, "learning_rate": 8.255106921921497e-08, "loss": 0.3917, "step": 6283 }, { "epoch": 0.94, "grad_norm": 1.3956694602966309, "learning_rate": 8.211142047006293e-08, "loss": 0.4345, "step": 6284 }, { "epoch": 0.94, "grad_norm": 1.241567850112915, "learning_rate": 8.167293589210534e-08, "loss": 0.5192, "step": 6285 }, { "epoch": 0.94, "grad_norm": 1.1695960760116577, "learning_rate": 8.12356155891414e-08, "loss": 0.4775, "step": 6286 }, { "epoch": 0.94, "grad_norm": 1.65263831615448, "learning_rate": 8.079945966469327e-08, "loss": 0.4428, "step": 6287 }, { "epoch": 0.94, "grad_norm": 1.2410829067230225, "learning_rate": 8.03644682220095e-08, "loss": 0.5614, "step": 6288 }, { "epoch": 0.94, "grad_norm": 1.3135679960250854, "learning_rate": 7.993064136406103e-08, "loss": 0.4049, "step": 6289 }, { "epoch": 0.94, "grad_norm": 1.0358327627182007, "learning_rate": 7.949797919354518e-08, "loss": 0.467, "step": 6290 }, { "epoch": 0.95, "grad_norm": 1.8819741010665894, "learning_rate": 7.906648181288168e-08, "loss": 0.3439, "step": 6291 }, { "epoch": 0.95, "grad_norm": 1.4798946380615234, "learning_rate": 7.863614932421548e-08, "loss": 0.4306, "step": 6292 }, { "epoch": 0.95, "grad_norm": 1.8780813217163086, "learning_rate": 7.820698182941621e-08, "loss": 0.4437, "step": 6293 }, { "epoch": 0.95, "grad_norm": 1.543448567390442, "learning_rate": 7.777897943007595e-08, "loss": 0.4526, "step": 6294 }, { "epoch": 0.95, "grad_norm": 1.5969704389572144, "learning_rate": 7.735214222751363e-08, "loss": 0.5268, "step": 6295 }, { "epoch": 0.95, "grad_norm": 1.8480788469314575, "learning_rate": 7.692647032276956e-08, "loss": 0.4458, "step": 6296 }, { "epoch": 0.95, "grad_norm": 1.2603952884674072, "learning_rate": 7.650196381661035e-08, "loss": 0.503, "step": 6297 }, { "epoch": 0.95, "grad_norm": 1.4263375997543335, "learning_rate": 7.607862280952616e-08, "loss": 0.4742, "step": 6298 }, { "epoch": 0.95, "grad_norm": 1.1164069175720215, "learning_rate": 7.565644740173073e-08, "loss": 0.4695, "step": 6299 }, { "epoch": 0.95, "grad_norm": 5.662881851196289, "learning_rate": 7.523543769316188e-08, "loss": 0.4868, "step": 6300 }, { "epoch": 0.95, "grad_norm": 1.4724469184875488, "learning_rate": 7.481559378348158e-08, "loss": 0.501, "step": 6301 }, { "epoch": 0.95, "grad_norm": 1.134722352027893, "learning_rate": 7.439691577207697e-08, "loss": 0.4541, "step": 6302 }, { "epoch": 0.95, "grad_norm": 1.1811020374298096, "learning_rate": 7.397940375805712e-08, "loss": 0.5284, "step": 6303 }, { "epoch": 0.95, "grad_norm": 2.485797166824341, "learning_rate": 7.356305784025686e-08, "loss": 0.5117, "step": 6304 }, { "epoch": 0.95, "grad_norm": 1.1285111904144287, "learning_rate": 7.314787811723456e-08, "loss": 0.4118, "step": 6305 }, { "epoch": 0.95, "grad_norm": 1.3677358627319336, "learning_rate": 7.273386468727162e-08, "loss": 0.5427, "step": 6306 }, { "epoch": 0.95, "grad_norm": 1.3954987525939941, "learning_rate": 7.232101764837407e-08, "loss": 0.5005, "step": 6307 }, { "epoch": 0.95, "grad_norm": 1.7673951387405396, "learning_rate": 7.190933709827208e-08, "loss": 0.4616, "step": 6308 }, { "epoch": 0.95, "grad_norm": 1.15227210521698, "learning_rate": 7.149882313441991e-08, "loss": 0.4676, "step": 6309 }, { "epoch": 0.95, "grad_norm": 1.1434550285339355, "learning_rate": 7.108947585399429e-08, "loss": 0.4814, "step": 6310 }, { "epoch": 0.95, "grad_norm": 1.2931584119796753, "learning_rate": 7.068129535389657e-08, "loss": 0.4425, "step": 6311 }, { "epoch": 0.95, "grad_norm": 1.0984195470809937, "learning_rate": 7.027428173075279e-08, "loss": 0.7195, "step": 6312 }, { "epoch": 0.95, "grad_norm": 1.4520745277404785, "learning_rate": 6.986843508091146e-08, "loss": 0.4538, "step": 6313 }, { "epoch": 0.95, "grad_norm": 1.4441320896148682, "learning_rate": 6.946375550044515e-08, "loss": 0.4484, "step": 6314 }, { "epoch": 0.95, "grad_norm": 1.139894962310791, "learning_rate": 6.906024308515003e-08, "loss": 0.438, "step": 6315 }, { "epoch": 0.95, "grad_norm": 1.8097423315048218, "learning_rate": 6.865789793054745e-08, "loss": 0.5037, "step": 6316 }, { "epoch": 0.95, "grad_norm": 1.2243798971176147, "learning_rate": 6.825672013188012e-08, "loss": 0.4108, "step": 6317 }, { "epoch": 0.95, "grad_norm": 1.403903841972351, "learning_rate": 6.78567097841154e-08, "loss": 0.4398, "step": 6318 }, { "epoch": 0.95, "grad_norm": 1.3863590955734253, "learning_rate": 6.745786698194589e-08, "loss": 0.4463, "step": 6319 }, { "epoch": 0.95, "grad_norm": 1.0537868738174438, "learning_rate": 6.706019181978551e-08, "loss": 0.6887, "step": 6320 }, { "epoch": 0.95, "grad_norm": 1.4274094104766846, "learning_rate": 6.666368439177173e-08, "loss": 0.5534, "step": 6321 }, { "epoch": 0.95, "grad_norm": 1.2708693742752075, "learning_rate": 6.62683447917678e-08, "loss": 0.4766, "step": 6322 }, { "epoch": 0.95, "grad_norm": 1.1859041452407837, "learning_rate": 6.587417311335886e-08, "loss": 0.3853, "step": 6323 }, { "epoch": 0.95, "grad_norm": 1.2621090412139893, "learning_rate": 6.548116944985361e-08, "loss": 0.4613, "step": 6324 }, { "epoch": 0.95, "grad_norm": 1.2945261001586914, "learning_rate": 6.50893338942843e-08, "loss": 0.4308, "step": 6325 }, { "epoch": 0.95, "grad_norm": 1.379584550857544, "learning_rate": 6.469866653940782e-08, "loss": 0.5029, "step": 6326 }, { "epoch": 0.95, "grad_norm": 1.4846841096878052, "learning_rate": 6.430916747770299e-08, "loss": 0.5539, "step": 6327 }, { "epoch": 0.95, "grad_norm": 1.1719516515731812, "learning_rate": 6.392083680137329e-08, "loss": 0.4392, "step": 6328 }, { "epoch": 0.95, "grad_norm": 1.1183443069458008, "learning_rate": 6.353367460234406e-08, "loss": 0.4321, "step": 6329 }, { "epoch": 0.95, "grad_norm": 1.224784255027771, "learning_rate": 6.314768097226586e-08, "loss": 0.4242, "step": 6330 }, { "epoch": 0.95, "grad_norm": 1.3100966215133667, "learning_rate": 6.27628560025112e-08, "loss": 0.4668, "step": 6331 }, { "epoch": 0.95, "grad_norm": 1.1981130838394165, "learning_rate": 6.237919978417717e-08, "loss": 0.4662, "step": 6332 }, { "epoch": 0.95, "grad_norm": 1.2621006965637207, "learning_rate": 6.199671240808336e-08, "loss": 0.4936, "step": 6333 }, { "epoch": 0.95, "grad_norm": 1.26851487159729, "learning_rate": 6.161539396477234e-08, "loss": 0.503, "step": 6334 }, { "epoch": 0.95, "grad_norm": 1.18439519405365, "learning_rate": 6.123524454451079e-08, "loss": 0.4731, "step": 6335 }, { "epoch": 0.95, "grad_norm": 1.1460027694702148, "learning_rate": 6.085626423728897e-08, "loss": 0.4824, "step": 6336 }, { "epoch": 0.95, "grad_norm": 1.447311282157898, "learning_rate": 6.047845313281842e-08, "loss": 0.4725, "step": 6337 }, { "epoch": 0.95, "grad_norm": 1.0596262216567993, "learning_rate": 6.010181132053649e-08, "loss": 0.5032, "step": 6338 }, { "epoch": 0.95, "grad_norm": 1.1691430807113647, "learning_rate": 5.972633888960133e-08, "loss": 0.4826, "step": 6339 }, { "epoch": 0.95, "grad_norm": 3.092895746231079, "learning_rate": 5.935203592889627e-08, "loss": 0.4845, "step": 6340 }, { "epoch": 0.95, "grad_norm": 1.8030647039413452, "learning_rate": 5.897890252702765e-08, "loss": 0.4724, "step": 6341 }, { "epoch": 0.95, "grad_norm": 1.041519284248352, "learning_rate": 5.86069387723226e-08, "loss": 0.7258, "step": 6342 }, { "epoch": 0.95, "grad_norm": 1.1141091585159302, "learning_rate": 5.823614475283401e-08, "loss": 0.4712, "step": 6343 }, { "epoch": 0.95, "grad_norm": 1.2016409635543823, "learning_rate": 5.786652055633668e-08, "loss": 0.481, "step": 6344 }, { "epoch": 0.95, "grad_norm": 1.0644290447235107, "learning_rate": 5.749806627032895e-08, "loss": 0.4753, "step": 6345 }, { "epoch": 0.95, "grad_norm": 1.180622935295105, "learning_rate": 5.713078198203215e-08, "loss": 0.4279, "step": 6346 }, { "epoch": 0.95, "grad_norm": 1.9846141338348389, "learning_rate": 5.676466777838896e-08, "loss": 0.4656, "step": 6347 }, { "epoch": 0.95, "grad_norm": 1.1517200469970703, "learning_rate": 5.639972374606839e-08, "loss": 0.5365, "step": 6348 }, { "epoch": 0.95, "grad_norm": 1.216528058052063, "learning_rate": 5.603594997145967e-08, "loss": 0.5118, "step": 6349 }, { "epoch": 0.95, "grad_norm": 1.2905166149139404, "learning_rate": 5.5673346540676684e-08, "loss": 0.4733, "step": 6350 }, { "epoch": 0.95, "grad_norm": 1.3490352630615234, "learning_rate": 5.531191353955412e-08, "loss": 0.484, "step": 6351 }, { "epoch": 0.95, "grad_norm": 1.7306586503982544, "learning_rate": 5.495165105365241e-08, "loss": 0.4404, "step": 6352 }, { "epoch": 0.95, "grad_norm": 1.200249195098877, "learning_rate": 5.459255916825335e-08, "loss": 0.4659, "step": 6353 }, { "epoch": 0.95, "grad_norm": 1.4919756650924683, "learning_rate": 5.4234637968361704e-08, "loss": 0.4974, "step": 6354 }, { "epoch": 0.95, "grad_norm": 1.3804247379302979, "learning_rate": 5.387788753870471e-08, "loss": 0.4393, "step": 6355 }, { "epoch": 0.95, "grad_norm": 1.6717658042907715, "learning_rate": 5.3522307963733146e-08, "loss": 0.551, "step": 6356 }, { "epoch": 0.95, "grad_norm": 1.2327864170074463, "learning_rate": 5.3167899327620784e-08, "loss": 0.4444, "step": 6357 }, { "epoch": 0.96, "grad_norm": 1.4990653991699219, "learning_rate": 5.281466171426386e-08, "loss": 0.4382, "step": 6358 }, { "epoch": 0.96, "grad_norm": 2.0008604526519775, "learning_rate": 5.246259520728103e-08, "loss": 0.5569, "step": 6359 }, { "epoch": 0.96, "grad_norm": 1.183708667755127, "learning_rate": 5.211169989001397e-08, "loss": 0.394, "step": 6360 }, { "epoch": 0.96, "grad_norm": 1.1271870136260986, "learning_rate": 5.176197584552789e-08, "loss": 0.426, "step": 6361 }, { "epoch": 0.96, "grad_norm": 1.4129399061203003, "learning_rate": 5.1413423156609355e-08, "loss": 0.4879, "step": 6362 }, { "epoch": 0.96, "grad_norm": 1.0494784116744995, "learning_rate": 5.106604190576902e-08, "loss": 0.4518, "step": 6363 }, { "epoch": 0.96, "grad_norm": 1.1532095670700073, "learning_rate": 5.07198321752389e-08, "loss": 0.5241, "step": 6364 }, { "epoch": 0.96, "grad_norm": 1.10902738571167, "learning_rate": 5.03747940469751e-08, "loss": 0.4141, "step": 6365 }, { "epoch": 0.96, "grad_norm": 1.465057373046875, "learning_rate": 5.003092760265504e-08, "loss": 0.5606, "step": 6366 }, { "epoch": 0.96, "grad_norm": 1.548142671585083, "learning_rate": 4.9688232923679744e-08, "loss": 0.4531, "step": 6367 }, { "epoch": 0.96, "grad_norm": 1.2283333539962769, "learning_rate": 4.9346710091172646e-08, "loss": 0.5079, "step": 6368 }, { "epoch": 0.96, "grad_norm": 2.5211377143859863, "learning_rate": 4.900635918597907e-08, "loss": 0.4049, "step": 6369 }, { "epoch": 0.96, "grad_norm": 1.3132412433624268, "learning_rate": 4.86671802886679e-08, "loss": 0.5224, "step": 6370 }, { "epoch": 0.96, "grad_norm": 1.448166847229004, "learning_rate": 4.832917347952992e-08, "loss": 0.5505, "step": 6371 }, { "epoch": 0.96, "grad_norm": 1.2590383291244507, "learning_rate": 4.799233883857946e-08, "loss": 0.5316, "step": 6372 }, { "epoch": 0.96, "grad_norm": 1.5697314739227295, "learning_rate": 4.7656676445551054e-08, "loss": 0.4854, "step": 6373 }, { "epoch": 0.96, "grad_norm": 1.4245426654815674, "learning_rate": 4.7322186379904486e-08, "loss": 0.4352, "step": 6374 }, { "epoch": 0.96, "grad_norm": 1.0338441133499146, "learning_rate": 4.698886872082087e-08, "loss": 0.4616, "step": 6375 }, { "epoch": 0.96, "grad_norm": 1.9659850597381592, "learning_rate": 4.665672354720319e-08, "loss": 0.4688, "step": 6376 }, { "epoch": 0.96, "grad_norm": 1.5329093933105469, "learning_rate": 4.632575093767744e-08, "loss": 0.457, "step": 6377 }, { "epoch": 0.96, "grad_norm": 1.1989282369613647, "learning_rate": 4.5995950970592616e-08, "loss": 0.4657, "step": 6378 }, { "epoch": 0.96, "grad_norm": 1.1072088479995728, "learning_rate": 4.5667323724019055e-08, "loss": 0.4332, "step": 6379 }, { "epoch": 0.96, "grad_norm": 2.7604477405548096, "learning_rate": 4.533986927575007e-08, "loss": 0.4901, "step": 6380 }, { "epoch": 0.96, "grad_norm": 1.1291857957839966, "learning_rate": 4.501358770330144e-08, "loss": 0.4283, "step": 6381 }, { "epoch": 0.96, "grad_norm": 1.583723783493042, "learning_rate": 4.468847908391083e-08, "loss": 0.4552, "step": 6382 }, { "epoch": 0.96, "grad_norm": 1.1830849647521973, "learning_rate": 4.436454349453834e-08, "loss": 0.4697, "step": 6383 }, { "epoch": 0.96, "grad_norm": 1.3657557964324951, "learning_rate": 4.4041781011867624e-08, "loss": 0.4424, "step": 6384 }, { "epoch": 0.96, "grad_norm": 1.3972066640853882, "learning_rate": 4.3720191712302016e-08, "loss": 0.4279, "step": 6385 }, { "epoch": 0.96, "grad_norm": 1.205125093460083, "learning_rate": 4.3399775671970066e-08, "loss": 0.4393, "step": 6386 }, { "epoch": 0.96, "grad_norm": 1.8724693059921265, "learning_rate": 4.3080532966720546e-08, "loss": 0.4737, "step": 6387 }, { "epoch": 0.96, "grad_norm": 1.288082242012024, "learning_rate": 4.276246367212522e-08, "loss": 0.488, "step": 6388 }, { "epoch": 0.96, "grad_norm": 1.3832508325576782, "learning_rate": 4.244556786347831e-08, "loss": 0.5724, "step": 6389 }, { "epoch": 0.96, "grad_norm": 1.2095081806182861, "learning_rate": 4.2129845615794806e-08, "loss": 0.4731, "step": 6390 }, { "epoch": 0.96, "grad_norm": 1.2910466194152832, "learning_rate": 4.1815297003814923e-08, "loss": 0.5377, "step": 6391 }, { "epoch": 0.96, "grad_norm": 1.3027790784835815, "learning_rate": 4.150192210199799e-08, "loss": 0.4559, "step": 6392 }, { "epoch": 0.96, "grad_norm": 1.43697190284729, "learning_rate": 4.1189720984526337e-08, "loss": 0.4984, "step": 6393 }, { "epoch": 0.96, "grad_norm": 1.2778549194335938, "learning_rate": 4.087869372530584e-08, "loss": 0.4348, "step": 6394 }, { "epoch": 0.96, "grad_norm": 1.676323413848877, "learning_rate": 4.056884039796261e-08, "loss": 0.5254, "step": 6395 }, { "epoch": 0.96, "grad_norm": 1.3182823657989502, "learning_rate": 4.026016107584574e-08, "loss": 0.4859, "step": 6396 }, { "epoch": 0.96, "grad_norm": 1.2978887557983398, "learning_rate": 3.995265583202623e-08, "loss": 0.4091, "step": 6397 }, { "epoch": 0.96, "grad_norm": 1.4185787439346313, "learning_rate": 3.964632473929808e-08, "loss": 0.4714, "step": 6398 }, { "epoch": 0.96, "grad_norm": 2.3615105152130127, "learning_rate": 3.9341167870176056e-08, "loss": 0.5614, "step": 6399 }, { "epoch": 0.96, "grad_norm": 1.3553584814071655, "learning_rate": 3.903718529689682e-08, "loss": 0.3916, "step": 6400 }, { "epoch": 0.96, "grad_norm": 1.2113322019577026, "learning_rate": 3.873437709142058e-08, "loss": 0.5101, "step": 6401 }, { "epoch": 0.96, "grad_norm": 1.2291970252990723, "learning_rate": 3.843274332542834e-08, "loss": 0.4442, "step": 6402 }, { "epoch": 0.96, "grad_norm": 1.2026106119155884, "learning_rate": 3.813228407032299e-08, "loss": 0.4054, "step": 6403 }, { "epoch": 0.96, "grad_norm": 2.132495641708374, "learning_rate": 3.783299939722984e-08, "loss": 0.4208, "step": 6404 }, { "epoch": 0.96, "grad_norm": 1.6255266666412354, "learning_rate": 3.7534889376996676e-08, "loss": 0.4668, "step": 6405 }, { "epoch": 0.96, "grad_norm": 1.0125855207443237, "learning_rate": 3.723795408019204e-08, "loss": 0.6876, "step": 6406 }, { "epoch": 0.96, "grad_norm": 1.2290571928024292, "learning_rate": 3.6942193577106377e-08, "loss": 0.5047, "step": 6407 }, { "epoch": 0.96, "grad_norm": 1.1420401334762573, "learning_rate": 3.6647607937754235e-08, "loss": 0.4248, "step": 6408 }, { "epoch": 0.96, "grad_norm": 1.2638883590698242, "learning_rate": 3.635419723186984e-08, "loss": 0.4359, "step": 6409 }, { "epoch": 0.96, "grad_norm": 1.4000290632247925, "learning_rate": 3.606196152890928e-08, "loss": 0.4498, "step": 6410 }, { "epoch": 0.96, "grad_norm": 1.313783884048462, "learning_rate": 3.5770900898051685e-08, "loss": 0.5263, "step": 6411 }, { "epoch": 0.96, "grad_norm": 1.3939416408538818, "learning_rate": 3.548101540819693e-08, "loss": 0.4965, "step": 6412 }, { "epoch": 0.96, "grad_norm": 1.453622817993164, "learning_rate": 3.5192305127967894e-08, "loss": 0.4469, "step": 6413 }, { "epoch": 0.96, "grad_norm": 2.0196666717529297, "learning_rate": 3.4904770125707696e-08, "loss": 0.5856, "step": 6414 }, { "epoch": 0.96, "grad_norm": 1.0887237787246704, "learning_rate": 3.4618410469483e-08, "loss": 0.4382, "step": 6415 }, { "epoch": 0.96, "grad_norm": 1.4916820526123047, "learning_rate": 3.4333226227080685e-08, "loss": 0.4393, "step": 6416 }, { "epoch": 0.96, "grad_norm": 1.1582444906234741, "learning_rate": 3.40492174660112e-08, "loss": 0.674, "step": 6417 }, { "epoch": 0.96, "grad_norm": 1.0743165016174316, "learning_rate": 3.376638425350409e-08, "loss": 0.7294, "step": 6418 }, { "epoch": 0.96, "grad_norm": 1.1883723735809326, "learning_rate": 3.348472665651248e-08, "loss": 0.4259, "step": 6419 }, { "epoch": 0.96, "grad_norm": 1.1881664991378784, "learning_rate": 3.3204244741711913e-08, "loss": 0.4422, "step": 6420 }, { "epoch": 0.96, "grad_norm": 1.1686804294586182, "learning_rate": 3.292493857549761e-08, "loss": 0.4697, "step": 6421 }, { "epoch": 0.96, "grad_norm": 1.4147392511367798, "learning_rate": 3.2646808223987804e-08, "loss": 0.4582, "step": 6422 }, { "epoch": 0.96, "grad_norm": 1.2092987298965454, "learning_rate": 3.236985375302148e-08, "loss": 0.4163, "step": 6423 }, { "epoch": 0.96, "grad_norm": 1.5284786224365234, "learning_rate": 3.2094075228160656e-08, "loss": 0.4385, "step": 6424 }, { "epoch": 0.97, "grad_norm": 1.232529878616333, "learning_rate": 3.1819472714686994e-08, "loss": 0.4593, "step": 6425 }, { "epoch": 0.97, "grad_norm": 1.2241843938827515, "learning_rate": 3.1546046277605716e-08, "loss": 0.4123, "step": 6426 }, { "epoch": 0.97, "grad_norm": 1.1170227527618408, "learning_rate": 3.127379598164282e-08, "loss": 0.4444, "step": 6427 }, { "epoch": 0.97, "grad_norm": 1.3886311054229736, "learning_rate": 3.100272189124509e-08, "loss": 0.5007, "step": 6428 }, { "epoch": 0.97, "grad_norm": 1.225082278251648, "learning_rate": 3.0732824070582314e-08, "loss": 0.4429, "step": 6429 }, { "epoch": 0.97, "grad_norm": 1.0639877319335938, "learning_rate": 3.0464102583545595e-08, "loss": 0.4356, "step": 6430 }, { "epoch": 0.97, "grad_norm": 1.5302420854568481, "learning_rate": 3.0196557493745726e-08, "loss": 0.4154, "step": 6431 }, { "epoch": 0.97, "grad_norm": 1.8590818643569946, "learning_rate": 2.9930188864518153e-08, "loss": 0.5017, "step": 6432 }, { "epoch": 0.97, "grad_norm": 1.198996901512146, "learning_rate": 2.9664996758916874e-08, "loss": 0.4414, "step": 6433 }, { "epoch": 0.97, "grad_norm": 1.2120656967163086, "learning_rate": 2.9400981239719463e-08, "loss": 0.5013, "step": 6434 }, { "epoch": 0.97, "grad_norm": 1.6320310831069946, "learning_rate": 2.9138142369423694e-08, "loss": 0.4931, "step": 6435 }, { "epoch": 0.97, "grad_norm": 1.5085234642028809, "learning_rate": 2.8876480210248693e-08, "loss": 0.4996, "step": 6436 }, { "epoch": 0.97, "grad_norm": 2.7455945014953613, "learning_rate": 2.8615994824136572e-08, "loss": 0.4359, "step": 6437 }, { "epoch": 0.97, "grad_norm": 1.3450493812561035, "learning_rate": 2.8356686272749678e-08, "loss": 0.4976, "step": 6438 }, { "epoch": 0.97, "grad_norm": 1.0768003463745117, "learning_rate": 2.8098554617471685e-08, "loss": 0.4217, "step": 6439 }, { "epoch": 0.97, "grad_norm": 1.1085306406021118, "learning_rate": 2.78415999194076e-08, "loss": 0.3818, "step": 6440 }, { "epoch": 0.97, "grad_norm": 1.175804615020752, "learning_rate": 2.7585822239384884e-08, "loss": 0.4902, "step": 6441 }, { "epoch": 0.97, "grad_norm": 1.2081719636917114, "learning_rate": 2.7331221637951767e-08, "loss": 0.4714, "step": 6442 }, { "epoch": 0.97, "grad_norm": 1.2116611003875732, "learning_rate": 2.7077798175377256e-08, "loss": 0.437, "step": 6443 }, { "epoch": 0.97, "grad_norm": 1.7125883102416992, "learning_rate": 2.6825551911652813e-08, "loss": 0.4731, "step": 6444 }, { "epoch": 0.97, "grad_norm": 16.531421661376953, "learning_rate": 2.6574482906489563e-08, "loss": 0.3765, "step": 6445 }, { "epoch": 0.97, "grad_norm": 2.8123369216918945, "learning_rate": 2.6324591219322183e-08, "loss": 0.5031, "step": 6446 }, { "epoch": 0.97, "grad_norm": 1.404613971710205, "learning_rate": 2.6075876909304465e-08, "loss": 0.6036, "step": 6447 }, { "epoch": 0.97, "grad_norm": 1.1563831567764282, "learning_rate": 2.5828340035313205e-08, "loss": 0.4346, "step": 6448 }, { "epoch": 0.97, "grad_norm": 1.2274672985076904, "learning_rate": 2.5581980655945415e-08, "loss": 0.4638, "step": 6449 }, { "epoch": 0.97, "grad_norm": 1.170447587966919, "learning_rate": 2.5336798829519448e-08, "loss": 0.448, "step": 6450 }, { "epoch": 0.97, "grad_norm": 1.2345649003982544, "learning_rate": 2.50927946140761e-08, "loss": 0.4696, "step": 6451 }, { "epoch": 0.97, "grad_norm": 1.277222990989685, "learning_rate": 2.484996806737583e-08, "loss": 0.5051, "step": 6452 }, { "epoch": 0.97, "grad_norm": 1.1121746301651, "learning_rate": 2.4608319246900436e-08, "loss": 0.6861, "step": 6453 }, { "epoch": 0.97, "grad_norm": 1.2612348794937134, "learning_rate": 2.4367848209854717e-08, "loss": 0.4515, "step": 6454 }, { "epoch": 0.97, "grad_norm": 1.478682518005371, "learning_rate": 2.412855501316258e-08, "loss": 0.5073, "step": 6455 }, { "epoch": 0.97, "grad_norm": 1.4903665781021118, "learning_rate": 2.3890439713470382e-08, "loss": 0.5172, "step": 6456 }, { "epoch": 0.97, "grad_norm": 1.0746333599090576, "learning_rate": 2.36535023671447e-08, "loss": 0.6758, "step": 6457 }, { "epoch": 0.97, "grad_norm": 1.2502976655960083, "learning_rate": 2.3417743030274e-08, "loss": 0.4482, "step": 6458 }, { "epoch": 0.97, "grad_norm": 1.1144837141036987, "learning_rate": 2.318316175866697e-08, "loss": 0.4755, "step": 6459 }, { "epoch": 0.97, "grad_norm": 1.2498667240142822, "learning_rate": 2.294975860785531e-08, "loss": 0.4593, "step": 6460 }, { "epoch": 0.97, "grad_norm": 1.1095200777053833, "learning_rate": 2.2717533633090373e-08, "loss": 0.4463, "step": 6461 }, { "epoch": 0.97, "grad_norm": 1.2201648950576782, "learning_rate": 2.2486486889343738e-08, "loss": 0.4576, "step": 6462 }, { "epoch": 0.97, "grad_norm": 1.1106226444244385, "learning_rate": 2.225661843131055e-08, "loss": 0.4041, "step": 6463 }, { "epoch": 0.97, "grad_norm": 1.5256267786026, "learning_rate": 2.2027928313405056e-08, "loss": 0.4589, "step": 6464 }, { "epoch": 0.97, "grad_norm": 1.195257544517517, "learning_rate": 2.180041658976284e-08, "loss": 0.4545, "step": 6465 }, { "epoch": 0.97, "grad_norm": 1.4974431991577148, "learning_rate": 2.1574083314241933e-08, "loss": 0.4926, "step": 6466 }, { "epoch": 0.97, "grad_norm": 1.3331302404403687, "learning_rate": 2.1348928540418923e-08, "loss": 0.4937, "step": 6467 }, { "epoch": 0.97, "grad_norm": 1.1114842891693115, "learning_rate": 2.1124952321594505e-08, "loss": 0.4802, "step": 6468 }, { "epoch": 0.97, "grad_norm": 1.7998888492584229, "learning_rate": 2.090215471078738e-08, "loss": 0.5213, "step": 6469 }, { "epoch": 0.97, "grad_norm": 1.5532550811767578, "learning_rate": 2.0680535760738695e-08, "loss": 0.4499, "step": 6470 }, { "epoch": 0.97, "grad_norm": 1.3801575899124146, "learning_rate": 2.0460095523910928e-08, "loss": 0.4793, "step": 6471 }, { "epoch": 0.97, "grad_norm": 1.2686493396759033, "learning_rate": 2.0240834052487335e-08, "loss": 0.4793, "step": 6472 }, { "epoch": 0.97, "grad_norm": 1.4037790298461914, "learning_rate": 2.0022751398371397e-08, "loss": 0.4907, "step": 6473 }, { "epoch": 0.97, "grad_norm": 1.2361035346984863, "learning_rate": 1.9805847613187936e-08, "loss": 0.5119, "step": 6474 }, { "epoch": 0.97, "grad_norm": 1.1188669204711914, "learning_rate": 1.959012274828309e-08, "loss": 0.4254, "step": 6475 }, { "epoch": 0.97, "grad_norm": 1.366775393486023, "learning_rate": 1.93755768547238e-08, "loss": 0.5294, "step": 6476 }, { "epoch": 0.97, "grad_norm": 1.1607519388198853, "learning_rate": 1.916220998329721e-08, "loss": 0.4866, "step": 6477 }, { "epoch": 0.97, "grad_norm": 1.1938663721084595, "learning_rate": 1.895002218451236e-08, "loss": 0.5665, "step": 6478 }, { "epoch": 0.97, "grad_norm": 1.425392508506775, "learning_rate": 1.873901350859797e-08, "loss": 0.4563, "step": 6479 }, { "epoch": 0.97, "grad_norm": 1.7919881343841553, "learning_rate": 1.8529184005505184e-08, "loss": 0.4614, "step": 6480 }, { "epoch": 0.97, "grad_norm": 1.0482043027877808, "learning_rate": 1.832053372490483e-08, "loss": 0.4775, "step": 6481 }, { "epoch": 0.97, "grad_norm": 1.5271514654159546, "learning_rate": 1.8113062716189622e-08, "loss": 0.4092, "step": 6482 }, { "epoch": 0.97, "grad_norm": 1.0430903434753418, "learning_rate": 1.790677102847138e-08, "loss": 0.4511, "step": 6483 }, { "epoch": 0.97, "grad_norm": 1.3931684494018555, "learning_rate": 1.7701658710584935e-08, "loss": 0.3734, "step": 6484 }, { "epoch": 0.97, "grad_norm": 1.1573240756988525, "learning_rate": 1.7497725811083665e-08, "loss": 0.4309, "step": 6485 }, { "epoch": 0.97, "grad_norm": 1.2147595882415771, "learning_rate": 1.7294972378243957e-08, "loss": 0.7139, "step": 6486 }, { "epoch": 0.97, "grad_norm": 1.9770656824111938, "learning_rate": 1.7093398460061307e-08, "loss": 0.5266, "step": 6487 }, { "epoch": 0.97, "grad_norm": 1.075493574142456, "learning_rate": 1.6893004104253095e-08, "loss": 0.4759, "step": 6488 }, { "epoch": 0.97, "grad_norm": 1.6756329536437988, "learning_rate": 1.669378935825694e-08, "loss": 0.4719, "step": 6489 }, { "epoch": 0.97, "grad_norm": 1.2892950773239136, "learning_rate": 1.6495754269230668e-08, "loss": 0.4447, "step": 6490 }, { "epoch": 0.98, "grad_norm": 1.3494101762771606, "learning_rate": 1.629889888405456e-08, "loss": 0.5211, "step": 6491 }, { "epoch": 0.98, "grad_norm": 1.3454186916351318, "learning_rate": 1.6103223249327447e-08, "loss": 0.4102, "step": 6492 }, { "epoch": 0.98, "grad_norm": 1.087449550628662, "learning_rate": 1.5908727411371172e-08, "loss": 0.6971, "step": 6493 }, { "epoch": 0.98, "grad_norm": 1.2499090433120728, "learning_rate": 1.571541141622612e-08, "loss": 0.4655, "step": 6494 }, { "epoch": 0.98, "grad_norm": 1.2455006837844849, "learning_rate": 1.5523275309654584e-08, "loss": 0.4603, "step": 6495 }, { "epoch": 0.98, "grad_norm": 1.4505507946014404, "learning_rate": 1.5332319137140173e-08, "loss": 0.4375, "step": 6496 }, { "epoch": 0.98, "grad_norm": 1.5338350534439087, "learning_rate": 1.5142542943885618e-08, "loss": 0.4599, "step": 6497 }, { "epoch": 0.98, "grad_norm": 1.0606495141983032, "learning_rate": 1.495394677481443e-08, "loss": 0.6715, "step": 6498 }, { "epoch": 0.98, "grad_norm": 2.9046130180358887, "learning_rate": 1.4766530674573122e-08, "loss": 0.5211, "step": 6499 }, { "epoch": 0.98, "grad_norm": 1.4641022682189941, "learning_rate": 1.4580294687526198e-08, "loss": 0.4526, "step": 6500 }, { "epoch": 0.98, "grad_norm": 1.6208966970443726, "learning_rate": 1.4395238857759508e-08, "loss": 0.473, "step": 6501 }, { "epoch": 0.98, "grad_norm": 1.3961838483810425, "learning_rate": 1.4211363229080233e-08, "loss": 0.4463, "step": 6502 }, { "epoch": 0.98, "grad_norm": 1.5109872817993164, "learning_rate": 1.4028667845015775e-08, "loss": 0.4338, "step": 6503 }, { "epoch": 0.98, "grad_norm": 1.4217674732208252, "learning_rate": 1.3847152748813763e-08, "loss": 0.4507, "step": 6504 }, { "epoch": 0.98, "grad_norm": 1.1835612058639526, "learning_rate": 1.3666817983442604e-08, "loss": 0.4172, "step": 6505 }, { "epoch": 0.98, "grad_norm": 1.3058656454086304, "learning_rate": 1.3487663591592037e-08, "loss": 0.4535, "step": 6506 }, { "epoch": 0.98, "grad_norm": 1.1703338623046875, "learning_rate": 1.3309689615672029e-08, "loss": 0.4613, "step": 6507 }, { "epoch": 0.98, "grad_norm": 1.3179799318313599, "learning_rate": 1.3132896097811653e-08, "loss": 0.4981, "step": 6508 }, { "epoch": 0.98, "grad_norm": 1.4616698026657104, "learning_rate": 1.2957283079862992e-08, "loss": 0.5428, "step": 6509 }, { "epoch": 0.98, "grad_norm": 1.0607259273529053, "learning_rate": 1.2782850603397234e-08, "loss": 0.4905, "step": 6510 }, { "epoch": 0.98, "grad_norm": 1.357379674911499, "learning_rate": 1.2609598709705794e-08, "loss": 0.51, "step": 6511 }, { "epoch": 0.98, "grad_norm": 1.6374133825302124, "learning_rate": 1.2437527439801422e-08, "loss": 0.4857, "step": 6512 }, { "epoch": 0.98, "grad_norm": 1.6848649978637695, "learning_rate": 1.2266636834417645e-08, "loss": 0.4664, "step": 6513 }, { "epoch": 0.98, "grad_norm": 1.2463326454162598, "learning_rate": 1.2096926934007103e-08, "loss": 0.5526, "step": 6514 }, { "epoch": 0.98, "grad_norm": 1.1294342279434204, "learning_rate": 1.1928397778744327e-08, "loss": 0.4665, "step": 6515 }, { "epoch": 0.98, "grad_norm": 1.2078651189804077, "learning_rate": 1.1761049408524072e-08, "loss": 0.6831, "step": 6516 }, { "epoch": 0.98, "grad_norm": 1.408690333366394, "learning_rate": 1.159488186296076e-08, "loss": 0.4273, "step": 6517 }, { "epoch": 0.98, "grad_norm": 1.1888436079025269, "learning_rate": 1.1429895181390704e-08, "loss": 0.7118, "step": 6518 }, { "epoch": 0.98, "grad_norm": 1.1336827278137207, "learning_rate": 1.1266089402868773e-08, "loss": 0.4935, "step": 6519 }, { "epoch": 0.98, "grad_norm": 1.3291648626327515, "learning_rate": 1.1103464566171729e-08, "loss": 0.4975, "step": 6520 }, { "epoch": 0.98, "grad_norm": 1.3032941818237305, "learning_rate": 1.0942020709797107e-08, "loss": 0.4472, "step": 6521 }, { "epoch": 0.98, "grad_norm": 0.9771184921264648, "learning_rate": 1.0781757871961007e-08, "loss": 0.4382, "step": 6522 }, { "epoch": 0.98, "grad_norm": 1.1396502256393433, "learning_rate": 1.0622676090601969e-08, "loss": 0.477, "step": 6523 }, { "epoch": 0.98, "grad_norm": 1.003466248512268, "learning_rate": 1.0464775403378202e-08, "loss": 0.4123, "step": 6524 }, { "epoch": 0.98, "grad_norm": 1.119829535484314, "learning_rate": 1.0308055847667586e-08, "loss": 0.4729, "step": 6525 }, { "epoch": 0.98, "grad_norm": 1.1957907676696777, "learning_rate": 1.0152517460569333e-08, "loss": 0.4034, "step": 6526 }, { "epoch": 0.98, "grad_norm": 1.118333339691162, "learning_rate": 9.99816027890288e-09, "loss": 0.4013, "step": 6527 }, { "epoch": 0.98, "grad_norm": 1.5535695552825928, "learning_rate": 9.844984339207886e-09, "loss": 0.4307, "step": 6528 }, { "epoch": 0.98, "grad_norm": 1.5883654356002808, "learning_rate": 9.692989677744235e-09, "loss": 0.3282, "step": 6529 }, { "epoch": 0.98, "grad_norm": 1.5550665855407715, "learning_rate": 9.542176330493147e-09, "loss": 0.4795, "step": 6530 }, { "epoch": 0.98, "grad_norm": 1.9106321334838867, "learning_rate": 9.392544333154397e-09, "loss": 0.4954, "step": 6531 }, { "epoch": 0.98, "grad_norm": 1.2786811590194702, "learning_rate": 9.244093721149094e-09, "loss": 0.4402, "step": 6532 }, { "epoch": 0.98, "grad_norm": 1.1403621435165405, "learning_rate": 9.096824529619686e-09, "loss": 0.4364, "step": 6533 }, { "epoch": 0.98, "grad_norm": 1.1667561531066895, "learning_rate": 8.950736793427172e-09, "loss": 0.3883, "step": 6534 }, { "epoch": 0.98, "grad_norm": 1.8597928285598755, "learning_rate": 8.805830547154448e-09, "loss": 0.4591, "step": 6535 }, { "epoch": 0.98, "grad_norm": 1.1872494220733643, "learning_rate": 8.662105825103518e-09, "loss": 0.5294, "step": 6536 }, { "epoch": 0.98, "grad_norm": 1.238773226737976, "learning_rate": 8.51956266129661e-09, "loss": 0.5469, "step": 6537 }, { "epoch": 0.98, "grad_norm": 1.32309889793396, "learning_rate": 8.378201089477844e-09, "loss": 0.5633, "step": 6538 }, { "epoch": 0.98, "grad_norm": 2.587364435195923, "learning_rate": 8.238021143109898e-09, "loss": 0.4589, "step": 6539 }, { "epoch": 0.98, "grad_norm": 1.3615851402282715, "learning_rate": 8.099022855376782e-09, "loss": 0.5317, "step": 6540 }, { "epoch": 0.98, "grad_norm": 1.21680748462677, "learning_rate": 7.96120625918162e-09, "loss": 0.4684, "step": 6541 }, { "epoch": 0.98, "grad_norm": 1.0844584703445435, "learning_rate": 7.824571387149982e-09, "loss": 0.5066, "step": 6542 }, { "epoch": 0.98, "grad_norm": 1.1460307836532593, "learning_rate": 7.68911827162544e-09, "loss": 0.5396, "step": 6543 }, { "epoch": 0.98, "grad_norm": 1.2064989805221558, "learning_rate": 7.554846944672345e-09, "loss": 0.4999, "step": 6544 }, { "epoch": 0.98, "grad_norm": 1.4336892366409302, "learning_rate": 7.421757438076382e-09, "loss": 0.5077, "step": 6545 }, { "epoch": 0.98, "grad_norm": 1.2142618894577026, "learning_rate": 7.289849783342351e-09, "loss": 0.4751, "step": 6546 }, { "epoch": 0.98, "grad_norm": 1.125733494758606, "learning_rate": 7.159124011695828e-09, "loss": 0.4764, "step": 6547 }, { "epoch": 0.98, "grad_norm": 1.3133701086044312, "learning_rate": 7.029580154082061e-09, "loss": 0.4325, "step": 6548 }, { "epoch": 0.98, "grad_norm": 2.08272385597229, "learning_rate": 6.901218241167629e-09, "loss": 0.4858, "step": 6549 }, { "epoch": 0.98, "grad_norm": 1.1324065923690796, "learning_rate": 6.774038303338226e-09, "loss": 0.443, "step": 6550 }, { "epoch": 0.98, "grad_norm": 1.115015983581543, "learning_rate": 6.648040370699771e-09, "loss": 0.4304, "step": 6551 }, { "epoch": 0.98, "grad_norm": 1.0292776823043823, "learning_rate": 6.52322447307896e-09, "loss": 0.4463, "step": 6552 }, { "epoch": 0.98, "grad_norm": 1.3269453048706055, "learning_rate": 6.3995906400221575e-09, "loss": 0.4884, "step": 6553 }, { "epoch": 0.98, "grad_norm": 2.751124858856201, "learning_rate": 6.277138900797064e-09, "loss": 0.4229, "step": 6554 }, { "epoch": 0.98, "grad_norm": 1.3103632926940918, "learning_rate": 6.155869284389937e-09, "loss": 0.4136, "step": 6555 }, { "epoch": 0.98, "grad_norm": 1.3291497230529785, "learning_rate": 6.035781819507813e-09, "loss": 0.42, "step": 6556 }, { "epoch": 0.98, "grad_norm": 1.3289257287979126, "learning_rate": 5.916876534578508e-09, "loss": 0.475, "step": 6557 }, { "epoch": 0.99, "grad_norm": 1.274371862411499, "learning_rate": 5.7991534577495065e-09, "loss": 0.4371, "step": 6558 }, { "epoch": 0.99, "grad_norm": 1.197880744934082, "learning_rate": 5.682612616887961e-09, "loss": 0.4412, "step": 6559 }, { "epoch": 0.99, "grad_norm": 1.318459153175354, "learning_rate": 5.567254039581804e-09, "loss": 0.4655, "step": 6560 }, { "epoch": 0.99, "grad_norm": 1.1414963006973267, "learning_rate": 5.453077753139191e-09, "loss": 0.4514, "step": 6561 }, { "epoch": 0.99, "grad_norm": 1.2298868894577026, "learning_rate": 5.3400837845873906e-09, "loss": 0.4325, "step": 6562 }, { "epoch": 0.99, "grad_norm": 1.0851366519927979, "learning_rate": 5.228272160675563e-09, "loss": 0.4804, "step": 6563 }, { "epoch": 0.99, "grad_norm": 1.0908024311065674, "learning_rate": 5.117642907870868e-09, "loss": 0.4578, "step": 6564 }, { "epoch": 0.99, "grad_norm": 1.3559277057647705, "learning_rate": 5.008196052362912e-09, "loss": 0.4846, "step": 6565 }, { "epoch": 0.99, "grad_norm": 1.1171698570251465, "learning_rate": 4.899931620058751e-09, "loss": 0.4362, "step": 6566 }, { "epoch": 0.99, "grad_norm": 1.6982452869415283, "learning_rate": 4.792849636588437e-09, "loss": 0.4528, "step": 6567 }, { "epoch": 0.99, "grad_norm": 0.9788281917572021, "learning_rate": 4.686950127298917e-09, "loss": 0.3988, "step": 6568 }, { "epoch": 0.99, "grad_norm": 1.610626220703125, "learning_rate": 4.582233117260693e-09, "loss": 0.5408, "step": 6569 }, { "epoch": 0.99, "grad_norm": 1.3329657316207886, "learning_rate": 4.478698631260603e-09, "loss": 0.5238, "step": 6570 }, { "epoch": 0.99, "grad_norm": 1.6159418821334839, "learning_rate": 4.3763466938095965e-09, "loss": 0.5339, "step": 6571 }, { "epoch": 0.99, "grad_norm": 1.1567901372909546, "learning_rate": 4.275177329134961e-09, "loss": 0.4813, "step": 6572 }, { "epoch": 0.99, "grad_norm": 1.5410300493240356, "learning_rate": 4.175190561186426e-09, "loss": 0.552, "step": 6573 }, { "epoch": 0.99, "grad_norm": 1.2599109411239624, "learning_rate": 4.076386413632838e-09, "loss": 0.4451, "step": 6574 }, { "epoch": 0.99, "grad_norm": 1.2713203430175781, "learning_rate": 3.978764909863264e-09, "loss": 0.4789, "step": 6575 }, { "epoch": 0.99, "grad_norm": 1.4056956768035889, "learning_rate": 3.882326072987552e-09, "loss": 0.5172, "step": 6576 }, { "epoch": 0.99, "grad_norm": 1.0376335382461548, "learning_rate": 3.787069925833553e-09, "loss": 0.4511, "step": 6577 }, { "epoch": 0.99, "grad_norm": 1.3150761127471924, "learning_rate": 3.6929964909515615e-09, "loss": 0.5515, "step": 6578 }, { "epoch": 0.99, "grad_norm": 1.3333179950714111, "learning_rate": 3.6001057906104307e-09, "loss": 0.5086, "step": 6579 }, { "epoch": 0.99, "grad_norm": 1.3534773588180542, "learning_rate": 3.5083978467992384e-09, "loss": 0.4846, "step": 6580 }, { "epoch": 0.99, "grad_norm": 1.854665994644165, "learning_rate": 3.4178726812272857e-09, "loss": 0.4774, "step": 6581 }, { "epoch": 0.99, "grad_norm": 1.60867178440094, "learning_rate": 3.3285303153240965e-09, "loss": 0.46, "step": 6582 }, { "epoch": 0.99, "grad_norm": 1.2760547399520874, "learning_rate": 3.2403707702388653e-09, "loss": 0.4519, "step": 6583 }, { "epoch": 0.99, "grad_norm": 1.1147000789642334, "learning_rate": 3.153394066840454e-09, "loss": 0.452, "step": 6584 }, { "epoch": 0.99, "grad_norm": 1.307886004447937, "learning_rate": 3.0676002257190583e-09, "loss": 0.449, "step": 6585 }, { "epoch": 0.99, "grad_norm": 1.1371709108352661, "learning_rate": 2.9829892671828785e-09, "loss": 0.4732, "step": 6586 }, { "epoch": 0.99, "grad_norm": 2.026939868927002, "learning_rate": 2.899561211262003e-09, "loss": 0.4459, "step": 6587 }, { "epoch": 0.99, "grad_norm": 1.6575475931167603, "learning_rate": 2.817316077705079e-09, "loss": 0.4134, "step": 6588 }, { "epoch": 0.99, "grad_norm": 1.1321301460266113, "learning_rate": 2.7362538859820873e-09, "loss": 0.5028, "step": 6589 }, { "epoch": 0.99, "grad_norm": 1.6459823846817017, "learning_rate": 2.6563746552815683e-09, "loss": 0.4979, "step": 6590 }, { "epoch": 0.99, "grad_norm": 1.3041099309921265, "learning_rate": 2.5776784045128402e-09, "loss": 0.5175, "step": 6591 }, { "epoch": 0.99, "grad_norm": 1.451062560081482, "learning_rate": 2.5001651523048897e-09, "loss": 0.4635, "step": 6592 }, { "epoch": 0.99, "grad_norm": 1.2380998134613037, "learning_rate": 2.4238349170069287e-09, "loss": 0.376, "step": 6593 }, { "epoch": 0.99, "grad_norm": 1.1580040454864502, "learning_rate": 2.3486877166878363e-09, "loss": 0.487, "step": 6594 }, { "epoch": 0.99, "grad_norm": 1.4495079517364502, "learning_rate": 2.2747235691367164e-09, "loss": 0.4689, "step": 6595 }, { "epoch": 0.99, "grad_norm": 0.967579185962677, "learning_rate": 2.201942491862341e-09, "loss": 0.66, "step": 6596 }, { "epoch": 0.99, "grad_norm": 1.2749197483062744, "learning_rate": 2.1303445020937063e-09, "loss": 0.7438, "step": 6597 }, { "epoch": 0.99, "grad_norm": 1.1991722583770752, "learning_rate": 2.059929616780032e-09, "loss": 0.3574, "step": 6598 }, { "epoch": 0.99, "grad_norm": 1.160581111907959, "learning_rate": 1.9906978525896513e-09, "loss": 0.4946, "step": 6599 }, { "epoch": 0.99, "grad_norm": 1.0309306383132935, "learning_rate": 1.9226492259111216e-09, "loss": 0.4335, "step": 6600 }, { "epoch": 0.99, "grad_norm": 1.2188133001327515, "learning_rate": 1.855783752852669e-09, "loss": 0.4517, "step": 6601 }, { "epoch": 0.99, "grad_norm": 1.3173702955245972, "learning_rate": 1.7901014492438528e-09, "loss": 0.535, "step": 6602 }, { "epoch": 0.99, "grad_norm": 1.1533011198043823, "learning_rate": 1.7256023306322366e-09, "loss": 0.4904, "step": 6603 }, { "epoch": 0.99, "grad_norm": 1.835828185081482, "learning_rate": 1.6622864122867176e-09, "loss": 0.5462, "step": 6604 }, { "epoch": 0.99, "grad_norm": 1.1804709434509277, "learning_rate": 1.6001537091947518e-09, "loss": 0.437, "step": 6605 }, { "epoch": 0.99, "grad_norm": 2.1992244720458984, "learning_rate": 1.5392042360656834e-09, "loss": 0.4813, "step": 6606 }, { "epoch": 0.99, "grad_norm": 1.4475458860397339, "learning_rate": 1.479438007326306e-09, "loss": 0.549, "step": 6607 }, { "epoch": 0.99, "grad_norm": 1.0532828569412231, "learning_rate": 1.4208550371258568e-09, "loss": 0.6959, "step": 6608 }, { "epoch": 0.99, "grad_norm": 1.397408127784729, "learning_rate": 1.3634553393310213e-09, "loss": 0.4644, "step": 6609 }, { "epoch": 0.99, "grad_norm": 1.206068992614746, "learning_rate": 1.3072389275303742e-09, "loss": 0.7143, "step": 6610 }, { "epoch": 0.99, "grad_norm": 1.046967625617981, "learning_rate": 1.252205815031049e-09, "loss": 0.4704, "step": 6611 }, { "epoch": 0.99, "grad_norm": 1.2138469219207764, "learning_rate": 1.1983560148609575e-09, "loss": 0.7314, "step": 6612 }, { "epoch": 0.99, "grad_norm": 1.2519057989120483, "learning_rate": 1.1456895397671253e-09, "loss": 0.5423, "step": 6613 }, { "epoch": 0.99, "grad_norm": 1.290885090827942, "learning_rate": 1.094206402217357e-09, "loss": 0.498, "step": 6614 }, { "epoch": 0.99, "grad_norm": 1.3178579807281494, "learning_rate": 1.043906614398571e-09, "loss": 0.4712, "step": 6615 }, { "epoch": 0.99, "grad_norm": 1.303978443145752, "learning_rate": 9.94790188217909e-10, "loss": 0.4527, "step": 6616 }, { "epoch": 0.99, "grad_norm": 1.2067432403564453, "learning_rate": 9.468571353021816e-10, "loss": 0.5014, "step": 6617 }, { "epoch": 0.99, "grad_norm": 1.2731270790100098, "learning_rate": 9.001074669978682e-10, "loss": 0.3807, "step": 6618 }, { "epoch": 0.99, "grad_norm": 1.1141492128372192, "learning_rate": 8.545411943722271e-10, "loss": 0.4231, "step": 6619 }, { "epoch": 0.99, "grad_norm": 1.2962679862976074, "learning_rate": 8.101583282110747e-10, "loss": 0.441, "step": 6620 }, { "epoch": 0.99, "grad_norm": 1.339800477027893, "learning_rate": 7.669588790221172e-10, "loss": 0.4426, "step": 6621 }, { "epoch": 0.99, "grad_norm": 1.6862704753875732, "learning_rate": 7.249428570299532e-10, "loss": 0.4637, "step": 6622 }, { "epoch": 0.99, "grad_norm": 1.3498127460479736, "learning_rate": 6.841102721821813e-10, "loss": 0.505, "step": 6623 }, { "epoch": 1.0, "grad_norm": 1.739976406097412, "learning_rate": 6.444611341432927e-10, "loss": 0.5085, "step": 6624 }, { "epoch": 1.0, "grad_norm": 1.1581361293792725, "learning_rate": 6.059954523002232e-10, "loss": 0.4316, "step": 6625 }, { "epoch": 1.0, "grad_norm": 1.8274320363998413, "learning_rate": 5.687132357584669e-10, "loss": 0.4639, "step": 6626 }, { "epoch": 1.0, "grad_norm": 1.608225703239441, "learning_rate": 5.326144933431864e-10, "loss": 0.459, "step": 6627 }, { "epoch": 1.0, "grad_norm": 1.1995601654052734, "learning_rate": 4.976992336003239e-10, "loss": 0.3932, "step": 6628 }, { "epoch": 1.0, "grad_norm": 1.5523881912231445, "learning_rate": 4.6396746479382413e-10, "loss": 0.5248, "step": 6629 }, { "epoch": 1.0, "grad_norm": 1.476560354232788, "learning_rate": 4.3141919491007655e-10, "loss": 0.5069, "step": 6630 }, { "epoch": 1.0, "grad_norm": 1.6130136251449585, "learning_rate": 4.000544316534738e-10, "loss": 0.415, "step": 6631 }, { "epoch": 1.0, "grad_norm": 1.4222397804260254, "learning_rate": 3.6987318244863234e-10, "loss": 0.4072, "step": 6632 }, { "epoch": 1.0, "grad_norm": 1.2574961185455322, "learning_rate": 3.408754544403925e-10, "loss": 0.4615, "step": 6633 }, { "epoch": 1.0, "grad_norm": 1.4597762823104858, "learning_rate": 3.130612544927081e-10, "loss": 0.447, "step": 6634 }, { "epoch": 1.0, "grad_norm": 1.1678069829940796, "learning_rate": 2.864305891903119e-10, "loss": 0.5228, "step": 6635 }, { "epoch": 1.0, "grad_norm": 1.0194542407989502, "learning_rate": 2.609834648364951e-10, "loss": 0.4578, "step": 6636 }, { "epoch": 1.0, "grad_norm": 1.2340418100357056, "learning_rate": 2.3671988745588294e-10, "loss": 0.4618, "step": 6637 }, { "epoch": 1.0, "grad_norm": 1.7295855283737183, "learning_rate": 2.1363986279221428e-10, "loss": 0.4562, "step": 6638 }, { "epoch": 1.0, "grad_norm": 1.233452320098877, "learning_rate": 1.917433963083415e-10, "loss": 0.4635, "step": 6639 }, { "epoch": 1.0, "grad_norm": 1.5712958574295044, "learning_rate": 1.71030493188451e-10, "loss": 0.4507, "step": 6640 }, { "epoch": 1.0, "grad_norm": 1.6380467414855957, "learning_rate": 1.5150115833528766e-10, "loss": 0.4426, "step": 6641 }, { "epoch": 1.0, "grad_norm": 1.2209997177124023, "learning_rate": 1.3315539637182018e-10, "loss": 0.5003, "step": 6642 }, { "epoch": 1.0, "grad_norm": 2.7006747722625732, "learning_rate": 1.15993211641241e-10, "loss": 0.4925, "step": 6643 }, { "epoch": 1.0, "grad_norm": 1.512739658355713, "learning_rate": 1.0001460820530107e-10, "loss": 0.5354, "step": 6644 }, { "epoch": 1.0, "grad_norm": 1.3235480785369873, "learning_rate": 8.521958984819556e-11, "loss": 0.4497, "step": 6645 }, { "epoch": 1.0, "grad_norm": 1.193535566329956, "learning_rate": 7.160816007045768e-11, "loss": 0.5099, "step": 6646 }, { "epoch": 1.0, "grad_norm": 1.1531627178192139, "learning_rate": 5.918032209506486e-11, "loss": 0.5128, "step": 6647 }, { "epoch": 1.0, "grad_norm": 1.1910308599472046, "learning_rate": 4.7936078864108116e-11, "loss": 0.4876, "step": 6648 }, { "epoch": 1.0, "grad_norm": 1.3204643726348877, "learning_rate": 3.787543303879204e-11, "loss": 0.4572, "step": 6649 }, { "epoch": 1.0, "grad_norm": 1.5355675220489502, "learning_rate": 2.8998387001100137e-11, "loss": 0.4722, "step": 6650 }, { "epoch": 1.0, "grad_norm": 1.2546058893203735, "learning_rate": 2.1304942852129472e-11, "loss": 0.4571, "step": 6651 }, { "epoch": 1.0, "grad_norm": 1.3445994853973389, "learning_rate": 1.4795102413200924e-11, "loss": 0.4434, "step": 6652 }, { "epoch": 1.0, "grad_norm": 1.0274502038955688, "learning_rate": 9.468867225859158e-12, "loss": 0.5254, "step": 6653 }, { "epoch": 1.0, "grad_norm": 1.941650629043579, "learning_rate": 5.3262385496521965e-12, "loss": 0.5441, "step": 6654 }, { "epoch": 1.0, "grad_norm": 1.151333212852478, "learning_rate": 2.3672173665723052e-12, "loss": 0.4815, "step": 6655 }, { "epoch": 1.0, "grad_norm": 1.2545979022979736, "learning_rate": 5.918043766151016e-13, "loss": 0.4571, "step": 6656 }, { "epoch": 1.0, "grad_norm": 1.5229897499084473, "learning_rate": 0.0, "loss": 0.6322, "step": 6657 }, { "epoch": 1.0, "step": 6657, "total_flos": 9.52097885147208e+18, "train_loss": 0.4997730143519338, "train_runtime": 20681.7324, "train_samples_per_second": 41.201, "train_steps_per_second": 0.322 } ], "logging_steps": 1.0, "max_steps": 6657, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 9.52097885147208e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }